/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 * 
 *      http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.xerces.impl.xpath.regex;

import java.text.CharacterIterator;

@xerces.internal
Version:$Id: REUtil.java 828015 2009-10-21 13:56:13Z knoaman $
/** * @xerces.internal * * @version $Id: REUtil.java 828015 2009-10-21 13:56:13Z knoaman $ */
public final class REUtil { private REUtil() { } static final int composeFromSurrogates(int high, int low) { return 0x10000 + ((high-0xd800)<<10) + low-0xdc00; } static final boolean isLowSurrogate(int ch) { return (ch & 0xfc00) == 0xdc00; } static final boolean isHighSurrogate(int ch) { return (ch & 0xfc00) == 0xd800; } static final String decomposeToSurrogates(int ch) { char[] chs = new char[2]; ch -= 0x10000; chs[0] = (char)((ch>>10)+0xd800); chs[1] = (char)((ch&0x3ff)+0xdc00); return new String(chs); } static final String substring(CharacterIterator iterator, int begin, int end) { char[] src = new char[end-begin]; for (int i = 0; i < src.length; i ++) src[i] = iterator.setIndex(i+begin); return new String(src); } // ================================================================ static final int getOptionValue(int ch) { int ret = 0; switch (ch) { case 'i': ret = RegularExpression.IGNORE_CASE; break; case 'm': ret = RegularExpression.MULTIPLE_LINES; break; case 's': ret = RegularExpression.SINGLE_LINE; break; case 'x': ret = RegularExpression.EXTENDED_COMMENT; break; case 'u': ret = RegularExpression.USE_UNICODE_CATEGORY; break; case 'w': ret = RegularExpression.UNICODE_WORD_BOUNDARY; break; case 'F': ret = RegularExpression.PROHIBIT_FIXED_STRING_OPTIMIZATION; break; case 'H': ret = RegularExpression.PROHIBIT_HEAD_CHARACTER_OPTIMIZATION; break; case 'X': ret = RegularExpression.XMLSCHEMA_MODE; break; case ',': ret = RegularExpression.SPECIAL_COMMA; break; default: } return ret; } static final int parseOptions(String opts) throws ParseException { if (opts == null) return 0; int options = 0; for (int i = 0; i < opts.length(); i ++) { int v = getOptionValue(opts.charAt(i)); if (v == 0) throw new ParseException("Unknown Option: "+opts.substring(i), -1); options |= v; } return options; } static final String createOptionString(int options) { StringBuffer sb = new StringBuffer(9); if ((options & RegularExpression.PROHIBIT_FIXED_STRING_OPTIMIZATION) != 0) sb.append((char)'F'); if ((options & RegularExpression.PROHIBIT_HEAD_CHARACTER_OPTIMIZATION) != 0) sb.append((char)'H'); if ((options & RegularExpression.XMLSCHEMA_MODE) != 0) sb.append((char)'X'); if ((options & RegularExpression.IGNORE_CASE) != 0) sb.append((char)'i'); if ((options & RegularExpression.MULTIPLE_LINES) != 0) sb.append((char)'m'); if ((options & RegularExpression.SINGLE_LINE) != 0) sb.append((char)'s'); if ((options & RegularExpression.USE_UNICODE_CATEGORY) != 0) sb.append((char)'u'); if ((options & RegularExpression.UNICODE_WORD_BOUNDARY) != 0) sb.append((char)'w'); if ((options & RegularExpression.EXTENDED_COMMENT) != 0) sb.append((char)'x'); if ((options & RegularExpression.SPECIAL_COMMA) != 0) sb.append((char)','); return sb.toString().intern(); } // ================================================================ static String stripExtendedComment(String regex) { int len = regex.length(); StringBuffer buffer = new StringBuffer(len); int offset = 0; int charClass = 0; while (offset < len) { int ch = regex.charAt(offset++); // Skips a white space. if (ch == '\t' || ch == '\n' || ch == '\f' || ch == '\r' || ch == ' ') { // if we are inside a character class, we keep the white space if (charClass > 0) { buffer.append((char)ch); } continue; } if (ch == '#') { // Skips chracters between '#' and a line end. while (offset < len) { ch = regex.charAt(offset++); if (ch == '\r' || ch == '\n') break; } continue; } int next; // Strips an escaped white space. if (ch == '\\' && offset < len) { if ((next = regex.charAt(offset)) == '#' || next == '\t' || next == '\n' || next == '\f' || next == '\r' || next == ' ') { buffer.append((char)next); offset ++; } else { // Other escaped character. buffer.append((char)'\\'); buffer.append((char)next); offset ++; } } else if (ch == '[') { charClass++; buffer.append((char)ch); if (offset < len) { next = regex.charAt(offset); if (next == '[' || next ==']') { buffer.append((char)next); offset ++; } else if (next == '^' && offset + 1 < len) { next = regex.charAt(offset + 1); if (next == '[' || next ==']') { buffer.append((char)'^'); buffer.append((char)next); offset += 2; } } } } else { if (charClass > 0 && ch == ']') { --charClass; } buffer.append((char)ch); } } return buffer.toString(); } // ================================================================
Sample entry.
Usage: org.apache.xerces.utils.regex.REUtil <regex> <string>
/** * Sample entry. * <div>Usage: <KBD>org.apache.xerces.utils.regex.REUtil &lt;regex&gt; &lt;string&gt;</KBD></div> */
public static void main(String[] argv) { String pattern = null; try { String options = ""; String target = null; if( argv.length == 0 ) { System.out.println( "Error:Usage: java REUtil -i|-m|-s|-u|-w|-X regularExpression String" ); System.exit( 0 ); } for (int i = 0; i < argv.length; i ++) { if (argv[i].length() == 0 || argv[i].charAt(0) != '-') { if (pattern == null) pattern = argv[i]; else if (target == null) target = argv[i]; else System.err.println("Unnecessary: "+argv[i]); } else if (argv[i].equals("-i")) { options += "i"; } else if (argv[i].equals("-m")) { options += "m"; } else if (argv[i].equals("-s")) { options += "s"; } else if (argv[i].equals("-u")) { options += "u"; } else if (argv[i].equals("-w")) { options += "w"; } else if (argv[i].equals("-X")) { options += "X"; } else { System.err.println("Unknown option: "+argv[i]); } } RegularExpression reg = new RegularExpression(pattern, options); System.out.println("RegularExpression: "+reg); Match match = new Match(); reg.matches(target, match); for (int i = 0; i < match.getNumberOfGroups(); i ++) { if (i == 0 ) System.out.print("Matched range for the whole pattern: "); else System.out.print("["+i+"]: "); if (match.getBeginning(i) < 0) System.out.println("-1"); else { System.out.print(match.getBeginning(i)+", "+match.getEnd(i)+", "); System.out.println("\""+match.getCapturedText(i)+"\""); } } } catch (ParseException pe) { if (pattern == null) { pe.printStackTrace(); } else { System.err.println("org.apache.xerces.utils.regex.ParseException: "+pe.getMessage()); String indent = " "; System.err.println(indent+pattern); int loc = pe.getLocation(); if (loc >= 0) { System.err.print(indent); for (int i = 0; i < loc; i ++) System.err.print("-"); System.err.println("^"); } } } catch (Exception e) { e.printStackTrace(); } } static final int CACHESIZE = 20; static final RegularExpression[] regexCache = new RegularExpression[CACHESIZE];
Creates a RegularExpression instance. This method caches created instances.
See Also:
  • RegularExpression.RegularExpression(String, String)
/** * Creates a RegularExpression instance. * This method caches created instances. * * @see RegularExpression#RegularExpression(java.lang.String, java.lang.String) */
public static RegularExpression createRegex(String pattern, String options) throws ParseException { RegularExpression re = null; int intOptions = REUtil.parseOptions(options); synchronized (REUtil.regexCache) { int i; for (i = 0; i < REUtil.CACHESIZE; i ++) { RegularExpression cached = REUtil.regexCache[i]; if (cached == null) { i = -1; break; } if (cached.equals(pattern, intOptions)) { re = cached; break; } } if (re != null) { if (i != 0) { System.arraycopy(REUtil.regexCache, 0, REUtil.regexCache, 1, i); REUtil.regexCache[0] = re; } } else { re = new RegularExpression(pattern, options); System.arraycopy(REUtil.regexCache, 0, REUtil.regexCache, 1, REUtil.CACHESIZE-1); REUtil.regexCache[0] = re; } } return re; }
See Also:
  • matches.matches(String)
/** * * @see RegularExpression#matches(java.lang.String) */
public static boolean matches(String regex, String target) throws ParseException { return REUtil.createRegex(regex, null).matches(target); }
See Also:
  • matches.matches(String)
/** * * @see RegularExpression#matches(java.lang.String) */
public static boolean matches(String regex, String options, String target) throws ParseException { return REUtil.createRegex(regex, options).matches(target); } // ================================================================ /** * */ public static String quoteMeta(String literal) { int len = literal.length(); StringBuffer buffer = null; for (int i = 0; i < len; i ++) { int ch = literal.charAt(i); if (".*+?{[()|\\^$".indexOf(ch) >= 0) { if (buffer == null) { buffer = new StringBuffer(i+(len-i)*2); if (i > 0) buffer.append(literal.substring(0, i)); } buffer.append((char)'\\'); buffer.append((char)ch); } else if (buffer != null) buffer.append((char)ch); } return buffer != null ? buffer.toString() : literal; } // ================================================================ static void dumpString(String v) { for (int i = 0; i < v.length(); i ++) { System.out.print(Integer.toHexString(v.charAt(i))); System.out.print(" "); } System.out.println(); } }