/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 * 
 *      http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.xerces.impl.xpath.regex;

This class represents a character class such as [a-z] or a period.
@xerces.internal
Version:$Id: RangeToken.java 965250 2010-07-18 16:04:58Z mrglavas $
/** * This class represents a character class such as [a-z] or a period. * * @xerces.internal * * @version $Id: RangeToken.java 965250 2010-07-18 16:04:58Z mrglavas $ */
final class RangeToken extends Token implements java.io.Serializable { private static final long serialVersionUID = -553983121197679934L; int[] ranges; boolean sorted; boolean compacted; RangeToken icaseCache = null; int[] map = null; int nonMapIndex; RangeToken(int type) { super(type); this.setSorted(false); } // for RANGE or NRANGE protected void addRange(int start, int end) { this.icaseCache = null; //System.err.println("Token#addRange(): "+start+" "+end); int r1, r2; if (start <= end) { r1 = start; r2 = end; } else { r1 = end; r2 = start; } int pos = 0; if (this.ranges == null) { this.ranges = new int[2]; this.ranges[0] = r1; this.ranges[1] = r2; this.setSorted(true); } else { pos = this.ranges.length; if (this.ranges[pos-1]+1 == r1) { this.ranges[pos-1] = r2; return; } int[] temp = new int[pos+2]; System.arraycopy(this.ranges, 0, temp, 0, pos); this.ranges = temp; if (this.ranges[pos-1] >= r1) this.setSorted(false); this.ranges[pos++] = r1; this.ranges[pos] = r2; if (!this.sorted) this.sortRanges(); } } private final boolean isSorted() { return this.sorted; } private final void setSorted(boolean sort) { this.sorted = sort; if (!sort) this.compacted = false; } private final boolean isCompacted() { return this.compacted; } private final void setCompacted() { this.compacted = true; } protected void sortRanges() { if (this.isSorted()) return; if (this.ranges == null) return; //System.err.println("Do sorting: "+this.ranges.length); // Bubble sort // Why? -- In many cases, // this.ranges has few elements. for (int i = this.ranges.length-4; i >= 0; i -= 2) { for (int j = 0; j <= i; j += 2) { if (this.ranges[j] > this.ranges[j+2] || this.ranges[j] == this.ranges[j+2] && this.ranges[j+1] > this.ranges[j+3]) { int tmp; tmp = this.ranges[j+2]; this.ranges[j+2] = this.ranges[j]; this.ranges[j] = tmp; tmp = this.ranges[j+3]; this.ranges[j+3] = this.ranges[j+1]; this.ranges[j+1] = tmp; } } } this.setSorted(true); }
this.ranges is sorted.
/** * this.ranges is sorted. */
protected void compactRanges() { boolean DEBUG = false; if (this.ranges == null || this.ranges.length <= 2) return; if (this.isCompacted()) return; int base = 0; // Index of writing point int target = 0; // Index of processing point while (target < this.ranges.length) { if (base != target) { this.ranges[base] = this.ranges[target++]; this.ranges[base+1] = this.ranges[target++]; } else target += 2; int baseend = this.ranges[base+1]; while (target < this.ranges.length) { if (baseend+1 < this.ranges[target]) break; if (baseend+1 == this.ranges[target]) { if (DEBUG) System.err.println("Token#compactRanges(): Compaction: ["+this.ranges[base] +", "+this.ranges[base+1] +"], ["+this.ranges[target] +", "+this.ranges[target+1] +"] -> ["+this.ranges[base] +", "+this.ranges[target+1] +"]"); this.ranges[base+1] = this.ranges[target+1]; baseend = this.ranges[base+1]; target += 2; } else if (baseend >= this.ranges[target+1]) { if (DEBUG) System.err.println("Token#compactRanges(): Compaction: ["+this.ranges[base] +", "+this.ranges[base+1] +"], ["+this.ranges[target] +", "+this.ranges[target+1] +"] -> ["+this.ranges[base] +", "+this.ranges[base+1] +"]"); target += 2; } else if (baseend < this.ranges[target+1]) { if (DEBUG) System.err.println("Token#compactRanges(): Compaction: ["+this.ranges[base] +", "+this.ranges[base+1] +"], ["+this.ranges[target] +", "+this.ranges[target+1] +"] -> ["+this.ranges[base] +", "+this.ranges[target+1] +"]"); this.ranges[base+1] = this.ranges[target+1]; baseend = this.ranges[base+1]; target += 2; } else { throw new RuntimeException("Token#compactRanges(): Internel Error: [" +this.ranges[base] +","+this.ranges[base+1] +"] ["+this.ranges[target] +","+this.ranges[target+1]+"]"); } } // while base += 2; } if (base != this.ranges.length) { int[] result = new int[base]; System.arraycopy(this.ranges, 0, result, 0, base); this.ranges = result; } this.setCompacted(); } protected void mergeRanges(Token token) { RangeToken tok = (RangeToken)token; this.sortRanges(); tok.sortRanges(); if (tok.ranges == null) return; this.icaseCache = null; this.setSorted(true); if (this.ranges == null) { this.ranges = new int[tok.ranges.length]; System.arraycopy(tok.ranges, 0, this.ranges, 0, tok.ranges.length); return; } int[] result = new int[this.ranges.length+tok.ranges.length]; for (int i = 0, j = 0, k = 0; i < this.ranges.length || j < tok.ranges.length;) { if (i >= this.ranges.length) { result[k++] = tok.ranges[j++]; result[k++] = tok.ranges[j++]; } else if (j >= tok.ranges.length) { result[k++] = this.ranges[i++]; result[k++] = this.ranges[i++]; } else if (tok.ranges[j] < this.ranges[i] || tok.ranges[j] == this.ranges[i] && tok.ranges[j+1] < this.ranges[i+1]) { result[k++] = tok.ranges[j++]; result[k++] = tok.ranges[j++]; } else { result[k++] = this.ranges[i++]; result[k++] = this.ranges[i++]; } } this.ranges = result; } protected void subtractRanges(Token token) { if (token.type == NRANGE) { this.intersectRanges(token); return; } RangeToken tok = (RangeToken)token; if (tok.ranges == null || this.ranges == null) return; this.icaseCache = null; this.sortRanges(); this.compactRanges(); tok.sortRanges(); tok.compactRanges(); //System.err.println("Token#substractRanges(): Entry: "+this.ranges.length+", "+tok.ranges.length); int[] result = new int[this.ranges.length+tok.ranges.length]; int wp = 0, src = 0, sub = 0; while (src < this.ranges.length && sub < tok.ranges.length) { int srcbegin = this.ranges[src]; int srcend = this.ranges[src+1]; int subbegin = tok.ranges[sub]; int subend = tok.ranges[sub+1]; if (srcend < subbegin) { // Not overlapped // src: o-----o // sub: o-----o // res: o-----o // Reuse sub result[wp++] = this.ranges[src++]; result[wp++] = this.ranges[src++]; } else if (srcend >= subbegin && srcbegin <= subend) { // Overlapped // src: o--------o // sub: o----o // sub: o----o // sub: o----o // sub: o------------o if (subbegin <= srcbegin && srcend <= subend) { // src: o--------o // sub: o------------o // res: empty // Reuse sub src += 2; } else if (subbegin <= srcbegin) { // src: o--------o // sub: o----o // res: o-----o // Reuse src(=res) this.ranges[src] = subend+1; sub += 2; } else if (srcend <= subend) { // src: o--------o // sub: o----o // res: o-----o // Reuse sub result[wp++] = srcbegin; result[wp++] = subbegin-1; src += 2; } else { // src: o--------o // sub: o----o // res: o-o o-o // Reuse src(=right res) result[wp++] = srcbegin; result[wp++] = subbegin-1; this.ranges[src] = subend+1; sub += 2; } } else if (subend < srcbegin) { // Not overlapped // src: o-----o // sub: o----o sub += 2; } else { throw new RuntimeException("Token#subtractRanges(): Internal Error: ["+this.ranges[src] +","+this.ranges[src+1] +"] - ["+tok.ranges[sub] +","+tok.ranges[sub+1] +"]"); } } while (src < this.ranges.length) { result[wp++] = this.ranges[src++]; result[wp++] = this.ranges[src++]; } this.ranges = new int[wp]; System.arraycopy(result, 0, this.ranges, 0, wp); // this.ranges is sorted and compacted. }
Params:
  • tok – Ignore whether it is NRANGE or not.
/** * @param tok Ignore whether it is NRANGE or not. */
protected void intersectRanges(Token token) { RangeToken tok = (RangeToken)token; if (tok.ranges == null || this.ranges == null) return; this.icaseCache = null; this.sortRanges(); this.compactRanges(); tok.sortRanges(); tok.compactRanges(); int[] result = new int[this.ranges.length+tok.ranges.length]; int wp = 0, src1 = 0, src2 = 0; while (src1 < this.ranges.length && src2 < tok.ranges.length) { int src1begin = this.ranges[src1]; int src1end = this.ranges[src1+1]; int src2begin = tok.ranges[src2]; int src2end = tok.ranges[src2+1]; if (src1end < src2begin) { // Not overlapped // src1: o-----o // src2: o-----o // res: empty // Reuse src2 src1 += 2; } else if (src1end >= src2begin && src1begin <= src2end) { // Overlapped // src1: o--------o // src2: o----o // src2: o----o // src2: o----o // src2: o------------o if (src2begin <= src1begin && src1end <= src2end) { // src1: o--------o // src2: o------------o // res: o--------o // Reuse src2 result[wp++] = src1begin; result[wp++] = src1end; src1 += 2; } else if (src2begin <= src1begin) { // src1: o--------o // src2: o----o // res: o--o // Reuse the rest of src1 result[wp++] = src1begin; result[wp++] = src2end; this.ranges[src1] = src2end+1; src2 += 2; } else if (src1end <= src2end) { // src1: o--------o // src2: o----o // res: o--o // Reuse src2 result[wp++] = src2begin; result[wp++] = src1end; src1 += 2; } else { // src1: o--------o // src2: o----o // res: o----o // Reuse the rest of src1 result[wp++] = src2begin; result[wp++] = src2end; this.ranges[src1] = src2end+1; } } else if (src2end < src1begin) { // Not overlapped // src1: o-----o // src2: o----o src2 += 2; } else { throw new RuntimeException("Token#intersectRanges(): Internal Error: [" +this.ranges[src1] +","+this.ranges[src1+1] +"] & ["+tok.ranges[src2] +","+tok.ranges[src2+1] +"]"); } } while (src1 < this.ranges.length) { result[wp++] = this.ranges[src1++]; result[wp++] = this.ranges[src1++]; } this.ranges = new int[wp]; System.arraycopy(result, 0, this.ranges, 0, wp); // this.ranges is sorted and compacted. }
for RANGE: Creates complement. for NRANGE: Creates the same meaning RANGE.
/** * for RANGE: Creates complement. * for NRANGE: Creates the same meaning RANGE. */
static Token complementRanges(Token token) { if (token.type != RANGE && token.type != NRANGE) throw new IllegalArgumentException("Token#complementRanges(): must be RANGE: "+token.type); RangeToken tok = (RangeToken)token; tok.sortRanges(); tok.compactRanges(); int len = tok.ranges.length+2; if (tok.ranges[0] == 0) len -= 2; int last = tok.ranges[tok.ranges.length-1]; if (last == UTF16_MAX) len -= 2; RangeToken ret = Token.createRange(); ret.ranges = new int[len]; int wp = 0; if (tok.ranges[0] > 0) { ret.ranges[wp++] = 0; ret.ranges[wp++] = tok.ranges[0]-1; } for (int i = 1; i < tok.ranges.length-2; i += 2) { ret.ranges[wp++] = tok.ranges[i]+1; ret.ranges[wp++] = tok.ranges[i+1]-1; } if (last != UTF16_MAX) { ret.ranges[wp++] = last+1; ret.ranges[wp] = UTF16_MAX; } ret.setCompacted(); return ret; } synchronized RangeToken getCaseInsensitiveToken() { if (this.icaseCache != null) return this.icaseCache; RangeToken uppers = this.type == Token.RANGE ? Token.createRange() : Token.createNRange(); for (int i = 0; i < this.ranges.length; i += 2) { for (int ch = this.ranges[i]; ch <= this.ranges[i+1]; ch ++) { if (ch > 0xffff) uppers.addRange(ch, ch); else { char uch = Character.toUpperCase((char)ch); uppers.addRange(uch, uch); } } } RangeToken lowers = this.type == Token.RANGE ? Token.createRange() : Token.createNRange(); for (int i = 0; i < uppers.ranges.length; i += 2) { for (int ch = uppers.ranges[i]; ch <= uppers.ranges[i+1]; ch ++) { if (ch > 0xffff) lowers.addRange(ch, ch); else { char uch = Character.toLowerCase((char)ch); lowers.addRange(uch, uch); } } } lowers.mergeRanges(uppers); lowers.mergeRanges(this); lowers.compactRanges(); this.icaseCache = lowers; return lowers; } void dumpRanges() { System.err.print("RANGE: "); if (this.ranges == null) { System.err.println(" NULL"); return; } for (int i = 0; i < this.ranges.length; i += 2) { System.err.print("["+this.ranges[i]+","+this.ranges[i+1]+"] "); } System.err.println(""); } boolean match(int ch) { if (this.map == null) this.createMap(); boolean ret; if (this.type == RANGE) { if (ch < MAPSIZE) return (this.map[ch/32] & (1<<(ch&0x1f))) != 0; ret = false; for (int i = this.nonMapIndex; i < this.ranges.length; i += 2) { if (this.ranges[i] <= ch && ch <= this.ranges[i+1]) return true; } } else { if (ch < MAPSIZE) return (this.map[ch/32] & (1<<(ch&0x1f))) == 0; ret = true; for (int i = this.nonMapIndex; i < this.ranges.length; i += 2) { if (this.ranges[i] <= ch && ch <= this.ranges[i+1]) return false; } } return ret; } private static final int MAPSIZE = 256; private void createMap() { int asize = MAPSIZE/32; // 32 is the number of bits in `int'. int [] map = new int[asize]; int nonMapIndex = this.ranges.length; for (int i = 0; i < asize; ++i) { map[i] = 0; } for (int i = 0; i < this.ranges.length; i += 2) { int s = this.ranges[i]; int e = this.ranges[i+1]; if (s < MAPSIZE) { for (int j = s; j <= e && j < MAPSIZE; j++) { map[j/32] |= 1<<(j&0x1f); // s&0x1f : 0-31 } } else { nonMapIndex = i; break; } if (e >= MAPSIZE) { nonMapIndex = i; break; } } this.map = map; this.nonMapIndex = nonMapIndex; //for (int i = 0; i < asize; i ++) System.err.println("Map: "+Integer.toString(this.map[i], 16)); } public String toString(int options) { String ret; if (this.type == RANGE) { if (this == Token.token_dot) ret = "."; else if (this == Token.token_0to9) ret = "\\d"; else if (this == Token.token_wordchars) ret = "\\w"; else if (this == Token.token_spaces) ret = "\\s"; else { StringBuffer sb = new StringBuffer(); sb.append('['); for (int i = 0; i < this.ranges.length; i += 2) { if ((options & RegularExpression.SPECIAL_COMMA) != 0 && i > 0) sb.append(','); if (this.ranges[i] == this.ranges[i+1]) { sb.append(escapeCharInCharClass(this.ranges[i])); } else { sb.append(escapeCharInCharClass(this.ranges[i])); sb.append((char)'-'); sb.append(escapeCharInCharClass(this.ranges[i+1])); } } sb.append(']'); ret = sb.toString(); } } else { if (this == Token.token_not_0to9) ret = "\\D"; else if (this == Token.token_not_wordchars) ret = "\\W"; else if (this == Token.token_not_spaces) ret = "\\S"; else { StringBuffer sb = new StringBuffer(); sb.append("[^"); for (int i = 0; i < this.ranges.length; i += 2) { if ((options & RegularExpression.SPECIAL_COMMA) != 0 && i > 0) sb.append(','); if (this.ranges[i] == this.ranges[i+1]) { sb.append(escapeCharInCharClass(this.ranges[i])); } else { sb.append(escapeCharInCharClass(this.ranges[i])); sb.append('-'); sb.append(escapeCharInCharClass(this.ranges[i+1])); } } sb.append(']'); ret = sb.toString(); } } return ret; } private static String escapeCharInCharClass(int ch) { String ret; switch (ch) { case '[': case ']': case '-': case '^': case ',': case '\\': ret = "\\"+(char)ch; break; case '\f': ret = "\\f"; break; case '\n': ret = "\\n"; break; case '\r': ret = "\\r"; break; case '\t': ret = "\\t"; break; case 0x1b: ret = "\\e"; break; //case 0x0b: ret = "\\v"; break; default: if (ch < 0x20) { String pre = "0"+Integer.toHexString(ch); ret = "\\x"+pre.substring(pre.length()-2, pre.length()); } else if (ch >= 0x10000) { String pre = "0"+Integer.toHexString(ch); ret = "\\v"+pre.substring(pre.length()-6, pre.length()); } else ret = ""+(char)ch; } return ret; } }