/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.lucene.queryparser.classic;

import java.io.StringReader;
import java.text.DateFormat;
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.DateTools;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryparser.classic.QueryParser.Operator;
import org.apache.lucene.queryparser.flexible.standard.CommonQueryParserConfiguration;
import org.apache.lucene.search.*;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.search.BooleanQuery.TooManyClauses;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefBuilder;
import org.apache.lucene.util.QueryBuilder;
import org.apache.lucene.util.automaton.RegExp;

import static org.apache.lucene.util.automaton.Operations.DEFAULT_MAX_DETERMINIZED_STATES;

This class is overridden by QueryParser in QueryParser.jj and acts to separate the majority of the Java code from the .jj grammar file.
/** This class is overridden by QueryParser in QueryParser.jj * and acts to separate the majority of the Java code from the .jj grammar file. */
public abstract class QueryParserBase extends QueryBuilder implements CommonQueryParserConfiguration { static final int CONJ_NONE = 0; static final int CONJ_AND = 1; static final int CONJ_OR = 2; static final int MOD_NONE = 0; static final int MOD_NOT = 10; static final int MOD_REQ = 11; // make it possible to call setDefaultOperator() without accessing // the nested class:
Alternative form of QueryParser.Operator.AND
/** Alternative form of QueryParser.Operator.AND */
public static final Operator AND_OPERATOR = Operator.AND;
Alternative form of QueryParser.Operator.OR
/** Alternative form of QueryParser.Operator.OR */
public static final Operator OR_OPERATOR = Operator.OR;
The actual operator that parser uses to combine query terms
/** The actual operator that parser uses to combine query terms */
Operator operator = OR_OPERATOR; MultiTermQuery.RewriteMethod multiTermRewriteMethod = MultiTermQuery.CONSTANT_SCORE_REWRITE; boolean allowLeadingWildcard = false; protected String field; int phraseSlop = 0; float fuzzyMinSim = FuzzyQuery.defaultMinSimilarity; int fuzzyPrefixLength = FuzzyQuery.defaultPrefixLength; Locale locale = Locale.getDefault(); TimeZone timeZone = TimeZone.getDefault(); // the default date resolution DateTools.Resolution dateResolution = null; // maps field names to date resolutions Map<String,DateTools.Resolution> fieldToDateResolution = null; boolean autoGeneratePhraseQueries; int maxDeterminizedStates = DEFAULT_MAX_DETERMINIZED_STATES; // So the generated QueryParser(CharStream) won't error out protected QueryParserBase() { super(null); }
Initializes a query parser. Called by the QueryParser constructor @param f the default field for query terms. @param a used to find terms in the query text.
/** Initializes a query parser. Called by the QueryParser constructor * @param f the default field for query terms. * @param a used to find terms in the query text. */
public void init(String f, Analyzer a) { setAnalyzer(a); field = f; setAutoGeneratePhraseQueries(false); } // the generated parser will create these in QueryParser public abstract void ReInit(CharStream stream); public abstract Query TopLevelQuery(String field) throws ParseException;
Parses a query string, returning a Query. @param query the query string to be parsed. @throws ParseException if the parsing fails
/** Parses a query string, returning a {@link org.apache.lucene.search.Query}. * @param query the query string to be parsed. * @throws ParseException if the parsing fails */
public Query parse(String query) throws ParseException { ReInit(new FastCharStream(new StringReader(query))); try { // TopLevelQuery is a Query followed by the end-of-input (EOF) Query res = TopLevelQuery(field); return res!=null ? res : newBooleanQuery().build(); } catch (ParseException | TokenMgrError tme) { // rethrow to include the original query: ParseException e = new ParseException("Cannot parse '" +query+ "': " + tme.getMessage()); e.initCause(tme); throw e; } catch (BooleanQuery.TooManyClauses tmc) { ParseException e = new ParseException("Cannot parse '" +query+ "': too many boolean clauses"); e.initCause(tmc); throw e; } }
Returns:Returns the default field.
/** * @return Returns the default field. */
public String getField() { return field; }
See Also:
  • setAutoGeneratePhraseQueries(boolean)
/** * @see #setAutoGeneratePhraseQueries(boolean) */
public final boolean getAutoGeneratePhraseQueries() { return autoGeneratePhraseQueries; }
Set to true if phrase queries will be automatically generated when the analyzer returns more than one term from whitespace delimited text. NOTE: this behavior may not be suitable for all languages.

Set to false if phrase queries should only be generated when surrounded by double quotes.

/** * Set to true if phrase queries will be automatically generated * when the analyzer returns more than one term from whitespace * delimited text. * NOTE: this behavior may not be suitable for all languages. * <p> * Set to false if phrase queries should only be generated when * surrounded by double quotes. */
public void setAutoGeneratePhraseQueries(boolean value) { this.autoGeneratePhraseQueries = value; }
Get the minimal similarity for fuzzy queries.
/** * Get the minimal similarity for fuzzy queries. */
@Override public float getFuzzyMinSim() { return fuzzyMinSim; }
Set the minimum similarity for fuzzy queries. Default is 2f.
/** * Set the minimum similarity for fuzzy queries. * Default is 2f. */
@Override public void setFuzzyMinSim(float fuzzyMinSim) { this.fuzzyMinSim = fuzzyMinSim; }
Get the prefix length for fuzzy queries.
Returns:Returns the fuzzyPrefixLength.
/** * Get the prefix length for fuzzy queries. * @return Returns the fuzzyPrefixLength. */
@Override public int getFuzzyPrefixLength() { return fuzzyPrefixLength; }
Set the prefix length for fuzzy queries. Default is 0.
Params:
  • fuzzyPrefixLength – The fuzzyPrefixLength to set.
/** * Set the prefix length for fuzzy queries. Default is 0. * @param fuzzyPrefixLength The fuzzyPrefixLength to set. */
@Override public void setFuzzyPrefixLength(int fuzzyPrefixLength) { this.fuzzyPrefixLength = fuzzyPrefixLength; }
Sets the default slop for phrases. If zero, then exact phrase matches are required. Default value is zero.
/** * Sets the default slop for phrases. If zero, then exact phrase matches * are required. Default value is zero. */
@Override public void setPhraseSlop(int phraseSlop) { this.phraseSlop = phraseSlop; }
Gets the default slop for phrases.
/** * Gets the default slop for phrases. */
@Override public int getPhraseSlop() { return phraseSlop; }
Set to true to allow leading wildcard characters.

When set, * or ? are allowed as the first character of a PrefixQuery and WildcardQuery. Note that this can produce very slow queries on big indexes.

Default: false.

/** * Set to <code>true</code> to allow leading wildcard characters. * <p> * When set, <code>*</code> or <code>?</code> are allowed as * the first character of a PrefixQuery and WildcardQuery. * Note that this can produce very slow * queries on big indexes. * <p> * Default: false. */
@Override public void setAllowLeadingWildcard(boolean allowLeadingWildcard) { this.allowLeadingWildcard = allowLeadingWildcard; }
See Also:
  • setAllowLeadingWildcard(boolean)
/** * @see #setAllowLeadingWildcard(boolean) */
@Override public boolean getAllowLeadingWildcard() { return allowLeadingWildcard; }
Sets the boolean operator of the QueryParser. In default mode (OR_OPERATOR) terms without any modifiers are considered optional: for example capital of Hungary is equal to capital OR of OR Hungary.
In AND_OPERATOR mode terms are considered to be in conjunction: the above mentioned query is parsed as capital AND of AND Hungary
/** * Sets the boolean operator of the QueryParser. * In default mode (<code>OR_OPERATOR</code>) terms without any modifiers * are considered optional: for example <code>capital of Hungary</code> is equal to * <code>capital OR of OR Hungary</code>.<br> * In <code>AND_OPERATOR</code> mode terms are considered to be in conjunction: the * above mentioned query is parsed as <code>capital AND of AND Hungary</code> */
public void setDefaultOperator(Operator op) { this.operator = op; }
Gets implicit operator setting, which will be either AND_OPERATOR or OR_OPERATOR.
/** * Gets implicit operator setting, which will be either AND_OPERATOR * or OR_OPERATOR. */
public Operator getDefaultOperator() { return operator; }
By default QueryParser uses MultiTermQuery.CONSTANT_SCORE_REWRITE when creating a PrefixQuery, WildcardQuery or TermRangeQuery. This implementation is generally preferable because it a) Runs faster b) Does not have the scarcity of terms unduly influence score c) avoids any TooManyClauses exception. However, if your application really needs to use the old-fashioned BooleanQuery expansion rewriting and the above points are not relevant then use this to change the rewrite method.
/** * By default QueryParser uses {@link org.apache.lucene.search.MultiTermQuery#CONSTANT_SCORE_REWRITE} * when creating a {@link PrefixQuery}, {@link WildcardQuery} or {@link TermRangeQuery}. This implementation is generally preferable because it * a) Runs faster b) Does not have the scarcity of terms unduly influence score * c) avoids any {@link TooManyClauses} exception. * However, if your application really needs to use the * old-fashioned {@link BooleanQuery} expansion rewriting and the above * points are not relevant then use this to change * the rewrite method. */
@Override public void setMultiTermRewriteMethod(MultiTermQuery.RewriteMethod method) { multiTermRewriteMethod = method; }
See Also:
  • setMultiTermRewriteMethod
/** * @see #setMultiTermRewriteMethod */
@Override public MultiTermQuery.RewriteMethod getMultiTermRewriteMethod() { return multiTermRewriteMethod; }
Set locale used by date range parsing, lowercasing, and other locale-sensitive operations.
/** * Set locale used by date range parsing, lowercasing, and other * locale-sensitive operations. */
@Override public void setLocale(Locale locale) { this.locale = locale; }
Returns current locale, allowing access by subclasses.
/** * Returns current locale, allowing access by subclasses. */
@Override public Locale getLocale() { return locale; } @Override public void setTimeZone(TimeZone timeZone) { this.timeZone = timeZone; } @Override public TimeZone getTimeZone() { return timeZone; }
Sets the default date resolution used by RangeQueries for fields for which no specific date resolutions has been set. Field specific resolutions can be set with setDateResolution(String, Resolution).
Params:
  • dateResolution – the default date resolution to set
/** * Sets the default date resolution used by RangeQueries for fields for which no * specific date resolutions has been set. Field specific resolutions can be set * with {@link #setDateResolution(String, org.apache.lucene.document.DateTools.Resolution)}. * * @param dateResolution the default date resolution to set */
@Override public void setDateResolution(DateTools.Resolution dateResolution) { this.dateResolution = dateResolution; }
Sets the date resolution used by RangeQueries for a specific field.
Params:
  • fieldName – field for which the date resolution is to be set
  • dateResolution – date resolution to set
/** * Sets the date resolution used by RangeQueries for a specific field. * * @param fieldName field for which the date resolution is to be set * @param dateResolution date resolution to set */
public void setDateResolution(String fieldName, DateTools.Resolution dateResolution) { if (fieldName == null) { throw new IllegalArgumentException("Field must not be null."); } if (fieldToDateResolution == null) { // lazily initialize HashMap fieldToDateResolution = new HashMap<>(); } fieldToDateResolution.put(fieldName, dateResolution); }
Returns the date resolution that is used by RangeQueries for the given field. Returns null, if no default or field specific date resolution has been set for the given field.
/** * Returns the date resolution that is used by RangeQueries for the given field. * Returns null, if no default or field specific date resolution has been set * for the given field. * */
public DateTools.Resolution getDateResolution(String fieldName) { if (fieldName == null) { throw new IllegalArgumentException("Field must not be null."); } if (fieldToDateResolution == null) { // no field specific date resolutions set; return default date resolution instead return this.dateResolution; } DateTools.Resolution resolution = fieldToDateResolution.get(fieldName); if (resolution == null) { // no date resolutions set for the given field; return default date resolution instead resolution = this.dateResolution; } return resolution; }
Params:
  • maxDeterminizedStates – the maximum number of states that determinizing a regexp query can result in. If the query results in any more states a TooComplexToDeterminizeException is thrown.
/** * @param maxDeterminizedStates the maximum number of states that * determinizing a regexp query can result in. If the query results in any * more states a TooComplexToDeterminizeException is thrown. */
public void setMaxDeterminizedStates(int maxDeterminizedStates) { this.maxDeterminizedStates = maxDeterminizedStates; }
Returns:the maximum number of states that determinizing a regexp query can result in. If the query results in any more states a TooComplexToDeterminizeException is thrown.
/** * @return the maximum number of states that determinizing a regexp query * can result in. If the query results in any more states a * TooComplexToDeterminizeException is thrown. */
public int getMaxDeterminizedStates() { return maxDeterminizedStates; } protected void addClause(List<BooleanClause> clauses, int conj, int mods, Query q) { boolean required, prohibited; // If this term is introduced by AND, make the preceding term required, // unless it's already prohibited if (clauses.size() > 0 && conj == CONJ_AND) { BooleanClause c = clauses.get(clauses.size()-1); if (!c.isProhibited()) clauses.set(clauses.size() - 1, new BooleanClause(c.getQuery(), Occur.MUST)); } if (clauses.size() > 0 && operator == AND_OPERATOR && conj == CONJ_OR) { // If this term is introduced by OR, make the preceding term optional, // unless it's prohibited (that means we leave -a OR b but +a OR b-->a OR b) // notice if the input is a OR b, first term is parsed as required; without // this modification a OR b would parsed as +a OR b BooleanClause c = clauses.get(clauses.size()-1); if (!c.isProhibited()) clauses.set(clauses.size() - 1, new BooleanClause(c.getQuery(), Occur.SHOULD)); } // We might have been passed a null query; the term might have been // filtered away by the analyzer. if (q == null) return; if (operator == OR_OPERATOR) { // We set REQUIRED if we're introduced by AND or +; PROHIBITED if // introduced by NOT or -; make sure not to set both. prohibited = (mods == MOD_NOT); required = (mods == MOD_REQ); if (conj == CONJ_AND && !prohibited) { required = true; } } else { // We set PROHIBITED if we're introduced by NOT or -; We set REQUIRED // if not PROHIBITED and not introduced by OR prohibited = (mods == MOD_NOT); required = (!prohibited && conj != CONJ_OR); } if (required && !prohibited) clauses.add(newBooleanClause(q, BooleanClause.Occur.MUST)); else if (!required && !prohibited) clauses.add(newBooleanClause(q, BooleanClause.Occur.SHOULD)); else if (!required && prohibited) clauses.add(newBooleanClause(q, BooleanClause.Occur.MUST_NOT)); else throw new RuntimeException("Clause cannot be both required and prohibited"); }
Adds clauses generated from analysis over text containing whitespace. There are no operators, so the query's clauses can either be MUST (if the default operator is AND) or SHOULD (default OR). If all of the clauses in the given Query are TermQuery-s, this method flattens the result by adding the TermQuery-s individually to the output clause list; otherwise, the given Query is added as a single clause including its nested clauses.
/** * Adds clauses generated from analysis over text containing whitespace. * There are no operators, so the query's clauses can either be MUST (if the * default operator is AND) or SHOULD (default OR). * * If all of the clauses in the given Query are TermQuery-s, this method flattens the result * by adding the TermQuery-s individually to the output clause list; otherwise, the given Query * is added as a single clause including its nested clauses. */
protected void addMultiTermClauses(List<BooleanClause> clauses, Query q) { // We might have been passed a null query; the term might have been // filtered away by the analyzer. if (q == null) { return; } boolean allNestedTermQueries = false; if (q instanceof BooleanQuery) { allNestedTermQueries = true; for (BooleanClause clause : ((BooleanQuery)q).clauses()) { if ( ! (clause.getQuery() instanceof TermQuery)) { allNestedTermQueries = false; break; } } } if (allNestedTermQueries) { clauses.addAll(((BooleanQuery)q).clauses()); } else { BooleanClause.Occur occur = operator == OR_OPERATOR ? BooleanClause.Occur.SHOULD : BooleanClause.Occur.MUST; if (q instanceof BooleanQuery) { for (BooleanClause clause : ((BooleanQuery)q).clauses()) { clauses.add(newBooleanClause(clause.getQuery(), occur)); } } else { clauses.add(newBooleanClause(q, occur)); } } }
Throws:
  • ParseException – throw in overridden method to disallow
/** * @exception org.apache.lucene.queryparser.classic.ParseException throw in overridden method to disallow */
protected Query getFieldQuery(String field, String queryText, boolean quoted) throws ParseException { return newFieldQuery(getAnalyzer(), field, queryText, quoted); }
Throws:
  • ParseException – throw in overridden method to disallow
/** * @exception org.apache.lucene.queryparser.classic.ParseException throw in overridden method to disallow */
protected Query newFieldQuery(Analyzer analyzer, String field, String queryText, boolean quoted) throws ParseException { BooleanClause.Occur occur = operator == Operator.AND ? BooleanClause.Occur.MUST : BooleanClause.Occur.SHOULD; return createFieldQuery(analyzer, occur, field, queryText, quoted || autoGeneratePhraseQueries, phraseSlop); }
Base implementation delegates to getFieldQuery(String, String, boolean). This method may be overridden, for example, to return a SpanNearQuery instead of a PhraseQuery.
Throws:
/** * Base implementation delegates to {@link #getFieldQuery(String,String,boolean)}. * This method may be overridden, for example, to return * a SpanNearQuery instead of a PhraseQuery. * * @exception org.apache.lucene.queryparser.classic.ParseException throw in overridden method to disallow */
protected Query getFieldQuery(String field, String queryText, int slop) throws ParseException { Query query = getFieldQuery(field, queryText, true); if (query instanceof PhraseQuery) { query = addSlopToPhrase((PhraseQuery) query, slop); } else if (query instanceof MultiPhraseQuery) { MultiPhraseQuery mpq = (MultiPhraseQuery)query; if (slop != mpq.getSlop()) { query = new MultiPhraseQuery.Builder(mpq).setSlop(slop).build(); } } return query; }
Rebuild a phrase query with a slop value
/** * Rebuild a phrase query with a slop value */
private PhraseQuery addSlopToPhrase(PhraseQuery query, int slop) { PhraseQuery.Builder builder = new PhraseQuery.Builder(); builder.setSlop(slop); org.apache.lucene.index.Term[] terms = query.getTerms(); int[] positions = query.getPositions(); for (int i = 0; i < terms.length; ++i) { builder.add(terms[i], positions[i]); } return builder.build(); } protected Query getRangeQuery(String field, String part1, String part2, boolean startInclusive, boolean endInclusive) throws ParseException { DateFormat df = DateFormat.getDateInstance(DateFormat.SHORT, locale); df.setLenient(true); DateTools.Resolution resolution = getDateResolution(field); try { part1 = DateTools.dateToString(df.parse(part1), resolution); } catch (Exception e) { } try { Date d2 = df.parse(part2); if (endInclusive) { // The user can only specify the date, not the time, so make sure // the time is set to the latest possible time of that date to really // include all documents: Calendar cal = Calendar.getInstance(timeZone, locale); cal.setTime(d2); cal.set(Calendar.HOUR_OF_DAY, 23); cal.set(Calendar.MINUTE, 59); cal.set(Calendar.SECOND, 59); cal.set(Calendar.MILLISECOND, 999); d2 = cal.getTime(); } part2 = DateTools.dateToString(d2, resolution); } catch (Exception e) { } return newRangeQuery(field, part1, part2, startInclusive, endInclusive); }
Builds a new BooleanClause instance
Params:
  • q – sub query
  • occur – how this clause should occur when matching documents
Returns:new BooleanClause instance
/** * Builds a new BooleanClause instance * @param q sub query * @param occur how this clause should occur when matching documents * @return new BooleanClause instance */
protected BooleanClause newBooleanClause(Query q, BooleanClause.Occur occur) { return new BooleanClause(q, occur); }
Builds a new PrefixQuery instance
Params:
  • prefix – Prefix term
Returns:new PrefixQuery instance
/** * Builds a new PrefixQuery instance * @param prefix Prefix term * @return new PrefixQuery instance */
protected Query newPrefixQuery(Term prefix){ PrefixQuery query = new PrefixQuery(prefix); query.setRewriteMethod(multiTermRewriteMethod); return query; }
Builds a new RegexpQuery instance
Params:
  • regexp – Regexp term
Returns:new RegexpQuery instance
/** * Builds a new RegexpQuery instance * @param regexp Regexp term * @return new RegexpQuery instance */
protected Query newRegexpQuery(Term regexp) { RegexpQuery query = new RegexpQuery(regexp, RegExp.ALL, maxDeterminizedStates); query.setRewriteMethod(multiTermRewriteMethod); return query; }
Builds a new FuzzyQuery instance
Params:
  • term – Term
  • minimumSimilarity – minimum similarity
  • prefixLength – prefix length
Returns:new FuzzyQuery Instance
/** * Builds a new FuzzyQuery instance * @param term Term * @param minimumSimilarity minimum similarity * @param prefixLength prefix length * @return new FuzzyQuery Instance */
protected Query newFuzzyQuery(Term term, float minimumSimilarity, int prefixLength) { // FuzzyQuery doesn't yet allow constant score rewrite String text = term.text(); int numEdits = FuzzyQuery.floatToEdits(minimumSimilarity, text.codePointCount(0, text.length())); return new FuzzyQuery(term,numEdits,prefixLength); }
Builds a new TermRangeQuery instance
Params:
  • field – Field
  • part1 – min
  • part2 – max
  • startInclusive – true if the start of the range is inclusive
  • endInclusive – true if the end of the range is inclusive
Returns:new TermRangeQuery instance
/** * Builds a new {@link TermRangeQuery} instance * @param field Field * @param part1 min * @param part2 max * @param startInclusive true if the start of the range is inclusive * @param endInclusive true if the end of the range is inclusive * @return new {@link TermRangeQuery} instance */
protected Query newRangeQuery(String field, String part1, String part2, boolean startInclusive, boolean endInclusive) { final BytesRef start; final BytesRef end; if (part1 == null) { start = null; } else { start = getAnalyzer().normalize(field, part1); } if (part2 == null) { end = null; } else { end = getAnalyzer().normalize(field, part2); } final TermRangeQuery query = new TermRangeQuery(field, start, end, startInclusive, endInclusive); query.setRewriteMethod(multiTermRewriteMethod); return query; }
Builds a new MatchAllDocsQuery instance
Returns:new MatchAllDocsQuery instance
/** * Builds a new MatchAllDocsQuery instance * @return new MatchAllDocsQuery instance */
protected Query newMatchAllDocsQuery() { return new MatchAllDocsQuery(); }
Builds a new WildcardQuery instance
Params:
  • t – wildcard term
Returns:new WildcardQuery instance
/** * Builds a new WildcardQuery instance * @param t wildcard term * @return new WildcardQuery instance */
protected Query newWildcardQuery(Term t) { WildcardQuery query = new WildcardQuery(t, maxDeterminizedStates); query.setRewriteMethod(multiTermRewriteMethod); return query; }
Factory method for generating query, given a set of clauses. By default creates a boolean query composed of clauses passed in. Can be overridden by extending classes, to modify query being returned.
Params:
Throws:
Returns:Resulting Query object.
/** * Factory method for generating query, given a set of clauses. * By default creates a boolean query composed of clauses passed in. * * Can be overridden by extending classes, to modify query being * returned. * * @param clauses List that contains {@link org.apache.lucene.search.BooleanClause} instances * to join. * * @return Resulting {@link org.apache.lucene.search.Query} object. * @exception org.apache.lucene.queryparser.classic.ParseException throw in overridden method to disallow */
protected Query getBooleanQuery(List<BooleanClause> clauses) throws ParseException { if (clauses.size()==0) { return null; // all clause words were filtered away by the analyzer. } BooleanQuery.Builder query = newBooleanQuery(); for(final BooleanClause clause: clauses) { query.add(clause); } return query.build(); }
Factory method for generating a query. Called when parser parses an input term token that contains one or more wildcard characters (? and *), but is not a prefix term token (one that has just a single * character at the end)

Depending on settings, prefix term may be lower-cased automatically. It will not go through the default Analyzer, however, since normal Analyzers are unlikely to work properly with wildcard templates.

Can be overridden by extending classes, to provide custom handling for wildcard queries, which may be necessary due to missing analyzer calls.

Params:
  • field – Name of the field query will use.
  • termStr – Term token that contains one or more wild card characters (? or *), but is not simple prefix term
Throws:
Returns:Resulting Query built for the term
/** * Factory method for generating a query. Called when parser * parses an input term token that contains one or more wildcard * characters (? and *), but is not a prefix term token (one * that has just a single * character at the end) *<p> * Depending on settings, prefix term may be lower-cased * automatically. It will not go through the default Analyzer, * however, since normal Analyzers are unlikely to work properly * with wildcard templates. *<p> * Can be overridden by extending classes, to provide custom handling for * wildcard queries, which may be necessary due to missing analyzer calls. * * @param field Name of the field query will use. * @param termStr Term token that contains one or more wild card * characters (? or *), but is not simple prefix term * * @return Resulting {@link org.apache.lucene.search.Query} built for the term * @exception org.apache.lucene.queryparser.classic.ParseException throw in overridden method to disallow */
protected Query getWildcardQuery(String field, String termStr) throws ParseException { if ("*".equals(field)) { if ("*".equals(termStr)) return newMatchAllDocsQuery(); } if (!allowLeadingWildcard && (termStr.startsWith("*") || termStr.startsWith("?"))) throw new ParseException("'*' or '?' not allowed as first character in WildcardQuery"); Term t = new Term(field, analyzeWildcard(field, termStr)); return newWildcardQuery(t); } private static final Pattern WILDCARD_PATTERN = Pattern.compile("(\\\\.)|([?*]+)"); private BytesRef analyzeWildcard(String field, String termStr) { // best effort to not pass the wildcard characters and escaped characters through #normalize Matcher wildcardMatcher = WILDCARD_PATTERN.matcher(termStr); BytesRefBuilder sb = new BytesRefBuilder(); int last = 0; while (wildcardMatcher.find()){ if (wildcardMatcher.start() > 0) { String chunk = termStr.substring(last, wildcardMatcher.start()); BytesRef normalized = getAnalyzer().normalize(field, chunk); sb.append(normalized); } //append the matched group - without normalizing sb.append(new BytesRef(wildcardMatcher.group())); last = wildcardMatcher.end(); } if (last < termStr.length()){ String chunk = termStr.substring(last); BytesRef normalized = getAnalyzer().normalize(field, chunk); sb.append(normalized); } return sb.toBytesRef(); }
Factory method for generating a query. Called when parser parses an input term token that contains a regular expression query.

Depending on settings, pattern term may be lower-cased automatically. It will not go through the default Analyzer, however, since normal Analyzers are unlikely to work properly with regular expression templates.

Can be overridden by extending classes, to provide custom handling for regular expression queries, which may be necessary due to missing analyzer calls.

Params:
  • field – Name of the field query will use.
  • termStr – Term token that contains a regular expression
Throws:
Returns:Resulting Query built for the term
/** * Factory method for generating a query. Called when parser * parses an input term token that contains a regular expression * query. *<p> * Depending on settings, pattern term may be lower-cased * automatically. It will not go through the default Analyzer, * however, since normal Analyzers are unlikely to work properly * with regular expression templates. *<p> * Can be overridden by extending classes, to provide custom handling for * regular expression queries, which may be necessary due to missing analyzer * calls. * * @param field Name of the field query will use. * @param termStr Term token that contains a regular expression * * @return Resulting {@link org.apache.lucene.search.Query} built for the term * @exception org.apache.lucene.queryparser.classic.ParseException throw in overridden method to disallow */
protected Query getRegexpQuery(String field, String termStr) throws ParseException { // We need to pass the whole string to #normalize, which will not work with // custom attribute factories for the binary term impl, and may not work // with some analyzers BytesRef term = getAnalyzer().normalize(field, termStr); Term t = new Term(field, term); return newRegexpQuery(t); }
Factory method for generating a query (similar to getWildcardQuery). Called when parser parses an input term token that uses prefix notation; that is, contains a single '*' wildcard character as its last character. Since this is a special case of generic wildcard term, and such a query can be optimized easily, this usually results in a different query object.

Depending on settings, a prefix term may be lower-cased automatically. It will not go through the default Analyzer, however, since normal Analyzers are unlikely to work properly with wildcard templates.

Can be overridden by extending classes, to provide custom handling for wild card queries, which may be necessary due to missing analyzer calls.

Params:
  • field – Name of the field query will use.
  • termStr – Term token to use for building term for the query (without trailing '*' character!)
Throws:
Returns:Resulting Query built for the term
/**
 * Factory method for generating a query (similar to
 * {@link #getWildcardQuery}). Called when the parser parses an input term
 * token that uses prefix notation; that is, contains a single '*' wildcard
 * character as its last character. Since this is a special case
 * of a generic wildcard term, and such a query can be optimized easily,
 * this usually results in a different query object.
 * <p>
 * Depending on settings, a prefix term may be normalized
 * automatically. It will not go through the default Analyzer,
 * however, since normal Analyzers are unlikely to work properly
 * with wildcard templates.
 * <p>
 * Can be overridden by extending classes, to provide custom handling for
 * wild card queries, which may be necessary due to missing analyzer calls.
 *
 * @param field Name of the field query will use.
 * @param termStr Term token to use for building term for the query
 *    (<b>without</b> trailing '*' character!)
 *
 * @return Resulting {@link org.apache.lucene.search.Query} built for the term
 * @exception org.apache.lucene.queryparser.classic.ParseException throw in overridden method to disallow
 */
protected Query getPrefixQuery(String field, String termStr) throws ParseException {
  // A leading '*' would turn this into a (potentially very expensive)
  // leading-wildcard query, which must be explicitly enabled.
  if (termStr.startsWith("*") && !allowLeadingWildcard) {
    throw new ParseException("'*' not allowed as first character in PrefixQuery");
  }
  BytesRef normalized = getAnalyzer().normalize(field, termStr);
  return newPrefixQuery(new Term(field, normalized));
}
Factory method for generating a query (similar to getWildcardQuery). Called when parser parses an input term token that has the fuzzy suffix (~) appended.
Params:
  • field – Name of the field query will use.
  • termStr – Term token to use for building term for the query
Throws:
  • ParseException – may be thrown in an overriding method to disallow fuzzy queries
Returns: Resulting Query built for the term
/**
 * Factory method for generating a query (similar to
 * {@link #getWildcardQuery}). Called when parser parses
 * an input term token that has the fuzzy suffix (~) appended.
 *
 * @param field Name of the field query will use.
 * @param termStr Term token to use for building term for the query
 *
 * @return Resulting {@link org.apache.lucene.search.Query} built for the term
 * @exception org.apache.lucene.queryparser.classic.ParseException throw in overridden method to disallow
 */
protected Query getFuzzyQuery(String field, String termStr, float minSimilarity) throws ParseException {
  // Normalize (rather than fully analyze) the term text; normal Analyzers
  // are not expected to cope with fuzzy-term templates.
  BytesRef term = getAnalyzer().normalize(field, termStr);
  Term t = new Term(field, term);
  return newFuzzyQuery(t, minSimilarity, fuzzyPrefixLength);
}

// extracted from the .jj grammar
/**
 * Dispatches a bare (unquoted) term token to the appropriate query factory,
 * according to which syntax flags the grammar detected on the token.
 * Exactly one of {@code prefix}/{@code wildcard}/{@code fuzzy}/{@code regexp}
 * is expected to be set by the grammar; if none is, a plain field query is built.
 */
Query handleBareTokenQuery(String qfield, Token term, Token fuzzySlop,
                           boolean prefix, boolean wildcard, boolean fuzzy,
                           boolean regexp) throws ParseException {
  Query q;
  String termImage = discardEscapeChar(term.image);
  if (wildcard) {
    // wildcard terms keep their raw image; escape handling is deferred to getWildcardQuery
    q = getWildcardQuery(qfield, term.image);
  } else if (prefix) {
    // strip the trailing '*' before building the prefix query
    q = getPrefixQuery(qfield,
        discardEscapeChar(term.image.substring(0, term.image.length() - 1)));
  } else if (regexp) {
    // strip the surrounding '/' delimiters
    q = getRegexpQuery(qfield, term.image.substring(1, term.image.length() - 1));
  } else if (fuzzy) {
    q = handleBareFuzzy(qfield, fuzzySlop, termImage);
  } else {
    q = getFieldQuery(qfield, termImage, false);
  }
  return q;
}

/**
 * Builds a fuzzy query for a bare term. The minimum similarity is parsed from
 * the fuzzy-slop token's image (the text after '~'); if it is absent or not a
 * valid float, {@code fuzzyMinSim} is used instead.
 */
Query handleBareFuzzy(String qfield, Token fuzzySlop, String termImage)
    throws ParseException {
  Query q;
  float fms = fuzzyMinSim;
  try {
    fms = Float.parseFloat(fuzzySlop.image.substring(1));
  } catch (Exception ignored) {
    // no (or malformed) explicit similarity after '~' — keep the default
  }
  if (fms < 0.0f) {
    throw new ParseException("Minimum similarity for a FuzzyQuery has to be between 0.0f and 1.0f !");
  } else if (fms >= 1.0f && fms != (int) fms) {
    // values >= 1 are interpreted as edit distances and must be integral
    throw new ParseException("Fractional edit distances are not allowed!");
  }
  q = getFuzzyQuery(qfield, termImage, fms);
  return q;
}

// extracted from the .jj grammar
/**
 * Builds a query for a quoted (phrase) term. A fuzzy-slop token after the
 * closing quote, if present and parseable, overrides the default phrase slop.
 */
Query handleQuotedTerm(String qfield, Token term, Token fuzzySlop)
    throws ParseException {
  int s = phraseSlop;  // default
  if (fuzzySlop != null) {
    try {
      s = (int) Float.parseFloat(fuzzySlop.image.substring(1));
    } catch (Exception ignored) {
      // malformed slop value — keep the default phrase slop
    }
  }
  // strip the surrounding quote characters before analysis
  return getFieldQuery(qfield,
      discardEscapeChar(term.image.substring(1, term.image.length() - 1)), s);
}

// extracted from the .jj grammar
/**
 * Wraps {@code q} in a {@link BoostQuery} when a boost token ('^' suffix) is
 * present. An unparsable boost value silently falls back to 1.0 (no boost);
 * a null query (e.g. one produced entirely from stop words) is returned as-is.
 */
Query handleBoost(Query q, Token boost) {
  if (boost != null) {
    float f = (float) 1.0;
    try {
      f = Float.parseFloat(boost.image);
    } catch (Exception ignored) {
      /* Should this be handled somehow? (defaults to "no boost", if
       * boost number is invalid) */
    }
    // avoid boosting null queries, such as those caused by stop words
    if (q != null) {
      q = new BoostQuery(q, f);
    }
  }
  return q;
}
Returns a String where the escape char has been removed, or kept only once if there was a double escape. Supports escaped unicode characters, e.g. translates \\u0041 to A.
/** * Returns a String where the escape char has been * removed, or kept only once if there was a double escape. * * Supports escaped unicode characters, e. g. translates * <code>\\u0041</code> to <code>A</code>. * */
String discardEscapeChar(String input) throws ParseException { // Create char array to hold unescaped char sequence char[] output = new char[input.length()]; // The length of the output can be less than the input // due to discarded escape chars. This variable holds // the actual length of the output int length = 0; // We remember whether the last processed character was // an escape character boolean lastCharWasEscapeChar = false; // The multiplier the current unicode digit must be multiplied with. // E. g. the first digit must be multiplied with 16^3, the second with 16^2... int codePointMultiplier = 0; // Used to calculate the codepoint of the escaped unicode character int codePoint = 0; for (int i = 0; i < input.length(); i++) { char curChar = input.charAt(i); if (codePointMultiplier > 0) { codePoint += hexToInt(curChar) * codePointMultiplier; codePointMultiplier >>>= 4; if (codePointMultiplier == 0) { output[length++] = (char)codePoint; codePoint = 0; } } else if (lastCharWasEscapeChar) { if (curChar == 'u') { // found an escaped unicode character codePointMultiplier = 16 * 16 * 16; } else { // this character was escaped output[length] = curChar; length++; } lastCharWasEscapeChar = false; } else { if (curChar == '\\') { lastCharWasEscapeChar = true; } else { output[length] = curChar; length++; } } } if (codePointMultiplier > 0) { throw new ParseException("Truncated unicode escape sequence."); } if (lastCharWasEscapeChar) { throw new ParseException("Term can not end with escape character."); } return new String(output, 0, length); }
Returns the numeric value of the hexadecimal character
/**
 * Returns the numeric value of the hexadecimal character.
 *
 * @param c a character in [0-9a-fA-F]
 * @return the digit's value, 0-15
 * @throws ParseException if {@code c} is not a hexadecimal digit
 */
static final int hexToInt(char c) throws ParseException {
  if (c >= '0' && c <= '9') {
    return c - '0';
  }
  if (c >= 'a' && c <= 'f') {
    return c - 'a' + 10;
  }
  if (c >= 'A' && c <= 'F') {
    return c - 'A' + 10;
  }
  throw new ParseException("Non-hex character in Unicode escape sequence: " + c);
}
Returns a String where those characters that QueryParser expects to be escaped are escaped by a preceding \.
/** * Returns a String where those characters that QueryParser * expects to be escaped are escaped by a preceding <code>\</code>. */
public static String escape(String s) { StringBuilder sb = new StringBuilder(); for (int i = 0; i < s.length(); i++) { char c = s.charAt(i); // These characters are part of the query syntax and must be escaped if (c == '\\' || c == '+' || c == '-' || c == '!' || c == '(' || c == ')' || c == ':' || c == '^' || c == '[' || c == ']' || c == '\"' || c == '{' || c == '}' || c == '~' || c == '*' || c == '?' || c == '|' || c == '&' || c == '/') { sb.append('\\'); } sb.append(c); } return sb.toString(); } }