/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.lucene.queryparser.flexible.standard.processors;

import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.queryparser.flexible.core.QueryNodeException;
import org.apache.lucene.queryparser.flexible.core.nodes.FieldQueryNode;
import org.apache.lucene.queryparser.flexible.core.nodes.FuzzyQueryNode;
import org.apache.lucene.queryparser.flexible.core.nodes.QueryNode;
import org.apache.lucene.queryparser.flexible.core.nodes.QuotedFieldQueryNode;
import org.apache.lucene.queryparser.flexible.core.processors.QueryNodeProcessorImpl;
import org.apache.lucene.queryparser.flexible.core.util.UnescapedCharSequence;
import org.apache.lucene.queryparser.flexible.standard.config.StandardQueryConfigHandler.ConfigurationKeys;
import org.apache.lucene.queryparser.flexible.standard.nodes.PrefixWildcardQueryNode;
import org.apache.lucene.queryparser.flexible.standard.nodes.TermRangeQueryNode;
import org.apache.lucene.queryparser.flexible.standard.nodes.WildcardQueryNode;
import org.apache.lucene.queryparser.flexible.standard.parser.StandardSyntaxParser;
import org.apache.lucene.search.PrefixQuery;
import org.apache.lucene.util.BytesRef;

The StandardSyntaxParser creates PrefixWildcardQueryNode nodes which have values containing the prefixed wildcard. However, Lucene PrefixQuery cannot contain the prefixed wildcard. So, this processor basically removed the prefixed wildcard from the PrefixWildcardQueryNode value.
See Also:
/** * The {@link StandardSyntaxParser} creates {@link PrefixWildcardQueryNode} nodes which * have values containing the prefixed wildcard. However, Lucene * {@link PrefixQuery} cannot contain the prefixed wildcard. So, this processor * basically removed the prefixed wildcard from the * {@link PrefixWildcardQueryNode} value. * * @see PrefixQuery * @see PrefixWildcardQueryNode */
public class WildcardQueryNodeProcessor extends QueryNodeProcessorImpl { private static final Pattern WILDCARD_PATTERN = Pattern.compile("(\\.)|([?*]+)"); // because we call utf8ToString, this will only work with the default TermToBytesRefAttribute private static String analyzeWildcard(Analyzer a, String field, String wildcard) { // best effort to not pass the wildcard characters through #normalize Matcher wildcardMatcher = WILDCARD_PATTERN.matcher(wildcard); StringBuilder sb = new StringBuilder(); int last = 0; while (wildcardMatcher.find()){ // continue if escaped char if (wildcardMatcher.group(1) != null){ continue; } if (wildcardMatcher.start() > 0){ String chunk = wildcard.substring(last, wildcardMatcher.start()); BytesRef normalized = a.normalize(field, chunk); sb.append(normalized.utf8ToString()); } //append the wildcard character sb.append(wildcardMatcher.group(2)); last = wildcardMatcher.end(); } if (last < wildcard.length()){ String chunk = wildcard.substring(last); BytesRef normalized = a.normalize(field, chunk); sb.append(normalized.utf8ToString()); } return sb.toString(); } public WildcardQueryNodeProcessor() { // empty constructor } @Override protected QueryNode postProcessNode(QueryNode node) throws QueryNodeException { // the old Lucene Parser ignores FuzzyQueryNode that are also PrefixWildcardQueryNode or WildcardQueryNode // we do the same here, also ignore empty terms if (node instanceof FieldQueryNode || node instanceof FuzzyQueryNode) { FieldQueryNode fqn = (FieldQueryNode) node; CharSequence text = fqn.getText(); // do not process wildcards for TermRangeQueryNode children and // QuotedFieldQueryNode to reproduce the old parser behavior if (fqn.getParent() instanceof TermRangeQueryNode || fqn instanceof QuotedFieldQueryNode || text.length() <= 0){ // Ignore empty terms return node; } // Code below simulates the old lucene parser behavior for wildcards if (isWildcard(text)) { Analyzer analyzer = getQueryConfigHandler().get(ConfigurationKeys.ANALYZER); if (analyzer != null) { text = analyzeWildcard(analyzer, fqn.getFieldAsString(), text.toString()); } if (isPrefixWildcard(text)) { return new PrefixWildcardQueryNode(fqn.getField(), text, fqn.getBegin(), fqn.getEnd()); } else { return new WildcardQueryNode(fqn.getField(), text, fqn.getBegin(), fqn.getEnd()); } } } return node; } private boolean isWildcard(CharSequence text) { if (text ==null || text.length() <= 0) return false; // If a un-escaped '*' or '?' if found return true // start at the end since it's more common to put wildcards at the end for(int i=text.length()-1; i>=0; i--){ if ((text.charAt(i) == '*' || text.charAt(i) == '?') && !UnescapedCharSequence.wasEscaped(text, i)){ return true; } } return false; } private boolean isPrefixWildcard(CharSequence text) { if (text == null || text.length() <= 0 || !isWildcard(text)) return false; // Validate last character is a '*' and was not escaped // If single '*' is is a wildcard not prefix to simulate old queryparser if (text.charAt(text.length()-1) != '*') return false; if (UnescapedCharSequence.wasEscaped(text, text.length()-1)) return false; if (text.length() == 1) return false; // Only make a prefix if there is only one single star at the end and no '?' or '*' characters // If single wildcard return false to mimic old queryparser for(int i=0; i<text.length(); i++){ if (text.charAt(i) == '?') return false; if (text.charAt(i) == '*' && !UnescapedCharSequence.wasEscaped(text, i)){ if (i == text.length()-1) return true; else return false; } } return false; } @Override protected QueryNode preProcessNode(QueryNode node) throws QueryNodeException { return node; } @Override protected List<QueryNode> setChildrenOrder(List<QueryNode> children) throws QueryNodeException { return children; } }