package org.apache.lucene.queryparser.flexible.standard.processors;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.queryparser.flexible.core.QueryNodeException;
import org.apache.lucene.queryparser.flexible.core.nodes.FieldQueryNode;
import org.apache.lucene.queryparser.flexible.core.nodes.FuzzyQueryNode;
import org.apache.lucene.queryparser.flexible.core.nodes.QueryNode;
import org.apache.lucene.queryparser.flexible.core.nodes.QuotedFieldQueryNode;
import org.apache.lucene.queryparser.flexible.core.processors.QueryNodeProcessorImpl;
import org.apache.lucene.queryparser.flexible.core.util.UnescapedCharSequence;
import org.apache.lucene.queryparser.flexible.standard.config.StandardQueryConfigHandler.ConfigurationKeys;
import org.apache.lucene.queryparser.flexible.standard.nodes.PrefixWildcardQueryNode;
import org.apache.lucene.queryparser.flexible.standard.nodes.TermRangeQueryNode;
import org.apache.lucene.queryparser.flexible.standard.nodes.WildcardQueryNode;
import org.apache.lucene.queryparser.flexible.standard.parser.StandardSyntaxParser;
import org.apache.lucene.search.PrefixQuery;
import org.apache.lucene.util.BytesRef;
/**
 * Query node processor that rewrites term nodes whose text contains unescaped wildcard characters
 * ({@code *} or {@code ?}) into {@link PrefixWildcardQueryNode} or {@link WildcardQueryNode}
 * instances.
 *
 * <p>When an {@link Analyzer} is configured under {@link ConfigurationKeys#ANALYZER}, the
 * non-wildcard portions of the term are passed through {@link Analyzer#normalize(String, String)}
 * before the replacement node is built; the wildcard characters themselves are copied verbatim.
 */
public class WildcardQueryNodeProcessor extends QueryNodeProcessorImpl {

  /**
   * Splits a wildcard term into pieces. Group 1 matches a backslash followed by any single
   * character (an escaped character, which must not be treated as a wildcard); group 2 matches a
   * run of one or more wildcard characters.
   *
   * <p>The Java literal needs four backslashes to produce the two-character regex {@code \\} that
   * matches one literal backslash. With only two, the regex would be {@code \.}, i.e. a literal
   * dot, and escaped wildcards would be mistaken for real ones.
   */
  private static final Pattern WILDCARD_PATTERN = Pattern.compile("(\\\\.)|([?*]+)");

  /**
   * Normalizes the non-wildcard parts of a wildcard term, leaving wildcard runs untouched.
   *
   * @param a analyzer whose {@code normalize} method is applied to each non-wildcard chunk
   * @param field name of the field the term belongs to, forwarded to {@code normalize}
   * @param wildcard the raw wildcard term text
   * @return the normalized chunks concatenated with the original wildcard runs, in order
   */
  private static String analyzeWildcard(Analyzer a, String field, String wildcard) {
    Matcher wildcardMatcher = WILDCARD_PATTERN.matcher(wildcard);
    StringBuilder sb = new StringBuilder();
    // Index just past the last wildcard run already copied to the output.
    int last = 0;

    while (wildcardMatcher.find()) {
      // Escaped character: not a wildcard. Leave 'last' alone so the escape sequence stays part
      // of the surrounding chunk and is normalized together with it.
      if (wildcardMatcher.group(1) != null) {
        continue;
      }

      // Normalize whatever text sits between the previous wildcard run and this one. Because
      // the wildcard run is matched greedily, this is non-empty exactly when the run does not
      // start at index 0.
      int start = wildcardMatcher.start();
      if (start > last) {
        String chunk = wildcard.substring(last, start);
        BytesRef normalized = a.normalize(field, chunk);
        // NOTE(review): decoding with utf8ToString presumes the normalized bytes are UTF-8
        // text - confirm for analyzers that use a custom term encoding.
        sb.append(normalized.utf8ToString());
      }

      // The wildcard characters themselves bypass normalization.
      sb.append(wildcardMatcher.group(2));
      last = wildcardMatcher.end();
    }

    // Trailing text after the final wildcard run (or the whole input if none was found).
    if (last < wildcard.length()) {
      String chunk = wildcard.substring(last);
      BytesRef normalized = a.normalize(field, chunk);
      sb.append(normalized.utf8ToString());
    }
    return sb.toString();
  }

  /** Creates a new processor; no configuration is required at construction time. */
  public WildcardQueryNodeProcessor() {
    // empty constructor
  }

  /**
   * Replaces a field (or fuzzy) node whose text contains an unescaped wildcard with a {@link
   * PrefixWildcardQueryNode} or {@link WildcardQueryNode}.
   *
   * <p>Nodes are returned unchanged when they are children of a {@link TermRangeQueryNode}, are
   * {@link QuotedFieldQueryNode}s, have empty text, or contain no unescaped wildcard.
   *
   * @param node the node being visited
   * @return the replacement wildcard node, or {@code node} itself when no rewrite applies
   * @throws QueryNodeException declared by the overridden method; not thrown directly here
   */
  @Override
  protected QueryNode postProcessNode(QueryNode node) throws QueryNodeException {
    if (node instanceof FieldQueryNode || node instanceof FuzzyQueryNode) {
      FieldQueryNode fqn = (FieldQueryNode) node;
      CharSequence text = fqn.getText();

      // Range bounds, quoted terms and empty terms are never interpreted as wildcards.
      if (fqn.getParent() instanceof TermRangeQueryNode
          || fqn instanceof QuotedFieldQueryNode
          || text.length() <= 0) {
        return node;
      }

      if (isWildcard(text)) {
        Analyzer analyzer = getQueryConfigHandler().get(ConfigurationKeys.ANALYZER);
        if (analyzer != null) {
          text = analyzeWildcard(analyzer, fqn.getFieldAsString(), text.toString());
        }
        if (isPrefixWildcard(text)) {
          return new PrefixWildcardQueryNode(fqn.getField(), text, fqn.getBegin(), fqn.getEnd());
        } else {
          return new WildcardQueryNode(fqn.getField(), text, fqn.getBegin(), fqn.getEnd());
        }
      }
    }
    return node;
  }

  /**
   * Tells whether {@code text} contains at least one {@code *} or {@code ?} that
   * {@link UnescapedCharSequence#wasEscaped(CharSequence, int)} does not report as escaped.
   *
   * @param text the term text; may be {@code null}
   * @return {@code true} if an unescaped wildcard character is present
   */
  private boolean isWildcard(CharSequence text) {
    if (text == null || text.length() <= 0) {
      return false;
    }
    // Scan from the end towards the start.
    for (int i = text.length() - 1; i >= 0; i--) {
      char c = text.charAt(i);
      if ((c == '*' || c == '?') && !UnescapedCharSequence.wasEscaped(text, i)) {
        return true;
      }
    }
    return false;
  }

  /**
   * Tells whether {@code text} has the shape of a prefix query: more than one character long,
   * ending in an unescaped {@code *}, with no other unescaped {@code *} and no {@code ?} (escaped
   * or not) anywhere in the text.
   *
   * @param text the term text; may be {@code null}
   * @return {@code true} if the text qualifies as a prefix wildcard
   */
  private boolean isPrefixWildcard(CharSequence text) {
    if (text == null || text.length() <= 0 || !isWildcard(text)) {
      return false;
    }

    int lastIndex = text.length() - 1;
    // The final character must be an unescaped '*'.
    if (text.charAt(lastIndex) != '*') {
      return false;
    }
    if (UnescapedCharSequence.wasEscaped(text, lastIndex)) {
      return false;
    }
    // A lone "*" is treated as a general wildcard, not a prefix.
    if (text.length() == 1) {
      return false;
    }

    for (int i = 0; i < text.length(); i++) {
      char c = text.charAt(i);
      if (c == '?') {
        return false;
      }
      if (c == '*' && !UnescapedCharSequence.wasEscaped(text, i)) {
        // The first unescaped '*' must also be the trailing one.
        return i == lastIndex;
      }
    }
    return false;
  }

  /**
   * No pre-processing is performed; the node is returned as is.
   *
   * @param node the node being visited
   * @return {@code node}, unchanged
   */
  @Override
  protected QueryNode preProcessNode(QueryNode node) throws QueryNodeException {
    return node;
  }

  /**
   * Leaves the order of the children untouched.
   *
   * @param children the children of the node being processed
   * @return {@code children}, unchanged
   */
  @Override
  protected List<QueryNode> setChildrenOrder(List<QueryNode> children)
      throws QueryNodeException {
    return children;
  }
}