org.antlr/antlr/3.5.2 : org/antlr/analysis/LL1Analyzer.java

LL1Analyzer
http://antlr.org/antlr: The ANTLR 3 tool. (ANTLR)
BSD licence
/*
 * [The "BSD license"]
 *  Copyright (c) 2010 Terence Parr
 *  All rights reserved.
 *
 *  Redistribution and use in source and binary forms, with or without
 *  modification, are permitted provided that the following conditions
 *  are met:
 *  1. Redistributions of source code must retain the above copyright
 *      notice, this list of conditions and the following disclaimer.
 *  2. Redistributions in binary form must reproduce the above copyright
 *      notice, this list of conditions and the following disclaimer in the
 *      documentation and/or other materials provided with the distribution.
 *  3. The name of the author may not be used to endorse or promote products
 *      derived from this software without specific prior written permission.
 *
 *  THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 *  IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 *  OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 *  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 *  INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 *  NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 *  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 *  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 *  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 *  THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
package org.antlr.analysis;

import org.antlr.grammar.v3.ANTLRParser;
import org.antlr.misc.IntSet;
import org.antlr.misc.IntervalSet;
import org.antlr.tool.Grammar;
import org.antlr.tool.Rule;

import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;

Created by IntelliJ IDEA.
User: parrt
Date: Dec 31, 2007
Time: 1:31:16 PM
To change this template use File | Settings | File Templates.
/**
 * Created by IntelliJ IDEA.
 * User: parrt
 * Date: Dec 31, 2007
 * Time: 1:31:16 PM
 * To change this template use File | Settings | File Templates.
 */
public class LL1Analyzer {
	0	if we hit end of rule and invoker should keep going (epsilon) /**	0	if we hit end of rule and invoker should keep going (epsilon) */
	public static final int DETECT_PRED_EOR = 0;
	1	if we found a nonautobacktracking pred /**	1	if we found a nonautobacktracking pred */
	public static final int DETECT_PRED_FOUND = 1;
	2	if we didn't find such a pred /**	2	if we didn't find such a pred */
	public static final int DETECT_PRED_NOT_FOUND = 2;

	public Grammar grammar;

	Used during LOOK to detect computation cycles /** Used during LOOK to detect computation cycles */
	protected Set<NFAState> lookBusy = new HashSet<NFAState>();

	public Map<NFAState, LookaheadSet> FIRSTCache = new HashMap<NFAState, LookaheadSet>();
	public Map<Rule, LookaheadSet> FOLLOWCache = new HashMap<Rule, LookaheadSet>();

	public LL1Analyzer(Grammar grammar) {
		this.grammar = grammar;
	}

	/*
	public void computeRuleFIRSTSets() {
		if ( getNumberOfDecisions()==0 ) {
			createNFAs();
		}
		for (Iterator it = getRules().iterator(); it.hasNext();) {
			Rule r = (Rule)it.next();
			if ( r.isSynPred ) {
				continue;
			}
			LookaheadSet s = FIRST(r);
			System.out.println("FIRST("+r.name+")="+s);
		}
	}
	*/

	/*
	public Set<String> getOverriddenRulesWithDifferentFIRST() {
		// walk every rule in this grammar and compare FIRST set with
		// those in imported grammars.
		Set<String> rules = new HashSet();
		for (Iterator it = getRules().iterator(); it.hasNext();) {
			Rule r = (Rule)it.next();
			//System.out.println(r.name+" FIRST="+r.FIRST);
			for (int i = 0; i < delegates.size(); i++) {
				Grammar g = delegates.get(i);
				Rule importedRule = g.getRule(r.name);
				if ( importedRule != null ) { // exists in imported grammar
					// System.out.println(r.name+" exists in imported grammar: FIRST="+importedRule.FIRST);
					if ( !r.FIRST.equals(importedRule.FIRST) ) {
						rules.add(r.name);
					}
				}
			}
		}
		return rules;
	}

	public Set<Rule> getImportedRulesSensitiveToOverriddenRulesDueToLOOK() {
		Set<String> diffFIRSTs = getOverriddenRulesWithDifferentFIRST();
		Set<Rule> rules = new HashSet();
		for (Iterator it = diffFIRSTs.iterator(); it.hasNext();) {
			String r = (String) it.next();
			for (int i = 0; i < delegates.size(); i++) {
				Grammar g = delegates.get(i);
				Set<Rule> callers = g.ruleSensitivity.get(r);
				// somebody invokes rule whose FIRST changed in subgrammar?
				if ( callers!=null ) {
					rules.addAll(callers);
					//System.out.println(g.name+" rules "+callers+" sensitive to "+r+"; dup 'em");
				}
			}
		}
		return rules;
	}
*/

	/*
	public LookaheadSet LOOK(Rule r) {
		if ( r.FIRST==null ) {
			r.FIRST = FIRST(r.startState);
		}
		return r.FIRST;
	}
*/

	From an NFA state, s, find the set of all labels reachable from s.
 Used to compute follow sets for error recovery.  Never computes
 a FOLLOW operation.  FIRST stops at end of rules, returning EOR, unless
 invoked from another rule.  I.e., routine properly handles
    a : b A ;
 where b is nullable.
 We record with EOR_TOKEN_TYPE if we hit the end of a rule so we can
 know at runtime (when these sets are used) to start walking up the
 follow chain to compute the real, correct follow set (as opposed to
 the FOLLOW, which is a superset).
 This routine will only be used on parser and tree parser grammars.
/** From an NFA state, s, find the set of all labels reachable from s.
	 *  Used to compute follow sets for error recovery.  Never computes
	 *  a FOLLOW operation.  FIRST stops at end of rules, returning EOR, unless
	 *  invoked from another rule.  I.e., routine properly handles
	 *
	 *     a : b A ;
	 *
	 *  where b is nullable.
	 *
	 *  We record with EOR_TOKEN_TYPE if we hit the end of a rule so we can
	 *  know at runtime (when these sets are used) to start walking up the
	 *  follow chain to compute the real, correct follow set (as opposed to
	 *  the FOLLOW, which is a superset).
	 *
	 *  This routine will only be used on parser and tree parser grammars.
	 */
	public LookaheadSet FIRST(NFAState s) {
		//System.out.println("> FIRST("+s.enclosingRule.name+") in rule "+s.enclosingRule);
		lookBusy.clear();
		LookaheadSet look = _FIRST(s, false);
		//System.out.println("< FIRST("+s.enclosingRule.name+") in rule "+s.enclosingRule+"="+look.toString(this.grammar));
		return look;
	}

	public LookaheadSet FOLLOW(Rule r) {
        //System.out.println("> FOLLOW("+r.name+") in rule "+r.startState.enclosingRule);
		LookaheadSet f = FOLLOWCache.get(r);
		if ( f!=null ) {
			return f;
		}
		f = _FIRST(r.stopState, true);
		FOLLOWCache.put(r, f);
        //System.out.println("< FOLLOW("+r+") in rule "+r.startState.enclosingRule+"="+f.toString(this.grammar));
		return f;
	}

	public LookaheadSet LOOK(NFAState s) {
		if ( NFAToDFAConverter.debug ) {
			System.out.println("> LOOK("+s+")");
		}
		lookBusy.clear();
		LookaheadSet look = _FIRST(s, true);
		// FOLLOW makes no sense (at the moment!) for lexical rules.
		if ( grammar.type!=Grammar.LEXER && look.member(Label.EOR_TOKEN_TYPE) ) {
			// avoid altering FIRST reset as it is cached
			LookaheadSet f = FOLLOW(s.enclosingRule);
			f.orInPlace(look);
			f.remove(Label.EOR_TOKEN_TYPE);
			look = f;
			//look.orInPlace(FOLLOW(s.enclosingRule));
		}
		else if ( grammar.type==Grammar.LEXER && look.member(Label.EOT) ) {
			// if this has EOT, lookahead is all char (all char can follow rule)
			//look = new LookaheadSet(Label.EOT);
			look = new LookaheadSet(IntervalSet.COMPLETE_SET);
		}
		if ( NFAToDFAConverter.debug ) {
			System.out.println("< LOOK("+s+")="+look.toString(grammar));
		}
		return look;
	}

	protected LookaheadSet _FIRST(NFAState s, boolean chaseFollowTransitions) {
		/*
		System.out.println("_LOOK("+s+") in rule "+s.enclosingRule);
		if ( s.transition[0] instanceof RuleClosureTransition ) {
			System.out.println("go to rule "+((NFAState)s.transition[0].target).enclosingRule);
		}
		*/
		if ( !chaseFollowTransitions && s.isAcceptState() ) {
			if ( grammar.type==Grammar.LEXER ) {
				// FOLLOW makes no sense (at the moment!) for lexical rules.
				// assume all char can follow
				return new LookaheadSet(IntervalSet.COMPLETE_SET);
			}
			return new LookaheadSet(Label.EOR_TOKEN_TYPE);
		}

		if ( lookBusy.contains(s) ) {
			// return a copy of an empty set; we may modify set inline
			return new LookaheadSet();
		}
		lookBusy.add(s);

		Transition transition0 = s.transition[0];
		if ( transition0==null ) {
			return null;
		}

		if ( transition0.label.isAtom() ) {
			int atom = transition0.label.getAtom();
			return new LookaheadSet(atom);
		}
		if ( transition0.label.isSet() ) {
			IntSet sl = transition0.label.getSet();
			return new LookaheadSet(sl);
		}

		// compute FIRST of transition 0
		LookaheadSet tset = null;
		// if transition 0 is a rule call and we don't want FOLLOW, check cache
		if ( !chaseFollowTransitions && transition0 instanceof RuleClosureTransition ) {
			tset = FIRSTCache.get((NFAState)transition0.target);
		}

		// if not in cache, must compute
		if ( tset==null ) {
			tset = _FIRST((NFAState)transition0.target, chaseFollowTransitions);
			// save FIRST cache for transition 0 if rule call
			if ( !chaseFollowTransitions && transition0 instanceof RuleClosureTransition ) {
				FIRSTCache.put((NFAState)transition0.target, tset);
			}
		}

        LookaheadSet tsetCached = tset; // tset is stored in cache. We can't return the same instance

		// did we fall off the end?
		if ( grammar.type!=Grammar.LEXER && tset.member(Label.EOR_TOKEN_TYPE) ) {
			if ( transition0 instanceof RuleClosureTransition ) {
				// we called a rule that found the end of the rule.
				// That means the rule is nullable and we need to
				// keep looking at what follows the rule ref.  E.g.,
				// a : b A ; where b is nullable means that LOOK(a)
				// should include A.
				RuleClosureTransition ruleInvocationTrans =
					(RuleClosureTransition)transition0;
				// remove the EOR and get what follows
				//tset.remove(Label.EOR_TOKEN_TYPE);
				NFAState following = ruleInvocationTrans.followState;
				LookaheadSet fset =	_FIRST(following, chaseFollowTransitions);
				fset.orInPlace(tset); // tset cached; or into new set
				fset.remove(Label.EOR_TOKEN_TYPE);
				tset = fset;
			}
		}

		Transition transition1 = s.transition[1];
		if ( transition1!=null ) {
			LookaheadSet tset1 =
				_FIRST((NFAState)transition1.target, chaseFollowTransitions);
			tset1.orInPlace(tset);
			tset = tset1;
		}

		// never return a cached set; clone
		return tset==tsetCached ? new LookaheadSet(tset) : tset;
	}

	Is there a non-syn-pred predicate visible from s that is not in
 the rule enclosing s?  This accounts for most predicate situations
 and lets ANTLR do a simple LL(1)+pred computation.
 TODO: what about gated vs regular preds?
/** Is there a non-syn-pred predicate visible from s that is not in
	 *  the rule enclosing s?  This accounts for most predicate situations
	 *  and lets ANTLR do a simple LL(1)+pred computation.
	 *
	 *  TODO: what about gated vs regular preds?
	 */
	public boolean detectConfoundingPredicates(NFAState s) {
		lookBusy.clear();
		Rule r = s.enclosingRule;
		return _detectConfoundingPredicates(s, r, false) == DETECT_PRED_FOUND;
	}

	protected int _detectConfoundingPredicates(NFAState s,
											   Rule enclosingRule,
											   boolean chaseFollowTransitions)
	{
		//System.out.println("_detectNonAutobacktrackPredicates("+s+")");
		if ( !chaseFollowTransitions && s.isAcceptState() ) {
			if ( grammar.type==Grammar.LEXER ) {
				// FOLLOW makes no sense (at the moment!) for lexical rules.
				// assume all char can follow
				return DETECT_PRED_NOT_FOUND;
			}
			return DETECT_PRED_EOR;
		}

		if ( lookBusy.contains(s) ) {
			// return a copy of an empty set; we may modify set inline
			return DETECT_PRED_NOT_FOUND;
		}
		lookBusy.add(s);

		Transition transition0 = s.transition[0];
		if ( transition0==null ) {
			return DETECT_PRED_NOT_FOUND;
		}

		if ( !(transition0.label.isSemanticPredicate()||
			   transition0.label.isEpsilon()) ) {
			return DETECT_PRED_NOT_FOUND;
		}

		if ( transition0.label.isSemanticPredicate() ) {
			//System.out.println("pred "+transition0.label);
			SemanticContext ctx = transition0.label.getSemanticContext();
			SemanticContext.Predicate p = (SemanticContext.Predicate)ctx;
			if ( p.predicateAST.getType() != ANTLRParser.BACKTRACK_SEMPRED ) {
				return DETECT_PRED_FOUND;
			}
		}
		
		/*
		if ( transition0.label.isSemanticPredicate() ) {
			System.out.println("pred "+transition0.label);
			SemanticContext ctx = transition0.label.getSemanticContext();
			SemanticContext.Predicate p = (SemanticContext.Predicate)ctx;
			// if a non-syn-pred found not in enclosingRule, say we found one
			if ( p.predicateAST.getType() != ANTLRParser.BACKTRACK_SEMPRED &&
				 !p.predicateAST.enclosingRuleName.equals(enclosingRule.name) )
			{
				System.out.println("found pred "+p+" not in "+enclosingRule.name);
				return DETECT_PRED_FOUND;
			}
		}
		*/

		int result = _detectConfoundingPredicates((NFAState)transition0.target,
												  enclosingRule,
												  chaseFollowTransitions);
		if ( result == DETECT_PRED_FOUND ) {
			return DETECT_PRED_FOUND;
		}

		if ( result == DETECT_PRED_EOR ) {
			if ( transition0 instanceof RuleClosureTransition ) {
				// we called a rule that found the end of the rule.
				// That means the rule is nullable and we need to
				// keep looking at what follows the rule ref.  E.g.,
				// a : b A ; where b is nullable means that LOOK(a)
				// should include A.
				RuleClosureTransition ruleInvocationTrans =
					(RuleClosureTransition)transition0;
				NFAState following = ruleInvocationTrans.followState;
				int afterRuleResult =
					_detectConfoundingPredicates(following,
												 enclosingRule,
												 chaseFollowTransitions);
				if ( afterRuleResult == DETECT_PRED_FOUND ) {
					return DETECT_PRED_FOUND;
				}
			}
		}

		Transition transition1 = s.transition[1];
		if ( transition1!=null ) {
			int t1Result =
				_detectConfoundingPredicates((NFAState)transition1.target,
											 enclosingRule,
											 chaseFollowTransitions);
			if ( t1Result == DETECT_PRED_FOUND ) {
				return DETECT_PRED_FOUND;
			}
		}

		return DETECT_PRED_NOT_FOUND;
	}

	Return predicate expression found via epsilon edges from s.  Do
 not look into other rules for now.  Do something simple.  Include
 backtracking synpreds.
/** Return predicate expression found via epsilon edges from s.  Do
	 *  not look into other rules for now.  Do something simple.  Include
	 *  backtracking synpreds.
	 */
	public SemanticContext getPredicates(NFAState altStartState) {
		lookBusy.clear();
		return _getPredicates(altStartState, altStartState);
	}

	protected SemanticContext _getPredicates(NFAState s, NFAState altStartState) {
		//System.out.println("_getPredicates("+s+")");
		if ( s.isAcceptState() ) {
			return null;
		}

		// avoid infinite loops from (..)* etc...
		if ( lookBusy.contains(s) ) {
			return null;
		}
		lookBusy.add(s);

		Transition transition0 = s.transition[0];
		// no transitions
		if ( transition0==null ) {
			return null;
		}

		// not a predicate and not even an epsilon
		if ( !(transition0.label.isSemanticPredicate()||
			   transition0.label.isEpsilon()) ) {
			return null;
		}

		SemanticContext p = null;
		SemanticContext p0;
		SemanticContext p1 = null;
		if ( transition0.label.isSemanticPredicate() ) {
			//System.out.println("pred "+transition0.label);
			p = transition0.label.getSemanticContext();
			// ignore backtracking preds not on left edge for this decision
			if ( ((SemanticContext.Predicate)p).predicateAST.getType() ==
				  ANTLRParser.BACKTRACK_SEMPRED  &&
				 s == altStartState.transition[0].target )
			{
				p = null; // don't count
			}
		}

		// get preds from beyond this state
		p0 = _getPredicates((NFAState)transition0.target, altStartState);

		// get preds from other transition
		Transition transition1 = s.transition[1];
		if ( transition1!=null ) {
			p1 = _getPredicates((NFAState)transition1.target, altStartState);
		}

		// join this&following-right|following-down
		return SemanticContext.and(p,SemanticContext.or(p0,p1));
	}
}
/

org.antlr/ antlr/ 3.5.2/ org/antlr/analysis/LL1Analyzer.java