/*
 * [The "BSD license"]
 *  Copyright (c) 2010 Terence Parr
 *  All rights reserved.
 *
 *  Redistribution and use in source and binary forms, with or without
 *  modification, are permitted provided that the following conditions
 *  are met:
 *  1. Redistributions of source code must retain the above copyright
 *      notice, this list of conditions and the following disclaimer.
 *  2. Redistributions in binary form must reproduce the above copyright
 *      notice, this list of conditions and the following disclaimer in the
 *      documentation and/or other materials provided with the distribution.
 *  3. The name of the author may not be used to endorse or promote products
 *      derived from this software without specific prior written permission.
 *
 *  THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 *  IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 *  OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 *  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 *  INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 *  NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 *  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 *  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 *  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 *  THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */package org.antlr.tool;

import org.antlr.analysis.Label;
import org.antlr.analysis.NFAState;
import org.antlr.grammar.v3.ANTLRParser;
import org.antlr.grammar.v3.AssignTokenTypesWalker;
import org.antlr.misc.Utils;
import org.antlr.runtime.RecognitionException;
import org.antlr.runtime.tree.CommonTreeNodeStream;

import java.util.ArrayList;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.Vector;

A tree of component (delegate) grammars. Rules defined in delegates are "inherited" like multi-inheritance so you can override them. All token types must be consistent across rules from all delegate grammars, so they must be stored here in one central place. We have to start out assuming a composite grammar situation as we can't look into the grammar files a priori to see if there is a delegate statement. Because of this, and to avoid duplicating token type tracking in each grammar, even single noncomposite grammars use one of these objects to track token types.
/** A tree of component (delegate) grammars. * * Rules defined in delegates are "inherited" like multi-inheritance * so you can override them. All token types must be consistent across * rules from all delegate grammars, so they must be stored here in one * central place. * * We have to start out assuming a composite grammar situation as we can't * look into the grammar files a priori to see if there is a delegate * statement. Because of this, and to avoid duplicating token type tracking * in each grammar, even single noncomposite grammars use one of these objects * to track token types. */
public class CompositeGrammar { public static final int MIN_RULE_INDEX = 1; public CompositeGrammarTree delegateGrammarTreeRoot;
Used during getRuleReferenceClosure to detect computation cycles
/** Used during getRuleReferenceClosure to detect computation cycles */
protected Set<NFAState> refClosureBusy = new HashSet<NFAState>();
Used to assign state numbers; all grammars in composite share common NFA space. This NFA tracks state numbers number to state mapping.
/** Used to assign state numbers; all grammars in composite share common * NFA space. This NFA tracks state numbers number to state mapping. */
public int stateCounter = 0;
The NFA states in the NFA built from rules across grammars in composite. Maps state number to NFAState object. This is a Vector instead of a List because I need to be able to grow this properly. After talking to Josh Bloch, Collections guy at Sun, I decided this was easiest solution.
/** The NFA states in the NFA built from rules across grammars in composite. * Maps state number to NFAState object. * This is a Vector instead of a List because I need to be able to grow * this properly. After talking to Josh Bloch, Collections guy at Sun, * I decided this was easiest solution. */
protected Vector<NFAState> numberToStateList = new Vector<NFAState>(1000);
Token names and literal tokens like "void" are uniquely indexed. with -1 implying EOF. Characters are different; they go from -1 (EOF) to \uFFFE. For example, 0 could be a binary byte you want to lexer. Labels of DFA/NFA transitions can be both tokens and characters. I use negative numbers for bookkeeping labels like EPSILON. Char/String literals and token types overlap in the same space, however.
/** Token names and literal tokens like "void" are uniquely indexed. * with -1 implying EOF. Characters are different; they go from * -1 (EOF) to \uFFFE. For example, 0 could be a binary byte you * want to lexer. Labels of DFA/NFA transitions can be both tokens * and characters. I use negative numbers for bookkeeping labels * like EPSILON. Char/String literals and token types overlap in the same * space, however. */
protected int maxTokenType = Label.MIN_TOKEN_TYPE-1;
Map token like ID (but not literals like "while") to its token type
/** Map token like ID (but not literals like "while") to its token type */
public Map<String, Integer> tokenIDToTypeMap = new LinkedHashMap<String, Integer>();
Map token literals like "while" to its token type. It may be that WHILE="while"=35, in which case both tokenIDToTypeMap and this field will have entries both mapped to 35.
/** Map token literals like "while" to its token type. It may be that * WHILE="while"=35, in which case both tokenIDToTypeMap and this * field will have entries both mapped to 35. */
public Map<String, Integer> stringLiteralToTypeMap = new LinkedHashMap<String, Integer>();
Reverse index for stringLiteralToTypeMap
/** Reverse index for stringLiteralToTypeMap */
public Vector<String> typeToStringLiteralList = new Vector<String>();
Map a token type to its token name. Must subtract MIN_TOKEN_TYPE from index.
/** Map a token type to its token name. * Must subtract MIN_TOKEN_TYPE from index. */
public Vector<String> typeToTokenList = new Vector<String>();
If combined or lexer grammar, track the rules. Track lexer rules so we can warn about undefined tokens. This is combined set of lexer rules from all lexer grammars seen in all imports.
/** If combined or lexer grammar, track the rules. * Track lexer rules so we can warn about undefined tokens. * This is combined set of lexer rules from all lexer grammars * seen in all imports. */
protected Set<String> lexerRules = new HashSet<String>();
Rules are uniquely labeled from 1..n among all grammars
/** Rules are uniquely labeled from 1..n among all grammars */
protected int ruleIndex = MIN_RULE_INDEX;
Map a rule index to its name; use a Vector on purpose as new collections stuff won't let me setSize and make it grow. :( I need a specific guaranteed index, which the Collections stuff won't let me have.
/** Map a rule index to its name; use a Vector on purpose as new * collections stuff won't let me setSize and make it grow. :( * I need a specific guaranteed index, which the Collections stuff * won't let me have. */
protected Vector<Rule> ruleIndexToRuleList = new Vector<Rule>(); public boolean watchNFAConversion = false; protected void initTokenSymbolTables() { // the faux token types take first NUM_FAUX_LABELS positions // then we must have room for the predefined runtime token types // like DOWN/UP used for tree parsing. typeToTokenList.setSize(Label.NUM_FAUX_LABELS+Label.MIN_TOKEN_TYPE-1); typeToTokenList.set(Label.NUM_FAUX_LABELS+Label.INVALID, "<INVALID>"); typeToTokenList.set(Label.NUM_FAUX_LABELS+Label.EOT, "<EOT>"); typeToTokenList.set(Label.NUM_FAUX_LABELS+Label.SEMPRED, "<SEMPRED>"); typeToTokenList.set(Label.NUM_FAUX_LABELS+Label.SET, "<SET>"); typeToTokenList.set(Label.NUM_FAUX_LABELS+Label.EPSILON, Label.EPSILON_STR); typeToTokenList.set(Label.NUM_FAUX_LABELS+Label.EOF, "EOF"); typeToTokenList.set(Label.NUM_FAUX_LABELS+Label.EOR_TOKEN_TYPE-1, "<EOR>"); typeToTokenList.set(Label.NUM_FAUX_LABELS+Label.DOWN-1, "DOWN"); typeToTokenList.set(Label.NUM_FAUX_LABELS+Label.UP-1, "UP"); tokenIDToTypeMap.put("<INVALID>", Utils.integer(Label.INVALID)); tokenIDToTypeMap.put("<EOT>", Utils.integer(Label.EOT)); tokenIDToTypeMap.put("<SEMPRED>", Utils.integer(Label.SEMPRED)); tokenIDToTypeMap.put("<SET>", Utils.integer(Label.SET)); tokenIDToTypeMap.put("<EPSILON>", Utils.integer(Label.EPSILON)); tokenIDToTypeMap.put("EOF", Utils.integer(Label.EOF)); tokenIDToTypeMap.put("<EOR>", Utils.integer(Label.EOR_TOKEN_TYPE)); tokenIDToTypeMap.put("DOWN", Utils.integer(Label.DOWN)); tokenIDToTypeMap.put("UP", Utils.integer(Label.UP)); } @SuppressWarnings("OverridableMethodCallInConstructor") public CompositeGrammar() { initTokenSymbolTables(); } @SuppressWarnings("OverridableMethodCallInConstructor") public CompositeGrammar(Grammar g) { this(); setDelegationRoot(g); } public void setDelegationRoot(Grammar root) { delegateGrammarTreeRoot = new CompositeGrammarTree(root); root.compositeTreeNode = delegateGrammarTreeRoot; } public Rule getRule(String ruleName) { return delegateGrammarTreeRoot.getRule(ruleName); } public Object getOption(String key) { return delegateGrammarTreeRoot.getOption(key); }
Add delegate grammar as child of delegator
/** Add delegate grammar as child of delegator */
public void addGrammar(Grammar delegator, Grammar delegate) { if ( delegator.compositeTreeNode==null ) { delegator.compositeTreeNode = new CompositeGrammarTree(delegator); } delegator.compositeTreeNode.addChild(new CompositeGrammarTree(delegate)); /*// find delegator in tree so we can add a child to it CompositeGrammarTree t = delegateGrammarTreeRoot.findNode(delegator); t.addChild(); */ // make sure new grammar shares this composite delegate.composite = this; }
Get parent of this grammar
/** Get parent of this grammar */
public Grammar getDelegator(Grammar g) { CompositeGrammarTree me = delegateGrammarTreeRoot.findNode(g); if ( me==null ) { return null; // not found } if ( me.parent!=null ) { return me.parent.grammar; } return null; }
Get list of all delegates from all grammars in the delegate subtree of g. The grammars are in delegation tree preorder. Don't include g itself in list as it is not a delegate of itself.
/** Get list of all delegates from all grammars in the delegate subtree of g. * The grammars are in delegation tree preorder. Don't include g itself * in list as it is not a delegate of itself. */
public List<Grammar> getDelegates(Grammar g) { CompositeGrammarTree t = delegateGrammarTreeRoot.findNode(g); if ( t==null ) { return null; // no delegates } List<Grammar> grammars = t.getPostOrderedGrammarList(); grammars.remove(grammars.size()-1); // remove g (last one) return grammars; } public List<Grammar> getDirectDelegates(Grammar g) { CompositeGrammarTree t = delegateGrammarTreeRoot.findNode(g); List<CompositeGrammarTree> children = t.children; if ( children==null ) { return null; } List<Grammar> grammars = new ArrayList<Grammar>(); for (int i = 0; children!=null && i < children.size(); i++) { CompositeGrammarTree child = children.get(i); grammars.add(child.grammar); } return grammars; }
Get delegates below direct delegates of g
/** Get delegates below direct delegates of g */
public List<Grammar> getIndirectDelegates(Grammar g) { List<Grammar> direct = getDirectDelegates(g); List<Grammar> delegates = getDelegates(g); if ( direct!=null ) { delegates.removeAll(direct); } return delegates; }
Return list of delegate grammars from root down to g. Order is root, ..., g.parent. (g not included).
/** Return list of delegate grammars from root down to g. * Order is root, ..., g.parent. (g not included). */
public List<Grammar> getDelegators(Grammar g) { if ( g==delegateGrammarTreeRoot.grammar ) { return null; } List<Grammar> grammars = new ArrayList<Grammar>(); CompositeGrammarTree t = delegateGrammarTreeRoot.findNode(g); // walk backwards to root, collecting grammars CompositeGrammarTree p = t.parent; while ( p!=null ) { grammars.add(0, p.grammar); // add to head so in order later p = p.parent; } return grammars; }
Get set of rules for grammar g that need to have manual delegation methods. This is the list of rules collected from all direct/indirect delegates minus rules overridden in grammar g. This returns null except for the delegate root because it is the only one that has to have a complete grammar rule interface. The delegates should not be instantiated directly for use as parsers (you can create them to pass to the root parser's ctor as arguments).
/** Get set of rules for grammar g that need to have manual delegation * methods. This is the list of rules collected from all direct/indirect * delegates minus rules overridden in grammar g. * * This returns null except for the delegate root because it is the only * one that has to have a complete grammar rule interface. The delegates * should not be instantiated directly for use as parsers (you can create * them to pass to the root parser's ctor as arguments). */
public Set<? extends Rule> getDelegatedRules(Grammar g) { if ( g!=delegateGrammarTreeRoot.grammar ) { return null; } Set<? extends Rule> rules = getAllImportedRules(g); for (Iterator<? extends Rule> it = rules.iterator(); it.hasNext();) { Rule r = it.next(); Rule localRule = g.getLocallyDefinedRule(r.name); // if locally defined or it's not local but synpred, don't make // a delegation method if ( localRule!=null || r.isSynPred ) { it.remove(); // kill overridden rules } } return rules; }
Get all rule definitions from all direct/indirect delegate grammars of g.
/** Get all rule definitions from all direct/indirect delegate grammars * of g. */
public Set<? extends Rule> getAllImportedRules(Grammar g) { Set<String> ruleNames = new HashSet<String>(); Set<Rule> rules = new HashSet<Rule>(); CompositeGrammarTree subtreeRoot = delegateGrammarTreeRoot.findNode(g); List<Grammar> grammars = subtreeRoot.getPreOrderedGrammarList(); // walk all grammars preorder, priority given to grammar listed first. for (int i = 0; i < grammars.size(); i++) { Grammar delegate = grammars.get(i); // for each rule in delegate, add to rules if no rule with that // name as been seen. (can't use removeAll; wrong hashcode/equals on Rule) for (Rule r : delegate.getRules()) { if ( !ruleNames.contains(r.name) ) { ruleNames.add(r.name); // track that we've seen this rules.add(r); } } } return rules; } public Grammar getRootGrammar() { if ( delegateGrammarTreeRoot==null ) { return null; } return delegateGrammarTreeRoot.grammar; } public Grammar getGrammar(String grammarName) { CompositeGrammarTree t = delegateGrammarTreeRoot.findNode(grammarName); if ( t!=null ) { return t.grammar; } return null; } // NFA spans multiple grammars, must handle here public int getNewNFAStateNumber() { return stateCounter++; } public void addState(NFAState state) { numberToStateList.setSize(state.stateNumber+1); // make sure we have room numberToStateList.set(state.stateNumber, state); } public NFAState getState(int s) { return numberToStateList.get(s); } public void assignTokenTypes() throws RecognitionException { // ASSIGN TOKEN TYPES for all delegates (same walker) //System.out.println("### assign types"); AssignTokenTypesWalker ttypesWalker = new AssignTokenTypesBehavior(); List<Grammar> grammars = delegateGrammarTreeRoot.getPostOrderedGrammarList(); for (int i = 0; grammars!=null && i < grammars.size(); i++) { Grammar g = grammars.get(i); ttypesWalker.setTreeNodeStream(new CommonTreeNodeStream(g.getGrammarTree())); try { //System.out.println(" walking "+g.name); ttypesWalker.grammar_(g); } catch (RecognitionException re) { ErrorManager.error(ErrorManager.MSG_BAD_AST_STRUCTURE, re); } } // the walker has filled literals, tokens, and alias tables. // now tell it to define them in the root grammar ttypesWalker.defineTokens(delegateGrammarTreeRoot.grammar); } public void translateLeftRecursiveRules() { List<Grammar> grammars = delegateGrammarTreeRoot.getPostOrderedGrammarList(); for (int i = 0; grammars!=null && i < grammars.size(); i++) { Grammar g = grammars.get(i); if ( !(g.type==Grammar.PARSER || g.type==Grammar.COMBINED) ) continue; for (GrammarAST r : g.grammarTree.findAllType(ANTLRParser.RULE)) { if ( !Character.isUpperCase(r.getChild(0).getText().charAt(0)) ) { if ( LeftRecursiveRuleAnalyzer.hasImmediateRecursiveRuleRefs(r, r.enclosingRuleName) ) { g.translateLeftRecursiveRule(r); } } } } } public void defineGrammarSymbols() { delegateGrammarTreeRoot.trimLexerImportsIntoCombined(); List<Grammar> grammars = delegateGrammarTreeRoot.getPostOrderedGrammarList(); for (int i = 0; grammars!=null && i < grammars.size(); i++) { Grammar g = grammars.get(i); g.defineGrammarSymbols(); } for (int i = 0; grammars!=null && i < grammars.size(); i++) { Grammar g = grammars.get(i); g.checkNameSpaceAndActions(); } minimizeRuleSet(); } public void createNFAs() { if ( ErrorManager.doNotAttemptAnalysis() ) { return; } List<Grammar> grammars = delegateGrammarTreeRoot.getPostOrderedGrammarList(); //System.out.println("### createNFAs for composite; grammars: "+names); for (int i = 0; grammars!=null && i < grammars.size(); i++) { Grammar g = grammars.get(i); g.createRuleStartAndStopNFAStates(); } for (int i = 0; grammars!=null && i < grammars.size(); i++) { Grammar g = grammars.get(i); g.buildNFA(); } } public void minimizeRuleSet() { Set<String> ruleDefs = new HashSet<String>(); _minimizeRuleSet(ruleDefs, delegateGrammarTreeRoot); } public void _minimizeRuleSet(Set<String> ruleDefs, CompositeGrammarTree p) { Set<String> localRuleDefs = new HashSet<String>(); Set<String> overrides = new HashSet<String>(); // compute set of non-overridden rules for this delegate for (Rule r : p.grammar.getRules()) { if ( !ruleDefs.contains(r.name) ) { localRuleDefs.add(r.name); } else if ( !r.name.equals(Grammar.ARTIFICIAL_TOKENS_RULENAME) ) { // record any overridden rule 'cept tokens rule overrides.add(r.name); } } //System.out.println("rule defs for "+p.grammar.name+": "+localRuleDefs); //System.out.println("overridden rule for "+p.grammar.name+": "+overrides); p.grammar.overriddenRules = overrides; // make set of all rules defined thus far walking delegation tree. // the same rule in two delegates resolves in favor of first found // in tree therefore second must not be included ruleDefs.addAll(localRuleDefs); // pass larger set of defined rules to delegates if ( p.children!=null ) { for (CompositeGrammarTree delegate : p.children) { _minimizeRuleSet(ruleDefs, delegate); } } } /* public void minimizeRuleSet() { Set<Rule> refs = _minimizeRuleSet(delegateGrammarTreeRoot); System.out.println("all rule refs: "+refs); } public Set<Rule> _minimizeRuleSet(CompositeGrammarTree p) { Set<Rule> refs = new HashSet<Rule>(); for (GrammarAST refAST : p.grammar.ruleRefs) { System.out.println("ref "+refAST.getText()+": "+refAST.NFAStartState+ " enclosing rule: "+refAST.NFAStartState.enclosingRule+ " invoking rule: "+((NFAState)refAST.NFAStartState.transition[0].target).enclosingRule); refs.add(((NFAState)refAST.NFAStartState.transition[0].target).enclosingRule); } if ( p.children!=null ) { for (CompositeGrammarTree delegate : p.children) { Set<Rule> delegateRuleRefs = _minimizeRuleSet(delegate); refs.addAll(delegateRuleRefs); } } return refs; } */ /* public void oldminimizeRuleSet() { // first walk to remove all overridden rules Set<String> ruleDefs = new HashSet<String>(); Set<String> ruleRefs = new HashSet<String>(); for (GrammarAST refAST : delegateGrammarTreeRoot.grammar.ruleRefs) { String rname = refAST.getText(); ruleRefs.add(rname); } _minimizeRuleSet(ruleDefs, ruleRefs, delegateGrammarTreeRoot); System.out.println("overall rule defs: "+ruleDefs); } public void _minimizeRuleSet(Set<String> ruleDefs, Set<String> ruleRefs, CompositeGrammarTree p) { Set<String> localRuleDefs = new HashSet<String>(); for (Rule r : p.grammar.getRules()) { if ( !ruleDefs.contains(r.name) ) { localRuleDefs.add(r.name); ruleDefs.add(r.name); } } System.out.println("rule defs for "+p.grammar.name+": "+localRuleDefs); // remove locally-defined rules not in ref set // find intersection of local rules and references from delegator // that is set of rules needed by delegator Set<String> localRuleDefsSatisfyingRefsFromBelow = new HashSet<String>(); for (String r : ruleRefs) { if ( localRuleDefs.contains(r) ) { localRuleDefsSatisfyingRefsFromBelow.add(r); } } // now get list of refs from localRuleDefsSatisfyingRefsFromBelow. // Those rules are also allowed in this delegate for (GrammarAST refAST : p.grammar.ruleRefs) { if ( localRuleDefsSatisfyingRefsFromBelow.contains(refAST.enclosingRuleName) ) { // found rule ref within needed rule } } // remove rule refs not in the new rule def set // walk all children, adding rules not already defined if ( p.children!=null ) { for (CompositeGrammarTree delegate : p.children) { _minimizeRuleSet(ruleDefs, ruleRefs, delegate); } } } */ /* public void trackNFAStatesThatHaveLabeledEdge(Label label, NFAState stateWithLabeledEdge) { Set<NFAState> states = typeToNFAStatesWithEdgeOfTypeMap.get(label); if ( states==null ) { states = new HashSet<NFAState>(); typeToNFAStatesWithEdgeOfTypeMap.put(label, states); } states.add(stateWithLabeledEdge); } public Map<Label, Set<NFAState>> getTypeToNFAStatesWithEdgeOfTypeMap() { return typeToNFAStatesWithEdgeOfTypeMap; } public Set<NFAState> getStatesWithEdge(Label label) { return typeToNFAStatesWithEdgeOfTypeMap.get(label); } */ }