/*
 * [The "BSD license"]
 *  Copyright (c) 2010 Terence Parr
 *  All rights reserved.
 *
 *  Redistribution and use in source and binary forms, with or without
 *  modification, are permitted provided that the following conditions
 *  are met:
 *  1. Redistributions of source code must retain the above copyright
 *      notice, this list of conditions and the following disclaimer.
 *  2. Redistributions in binary form must reproduce the above copyright
 *      notice, this list of conditions and the following disclaimer in the
 *      documentation and/or other materials provided with the distribution.
 *  3. The name of the author may not be used to endorse or promote products
 *      derived from this software without specific prior written permission.
 *
 *  THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 *  IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 *  OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 *  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 *  INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 *  NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 *  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 *  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 *  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 *  THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
package org.antlr.codegen;


import org.antlr.Tool;
import org.antlr.analysis.DFA;
import org.antlr.analysis.*;
import org.antlr.grammar.v3.ANTLRLexer;
import org.antlr.grammar.v3.ANTLRParser;
import org.antlr.grammar.v3.ActionTranslator;
import org.antlr.grammar.v3.CodeGenTreeWalker;
import org.antlr.misc.BitSet;
import org.antlr.misc.*;
import org.antlr.runtime.*;
import org.antlr.runtime.tree.CommonTreeNodeStream;
import org.antlr.tool.*;
import org.stringtemplate.v4.*;

import java.io.IOException;
import java.io.Writer;
import java.util.*;

/** ANTLR's code generator.
 *
 *  Generate recognizers derived from grammars.  Language independence
 *  achieved through the use of STGroup objects.  All output
 *  strings are completely encapsulated in the group files such as Java.stg.
 *  Some computations are done that are unused by a particular language.
 *  This generator just computes and sets the values into the templates;
 *  the templates are free to use or not use the information.
 *
 *  To make a new code generation target, define X.stg for language X
 *  by copying from existing Y.stg most closely related to your language;
 *  e.g., to do CSharp.stg copy Java.stg.  The template group file has a
 *  bunch of templates that are needed by the code generator.  You can add
 *  a new target w/o even recompiling ANTLR itself.  The language=X option
 *  in a grammar file dictates which templates get loaded/used.
 *
 *  Some languages like C need both parser files and header files.  Java needs
 *  to have a separate file for the cyclic DFA as ANTLR generates bytecodes
 *  directly (which cannot be in the generated parser Java file).  To facilitate
 *  this, cyclic DFAs can be in the same file, but header and output must be
 *  separate; the recognizer is in the output file.
 */
public class CodeGenerator {
/** When generating SWITCH statements, some targets might need to limit
 *  the size (based upon the number of case labels).  Generally, this
 *  limit will be hit only for lexers where wildcard in a UNICODE
 *  vocabulary environment would generate a SWITCH with 65000 labels.
 */
public final static int MSCL_DEFAULT = 300; public static int MAX_SWITCH_CASE_LABELS = MSCL_DEFAULT; public final static int MSA_DEFAULT = 3; public static int MIN_SWITCH_ALTS = MSA_DEFAULT; public boolean GENERATE_SWITCHES_WHEN_POSSIBLE = true; public static boolean LAUNCH_ST_INSPECTOR = false; public final static int MADSI_DEFAULT = 60; // do lots of states inline (needed for expression rules) public static int MAX_ACYCLIC_DFA_STATES_INLINE = MADSI_DEFAULT; public static String classpathTemplateRootDirectoryName = "org/antlr/codegen/templates";
/** Which grammar are we generating code for?  Each generator
 *  is attached to a specific grammar.
 */
public Grammar grammar;
/** What language are we generating? */
protected String language;
/** The target specifies how to write out files and do other language
 *  specific actions.
 */
public Target target = null;
/** Where are the templates this generator should use to generate code? */
protected STGroup templates;
/** The basic output templates without AST or templates stuff; this will be
 *  the templates loaded for the language such as Java.stg *and* the Dbg
 *  stuff if turned on.  This is used for generating syntactic predicates.
 */
protected STGroup baseTemplates; protected ST recognizerST; protected ST outputFileST; protected ST headerFileST;
/** Used to create unique labels */
protected int uniqueLabelNumber = 1;
/** A reference to the ANTLR tool so we can learn about output directories
 *  and such.
 */
protected Tool tool;
/** Generate debugging event method calls */
protected boolean debug;
/** Create a Tracer object and make the recognizer invoke this. */
protected boolean trace;
/** Track runtime parsing information about decisions etc...
 *  This requires the debugging event mechanism to work.
 */
protected boolean profile; protected int lineWidth = 72;
/** I have factored out the generation of acyclic DFAs to a separate class */
public ACyclicDFACodeGenerator acyclicDFAGenerator = new ACyclicDFACodeGenerator(this);
/** I have factored out the generation of cyclic DFAs to a separate class */
/* public CyclicDFACodeGenerator cyclicDFAGenerator = new CyclicDFACodeGenerator(this); */ public static final String VOCAB_FILE_EXTENSION = ".tokens"; protected final static String vocabFilePattern = "<tokens:{it|<it.name>=<it.type>\n}>" + "<literals:{it|<it.name>=<it.type>\n}>"; public CodeGenerator(Tool tool, Grammar grammar, String language) { this.tool = tool; this.grammar = grammar; this.language = language; target = loadLanguageTarget(language); } public static Target loadLanguageTarget(String language) { Target target = null; String targetName = "org.antlr.codegen."+language+"Target"; try { Class<? extends Target> c = Class.forName(targetName).asSubclass(Target.class); target = (Target)c.newInstance(); } catch (ClassNotFoundException cnfe) { target = new Target(); // use default } catch (InstantiationException ie) { ErrorManager.error(ErrorManager.MSG_CANNOT_CREATE_TARGET_GENERATOR, targetName, ie); } catch (IllegalAccessException cnfe) { ErrorManager.error(ErrorManager.MSG_CANNOT_CREATE_TARGET_GENERATOR, targetName, cnfe); } return target; }
/** Load the main language.stg template group file. */
public void loadTemplates(String language) { String langDir = classpathTemplateRootDirectoryName+"/"+language; STGroup coreTemplates = new ToolSTGroupFile(langDir+"/"+language+".stg"); baseTemplates = coreTemplates; // dynamically add subgroups that act like filters to apply to // their supergroup. E.g., Java:Dbg:AST:ASTParser::ASTDbg. String outputOption = (String)grammar.getOption("output"); if ( outputOption!=null && outputOption.equals("AST") ) { if ( debug && grammar.type!=Grammar.LEXER ) { STGroup dbgTemplates = new ToolSTGroupFile(langDir+"/Dbg.stg"); dbgTemplates.importTemplates(coreTemplates); baseTemplates = dbgTemplates; STGroup astTemplates = new ToolSTGroupFile(langDir+"/AST.stg"); astTemplates.importTemplates(dbgTemplates); STGroup astParserTemplates; if ( grammar.type==Grammar.TREE_PARSER ) { astParserTemplates = new ToolSTGroupFile(langDir+"/ASTTreeParser.stg"); astParserTemplates.importTemplates(astTemplates); } else { astParserTemplates = new ToolSTGroupFile(langDir+"/ASTParser.stg"); astParserTemplates.importTemplates(astTemplates); } STGroup astDbgTemplates = new ToolSTGroupFile(langDir+"/ASTDbg.stg"); astDbgTemplates.importTemplates(astParserTemplates); templates = astDbgTemplates; dbgTemplates.iterateAcrossValues = true; // ST v3 compatibility with Maps astDbgTemplates.iterateAcrossValues = true; astParserTemplates.iterateAcrossValues = true; } else { STGroup astTemplates = new ToolSTGroupFile(langDir+"/AST.stg"); astTemplates.importTemplates(coreTemplates); STGroup astParserTemplates; if ( grammar.type==Grammar.TREE_PARSER ) { astParserTemplates = new ToolSTGroupFile(langDir+"/ASTTreeParser.stg"); astParserTemplates.importTemplates(astTemplates); } else { astParserTemplates = new ToolSTGroupFile(langDir+"/ASTParser.stg"); astParserTemplates.importTemplates(astTemplates); } templates = astParserTemplates; astTemplates.iterateAcrossValues = true; // ST v3 compatibility with Maps astParserTemplates.iterateAcrossValues = true; } } else if ( 
outputOption!=null && outputOption.equals("template") ) { if ( debug && grammar.type!=Grammar.LEXER ) { STGroup dbgTemplates = new ToolSTGroupFile(langDir+"/Dbg.stg"); dbgTemplates.importTemplates(coreTemplates); baseTemplates = dbgTemplates; STGroup stTemplates = new ToolSTGroupFile(langDir+"/ST.stg"); stTemplates.importTemplates(dbgTemplates); templates = stTemplates; dbgTemplates.iterateAcrossValues = true; } else { STGroup stTemplates = new ToolSTGroupFile(langDir+"/ST.stg"); stTemplates.importTemplates(coreTemplates); templates = stTemplates; } templates.iterateAcrossValues = true; // ST v3 compatibility with Maps } else if ( debug && grammar.type!=Grammar.LEXER ) { STGroup dbgTemplates = new ToolSTGroupFile(langDir+"/Dbg.stg"); dbgTemplates.importTemplates(coreTemplates); templates = dbgTemplates; baseTemplates = templates; baseTemplates.iterateAcrossValues = true; // ST v3 compatibility with Maps } else { templates = coreTemplates; coreTemplates.iterateAcrossValues = true; // ST v3 compatibility with Maps } }
/** Given the grammar to which we are attached, walk the AST associated
 *  with that grammar to create NFAs.  Then create the DFAs for all
 *  decision points in the grammar by converting the NFAs to DFAs.
 *  Finally, walk the AST again to generate code.
 *
 *  Either 1 or 2 files are written:
 *
 *      recognizer: the main parser/lexer/treewalker item
 *      header file: languages like C/C++ need extern definitions
 *
 *  The target, such as JavaTarget, dictates which files get written.
 */
public ST genRecognizer() { //System.out.println("### generate "+grammar.name+" recognizer"); // LOAD OUTPUT TEMPLATES loadTemplates(language); if ( templates==null ) { return null; } // CREATE NFA FROM GRAMMAR, CREATE DFA FROM NFA if ( ErrorManager.doNotAttemptAnalysis() ) { return null; } target.performGrammarAnalysis(this, grammar); // some grammar analysis errors will not yield reliable DFA if ( ErrorManager.doNotAttemptCodeGen() ) { return null; } // OPTIMIZE DFA DFAOptimizer optimizer = new DFAOptimizer(grammar); optimizer.optimize(); // OUTPUT FILE (contains recognizerST) outputFileST = templates.getInstanceOf("outputFile"); // HEADER FILE if ( templates.isDefined("headerFile") ) { headerFileST = templates.getInstanceOf("headerFile"); } else { // create a dummy to avoid null-checks all over code generator headerFileST = new ST(templates,"xyz"); headerFileST.add("cyclicDFAs", (Object)null); // it normally sees this from outputFile //headerFileST.impl.name = "dummy-header-file"; } boolean filterMode = grammar.getOption("filter")!=null && grammar.getOption("filter").equals("true"); boolean canBacktrack = grammar.getSyntacticPredicates()!=null || grammar.composite.getRootGrammar().atLeastOneBacktrackOption || filterMode; // TODO: move this down further because generating the recognizer // alters the model with info on who uses predefined properties etc... // The actions here might refer to something. // The only two possible output files are available at this point. // Verify action scopes are ok for target and dump actions into output // Templates can say <actions.parser.header> for example. Map<String, Map<String, Object>> actions = grammar.getActions(); verifyActionScopesOkForTarget(actions); // translate $x::y references translateActionAttributeReferences(actions); ST gateST = templates.getInstanceOf("actionGate"); if ( filterMode ) { // if filtering, we need to set actions to execute at backtracking // level 1 not 0. 
gateST = templates.getInstanceOf("filteringActionGate"); } grammar.setSynPredGateIfNotAlready(gateST); headerFileST.add("actions", actions); outputFileST.add("actions", actions); headerFileST.add("buildTemplate", grammar.buildTemplate()); outputFileST.add("buildTemplate", grammar.buildTemplate()); headerFileST.add("buildAST", grammar.buildAST()); outputFileST.add("buildAST", grammar.buildAST()); outputFileST.add("rewriteMode", grammar.rewriteMode()); headerFileST.add("rewriteMode", grammar.rewriteMode()); outputFileST.add("backtracking", canBacktrack); headerFileST.add("backtracking", canBacktrack); // turn on memoize attribute at grammar level so we can create ruleMemo. // each rule has memoize attr that hides this one, indicating whether // it needs to save results String memoize = (String)grammar.getOption("memoize"); outputFileST.add("memoize", (grammar.atLeastOneRuleMemoizes || memoize != null && memoize.equals("true") && canBacktrack)); headerFileST.add("memoize", (grammar.atLeastOneRuleMemoizes || memoize != null && memoize.equals("true") && canBacktrack)); outputFileST.add("trace", trace); headerFileST.add("trace", trace); outputFileST.add("profile", profile); headerFileST.add("profile", profile); // RECOGNIZER if ( grammar.type==Grammar.LEXER ) { recognizerST = templates.getInstanceOf("lexer"); outputFileST.add("LEXER", true); headerFileST.add("LEXER", true); recognizerST.add("filterMode", filterMode); } else if ( grammar.type==Grammar.PARSER || grammar.type==Grammar.COMBINED ) { recognizerST = templates.getInstanceOf("parser"); outputFileST.add("PARSER", true); headerFileST.add("PARSER", true); } else { recognizerST = templates.getInstanceOf("treeParser"); outputFileST.add("TREE_PARSER", true); headerFileST.add("TREE_PARSER", true); recognizerST.add("filterMode", filterMode); } outputFileST.add("recognizer", recognizerST); headerFileST.add("recognizer", recognizerST); outputFileST.add("actionScope", grammar.getDefaultActionScope(grammar.type)); 
headerFileST.add("actionScope", grammar.getDefaultActionScope(grammar.type)); String targetAppropriateFileNameString = target.getTargetStringLiteralFromString(grammar.getFileName()); outputFileST.add("fileName", targetAppropriateFileNameString); headerFileST.add("fileName", targetAppropriateFileNameString); outputFileST.add("ANTLRVersion", tool.VERSION); headerFileST.add("ANTLRVersion", tool.VERSION); outputFileST.add("generatedTimestamp", Tool.getCurrentTimeStamp()); headerFileST.add("generatedTimestamp", Tool.getCurrentTimeStamp()); // GENERATE RECOGNIZER // Walk the AST holding the input grammar, this time generating code // Decisions are generated by using the precomputed DFAs // Fill in the various templates with data CodeGenTreeWalker gen = new CodeGenTreeWalker(new CommonTreeNodeStream(grammar.getGrammarTree())); try { gen.grammar_( grammar, recognizerST, outputFileST, headerFileST); } catch (RecognitionException re) { ErrorManager.error(ErrorManager.MSG_BAD_AST_STRUCTURE, re); } genTokenTypeConstants(recognizerST); genTokenTypeConstants(outputFileST); genTokenTypeConstants(headerFileST); if ( grammar.type!=Grammar.LEXER ) { genTokenTypeNames(recognizerST); genTokenTypeNames(outputFileST); genTokenTypeNames(headerFileST); } // Now that we know what synpreds are used, we can set into template Set<String> synpredNames = null; if ( grammar.synPredNamesUsedInDFA.size()>0 ) { synpredNames = grammar.synPredNamesUsedInDFA; } outputFileST.add("synpreds", synpredNames); headerFileST.add("synpreds", synpredNames); // all recognizers can see Grammar object recognizerST.add("grammar", grammar); // do not render templates to disk if errors occurred if ( ErrorManager.getErrorState().errors > 0 ) { return null; } if (LAUNCH_ST_INSPECTOR) { outputFileST.inspect(); if ( templates.isDefined("headerFile") ) headerFileST.inspect(); } // WRITE FILES try { target.genRecognizerFile(tool,this,grammar,outputFileST); if ( templates.isDefined("headerFile") ) { ST extST = 
templates.getInstanceOf("headerFileExtension"); target.genRecognizerHeaderFile(tool,this,grammar,headerFileST,extST.render()); } // write out the vocab interchange file; used by antlr, // does not change per target ST tokenVocabSerialization = genTokenVocabOutput(); String vocabFileName = getVocabFileName(); if ( vocabFileName!=null ) { write(tokenVocabSerialization, vocabFileName); } //System.out.println(outputFileST.getDOTForDependencyGraph(false)); } catch (IOException ioe) { ErrorManager.error(ErrorManager.MSG_CANNOT_WRITE_FILE, ioe); } /* System.out.println("num obj.prop refs: "+ ASTExpr.totalObjPropRefs); System.out.println("num reflection lookups: "+ ASTExpr.totalReflectionLookups); */ return outputFileST; }
/** Some targets will have some extra scopes like C++ may have
 *  '@headerfile:name {action}' or something.  Make sure the
 *  target likes the scopes in action table.
 */
protected void verifyActionScopesOkForTarget(Map<String, Map<String, Object>> actions) { for (Map.Entry<String, Map<String, Object>> entry : actions.entrySet()) { String scope = entry.getKey(); if ( !target.isValidActionScope(grammar.type, scope) ) { // get any action from the scope to get error location Map<String, Object> scopeActions = entry.getValue(); GrammarAST actionAST = (GrammarAST)scopeActions.values().iterator().next(); ErrorManager.grammarError( ErrorManager.MSG_INVALID_ACTION_SCOPE,grammar, actionAST.getToken(),scope, grammar.getGrammarTypeString()); } } }
/** Actions may reference $x::y attributes, call translateAction on
 *  each action and replace that action in the Map.
 */
protected void translateActionAttributeReferences(Map<String, Map<String, Object>> actions) { for (Map.Entry<String, Map<String, Object>> entry : actions.entrySet()) { Map<String, Object> scopeActions = entry.getValue(); translateActionAttributeReferencesForSingleScope(null,scopeActions); } }
/** Use for translating rule @init{...} actions that have no scope */
public void translateActionAttributeReferencesForSingleScope( Rule r, Map<String, Object> scopeActions) { String ruleName=null; if ( r!=null ) { ruleName = r.name; } for (Map.Entry<String, Object> entry : scopeActions.entrySet()) { String name = entry.getKey(); GrammarAST actionAST = (GrammarAST)entry.getValue(); List<?> chunks = translateAction(ruleName,actionAST); scopeActions.put(name, chunks); // replace with translation } }
/** Error recovery in ANTLR recognizers.
 *
 *  Based upon original ideas:
 *
 *  Algorithms + Data Structures = Programs by Niklaus Wirth
 *
 *  and
 *
 *  A note on error recovery in recursive descent parsers:
 *  http://portal.acm.org/citation.cfm?id=947902.947905
 *
 *  Later, Josef Grosch had some good ideas:
 *  Efficient and Comfortable Error Recovery in Recursive Descent Parsers:
 *  ftp://www.cocolab.com/products/cocktail/doca4.ps/ell.ps.zip
 *
 *  Like Grosch I implemented local FOLLOW sets that are combined at run-time
 *  upon error to avoid parsing overhead.
 */
public void generateLocalFOLLOW(GrammarAST referencedElementNode, String referencedElementName, String enclosingRuleName, int elementIndex) { /* System.out.println("compute FOLLOW "+grammar.name+"."+referencedElementNode.toString()+ " for "+referencedElementName+"#"+elementIndex +" in "+ enclosingRuleName+ " line="+referencedElementNode.getLine()); */ NFAState followingNFAState = referencedElementNode.followingNFAState; LookaheadSet follow = null; if ( followingNFAState!=null ) { // compute follow for this element and, as side-effect, track // the rule LOOK sensitivity. follow = grammar.FIRST(followingNFAState); } if ( follow==null ) { ErrorManager.internalError("no follow state or cannot compute follow"); follow = new LookaheadSet(); } if ( follow.member(Label.EOF) ) { // TODO: can we just remove? Seems needed here: // compilation_unit : global_statement* EOF // Actually i guess we resync to EOF regardless follow.remove(Label.EOF); } //System.out.println(" "+follow); List<Integer> tokenTypeList; long[] words; if ( follow.tokenTypeSet==null ) { words = new long[1]; tokenTypeList = new ArrayList<Integer>(); } else { BitSet bits = BitSet.of(follow.tokenTypeSet); words = bits.toPackedArray(); tokenTypeList = follow.tokenTypeSet.toList(); } // use the target to convert to hex strings (typically) String[] wordStrings = new String[words.length]; for (int j = 0; j < words.length; j++) { long w = words[j]; wordStrings[j] = target.getTarget64BitStringFromValue(w); } recognizerST.addAggr("bitsets.{name,inName,bits,tokenTypes,tokenIndex}", referencedElementName, enclosingRuleName, wordStrings, tokenTypeList, Utils.integer(elementIndex)); outputFileST.addAggr("bitsets.{name,inName,bits,tokenTypes,tokenIndex}", referencedElementName, enclosingRuleName, wordStrings, tokenTypeList, Utils.integer(elementIndex)); headerFileST.addAggr("bitsets.{name,inName,bits,tokenTypes,tokenIndex}", referencedElementName, enclosingRuleName, wordStrings, tokenTypeList, 
Utils.integer(elementIndex)); } // L O O K A H E A D D E C I S I O N G E N E R A T I O N
/** Generate code that computes the predicted alt given a DFA.  The
 *  recognizerST can be either the main generated recognizerTemplate
 *  for storage in the main parser file or a separate file.  It's up to
 *  the code that ultimately invokes the codegen.g grammar rule.
 *
 *  Regardless, the output file and header file get a copy of the DFAs.
 */
public ST genLookaheadDecision(ST recognizerST, DFA dfa) { ST decisionST; // If we are doing inline DFA and this one is acyclic and LL(*) // I have to check for is-non-LL(*) because if non-LL(*) the cyclic // check is not done by DFA.verify(); that is, verify() avoids // doesStateReachAcceptState() if non-LL(*) if ( dfa.canInlineDecision() ) { decisionST = acyclicDFAGenerator.genFixedLookaheadDecision(getTemplates(), dfa); } else { // generate any kind of DFA here (cyclic or acyclic) dfa.createStateTables(this); outputFileST.add("cyclicDFAs", dfa); headerFileST.add("cyclicDFAs", dfa); decisionST = templates.getInstanceOf("dfaDecision"); String description = dfa.getNFADecisionStartState().getDescription(); description = target.getTargetStringLiteralFromString(description); if ( description!=null ) { decisionST.add("description", description); } decisionST.add("decisionNumber", Utils.integer(dfa.getDecisionNumber())); } return decisionST; }
/** A special state is huge (too big for state tables) or has a predicated
 *  edge.  Generate a simple if-then-else.  Cannot be an accept state as
 *  they have no emanating edges.  Don't worry about switch vs if-then-else
 *  because if you get here, the state is super complicated and needs an
 *  if-then-else.  This is used by the new DFA scheme created June 2006.
 */
public ST generateSpecialState(DFAState s) { ST stateST; stateST = templates.getInstanceOf("cyclicDFAState"); stateST.add("needErrorClause", true); stateST.add("semPredState", s.isResolvedWithPredicates()); stateST.add("stateNumber", s.stateNumber); stateST.add("decisionNumber", s.dfa.decisionNumber); boolean foundGatedPred = false; ST eotST = null; for (int i = 0; i < s.getNumberOfTransitions(); i++) { Transition edge = s.transition(i); ST edgeST; if ( edge.label.getAtom()==Label.EOT ) { // this is the default clause; has to held until last edgeST = templates.getInstanceOf("eotDFAEdge"); stateST.remove("needErrorClause"); eotST = edgeST; } else { edgeST = templates.getInstanceOf("cyclicDFAEdge"); ST exprST = genLabelExpr(templates,edge,1); edgeST.add("labelExpr", exprST); } edgeST.add("edgeNumber", Utils.integer(i + 1)); edgeST.add("targetStateNumber", Utils.integer(edge.target.stateNumber)); // stick in any gated predicates for any edge if not already a pred if ( !edge.label.isSemanticPredicate() ) { DFAState t = (DFAState)edge.target; SemanticContext preds = t.getGatedPredicatesInNFAConfigurations(); if ( preds!=null ) { foundGatedPred = true; ST predST = preds.genExpr(this, getTemplates(), t.dfa); edgeST.add("predicates", predST.render()); } } if ( edge.label.getAtom()!=Label.EOT ) { stateST.add("edges", edgeST); } } if ( foundGatedPred ) { // state has >= 1 edge with a gated pred (syn or sem) // must rewind input first, set flag. stateST.add("semPredState", foundGatedPred); } if ( eotST!=null ) { stateST.add("edges", eotST); } return stateST; }
/** Generate an expression for traversing an edge. */
protected ST genLabelExpr(STGroup templates, Transition edge, int k) { Label label = edge.label; if ( label.isSemanticPredicate() ) { return genSemanticPredicateExpr(templates, edge); } if ( label.isSet() ) { return genSetExpr(templates, label.getSet(), k, true); } // must be simple label ST eST = templates.getInstanceOf("lookaheadTest"); eST.add("atom", getTokenTypeAsTargetLabel(label.getAtom())); eST.add("atomAsInt", Utils.integer(label.getAtom())); eST.add("k", Utils.integer(k)); return eST; } protected ST genSemanticPredicateExpr(STGroup templates, Transition edge) { DFA dfa = ((DFAState)edge.target).dfa; // which DFA are we in Label label = edge.label; SemanticContext semCtx = label.getSemanticContext(); return semCtx.genExpr(this,templates,dfa); }
/** For intervals such as [3..3, 30..35], generate an expression that
 *  tests the lookahead similar to LA(1)==3 || (LA(1)&gt;=30&amp;&amp;LA(1)&lt;=35)
 */
public ST genSetExpr(STGroup templates, IntSet set, int k, boolean partOfDFA) { if ( !(set instanceof IntervalSet) ) { throw new IllegalArgumentException("unable to generate expressions for non IntervalSet objects"); } IntervalSet iset = (IntervalSet)set; if ( iset.getIntervals()==null || iset.getIntervals().isEmpty() ) { ST emptyST = new ST(templates, ""); emptyST.impl.name = "empty-set-expr"; return emptyST; } String testSTName = "lookaheadTest"; String testRangeSTName = "lookaheadRangeTest"; String testSetSTName = "lookaheadSetTest"; String varSTName = "lookaheadVarName"; if ( !partOfDFA ) { testSTName = "isolatedLookaheadTest"; testRangeSTName = "isolatedLookaheadRangeTest"; testSetSTName = "isolatedLookaheadSetTest"; varSTName = "isolatedLookaheadVarName"; } ST setST = templates.getInstanceOf("setTest"); // If the SetTest template exists, separate the ranges: // flatten the small ones into one list and make that a range, // and leave the others as they are. if ( templates.isDefined(testSetSTName) ) { // Flatten the IntervalSet into a list of integers. ST sST = templates.getInstanceOf(testSetSTName); Iterator<Interval> iter = iset.getIntervals().iterator(); int rangeNumber = 1; while (iter.hasNext()) { Interval I = iter.next(); int a = I.a; int b = I.b; // Not flattening the large ranges helps us avoid making a // set that contains 90% of Unicode when we could just use // a simple range like (LA(1)>=123 && LA(1)<=65535). // This flattens all ranges of length 4 or less. 
if (b - a < 4) { for (int i = a; i <= b; i++) { sST.add("values", getTokenTypeAsTargetLabel(i)); sST.add("valuesAsInt", Utils.integer(i)); } } else { ST eST = templates.getInstanceOf(testRangeSTName); eST.add("lower", getTokenTypeAsTargetLabel(a)); eST.add("lowerAsInt", Utils.integer(a)); eST.add("upper", getTokenTypeAsTargetLabel(b)); eST.add("upperAsInt", Utils.integer(b)); eST.add("rangeNumber", Utils.integer(rangeNumber)); eST.add("k", Utils.integer(k)); setST.add("ranges", eST); rangeNumber++; } } sST.add("k", Utils.integer(k)); setST.add("ranges", sST); return setST; } Iterator<Interval> iter = iset.getIntervals().iterator(); int rangeNumber = 1; while (iter.hasNext()) { Interval I = iter.next(); int a = I.a; int b = I.b; ST eST; if ( a==b ) { eST = templates.getInstanceOf(testSTName); eST.add("atom", getTokenTypeAsTargetLabel(a)); eST.add("atomAsInt", Utils.integer(a)); //eST.add("k",Utils.integer(k)); } else { eST = templates.getInstanceOf(testRangeSTName); eST.add("lower", getTokenTypeAsTargetLabel(a)); eST.add("lowerAsInt", Utils.integer(a)); eST.add("upper", getTokenTypeAsTargetLabel(b)); eST.add("upperAsInt", Utils.integer(b)); eST.add("rangeNumber", Utils.integer(rangeNumber)); } eST.add("k", Utils.integer(k)); setST.add("ranges", eST); rangeNumber++; } return setST; } // T O K E N D E F I N I T I O N G E N E R A T I O N
// Set attributes tokens and literals attributes in the incoming code template. This is not the token vocab interchange file, but rather a list of token type ID needed by the recognizer.
/** Set attributes tokens and literals attributes in the incoming * code template. This is not the token vocab interchange file, but * rather a list of token type ID needed by the recognizer. */
protected void genTokenTypeConstants(ST code) { // make constants for the token types for (String tokenID : grammar.getTokenIDs()) { int tokenType = grammar.getTokenType(tokenID); if ( tokenType==Label.EOF || tokenType>=Label.MIN_TOKEN_TYPE ) { // don't do FAUX labels 'cept EOF code.addAggr("tokens.{name,type}", tokenID, Utils.integer(tokenType)); } } }
// Generate a token names table that maps token type to a printable name: either the label like INT or the literal like "begin".
/** Generate a token names table that maps token type to a printable * name: either the label like INT or the literal like "begin". */
protected void genTokenTypeNames(ST code) { for (int t=Label.MIN_TOKEN_TYPE; t<=grammar.getMaxTokenType(); t++) { String tokenName = grammar.getTokenDisplayName(t); if ( tokenName!=null ) { tokenName=target.getTargetStringLiteralFromString(tokenName, true); code.add("tokenNames", tokenName); } } }
// Get a meaningful name for a token type useful during code generation. Literals without associated names are converted to the string equivalent of their integer values. Used to generate x==ID and x==34 type comparisons etc... Essentially we are looking for the most obvious way to refer to a token type in the generated code. If in the lexer, return the char literal translated to the target language. For example, ttype=10 will yield '\n' from the getTokenDisplayName method. That must be converted to the target languages literals. For most C-derived languages no translation is needed.
/** Get a meaningful name for a token type useful during code generation. * Literals without associated names are converted to the string equivalent * of their integer values. Used to generate x==ID and x==34 type comparisons * etc... Essentially we are looking for the most obvious way to refer * to a token type in the generated code. If in the lexer, return the * char literal translated to the target language. For example, ttype=10 * will yield '\n' from the getTokenDisplayName method. That must * be converted to the target languages literals. For most C-derived * languages no translation is needed. */
public String getTokenTypeAsTargetLabel(int ttype) { if ( grammar.type==Grammar.LEXER ) { String name = grammar.getTokenDisplayName(ttype); return target.getTargetCharLiteralFromANTLRCharLiteral(this,name); } return target.getTokenTypeAsTargetLabel(this,ttype); }
// Generate a token vocab file with all the token names/types. For example: ID=7 FOR=8 'for'=8 This is independent of the target language; used by antlr internally
/** Generate a token vocab file with all the token names/types. For example: * ID=7 * FOR=8 * 'for'=8 * * This is independent of the target language; used by antlr internally */
protected ST genTokenVocabOutput() { ST vocabFileST = new ST(vocabFilePattern); vocabFileST.add("literals",(Object)null); // "define" literals arg vocabFileST.add("tokens",(Object)null); vocabFileST.impl.name = "vocab-file"; // make constants for the token names for (String tokenID : grammar.getTokenIDs()) { int tokenType = grammar.getTokenType(tokenID); if ( tokenType>=Label.MIN_TOKEN_TYPE ) { vocabFileST.addAggr("tokens.{name,type}", tokenID, Utils.integer(tokenType)); } } // now dump the strings for (String literal : grammar.getStringLiterals()) { int tokenType = grammar.getTokenType(literal); if ( tokenType>=Label.MIN_TOKEN_TYPE ) { vocabFileST.addAggr("tokens.{name,type}", literal, Utils.integer(tokenType)); } } return vocabFileST; } public List<? extends Object> translateAction(String ruleName, GrammarAST actionTree) { if ( actionTree.getType()==ANTLRParser.ARG_ACTION ) { return translateArgAction(ruleName, actionTree); } ActionTranslator translator = new ActionTranslator(this,ruleName,actionTree); List<Object> chunks = translator.translateToChunks(); chunks = target.postProcessAction(chunks, actionTree.token); return chunks; }
// Translate an action like [3,"foo",a[3]] and return a List of the translated actions. Because actions are themselves translated to a list of chunks, must cat together into a ST>. Don't translate to strings early as we need to eval templates in context.
/** Translate an action like [3,"foo",a[3]] and return a List of the * translated actions. Because actions are themselves translated to a list * of chunks, must cat together into a ST&gt;. Don't translate * to strings early as we need to eval templates in context. */
public List<ST> translateArgAction(String ruleName, GrammarAST actionTree) { String actionText = actionTree.token.getText(); List<String> args = getListOfArgumentsFromAction(actionText,','); List<ST> translatedArgs = new ArrayList<ST>(); for (String arg : args) { if ( arg!=null ) { Token actionToken = new CommonToken(ANTLRParser.ACTION,arg); ActionTranslator translator = new ActionTranslator(this,ruleName, actionToken, actionTree.outerAltNum); List<Object> chunks = translator.translateToChunks(); chunks = target.postProcessAction(chunks, actionToken); ST catST = new ST(templates, "<chunks>"); catST.add("chunks", chunks); translatedArgs.add(catST); } } if ( translatedArgs.isEmpty() ) { return null; } return translatedArgs; } public static List<String> getListOfArgumentsFromAction(String actionText, int separatorChar) { List<String> args = new ArrayList<String>(); getListOfArgumentsFromAction(actionText, 0, -1, separatorChar, args); return args; }
// Given an arg action like [x, (*a).foo(21,33), 3.2+1, '\n', "a,oo\nick", {bl, "fdkj"eck}, ["cat\n,", x, 43]] convert to a list of arguments. Allow nested square brackets etc... Set separatorChar to ';' or ',' or whatever you want.
/** Given an arg action like * * [x, (*a).foo(21,33), 3.2+1, '\n', * "a,oo\nick", {bl, "fdkj"eck}, ["cat\n,", x, 43]] * * convert to a list of arguments. Allow nested square brackets etc... * Set separatorChar to ';' or ',' or whatever you want. */
public static int getListOfArgumentsFromAction(String actionText, int start, int targetChar, int separatorChar, List<String> args) { if ( actionText==null ) { return -1; } actionText = actionText.replaceAll("//.*\n", ""); int n = actionText.length(); //System.out.println("actionText@"+start+"->"+(char)targetChar+"="+actionText.substring(start,n)); int p = start; int last = p; while ( p<n && actionText.charAt(p)!=targetChar ) { int c = actionText.charAt(p); switch ( c ) { case '\'' : p++; while ( p<n && actionText.charAt(p)!='\'' ) { if ( actionText.charAt(p)=='\\' && (p+1)<n && actionText.charAt(p+1)=='\'' ) { p++; // skip escaped quote } p++; } p++; break; case '"' : p++; while ( p<n && actionText.charAt(p)!='\"' ) { if ( actionText.charAt(p)=='\\' && (p+1)<n && actionText.charAt(p+1)=='\"' ) { p++; // skip escaped quote } p++; } p++; break; case '(' : p = getListOfArgumentsFromAction(actionText,p+1,')',separatorChar,args); break; case '{' : p = getListOfArgumentsFromAction(actionText,p+1,'}',separatorChar,args); break; case '<' : if ( actionText.indexOf('>',p+1)>=p ) { // do we see a matching '>' ahead? if so, hope it's a generic // and not less followed by expr with greater than p = getListOfArgumentsFromAction(actionText,p+1,'>',separatorChar,args); } else { p++; // treat as normal char } break; case '[' : p = getListOfArgumentsFromAction(actionText,p+1,']',separatorChar,args); break; default : if ( c==separatorChar && targetChar==-1 ) { String arg = actionText.substring(last, p); //System.out.println("arg="+arg); args.add(arg.trim()); last = p+1; } p++; break; } } if ( targetChar==-1 && p<=n ) { String arg = actionText.substring(last, p).trim(); //System.out.println("arg="+arg); if ( arg.length()>0 ) { args.add(arg.trim()); } } p++; return p; }
// Given a template constructor action like %foo(a={...}) in an action, translate it to the appropriate template constructor from the templateLib. This translates a *piece* of the action.
/** Given a template constructor action like %foo(a={...}) in * an action, translate it to the appropriate template constructor * from the templateLib. This translates a *piece* of the action. */
public ST translateTemplateConstructor(String ruleName, int outerAltNum, Token actionToken, String templateActionText) { // first, parse with antlr.g //System.out.println("translate template: "+templateActionText); ANTLRLexer lexer = new ANTLRLexer(new ANTLRStringStream(templateActionText)); lexer.setFileName(grammar.getFileName()); ANTLRParser parser = ANTLRParser.createParser(new CommonTokenStream(lexer)); parser.setFileName(grammar.getFileName()); ANTLRParser.rewrite_template_return parseResult = null; try { parseResult = parser.rewrite_template(); } catch (RecognitionException re) { ErrorManager.grammarError(ErrorManager.MSG_INVALID_TEMPLATE_ACTION, grammar, actionToken, templateActionText); } catch (Exception tse) { ErrorManager.internalError("can't parse template action",tse); } GrammarAST rewriteTree = parseResult.getTree(); // then translate via codegen.g CodeGenTreeWalker gen = new CodeGenTreeWalker(new CommonTreeNodeStream(rewriteTree)); gen.init(grammar); gen.setCurrentRuleName(ruleName); gen.setOuterAltNum(outerAltNum); ST st = null; try { st = gen.rewrite_template(); } catch (RecognitionException re) { ErrorManager.error(ErrorManager.MSG_BAD_AST_STRUCTURE, re); } return st; } public void issueInvalidScopeError(String x, String y, Rule enclosingRule, Token actionToken, int outerAltNum) { //System.out.println("error $"+x+"::"+y); Rule r = grammar.getRule(x); AttributeScope scope = grammar.getGlobalScope(x); if ( scope==null ) { if ( r!=null ) { scope = r.ruleScope; // if not global, might be rule scope } } if ( scope==null ) { ErrorManager.grammarError(ErrorManager.MSG_UNKNOWN_DYNAMIC_SCOPE, grammar, actionToken, x); } else if ( scope.getAttribute(y)==null ) { ErrorManager.grammarError(ErrorManager.MSG_UNKNOWN_DYNAMIC_SCOPE_ATTRIBUTE, grammar, actionToken, x, y); } } public void issueInvalidAttributeError(String x, String y, Rule enclosingRule, Token actionToken, int outerAltNum) { //System.out.println("error $"+x+"."+y); if ( enclosingRule==null ) { // 
action not in a rule ErrorManager.grammarError(ErrorManager.MSG_ATTRIBUTE_REF_NOT_IN_RULE, grammar, actionToken, x, y); return; } // action is in a rule Grammar.LabelElementPair label = enclosingRule.getRuleLabel(x); if ( label!=null || enclosingRule.getRuleRefsInAlt(x, outerAltNum)!=null ) { // $rulelabel.attr or $ruleref.attr; must be unknown attr String refdRuleName = x; if ( label!=null ) { refdRuleName = enclosingRule.getRuleLabel(x).referencedRuleName; } Rule refdRule = grammar.getRule(refdRuleName); AttributeScope scope = refdRule.getAttributeScope(y); if ( scope==null ) { ErrorManager.grammarError(ErrorManager.MSG_UNKNOWN_RULE_ATTRIBUTE, grammar, actionToken, refdRuleName, y); } else if ( scope.isParameterScope ) { ErrorManager.grammarError(ErrorManager.MSG_INVALID_RULE_PARAMETER_REF, grammar, actionToken, refdRuleName, y); } else if ( scope.isDynamicRuleScope ) { ErrorManager.grammarError(ErrorManager.MSG_INVALID_RULE_SCOPE_ATTRIBUTE_REF, grammar, actionToken, refdRuleName, y); } } } public void issueInvalidAttributeError(String x, Rule enclosingRule, Token actionToken, int outerAltNum) { //System.out.println("error $"+x); if ( enclosingRule==null ) { // action not in a rule ErrorManager.grammarError(ErrorManager.MSG_ATTRIBUTE_REF_NOT_IN_RULE, grammar, actionToken, x); return; } // action is in a rule Grammar.LabelElementPair label = enclosingRule.getRuleLabel(x); AttributeScope scope = enclosingRule.getAttributeScope(x); if ( label!=null || enclosingRule.getRuleRefsInAlt(x, outerAltNum)!=null || enclosingRule.name.equals(x) ) { ErrorManager.grammarError(ErrorManager.MSG_ISOLATED_RULE_SCOPE, grammar, actionToken, x); } else if ( scope!=null && scope.isDynamicRuleScope ) { ErrorManager.grammarError(ErrorManager.MSG_ISOLATED_RULE_ATTRIBUTE, grammar, actionToken, x); } else { ErrorManager.grammarError(ErrorManager.MSG_UNKNOWN_SIMPLE_ATTRIBUTE, grammar, actionToken, x); } } // M I S C public STGroup getTemplates() { return templates; } public STGroup 
getBaseTemplates() { return baseTemplates; } public void setDebug(boolean debug) { this.debug = debug; } public void setTrace(boolean trace) { this.trace = trace; } public void setProfile(boolean profile) { this.profile = profile; if ( profile ) { setDebug(true); // requires debug events } } public ST getRecognizerST() { return outputFileST; }
// Generate TParser.java and TLexer.java from T.g if combined, else just use T.java as output regardless of type.
/** Generate TParser.java and TLexer.java from T.g if combined, else * just use T.java as output regardless of type. */
public String getRecognizerFileName(String name, int type) { ST extST = templates.getInstanceOf("codeFileExtension"); String recognizerName = grammar.getRecognizerName(); return recognizerName+extST.render(); /* String suffix = ""; if ( type==Grammar.COMBINED || (type==Grammar.LEXER && !grammar.implicitLexer) ) { suffix = Grammar.grammarTypeToFileNameSuffix[type]; } return name+suffix+extST.toString(); */ }
// What is the name of the vocab file generated for this grammar? Returns null if no .tokens file should be generated.
/** What is the name of the vocab file generated for this grammar? * Returns null if no .tokens file should be generated. */
public String getVocabFileName() { if ( grammar.isBuiltFromString() ) { return null; } return grammar.name+VOCAB_FILE_EXTENSION; } public void write(ST code, String fileName) throws IOException { //long start = System.currentTimeMillis(); Writer w = tool.getOutputFile(grammar, fileName); // Write the output to a StringWriter STWriter wr = new AutoIndentWriter(w); wr.setLineWidth(lineWidth); code.write(wr); w.close(); //long stop = System.currentTimeMillis(); //System.out.println("render time for "+fileName+": "+(int)(stop-start)+"ms"); }
// You can generate a switch rather than if-then-else for a DFA state if there are no semantic predicates and the number of edge label values is small enough; e.g., don't generate a switch for a state containing an edge label such as 20..52330 (the resulting byte codes would overflow the method 65k limit probably).
/** You can generate a switch rather than if-then-else for a DFA state * if there are no semantic predicates and the number of edge label * values is small enough; e.g., don't generate a switch for a state * containing an edge label such as 20..52330 (the resulting byte codes * would overflow the method 65k limit probably). */
protected boolean canGenerateSwitch(DFAState s) { if ( !GENERATE_SWITCHES_WHEN_POSSIBLE ) { return false; } int size = 0; for (int i = 0; i < s.getNumberOfTransitions(); i++) { Transition edge = s.transition(i); if ( edge.label.isSemanticPredicate() ) { return false; } // can't do a switch if the edges are going to require predicates if ( edge.label.getAtom()==Label.EOT ) { int EOTPredicts = ((DFAState)edge.target).getUniquelyPredictedAlt(); if ( EOTPredicts==NFA.INVALID_ALT_NUMBER ) { // EOT target has to be a predicate then; no unique alt return false; } } // if target is a state with gated preds, we need to use preds on // this edge then to reach it. if ( ((DFAState)edge.target).getGatedPredicatesInNFAConfigurations()!=null ) { return false; } size += edge.label.getSet().size(); } if ( s.getNumberOfTransitions()<MIN_SWITCH_ALTS || size>MAX_SWITCH_CASE_LABELS ) { return false; } return true; }
// Create a label to track a token / rule reference's result. Technically, this is a place where I break model-view separation as I am creating a variable name that could be invalid in a target language, however, label ::= <ID><INT> is probably ok in all languages we care about.
/** Create a label to track a token / rule reference's result. * Technically, this is a place where I break model-view separation * as I am creating a variable name that could be invalid in a * target language, however, label ::= &lt;ID&gt;&lt;INT&gt; is probably ok in * all languages we care about. */
public String createUniqueLabel(String name) { return new StringBuffer() .append(name).append(uniqueLabelNumber++).toString(); } }