/*
 * [The "BSD license"]
 *  Copyright (c) 2010 Terence Parr
 *  All rights reserved.
 *
 *  Redistribution and use in source and binary forms, with or without
 *  modification, are permitted provided that the following conditions
 *  are met:
 *  1. Redistributions of source code must retain the above copyright
 *      notice, this list of conditions and the following disclaimer.
 *  2. Redistributions in binary form must reproduce the above copyright
 *      notice, this list of conditions and the following disclaimer in the
 *      documentation and/or other materials provided with the distribution.
 *  3. The name of the author may not be used to endorse or promote products
 *      derived from this software without specific prior written permission.
 *
 *  THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 *  IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 *  OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 *  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 *  INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 *  NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 *  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 *  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 *  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 *  THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
package org.antlr.tool;

import org.antlr.analysis.Label;
import org.antlr.grammar.v3.AssignTokenTypesWalker;
import org.antlr.misc.Utils;

import java.util.*;

Move all of the functionality from assign.types.g grammar file.
/** Move all of the functionality from assign.types.g grammar file. */
public class AssignTokenTypesBehavior extends AssignTokenTypesWalker { protected static final Integer UNASSIGNED = Utils.integer(-1); protected static final Integer UNASSIGNED_IN_PARSER_RULE = Utils.integer(-2); protected Map<String,Integer> stringLiterals = new TreeMap<String, Integer>(); protected Map<String,Integer> tokens = new TreeMap<String, Integer>(); protected Map<String,String> aliases = new TreeMap<String, String>(); protected Map<String,String> aliasesReverseIndex = new HashMap<String,String>();
Track actual lexer rule defs so we don't get repeated token defs in generated lexer.
/** Track actual lexer rule defs so we don't get repeated token defs in * generated lexer. */
protected Set<String> tokenRuleDefs = new HashSet<String>(); public AssignTokenTypesBehavior() { super(null); } @Override protected void init(Grammar g) { this.grammar = g; currentRuleName = null; if ( stringAlias==null ) { // only init once; can't statically init since we need astFactory initASTPatterns(); } }
Track string literals (could be in tokens{} section)
/** Track string literals (could be in tokens{} section) */
@Override protected void trackString(GrammarAST t) { // if lexer, don't allow aliasing in tokens section if ( currentRuleName==null && grammar.type==Grammar.LEXER ) { ErrorManager.grammarError(ErrorManager.MSG_CANNOT_ALIAS_TOKENS_IN_LEXER, grammar, t.token, t.getText()); return; } // in a plain parser grammar rule, cannot reference literals // (unless defined previously via tokenVocab option) // don't warn until we hit root grammar as may be defined there. if ( grammar.getGrammarIsRoot() && grammar.type==Grammar.PARSER && grammar.getTokenType(t.getText())== Label.INVALID ) { ErrorManager.grammarError(ErrorManager.MSG_LITERAL_NOT_ASSOCIATED_WITH_LEXER_RULE, grammar, t.token, t.getText()); } // Don't record literals for lexers, they are things to match not tokens if ( grammar.type==Grammar.LEXER ) { return; } // otherwise add literal to token types if referenced from parser rule // or in the tokens{} section if ( (currentRuleName==null || Character.isLowerCase(currentRuleName.charAt(0))) && grammar.getTokenType(t.getText())==Label.INVALID ) { stringLiterals.put(t.getText(), UNASSIGNED_IN_PARSER_RULE); } } @Override protected void trackToken(GrammarAST t) { // imported token names might exist, only add if new // Might have ';'=4 in vocab import and SEMI=';'. Avoid // setting to UNASSIGNED if we have loaded ';'/SEMI if ( grammar.getTokenType(t.getText())==Label.INVALID && tokens.get(t.getText())==null ) { tokens.put(t.getText(), UNASSIGNED); } } @Override protected void trackTokenRule(GrammarAST t, GrammarAST modifier, GrammarAST block) { // imported token names might exist, only add if new if ( grammar.type==Grammar.LEXER || grammar.type==Grammar.COMBINED ) { if ( !Character.isUpperCase(t.getText().charAt(0)) ) { return; } if ( t.getText().equals(Grammar.ARTIFICIAL_TOKENS_RULENAME) ) { // don't add Tokens rule return; } // track all lexer rules so we can look for token refs w/o // associated lexer rules. grammar.composite.lexerRules.add(t.getText()); int existing = grammar.getTokenType(t.getText()); if ( existing==Label.INVALID ) { tokens.put(t.getText(), UNASSIGNED); } // look for "<TOKEN> : <literal> ;" pattern // (can have optional action last) if ( block.hasSameTreeStructure(charAlias) || block.hasSameTreeStructure(stringAlias) || block.hasSameTreeStructure(charAlias2) || block.hasSameTreeStructure(stringAlias2) ) { tokenRuleDefs.add(t.getText()); /* Grammar parent = grammar.composite.getDelegator(grammar); boolean importedByParserOrCombined = parent!=null && (parent.type==Grammar.LEXER||parent.type==Grammar.PARSER); */ if ( grammar.type==Grammar.COMBINED || grammar.type==Grammar.LEXER ) { // only call this rule an alias if combined or lexer alias(t, (GrammarAST)block.getChild(0).getChild(0)); } } } // else error } @Override protected void alias(GrammarAST t, GrammarAST s) { String tokenID = t.getText(); String literal = s.getText(); String prevAliasLiteralID = aliasesReverseIndex.get(literal); if ( prevAliasLiteralID!=null ) { // we've seen this literal before if ( tokenID.equals(prevAliasLiteralID) ) { // duplicate but identical alias; might be tokens {A='a'} and // lexer rule A : 'a' ; Is ok, just return return; } // give error unless both are rules (ok if one is in tokens section) if ( !(tokenRuleDefs.contains(tokenID) && tokenRuleDefs.contains(prevAliasLiteralID)) ) { // don't allow alias if A='a' in tokens section and B : 'a'; is rule. // Allow if both are rules. Will get DFA nondeterminism error later. ErrorManager.grammarError(ErrorManager.MSG_TOKEN_ALIAS_CONFLICT, grammar, t.token, tokenID+"="+literal, prevAliasLiteralID); } return; // don't do the alias } int existingLiteralType = grammar.getTokenType(literal); if ( existingLiteralType !=Label.INVALID ) { // we've seen this before from a tokenVocab most likely // don't assign a new token type; use existingLiteralType. tokens.put(tokenID, existingLiteralType); } String prevAliasTokenID = aliases.get(tokenID); if ( prevAliasTokenID!=null ) { ErrorManager.grammarError(ErrorManager.MSG_TOKEN_ALIAS_REASSIGNMENT, grammar, t.token, tokenID+"="+literal, prevAliasTokenID); return; // don't do the alias } aliases.put(tokenID, literal); aliasesReverseIndex.put(literal, tokenID); } @Override public void defineTokens(Grammar root) { /* System.out.println("stringLiterals="+stringLiterals); System.out.println("tokens="+tokens); System.out.println("aliases="+aliases); System.out.println("aliasesReverseIndex="+aliasesReverseIndex); */ assignTokenIDTypes(root); aliasTokenIDsAndLiterals(root); assignStringTypes(root); /* System.out.println("stringLiterals="+stringLiterals); System.out.println("tokens="+tokens); System.out.println("aliases="+aliases); */ defineTokenNamesAndLiteralsInGrammar(root); } /* protected void defineStringLiteralsFromDelegates() { if ( grammar.getGrammarIsMaster() && grammar.type==Grammar.COMBINED ) { List<Grammar> delegates = grammar.getDelegates(); System.out.println("delegates in master combined: "+delegates); for (int i = 0; i < delegates.size(); i++) { Grammar d = (Grammar) delegates.get(i); Set<String> literals = d.getStringLiterals(); for (Iterator it = literals.iterator(); it.hasNext();) { String literal = (String) it.next(); System.out.println("literal "+literal); int ttype = grammar.getTokenType(literal); grammar.defineLexerRuleForStringLiteral(literal, ttype); } } } } */ @Override protected void assignStringTypes(Grammar root) { // walk string literals assigning types to unassigned ones for (Map.Entry<String, Integer> entry : stringLiterals.entrySet()) { String lit = entry.getKey(); Integer oldTypeI = entry.getValue(); int oldType = oldTypeI; if ( oldType<Label.MIN_TOKEN_TYPE ) { Integer typeI = Utils.integer(root.getNewTokenType()); stringLiterals.put(lit, typeI); // if string referenced in combined grammar parser rule, // automatically define in the generated lexer root.defineLexerRuleForStringLiteral(lit, typeI); } } } @Override protected void aliasTokenIDsAndLiterals(Grammar root) { if ( root.type==Grammar.LEXER ) { return; // strings/chars are never token types in LEXER } // walk aliases if any and assign types to aliased literals if literal // was referenced for (Map.Entry<String, String> entry : aliases.entrySet()) { String tokenID = entry.getKey(); String literal = entry.getValue(); if ( literal.charAt(0)=='\'' && stringLiterals.get(literal)!=null ) { stringLiterals.put(literal, tokens.get(tokenID)); // an alias still means you need a lexer rule for it Integer typeI = tokens.get(tokenID); if ( !tokenRuleDefs.contains(tokenID) ) { root.defineLexerRuleForAliasedStringLiteral(tokenID, literal, typeI); } } } } @Override protected void assignTokenIDTypes(Grammar root) { // walk token names, assigning values if unassigned for (Map.Entry<String, Integer> entry : tokens.entrySet()) { String tokenID = entry.getKey(); if ( entry.getValue()==UNASSIGNED ) { tokens.put(tokenID, Utils.integer(root.getNewTokenType())); } } } @Override protected void defineTokenNamesAndLiteralsInGrammar(Grammar root) { for (Map.Entry<String, Integer> entry : tokens.entrySet()) { int ttype = entry.getValue(); root.defineToken(entry.getKey(), ttype); } for (Map.Entry<String, Integer> entry : stringLiterals.entrySet()) { String lit = entry.getKey(); int ttype = entry.getValue(); root.defineToken(lit, ttype); } } }