/*
 * [The "BSD license"]
 *  Copyright (c) 2010 Terence Parr
 *  All rights reserved.
 *
 *  Redistribution and use in source and binary forms, with or without
 *  modification, are permitted provided that the following conditions
 *  are met:
 *  1. Redistributions of source code must retain the above copyright
 *      notice, this list of conditions and the following disclaimer.
 *  2. Redistributions in binary form must reproduce the above copyright
 *      notice, this list of conditions and the following disclaimer in the
 *      documentation and/or other materials provided with the distribution.
 *  3. The name of the author may not be used to endorse or promote products
 *      derived from this software without specific prior written permission.
 *
 *  THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 *  IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 *  OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 *  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 *  INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 *  NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 *  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 *  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 *  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 *  THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
package org.antlr.analysis;

import org.antlr.codegen.CodeGenerator;
import org.antlr.misc.IntSet;
import org.antlr.misc.IntervalSet;
import org.antlr.misc.Utils;
import org.antlr.runtime.IntStream;
import org.stringtemplate.v4.ST;
import org.antlr.tool.*;

import java.util.*;

/** A DFA (converted from a grammar's NFA).
 *  DFAs are used as prediction machines for alternative blocks in all kinds
 *  of recognizers (lexers, parsers, tree walkers).
 */
public class DFA {
public static final int REACHABLE_UNKNOWN = -2;
public static final int REACHABLE_BUSY = -1; // in process of computing
public static final int REACHABLE_NO = 0;
public static final int REACHABLE_YES = 1;

public static final int CYCLIC_UNKNOWN = -2;
public static final int CYCLIC_BUSY = -1; // in process of computing
public static final int CYCLIC_DONE = 0;

/** Prevent explosion of DFA states during conversion. The max number
 *  of states per alt in a single decision's DFA.
public static final int MAX_STATES_PER_ALT_IN_DFA = 450;
 */
/** Set to 0 to not terminate early (time in ms) */
public static int MAX_TIME_PER_DFA_CREATION = 1*1000;
/** How many edges can each DFA state have before a "special" state
 *  is created that uses IF expressions instead of a table?
 */
public static int MAX_STATE_TRANSITIONS_FOR_TABLE = 65534;
/** What's the start state for this DFA? */
public DFAState startState;
/** This DFA is being built for which decision? */
public int decisionNumber = 0;
/** From what NFAState did we create the DFA? */
public NFAState decisionNFAStartState;
/** The printable grammar fragment associated with this DFA */
public String description;
/** A set of all uniquely-numbered DFA states. Maps hash of DFAState
 *  to the actual DFAState object. We use this to detect
 *  existing DFA states. Map&lt;DFAState,DFAState&gt;. Use Map so
 *  we can get old state back (Set only allows you to see if it's there).
 *  Not used during fixed k lookahead as it's a waste to fill it with
 *  a dup of states array.
 */
protected Map<DFAState, DFAState> uniqueStates = new HashMap<DFAState, DFAState>();
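// For example (illustrative sketch only), the Map-instead-of-Set idiom lets the
// converter canonicalize a freshly built state:
//
//     DFAState existing = uniqueStates.get(d); // keyed by state "content"
//     if ( existing!=null ) {
//         d = existing;                        // reuse the previously numbered object
//     }
//
// A Set could only answer "is an equal state present?", not hand back the old
// instance; see addState() below for the real bookkeeping.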
/** Maps the state number to the actual DFAState. Use a Vector as it
 *  grows automatically when I set the ith element. This contains all
 *  states, but the states are not unique. s3 might be same as s1 so
 *  s3 &rarr; s1 in this table. This is how cycles occur. If fixed k,
 *  then these states will all be unique as states[i] always points
 *  at state i when no cycles exist.
 *
 *  This is managed in parallel with uniqueStates and simply provides
 *  a way to go from state number to DFAState rather than via a
 *  hash lookup.
 */
protected Vector<DFAState> states = new Vector<DFAState>();
/** Unique state numbers per DFA */
protected int stateCounter = 0;
/** Count only new states, not states that were rejected as already present. */
protected int numberOfStates = 0;
/** User specified max fixed lookahead. If 0, nothing specified. -1
 *  implies we have not looked at the options table yet to set k.
 */
protected int user_k = -1;
/** While building the DFA, track max lookahead depth if not cyclic */
protected int max_k = -1;
/** Is this DFA reduced? I.e., can all states lead to an accept state? */
protected boolean reduced = true;
/** Are there any loops in this DFA?
 *  Computed by doesStateReachAcceptState()
 */
protected boolean cyclic = false;
/** Track whether this DFA has at least one sem/syn pred encountered
 *  during a closure operation. This is useful for deciding whether
 *  to retry a non-LL(*) with k=1. If no pred, it will not work w/o
 *  a pred so don't bother. It would just give another error message.
 */
public boolean predicateVisible = false;

public boolean hasPredicateBlockedByAction = false;
/** Each alt in an NFA derived from a grammar must have a DFA state that
 *  predicts it lest the parser not know what to do. Nondeterminisms can
 *  lead to this situation (assuming no semantic predicates can resolve
 *  the problem) and when for some reason, I cannot compute the lookahead
 *  (which might arise from an error in the algorithm or from
 *  left-recursion etc...). This list starts out with all alts contained
 *  and then in method doesStateReachAcceptState() I remove the alts I
 *  know to be uniquely predicted.
 */
protected List<Integer> unreachableAlts;

protected int nAlts = 0;
/** We only want one accept state per predicted alt; track here */
protected DFAState[] altToAcceptState;
/** Track whether an alt discovers recursion for each alt during
 *  NFA to DFA conversion; &gt;1 alt with recursion implies nonregular.
 */
public IntSet recursiveAltSet = new IntervalSet();
/** Which NFA are we converting (well, which piece of the NFA)? */
public NFA nfa;

protected NFAToDFAConverter nfaConverter;
/** This probe tells you a lot about a decision and is useful even
 *  when there is no error such as when a syntactic nondeterminism
 *  is solved via semantic predicates. Perhaps a GUI would want
 *  the ability to show that.
 */
public DecisionProbe probe = new DecisionProbe(this);

/** Track absolute time of the conversion so we can have a failsafe:
 *  if it takes too long, then terminate. Assume bugs are in the
 *  analysis engine.
 */
//protected long conversionStartTime;
/** Map an edge transition table to a unique set number; ordered so
 *  we can push into the output template as an ordered list of sets
 *  and then ref them from within the transition[][] table. Like this
 *  for C# target:
 *      public static readonly DFA30_transition0 =
 *          new short[] { 46, 46, -1, 46, 46, -1, -1, -1, -1, -1, -1, -1,...};
 *      public static readonly DFA30_transition1 =
 *          new short[] { 21 };
 *      public static readonly short[][] DFA30_transition = {
 *          DFA30_transition0,
 *          DFA30_transition0,
 *          DFA30_transition1,
 *          ...
 *      };
 */
public Map<List<Integer>, Integer> edgeTransitionClassMap = new LinkedHashMap<List<Integer>, Integer>();
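// Illustrative sketch (hypothetical state and row numbers): if state 7 and
// state 12 both end up with the transition row [3, 3, -1, 5], the first
// occurrence is assigned the next class number and later occurrences reuse it:
//
//     List<Integer> row = ...;                      // built per state
//     Integer cls = edgeTransitionClassMap.get(row);
//     if ( cls==null ) {
//         cls = edgeTransitionClass++;              // new unique row
//         edgeTransitionClassMap.put(row, cls);
//     }
//     // both states then reference the same DFAxx_transitionN table
//
// See createTransitionTableEntryForState() for the real bookkeeping.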
/** The unique edge transition class number; every time we see a new
 *  set of edges emanating from a state, we number it so we can reuse
 *  it if it's ever seen again for another state. For the Java grammar,
 *  some of the big edge transition tables are seen about 57 times.
 */
protected int edgeTransitionClass = 0;

/* This DFA can be converted to a transition[state][char] table and
 * the following tables are filled by createStateTables upon request.
 * These are injected into the templates for code generation.
 * See March 25, 2006 entry for description:
 * http://www.antlr.org/blog/antlr3/codegen.tml
 * Often using Vector as can't set ith position in a List and have
 * it extend list size; bizarre.
 */
/** List of special DFAState objects */
public List<DFAState> specialStates;
/** List of ST for special states. */
public List<ST> specialStateSTs;

public Vector<Integer> accept;
public Vector<Integer> eot;
public Vector<Integer> eof;
public Vector<Integer> min;
public Vector<Integer> max;
public Vector<Integer> special;
public Vector<Vector<Integer>> transition;
/** just the Vector&lt;Integer&gt; indicating which unique edge table is at
 *  position i.
 */
public Vector<Integer> transitionEdgeTables; // not used by java yet

protected int uniqueCompressedSpecialStateNum = 0;
/** Which generator to use if we're building state tables */
protected CodeGenerator generator = null;

protected DFA() {}

public DFA(int decisionNumber, NFAState decisionStartState) {
    this.decisionNumber = decisionNumber;
    this.decisionNFAStartState = decisionStartState;
    nfa = decisionStartState.nfa;
    nAlts = nfa.grammar.getNumberOfAltsForDecisionNFA(decisionStartState);
    //setOptions( nfa.grammar.getDecisionOptions(getDecisionNumber()) );
    initAltRelatedInfo();

    //long start = System.currentTimeMillis();
    nfaConverter = new NFAToDFAConverter(this);
    try {
        nfaConverter.convert();

        // figure out if there are problems with decision
        verify();

        if ( !probe.isDeterministic() || probe.analysisOverflowed() ) {
            probe.issueWarnings();
        }

        // must be after verify as it computes cyclic, needed by this routine
        // should be after warnings because early termination or something
        // will not allow the reset to operate properly in some cases.
        resetStateNumbersToBeContiguous();

        //long stop = System.currentTimeMillis();
        //System.out.println("verify cost: "+(int)(stop-start)+" ms");
    }
//    catch (AnalysisTimeoutException at) {
//        probe.reportAnalysisTimeout();
//        if ( !okToRetryDFAWithK1() ) {
//            probe.issueWarnings();
//        }
//    }
    catch (NonLLStarDecisionException nonLL) {
        probe.reportNonLLStarDecision(this);
        // >1 alt recurses, k=* and no auto backtrack nor manual sem/syn
        if ( !okToRetryDFAWithK1() ) {
            probe.issueWarnings();
        }
    }
}
/** Walk all states and reset their numbers to be a contiguous sequence
 *  of integers starting from 0. Only cyclic DFA can have unused positions
 *  in states list. State i might be identical to a previous state j and
 *  will result in states[i] == states[j]. We don't want to waste a state
 *  number on this. Useful mostly for code generation in tables.
 *
 *  At the start of this routine, states[i].stateNumber &lt;= i by definition.
 *  If states[50].stateNumber is 50 then a cycle during conversion may
 *  try to add state 103, but we find that an identical DFA state, named
 *  50, already exists, hence, states[103]==states[50] and both have
 *  stateNumber 50 as they point at same object. Afterwards, the set
 *  of state numbers from all states should represent a contiguous range
 *  from 0..n-1 where n is the number of unique states.
 */
public void resetStateNumbersToBeContiguous() {
    if ( getUserMaxLookahead()>0 ) {
        // all numbers are unique already; no states are thrown out.
        return;
    }

    // walk list of DFAState objects by state number,
    // setting state numbers to 0..n-1
    int snum=0;
    for (int i = 0; i <= getMaxStateNumber(); i++) {
        DFAState s = getState(i);
        // some states are unused after creation most commonly due to cycles
        // or conflict resolution.
        if ( s==null ) {
            continue;
        }
        // state i is mapped to DFAState with state number set to i originally
        // so if it's less than i, then we renumbered it already; that
        // happens when states have been merged or cycles occurred I think.
        // states[50] will point to DFAState with s50 in it but
        // states[103] might also point at this same DFAState. Since
        // 50 < 103 then it's already been renumbered as it points downwards.
        boolean alreadyRenumbered = s.stateNumber<i;
        if ( !alreadyRenumbered ) {
            // state i is a valid state, reset its state number
            s.stateNumber = snum; // rewrite state numbers to be 0..n-1
            snum++;
        }
    }
    if ( snum!=getNumberOfStates() ) {
        ErrorManager.internalError("DFA "+decisionNumber+": "+
            decisionNFAStartState.getDescription()+" num unique states "+getNumberOfStates()+
            "!= num renumbered states "+snum);
    }
}

// JAVA-SPECIFIC Accessors!!!!! It is so impossible to get arrays
// or even consistently formatted strings acceptable to java that
// I am forced to build the individual char elements here

public List<? extends String> getJavaCompressedAccept() { return getRunLengthEncoding(accept); }
public List<? extends String> getJavaCompressedEOT() { return getRunLengthEncoding(eot); }
public List<? extends String> getJavaCompressedEOF() { return getRunLengthEncoding(eof); }
public List<? extends String> getJavaCompressedMin() { return getRunLengthEncoding(min); }
public List<? extends String> getJavaCompressedMax() { return getRunLengthEncoding(max); }
public List<? extends String> getJavaCompressedSpecial() { return getRunLengthEncoding(special); }

public List<List<? extends String>> getJavaCompressedTransition() {
    if ( transition==null || transition.isEmpty() ) {
        return null;
    }
    List<List<? extends String>> encoded = new ArrayList<List<? extends String>>(transition.size());
    // walk Vector<Vector<FormattedInteger>> which is the transition[][] table
    for (int i = 0; i < transition.size(); i++) {
        Vector<Integer> transitionsForState = transition.elementAt(i);
        encoded.add(getRunLengthEncoding(transitionsForState));
    }
    return encoded;
}
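// Worked example (made-up state numbers) for resetStateNumbersToBeContiguous():
// after a cyclic conversion the states list might look like
//     states[0]=s0, states[1]=s1, states[2]=s1 (merged), states[3]=null, states[4]=s4
// Only the first time an object is seen (states[i].stateNumber == i) does it get
// the next contiguous number, so s0, s1, s4 are renumbered 0, 1, 2 and the
// duplicate/null slots are skipped, giving the 0..n-1 range the generated
// tables require.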
/** Compress the incoming data list so that runs of same number are
 *  encoded as number,value pair sequences. 3 -1 -1 -1 28 is encoded
 *  as 1 3 3 -1 1 28. I am pretty sure this is the lossless compression
 *  that GIF files use. Transition tables are heavily compressed by
 *  this technique. I got the idea from JFlex http://jflex.de/
 *
 *  Return List&lt;String&gt; where each string is either \xyz for 8bit char
 *  and \uFFFF for 16bit. Hideous and specific to Java, but it is the
 *  only target bad enough to need it.
 */
public List<? extends String> getRunLengthEncoding(List<Integer> data) {
    if ( data==null || data.isEmpty() ) {
        // for states with no transitions we want an empty string ""
        // to hold its place in the transitions array.
        List<String> empty = new ArrayList<String>();
        empty.add("");
        return empty;
    }
    int size = Math.max(2,data.size()/2);
    List<String> encoded = new ArrayList<String>(size); // guess at size
    // scan values looking for runs
    int i = 0;
    Integer emptyValue = Utils.integer(-1);
    while ( i < data.size() ) {
        Integer I = data.get(i);
        if ( I==null ) {
            I = emptyValue;
        }
        // count how many v there are?
        int n = 0;
        for (int j = i; j < data.size(); j++) {
            Integer v = data.get(j);
            if ( v==null ) {
                v = emptyValue;
            }
            if ( I.equals(v) ) {
                n++;
            }
            else {
                break;
            }
        }
        encoded.add(generator.target.encodeIntAsCharEscape((char)n));
        encoded.add(generator.target.encodeIntAsCharEscape((char)I.intValue()));
        i+=n;
    }
    return encoded;
}

public void createStateTables(CodeGenerator generator) {
    //System.out.println("createTables:\n"+this);
    this.generator = generator;
    description = getNFADecisionStartState().getDescription();
    description = generator.target.getTargetStringLiteralFromString(description);

    // create all the tables
    special = new Vector<Integer>(this.getNumberOfStates()); // Vector<short>
    special.setSize(this.getNumberOfStates());
    specialStates = new ArrayList<DFAState>();               // List<DFAState>
    specialStateSTs = new ArrayList<ST>();                   // List<ST>
    accept = new Vector<Integer>(this.getNumberOfStates());  // Vector<int>
    accept.setSize(this.getNumberOfStates());
    eot = new Vector<Integer>(this.getNumberOfStates());     // Vector<int>
    eot.setSize(this.getNumberOfStates());
    eof = new Vector<Integer>(this.getNumberOfStates());     // Vector<int>
    eof.setSize(this.getNumberOfStates());
    min = new Vector<Integer>(this.getNumberOfStates());     // Vector<int>
    min.setSize(this.getNumberOfStates());
    max = new Vector<Integer>(this.getNumberOfStates());     // Vector<int>
    max.setSize(this.getNumberOfStates());
    transition = new Vector<Vector<Integer>>(this.getNumberOfStates()); // Vector<Vector<int>>
    transition.setSize(this.getNumberOfStates());
    transitionEdgeTables = new Vector<Integer>(this.getNumberOfStates()); // Vector<int>
    transitionEdgeTables.setSize(this.getNumberOfStates());

    // for each state in the DFA, fill relevant tables.
    Iterator<DFAState> it;
    if ( getUserMaxLookahead()>0 ) {
        it = states.iterator();
    }
    else {
        it = getUniqueStates().values().iterator();
    }
    while ( it.hasNext() ) {
        DFAState s = it.next();
        if ( s==null ) {
            // ignore null states; some acyclic DFA see this condition
            // when inlining DFA (due to lacking of exit branch pruning?)
            continue;
        }
        if ( s.isAcceptState() ) {
            // can't compute min,max,special,transition on accepts
            accept.set(s.stateNumber,
                       Utils.integer(s.getUniquelyPredictedAlt()));
        }
        else {
            createMinMaxTables(s);
            createTransitionTableEntryForState(s);
            createSpecialTable(s);
            createEOTAndEOFTables(s);
        }
    }

    // now that we have computed list of specialStates, gen code for 'em
    for (int i = 0; i < specialStates.size(); i++) {
        DFAState ss = specialStates.get(i);
        ST stateST = generator.generateSpecialState(ss);
        specialStateSTs.add(stateST);
    }

    // check that the tables are not messed up by encode/decode
    /*
    testEncodeDecode(min);
    testEncodeDecode(max);
    testEncodeDecode(accept);
    testEncodeDecode(special);
    System.out.println("min="+min);
    System.out.println("max="+max);
    System.out.println("eot="+eot);
    System.out.println("eof="+eof);
    System.out.println("accept="+accept);
    System.out.println("special="+special);
    System.out.println("transition="+transition);
    */
}

/*
private void testEncodeDecode(List data) {
    System.out.println("data="+data);
    List encoded = getRunLengthEncoding(data);
    StringBuffer buf = new StringBuffer();
    for (int i = 0; i < encoded.size(); i++) {
        String I = (String)encoded.get(i);
        int v = 0;
        if ( I.startsWith("\\u") ) {
            v = Integer.parseInt(I.substring(2,I.length()), 16);
        }
        else {
            v = Integer.parseInt(I.substring(1,I.length()), 8);
        }
        buf.append((char)v);
    }
    String encodedS = buf.toString();
    short[] decoded = org.antlr.runtime.DFA.unpackEncodedString(encodedS);
    //System.out.println("decoded:");
    for (int i = 0; i < decoded.length; i++) {
        short x = decoded[i];
        if ( x!=((Integer)data.get(i)).intValue() ) {
            System.err.println("problem with encoding");
        }
        //System.out.print(", "+x);
    }
    //System.out.println();
}
*/

protected void createMinMaxTables(DFAState s) {
    int smin = Label.MAX_CHAR_VALUE + 1;
    int smax = Label.MIN_ATOM_VALUE - 1;
    for (int j = 0; j < s.getNumberOfTransitions(); j++) {
        Transition edge = s.transition(j);
        Label label = edge.label;
        if ( label.isAtom() ) {
            if ( label.getAtom()>=Label.MIN_CHAR_VALUE ) {
                if ( label.getAtom()<smin ) {
                    smin = label.getAtom();
                }
                if ( label.getAtom()>smax ) {
                    smax = label.getAtom();
                }
            }
        }
        else if ( label.isSet() ) {
            IntervalSet labels = (IntervalSet)label.getSet();
            int lmin = labels.getMinElement();
            // if valid char (don't do EOF) and less than current min
            if ( lmin<smin && lmin>=Label.MIN_CHAR_VALUE ) {
                smin = labels.getMinElement();
            }
            if ( labels.getMaxElement()>smax ) {
                smax = labels.getMaxElement();
            }
        }
    }

    if ( smax<0 ) {
        // must be predicates or pure EOT transition; just zero out min, max
        smin = Label.MIN_CHAR_VALUE;
        smax = Label.MIN_CHAR_VALUE;
    }

    min.set(s.stateNumber, Utils.integer((char)smin));
    max.set(s.stateNumber, Utils.integer((char)smax));

    if ( smax<0 || smin>Label.MAX_CHAR_VALUE || smin<0 ) {
        ErrorManager.internalError("messed up: min="+min+", max="+max);
    }
}

protected void createTransitionTableEntryForState(DFAState s) {
    /*
    System.out.println("createTransitionTableEntryForState s"+s.stateNumber+
        " dec "+s.dfa.decisionNumber+" cyclic="+s.dfa.isCyclic());
    */
    int smax = max.get(s.stateNumber);
    int smin = min.get(s.stateNumber);

    Vector<Integer> stateTransitions = new Vector<Integer>(smax-smin+1);
    stateTransitions.setSize(smax-smin+1);
    transition.set(s.stateNumber, stateTransitions);
    for (int j = 0; j < s.getNumberOfTransitions(); j++) {
        Transition edge = s.transition(j);
        Label label = edge.label;
        if ( label.isAtom() && label.getAtom()>=Label.MIN_CHAR_VALUE ) {
            int labelIndex = label.getAtom()-smin; // offset from 0
            stateTransitions.set(labelIndex,
                                 Utils.integer(edge.target.stateNumber));
        }
        else if ( label.isSet() ) {
            IntervalSet labels = (IntervalSet)label.getSet();
            int[] atoms = labels.toArray();
            for (int a = 0; a < atoms.length; a++) {
                // set the transition if the label is valid (don't do EOF)
                if ( atoms[a]>=Label.MIN_CHAR_VALUE ) {
                    int labelIndex = atoms[a]-smin; // offset from 0
                    stateTransitions.set(labelIndex,
                                         Utils.integer(edge.target.stateNumber));
                }
            }
        }
    }
    // track unique state transition tables so we can reuse
    Integer edgeClass = edgeTransitionClassMap.get(stateTransitions);
    if ( edgeClass!=null ) {
        //System.out.println("we've seen this array before; size="+stateTransitions.size());
        transitionEdgeTables.set(s.stateNumber, edgeClass);
    }
    else {
        edgeClass = Utils.integer(edgeTransitionClass);
        transitionEdgeTables.set(s.stateNumber, edgeClass);
        edgeTransitionClassMap.put(stateTransitions, edgeClass);
        edgeTransitionClass++;
    }
}
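// Example (values taken from the getRunLengthEncoding() javadoc): the row
//     3 -1 -1 -1 28
// becomes the (count,value) pairs (1,3) (3,-1) (1,28), and each count/value is
// then emitted through Target.encodeIntAsCharEscape() so the generated Java
// code can store the whole table as a packed String that
// org.antlr.runtime.DFA.unpackEncodedString() expands again at load time.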
/** Set up the EOT and EOF tables; we cannot put -1 min/max values so
 *  we need another way to test that in the DFA transition function.
 */
protected void createEOTAndEOFTables(DFAState s) {
    for (int j = 0; j < s.getNumberOfTransitions(); j++) {
        Transition edge = s.transition(j);
        Label label = edge.label;
        if ( label.isAtom() ) {
            if ( label.getAtom()==Label.EOT ) {
                // eot[s] points to accept state
                eot.set(s.stateNumber, Utils.integer(edge.target.stateNumber));
            }
            else if ( label.getAtom()==Label.EOF ) {
                // eof[s] points to accept state
                eof.set(s.stateNumber, Utils.integer(edge.target.stateNumber));
            }
        }
        else if ( label.isSet() ) {
            IntervalSet labels = (IntervalSet)label.getSet();
            int[] atoms = labels.toArray();
            for (int a = 0; a < atoms.length; a++) {
                if ( atoms[a]==Label.EOT ) {
                    // eot[s] points to accept state
                    eot.set(s.stateNumber, Utils.integer(edge.target.stateNumber));
                }
                else if ( atoms[a]==Label.EOF ) {
                    eof.set(s.stateNumber, Utils.integer(edge.target.stateNumber));
                }
            }
        }
    }
}

protected void createSpecialTable(DFAState s) {
    // number all special states from 0...n-1 instead of their usual numbers
    boolean hasSemPred = false;

    // TODO this code is very similar to canGenerateSwitch. Refactor to share
    for (int j = 0; j < s.getNumberOfTransitions(); j++) {
        Transition edge = s.transition(j);
        Label label = edge.label;
        // can't do a switch if the edges have preds or are going to
        // require gated predicates
        if ( label.isSemanticPredicate() ||
             ((DFAState)edge.target).getGatedPredicatesInNFAConfigurations()!=null)
        {
            hasSemPred = true;
            break;
        }
    }

    // if has pred or too big for table, make it special
    int smax = max.get(s.stateNumber);
    int smin = min.get(s.stateNumber);
    if ( hasSemPred || smax-smin>MAX_STATE_TRANSITIONS_FOR_TABLE ) {
        special.set(s.stateNumber,
                    Utils.integer(uniqueCompressedSpecialStateNum));
        uniqueCompressedSpecialStateNum++;
        specialStates.add(s);
    }
    else {
        special.set(s.stateNumber, Utils.integer(-1)); // not special
    }
}

public int predict(IntStream input) {
    Interpreter interp = new Interpreter(nfa.grammar, input);
    return interp.predict(this);
}
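// Minimal usage sketch (illustrative, assuming a char-based lexer decision):
// the interpreter runs this DFA against any IntStream, e.g.
//
//     // DFA dfa = ...; // a lookahead DFA built for some decision
//     // int alt = dfa.predict(new org.antlr.runtime.ANTLRStringStream("ab"));
//
// predict() returns the alternative number this decision would choose for
// that input.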
/** Add a new DFA state to this DFA if not already present.
 *  To force an acyclic, fixed maximum depth DFA, just always
 *  return the incoming state. By not reusing old states,
 *  no cycles can be created. If we're doing fixed k lookahead
 *  don't update uniqueStates, just return the incoming state, which
 *  indicates it's a new state.
 */
protected DFAState addState(DFAState d) {
    if ( getUserMaxLookahead()>0 ) {
        return d;
    }
    // does a DFA state exist already with everything the same
    // except its state number?
    DFAState existing = uniqueStates.get(d);
    if ( existing != null ) {
        /*
        System.out.println("state "+d.stateNumber+" exists as state "+
            existing.stateNumber);
        */
        // already there...get the existing DFA state
        return existing;
    }

    // if not there, then add new state.
    uniqueStates.put(d,d);
    numberOfStates++;
    return d;
}

public void removeState(DFAState d) {
    DFAState it = uniqueStates.remove(d);
    if ( it!=null ) {
        numberOfStates--;
    }
}

public Map<DFAState, DFAState> getUniqueStates() {
    return uniqueStates;
}
/** What is the max state number ever created? This may be beyond
 *  getNumberOfStates().
 */
public int getMaxStateNumber() {
    return states.size()-1;
}

public DFAState getState(int stateNumber) {
    return states.get(stateNumber);
}

public void setState(int stateNumber, DFAState d) {
    states.set(stateNumber, d);
}
/** Is the DFA reduced? I.e., does every state have a path to an accept
 *  state? If not, don't delete as we need to generate an error indicating
 *  which paths are "dead ends". Also tracks list of alts with no accept
 *  state in the DFA. Must call verify() first before this makes sense.
 */
public boolean isReduced() { return reduced; }
/** Is this DFA cyclic? That is, are there any loops? If not, then
 *  the DFA is essentially an LL(k) predictor for some fixed, max k value.
 *  We can build a series of nested IF statements to match this. In the
 *  presence of cycles, we need to build a general DFA and interpret it
 *  to distinguish between alternatives.
 */
public boolean isCyclic() {
    return cyclic && getUserMaxLookahead()==0;
}

public boolean isClassicDFA() {
    return !isCyclic() &&
        !nfa.grammar.decisionsWhoseDFAsUsesSemPreds.contains(this) &&
        !nfa.grammar.decisionsWhoseDFAsUsesSynPreds.contains(this);
}

public boolean canInlineDecision() {
    return !isCyclic() &&
        !probe.isNonLLStarDecision() &&
        getNumberOfStates() < CodeGenerator.MAX_ACYCLIC_DFA_STATES_INLINE;
}
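// For intuition (example grammar fragment, not from this codebase): a decision
// such as
//     r : 'a'* 'b'
//       | 'a'* 'c'
//       ;
// cannot be resolved with any fixed k because arbitrarily many 'a's may precede
// the distinguishing 'b'/'c'; its prediction DFA therefore contains a loop and
// isCyclic() is true, so the generated code interprets the DFA rather than
// emitting nested IFs.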
/** Is this DFA derived from the NFA for the Tokens rule? */
public boolean isTokensRuleDecision() {
    if ( nfa.grammar.type!=Grammar.LEXER ) {
        return false;
    }
    NFAState nfaStart = getNFADecisionStartState();
    Rule r = nfa.grammar.getLocallyDefinedRule(Grammar.ARTIFICIAL_TOKENS_RULENAME);
    NFAState TokensRuleStart = r.startState;
    NFAState TokensDecisionStart =
        (NFAState)TokensRuleStart.transition[0].target;
    return nfaStart == TokensDecisionStart;
}
/** The user may specify a max, acyclic lookahead for any decision. No
 *  DFA cycles are created when this value, k, is greater than 0.
 *  If this decision has no k lookahead specified, then try the grammar.
 */
public int getUserMaxLookahead() {
    if ( user_k>=0 ) { // cache for speed
        return user_k;
    }
    user_k = nfa.grammar.getUserMaxLookahead(decisionNumber);
    return user_k;
}

public boolean getAutoBacktrackMode() {
    return nfa.grammar.getAutoBacktrackMode(decisionNumber);
}

public void setUserMaxLookahead(int k) {
    this.user_k = k;
}
/** Return k if decision is LL(k) for some k else return max int */
public int getMaxLookaheadDepth() {
    if ( hasCycle() ) return Integer.MAX_VALUE; // compute to be sure
    return _getMaxLookaheadDepth(startState, 0);
}

int _getMaxLookaheadDepth(DFAState d, int depth) {
    // not cyclic; don't worry about termination
    // fail if pred edge.
    int max = depth;
    for (int i=0; i<d.getNumberOfTransitions(); i++) {
        Transition t = d.transition(i);
//        if ( t.isSemanticPredicate() ) return Integer.MAX_VALUE;
        if ( !t.isSemanticPredicate() ) {
            // if pure pred not gated, it must target stop state; don't count
            DFAState edgeTarget = (DFAState)t.target;
            int m = _getMaxLookaheadDepth(edgeTarget, depth+1);
            max = Math.max(max, m);
        }
    }
    return max;
}
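// Example (illustrative): for a fixed decision like ( 'a' 'b' | 'a' 'c' ) the
// acyclic DFA is s0-'a'->s1, s1-'b'->:alt1, s1-'c'->:alt2, so the deepest path
// from the start state to an accept state has two edges and
// getMaxLookaheadDepth() returns 2 (the decision is LL(2)).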
/** Count all disambiguating syn preds (ignore synpred tests
 *  for gated edges, which occur for nonambig input sequences).
 *  E.g.,
 *     x : (X)=&gt; (X|Y)
 *       | X
 *       ;
 *
 *  gives
 *
 *  .s0-X-&gt;.s1
 *  .s0-Y&amp;&amp;{synpred1_t}?-&gt;:s2=&gt;1
 *  .s1-{synpred1_t}?-&gt;:s2=&gt;1
 *  .s1-{true}?-&gt;:s3=&gt;2
 */
public boolean hasSynPred() {
    boolean has = _hasSynPred(startState, new HashSet<DFAState>());
//    if ( !has ) {
//        System.out.println("no synpred in dec "+decisionNumber);
//        FASerializer serializer = new FASerializer(nfa.grammar);
//        String result = serializer.serialize(startState);
//        System.out.println(result);
//    }
    return has;
}

public boolean getHasSynPred() { return hasSynPred(); } // for ST

boolean _hasSynPred(DFAState d, Set<DFAState> busy) {
    busy.add(d);
    for (int i=0; i<d.getNumberOfTransitions(); i++) {
        Transition t = d.transition(i);
        if ( t.isSemanticPredicate() ) {
            SemanticContext ctx = t.label.getSemanticContext();
//            if ( ctx.toString().indexOf("synpred")>=0 ) {
//                System.out.println("has pred "+ctx.toString()+" "+ctx.isSyntacticPredicate());
//                System.out.println(((SemanticContext.Predicate)ctx).predicateAST.token);
//            }
            if ( ctx.isSyntacticPredicate() ) return true;
        }
        DFAState edgeTarget = (DFAState)t.target;
        if ( !busy.contains(edgeTarget) && _hasSynPred(edgeTarget, busy) ) return true;
    }
    return false;
}

public boolean hasSemPred() { // has user-defined sempred
    boolean has = _hasSemPred(startState, new HashSet<DFAState>());
    return has;
}

boolean _hasSemPred(DFAState d, Set<DFAState> busy) {
    busy.add(d);
    for (int i=0; i<d.getNumberOfTransitions(); i++) {
        Transition t = d.transition(i);
        if ( t.isSemanticPredicate() ) {
            SemanticContext ctx = t.label.getSemanticContext();
            if ( ctx.hasUserSemanticPredicate() ) return true;
        }
        DFAState edgeTarget = (DFAState)t.target;
        if ( !busy.contains(edgeTarget) && _hasSemPred(edgeTarget, busy) ) return true;
    }
    return false;
}
/** Compute cyclic w/o relying on state computed during analysis. just check. */
public boolean hasCycle() {
    boolean cyclic = _hasCycle(startState, new HashMap<DFAState, Integer>());
    return cyclic;
}

boolean _hasCycle(DFAState d, Map<DFAState, Integer> busy) {
    busy.put(d, CYCLIC_BUSY);
    for (int i=0; i<d.getNumberOfTransitions(); i++) {
        Transition t = d.transition(i);
        DFAState target = (DFAState)t.target;
        int cond = CYCLIC_UNKNOWN;
        if ( busy.get(target)!=null ) cond = busy.get(target);
        if ( cond==CYCLIC_BUSY ) return true;
        if ( cond!=CYCLIC_DONE && _hasCycle(target, busy) ) return true;
    }
    busy.put(d, CYCLIC_DONE);
    return false;
}
/** Return a list of Integer alt numbers for which no lookahead could
 *  be computed or for which no single DFA accept state predicts those
 *  alts. Must call verify() first before this makes sense.
 */
public List<Integer> getUnreachableAlts() { return unreachableAlts; }
/** Once this DFA has been built, need to verify that:
 *
 *  1. it's reduced
 *  2. all alts have an accept state
 *
 *  Elsewhere, in the NFA converter, we need to verify that:
 *
 *  3. alts i and j have disjoint lookahead if no sem preds
 *  4. if sem preds, nondeterministic alts must be sufficiently covered
 *
 *  This is avoided if analysis bails out for any reason.
 */
public void verify() { doesStateReachAcceptState(startState); }
/** figure out if this state eventually reaches an accept state and
 *  modify the instance variable 'reduced' to indicate if we find
 *  at least one state that cannot reach an accept state. This implies
 *  that the overall DFA is not reduced. This algorithm should be
 *  linear in the number of DFA states.
 *
 *  The algorithm also tracks which alternatives have no accept state,
 *  indicating a nondeterminism.
 *
 *  Also computes whether the DFA is cyclic.
 *
 *  TODO: I call getUniquelyPredicatedAlt too much; cache predicted alt
 */
protected boolean doesStateReachAcceptState(DFAState d) {
    if ( d.isAcceptState() ) {
        // accept states have no edges emanating from them so we can return
        d.setAcceptStateReachable(REACHABLE_YES);
        // this alt is uniquely predicted, remove from nondeterministic list
        int predicts = d.getUniquelyPredictedAlt();
        unreachableAlts.remove(Utils.integer(predicts));
        return true;
    }

    // avoid infinite loops
    d.setAcceptStateReachable(REACHABLE_BUSY);

    boolean anEdgeReachesAcceptState = false;
    // Visit every transition, track if at least one edge reaches stop state
    // Cannot terminate when we know this state reaches stop state since
    // all transitions must be traversed to set status of each DFA state.
    for (int i=0; i<d.getNumberOfTransitions(); i++) {
        Transition t = d.transition(i);
        DFAState edgeTarget = (DFAState)t.target;
        int targetStatus = edgeTarget.getAcceptStateReachable();
        if ( targetStatus==REACHABLE_BUSY ) { // avoid cycles; they say nothing
            cyclic = true;
            continue;
        }
        if ( targetStatus==REACHABLE_YES ) { // avoid unnecessary work
            anEdgeReachesAcceptState = true;
            continue;
        }
        if ( targetStatus==REACHABLE_NO ) {  // avoid unnecessary work
            continue;
        }
        // target must be REACHABLE_UNKNOWN (i.e., unvisited)
        if ( doesStateReachAcceptState(edgeTarget) ) {
            anEdgeReachesAcceptState = true;
            // have to keep looking so don't break loop
            // must cover all states even if we find a path for this state
        }
    }
    if ( anEdgeReachesAcceptState ) {
        d.setAcceptStateReachable(REACHABLE_YES);
    }
    else {
        d.setAcceptStateReachable(REACHABLE_NO);
        reduced = false;
    }
    return anEdgeReachesAcceptState;
}
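// Status lifecycle used above (constants declared at the top of this class):
// every state starts REACHABLE_UNKNOWN, is marked REACHABLE_BUSY while its
// edges are being explored (so a transition back into it is detected without
// recursing forever), and ends as REACHABLE_YES or REACHABLE_NO. Seeing a
// BUSY target is also the moment the 'cyclic' flag gets set.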
/** Walk all accept states and find the manually-specified synpreds.
 *  Gated preds are not always hoisted
 *  I used to do this in the code generator, but that is too late.
 *  This converter tries to avoid computing DFA for decisions in
 *  syntactic predicates that are not ever used such as those
 *  created by autobacktrack mode.
 */
public void findAllGatedSynPredsUsedInDFAAcceptStates() {
    int nAlts = getNumberOfAlts();
    for (int i=1; i<=nAlts; i++) {
        DFAState a = getAcceptState(i);
        //System.out.println("alt "+i+": "+a);
        if ( a!=null ) {
            Set<? extends SemanticContext> synpreds =
                a.getGatedSyntacticPredicatesInNFAConfigurations();
            if ( synpreds!=null ) {
                // add all the predicates we find (should be just one, right?)
                for (SemanticContext semctx : synpreds) {
                    // System.out.println("synpreds: "+semctx);
                    nfa.grammar.synPredUsedInDFA(this, semctx);
                }
            }
        }
    }
}

public NFAState getNFADecisionStartState() {
    return decisionNFAStartState;
}

public DFAState getAcceptState(int alt) {
    return altToAcceptState[alt];
}

public void setAcceptState(int alt, DFAState acceptState) {
    altToAcceptState[alt] = acceptState;
}

public String getDescription() {
    return description;
}

public int getDecisionNumber() {
    return decisionNFAStartState.getDecisionNumber();
}
/** If this DFA failed to finish during construction, we might be
 *  able to retry with k=1 but we need to know whether it will
 *  potentially succeed. Can only succeed if there is a predicate
 *  to resolve the issue. Don't try if k=1 already as it would
 *  cycle forever. Timeout can retry with k=1 even if no predicate
 *  if k!=1.
 */
public boolean okToRetryDFAWithK1() {
    boolean nonLLStarOrOverflowAndPredicateVisible =
        (probe.isNonLLStarDecision()||probe.analysisOverflowed()) &&
        predicateVisible; // auto backtrack or manual sem/syn
    return getUserMaxLookahead()!=1 &&
        nonLLStarOrOverflowAndPredicateVisible;
}

public String getReasonForFailure() {
    StringBuilder buf = new StringBuilder();
    if ( probe.isNonLLStarDecision() ) {
        buf.append("non-LL(*)");
        if ( predicateVisible ) {
            buf.append(" && predicate visible");
        }
    }
    if ( probe.analysisOverflowed() ) {
        buf.append("recursion overflow");
        if ( predicateVisible ) {
            buf.append(" && predicate visible");
        }
    }
    buf.append("\n");
    return buf.toString();
}
/** What GrammarAST node (derived from the grammar) is this DFA
 *  associated with? It will point to the start of a block or
 *  the loop back of a (...)+ block etc...
 */
public GrammarAST getDecisionASTNode() {
    return decisionNFAStartState.associatedASTNode;
}

public boolean isGreedy() {
    GrammarAST blockAST = nfa.grammar.getDecisionBlockAST(decisionNumber);
    Object v = nfa.grammar.getBlockOption(blockAST,"greedy");
    if ( v!=null && v.equals("false") ) {
        return false;
    }
    return true;
}

public DFAState newState() {
    DFAState n = new DFAState(this);
    n.stateNumber = stateCounter;
    stateCounter++;
    states.setSize(n.stateNumber+1);
    states.set(n.stateNumber, n); // track state num to state
    return n;
}

public int getNumberOfStates() {
    if ( getUserMaxLookahead()>0 ) {
        // if using fixed lookahead then uniqueSets not set
        return states.size();
    }
    return numberOfStates;
}

public int getNumberOfAlts() {
    return nAlts;
}

//    public boolean analysisTimedOut() {
//        return probe.analysisTimedOut();
//    }

protected void initAltRelatedInfo() {
    unreachableAlts = new LinkedList<Integer>();
    for (int i = 1; i <= nAlts; i++) {
        unreachableAlts.add(Utils.integer(i));
    }
    altToAcceptState = new DFAState[nAlts+1];
}

@Override
public String toString() {
    FASerializer serializer = new FASerializer(nfa.grammar);
    if ( startState==null ) {
        return "";
    }
    return serializer.serialize(startState, false);
}

/** EOT (end of token) is a label that indicates when the DFA conversion
 *  algorithm would "fall off the end of a lexer rule". It normally
 *  means the default clause. So for ('a'..'z')+ you would see a DFA
 *  with a state that has a..z and EOT emanating from it. a..z would
 *  jump to a state predicting alt 1 and EOT would jump to a state
 *  predicting alt 2 (the exit loop branch). EOT implies anything other
 *  than a..z. If for some reason, the set is "all char" such as with
 *  the wildcard '.', then EOT cannot match anything. For example,
 *
 *     BLOCK : '{' (.)* '}'
 *
 *  consumes all char until EOF when greedy=true. When all edges are
 *  combined for the DFA state after matching '}', you will find that
 *  it is all char. The EOT transition has nothing to match and is
 *  unreachable. The findNewDFAStatesAndAddDFATransitions() method
 *  must know to ignore the EOT, so we simply remove it from the
 *  reachable labels. Later analysis will find that the exit branch
 *  is not predicted by anything. For greedy=false, we leave only
 *  the EOT label indicating that the DFA should stop immediately
 *  and predict the exit branch. The reachable labels are often a
 *  set of disjoint values like: [<EOT>, 42, {0..41, 43..65534}]
 *  due to DFA conversion so must construct a pure set to see if
 *  it is same as Label.ALLCHAR.
 *
 *  Only do this for Lexers.
 *
 *  If EOT coexists with ALLCHAR:
 *  1. If not greedy, modify the labels parameter to be EOT
 *  2. If greedy, remove EOT from the labels set
protected boolean reachableLabelsEOTCoexistsWithAllChar(OrderedHashSet labels) {
    Label eot = new Label(Label.EOT);
    if ( !labels.containsKey(eot) ) {
        return false;
    }
    System.out.println("### contains EOT");
    boolean containsAllChar = false;
    IntervalSet completeVocab = new IntervalSet();
    int n = labels.size();
    for (int i=0; i<n; i++) {
        Label rl = (Label)labels.get(i);
        if ( !rl.equals(eot) ) {
            completeVocab.addAll(rl.getSet());
        }
    }
    System.out.println("completeVocab="+completeVocab);
    if ( completeVocab.equals(Label.ALLCHAR) ) {
        System.out.println("all char");
        containsAllChar = true;
    }
    return containsAllChar;
}
 */
}