/*
 * Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
 * Use of this file is governed by the BSD 3-clause license that
 * can be found in the LICENSE.txt file in the project root.
 */

package org.antlr.v4.runtime;

import org.antlr.v4.runtime.misc.Interval;

import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Set;

This implementation of TokenStream loads tokens from a TokenSource on-demand, and places the tokens in a buffer to provide access to any previous token by index.

This token stream ignores the value of Token.getChannel. If your parser requires the token stream filter tokens to only those on a particular channel, such as Token.DEFAULT_CHANNEL or Token.HIDDEN_CHANNEL, use a filtering token stream such a CommonTokenStream.

/** * This implementation of {@link TokenStream} loads tokens from a * {@link TokenSource} on-demand, and places the tokens in a buffer to provide * access to any previous token by index. * * <p> * This token stream ignores the value of {@link Token#getChannel}. If your * parser requires the token stream filter tokens to only those on a particular * channel, such as {@link Token#DEFAULT_CHANNEL} or * {@link Token#HIDDEN_CHANNEL}, use a filtering token stream such a * {@link CommonTokenStream}.</p> */
public class BufferedTokenStream implements TokenStream {
The TokenSource from which tokens for this stream are fetched.
/** * The {@link TokenSource} from which tokens for this stream are fetched. */
protected TokenSource tokenSource;
A collection of all tokens fetched from the token source. The list is considered a complete view of the input once fetchedEOF is set to true.
/** * A collection of all tokens fetched from the token source. The list is * considered a complete view of the input once {@link #fetchedEOF} is set * to {@code true}. */
protected List<Token> tokens = new ArrayList<Token>(100);
The index into tokens of the current token (next token to consume). tokens[p] should be LT(1).

This field is set to -1 when the stream is first constructed or when setTokenSource is called, indicating that the first token has not yet been fetched from the token source. For additional information, see the documentation of IntStream for a description of Initializing Methods.

/** * The index into {@link #tokens} of the current token (next token to * {@link #consume}). {@link #tokens}{@code [}{@link #p}{@code ]} should be * {@link #LT LT(1)}. * * <p>This field is set to -1 when the stream is first constructed or when * {@link #setTokenSource} is called, indicating that the first token has * not yet been fetched from the token source. For additional information, * see the documentation of {@link IntStream} for a description of * Initializing Methods.</p> */
protected int p = -1;
Indicates whether the Token.EOF token has been fetched from tokenSource and added to tokens. This field improves performance for the following cases:
  • consume: The lookahead check in consume to prevent consuming the EOF symbol is optimized by checking the values of fetchedEOF and p instead of calling LA.
  • fetch: The check to prevent adding multiple EOF symbols into tokens is trivial with this field.
/** * Indicates whether the {@link Token#EOF} token has been fetched from * {@link #tokenSource} and added to {@link #tokens}. This field improves * performance for the following cases: * * <ul> * <li>{@link #consume}: The lookahead check in {@link #consume} to prevent * consuming the EOF symbol is optimized by checking the values of * {@link #fetchedEOF} and {@link #p} instead of calling {@link #LA}.</li> * <li>{@link #fetch}: The check to prevent adding multiple EOF symbols into * {@link #tokens} is trivial with this field.</li> * <ul> */
protected boolean fetchedEOF; public BufferedTokenStream(TokenSource tokenSource) { if (tokenSource == null) { throw new NullPointerException("tokenSource cannot be null"); } this.tokenSource = tokenSource; } @Override public TokenSource getTokenSource() { return tokenSource; } @Override public int index() { return p; } @Override public int mark() { return 0; } @Override public void release(int marker) { // no resources to release }
This method resets the token stream back to the first token in the buffer. It is equivalent to calling seek(0).
See Also:
Deprecated:Use seek(0) instead.
/** * This method resets the token stream back to the first token in the * buffer. It is equivalent to calling {@link #seek}{@code (0)}. * * @see #setTokenSource(TokenSource) * @deprecated Use {@code seek(0)} instead. */
@Deprecated public void reset() { seek(0); } @Override public void seek(int index) { lazyInit(); p = adjustSeekIndex(index); } @Override public int size() { return tokens.size(); } @Override public void consume() { boolean skipEofCheck; if (p >= 0) { if (fetchedEOF) { // the last token in tokens is EOF. skip check if p indexes any // fetched token except the last. skipEofCheck = p < tokens.size() - 1; } else { // no EOF token in tokens. skip check if p indexes a fetched token. skipEofCheck = p < tokens.size(); } } else { // not yet initialized skipEofCheck = false; } if (!skipEofCheck && LA(1) == EOF) { throw new IllegalStateException("cannot consume EOF"); } if (sync(p + 1)) { p = adjustSeekIndex(p + 1); } }
Make sure index i in tokens has a token.
See Also:
Returns:true if a token is located at index i, otherwise false.
/** Make sure index {@code i} in tokens has a token. * * @return {@code true} if a token is located at index {@code i}, otherwise * {@code false}. * @see #get(int i) */
protected boolean sync(int i) { assert i >= 0; int n = i - tokens.size() + 1; // how many more elements we need? //System.out.println("sync("+i+") needs "+n); if ( n > 0 ) { int fetched = fetch(n); return fetched >= n; } return true; }
Add n elements to buffer.
Returns:The actual number of elements added to the buffer.
/** Add {@code n} elements to buffer. * * @return The actual number of elements added to the buffer. */
protected int fetch(int n) { if (fetchedEOF) { return 0; } for (int i = 0; i < n; i++) { Token t = tokenSource.nextToken(); if ( t instanceof WritableToken ) { ((WritableToken)t).setTokenIndex(tokens.size()); } tokens.add(t); if ( t.getType()==Token.EOF ) { fetchedEOF = true; return i + 1; } } return n; } @Override public Token get(int i) { if ( i < 0 || i >= tokens.size() ) { throw new IndexOutOfBoundsException("token index "+i+" out of range 0.."+(tokens.size()-1)); } return tokens.get(i); }
Get all tokens from start..stop inclusively
/** Get all tokens from start..stop inclusively */
public List<Token> get(int start, int stop) { if ( start<0 || stop<0 ) return null; lazyInit(); List<Token> subset = new ArrayList<Token>(); if ( stop>=tokens.size() ) stop = tokens.size()-1; for (int i = start; i <= stop; i++) { Token t = tokens.get(i); if ( t.getType()==Token.EOF ) break; subset.add(t); } return subset; } @Override public int LA(int i) { return LT(i).getType(); } protected Token LB(int k) { if ( (p-k)<0 ) return null; return tokens.get(p-k); } @Override public Token LT(int k) { lazyInit(); if ( k==0 ) return null; if ( k < 0 ) return LB(-k); int i = p + k - 1; sync(i); if ( i >= tokens.size() ) { // return EOF token // EOF must be last token return tokens.get(tokens.size()-1); } // if ( i>range ) range = i; return tokens.get(i); }
Allowed derived classes to modify the behavior of operations which change the current stream position by adjusting the target token index of a seek operation. The default implementation simply returns i. If an exception is thrown in this method, the current stream index should not be changed.

For example, CommonTokenStream overrides this method to ensure that the seek target is always an on-channel token.

Params:
  • i – The target token index.
Returns:The adjusted target token index.
/** * Allowed derived classes to modify the behavior of operations which change * the current stream position by adjusting the target token index of a seek * operation. The default implementation simply returns {@code i}. If an * exception is thrown in this method, the current stream index should not be * changed. * * <p>For example, {@link CommonTokenStream} overrides this method to ensure that * the seek target is always an on-channel token.</p> * * @param i The target token index. * @return The adjusted target token index. */
protected int adjustSeekIndex(int i) { return i; } protected final void lazyInit() { if (p == -1) { setup(); } } protected void setup() { sync(0); p = adjustSeekIndex(0); }
Reset this token stream by setting its token source.
/** Reset this token stream by setting its token source. */
public void setTokenSource(TokenSource tokenSource) { this.tokenSource = tokenSource; tokens.clear(); p = -1; fetchedEOF = false; } public List<Token> getTokens() { return tokens; } public List<Token> getTokens(int start, int stop) { return getTokens(start, stop, null); }
Given a start and stop index, return a List of all tokens in the token type BitSet. Return null if no tokens were found. This method looks at both on and off channel tokens.
/** Given a start and stop index, return a List of all tokens in * the token type BitSet. Return null if no tokens were found. This * method looks at both on and off channel tokens. */
public List<Token> getTokens(int start, int stop, Set<Integer> types) { lazyInit(); if ( start<0 || stop>=tokens.size() || stop<0 || start>=tokens.size() ) { throw new IndexOutOfBoundsException("start "+start+" or stop "+stop+ " not in 0.."+(tokens.size()-1)); } if ( start>stop ) return null; // list = tokens[start:stop]:{T t, t.getType() in types} List<Token> filteredTokens = new ArrayList<Token>(); for (int i=start; i<=stop; i++) { Token t = tokens.get(i); if ( types==null || types.contains(t.getType()) ) { filteredTokens.add(t); } } if ( filteredTokens.isEmpty() ) { filteredTokens = null; } return filteredTokens; } public List<Token> getTokens(int start, int stop, int ttype) { HashSet<Integer> s = new HashSet<Integer>(ttype); s.add(ttype); return getTokens(start,stop, s); }
Given a starting index, return the index of the next token on channel. Return i if tokens[i] is on channel. Return the index of the EOF token if there are no tokens on channel between i and EOF.
/** * Given a starting index, return the index of the next token on channel. * Return {@code i} if {@code tokens[i]} is on channel. Return the index of * the EOF token if there are no tokens on channel between {@code i} and * EOF. */
protected int nextTokenOnChannel(int i, int channel) { sync(i); if (i >= size()) { return size() - 1; } Token token = tokens.get(i); while ( token.getChannel()!=channel ) { if ( token.getType()==Token.EOF ) { return i; } i++; sync(i); token = tokens.get(i); } return i; }
Given a starting index, return the index of the previous token on channel. Return i if tokens[i] is on channel. Return -1 if there are no tokens on channel between i and 0.

If i specifies an index at or after the EOF token, the EOF token index is returned. This is due to the fact that the EOF token is treated as though it were on every channel.

/** * Given a starting index, return the index of the previous token on * channel. Return {@code i} if {@code tokens[i]} is on channel. Return -1 * if there are no tokens on channel between {@code i} and 0. * * <p> * If {@code i} specifies an index at or after the EOF token, the EOF token * index is returned. This is due to the fact that the EOF token is treated * as though it were on every channel.</p> */
protected int previousTokenOnChannel(int i, int channel) { sync(i); if (i >= size()) { // the EOF token is on every channel return size() - 1; } while (i >= 0) { Token token = tokens.get(i); if (token.getType() == Token.EOF || token.getChannel() == channel) { return i; } i--; } return i; }
Collect all tokens on specified channel to the right of the current token up until we see a token on DEFAULT_TOKEN_CHANNEL or EOF. If channel is -1, find any non default channel token.
/** Collect all tokens on specified channel to the right of * the current token up until we see a token on DEFAULT_TOKEN_CHANNEL or * EOF. If channel is -1, find any non default channel token. */
public List<Token> getHiddenTokensToRight(int tokenIndex, int channel) { lazyInit(); if ( tokenIndex<0 || tokenIndex>=tokens.size() ) { throw new IndexOutOfBoundsException(tokenIndex+" not in 0.."+(tokens.size()-1)); } int nextOnChannel = nextTokenOnChannel(tokenIndex + 1, Lexer.DEFAULT_TOKEN_CHANNEL); int to; int from = tokenIndex+1; // if none onchannel to right, nextOnChannel=-1 so set to = last token if ( nextOnChannel == -1 ) to = size()-1; else to = nextOnChannel; return filterForChannel(from, to, channel); }
Collect all hidden tokens (any off-default channel) to the right of the current token up until we see a token on DEFAULT_TOKEN_CHANNEL or EOF.
/** Collect all hidden tokens (any off-default channel) to the right of * the current token up until we see a token on DEFAULT_TOKEN_CHANNEL * or EOF. */
public List<Token> getHiddenTokensToRight(int tokenIndex) { return getHiddenTokensToRight(tokenIndex, -1); }
Collect all tokens on specified channel to the left of the current token up until we see a token on DEFAULT_TOKEN_CHANNEL. If channel is -1, find any non default channel token.
/** Collect all tokens on specified channel to the left of * the current token up until we see a token on DEFAULT_TOKEN_CHANNEL. * If channel is -1, find any non default channel token. */
public List<Token> getHiddenTokensToLeft(int tokenIndex, int channel) { lazyInit(); if ( tokenIndex<0 || tokenIndex>=tokens.size() ) { throw new IndexOutOfBoundsException(tokenIndex+" not in 0.."+(tokens.size()-1)); } if (tokenIndex == 0) { // obviously no tokens can appear before the first token return null; } int prevOnChannel = previousTokenOnChannel(tokenIndex - 1, Lexer.DEFAULT_TOKEN_CHANNEL); if ( prevOnChannel == tokenIndex - 1 ) return null; // if none onchannel to left, prevOnChannel=-1 then from=0 int from = prevOnChannel+1; int to = tokenIndex-1; return filterForChannel(from, to, channel); }
Collect all hidden tokens (any off-default channel) to the left of the current token up until we see a token on DEFAULT_TOKEN_CHANNEL.
/** Collect all hidden tokens (any off-default channel) to the left of * the current token up until we see a token on DEFAULT_TOKEN_CHANNEL. */
public List<Token> getHiddenTokensToLeft(int tokenIndex) { return getHiddenTokensToLeft(tokenIndex, -1); } protected List<Token> filterForChannel(int from, int to, int channel) { List<Token> hidden = new ArrayList<Token>(); for (int i=from; i<=to; i++) { Token t = tokens.get(i); if ( channel==-1 ) { if ( t.getChannel()!= Lexer.DEFAULT_TOKEN_CHANNEL ) hidden.add(t); } else { if ( t.getChannel()==channel ) hidden.add(t); } } if ( hidden.size()==0 ) return null; return hidden; } @Override public String getSourceName() { return tokenSource.getSourceName(); }
Get the text of all tokens in this buffer.
/** Get the text of all tokens in this buffer. */
@Override public String getText() { return getText(Interval.of(0,size()-1)); } @Override public String getText(Interval interval) { int start = interval.a; int stop = interval.b; if ( start<0 || stop<0 ) return ""; fill(); if ( stop>=tokens.size() ) stop = tokens.size()-1; StringBuilder buf = new StringBuilder(); for (int i = start; i <= stop; i++) { Token t = tokens.get(i); if ( t.getType()==Token.EOF ) break; buf.append(t.getText()); } return buf.toString(); } @Override public String getText(RuleContext ctx) { return getText(ctx.getSourceInterval()); } @Override public String getText(Token start, Token stop) { if ( start!=null && stop!=null ) { return getText(Interval.of(start.getTokenIndex(), stop.getTokenIndex())); } return ""; }
Get all tokens from lexer until EOF
/** Get all tokens from lexer until EOF */
public void fill() { lazyInit(); final int blockSize = 1000; while (true) { int fetched = fetch(blockSize); if (fetched < blockSize) { return; } } } }