// // BaseRecognizer.m // ANTLR // // Created by Alan Condit on 6/16/10. // [The "BSD licence"] // Copyright (c) 2010 Alan Condit // All rights reserved. // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions // are met: // 1. Redistributions of source code must retain the above copyright // notice, this list of conditions and the following disclaimer. // 2. Redistributions in binary form must reproduce the above copyright // notice, this list of conditions and the following disclaimer in the // documentation and/or other materials provided with the distribution. // 3. The name of the author may not be used to endorse or promote products // derived from this software without specific prior written permission. // // THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR // IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES // OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. // IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT // NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF // THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #import "ACNumber.h" #import "BaseRecognizer.h" #import "HashRule.h" #import "RuleMemo.h" #import "CommonToken.h" #import "Map.h" #import "NoViableAltException.h" extern NSInteger debug; @implementation BaseRecognizer static AMutableArray *_tokenNames; static NSString *_grammarFileName; static NSString *NEXT_TOKEN_RULE_NAME; @synthesize state; @synthesize grammarFileName; //@synthesize failed; @synthesize sourceName; //@synthesize numberOfSyntaxErrors; @synthesize tokenNames; + (void) initialize { NEXT_TOKEN_RULE_NAME = [NSString stringWithString:@"nextToken"]; [NEXT_TOKEN_RULE_NAME retain]; } + (BaseRecognizer *) newBaseRecognizer { return [[BaseRecognizer alloc] init]; } + (BaseRecognizer *) newBaseRecognizerWithRuleLen:(NSInteger)aLen { return [[BaseRecognizer alloc] initWithLen:aLen]; } + (BaseRecognizer *) newBaseRecognizer:(RecognizerSharedState *)aState { return [[BaseRecognizer alloc] initWithState:aState]; } + (AMutableArray *)getTokenNames { return _tokenNames; } + (void)setTokenNames:(AMutableArray *)theTokNams { if ( _tokenNames != theTokNams ) { if ( _tokenNames ) [_tokenNames release]; [theTokNams retain]; } _tokenNames = theTokNams; } + (void)setGrammarFileName:(NSString *)aFileName { if ( _grammarFileName != aFileName ) { if ( _grammarFileName ) [_grammarFileName release]; [aFileName retain]; } [_grammarFileName retain]; } - (id) init { if ((self = [super init]) != nil) { if (state == nil) { state = [[RecognizerSharedState newRecognizerSharedState] retain]; } tokenNames = _tokenNames; if ( tokenNames ) [tokenNames retain]; grammarFileName = _grammarFileName; if ( grammarFileName ) [grammarFileName retain]; state._fsp = -1; state.errorRecovery = NO; // are we recovering? state.lastErrorIndex = -1; state.failed = NO; // indicate that some match failed state.syntaxErrors = 0; state.backtracking = 0; // the level of backtracking state.tokenStartCharIndex = -1; } return self; } - (id) initWithLen:(NSInteger)aLen { if ((self = [super init]) != nil) { if (state == nil) { state = [[RecognizerSharedState newRecognizerSharedStateWithRuleLen:aLen] retain]; } tokenNames = _tokenNames; if ( tokenNames ) [tokenNames retain]; grammarFileName = _grammarFileName; if ( grammarFileName ) [grammarFileName retain]; state._fsp = -1; state.errorRecovery = NO; // are we recovering? state.lastErrorIndex = -1; state.failed = NO; // indicate that some match failed state.syntaxErrors = 0; state.backtracking = 0; // the level of backtracking state.tokenStartCharIndex = -1; } return self; } - (id) initWithState:(RecognizerSharedState *)aState { if ((self = [super init]) != nil) { state = aState; if (state == nil) { state = [RecognizerSharedState newRecognizerSharedState]; } [state retain]; tokenNames = _tokenNames; if ( tokenNames ) [tokenNames retain]; grammarFileName = _grammarFileName; if ( grammarFileName ) [grammarFileName retain]; state._fsp = -1; state.errorRecovery = NO; // are we recovering? state.lastErrorIndex = -1; state.failed = NO; // indicate that some match failed state.syntaxErrors = 0; state.backtracking = 0; // the level of backtracking state.tokenStartCharIndex = -1; } return self; } - (void)dealloc { #ifdef DEBUG_DEALLOC NSLog( @"called dealloc in BaseRecognizer" ); #endif if ( grammarFileName ) [grammarFileName release]; if ( tokenNames ) [tokenNames release]; if ( state ) [state release]; [super dealloc]; } // reset the recognizer to the initial state. does not touch the token source! // this can be extended by the grammar writer to reset custom ivars - (void) reset { if ( state == nil ) return; if ( state.following != nil ) { if ( [state.following count] ) [state.following removeAllObjects]; } state._fsp = -1; state.errorRecovery = NO; // are we recovering? state.lastErrorIndex = -1; state.failed = NO; // indicate that some match failed state.syntaxErrors = 0; state.backtracking = 0; // the level of backtracking state.tokenStartCharIndex = -1; if ( state.ruleMemo != nil ) { if ( [state.ruleMemo count] ) [state.ruleMemo removeAllObjects]; } } - (BOOL) getFailed { return [state getFailed]; } - (void) setFailed:(BOOL)flag { [state setFailed:flag]; } - (RecognizerSharedState *) getState { return state; } - (void) setState:(RecognizerSharedState *) theState { if (state != theState) { if ( state ) [state release]; state = theState; [state retain]; } } - (id)input { return nil; // Must be overriden in inheriting class } - (void)skip // override in inheriting class { return; } -(id) match:(id<IntStream>)anInput TokenType:(NSInteger)ttype Follow:(ANTLRBitSet *)follow { id matchedSymbol = [self getCurrentInputSymbol:anInput]; if ([anInput LA:1] == ttype) { [anInput consume]; state.errorRecovery = NO; state.failed = NO; return matchedSymbol; } if (state.backtracking > 0) { state.failed = YES; return matchedSymbol; } matchedSymbol = [self recoverFromMismatchedToken:anInput TokenType:ttype Follow:follow]; return matchedSymbol; } -(void) matchAny:(id<IntStream>)anInput { state.errorRecovery = NO; state.failed = NO; [anInput consume]; } -(BOOL) mismatchIsUnwantedToken:(id<IntStream>)anInput TokenType:(NSInteger)ttype { return [anInput LA:2] == ttype; } -(BOOL) mismatchIsMissingToken:(id<IntStream>)anInput Follow:(ANTLRBitSet *) follow { if ( follow == nil ) { // we have no information about the follow; we can only consume // a single token and hope for the best return NO; } // compute what can follow this grammar element reference if ( [follow member:TokenTypeEOR] ) { ANTLRBitSet *viableTokensFollowingThisRule = [self computeContextSensitiveRuleFOLLOW]; follow = [follow or:viableTokensFollowingThisRule]; if ( state._fsp >= 0 ) { // remove EOR if we're not the start symbol [follow remove:(TokenTypeEOR)]; } } // if current token is consistent with what could come after set // then we know we're missing a token; error recovery is free to // "insert" the missing token //System.out.println("viable tokens="+follow.toString(getTokenNames())); //System.out.println("LT(1)="+((TokenStream)input).LT(1)); // BitSet cannot handle negative numbers like -1 (EOF) so I leave EOR // in follow set to indicate that the fall of the start symbol is // in the set (EOF can follow). if ( [follow member:[anInput LA:1]] || [follow member:TokenTypeEOR] ) { //System.out.println("LT(1)=="+((TokenStream)input).LT(1)+" is consistent with what follows; inserting..."); return YES; } return NO; } /** Report a recognition problem. * * This method sets errorRecovery to indicate the parser is recovering * not parsing. Once in recovery mode, no errors are generated. * To get out of recovery mode, the parser must successfully match * a token (after a resync). So it will go: * * 1. error occurs * 2. enter recovery mode, report error * 3. consume until token found in resynch set * 4. try to resume parsing * 5. next match() will reset errorRecovery mode * * If you override, make sure to update syntaxErrors if you care about that. */ -(void) reportError:(RecognitionException *) e { // if we've already reported an error and have not matched a token // yet successfully, don't report any errors. if ( state.errorRecovery ) { //System.err.print("[SPURIOUS] "); return; } state.syntaxErrors++; // don't count spurious state.errorRecovery = YES; [self displayRecognitionError:[self getTokenNames] Exception:e]; } -(void) displayRecognitionError:(AMutableArray *)theTokNams Exception:(RecognitionException *)e { NSString *hdr = [self getErrorHeader:e]; NSString *msg = [self getErrorMessage:e TokenNames:theTokNams]; [self emitErrorMessage:[NSString stringWithFormat:@" %@ %@", hdr, msg]]; } /** What error message should be generated for the various * exception types? * * Not very object-oriented code, but I like having all error message * generation within one method rather than spread among all of the * exception classes. This also makes it much easier for the exception * handling because the exception classes do not have to have pointers back * to this object to access utility routines and so on. Also, changing * the message for an exception type would be difficult because you * would have to subclassing exception, but then somehow get ANTLR * to make those kinds of exception objects instead of the default. * This looks weird, but trust me--it makes the most sense in terms * of flexibility. * * For grammar debugging, you will want to override this to add * more information such as the stack frame with * getRuleInvocationStack(e, this.getClass().getName()) and, * for no viable alts, the decision description and state etc... * * Override this to change the message generated for one or more * exception types. */ - (NSString *)getErrorMessage:(RecognitionException *)e TokenNames:(AMutableArray *)theTokNams { // NSString *msg = [e getMessage]; NSString *msg; if ( [e isKindOfClass:[UnwantedTokenException class]] ) { UnwantedTokenException *ute = (UnwantedTokenException *)e; NSString *tokenName=@"<unknown>"; if ( ute.expecting == TokenTypeEOF ) { tokenName = @"EOF"; } else { tokenName = (NSString *)[theTokNams objectAtIndex:ute.expecting]; } msg = [NSString stringWithFormat:@"extraneous input %@ expecting %@", [self getTokenErrorDisplay:[ute getUnexpectedToken]], tokenName]; } else if ( [e isKindOfClass:[MissingTokenException class] ] ) { MissingTokenException *mte = (MissingTokenException *)e; NSString *tokenName=@"<unknown>"; if ( mte.expecting== TokenTypeEOF ) { tokenName = @"EOF"; } else { tokenName = [theTokNams objectAtIndex:mte.expecting]; } msg = [NSString stringWithFormat:@"missing %@ at %@", tokenName, [self getTokenErrorDisplay:(e.token)] ]; } else if ( [e isKindOfClass:[MismatchedTokenException class]] ) { MismatchedTokenException *mte = (MismatchedTokenException *)e; NSString *tokenName=@"<unknown>"; if ( mte.expecting== TokenTypeEOF ) { tokenName = @"EOF"; } else { tokenName = [theTokNams objectAtIndex:mte.expecting]; } msg = [NSString stringWithFormat:@"mismatched input %@ expecting %@",[self getTokenErrorDisplay:(e.token)], tokenName]; } else if ( [e isKindOfClass:[MismatchedTreeNodeException class]] ) { MismatchedTreeNodeException *mtne = (MismatchedTreeNodeException *)e; NSString *tokenName=@"<unknown>"; if ( mtne.expecting==TokenTypeEOF ) { tokenName = @"EOF"; } else { tokenName = [theTokNams objectAtIndex:mtne.expecting]; } msg = [NSString stringWithFormat:@"mismatched tree node: %@ expecting %@", mtne.node, tokenName]; } else if ( [e isKindOfClass:[NoViableAltException class]] ) { //NoViableAltException *nvae = (NoViableAltException *)e; // for development, can add "decision=<<"+nvae.grammarDecisionDescription+">>" // and "(decision="+nvae.decisionNumber+") and // "state "+nvae.stateNumber // msg = [NSString stringWithFormat:@"no viable alternative at input %@", [self getTokenErrorDisplay:e.token]]; msg = [NSString stringWithFormat:@"no viable alternative decision:%d state:%d at input %@", ((NoViableAltException *)e).stateNumber, ((NoViableAltException *)e).decisionNumber, [self getTokenErrorDisplay:e.token]]; } else if ( [e isKindOfClass:[EarlyExitException class]] ) { //EarlyExitException *eee = (EarlyExitException *)e; // for development, can add "(decision="+eee.decisionNumber+")" msg =[NSString stringWithFormat: @"required (...)+ loop did not match anything at input ", [self getTokenErrorDisplay:e.token]]; } else if ( [e isKindOfClass:[MismatchedSetException class]] ) { MismatchedSetException *mse = (MismatchedSetException *)e; msg = [NSString stringWithFormat:@"mismatched input %@ expecting set %@", [self getTokenErrorDisplay:(e.token)], mse.expecting]; } #pragma warning NotSet not yet implemented. else if ( [e isKindOfClass:[MismatchedNotSetException class] ] ) { MismatchedNotSetException *mse = (MismatchedNotSetException *)e; msg = [NSString stringWithFormat:@"mismatched input %@ expecting set %@", [self getTokenErrorDisplay:(e.token)], mse.expecting]; } else if ( [e isKindOfClass:[FailedPredicateException class]] ) { FailedPredicateException *fpe = (FailedPredicateException *)e; msg = [NSString stringWithFormat:@"rule %@ failed predicate: { %@ }?", fpe.ruleName, fpe.predicate]; } else { msg = [NSString stringWithFormat:@"Exception= %@\n", e.name]; } return msg; } /** Get number of recognition errors (lexer, parser, tree parser). Each * recognizer tracks its own number. So parser and lexer each have * separate count. Does not count the spurious errors found between * an error and next valid token match * * See also reportError() */ - (NSInteger) getNumberOfSyntaxErrors { return state.syntaxErrors; } /** What is the error header, normally line/character position information? */ - (NSString *)getErrorHeader:(RecognitionException *)e { return [NSString stringWithFormat:@"line %d:%d", e.line, e.charPositionInLine]; } /** How should a token be displayed in an error message? The default * is to display just the text, but during development you might * want to have a lot of information spit out. Override in that case * to use t.toString() (which, for CommonToken, dumps everything about * the token). This is better than forcing you to override a method in * your token objects because you don't have to go modify your lexer * so that it creates a new Java type. */ - (NSString *)getTokenErrorDisplay:(id<Token>)t { NSString *s = t.text; if ( s == nil ) { if ( t.type == TokenTypeEOF ) { s = @"<EOF>"; } else { s = [NSString stringWithFormat:@"<%@>", t.type]; } } s = [s stringByReplacingOccurrencesOfString:@"\n" withString:@"\\\\n"]; s = [s stringByReplacingOccurrencesOfString:@"\r" withString:@"\\\\r"]; s = [s stringByReplacingOccurrencesOfString:@"\t" withString:@"\\\\t"]; return [NSString stringWithFormat:@"\'%@\'", s]; } /** Override this method to change where error messages go */ - (void) emitErrorMessage:(NSString *) msg { // System.err.println(msg); NSLog(@"%@", msg); } /** Recover from an error found on the input stream. This is * for NoViableAlt and mismatched symbol exceptions. If you enable * single token insertion and deletion, this will usually not * handle mismatched symbol exceptions but there could be a mismatched * token that the match() routine could not recover from. */ - (void)recover:(id<IntStream>)anInput Exception:(RecognitionException *)re { if ( state.lastErrorIndex == anInput.index ) { // uh oh, another error at same token index; must be a case // where LT(1) is in the recovery token set so nothing is // consumed; consume a single token so at least to prevent // an infinite loop; this is a failsafe. [anInput consume]; } state.lastErrorIndex = anInput.index; ANTLRBitSet *followSet = [self computeErrorRecoverySet]; [self beginResync]; [self consumeUntilFollow:anInput Follow:followSet]; [self endResync]; } - (void) beginResync { } - (void) endResync { } /* Compute the error recovery set for the current rule. During * rule invocation, the parser pushes the set of tokens that can * follow that rule reference on the stack; this amounts to * computing FIRST of what follows the rule reference in the * enclosing rule. This local follow set only includes tokens * from within the rule; i.e., the FIRST computation done by * ANTLR stops at the end of a rule. * * EXAMPLE * * When you find a "no viable alt exception", the input is not * consistent with any of the alternatives for rule r. The best * thing to do is to consume tokens until you see something that * can legally follow a call to r *or* any rule that called r. * You don't want the exact set of viable next tokens because the * input might just be missing a token--you might consume the * rest of the input looking for one of the missing tokens. * * Consider grammar: * * a : '[' b ']' * | '(' b ')' * ; * b : c '^' INT ; * c : ID * | INT * ; * * At each rule invocation, the set of tokens that could follow * that rule is pushed on a stack. Here are the various "local" * follow sets: * * FOLLOW(b1_in_a) = FIRST(']') = ']' * FOLLOW(b2_in_a) = FIRST(')') = ')' * FOLLOW(c_in_b) = FIRST('^') = '^' * * Upon erroneous input "[]", the call chain is * * a -> b -> c * * and, hence, the follow context stack is: * * depth local follow set after call to rule * 0 <EOF> a (from main()) * 1 ']' b * 3 '^' c * * Notice that ')' is not included, because b would have to have * been called from a different context in rule a for ')' to be * included. * * For error recovery, we cannot consider FOLLOW(c) * (context-sensitive or otherwise). We need the combined set of * all context-sensitive FOLLOW sets--the set of all tokens that * could follow any reference in the call chain. We need to * resync to one of those tokens. Note that FOLLOW(c)='^' and if * we resync'd to that token, we'd consume until EOF. We need to * sync to context-sensitive FOLLOWs for a, b, and c: {']','^'}. * In this case, for input "[]", LA(1) is in this set so we would * not consume anything and after printing an error rule c would * return normally. It would not find the required '^' though. * At this point, it gets a mismatched token error and throws an * exception (since LA(1) is not in the viable following token * set). The rule exception handler tries to recover, but finds * the same recovery set and doesn't consume anything. Rule b * exits normally returning to rule a. Now it finds the ']' (and * with the successful match exits errorRecovery mode). * * So, you cna see that the parser walks up call chain looking * for the token that was a member of the recovery set. * * Errors are not generated in errorRecovery mode. * * ANTLR's error recovery mechanism is based upon original ideas: * * "Algorithms + Data Structures = Programs" by Niklaus Wirth * * and * * "A note on error recovery in recursive descent parsers": * http://portal.acm.org/citation.cfm?id=947902.947905 * * Later, Josef Grosch had some good ideas: * * "Efficient and Comfortable Error Recovery in Recursive Descent * Parsers": * ftp://www.cocolab.com/products/cocktail/doca4.ps/ell.ps.zip * * Like Grosch I implemented local FOLLOW sets that are combined * at run-time upon error to avoid overhead during parsing. */ - (ANTLRBitSet *) computeErrorRecoverySet { return [self combineFollows:NO]; } /** Compute the context-sensitive FOLLOW set for current rule. * This is set of token types that can follow a specific rule * reference given a specific call chain. You get the set of * viable tokens that can possibly come next (lookahead depth 1) * given the current call chain. Contrast this with the * definition of plain FOLLOW for rule r: * * FOLLOW(r)={x | S=>*alpha r beta in G and x in FIRST(beta)} * * where x in T* and alpha, beta in V*; T is set of terminals and * V is the set of terminals and nonterminals. In other words, * FOLLOW(r) is the set of all tokens that can possibly follow * references to r in *any* sentential form (context). At * runtime, however, we know precisely which context applies as * we have the call chain. We may compute the exact (rather * than covering superset) set of following tokens. * * For example, consider grammar: * * stat : ID '=' expr ';' // FOLLOW(stat)=={EOF} * | "return" expr '.' * ; * expr : atom ('+' atom)* ; // FOLLOW(expr)=={';','.',')'} * atom : INT // FOLLOW(atom)=={'+',')',';','.'} * | '(' expr ')' * ; * * The FOLLOW sets are all inclusive whereas context-sensitive * FOLLOW sets are precisely what could follow a rule reference. * For input input "i=(3);", here is the derivation: * * stat => ID '=' expr ';' * => ID '=' atom ('+' atom)* ';' * => ID '=' '(' expr ')' ('+' atom)* ';' * => ID '=' '(' atom ')' ('+' atom)* ';' * => ID '=' '(' INT ')' ('+' atom)* ';' * => ID '=' '(' INT ')' ';' * * At the "3" token, you'd have a call chain of * * stat -> expr -> atom -> expr -> atom * * What can follow that specific nested ref to atom? Exactly ')' * as you can see by looking at the derivation of this specific * input. Contrast this with the FOLLOW(atom)={'+',')',';','.'}. * * You want the exact viable token set when recovering from a * token mismatch. Upon token mismatch, if LA(1) is member of * the viable next token set, then you know there is most likely * a missing token in the input stream. "Insert" one by just not * throwing an exception. */ - (ANTLRBitSet *)computeContextSensitiveRuleFOLLOW { return [self combineFollows:YES]; } // what is exact? it seems to only add sets from above on stack // if EOR is in set i. When it sees a set w/o EOR, it stops adding. // Why would we ever want them all? Maybe no viable alt instead of // mismatched token? - (ANTLRBitSet *)combineFollows:(BOOL) exact { NSInteger top = state._fsp; ANTLRBitSet *followSet = [[ANTLRBitSet newBitSet] retain]; for (int i = top; i >= 0; i--) { ANTLRBitSet *localFollowSet = (ANTLRBitSet *)[state.following objectAtIndex:i]; /* System.out.println("local follow depth "+i+"="+ localFollowSet.toString(getTokenNames())+")"); */ [followSet orInPlace:localFollowSet]; if ( exact ) { // can we see end of rule? if ( [localFollowSet member:TokenTypeEOR] ) { // Only leave EOR in set if at top (start rule); this lets // us know if have to include follow(start rule); i.e., EOF if ( i > 0 ) { [followSet remove:TokenTypeEOR]; } } else { // can't see end of rule, quit break; } } } return followSet; } /** Attempt to recover from a single missing or extra token. * * EXTRA TOKEN * * LA(1) is not what we are looking for. If LA(2) has the right token, * however, then assume LA(1) is some extra spurious token. Delete it * and LA(2) as if we were doing a normal match(), which advances the * input. * * MISSING TOKEN * * If current token is consistent with what could come after * ttype then it is ok to "insert" the missing token, else throw * exception For example, Input "i=(3;" is clearly missing the * ')'. When the parser returns from the nested call to expr, it * will have call chain: * * stat -> expr -> atom * * and it will be trying to match the ')' at this point in the * derivation: * * => ID '=' '(' INT ')' ('+' atom)* ';' * ^ * match() will see that ';' doesn't match ')' and report a * mismatched token error. To recover, it sees that LA(1)==';' * is in the set of tokens that can follow the ')' token * reference in rule atom. It can assume that you forgot the ')'. */ - (id<Token>)recoverFromMismatchedToken:(id<IntStream>)anInput TokenType:(NSInteger)ttype Follow:(ANTLRBitSet *)follow { RecognitionException *e = nil; // if next token is what we are looking for then "delete" this token if ( [self mismatchIsUnwantedToken:anInput TokenType:ttype] ) { e = [UnwantedTokenException newException:ttype Stream:anInput]; /* System.err.println("recoverFromMismatchedToken deleting "+ ((TokenStream)input).LT(1)+ " since "+((TokenStream)input).LT(2)+" is what we want"); */ [self beginResync]; [anInput consume]; // simply delete extra token [self endResync]; [self reportError:e]; // report after consuming so AW sees the token in the exception // we want to return the token we're actually matching id matchedSymbol = [self getCurrentInputSymbol:anInput]; [anInput consume]; // move past ttype token as if all were ok return matchedSymbol; } // can't recover with single token deletion, try insertion if ( [self mismatchIsMissingToken:anInput Follow:follow] ) { id<Token> inserted = [self getMissingSymbol:anInput Exception:e TokenType:ttype Follow:follow]; e = [MissingTokenException newException:ttype Stream:anInput With:inserted]; [self reportError:e]; // report after inserting so AW sees the token in the exception return inserted; } // even that didn't work; must throw the exception e = [MismatchedTokenException newException:ttype Stream:anInput]; @throw e; } /** Not currently used */ -(id) recoverFromMismatchedSet:(id<IntStream>)anInput Exception:(RecognitionException *)e Follow:(ANTLRBitSet *) follow { if ( [self mismatchIsMissingToken:anInput Follow:follow] ) { // System.out.println("missing token"); [self reportError:e]; // we don't know how to conjure up a token for sets yet return [self getMissingSymbol:anInput Exception:e TokenType:TokenTypeInvalid Follow:follow]; } // TODO do single token deletion like above for Token mismatch @throw e; } /** Match needs to return the current input symbol, which gets put * into the label for the associated token ref; e.g., x=ID. Token * and tree parsers need to return different objects. Rather than test * for input stream type or change the IntStream interface, I use * a simple method to ask the recognizer to tell me what the current * input symbol is. * * This is ignored for lexers. */ - (id) getCurrentInputSymbol:(id<IntStream>)anInput { return nil; } /** Conjure up a missing token during error recovery. * * The recognizer attempts to recover from single missing * symbols. But, actions might refer to that missing symbol. * For example, x=ID {f($x);}. The action clearly assumes * that there has been an identifier matched previously and that * $x points at that token. If that token is missing, but * the next token in the stream is what we want we assume that * this token is missing and we keep going. Because we * have to return some token to replace the missing token, * we have to conjure one up. This method gives the user control * over the tokens returned for missing tokens. Mostly, * you will want to create something special for identifier * tokens. For literals such as '{' and ',', the default * action in the parser or tree parser works. It simply creates * a CommonToken of the appropriate type. The text will be the token. * If you change what tokens must be created by the lexer, * override this method to create the appropriate tokens. */ - (id)getMissingSymbol:(id<IntStream>)anInput Exception:(RecognitionException *)e TokenType:(NSInteger)expectedTokenType Follow:(ANTLRBitSet *)follow { return nil; } -(void) consumeUntilTType:(id<IntStream>)anInput TokenType:(NSInteger)tokenType { //System.out.println("consumeUntil "+tokenType); int ttype = [anInput LA:1]; while (ttype != TokenTypeEOF && ttype != tokenType) { [anInput consume]; ttype = [anInput LA:1]; } } /** Consume tokens until one matches the given token set */ -(void) consumeUntilFollow:(id<IntStream>)anInput Follow:(ANTLRBitSet *)set { //System.out.println("consumeUntil("+set.toString(getTokenNames())+")"); int ttype = [anInput LA:1]; while (ttype != TokenTypeEOF && ![set member:ttype] ) { //System.out.println("consume during recover LA(1)="+getTokenNames()[input.LA(1)]); [anInput consume]; ttype = [anInput LA:1]; } } /** Push a rule's follow set using our own hardcoded stack */ - (void)pushFollow:(ANTLRBitSet *)fset { if ( (state._fsp +1) >= [state.following count] ) { // AMutableArray *f = [AMutableArray arrayWithCapacity:[[state.following] count]*2]; // System.arraycopy(state.following, 0, f, 0, state.following.length); // state.following = f; [state.following addObject:fset]; [fset retain]; state._fsp++; } else { [state.following replaceObjectAtIndex:++state._fsp withObject:fset]; } } - (ANTLRBitSet *)popFollow { ANTLRBitSet *fset; if ( state._fsp >= 0 && [state.following count] > 0 ) { fset = [state.following objectAtIndex:state._fsp--]; [state.following removeLastObject]; return fset; } else { NSLog( @"Attempted to pop a follow when none exists on the stack\n" ); } return nil; } /** Return List<String> of the rules in your parser instance * leading up to a call to this method. You could override if * you want more details such as the file/line info of where * in the parser java code a rule is invoked. * * This is very useful for error messages and for context-sensitive * error recovery. */ - (AMutableArray *)getRuleInvocationStack { NSString *parserClassName = [[self className] retain]; return [self getRuleInvocationStack:[RecognitionException newException] Recognizer:parserClassName]; } /** A more general version of getRuleInvocationStack where you can * pass in, for example, a RecognitionException to get it's rule * stack trace. This routine is shared with all recognizers, hence, * static. * * TODO: move to a utility class or something; weird having lexer call this */ - (AMutableArray *)getRuleInvocationStack:(RecognitionException *)e Recognizer:(NSString *)recognizerClassName { // char *name; AMutableArray *rules = [[AMutableArray arrayWithCapacity:20] retain]; NSArray *stack = [e callStackSymbols]; int i = 0; for (i = [stack count]-1; i >= 0; i--) { NSString *t = [stack objectAtIndex:i]; // NSLog(@"stack %d = %@\n", i, t); if ( [t commonPrefixWithString:@"org.antlr.runtime." options:NSLiteralSearch] ) { // id aClass = objc_getClass( [t UTF8String] ); continue; // skip support code such as this method } if ( [t isEqualTo:NEXT_TOKEN_RULE_NAME] ) { // name = sel_getName(method_getName(method)); // NSString *aMethod = [NSString stringWithFormat:@"%s", name]; continue; } if ( ![t isEqualTo:recognizerClassName] ) { // name = class_getName( [t UTF8String] ); continue; // must not be part of this parser } [rules addObject:t]; } #ifdef DONTUSEYET StackTraceElement[] stack = e.getStackTrace(); int i = 0; for (i=stack.length-1; i>=0; i--) { StackTraceElement t = stack[i]; if ( [t getClassName().startsWith("org.antlr.runtime.") ) { continue; // skip support code such as this method } if ( [[t getMethodName] equals:NEXT_TOKEN_RULE_NAME] ) { continue; } if ( ![[t getClassName] equals:recognizerClassName] ) { continue; // must not be part of this parser } [rules addObject:[t getMethodName]]; } #endif [stack release]; return rules; } - (NSInteger) getBacktrackingLevel { return [state getBacktracking]; } - (void) setBacktrackingLevel:(NSInteger)level { [state setBacktracking:level]; } /** Used to print out token names like ID during debugging and * error reporting. The generated parsers implement a method * that overrides this to point to their String[] tokenNames. */ - (NSArray *)getTokenNames { return tokenNames; } /** For debugging and other purposes, might want the grammar name. * Have ANTLR generate an implementation for this method. */ - (NSString *)getGrammarFileName { return grammarFileName; } - (NSString *)getSourceName { return nil; } /** A convenience method for use most often with template rewrites. * Convert a List<Token> to List<String> */ - (AMutableArray *)toStrings:(AMutableArray *)tokens { if ( tokens == nil ) return nil; AMutableArray *strings = [AMutableArray arrayWithCapacity:[tokens count]]; id object; NSInteger i = 0; for (object in tokens) { [strings addObject:[object text]]; i++; } return strings; } /** Given a rule number and a start token index number, return * ANTLR_MEMO_RULE_UNKNOWN if the rule has not parsed input starting from * start index. If this rule has parsed input starting from the * start index before, then return where the rule stopped parsing. * It returns the index of the last token matched by the rule. * * For now we use a hashtable and just the slow Object-based one. * Later, we can make a special one for ints and also one that * tosses out data after we commit past input position i. */ - (NSInteger)getRuleMemoization:(NSInteger)ruleIndex StartIndex:(NSInteger)ruleStartIndex { ACNumber *stopIndexI; HashRule *aHashRule; if ( (aHashRule = [state.ruleMemo objectAtIndex:ruleIndex]) == nil ) { aHashRule = [HashRule newHashRuleWithLen:17]; [state.ruleMemo insertObject:aHashRule atIndex:ruleIndex]; } stopIndexI = [aHashRule getRuleMemoStopIndex:ruleStartIndex]; if ( stopIndexI == nil ) { return ANTLR_MEMO_RULE_UNKNOWN; } return [stopIndexI integerValue]; } /** Has this rule already parsed input at the current index in the * input stream? Return the stop token index or MEMO_RULE_UNKNOWN. * If we attempted but failed to parse properly before, return * MEMO_RULE_FAILED. * * This method has a side-effect: if we have seen this input for * this rule and successfully parsed before, then seek ahead to * 1 past the stop token matched for this rule last time. */ - (BOOL)alreadyParsedRule:(id<IntStream>)anInput RuleIndex:(NSInteger)ruleIndex { NSInteger aStopIndex = [self getRuleMemoization:ruleIndex StartIndex:anInput.index]; if ( aStopIndex == ANTLR_MEMO_RULE_UNKNOWN ) { // NSLog(@"rule %d not yet encountered\n", ruleIndex); return NO; } if ( aStopIndex == ANTLR_MEMO_RULE_FAILED ) { if (debug) NSLog(@"rule %d will never succeed\n", ruleIndex); state.failed = YES; } else { if (debug) NSLog(@"seen rule %d before; skipping ahead to %d failed = %@\n", ruleIndex, aStopIndex+1, state.failed?@"YES":@"NO"); [anInput seek:(aStopIndex+1)]; // jump to one past stop token } return YES; } /** Record whether or not this rule parsed the input at this position * successfully. Use a standard java hashtable for now. */ - (void)memoize:(id<IntStream>)anInput RuleIndex:(NSInteger)ruleIndex StartIndex:(NSInteger)ruleStartIndex { RuleStack *aRuleStack; NSInteger stopTokenIndex; aRuleStack = state.ruleMemo; stopTokenIndex = (state.failed ? ANTLR_MEMO_RULE_FAILED : (anInput.index-1)); if ( aRuleStack == nil ) { if (debug) NSLog(@"!!!!!!!!! memo array is nil for %@", [self getGrammarFileName]); return; } if ( ruleIndex >= [aRuleStack length] ) { if (debug) NSLog(@"!!!!!!!!! memo size is %d, but rule index is %d", [state.ruleMemo length], ruleIndex); return; } if ( [aRuleStack objectAtIndex:ruleIndex] != nil ) { [aRuleStack putHashRuleAtRuleIndex:ruleIndex StartIndex:ruleStartIndex StopIndex:stopTokenIndex]; } return; } /** return how many rule/input-index pairs there are in total. * TODO: this includes synpreds. :( */ - (NSInteger)getRuleMemoizationCacheSize { RuleStack *aRuleStack; HashRule *aHashRule; int aCnt = 0; aRuleStack = state.ruleMemo; for (NSUInteger i = 0; aRuleStack != nil && i < [aRuleStack length]; i++) { aHashRule = [aRuleStack objectAtIndex:i]; if ( aHashRule != nil ) { aCnt += [aHashRule count]; // how many input indexes are recorded? } } return aCnt; } #pragma warning Have to fix traceIn and traceOut. - (void)traceIn:(NSString *)ruleName Index:(NSInteger)ruleIndex Object:(id)inputSymbol { NSLog(@"enter %@ %@", ruleName, inputSymbol); if ( state.backtracking > 0 ) { NSLog(@" backtracking=%s", ((state.backtracking==YES)?"YES":"NO")); } NSLog(@"\n"); } - (void)traceOut:(NSString *)ruleName Index:(NSInteger)ruleIndex Object:(id)inputSymbol { NSLog(@"exit %@ -- %@", ruleName, inputSymbol); if ( state.backtracking > 0 ) { NSLog(@" backtracking=%s %s", state.backtracking?"YES":"NO", state.failed ? "failed":"succeeded"); } NSLog(@"\n"); } // call a syntactic predicate methods using its selector. this way we can support arbitrary synpreds. - (BOOL) evaluateSyntacticPredicate:(SEL)synpredFragment // stream:(id<IntStream>)input { id<IntStream> input; state.backtracking++; // input = state.token.input; input = self.input; int start = [input mark]; @try { [self performSelector:synpredFragment]; } @catch (RecognitionException *re) { NSLog(@"impossible synpred: %@", re.name); } BOOL success = (state.failed == NO); [input rewind:start]; state.backtracking--; state.failed = NO; return success; } @end