package java_cup; import java.util.Enumeration; import java.util.Hashtable; import java.util.Stack; /** This class represents a state in the LALR viable prefix recognition machine. * A state consists of an LALR item set and a set of transitions to other * states under terminal and non-terminal symbols. Each state represents * a potential configuration of the parser. If the item set of a state * includes an item such as: <pre> * [A ::= B * C d E , {a,b,c}] * </pre> * this indicates that when the parser is in this state it is currently * looking for an A of the given form, has already seen the B, and would * expect to see an a, b, or c after this sequence is complete. Note that * the parser is normally looking for several things at once (represented * by several items). In our example above, the state would also include * items such as: <pre> * [C ::= * X e Z, {d}] * [X ::= * f, {e}] * </pre> * to indicate that it was currently looking for a C followed by a d (which * would be reduced into a C, matching the first symbol in our production * above), and the terminal f followed by e.<p> * * At runtime, the parser uses a viable prefix recognition machine made up * of these states to parse. The parser has two operations, shift and reduce. * In a shift, it consumes one token and makes a transition to a new state. * This corresponds to "moving the dot past" a terminal in one or more items * in the state (these new shifted items will then be found in the state at * the end of the transition). For a reduce operation, the parser is * signifying that it is recognizing the RHS of some production. To do this * it first "backs up" by popping a stack of previously saved states. It * pops off the same number of states as are found in the RHS of the * production. This leaves the machine in the same state is was in when the * parser first attempted to find the RHS. From this state it makes a * transition based on the non-terminal on the LHS of the production. This * corresponds to placing the parse in a configuration equivalent to having * replaced all the symbols from the the input corresponding to the RHS with * the symbol on the LHS. * * @see java_cup.lalr_item * @see java_cup.lalr_item_set * @see java_cup.lalr_transition * @version last updated: 11/25/95 * @author Scott Hudson * */ public class lalr_state { /*-----------------------------------------------------------*/ /*--- Constructor(s) ----------------------------------------*/ /*-----------------------------------------------------------*/ /** Constructor for building a state from a set of items. * @param itms the set of items that makes up this state. */ public lalr_state(lalr_item_set itms) throws internal_error { /* don't allow null or duplicate item sets */ if (itms == null) throw new internal_error( "Attempt to construct an LALR state from a null item set"); if (find_state(itms) != null) throw new internal_error( "Attempt to construct a duplicate LALR state"); /* assign a unique index */ _index = next_index++; /* store the items */ _items = itms; /* add to the global collection, keyed with its item set */ _all.put(_items,this); } /*-----------------------------------------------------------*/ /*--- (Access to) Static (Class) Variables ------------------*/ /*-----------------------------------------------------------*/ /** Collection of all states. */ protected static Hashtable _all = new Hashtable(); /** Collection of all states. */ public static Enumeration all() {return _all.elements();} /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/ /** Indicate total number of states there are. */ public static int number() {return _all.size();} /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/ /** Hash table to find states by their kernels (i.e, the original, * unclosed, set of items -- which uniquely define the state). This table * stores state objects using (a copy of) their kernel item sets as keys. */ protected static Hashtable _all_kernels = new Hashtable(); /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/ /** Find and return state with a given a kernel item set (or null if not * found). The kernel item set is the subset of items that were used to * originally create the state. These items are formed by "shifting the * dot" within items of other states that have a transition to this one. * The remaining elements of this state's item set are added during closure. * @param itms the kernel set of the state we are looking for. */ public static lalr_state find_state(lalr_item_set itms) { if (itms == null) return null; else return (lalr_state)_all.get(itms); } /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/ /** Static counter for assigning unique state indexes. */ protected static int next_index = 0; /*-----------------------------------------------------------*/ /*--- (Access to) Instance Variables ------------------------*/ /*-----------------------------------------------------------*/ /** The item set for this state. */ protected lalr_item_set _items; /** The item set for this state. */ public lalr_item_set items() {return _items;} /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/ /** List of transitions out of this state. */ protected lalr_transition _transitions = null; /** List of transitions out of this state. */ public lalr_transition transitions() {return _transitions;} /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/ /** Index of this state in the parse tables */ protected int _index; /** Index of this state in the parse tables */ public int index() {return _index;} /*-----------------------------------------------------------*/ /*--- Static Methods ----------------------------------------*/ /*-----------------------------------------------------------*/ /** Helper routine for debugging -- produces a dump of the given state * onto System.out. */ protected static void dump_state(lalr_state st) throws internal_error { lalr_item_set itms; lalr_item itm; production_part part; if (st == null) { System.out.println("NULL lalr_state"); return; } System.out.println("lalr_state [" + st.index() + "] {"); itms = st.items(); for (Enumeration e = itms.all(); e.hasMoreElements(); ) { itm = (lalr_item)e.nextElement(); System.out.print(" ["); System.out.print(itm.the_production().lhs().the_symbol().name()); System.out.print(" ::= "); for (int i = 0; i<itm.the_production().rhs_length(); i++) { if (i == itm.dot_pos()) System.out.print("(*) "); part = itm.the_production().rhs(i); if (part.is_action()) System.out.print("{action} "); else System.out.print(((symbol_part)part).the_symbol().name() + " "); } if (itm.dot_at_end()) System.out.print("(*) "); System.out.println("]"); } System.out.println("}"); } /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/ /** Propagate lookahead sets through the constructed viable prefix * recognizer. When the machine is constructed, each item that results in the creation of another such that its lookahead is included in the other's will have a propagate link set up for it. This allows additions to the lookahead of one item to be included in other items that it was used to directly or indirectly create. */ protected static void propagate_all_lookaheads() throws internal_error { /* iterate across all states */ for (Enumeration st = all(); st.hasMoreElements(); ) { /* propagate lookaheads out of that state */ ((lalr_state)st.nextElement()).propagate_lookaheads(); } } /*-----------------------------------------------------------*/ /*--- General Methods ---------------------------------------*/ /*-----------------------------------------------------------*/ /** Add a transition out of this state to another. * @param on_sym the symbol the transition is under. * @param to_st the state the transition goes to. */ public void add_transition(symbol on_sym, lalr_state to_st) throws internal_error { lalr_transition trans; /* create a new transition object and put it in our list */ trans = new lalr_transition(on_sym, to_st, _transitions); _transitions = trans; } /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/ /** Build an LALR viable prefix recognition machine given a start * production. This method operates by first building a start state * from the start production (based on a single item with the dot at * the beginning and EOF as expected lookahead). Then for each state * it attempts to extend the machine by creating transitions out of * the state to new or existing states. When considering extension * from a state we make a transition on each symbol that appears before * the dot in some item. For example, if we have the items: <pre> * [A ::= a b * X c, {d,e}] * [B ::= a b * X d, {a,b}] * </pre> * in some state, then we would be making a transition under X to a new * state. This new state would be formed by a "kernel" of items * corresponding to moving the dot past the X. In this case: <pre> * [A ::= a b X * c, {d,e}] * [B ::= a b X * Y, {a,b}] * </pre> * The full state would then be formed by "closing" this kernel set of * items so that it included items that represented productions of things * the parser was now looking for. In this case we would items * corresponding to productions of Y, since various forms of Y are expected * next when in this state (see lalr_item_set.compute_closure() for details * on closure). <p> * * The process of building the viable prefix recognizer terminates when no * new states can be added. However, in order to build a smaller number of * states (i.e., corresponding to LALR rather than canonical LR) the state * building process does not maintain full loookaheads in all items. * Consequently, after the machine is built, we go back and propagate * lookaheads through the constructed machine using a call to * propagate_all_lookaheads(). This makes use of propagation links * constructed during the closure and transition process. * * @param start_prod the start production of the grammar * @see java_cup.lalr_item_set#compute_closure * @see java_cup.lalr_state#propagate_all_lookaheads */ public static lalr_state build_machine(production start_prod) throws internal_error { lalr_state start_state; lalr_item_set start_items; lalr_item_set new_items; lalr_item_set linked_items; lalr_item_set kernel; Stack work_stack = new Stack(); lalr_state st, new_st; symbol_set outgoing; lalr_item itm, new_itm, existing, fix_itm; symbol sym, sym2; Enumeration i, s, fix; /* sanity check */ if (start_prod == null) throw new internal_error( "Attempt to build viable prefix recognizer using a null production"); /* build item with dot at front of start production and EOF lookahead */ start_items = new lalr_item_set(); itm = new lalr_item(start_prod); itm.lookahead().add(terminal.EOF); start_items.add(itm); /* create copy the item set to form the kernel */ kernel = new lalr_item_set(start_items); /* create the closure from that item set */ start_items.compute_closure(); /* build a state out of that item set and put it in our work set */ start_state = new lalr_state(start_items); work_stack.push(start_state); /* enter the state using the kernel as the key */ _all_kernels.put(kernel, start_state); /* continue looking at new states until we have no more work to do */ while (!work_stack.empty()) { /* remove a state from the work set */ st = (lalr_state)work_stack.pop(); /* gather up all the symbols that appear before dots */ outgoing = new symbol_set(); for (i = st.items().all(); i.hasMoreElements(); ) { itm = (lalr_item)i.nextElement(); /* add the symbol before the dot (if any) to our collection */ sym = itm.symbol_after_dot(); if (sym != null) outgoing.add(sym); } /* now create a transition out for each individual symbol */ for (s = outgoing.all(); s.hasMoreElements(); ) { sym = (symbol)s.nextElement(); /* will be keeping the set of items with propagate links */ linked_items = new lalr_item_set(); /* gather up shifted versions of all the items that have this symbol before the dot */ new_items = new lalr_item_set(); for (i = st.items().all(); i.hasMoreElements();) { itm = (lalr_item)i.nextElement(); /* if this is the symbol we are working on now, add to set */ sym2 = itm.symbol_after_dot(); if (sym.equals(sym2)) { /* add to the kernel of the new state */ new_items.add(itm.shift()); /* remember that itm has propagate link to it */ linked_items.add(itm); } } /* use new items as state kernel */ kernel = new lalr_item_set(new_items); /* have we seen this one already? */ new_st = (lalr_state)_all_kernels.get(kernel); /* if we haven't, build a new state out of the item set */ if (new_st == null) { /* compute closure of the kernel for the full item set */ new_items.compute_closure(); /* build the new state */ new_st = new lalr_state(new_items); /* add the new state to our work set */ work_stack.push(new_st); /* put it in our kernel table */ _all_kernels.put(kernel, new_st); } /* otherwise relink propagation to items in existing state */ else { /* walk through the items that have links to the new state */ for (fix = linked_items.all(); fix.hasMoreElements(); ) { fix_itm = (lalr_item)fix.nextElement(); /* look at each propagate link out of that item */ for (int l =0; l < fix_itm.propagate_items().size(); l++) { /* pull out item linked to in the new state */ new_itm = (lalr_item)fix_itm.propagate_items().elementAt(l); /* find corresponding item in the existing state */ existing = new_st.items().find(new_itm); /* fix up the item so it points to the existing set */ if (existing != null) fix_itm.propagate_items().setElementAt(existing ,l); } } } /* add a transition from current state to that state */ st.add_transition(sym, new_st); } } /* all done building states */ /* propagate complete lookahead sets throughout the states */ propagate_all_lookaheads(); return start_state; } /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/ /** Propagate lookahead sets out of this state. This recursively * propagates to all items that have propagation links from some item * in this state. */ protected void propagate_lookaheads() throws internal_error { /* recursively propagate out from each item in the state */ for (Enumeration itm = items().all(); itm.hasMoreElements(); ) ((lalr_item)itm.nextElement()).propagate_lookaheads(null); } /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/ /** Fill in the parse table entries for this state. There are two * parse tables that encode the viable prefix recognition machine, an * action table and a reduce-goto table. The rows in each table * correspond to states of the machine. The columns of the action table * are indexed by terminal symbols and correspond to either transitions * out of the state (shift entries) or reductions from the state to some * previous state saved on the stack (reduce entries). All entries in the * action table that are not shifts or reduces, represent errors. The * reduce-goto table is indexed by non terminals and represents transitions * out of a state on that non-terminal.<p> * Conflicts occur if more than one action needs to go in one entry of the * action table (this cannot happen with the reduce-goto table). Conflicts * are resolved by always shifting for shift/reduce conflicts and choosing * the lowest numbered production (hence the one that appeared first in * the specification) in reduce/reduce conflicts. All conflicts are * reported and if more conflicts are detected than were declared by the * user, code generation is aborted. * * @param act_table the action table to put entries in. * @param reduce_table the reduce-goto table to put entries in. */ public void build_table_entries( parse_action_table act_table, parse_reduce_table reduce_table) throws internal_error { parse_action_row our_act_row; parse_reduce_row our_red_row; lalr_item itm; parse_action act, other_act; symbol sym; boolean conflicted = false; /* pull out our rows from the tables */ our_act_row = act_table.under_state[index()]; our_red_row = reduce_table.under_state[index()]; /* consider each item in our state */ for (Enumeration i = items().all(); i.hasMoreElements(); ) { itm = (lalr_item)i.nextElement(); /* if its completed (dot at end) then reduce under the lookahead */ if (itm.dot_at_end()) { act = new reduce_action(itm.the_production()); /* consider each lookahead symbol */ for (int t = 0; t < terminal.number(); t++) { /* skip over the ones not in the lookahead */ if (!itm.lookahead().contains(t)) continue; /* if we don't already have an action put this one in */ if (our_act_row.under_term[t].kind() == parse_action.ERROR) { our_act_row.under_term[t] = act; } else { /* we now have at least one conflict */ conflicted = true; other_act = our_act_row.under_term[t]; /* if the other act was not a shift */ if (other_act.kind() != parse_action.SHIFT) { /* if we have lower index hence priority, replace it*/ if (itm.the_production().index() < ((reduce_action)other_act).reduce_with().index()) { /* replace the action */ our_act_row.under_term[t] = act; } } } } } } /* consider each outgoing transition */ for (lalr_transition trans=transitions(); trans!=null; trans=trans.next()) { /* if its on an terminal add a shift entry */ sym = trans.on_symbol(); if (!sym.is_non_term()) { act = new shift_action(trans.to_state()); /* if we don't already have an action put this one in */ if ( our_act_row.under_term[sym.index()].kind() == parse_action.ERROR) { our_act_row.under_term[sym.index()] = act; } else { /* we now have at least one conflict */ conflicted = true; /* shift always wins */ our_act_row.under_term[sym.index()] = act; } } else { /* for non terminals add an entry to the reduce-goto table */ our_red_row.under_non_term[sym.index()] = trans.to_state(); } } /* if we end up with conflict(s), report them */ if (conflicted) report_conflicts(); } /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/ /** Produce warning messages for all conflicts found in this state. */ protected void report_conflicts() throws internal_error { lalr_item itm, compare; symbol shift_sym; terminal_set conflict_set; boolean after_itm; /* consider each element */ for (Enumeration itms = items().all(); itms.hasMoreElements(); ) { itm = (lalr_item)itms.nextElement(); /* clear the S/R conflict set for this item */ conflict_set = new terminal_set(); /* if it results in a reduce, it could be a conflict */ if (itm.dot_at_end()) { /* not yet after itm */ after_itm = false; /* compare this item against all others looking for conflicts */ for (Enumeration comps = items().all(); comps.hasMoreElements(); ) { compare = (lalr_item)comps.nextElement(); /* if this is the item, next one is after it */ if (itm == compare) after_itm = true; /* only look at it if its not the same item */ if (itm != compare) { /* is it a reduce */ if (compare.dot_at_end()) { /* only look at reduces after itm */ if (after_itm) /* does the comparison item conflict? */ if (compare.lookahead().intersects(itm.lookahead())) /* report a reduce/reduce conflict */ report_reduce_reduce(itm, compare); } /* must be a shift on a terminal or non-terminal */ else { /* is it a shift on a terminal */ shift_sym = compare.symbol_after_dot(); if (!shift_sym.is_non_term()) { /* does the terminal conflict with our item */ if (itm.lookahead().contains((terminal)shift_sym)) /* remember the terminal symbol in conflict */ conflict_set.add((terminal)shift_sym); } } } } /* report S/R conflicts under all the symbols we conflict under */ for (int t = 0; t < terminal.number(); t++) if (conflict_set.contains(t)) report_shift_reduce(itm,t); } } } /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/ /** Produce a warning message for one reduce/reduce conflict. * * @param itm1 first item in conflict. * @param itm2 second item in conflict. */ protected void report_reduce_reduce(lalr_item itm1, lalr_item itm2) throws internal_error { boolean comma_flag = false; System.err.println("*** Reduce/Reduce conflict found in state #"+index()); System.err.print (" between "); System.err.println(itm1.to_simple_string()); System.err.print (" and "); System.err.println(itm2.to_simple_string()); System.err.print(" under symbols: {" ); for (int t = 0; t < terminal.number(); t++) { if (itm1.lookahead().contains(t) && itm2.lookahead().contains(t)) { if (comma_flag) System.err.print(", "); else comma_flag = true; System.err.print(terminal.find(t).name()); } } System.err.println("}"); System.err.print(" Resolved in favor of "); if (itm1.the_production().index() < itm2.the_production().index()) System.err.println("the first production.\n"); else System.err.println("the second production.\n"); /* count the conflict */ emit.num_conflicts++; lexer.warning_count++; } /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/ /** Produce a warning message for one shift/reduce conflict. * * @param red_itm the item with the reduce. * @param conflict_sym the index of the symbol conflict occurs under. */ protected void report_shift_reduce( lalr_item red_itm, int conflict_sym) throws internal_error { lalr_item itm; symbol shift_sym; /* emit top part of message including the reduce item */ System.err.println("*** Shift/Reduce conflict found in state #"+index()); System.err.print (" between "); System.err.println(red_itm.to_simple_string()); /* find and report on all items that shift under our conflict symbol */ for (Enumeration itms = items().all(); itms.hasMoreElements(); ) { itm = (lalr_item)itms.nextElement(); /* only look if its not the same item and not a reduce */ if (itm != red_itm && !itm.dot_at_end()) { /* is it a shift on our conflicting terminal */ shift_sym = itm.symbol_after_dot(); if (!shift_sym.is_non_term() && shift_sym.index() == conflict_sym) { /* yes, report on it */ System.err.println(" and " + itm.to_simple_string()); } } } System.err.println(" under symbol "+ terminal.find(conflict_sym).name()); System.err.println(" Resolved in favor of shifting.\n"); /* count the conflict */ emit.num_conflicts++; lexer.warning_count++; } /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/ /** Equality comparison. */ public boolean equals(lalr_state other) { /* we are equal if our item sets are equal */ return other != null && items().equals(other.items()); } /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/ /** Generic equality comparison. */ public boolean equals(Object other) { if (!(other instanceof lalr_state)) return false; else return equals((lalr_state)other); } /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/ /** Produce a hash code. */ public int hashCode() { /* just use the item set hash code */ return items().hashCode(); } /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/ /** Convert to a string. */ public String toString() { String result; lalr_transition tr; /* dump the item set */ result = "lalr_state [" + index() + "]: " + _items + "\n"; /* do the transitions */ for (tr = transitions(); tr != null; tr = tr.next()) { result += tr; result += "\n"; } return result; } /*-----------------------------------------------------------*/ };