package java_cup;
import java.util.Enumeration;
import java.util.Hashtable;
import java.util.Stack;
/** This class represents a state in the LALR viable prefix recognition machine.
* A state consists of an LALR item set and a set of transitions to other
* states under terminal and non-terminal symbols. Each state represents
* a potential configuration of the parser. If the item set of a state
* includes an item such as: <pre>
* [A ::= B * C d E , {a,b,c}]
* </pre>
* this indicates that when the parser is in this state it is currently
* looking for an A of the given form, has already seen the B, and would
* expect to see an a, b, or c after this sequence is complete. Note that
* the parser is normally looking for several things at once (represented
* by several items). In our example above, the state would also include
* items such as: <pre>
* [C ::= * X e Z, {d}]
* [X ::= * f, {e}]
* </pre>
* to indicate that it was currently looking for a C followed by a d (which
* would be reduced into a C, matching the first symbol in our production
* above), and the terminal f followed by e.<p>
*
* At runtime, the parser uses a viable prefix recognition machine made up
* of these states to parse. The parser has two operations, shift and reduce.
* In a shift, it consumes one token and makes a transition to a new state.
* This corresponds to "moving the dot past" a terminal in one or more items
* in the state (these new shifted items will then be found in the state at
* the end of the transition). For a reduce operation, the parser is
* signifying that it is recognizing the RHS of some production. To do this
* it first "backs up" by popping a stack of previously saved states. It
* pops off the same number of states as are found in the RHS of the
* production. This leaves the machine in the same state it was in when the
* parser first attempted to find the RHS. From this state it makes a
* transition based on the non-terminal on the LHS of the production. This
* corresponds to placing the parse in a configuration equivalent to having
* replaced all the symbols from the input corresponding to the RHS with
* the symbol on the LHS.
*
* @see java_cup.lalr_item
* @see java_cup.lalr_item_set
* @see java_cup.lalr_transition
* @version last updated: 11/25/95
* @author Scott Hudson
*
*/
public class lalr_state {
  /*-----------------------------------------------------------*/
  /*--- Constructor(s) ----------------------------------------*/
  /*-----------------------------------------------------------*/

  /** Constructor for building a state from a set of items.  The new state
   *  is assigned the next free index and is registered in the global
   *  collection of states, keyed by the item set it was built from.
   *
   * @param itms the set of items that makes up this state.
   * @throws internal_error if itms is null, or if a state is already
   *         registered under an equal item set.
   */
  public lalr_state(lalr_item_set itms) throws internal_error
  {
    /* don't allow null or duplicate item sets */
    if (itms == null)
      throw new internal_error(
        "Attempt to construct an LALR state from a null item set");

    if (find_state(itms) != null)
      throw new internal_error(
        "Attempt to construct a duplicate LALR state");

    /* assign a unique index */
    _index = next_index++;

    /* store the items */
    _items = itms;

    /* add to the global collection, keyed with its item set */
    _all.put(_items, this);
  }

  /*-----------------------------------------------------------*/
  /*--- (Access to) Static (Class) Variables ------------------*/
  /*-----------------------------------------------------------*/

  /** Collection of all states, keyed by the item set each state was
   *  constructed with.
   */
  protected static Hashtable _all = new Hashtable();

  /** Enumerate all states that have been constructed so far. */
  public static Enumeration all() {return _all.elements();}

  /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/

  /** Indicate total number of states there are. */
  public static int number() {return _all.size();}

  /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/

  /** Hash table to find states by their kernels (i.e, the original,
   *  unclosed, set of items -- which uniquely define the state).  This table
   *  stores state objects using (a copy of) their kernel item sets as keys.
   *  It is filled in by build_machine().
   */
  protected static Hashtable _all_kernels = new Hashtable();

  /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/

  /** Find and return the state registered under a given item set (or null
   *  if there is none).  The lookup goes through the collection of all
   *  states, which the constructor keys by the item set handed to it; it
   *  does not consult the kernel table (build_machine() looks kernels up
   *  in that table directly).
   *
   * @param itms the item set of the state we are looking for.
   * @return the matching state, or null if itms is null or no state is
   *         registered under it.
   */
  public static lalr_state find_state(lalr_item_set itms)
  {
    if (itms == null)
      return null;
    else
      return (lalr_state)_all.get(itms);
  }

  /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/

  /** Static counter for assigning unique state indexes. */
  protected static int next_index = 0;

  /*-----------------------------------------------------------*/
  /*--- (Access to) Instance Variables ------------------------*/
  /*-----------------------------------------------------------*/

  /** The item set for this state. */
  protected lalr_item_set _items;

  /** The item set for this state. */
  public lalr_item_set items() {return _items;}

  /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/

  /** List of transitions out of this state (head of a linked list; null
   *  when there are none).
   */
  protected lalr_transition _transitions = null;

  /** List of transitions out of this state. */
  public lalr_transition transitions() {return _transitions;}

  /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/

  /** Index of this state in the parse tables */
  protected int _index;

  /** Index of this state in the parse tables */
  public int index() {return _index;}

  /*-----------------------------------------------------------*/
  /*--- Static Methods ----------------------------------------*/
  /*-----------------------------------------------------------*/

  /** Helper routine for debugging -- produces a dump of the given state
   *  onto System.out.  Each item is printed as its production with the
   *  marker "(*)" at the dot position; action parts are shown as
   *  "{action}".
   *
   * @param st the state to dump (a null state is reported as such).
   */
  protected static void dump_state(lalr_state st) throws internal_error
  {
    lalr_item_set itms;
    lalr_item itm;
    production_part part;

    if (st == null)
      {
        System.out.println("NULL lalr_state");
        return;
      }

    System.out.println("lalr_state [" + st.index() + "] {");
    itms = st.items();
    for (Enumeration e = itms.all(); e.hasMoreElements(); )
      {
        itm = (lalr_item)e.nextElement();
        System.out.print(" [");
        System.out.print(itm.the_production().lhs().the_symbol().name());
        System.out.print(" ::= ");
        for (int i = 0; i < itm.the_production().rhs_length(); i++)
          {
            /* show the dot in front of the part it precedes */
            if (i == itm.dot_pos()) System.out.print("(*) ");
            part = itm.the_production().rhs(i);
            if (part.is_action())
              System.out.print("{action} ");
            else
              System.out.print(((symbol_part)part).the_symbol().name() + " ");
          }
        /* a dot past the last part is not reached by the loop above */
        if (itm.dot_at_end()) System.out.print("(*) ");
        System.out.println("]");
      }
    System.out.println("}");
  }

  /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/

  /** Propagate lookahead sets through the constructed viable prefix
   *  recognizer.  When the machine is constructed, each item that results
   *  in the creation of another such that its lookahead is included in the
   *  other's will have a propagate link set up for it.  This allows additions
   *  to the lookahead of one item to be included in other items that it
   *  was used to directly or indirectly create.
   */
  protected static void propagate_all_lookaheads() throws internal_error
  {
    /* iterate across all states */
    for (Enumeration st = all(); st.hasMoreElements(); )
      {
        /* propagate lookaheads out of that state */
        ((lalr_state)st.nextElement()).propagate_lookaheads();
      }
  }

  /*-----------------------------------------------------------*/
  /*--- General Methods ---------------------------------------*/
  /*-----------------------------------------------------------*/

  /** Add a transition out of this state to another.  The new transition
   *  is placed at the head of this state's transition list.
   *
   * @param on_sym the symbol the transition is under.
   * @param to_st  the state the transition goes to.
   */
  public void add_transition(symbol on_sym, lalr_state to_st)
    throws internal_error
  {
    /* create a new transition object and put it at the front of our list */
    _transitions = new lalr_transition(on_sym, to_st, _transitions);
  }

  /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/

  /** Build an LALR viable prefix recognition machine given a start
   *  production.  This method operates by first building a start state
   *  from the start production (based on a single item with the dot at
   *  the beginning and EOF as expected lookahead).  Then for each state
   *  it attempts to extend the machine by creating transitions out of
   *  the state to new or existing states.  When considering extension
   *  from a state we make a transition on each symbol that appears
   *  immediately after the dot in some item.  For example, if we have the
   *  items: <pre>
   *    [A ::= a b * X c, {d,e}]
   *    [B ::= a b * X Y, {a,b}]
   *  </pre>
   *  in some state, then we would be making a transition under X to a new
   *  state.  This new state would be formed by a "kernel" of items
   *  corresponding to moving the dot past the X.  In this case: <pre>
   *    [A ::= a b X * c, {d,e}]
   *    [B ::= a b X * Y, {a,b}]
   *  </pre>
   *  The full state would then be formed by "closing" this kernel set of
   *  items so that it included items that represented productions of things
   *  the parser was now looking for.  In this case we would add items
   *  corresponding to productions of Y, since various forms of Y are expected
   *  next when in this state (see lalr_item_set.compute_closure() for details
   *  on closure). <p>
   *
   *  The process of building the viable prefix recognizer terminates when no
   *  new states can be added.  However, in order to build a smaller number of
   *  states (i.e., corresponding to LALR rather than canonical LR) the state
   *  building process does not maintain full lookaheads in all items.
   *  Consequently, after the machine is built, we go back and propagate
   *  lookaheads through the constructed machine using a call to
   *  propagate_all_lookaheads().  This makes use of propagation links
   *  constructed during the closure and transition process.
   *
   * @param start_prod the start production of the grammar
   * @return the start state of the constructed machine.
   * @throws internal_error if start_prod is null.
   * @see java_cup.lalr_item_set#compute_closure
   * @see java_cup.lalr_state#propagate_all_lookaheads
   */
  public static lalr_state build_machine(production start_prod)
    throws internal_error
  {
    lalr_state    start_state;
    lalr_item_set start_items;
    lalr_item_set new_items;
    lalr_item_set linked_items;
    lalr_item_set kernel;
    Stack         work_stack = new Stack();
    lalr_state    st, new_st;
    symbol_set    outgoing;
    lalr_item     itm, new_itm, existing, fix_itm;
    symbol        sym, sym2;
    Enumeration   i, s, fix;

    /* sanity check */
    if (start_prod == null)
      throw new internal_error(
        "Attempt to build viable prefix recognizer using a null production");

    /* build item with dot at front of start production and EOF lookahead */
    start_items = new lalr_item_set();
    itm = new lalr_item(start_prod);
    itm.lookahead().add(terminal.EOF);
    start_items.add(itm);

    /* copy the item set to form the kernel (before it gets closed) */
    kernel = new lalr_item_set(start_items);

    /* create the closure from that item set */
    start_items.compute_closure();

    /* build a state out of that item set and put it in our work set */
    start_state = new lalr_state(start_items);
    work_stack.push(start_state);

    /* enter the state using the kernel as the key */
    _all_kernels.put(kernel, start_state);

    /* continue looking at new states until we have no more work to do */
    while (!work_stack.empty())
      {
        /* remove a state from the work set */
        st = (lalr_state)work_stack.pop();

        /* gather up all the symbols that appear right after a dot */
        outgoing = new symbol_set();
        for (i = st.items().all(); i.hasMoreElements(); )
          {
            itm = (lalr_item)i.nextElement();

            /* add the symbol after the dot (if any) to our collection */
            sym = itm.symbol_after_dot();
            if (sym != null) outgoing.add(sym);
          }

        /* now create a transition out for each individual symbol */
        for (s = outgoing.all(); s.hasMoreElements(); )
          {
            sym = (symbol)s.nextElement();

            /* will be keeping the set of items with propagate links */
            linked_items = new lalr_item_set();

            /* gather up shifted versions of all the items that have this
               symbol after the dot */
            new_items = new lalr_item_set();
            for (i = st.items().all(); i.hasMoreElements(); )
              {
                itm = (lalr_item)i.nextElement();

                /* if this is the symbol we are working on now, add to set */
                sym2 = itm.symbol_after_dot();
                if (sym.equals(sym2))
                  {
                    /* add to the kernel of the new state */
                    new_items.add(itm.shift());

                    /* remember that itm has propagate link to it */
                    linked_items.add(itm);
                  }
              }

            /* use (a copy of) the new items as state kernel */
            kernel = new lalr_item_set(new_items);

            /* have we seen this one already? */
            new_st = (lalr_state)_all_kernels.get(kernel);

            /* if we haven't, build a new state out of the item set */
            if (new_st == null)
              {
                /* compute closure of the kernel for the full item set */
                new_items.compute_closure();

                /* build the new state */
                new_st = new lalr_state(new_items);

                /* add the new state to our work set */
                work_stack.push(new_st);

                /* put it in our kernel table */
                _all_kernels.put(kernel, new_st);
              }
            /* otherwise relink propagation to items in existing state */
            else
              {
                /* walk through the items that have links to the new state */
                for (fix = linked_items.all(); fix.hasMoreElements(); )
                  {
                    fix_itm = (lalr_item)fix.nextElement();

                    /* look at each propagate link out of that item */
                    for (int l = 0; l < fix_itm.propagate_items().size(); l++)
                      {
                        /* pull out item linked to in the new state */
                        new_itm =
                          (lalr_item)fix_itm.propagate_items().elementAt(l);

                        /* find corresponding item in the existing state */
                        existing = new_st.items().find(new_itm);

                        /* fix up the item so it points to the existing set;
                           links with no counterpart there are left alone */
                        if (existing != null)
                          fix_itm.propagate_items().setElementAt(existing, l);
                      }
                  }
              }

            /* add a transition from current state to that state */
            st.add_transition(sym, new_st);
          }
      }

    /* all done building states */

    /* propagate complete lookahead sets throughout the states */
    propagate_all_lookaheads();

    return start_state;
  }

  /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/

  /** Propagate lookahead sets out of this state.  This asks every item
   *  in this state to propagate its lookaheads along its propagation
   *  links.
   */
  protected void propagate_lookaheads() throws internal_error
  {
    /* propagate out from each item in the state */
    for (Enumeration itm = items().all(); itm.hasMoreElements(); )
      ((lalr_item)itm.nextElement()).propagate_lookaheads(null);
  }

  /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/

  /** Fill in the parse table entries for this state.  There are two
   *  parse tables that encode the viable prefix recognition machine, an
   *  action table and a reduce-goto table.  The rows in each table
   *  correspond to states of the machine.  The columns of the action table
   *  are indexed by terminal symbols and correspond to either transitions
   *  out of the state (shift entries) or reductions from the state to some
   *  previous state saved on the stack (reduce entries).  All entries in the
   *  action table that are not shifts or reduces, represent errors.  The
   *  reduce-goto table is indexed by non terminals and represents transitions
   *  out of a state on that non-terminal.<p>
   *  Conflicts occur if more than one action needs to go in one entry of the
   *  action table (this cannot happen with the reduce-goto table).  Conflicts
   *  are resolved by always shifting for shift/reduce conflicts and choosing
   *  the lowest numbered production (hence the one that appeared first in
   *  the specification) in reduce/reduce conflicts.  If any conflict was
   *  seen while filling in the row, report_conflicts() is called, which
   *  prints and counts them; acting on the resulting count is left to the
   *  caller.
   *
   * @param act_table    the action table to put entries in.
   * @param reduce_table the reduce-goto table to put entries in.
   */
  public void build_table_entries(
    parse_action_table act_table,
    parse_reduce_table reduce_table)
    throws internal_error
  {
    parse_action_row our_act_row;
    parse_reduce_row our_red_row;
    lalr_item        itm;
    parse_action     act, other_act;
    symbol           sym;
    boolean          conflicted = false;

    /* pull out our rows from the tables */
    our_act_row = act_table.under_state[index()];
    our_red_row = reduce_table.under_state[index()];

    /* consider each item in our state */
    for (Enumeration i = items().all(); i.hasMoreElements(); )
      {
        itm = (lalr_item)i.nextElement();

        /* only completed items (dot at end) give rise to reduces */
        if (!itm.dot_at_end()) continue;

        act = new reduce_action(itm.the_production());

        /* consider each lookahead symbol */
        for (int t = 0; t < terminal.number(); t++)
          {
            /* skip over the ones not in the lookahead */
            if (!itm.lookahead().contains(t)) continue;

            /* if we don't already have an action put this one in */
            if (our_act_row.under_term[t].kind() == parse_action.ERROR)
              {
                our_act_row.under_term[t] = act;
              }
            else
              {
                /* we now have at least one conflict */
                conflicted = true;

                other_act = our_act_row.under_term[t];

                /* a shift already there stays; otherwise the entry is
                   treated as a reduce and the lower production index
                   (hence higher priority) wins */
                if (other_act.kind() != parse_action.SHIFT &&
                    itm.the_production().index() <
                      ((reduce_action)other_act).reduce_with().index())
                  {
                    /* replace the action */
                    our_act_row.under_term[t] = act;
                  }
              }
          }
      }

    /* consider each outgoing transition */
    for (lalr_transition trans = transitions();
         trans != null;
         trans = trans.next())
      {
        sym = trans.on_symbol();
        if (!sym.is_non_term())
          {
            /* on a terminal: add a shift entry */
            act = new shift_action(trans.to_state());

            /* anything already in the slot means we have a conflict */
            if (our_act_row.under_term[sym.index()].kind() !=
                parse_action.ERROR)
              conflicted = true;

            /* shift always wins, so store it either way */
            our_act_row.under_term[sym.index()] = act;
          }
        else
          {
            /* for non terminals add an entry to the reduce-goto table */
            our_red_row.under_non_term[sym.index()] = trans.to_state();
          }
      }

    /* if we end up with conflict(s), report them */
    if (conflicted)
      report_conflicts();
  }

  /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/

  /** Produce warning messages for all conflicts found in this state.
   *  Every completed item (dot at end) is compared against every other
   *  item: overlapping lookaheads with a later completed item give a
   *  reduce/reduce report, and a terminal after the dot of an uncompleted
   *  item that lies in the completed item's lookahead gives a shift/reduce
   *  report.
   */
  protected void report_conflicts()
    throws internal_error
  {
    lalr_item    itm, compare;
    symbol       shift_sym;
    terminal_set conflict_set;
    boolean      after_itm;

    /* consider each element */
    for (Enumeration itms = items().all(); itms.hasMoreElements(); )
      {
        itm = (lalr_item)itms.nextElement();

        /* only items that result in a reduce can be in conflict */
        if (!itm.dot_at_end()) continue;

        /* start with an empty S/R conflict set for this item */
        conflict_set = new terminal_set();

        /* not yet after itm */
        after_itm = false;

        /* compare this item against all others looking for conflicts */
        for (Enumeration comps = items().all(); comps.hasMoreElements(); )
          {
            compare = (lalr_item)comps.nextElement();

            /* if this is the item, the ones that follow are after it */
            if (itm == compare)
              {
                after_itm = true;
                continue;
              }

            if (compare.dot_at_end())
              {
                /* a reduce: only look at reduces after itm so that each
                   pair of items is reported once */
                if (after_itm &&
                    compare.lookahead().intersects(itm.lookahead()))
                  /* report a reduce/reduce conflict */
                  report_reduce_reduce(itm, compare);
              }
            else
              {
                /* a shift: it conflicts if it is on a terminal found in
                   our lookahead (no symbol after the dot means there is
                   nothing to shift on) */
                shift_sym = compare.symbol_after_dot();
                if (shift_sym != null &&
                    !shift_sym.is_non_term() &&
                    itm.lookahead().contains((terminal)shift_sym))
                  /* remember the terminal symbol in conflict */
                  conflict_set.add((terminal)shift_sym);
              }
          }

        /* report S/R conflicts under all the symbols we conflict under */
        for (int t = 0; t < terminal.number(); t++)
          if (conflict_set.contains(t))
            report_shift_reduce(itm, t);
      }
  }

  /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/

  /** Produce a warning message (on System.err) for one reduce/reduce
   *  conflict and count it in emit.num_conflicts and
   *  lexer.warning_count.
   *
   * @param itm1 first item in conflict.
   * @param itm2 second item in conflict.
   */
  protected void report_reduce_reduce(lalr_item itm1, lalr_item itm2)
    throws internal_error
  {
    boolean comma_flag = false;

    System.err.println("*** Reduce/Reduce conflict found in state #"+index());
    System.err.print  (" between ");
    System.err.println(itm1.to_simple_string());
    System.err.print  (" and ");
    System.err.println(itm2.to_simple_string());
    System.err.print(" under symbols: {" );

    /* list every terminal the two lookahead sets have in common */
    for (int t = 0; t < terminal.number(); t++)
      {
        if (itm1.lookahead().contains(t) && itm2.lookahead().contains(t))
          {
            if (comma_flag) System.err.print(", "); else comma_flag = true;
            System.err.print(terminal.find(t).name());
          }
      }
    System.err.println("}");

    /* the production with the lower index is the one that is kept */
    System.err.print(" Resolved in favor of ");
    if (itm1.the_production().index() < itm2.the_production().index())
      System.err.println("the first production.\n");
    else
      System.err.println("the second production.\n");

    /* count the conflict */
    emit.num_conflicts++;
    lexer.warning_count++;
  }

  /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/

  /** Produce a warning message (on System.err) for one shift/reduce
   *  conflict and count it in emit.num_conflicts and
   *  lexer.warning_count.
   *
   * @param red_itm      the item with the reduce.
   * @param conflict_sym the index of the symbol conflict occurs under.
   */
  protected void report_shift_reduce(
    lalr_item red_itm,
    int       conflict_sym)
    throws internal_error
  {
    lalr_item itm;
    symbol    shift_sym;

    /* emit top part of message including the reduce item */
    System.err.println("*** Shift/Reduce conflict found in state #"+index());
    System.err.print  (" between ");
    System.err.println(red_itm.to_simple_string());

    /* find and report on all items that shift under our conflict symbol */
    for (Enumeration itms = items().all(); itms.hasMoreElements(); )
      {
        itm = (lalr_item)itms.nextElement();

        /* only look if its not the same item and not a reduce */
        if (itm != red_itm && !itm.dot_at_end())
          {
            /* is it a shift on our conflicting terminal (no symbol after
               the dot means there is nothing to shift on) */
            shift_sym = itm.symbol_after_dot();
            if (shift_sym != null &&
                !shift_sym.is_non_term() &&
                shift_sym.index() == conflict_sym)
              {
                /* yes, report on it */
                System.err.println(" and " + itm.to_simple_string());
              }
          }
      }
    System.err.println(" under symbol "+ terminal.find(conflict_sym).name());
    System.err.println(" Resolved in favor of shifting.\n");

    /* count the conflict */
    emit.num_conflicts++;
    lexer.warning_count++;
  }

  /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/

  /** Equality comparison.  Two states are equal when their item sets are
   *  equal; a null argument is never equal.
   */
  public boolean equals(lalr_state other)
  {
    /* we are equal if our item sets are equal */
    return other != null && items().equals(other.items());
  }

  /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/

  /** Generic equality comparison.  Anything that is not an lalr_state
   *  (including null) is unequal; otherwise defers to the typed version.
   */
  public boolean equals(Object other)
  {
    return (other instanceof lalr_state) && equals((lalr_state)other);
  }

  /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/

  /** Produce a hash code (consistent with equals, since both are based
   *  on the item set).
   */
  public int hashCode()
  {
    /* just use the item set hash code */
    return items().hashCode();
  }

  /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/

  /** Convert to a string: the state index and item set on the first line,
   *  followed by one line per outgoing transition.
   */
  public String toString()
  {
    /* accumulate in a buffer rather than re-copying a String per line */
    StringBuffer result = new StringBuffer();

    /* dump the item set */
    result.append("lalr_state [").append(index()).append("]: ")
          .append(_items).append("\n");

    /* do the transitions */
    for (lalr_transition tr = transitions(); tr != null; tr = tr.next())
      {
        result.append(tr);
        result.append("\n");
      }

    return result.toString();
  }

  /*-----------------------------------------------------------*/
}