package java_cup; import java.util.Enumeration; import java.util.Hashtable; /** This class represents a production in the grammar. It contains * a LHS non terminal, and an array of RHS symbols. As various * transformations are done on the RHS of the production, it may shrink. * As a result a separate length is always maintained to indicate how much * of the RHS array is still valid.<p> * * I addition to construction and manipulation operations, productions provide * methods for factoring out actions (see remove_embedded_actions()), for * computing the nullability of the production (i.e., can it derive the empty * string, see check_nullable()), and operations for computing its first * set (i.e., the set of terminals that could appear at the beginning of some * string derived from the production, see check_first_set()). * * @see java_cup.production_part * @see java_cup.symbol_part * @see java_cup.action_part * @version last updated: 11/25/95 * @author Scott Hudson */ public class production { /*-----------------------------------------------------------*/ /*--- Constructor(s) ----------------------------------------*/ /*-----------------------------------------------------------*/ /** Full constructor. This constructor accepts a LHS non terminal, * an array of RHS parts (including terminals, non terminals, and * actions), and a string for a final reduce action. It does several * manipulations in the process of creating a production object. * After some validity checking it translates labels that appear in * actions into code for accessing objects on the runtime parse stack. * It them merges adjacent actions if they appear and moves any trailing * action into the final reduce actions string. Next it removes any * embedded actions by factoring them out with new action productions. * Finally it assigns a unique index to the production.<p> * * Factoring out of actions is accomplished by creating new "hidden" * non terminals. For example if the production was originally: <pre> * A ::= B {action} C D * </pre> * then it is factored into two productions:<pre> * A ::= B X C D * X ::= {action} * </pre> * (where X is a unique new non terminal). This has the effect of placing * all actions at the end where they can be handled as part of a reduce by * the parser. */ public production( non_terminal lhs_sym, production_part rhs_parts[], int rhs_l, String action_str) throws internal_error { int i; action_part tail_action; /* remember the length */ if (rhs_l >= 0) _rhs_length = rhs_l; else if (rhs_parts != null) _rhs_length = rhs_parts.length; else _rhs_length = 0; /* make sure we have a valid left-hand-side */ if (lhs_sym == null) throw new internal_error( "Attempt to construct a production with a null LHS"); /* translate labels appearing in action strings */ action_str = translate_labels( rhs_parts, rhs_l, action_str, lhs_sym.stack_type()); /* count use of lhs */ lhs_sym.note_use(); /* create the part for left-hand-side */ _lhs = new symbol_part(lhs_sym); /* merge adjacent actions (if any) */ _rhs_length = merge_adjacent_actions(rhs_parts, _rhs_length); /* strip off any trailing action */ tail_action = strip_trailing_action(rhs_parts, _rhs_length); if (tail_action != null) _rhs_length--; /* allocate and copy over the right-hand-side */ _rhs = new production_part[_rhs_length]; for (i=0; i<_rhs_length; i++) _rhs[i] = rhs_parts[i]; /* count use of each rhs symbol */ for (i=0; i<_rhs_length; i++) if (!_rhs[i].is_action()) ((symbol_part)_rhs[i]).the_symbol().note_use(); /* merge any trailing action with action string parameter */ if (action_str == null) action_str = ""; if (tail_action != null && tail_action.code_string() != null) action_str = tail_action.code_string() + action_str; /* stash the action */ _action = new action_part(action_str); /* rewrite production to remove any embedded actions */ remove_embedded_actions(); /* assign an index */ _index = next_index++; /* put us in the global collection of productions */ _all.put(new Integer(_index),this); /* put us in the production list of the lhs non terminal */ lhs_sym.add_production(this); } /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/ /** Constructor with no action string. */ public production( non_terminal lhs_sym, production_part rhs_parts[], int rhs_l) throws internal_error { this(lhs_sym,rhs_parts,rhs_l,null); } /*-----------------------------------------------------------*/ /*--- (Access to) Static (Class) Variables ------------------*/ /*-----------------------------------------------------------*/ /** Table of all productions. Elements are stored using their index as * the key. */ protected static Hashtable _all = new Hashtable(); /** Access to all productions. */ public static Enumeration all() {return _all.elements();}; /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/ /** Total number of productions. */ public static int number() {return _all.size();}; /** Static counter for assigning unique index numbers. */ protected static int next_index; /*-----------------------------------------------------------*/ /*--- (Access to) Instance Variables ------------------------*/ /*-----------------------------------------------------------*/ /** The left hand side non-terminal. */ protected symbol_part _lhs; /** The left hand side non-terminal. */ public symbol_part lhs() {return _lhs;} /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/ /** A collection of parts for the right hand side. */ protected production_part _rhs[]; /** Access to the collection of parts for the right hand side. */ public production_part rhs(int indx) throws internal_error { if (indx >= 0 && indx < _rhs_length) return _rhs[indx]; else throw new internal_error( "Index out of range for right hand side of production"); } /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/ /** How much of the right hand side array we are presently using. */ protected int _rhs_length; /** How much of the right hand side array we are presently using. */ public int rhs_length() {return _rhs_length;} /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/ /** An action_part containing code for the action to be performed when we * reduce with this production. */ protected action_part _action; /** An action_part containing code for the action to be performed when we * reduce with this production. */ public action_part action() {return _action;} /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/ /** Index number of the production. */ protected int _index; /** Index number of the production. */ public int index() {return _index;} /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/ /** Count of number of reductions using this production. */ protected int _num_reductions = 0; /** Count of number of reductions using this production. */ public int num_reductions() {return _num_reductions;} /** Increment the count of reductions with this non-terminal */ public void note_reduction_use() {_num_reductions++;} /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/ /** Is the nullability of the production known or unknown? */ protected boolean _nullable_known = false; /** Is the nullability of the production known or unknown? */ public boolean nullable_known() {return _nullable_known;} /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/ /** Nullability of the production (can it derive the empty string). */ protected boolean _nullable = false; /** Nullability of the production (can it derive the empty string). */ public boolean nullable() {return _nullable;} /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/ /** First set of the production. This is the set of terminals that * could appear at the front of some string derived from this production. */ protected terminal_set _first_set = new terminal_set(); /** First set of the production. This is the set of terminals that * could appear at the front of some string derived from this production. */ public terminal_set first_set() {return _first_set;} /*-----------------------------------------------------------*/ /*--- Static Methods ----------------------------------------*/ /*-----------------------------------------------------------*/ /** Determine if a given character can be a label id starter. * @param c the character in question. */ protected static boolean is_id_start(char c) { return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || (c == '_'); //later need to handle non-8-bit chars here } /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/ /** Determine if a character can be in a label id. * @param c the character in question. */ protected static boolean is_id_char(char c) { return is_id_start(c) || (c >= '0' && c <= '9'); } /*-----------------------------------------------------------*/ /*--- General Methods ---------------------------------------*/ /*-----------------------------------------------------------*/ /** Determine the translation for one label id found within a code_string. * Symbols appearing in the RHS correspond to objects found on the parse * stack at runtime. The code to access them, becomes code to access an * object at the appropriate offset from the top of the stack, and then * cast that to the proper type. * * @param id_str the name of the id to be translated. * @param act_pos the original position of the action it appears in. * @param label_map a hash table mapping labels to positions in the RHS. * @param type_map a hash table mapping labels to declared symbol types. */ protected String label_translate( String id_str, /* the id string we are (possibly) translating */ int act_pos, /* position of the action */ Hashtable label_map, /* map from labels to positions in the RHS */ Hashtable label_types)/* map from labels to stack types */ { Integer label_pos; String label_type; int offset; /* look up the id */ label_pos = (Integer)label_map.get(id_str); /* if we don't find it, just return the id */ if (label_pos == null) return id_str; /* extract the type of the labeled symbol */ label_type = (String)label_types.get(id_str); /* is this for the LHS? */ if (label_pos.intValue() == -1) { /* return the result object cast properly */ return "((" + label_type + ")" + emit.pre("result") + ")"; } /* its a RHS label */ /* if the label appears after the action, we have an error */ if (label_pos.intValue() > act_pos) { /* emit an error message */ System.err.println("*** Label \"" + id_str + "\" appears in action before it appears in production"); lexer.error_count++; // later need to print the production this is in /* just return the id unchanged */ return id_str; } /* calculate the stack offset as the difference in position from label to action minus one */ offset = (act_pos - label_pos.intValue())-1; /* translation is properly cast element at that offset from TOS */ return "(/*"+id_str+"*/("+label_type+")" + emit.pre("stack") + ".elementAt(" + emit.pre("top") +"-"+ offset + "))"; } /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/ /** Translate all the label names within an action string to appropriate code. * @param act_string the string to be translated * @param act_pos the position that the action originally held in the * production. * @param label_map a hash table mapping labels to positions in the RHS. * @param type_map a hash table mapping labels to declared symbol types. */ protected String action_translate( String act_string, /* the action string */ int act_pos, /* the position of the action on the RHS */ Hashtable label_map, /* map from labels to RHS positions */ Hashtable label_types) /* map from labels to symbol stack types */ { int id_start; int pos; int len; String id_str; boolean in_id; StringBuffer result; char buffer[]; /* if we have no string we are done */ if (act_string == null || act_string.length()== 0) return act_string; len = act_string.length(); /* set up a place to put the result */ result = new StringBuffer(len + 50); /* extract string into array */ buffer = new char[len + 1]; act_string.getChars(0, len, buffer, 0); /* put terminator in buffer so we can look one past the end */ buffer[len] = '\0'; /* walk down the input buffer looking for identifiers */ in_id = false; for (pos = id_start = 0; pos <= len; pos++) { /* are we currently working on an id? */ if (in_id) { /* does this end the id? */ if (!is_id_char(buffer[pos])) { /* extract the id string and translate it */ id_str = new String(buffer, id_start, pos - id_start); result.append( label_translate(id_str, act_pos, label_map,label_types)); /* copy over the ending character */ if (buffer[pos] != '\0') result.append(buffer, pos, 1); /* and we are done with this id */ in_id = false; } else { /* we are still in the id, so just keep going */ } } else /* we are not inside an id */ { /* start a new id? */ if (is_id_start(buffer[pos])) { /* start keeping these chars as an id */ in_id = true; id_start = pos; } else { /* just copy over the char */ if (buffer[pos] != '\0') result.append(buffer, pos, 1); } } } /* return the accumulated result */ return result.toString(); } /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/ /** Translate label names to appropriate code within all action strings. * @param rhs array of RHS parts. * @param rhs_len how much of rhs to consider valid. * @param final_action the final action string of the production. * @param lhs_type the object type associated with the LHS symbol. */ protected String translate_labels( production_part rhs[], int rhs_len, String final_action, String lhs_type) { Hashtable label_map = new Hashtable(11); Hashtable label_types = new Hashtable(11); symbol_part part; action_part act_part; int pos; /* walk down the parts and extract the labels */ for (pos = 0; pos < rhs_len; pos++) { if (!rhs[pos].is_action()) { part = (symbol_part)rhs[pos]; /* if it has a label enter it in the tables */ if (part.label() != null) { label_map.put(part.label(), new Integer(pos)); label_types.put(part.label(), part.the_symbol().stack_type()); } } } /* add a label for the LHS */ label_map.put("RESULT", new Integer(-1)); label_types.put("RESULT", lhs_type); /* now walk across and do each action string */ for (pos = 0; pos < rhs_len; pos++) { if (rhs[pos].is_action()) { act_part = (action_part)rhs[pos]; act_part.set_code_string( action_translate( act_part.code_string(), pos, label_map, label_types)); } } /* now do the final action string at the position after the last */ return action_translate(final_action, rhs_len, label_map, label_types); } /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/ /** Helper routine to merge adjacent actions in a set of RHS parts * @param rhs_parts array of RHS parts. * @param len amount of that array that is valid. * @return remaining valid length. */ protected int merge_adjacent_actions(production_part rhs_parts[], int len) { int from_loc, to_loc, merge_cnt; /* bail out early if we have no work to do */ if (rhs_parts == null || len == 0) return 0; merge_cnt = 0; to_loc = -1; for (from_loc=0; from_loc<len; from_loc++) { /* do we go in the current position or one further */ if (to_loc < 0 || !rhs_parts[to_loc].is_action() || !rhs_parts[from_loc].is_action()) { /* next one */ to_loc++; /* clear the way for it */ if (to_loc != from_loc) rhs_parts[to_loc] = null; } /* if this is not trivial? */ if (to_loc != from_loc) { /* do we merge or copy? */ if (rhs_parts[to_loc] != null && rhs_parts[to_loc].is_action() && rhs_parts[from_loc].is_action()) { /* merge */ rhs_parts[to_loc] = new action_part( ((action_part)rhs_parts[to_loc]).code_string() + ((action_part)rhs_parts[from_loc]).code_string()); merge_cnt++; } else { /* copy */ rhs_parts[to_loc] = rhs_parts[from_loc]; } } } /* return the used length */ return len - merge_cnt; } /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/ /** Helper routine to strip a trailing action off rhs and return it * @param rhs_parts array of RHS parts. * @param len how many of those are valid. * @return the removed action part. */ protected action_part strip_trailing_action( production_part rhs_parts[], int len) { action_part result; /* bail out early if we have nothing to do */ if (rhs_parts == null || len == 0) return null; /* see if we have a trailing action */ if (rhs_parts[len-1].is_action()) { /* snip it out and return it */ result = (action_part)rhs_parts[len-1]; rhs_parts[len-1] = null; return result; } else return null; } /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/ /** Remove all embedded actions from a production by factoring them * out into individual action production using new non terminals. * if the original production was: <pre> * A ::= B {action1} C {action2} D * </pre> * then it will be factored into: <pre> * A ::= B NT$1 C NT$2 D * NT$1 ::= {action1} * NT$2 ::= {action2} * </pre> * where NT$1 and NT$2 are new system created non terminals. */ protected void remove_embedded_actions() throws internal_error { non_terminal new_nt; production new_prod; /* walk over the production and process each action */ for (int act_loc = 0; act_loc < rhs_length(); act_loc++) if (rhs(act_loc).is_action()) { /* create a new non terminal for the action production */ new_nt = non_terminal.create_new(); /* create a new production with just the action */ new_prod = new action_production(this, new_nt, null, 0, ((action_part)rhs(act_loc)).code_string()); /* replace the action with the generated non terminal */ _rhs[act_loc] = new symbol_part(new_nt); } } /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/ /** Check to see if the production (now) appears to be nullable. * A production is nullable if its RHS could derive the empty string. * This results when the RHS is empty or contains only non terminals * which themselves are nullable. */ public boolean check_nullable() throws internal_error { production_part part; symbol sym; int pos; /* if we already know bail out early */ if (nullable_known()) return nullable(); /* if we have a zero size RHS we are directly nullable */ if (rhs_length() == 0) { /* stash and return the result */ return set_nullable(true); } /* otherwise we need to test all of our parts */ for (pos=0; pos<rhs_length(); pos++) { part = rhs(pos); /* only look at non-actions */ if (!part.is_action()) { sym = ((symbol_part)part).the_symbol(); /* if its a terminal we are definitely not nullable */ if (!sym.is_non_term()) return set_nullable(false); /* its a non-term, is it marked nullable */ else if (!((non_terminal)sym).nullable()) /* this one not (yet) nullable, so we aren't */ return false; } } /* if we make it here all parts are nullable */ return set_nullable(true); } /** set (and return) nullability */ boolean set_nullable(boolean v) { _nullable_known = true; _nullable = v; return v; } /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/ /** Update (and return) the first set based on current NT firsts. * This assumes that nullability has already been computed for all non * terminals and productions. */ public terminal_set check_first_set() throws internal_error { int part; symbol sym; /* walk down the right hand side till we get past all nullables */ for (part=0; part<rhs_length(); part++) { /* only look at non-actions */ if (!rhs(part).is_action()) { sym = ((symbol_part)rhs(part)).the_symbol(); /* is it a non-terminal?*/ if (sym.is_non_term()) { /* add in current firsts from that NT */ _first_set.add(((non_terminal)sym).first_set()); /* if its not nullable, we are done */ if (!((non_terminal)sym).nullable()) break; } else { /* its a terminal -- add that to the set */ _first_set.add((terminal)sym); /* we are done */ break; } } } /* return our updated first set */ return first_set(); } /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/ /** Equality comparison. */ public boolean equals(production other) { if (other == null) return false; return other._index == _index; } /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/ /** Generic equality comparison. */ public boolean equals(Object other) { if (!(other instanceof production)) return false; else return equals((production)other); } /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/ /** Produce a hash code. */ public int hashCode() { /* just use a simple function of the index */ return _index*13; } /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/ /** Convert to a string. */ public String toString() { String result; /* catch any internal errors */ try { result = "production [" + index() + "]: "; result += ((lhs() != null) ? lhs().toString() : "$$NULL-LHS$$"); result += " :: = "; for (int i = 0; i<rhs_length(); i++) result += rhs(i) + " "; result += ";"; if (action() != null && action().code_string() != null) result += " {" + action().code_string() + "}"; if (nullable_known()) if (nullable()) result += "[NULLABLE]"; else result += "[NOT NULLABLE]"; } catch (internal_error e) { /* crash on internal error since we can't throw it from here (because superclass does not throw anything. */ e.crash(); result = null; } return result; } /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/ /** Convert to a simpler string. */ public String to_simple_string() throws internal_error { String result; result = ((lhs() != null) ? lhs().the_symbol().name() : "NULL_LHS"); result += " ::= "; for (int i = 0; i < rhs_length(); i++) if (!rhs(i).is_action()) result += ((symbol_part)rhs(i)).the_symbol().name() + " "; return result; } /*-----------------------------------------------------------*/ };