Java程序  |  783行  |  24.85 KB


package java_cup;
 
import java.util.Enumeration;
import java.util.Hashtable;

/** This class represents a production in the grammar.  It contains
 *  a LHS non terminal, and an array of RHS symbols.  As various 
 *  transformations are done on the RHS of the production, it may shrink.
 *  As a result a separate length is always maintained to indicate how much
 *  of the RHS array is still valid.<p>
 * 
 *  I addition to construction and manipulation operations, productions provide
 *  methods for factoring out actions (see  remove_embedded_actions()), for
 *  computing the nullability of the production (i.e., can it derive the empty
 *  string, see check_nullable()), and operations for computing its first
 *  set (i.e., the set of terminals that could appear at the beginning of some
 *  string derived from the production, see check_first_set()).
 * 
 * @see     java_cup.production_part
 * @see     java_cup.symbol_part
 * @see     java_cup.action_part
 * @version last updated: 11/25/95
 * @author  Scott Hudson
 */

public class production {

  /*-----------------------------------------------------------*/
  /*--- Constructor(s) ----------------------------------------*/
  /*-----------------------------------------------------------*/

  /** Full constructor.  This constructor accepts a LHS non terminal,
   *  an array of RHS parts (including terminals, non terminals, and 
   *  actions), and a string for a final reduce action.   It does several
   *  manipulations in the process of  creating a production object.
   *  After some validity checking it translates labels that appear in
   *  actions into code for accessing objects on the runtime parse stack.
   *  It them merges adjacent actions if they appear and moves any trailing
   *  action into the final reduce actions string.  Next it removes any
   *  embedded actions by factoring them out with new action productions.  
   *  Finally it assigns a unique index to the production.<p>
   *
   *  Factoring out of actions is accomplished by creating new "hidden"
   *  non terminals.  For example if the production was originally: <pre>
   *    A ::= B {action} C D
   *  </pre>
   *  then it is factored into two productions:<pre>
   *    A ::= B X C D
   *    X ::= {action}
   *  </pre> 
   *  (where X is a unique new non terminal).  This has the effect of placing
   *  all actions at the end where they can be handled as part of a reduce by
   *  the parser.
   */
  public production(
    non_terminal    lhs_sym, 
    production_part rhs_parts[], 
    int             rhs_l,
    String          action_str)
    throws internal_error
    {
      int         i;
      action_part tail_action;

      /* remember the length */
      if (rhs_l >= 0)
    _rhs_length = rhs_l;
      else if (rhs_parts != null)
    _rhs_length = rhs_parts.length;
      else
    _rhs_length = 0;
    
      /* make sure we have a valid left-hand-side */
      if (lhs_sym == null) 
    throw new internal_error(
      "Attempt to construct a production with a null LHS");

      /* translate labels appearing in action strings */
      action_str = translate_labels(
             rhs_parts, rhs_l, action_str, lhs_sym.stack_type());

      /* count use of lhs */
      lhs_sym.note_use();

      /* create the part for left-hand-side */
      _lhs = new symbol_part(lhs_sym);

      /* merge adjacent actions (if any) */
      _rhs_length = merge_adjacent_actions(rhs_parts, _rhs_length);

      /* strip off any trailing action */
      tail_action = strip_trailing_action(rhs_parts, _rhs_length);
      if (tail_action != null) _rhs_length--;

      /* allocate and copy over the right-hand-side */
      _rhs = new production_part[_rhs_length];
      for (i=0; i<_rhs_length; i++)
    _rhs[i] = rhs_parts[i];

      /* count use of each rhs symbol */
      for (i=0; i<_rhs_length; i++)
    if (!_rhs[i].is_action())
      ((symbol_part)_rhs[i]).the_symbol().note_use();

      /* merge any trailing action with action string parameter */
      if (action_str == null) action_str = "";
      if (tail_action != null && tail_action.code_string() != null)
    action_str = tail_action.code_string() + action_str;

      /* stash the action */
      _action = new action_part(action_str);

      /* rewrite production to remove any embedded actions */
      remove_embedded_actions();

      /* assign an index */
      _index = next_index++;

      /* put us in the global collection of productions */
      _all.put(new Integer(_index),this);

      /* put us in the production list of the lhs non terminal */
      lhs_sym.add_production(this);
    }

  /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/

  /** Constructor with no action string. */
  public production(
    non_terminal    lhs_sym, 
    production_part rhs_parts[], 
    int             rhs_l)
    throws internal_error
    {
      this(lhs_sym,rhs_parts,rhs_l,null);
    }
 
  /*-----------------------------------------------------------*/
  /*--- (Access to) Static (Class) Variables ------------------*/
  /*-----------------------------------------------------------*/
 
  /** Table of all productions.  Elements are stored using their index as 
   *  the key.
   */
  protected static Hashtable _all = new Hashtable();
 
  /** Access to all productions. */
  public static Enumeration all() {return _all.elements();};

  /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
 
  /** Total number of productions. */
  public static int number() {return _all.size();};

  /** Static counter for assigning unique index numbers. */
  protected static int next_index;

  /*-----------------------------------------------------------*/
  /*--- (Access to) Instance Variables ------------------------*/
  /*-----------------------------------------------------------*/

  /** The left hand side non-terminal. */
  protected symbol_part _lhs;

  /** The left hand side non-terminal. */
  public symbol_part lhs() {return _lhs;}

  /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/

  /** A collection of parts for the right hand side. */
  protected production_part _rhs[];

  /** Access to the collection of parts for the right hand side. */
  public production_part rhs(int indx) throws internal_error
    {
      if (indx >= 0 && indx < _rhs_length)
    return _rhs[indx];
      else
    throw new internal_error(
      "Index out of range for right hand side of production");
    }

  /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/

  /** How much of the right hand side array we are presently using. */
  protected int _rhs_length;

  /** How much of the right hand side array we are presently using. */
  public int rhs_length() {return _rhs_length;}

  /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/

  /** An action_part containing code for the action to be performed when we 
   *  reduce with this production. 
   */
  protected action_part _action;

  /** An action_part containing code for the action to be performed when we 
   *  reduce with this production. 
   */
  public action_part action() {return _action;}

  /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/

  /** Index number of the production. */
  protected int _index;

  /** Index number of the production. */
  public int index() {return _index;}

  /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/

  /** Count of number of reductions using this production. */
  protected int _num_reductions = 0;

  /** Count of number of reductions using this production. */
  public int num_reductions() {return _num_reductions;}

  /** Increment the count of reductions with this non-terminal */
  public void note_reduction_use() {_num_reductions++;}

  /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/

  /** Is the nullability of the production known or unknown? */
  protected boolean _nullable_known = false;

  /** Is the nullability of the production known or unknown? */
  public boolean nullable_known() {return _nullable_known;}

  /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/

  /** Nullability of the production (can it derive the empty string). */
  protected boolean _nullable = false;

  /** Nullability of the production (can it derive the empty string). */
  public boolean nullable() {return _nullable;}

  /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/

  /** First set of the production.  This is the set of terminals that 
   *  could appear at the front of some string derived from this production.
   */
  protected terminal_set _first_set = new terminal_set();

  /** First set of the production.  This is the set of terminals that 
   *  could appear at the front of some string derived from this production.
   */
  public terminal_set first_set() {return _first_set;}

  /*-----------------------------------------------------------*/
  /*--- Static Methods ----------------------------------------*/
  /*-----------------------------------------------------------*/

  /** Determine if a given character can be a label id starter. 
   * @param c the character in question. 
   */
  protected static boolean is_id_start(char c)
    {
      return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || (c == '_');

      //later need to handle non-8-bit chars here
    }

  /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/

  /** Determine if a character can be in a label id. 
   * @param c the character in question.
   */
  protected static boolean is_id_char(char c)
    {
      return is_id_start(c) || (c >= '0' && c <= '9');
    }

  /*-----------------------------------------------------------*/
  /*--- General Methods ---------------------------------------*/
  /*-----------------------------------------------------------*/

  /** Determine the translation for one label id found within a code_string. 
   *  Symbols appearing in the RHS correspond to objects found on the parse
   *  stack at runtime.  The code to access them, becomes code to access an
   *  object at the appropriate offset from the top of the stack, and then
   *  cast that to the proper type.
   *
   * @param id_str    the name of the id to be translated.
   * @param act_pos   the original position of the action it appears in.
   * @param label_map a hash table mapping labels to positions in the RHS.
   * @param type_map  a hash table mapping labels to declared symbol types.
   */
  protected String label_translate(
    String    id_str,     /* the id string we are (possibly) translating */
    int       act_pos,    /* position of the action                      */
    Hashtable label_map,  /* map from labels to positions in the RHS     */
    Hashtable label_types)/* map from labels to stack types              */
    {
      Integer label_pos;
      String  label_type;
      int     offset;

      /* look up the id */
      label_pos  = (Integer)label_map.get(id_str);

      /* if we don't find it, just return the id */
      if (label_pos == null) return id_str;

      /* extract the type of the labeled symbol */
      label_type = (String)label_types.get(id_str);

      /* is this for the LHS? */
      if (label_pos.intValue() == -1)
        {
      /* return the result object cast properly */
      return "((" + label_type + ")" + emit.pre("result") + ")";
         }

       /* its a RHS label */

       /* if the label appears after the action, we have an error */
       if (label_pos.intValue() > act_pos)
         {
       /* emit an error message */
       System.err.println("*** Label \"" + id_str + 
         "\" appears in action before it appears in production");
        lexer.error_count++;

        // later need to print the production this is in
    
        /* just return the id unchanged */
          return id_str;
      }

      /* calculate the stack offset as the difference in position from 
     label to action minus one */
      offset = (act_pos - label_pos.intValue())-1;

      /* translation is properly cast element at that offset from TOS */
      return "(/*"+id_str+"*/("+label_type+")" + 
       emit.pre("stack") + ".elementAt(" + emit.pre("top") +"-"+ offset + "))";
   
    }

  /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/

  /** Translate all the label names within an action string to appropriate code.
   * @param act_string  the string to be translated
   * @param act_pos     the position that the action originally held in the 
   *                    production.
   * @param label_map   a hash table mapping labels to positions in the RHS.
   * @param type_map    a hash table mapping labels to declared symbol types.
   */
  protected String action_translate(
    String    act_string,  /* the action string                     */
    int       act_pos,     /* the position of the action on the RHS */
    Hashtable label_map,   /* map from labels to RHS positions      */
    Hashtable label_types) /* map from labels to symbol stack types */
    {
      int          id_start;
      int          pos;
      int          len;
      String       id_str;
      boolean      in_id;
      StringBuffer result;
      char         buffer[];

      /* if we have no string we are done */
      if (act_string == null || act_string.length()== 0) return act_string;

      len = act_string.length();

      /* set up a place to put the result */
      result = new StringBuffer(len + 50);

      /* extract string into array */
      buffer = new char[len + 1];
      act_string.getChars(0, len, buffer, 0);

      /* put terminator in buffer so we can look one past the end */
      buffer[len] = '\0';

      /* walk down the input buffer looking for identifiers */
      in_id = false;
      for (pos = id_start = 0; pos <= len; pos++)
    {
      /* are we currently working on an id? */
      if (in_id)
        {
          /* does this end the id? */
          if (!is_id_char(buffer[pos]))
        {
          /* extract the id string and translate it */
          id_str = new String(buffer, id_start, pos - id_start);
          result.append(
              label_translate(id_str, act_pos, label_map,label_types));

          /* copy over the ending character */
          if (buffer[pos] != '\0')
                result.append(buffer, pos, 1);

          /* and we are done with this id */
          in_id = false;
        }
          else
        {
          /* we are still in the id, so just keep going */
        }
        }
      else /* we are not inside an id */
        {
          /* start a new id? */
          if (is_id_start(buffer[pos]))
            {
          /* start keeping these chars as an id */
              in_id = true;
              id_start = pos;
            }
         else
           {
             /* just copy over the char */
         if (buffer[pos] != '\0')
               result.append(buffer, pos, 1);
           }
        }
    }

      /* return the accumulated result */
      return result.toString();
    }

  /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/

  /** Translate label names to appropriate code within all action strings. 
   * @param rhs          array of RHS parts.
   * @param rhs_len      how much of rhs to consider valid.
   * @param final_action the final action string of the production. 
   * @param lhs_type     the object type associated with the LHS symbol.
   */ 
  protected String translate_labels(
    production_part  rhs[], 
    int              rhs_len, 
    String           final_action,
    String           lhs_type)
    {
      Hashtable   label_map   = new Hashtable(11);
      Hashtable   label_types = new Hashtable(11);
      symbol_part part;
      action_part act_part;
      int         pos;

      /* walk down the parts and extract the labels */
      for (pos = 0; pos < rhs_len; pos++)
    {
      if (!rhs[pos].is_action())
        {
          part = (symbol_part)rhs[pos];

          /* if it has a label enter it in the tables */
          if (part.label() != null)
        {
          label_map.put(part.label(), new Integer(pos));
          label_types.put(part.label(), part.the_symbol().stack_type());
        }
        }
    }

      /* add a label for the LHS */
      label_map.put("RESULT", new Integer(-1));
      label_types.put("RESULT", lhs_type);

      /* now walk across and do each action string */
      for (pos = 0; pos < rhs_len; pos++)
    {
      if (rhs[pos].is_action())
        {
           act_part = (action_part)rhs[pos];
           act_part.set_code_string(
         action_translate(
           act_part.code_string(), pos, label_map, label_types));
        }
    }

      /* now do the final action string at the position after the last */
      return action_translate(final_action, rhs_len, label_map, label_types);

    }

  /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/

  /** Helper routine to merge adjacent actions in a set of RHS parts 
   * @param rhs_parts array of RHS parts.
   * @param len       amount of that array that is valid.
   * @return          remaining valid length.
   */
  protected int merge_adjacent_actions(production_part rhs_parts[], int len)
    {
      int from_loc, to_loc, merge_cnt;

      /* bail out early if we have no work to do */
      if (rhs_parts == null || len == 0) return 0;

      merge_cnt = 0;
      to_loc = -1;
      for (from_loc=0; from_loc<len; from_loc++)
    {
      /* do we go in the current position or one further */
      if (to_loc < 0 || !rhs_parts[to_loc].is_action() 
             || !rhs_parts[from_loc].is_action())
        {
          /* next one */
          to_loc++;

          /* clear the way for it */
          if (to_loc != from_loc) rhs_parts[to_loc] = null;
        }

      /* if this is not trivial? */
      if (to_loc != from_loc)
        {
          /* do we merge or copy? */
          if (rhs_parts[to_loc] != null && rhs_parts[to_loc].is_action() && 
          rhs_parts[from_loc].is_action())
          {
            /* merge */
            rhs_parts[to_loc] = new action_part(
          ((action_part)rhs_parts[to_loc]).code_string() +
          ((action_part)rhs_parts[from_loc]).code_string());
            merge_cnt++;
          }
        else
          {
            /* copy */
            rhs_parts[to_loc] = rhs_parts[from_loc];
          }
        }
    }

      /* return the used length */
      return len - merge_cnt;
    }

  /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/

  /** Helper routine to strip a trailing action off rhs and return it
   * @param rhs_parts array of RHS parts.
   * @param len       how many of those are valid.
   * @return          the removed action part.
   */
  protected action_part strip_trailing_action(
    production_part rhs_parts[],
    int             len)
    {
      action_part result;

      /* bail out early if we have nothing to do */
      if (rhs_parts == null || len == 0) return null;

      /* see if we have a trailing action */
      if (rhs_parts[len-1].is_action())
    {
      /* snip it out and return it */
      result = (action_part)rhs_parts[len-1];
      rhs_parts[len-1] = null;
      return result;
    }
      else
    return null;
    }

  /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/

  /** Remove all embedded actions from a production by factoring them 
   *  out into individual action production using new non terminals.
   *  if the original production was:  <pre>
   *    A ::= B {action1} C {action2} D 
   *  </pre>
   *  then it will be factored into: <pre>
   *    A ::= B NT$1 C NT$2 D
   *    NT$1 ::= {action1}
   *    NT$2 ::= {action2}
   *  </pre>
   *  where NT$1 and NT$2 are new system created non terminals.
   */
  protected void remove_embedded_actions() throws internal_error
    {
      non_terminal new_nt;
      production   new_prod;

      /* walk over the production and process each action */
      for (int act_loc = 0; act_loc < rhs_length(); act_loc++)
    if (rhs(act_loc).is_action())
      {
        /* create a new non terminal for the action production */
        new_nt = non_terminal.create_new();

        /* create a new production with just the action */
        new_prod = new action_production(this, new_nt, null, 0, 
            ((action_part)rhs(act_loc)).code_string());

        /* replace the action with the generated non terminal */
        _rhs[act_loc] = new symbol_part(new_nt);
      }
    }

  /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/

  /** Check to see if the production (now) appears to be nullable.
   *  A production is nullable if its RHS could derive the empty string.
   *  This results when the RHS is empty or contains only non terminals
   *  which themselves are nullable.
   */
  public boolean check_nullable() throws internal_error
    {
      production_part part;
      symbol          sym;
      int             pos;

      /* if we already know bail out early */
      if (nullable_known()) return nullable();

      /* if we have a zero size RHS we are directly nullable */
      if (rhs_length() == 0)
    {
      /* stash and return the result */
      return set_nullable(true);
    }

      /* otherwise we need to test all of our parts */
      for (pos=0; pos<rhs_length(); pos++)
    {
      part = rhs(pos);

      /* only look at non-actions */
      if (!part.is_action())
        {
          sym = ((symbol_part)part).the_symbol();

          /* if its a terminal we are definitely not nullable */
          if (!sym.is_non_term()) 
        return set_nullable(false);
          /* its a non-term, is it marked nullable */
          else if (!((non_terminal)sym).nullable())
        /* this one not (yet) nullable, so we aren't */
            return false;
        }
    }

      /* if we make it here all parts are nullable */
      return set_nullable(true);
    }

  /** set (and return) nullability */
  boolean set_nullable(boolean v)
    {
      _nullable_known = true;
      _nullable = v;
      return v;
    }

  /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/

  /** Update (and return) the first set based on current NT firsts. 
   *  This assumes that nullability has already been computed for all non 
   *  terminals and productions. 
   */
  public terminal_set check_first_set() throws internal_error
    {
      int    part;
      symbol sym;

      /* walk down the right hand side till we get past all nullables */
      for (part=0; part<rhs_length(); part++)
    {
      /* only look at non-actions */
      if (!rhs(part).is_action())
        {
          sym = ((symbol_part)rhs(part)).the_symbol();

          /* is it a non-terminal?*/
          if (sym.is_non_term())
        {
          /* add in current firsts from that NT */
          _first_set.add(((non_terminal)sym).first_set());

          /* if its not nullable, we are done */
          if (!((non_terminal)sym).nullable())
            break;
        }
          else
        {
              /* its a terminal -- add that to the set */
          _first_set.add((terminal)sym);

          /* we are done */
          break;
        }
        }
    }

      /* return our updated first set */
      return first_set();
    }

  /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/

  /** Equality comparison. */
  public boolean equals(production other)
    {
      if (other == null) return false;
      return other._index == _index;
    }

  /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/

  /** Generic equality comparison. */
  public boolean equals(Object other)
    {
      if (!(other instanceof production))
    return false;
      else
    return equals((production)other);
    }

  /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/

  /** Produce a hash code. */
  public int hashCode()
    {
      /* just use a simple function of the index */
      return _index*13;
    }

  /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/

  /** Convert to a string. */
  public String toString() 
    {
      String result;
      
      /* catch any internal errors */
      try {
        result = "production [" + index() + "]: "; 
        result += ((lhs() != null) ? lhs().toString() : "$$NULL-LHS$$");
        result += " :: = ";
        for (int i = 0; i<rhs_length(); i++)
      result += rhs(i) + " ";
        result += ";";
        if (action()  != null && action().code_string() != null) 
      result += " {" + action().code_string() + "}";

        if (nullable_known())
      if (nullable())
        result += "[NULLABLE]";
      else
        result += "[NOT NULLABLE]";
      } catch (internal_error e) {
    /* crash on internal error since we can't throw it from here (because
       superclass does not throw anything. */
    e.crash();
    result = null;
      }

      return result;
    }

  /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/

  /** Convert to a simpler string. */
  public String to_simple_string() throws internal_error
    {
      String result;

      result = ((lhs() != null) ? lhs().the_symbol().name() : "NULL_LHS");
      result += " ::= ";
      for (int i = 0; i < rhs_length(); i++)
    if (!rhs(i).is_action())
      result += ((symbol_part)rhs(i)).the_symbol().name() + " ";

      return result;
    }

  /*-----------------------------------------------------------*/

};