4 import java.util.Hashtable;
5 import java.util.Enumeration;
7 /** This class represents a production in the grammar. It contains
8 * a LHS non terminal, and an array of RHS symbols. As various
9 * transformations are done on the RHS of the production, it may shrink.
10 * As a result a separate length is always maintained to indicate how much
11 * of the RHS array is still valid.<p>
13 * I addition to construction and manipulation operations, productions provide
14 * methods for factoring out actions (see remove_embedded_actions()), for
15 * computing the nullability of the production (i.e., can it derive the empty
16 * string, see check_nullable()), and operations for computing its first
17 * set (i.e., the set of terminals that could appear at the beginning of some
18 * string derived from the production, see check_first_set()).
20 * @see java_cup.production_part
21 * @see java_cup.symbol_part
22 * @see java_cup.action_part
23 * @version last updated: 7/3/96
24 * @author Frank Flannery
27 public class production {
29 /*-----------------------------------------------------------*/
30 /*--- Constructor(s) ----------------------------------------*/
31 /*-----------------------------------------------------------*/
33 /** Full constructor. This constructor accepts a LHS non terminal,
34 * an array of RHS parts (including terminals, non terminals, and
35 * actions), and a string for a final reduce action. It does several
36 * manipulations in the process of creating a production object.
37 * After some validity checking it translates labels that appear in
38 * actions into code for accessing objects on the runtime parse stack.
39 * It them merges adjacent actions if they appear and moves any trailing
40 * action into the final reduce actions string. Next it removes any
41 * embedded actions by factoring them out with new action productions.
42 * Finally it assigns a unique index to the production.<p>
44 * Factoring out of actions is accomplished by creating new "hidden"
45 * non terminals. For example if the production was originally: <pre>
46 * A ::= B {action} C D
48 * then it is factored into two productions:<pre>
52 * (where X is a unique new non terminal). This has the effect of placing
53 * all actions at the end where they can be handled as part of a reduce by
58 production_part rhs_parts[],
64 action_part tail_action;
68 /* remember the length */
71 else if (rhs_parts != null)
72 _rhs_length = rhs_parts.length;
76 /* make sure we have a valid left-hand-side */
78 throw new internal_error(
79 "Attempt to construct a production with a null LHS");
81 /* I'm not translating labels anymore, I'm adding code to declare
82 labels as valid variables. This way, the users code string is
86 /* check if the last part of the right hand side is an action. If
87 it is, it won't be on the stack, so we don't want to count it
88 in the rightlen. Then when we search down the stack for a
89 Symbol, we don't try to search past action */
92 if (rhs_parts[rhs_l - 1].is_action()) {
99 /* get the generated declaration code for the necessary labels. */
100 declare_str = declare_labels(
101 rhs_parts, rightlen, action_str);
103 if (action_str == null)
104 action_str = declare_str;
106 action_str = declare_str + action_str;
108 /* count use of lhs */
111 /* create the part for left-hand-side */
112 _lhs = new symbol_part(lhs_sym);
114 /* merge adjacent actions (if any) */
115 _rhs_length = merge_adjacent_actions(rhs_parts, _rhs_length);
117 /* strip off any trailing action */
118 tail_action = strip_trailing_action(rhs_parts, _rhs_length);
119 if (tail_action != null) _rhs_length--;
121 /* Why does this run through the right hand side happen
122 over and over? here a quick combination of two
123 prior runs plus one I wanted of my own
125 /* allocate and copy over the right-hand-side */
126 /* count use of each rhs symbol */
127 _rhs = new production_part[_rhs_length];
128 for (i=0; i<_rhs_length; i++) {
129 _rhs[i] = rhs_parts[i];
130 if (!_rhs[i].is_action()) {
131 ((symbol_part)_rhs[i]).the_symbol().note_use();
132 if (((symbol_part)_rhs[i]).the_symbol() instanceof terminal) {
134 ((terminal)((symbol_part)_rhs[i]).the_symbol()).precedence_num();
136 ((terminal)((symbol_part)_rhs[i]).the_symbol()).precedence_side();
141 /*now action string is really declaration string, so put it in front!
143 if (action_str == null) action_str = "";
144 if (tail_action != null && tail_action.code_string() != null)
145 action_str = action_str + "\t\t" + tail_action.code_string();
147 /* stash the action */
148 _action = new action_part(action_str);
150 /* rewrite production to remove any embedded actions */
151 remove_embedded_actions();
153 /* assign an index */
154 _index = next_index++;
156 /* put us in the global collection of productions */
157 _all.put(new Integer(_index),this);
159 /* put us in the production list of the lhs non terminal */
160 lhs_sym.add_production(this);
163 /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
165 /** Constructor with no action string. */
167 non_terminal lhs_sym,
168 production_part rhs_parts[],
170 throws internal_error
172 this(lhs_sym,rhs_parts,rhs_l,null);
175 /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
177 /* Constructor with precedence and associativity of production
178 contextually define */
180 non_terminal lhs_sym,
181 production_part rhs_parts[],
186 throws internal_error
188 this(lhs_sym,rhs_parts,rhs_l,action_str);
190 /* set the precedence */
191 set_precedence_num(prec_num);
192 set_precedence_side(prec_side);
195 /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
197 /* Constructor w/ no action string and contextual precedence
200 non_terminal lhs_sym,
201 production_part rhs_parts[],
205 throws internal_error
207 this(lhs_sym,rhs_parts,rhs_l,null);
208 /* set the precedence */
209 set_precedence_num(prec_num);
210 set_precedence_side(prec_side);
213 /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
215 /*-----------------------------------------------------------*/
216 /*--- (Access to) Static (Class) Variables ------------------*/
217 /*-----------------------------------------------------------*/
220 /** Table of all productions. Elements are stored using their index as
223 protected static Hashtable _all = new Hashtable();
225 /** Access to all productions. */
226 public static Enumeration all() {return _all.elements();}
228 /** Lookup a production by index. */
229 public static production find(int indx) {
230 return (production) _all.get(new Integer(indx));
233 /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
235 /** Total number of productions. */
236 public static int number() {return _all.size();}
238 /** Static counter for assigning unique index numbers. */
239 protected static int next_index;
241 /*-----------------------------------------------------------*/
242 /*--- (Access to) Instance Variables ------------------------*/
243 /*-----------------------------------------------------------*/
245 /** The left hand side non-terminal. */
246 protected symbol_part _lhs;
248 /** The left hand side non-terminal. */
249 public symbol_part lhs() {return _lhs;}
252 /** The precedence of the rule */
253 protected int _rhs_prec = -1;
254 protected int _rhs_assoc = -1;
256 /** Access to the precedence of the rule */
257 public int precedence_num() { return _rhs_prec; }
258 public int precedence_side() { return _rhs_assoc; }
260 /** Setting the precedence of a rule */
261 public void set_precedence_num(int prec_num) {
262 _rhs_prec = prec_num;
264 public void set_precedence_side(int prec_side) {
265 _rhs_assoc = prec_side;
268 /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
270 /** A collection of parts for the right hand side. */
271 protected production_part _rhs[];
273 /** Access to the collection of parts for the right hand side. */
274 public production_part rhs(int indx) throws internal_error
276 if (indx >= 0 && indx < _rhs_length)
279 throw new internal_error(
280 "Index out of range for right hand side of production");
283 /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
285 /** How much of the right hand side array we are presently using. */
286 protected int _rhs_length;
288 /** How much of the right hand side array we are presently using. */
289 public int rhs_length() {return _rhs_length;}
291 /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
293 /** An action_part containing code for the action to be performed when we
294 * reduce with this production.
296 protected action_part _action;
298 /** An action_part containing code for the action to be performed when we
299 * reduce with this production.
301 public action_part action() {return _action;}
303 /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
305 /** Index number of the production. */
306 protected int _index;
308 /** Index number of the production. */
309 public int index() {return _index;}
311 /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
313 /** Count of number of reductions using this production. */
314 protected int _num_reductions = 0;
316 /** Count of number of reductions using this production. */
317 public int num_reductions() {return _num_reductions;}
319 /** Increment the count of reductions with this non-terminal */
320 public void note_reduction_use() {_num_reductions++;}
322 /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
324 /** Is the nullability of the production known or unknown? */
325 protected boolean _nullable_known = false;
327 /** Is the nullability of the production known or unknown? */
328 public boolean nullable_known() {return _nullable_known;}
330 /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
332 /** Nullability of the production (can it derive the empty string). */
333 protected boolean _nullable = false;
335 /** Nullability of the production (can it derive the empty string). */
336 public boolean nullable() {return _nullable;}
338 /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
340 /** First set of the production. This is the set of terminals that
341 * could appear at the front of some string derived from this production.
343 protected terminal_set _first_set = new terminal_set();
345 /** First set of the production. This is the set of terminals that
346 * could appear at the front of some string derived from this production.
348 public terminal_set first_set() {return _first_set;}
350 /*-----------------------------------------------------------*/
351 /*--- Static Methods ----------------------------------------*/
352 /*-----------------------------------------------------------*/
354 /** Determine if a given character can be a label id starter.
355 * @param c the character in question.
357 protected static boolean is_id_start(char c)
359 return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || (c == '_');
361 //later need to handle non-8-bit chars here
364 /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
366 /** Determine if a character can be in a label id.
367 * @param c the character in question.
369 protected static boolean is_id_char(char c)
371 return is_id_start(c) || (c >= '0' && c <= '9');
374 /*-----------------------------------------------------------*/
375 /*--- General Methods ---------------------------------------*/
376 /*-----------------------------------------------------------*/
379 /** Return label declaration code
380 * @param labelname the label name
381 * @param stack_type the stack type of label?
384 protected String make_declaration(
391 /* Put in the left/right value labels */
392 if (emit.lr_values())
393 ret = "\t\tint " + labelname + "left = ((java_cup.runtime.Symbol)" +
394 emit.pre("stack") + ".elementAt(" + emit.pre("top") +
395 "-" + offset + ")).left;\n" +
396 "\t\tint " + labelname + "right = ((java_cup.runtime.Symbol)" +
397 emit.pre("stack") + ".elementAt(" + emit.pre("top") +
398 "-" + offset + ")).right;\n";
401 /* otherwise, just declare label. */
402 return ret + "\t\t" + stack_type + " " + labelname + " = (" + stack_type +
403 ")((" + "java_cup.runtime.Symbol) " + emit.pre("stack") + ".elementAt(" + emit.pre("top")
404 + "-" + offset + ")).value;\n";
407 /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
409 /** Declare label names as valid variables within the action string
410 * @param rhs array of RHS parts.
411 * @param rhs_len how much of rhs to consider valid.
412 * @param final_action the final action string of the production.
413 * @param lhs_type the object type associated with the LHS symbol.
415 protected String declare_labels(
416 production_part rhs[],
420 String declaration = "";
423 action_part act_part;
426 /* walk down the parts and extract the labels */
427 for (pos = 0; pos < rhs_len; pos++)
429 if (!rhs[pos].is_action())
431 part = (symbol_part)rhs[pos];
433 /* if it has a label, make declaration! */
434 if (part.label() != null)
436 declaration = declaration +
437 make_declaration(part.label(), part.the_symbol().stack_type(),
447 /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
449 /** Helper routine to merge adjacent actions in a set of RHS parts
450 * @param rhs_parts array of RHS parts.
451 * @param len amount of that array that is valid.
452 * @return remaining valid length.
454 protected int merge_adjacent_actions(production_part rhs_parts[], int len)
456 int from_loc, to_loc, merge_cnt;
458 /* bail out early if we have no work to do */
459 if (rhs_parts == null || len == 0) return 0;
463 for (from_loc=0; from_loc<len; from_loc++)
465 /* do we go in the current position or one further */
466 if (to_loc < 0 || !rhs_parts[to_loc].is_action()
467 || !rhs_parts[from_loc].is_action())
472 /* clear the way for it */
473 if (to_loc != from_loc) rhs_parts[to_loc] = null;
476 /* if this is not trivial? */
477 if (to_loc != from_loc)
479 /* do we merge or copy? */
480 if (rhs_parts[to_loc] != null && rhs_parts[to_loc].is_action() &&
481 rhs_parts[from_loc].is_action())
484 rhs_parts[to_loc] = new action_part(
485 ((action_part)rhs_parts[to_loc]).code_string() +
486 ((action_part)rhs_parts[from_loc]).code_string());
492 rhs_parts[to_loc] = rhs_parts[from_loc];
497 /* return the used length */
498 return len - merge_cnt;
501 /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
503 /** Helper routine to strip a trailing action off rhs and return it
504 * @param rhs_parts array of RHS parts.
505 * @param len how many of those are valid.
506 * @return the removed action part.
508 protected action_part strip_trailing_action(
509 production_part rhs_parts[],
514 /* bail out early if we have nothing to do */
515 if (rhs_parts == null || len == 0) return null;
517 /* see if we have a trailing action */
518 if (rhs_parts[len-1].is_action())
520 /* snip it out and return it */
521 result = (action_part)rhs_parts[len-1];
522 rhs_parts[len-1] = null;
529 /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
531 /** Remove all embedded actions from a production by factoring them
532 * out into individual action production using new non terminals.
533 * if the original production was: <pre>
534 * A ::= B {action1} C {action2} D
536 * then it will be factored into: <pre>
537 * A ::= B NT$1 C NT$2 D
541 * where NT$1 and NT$2 are new system created non terminals.
544 /* the declarations added to the parent production are also passed along,
545 as they should be perfectly valid in this code string, since it
546 was originally a code string in the parent, not on its own.
548 protected void remove_embedded_actions(
550 ) throws internal_error
556 /* walk over the production and process each action */
557 for (int act_loc = 0; act_loc < rhs_length(); act_loc++)
558 if (rhs(act_loc).is_action())
562 declare_str = declare_labels(
564 /* create a new non terminal for the action production */
565 new_nt = non_terminal.create_new();
566 new_nt.is_embedded_action = true; /* 24-Mar-1998, CSA */
568 /* create a new production with just the action */
569 new_prod = new action_production(this, new_nt, null, 0,
570 declare_str + ((action_part)rhs(act_loc)).code_string());
572 /* replace the action with the generated non terminal */
573 _rhs[act_loc] = new symbol_part(new_nt);
577 /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
579 /** Check to see if the production (now) appears to be nullable.
580 * A production is nullable if its RHS could derive the empty string.
581 * This results when the RHS is empty or contains only non terminals
582 * which themselves are nullable.
584 public boolean check_nullable() throws internal_error
586 production_part part;
590 /* if we already know bail out early */
591 if (nullable_known()) return nullable();
593 /* if we have a zero size RHS we are directly nullable */
594 if (rhs_length() == 0)
596 /* stash and return the result */
597 return set_nullable(true);
600 /* otherwise we need to test all of our parts */
601 for (pos=0; pos<rhs_length(); pos++)
605 /* only look at non-actions */
606 if (!part.is_action())
608 sym = ((symbol_part)part).the_symbol();
610 /* if its a terminal we are definitely not nullable */
611 if (!sym.is_non_term())
612 return set_nullable(false);
613 /* its a non-term, is it marked nullable */
614 else if (!((non_terminal)sym).nullable())
615 /* this one not (yet) nullable, so we aren't */
620 /* if we make it here all parts are nullable */
621 return set_nullable(true);
624 /** set (and return) nullability */
625 boolean set_nullable(boolean v)
627 _nullable_known = true;
632 /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
634 /** Update (and return) the first set based on current NT firsts.
635 * This assumes that nullability has already been computed for all non
636 * terminals and productions.
638 public terminal_set check_first_set() throws internal_error
643 /* walk down the right hand side till we get past all nullables */
644 for (part=0; part<rhs_length(); part++)
646 /* only look at non-actions */
647 if (!rhs(part).is_action())
649 sym = ((symbol_part)rhs(part)).the_symbol();
651 /* is it a non-terminal?*/
652 if (sym.is_non_term())
654 /* add in current firsts from that NT */
655 _first_set.add(((non_terminal)sym).first_set());
657 /* if its not nullable, we are done */
658 if (!((non_terminal)sym).nullable())
663 /* its a terminal -- add that to the set */
664 _first_set.add((terminal)sym);
672 /* return our updated first set */
676 /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
678 /** Equality comparison. */
679 public boolean equals(production other)
681 if (other == null) return false;
682 return other._index == _index;
685 /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
687 /** Generic equality comparison. */
688 public boolean equals(Object other)
690 if (!(other instanceof production))
693 return equals((production)other);
696 /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
698 /** Produce a hash code. */
699 public int hashCode()
701 /* just use a simple function of the index */
705 /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
707 /** Convert to a string. */
708 public String toString()
712 /* catch any internal errors */
714 result = "production [" + index() + "]: ";
715 result += ((lhs() != null) ? lhs().toString() : "$$NULL-LHS$$");
717 for (int i = 0; i<rhs_length(); i++)
718 result += rhs(i) + " ";
720 if (action() != null && action().code_string() != null)
721 result += " {" + action().code_string() + "}";
723 if (nullable_known())
725 result += "[NULLABLE]";
727 result += "[NOT NULLABLE]";
728 } catch (internal_error e) {
729 /* crash on internal error since we can't throw it from here (because
730 superclass does not throw anything. */
738 /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
740 /** Convert to a simpler string. */
741 public String to_simple_string() throws internal_error
745 result = ((lhs() != null) ? lhs().the_symbol().name() : "NULL_LHS");
747 for (int i = 0; i < rhs_length(); i++)
748 if (!rhs(i).is_action())
749 result += ((symbol_part)rhs(i)).the_symbol().name() + " ";
754 /*-----------------------------------------------------------*/