2 /*================================================================*/
4 JavaCup Specification for the JavaCup Specification Language
5 by Scott Hudson, GVU Center, Georgia Tech, August 1995
6 and Frank Flannery, Department of Computer Science, Princeton Univ,
8 Bug Fixes: C. Scott Ananian, Dept of Electrical Engineering, Princeton
9 University, October 1996. [later Massachusetts Institute of Technology]
12 This JavaCup specification is used to implement JavaCup itself.
13 It specifies the parser for the JavaCup specification language.
14 (It also serves as a reasonable example of what a typical JavaCup
17 The specification has the following parts:
18 Package and import declarations
19 These serve the same purpose as in a normal Java source file
20 (and will appear in the generated code for the parser). In this
21 case we are part of the java_cup package and we import both the
22 java_cup runtime system and Hashtable from the standard Java
26 This section provides code that is included with the class encapsulating
27 the various pieces of user code embedded in the grammar (i.e., the
28 semantic actions). This provides a series of helper routines and
29 data structures that the semantic actions use.
32 This section provides code included in the parser class itself. In
33 this case we override the default error reporting routines.
35 Init with and scan with
36 These sections provide small bits of code that initialize, then
37 indicate how to invoke the scanner.
40 These sections declare all the terminal and non terminal symbols
41 and the types of objects that they will be represented by at runtime,
42 then indicate the start symbol of the grammar, and finally provide
43 the grammar itself (with embedded actions).
45 Operation of the parser
46 The parser acts primarily by accumulating data structures representing
47 various parts of the specification. Various small parts (e.g., single
48 code strings) are stored as static variables of the emit class and
49 in a few cases as variables declared in the action code section.
50 Terminals, non terminals, and productions are maintained as collections
51 accessible via static methods of those classes. In addition, two
52 symbol tables are kept:
53 symbols maintains the name to object mapping for all symbols
54 non_terms maintains a separate mapping containing only the non terms
56 Several intermediate working structures are also declared in the action
57 code section. These include: rhs_parts, rhs_pos, and lhs_nt which
58 build up parts of the current production while it is being parsed.
61 Scott Hudson, GVU Center, Georgia Tech.
62 Frank Flannery, Department of Computer Science, Princeton Univ.
63 C. Scott Ananian, Department of Electrical Engineering, Princeton Univ.
66 v0.9a First released version [SEH] 8/29/95
67 v0.9b Updated for beta language (throws clauses) [SEH] 11/25/95
68 v0.10a Made many improvements/changes. now offers:
70 left/right positions and propagations
71 cleaner label references
72 precedence and associativity for terminals
73 contextual precedence for productions
75 v0.10b Fixed %prec directive so it works like it's supposed to.
77 v0.10g Added support for array types on symbols.
79 v0.10i Broaden set of IDs allowed in multipart_id and label_id so
80 that only java reserved words (and not CUP reserved words like
81 'parser' and 'start') are prohibited. Allow reordering of
82 action code, parser code, init code, and scan with sections,
83 and made closing semicolon optional for these sections.
84 Added 'nonterminal' as a terminal symbol, finally fixing a
85 spelling mistake that's been around since the beginning.
86 For backwards compatibility, you can still misspell the
89 /*================================================================*/
92 import java_cup.runtime.*;
93 import java.util.Hashtable;
95 /*----------------------------------------------------------------*/
98 /** helper routine to clone a new production part adding a given label.
 *  When lab is null, or part reports is_action(), the original part is
 *  handed back unchanged. Otherwise part is cast to symbol_part and a new
 *  symbol_part is built from its symbol together with lab.
 *  Declared to throw internal_error. */
99 protected production_part add_lab(production_part part, String lab)
100 throws internal_error
102 /* if there is no label, or this is an action, just return the original */
103 if (lab == null || part.is_action()) return part;
105 /* otherwise build a new one with the given label attached
   (the cast relies on every non-action part being a symbol_part) */
106 return new symbol_part(((symbol_part)part).the_symbol(),lab);
109 /** max size of right hand side we will support */
110 protected final int MAX_RHS = 200;
112 /** array for accumulating right hand side parts */
113 protected production_part[] rhs_parts = new production_part[MAX_RHS];
115 /** where we are currently in building a right hand side */
116 protected int rhs_pos = 0;
118 /** start a new right hand side by rewinding rhs_pos to 0; entries already held in rhs_parts are not cleared here */
119 protected void new_rhs() {rhs_pos = 0; }
121 /** add a new right hand side part: part is stored into rhs_parts at
 *  index rhs_pos. If rhs_pos has already reached MAX_RHS, an Exception
 *  naming the limit is thrown instead and nothing is stored. */
122 protected void add_rhs_part(production_part part) throws java.lang.Exception
124 if (rhs_pos >= MAX_RHS)
125 throw new Exception("Internal Error: Productions limited to " +
126 MAX_RHS + " symbols and actions");
/* NOTE(review): the statement that advances rhs_pos after this store is not
   visible in this listing -- confirm it follows the assignment */
128 rhs_parts[rhs_pos] = part;
132 /** string to build up multiple part names */
133 protected String multipart_name = new String();
135 /** append a new name segment to the accumulated multipart name; once the
 *  accumulated name is non-empty, "." is used as the separator placed in
 *  front of the new segment */
136 protected void append_multipart(String name)
140 /* if we aren't just starting out, put on a dot
   (presumably dot starts out empty -- its declaration is not visible here) */
141 if (multipart_name.length() != 0) dot = ".";
/* concat produces a new String, so the result is stored back into the field */
143 multipart_name = multipart_name.concat(dot + name);
146 /** table of declared symbols -- contains production parts indexed by name */
147 protected Hashtable symbols = new Hashtable();
149 /** table of just non terminals -- contains non_terminals indexed by name */
150 protected Hashtable non_terms = new Hashtable();
152 /** declared start non_terminal */
153 protected non_terminal start_nt = null;
155 /** left hand side non terminal of the current production */
156 protected non_terminal lhs_nt;
158 /** Current precedence number */
161 /** Current precedence side */
162 int _cur_side = assoc.no_prec;
164 /** update the precedences we are declaring */
165 protected void update_precedence(int p) {
169 /** add relevant data to terminals: term is looked up in the symbols
 *  table and, when the symbol found there is a terminal, the current
 *  precedence side and number (_cur_side, _cur_prec) are recorded on it
 *  through set_precedence. Every failure is reported on System.err
 *  rather than thrown. */
170 protected void add_precedence(String term) {
172 System.err.println("Unable to add precedence to nonexistent terminal");
174 symbol_part sp = (symbol_part)symbols.get(term);
/* this message reports a failed lookup, so it must say "not" */
176 System.err.println("Could not find terminal " + term + " while declaring precedence");
178 java_cup.symbol sym = sp.the_symbol();
/* only terminals can carry a precedence; anything else is reported */
179 if (sym instanceof terminal)
180 ((terminal)sym).set_precedence(_cur_side, _cur_prec);
181 else System.err.println("Precedence declaration: Can't find terminal " + term);
187 /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . */
191 /* override error routines */
/* Fatal-error override: the message is handed to lexer.emit_error and a
   giving-up notice is then printed on System.err. */
193 public void report_fatal_error(
198 lexer.emit_error(message);
199 System.err.println("Can't recover from previous error(s), giving up.");
/* Non-fatal override: the message is forwarded to lexer.emit_error; info is
   not consulted in the lines shown here. */
203 public void report_error(String message, Object info)
205 lexer.emit_error(message);
209 /*----------------------------------------------------------------*/
/* initialization code for the parser: sets up the lexer */
211 init with {: lexer.init(); :};
/* scanner invocation: each token is obtained from lexer.next_token() */
212 scan with {: return lexer.next_token(); :};
214 /*----------------------------------------------------------------*/
217 PACKAGE, IMPORT, CODE, ACTION, PARSER, TERMINAL, NON, INIT, SCAN, WITH,
218 START, SEMI, COMMA, STAR, DOT, COLON, COLON_COLON_EQUALS, BAR, PRECEDENCE,
219 LEFT, RIGHT, NONASSOC, PERCENT_PREC, LBRACK, RBRACK, NONTERMINAL;
221 terminal String ID, CODE_STRING;
224 spec, package_spec, import_list, action_code_part,
225 code_parts, code_part, opt_semi, non_terminal,
226 parser_code_part, symbol_list, start_spec, production_list,
227 multipart_id, import_spec, import_id, init_code, scan_code, symbol,
228 type_id, term_name_list, non_term_name_list, production, prod_part_list,
229 prod_part, new_term_id, new_non_term_id, rhs_list, rhs, empty,
230 precedence_list, preced, terminal_list, precedence_l, declares_term,
233 non terminal String nt_id, symbol_id, label_id, opt_label, terminal_id,
236 /*----------------------------------------------------------------*/
240 /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . */
244 /* declare "error" as a terminal */
245 symbols.put("error", new symbol_part(terminal.error));
247 /* declare start non terminal */
248 non_terms.put("$START", non_terminal.START_nt);
258 /* error recovery assuming something went wrong before symbols
259 and we have TERMINAL or NON TERMINAL to sync on. if we get
260 an error after that, we recover inside symbol_list or
270 /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . */
276 /* save the package name */
277 emit.package_name = multipart_name;
279 /* reset the accumulated multipart name */
280 multipart_name = new String();
287 /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . */
296 /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . */
302 /* save this import on the imports list */
303 emit.import_list.push(multipart_name);
305 /* reset the accumulated multipart name */
306 multipart_name = new String();
311 /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . */
313 // allow any order; all parts are optional. [CSA, 23-Jul-1999]
314 // (we check in the part action to make sure we don't have 2 of any part)
316 action_code_part | parser_code_part | init_code | scan_code ;
318 | code_parts code_part;
320 /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . */
323 ACTION CODE CODE_STRING:user_code opt_semi
/* only the first action code section is kept: once emit.action_code is set,
   a further section is reported through the lexer and its string is dropped */
325 if (emit.action_code!=null)
326 lexer.emit_error("Redundant action code (skipping)");
327 else /* save the user included code string */
328 emit.action_code = user_code;
332 /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . */
335 PARSER CODE CODE_STRING:user_code opt_semi
/* only the first parser code section is kept: once emit.parser_code is set,
   a further section is reported through the lexer and its string is dropped */
337 if (emit.parser_code!=null)
338 lexer.emit_error("Redundant parser code (skipping)");
339 else /* save the user included code string */
340 emit.parser_code = user_code;
344 /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . */
347 INIT WITH CODE_STRING:user_code opt_semi
/* only the first init section is kept: once emit.init_code is set, a
   further section is reported through the lexer and its string is dropped */
349 if (emit.init_code!=null)
350 lexer.emit_error("Redundant init code (skipping)");
351 else /* save the user code */
352 emit.init_code = user_code;
356 /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . */
359 SCAN WITH CODE_STRING:user_code opt_semi
/* only the first scan section is kept: once emit.scan_code is set, a
   further section is reported through the lexer and its string is dropped */
361 if (emit.scan_code!=null)
362 lexer.emit_error("Redundant scan code (skipping)");
363 else /* save the user code */
364 emit.scan_code = user_code;
368 /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . */
/* one or more symbol entries, gathered left-recursively */
370 symbol_list ::= symbol_list symbol | symbol;
372 /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . */
389 /* error recovery productions -- sync on semicolon */
394 /* reset the accumulated multipart name */
395 multipart_name = new String();
402 /* reset the accumulated multipart name */
403 multipart_name = new String();
408 /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . */
413 /* reset the accumulated multipart name */
414 multipart_name = new String();
418 /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . */
420 declares_non_term ::=
423 /* reset the accumulated multipart name */
424 multipart_name = new String();
428 /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . */
/* one or more new_term_id entries separated by COMMA, gathered left-recursively */
430 term_name_list ::= term_name_list COMMA new_term_id | new_term_id;
432 /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . */
434 non_term_name_list ::=
442 /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . */
/* the precedence section is optional: either a precedence_l or nothing at all */
445 precedence_list ::= precedence_l | empty;
447 /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . */
/* one or more preced entries, gathered left-recursively */
449 precedence_l ::= precedence_l preced | preced;
451 /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . */
456 update_precedence(assoc.left);
462 update_precedence(assoc.right);
468 update_precedence(assoc.nonassoc);
473 /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . */
475 terminal_list ::= terminal_list COMMA terminal_id
480 /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . */
482 terminal_id ::= term_id:sym
488 /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . */
490 term_id ::= symbol_id:sym
492 /* check that the symbol_id is a terminal */
493 if (symbols.get(sym) == null)
495 /* issue a message */
496 lexer.emit_error("Terminal \"" + sym +
497 "\" has not been declared");
502 /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . */
505 START WITH nt_id:start_name
507 /* verify that the name has been declared as a non terminal:
   the lookup goes through the non_terms table only */
508 non_terminal nt = (non_terminal)non_terms.get(start_name);
511 lexer.emit_error( "Start non terminal \"" + start_name +
512 "\" has not been declared");
516 /* remember the non-terminal for later */
519 /* build a special start production: its right hand side is the start
   non terminal labelled start_val, then EOF, then an action that copies
   start_val into RESULT; its left hand side is non_terminal.START_nt */
521 add_rhs_part(add_lab(new symbol_part(start_nt), "start_val"));
522 add_rhs_part(new symbol_part(terminal.EOF));
523 add_rhs_part(new action_part("RESULT = start_val;"));
524 emit.start_production =
525 new production(non_terminal.START_nt, rhs_parts, rhs_pos);
534 /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . */
/* one or more productions, gathered left-recursively */
536 production_list ::= production_list production | production;
538 /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . */
543 /* lookup the lhs nt */
544 lhs_nt = (non_terminal)non_terms.get(lhs_id);
546 /* if it wasn't declared, emit a message */
549 if (lexer.error_count == 0)
550 lexer.emit_error("LHS non terminal \"" + lhs_id +
551 "\" has not been declared");
554 /* reset the rhs accumulation */
563 {: lexer.emit_error("Syntax Error"); :}
567 /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . */
/* one or more right hand sides separated by BAR, gathered left-recursively */
569 rhs_list ::= rhs_list BAR rhs | rhs;
571 /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . */
574 prod_part_list PERCENT_PREC term_id:term_name
576 java_cup.symbol sym = null;
579 /* Find the precedence symbol: it is taken from the symbol_part stored
   under term_name in the symbols table */
580 if (term_name == null) {
581 System.err.println("No terminal for contextual precedence");
584 sym = ((symbol_part)symbols.get(term_name)).the_symbol();
586 /* build the production: when sym is a terminal its precedence number and
   side are passed along and the symbol is marked as used; otherwise a
   warning is printed and the plain three-argument form is used */
588 if ((sym!=null) && (sym instanceof terminal)) {
589 p = new production(lhs_nt, rhs_parts, rhs_pos,
590 ((terminal)sym).precedence_num(),
591 ((terminal)sym).precedence_side());
592 ((symbol_part)symbols.get(term_name)).the_symbol().note_use();
594 System.err.println("Invalid terminal " + term_name +
595 " for contextual precedence assignment");
596 p = new production(lhs_nt, rhs_parts, rhs_pos);
599 /* if we have no start non-terminal declared and this is
600 the first production, make its lhs nt the start_nt
601 and build a special start production for it. */
602 if (start_nt == null)
606 /* build a special start production: start_nt labelled start_val, then
   EOF, then an action copying start_val into RESULT */
608 add_rhs_part(add_lab(new symbol_part(start_nt),"start_val"));
609 add_rhs_part(new symbol_part(terminal.EOF));
610 add_rhs_part(new action_part("RESULT = start_val;"));
/* the start production gets the same precedence treatment as p above */
611 if ((sym!=null) && (sym instanceof terminal)) {
612 emit.start_production =
613 new production(non_terminal.START_nt, rhs_parts,
614 rhs_pos, ((terminal)sym).precedence_num(),
615 ((terminal)sym).precedence_side());
617 emit.start_production =
618 new production(non_terminal.START_nt, rhs_parts, rhs_pos);
624 /* reset the rhs accumulation in any case */
632 /* build the production from the parts accumulated in rhs_parts, using
   rhs_pos as the count, with lhs_nt as its left hand side */
633 production p = new production(lhs_nt, rhs_parts, rhs_pos);
635 /* if we have no start non-terminal declared and this is
636 the first production, make its lhs nt the start_nt
637 and build a special start production for it. */
638 if (start_nt == null)
642 /* build a special start production: start_nt labelled start_val, then
   EOF, then an action copying start_val into RESULT */
644 add_rhs_part(add_lab(new symbol_part(start_nt),"start_val"));
645 add_rhs_part(new symbol_part(terminal.EOF));
646 add_rhs_part(new action_part("RESULT = start_val;"));
647 emit.start_production =
648 new production(non_terminal.START_nt, rhs_parts, rhs_pos);
654 /* reset the rhs accumulation in any case */
659 /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . */
/* zero or more prod_part entries: the base case is empty, so a right hand side may have no parts */
661 prod_part_list ::= prod_part_list prod_part | empty;
663 /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . */
666 symbol_id:symid opt_label:labid
668 /* try to look up the id in the symbols table, which holds production parts */
669 production_part symb = (production_part)symbols.get(symid);
671 /* if that fails, symbol is undeclared */
/* the message is only issued while no earlier error has been counted */
674 if (lexer.error_count == 0)
675 lexer.emit_error("java_cup.runtime.Symbol \"" + symid +
676 "\" has not been declared");
680 /* add a labeled production part (add_lab returns symb itself when labid is null) */
681 add_rhs_part(add_lab(symb, labid));
687 /* add a new production part wrapping the code string as an action_part */
688 add_rhs_part(new action_part(code_str));
692 /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . */
696 {: RESULT = labid; :}
702 /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . */
705 multipart_id DOT robust_id:another_id
706 {: append_multipart(another_id); :}
709 {: append_multipart(an_id); :}
712 /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . */
715 multipart_id DOT STAR
716 {: append_multipart("*"); :}
721 /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . */
723 type_id ::= multipart_id
724 | type_id LBRACK RBRACK
725 {: multipart_name = multipart_name.concat("[]"); :}
728 /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . */
733 /* see if this terminal has been declared before: any existing entry
   under this name in the symbols table counts as a redeclaration */
734 if (symbols.get(term_id) != null)
736 /* issue a message */
737 lexer.emit_error("java_cup.runtime.Symbol \"" + term_id +
738 "\" has already been declared");
742 /* if no type declared, declare one: an empty multipart_name means no
   type was accumulated, so "Object" is used */
743 if (multipart_name.equals("")) {
744 append_multipart("Object");
746 /* build a production_part and put it in the table */
748 new symbol_part(new terminal(term_id, multipart_name)));
753 /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . */
758 /* see if this non terminal has been declared before: any existing entry
   under this name in the symbols table counts as a redeclaration */
759 if (symbols.get(non_term_id) != null)
761 /* issue a message */
762 lexer.emit_error( "java_cup.runtime.Symbol \"" + non_term_id +
763 "\" has already been declared");
/* an empty multipart_name means no type was accumulated, so "Object" is used */
767 if (multipart_name.equals("")) {
768 append_multipart("Object");
770 /* build the non terminal object */
771 non_terminal this_nt =
772 new non_terminal(non_term_id, multipart_name);
774 /* put it in the non_terms table */
775 non_terms.put(non_term_id, this_nt);
777 /* build a production_part and put it in the symbols table, so the
   non terminal is registered in both tables under the same name */
778 symbols.put(non_term_id, new symbol_part(this_nt));
783 /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . */
787 {: RESULT = the_id; :}
790 lexer.emit_error("Illegal use of reserved word");
795 /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . */
799 {: RESULT = the_id; :}
802 lexer.emit_error("Illegal use of reserved word");
807 /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . */
811 {: RESULT = the_id; :}
814 /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . */
816 robust_id ::= /* all ids that aren't reserved words in Java */
817 ID:the_id {: RESULT = the_id; :}
818 /* package is reserved. */
819 /* import is reserved. */
/* each keyword terminal below is accepted as an identifier and yields its own spelling */
820 | CODE {: RESULT = "code"; :}
821 | ACTION {: RESULT = "action"; :}
822 | PARSER {: RESULT = "parser"; :}
823 | TERMINAL {: RESULT = "terminal"; :}
824 | NON {: RESULT = "non"; :}
825 | NONTERMINAL {: RESULT = "nonterminal"; :}
826 | INIT {: RESULT = "init"; :}
827 | SCAN {: RESULT = "scan"; :}
828 | WITH {: RESULT = "with"; :}
829 | START {: RESULT = "start"; :}
830 | PRECEDENCE {: RESULT = "precedence"; :}
831 | LEFT {: RESULT = "left"; :}
832 | RIGHT {: RESULT = "right"; :}
833 | NONASSOC {: RESULT = "nonassoc"; :}
/* NOTE(review): the alternative this message belongs to is not visible in this listing */
836 lexer.emit_error("Illegal use of reserved word");
841 /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . */
/* both spellings are accepted: the two-token NON TERMINAL and the single token NONTERMINAL */
843 non_terminal ::= NON TERMINAL | NONTERMINAL;
845 /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . */
847 opt_semi ::= /* nothing */
850 /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . */
/* matches no input at all; used wherever a construct may be absent */
852 empty ::= /* nothing */;
854 /*----------------------------------------------------------------*/