3 import java.io.IOException;
5 import java.io.LineNumberReader;
9 * Copyright (C) 2002 C. Scott Ananian <cananian@alumni.princeton.edu>
10 * This program is released under the terms of the GPL; see the file
11 * COPYING for more details. There is NO WARRANTY on this code.
// Source of input lines; the three-argument constructor wraps the caller-supplied Reader in an
// EscapedUnicodeReader and then in a LineNumberReader, and nextLine() calls readLine() on it.
15 LineNumberReader reader;
// Line number quoted in the IOException messages and in errorMsg().
23 public int line_num = 0;
// Newest record of a linked list of LineList entries; nextLine() prepends a record each time
// it runs and errorMsg() walks the chain to map a symbol offset back to a line.
24 LineList lineL = new LineList(-line_pos, null); // sentinel for line #0
// Convenience constructor: delegates to Lexer(Reader, boolean, boolean) with both
// boolean arguments set to true.
26 public Lexer(Reader reader) {
27 this(reader, true, true);
// Builds a lexer over the given reader. The reader is wrapped in an EscapedUnicodeReader and
// then in a LineNumberReader, which nextLine() reads one line at a time.
// NOTE(review): task and dsm presumably initialise the taskExt and dsmExt switches tested in
// getIdentifier(); those assignments are not visible in this excerpt -- confirm.
30 public Lexer(Reader reader, boolean task, boolean dsm) {
31 this.reader = new LineNumberReader(new EscapedUnicodeReader(reader));
// Fetches the next symbol: taken from the lookahead FIFO when one exists (shouldBePLT()
// creates it), otherwise scanned directly with _nextToken().
// NOTE(review): whatever happens to sym between this assignment and the return is not
// visible in this excerpt.
38 public java_cup.runtime.Symbol nextToken() throws java.io.IOException {
39 java_cup.runtime.Symbol sym =
40 lookahead==null?_nextToken():lookahead.get();
// Lookahead test for an LT symbol: inspects upcoming symbols through lookahead.peek(i) and
// checks that they follow the shape spelled out by the comments in the body.
// NOTE(review): the return statements are not visible in this excerpt.
44 private boolean shouldBePLT() throws java.io.IOException {
45 // look ahead to see if this LT should be changed to a PLT
// The test only proceeds when the previously recorded symbol (last) is an IDENTIFIER.
46 if (last==null || last.sym!=Sym.IDENTIFIER)
// Create the lookahead FIFO on first use; its Getter obtains symbols from _nextToken().
48 if (lookahead==null) lookahead = new FIFO(new FIFO.Getter() {
49 java_cup.runtime.Symbol next() throws java.io.IOException
50 { return _nextToken(); }
53 // skip past IDENTIFIER (DOT IDENTIFIER)*
54 if (lookahead.peek(i++).sym != Sym.IDENTIFIER)
56 while (lookahead.peek(i).sym == Sym.DOT) {
58 if (lookahead.peek(i++).sym != Sym.IDENTIFIER)
61 // skip past (LBRACK RBRACK)*
62 while (lookahead.peek(i).sym == Sym.LBRACK) {
64 if (lookahead.peek(i++).sym != Sym.RBRACK)
67 // now the next sym has to be one of LT GT COMMA EXTENDS IMPLEMENTS
68 switch(lookahead.peek(i).sym) {
// Previously seen symbol, consulted by shouldBePLT().
// NOTE(review): the place where it is assigned is not visible in this excerpt.
79 private java_cup.runtime.Symbol last = null;
// Queue of symbols used for lookahead; stays null until shouldBePLT() first creates it.
80 private FIFO lookahead = null;
// Reads input elements with getInputElement() until one is a Token, then returns the symbol
// produced by that Token with its left and right fields overwritten by offsets computed from
// lineL.head + line_pos. The text of a DocumentationComment met on the way is saved in comment.
81 public java_cup.runtime.Symbol _nextToken() throws java.io.IOException {
83 * Identifiers/Keywords/true/false/null (start with java letter)
84 * numeric literal (start with number)
85 * character literal (start with single quote)
86 * string (start with double quote)
87 * separator (parens, braces, brackets, semicolon, comma, period)
88 * operator (equals, plus, minus, etc)
90 * comment (start with slash)
// Offset of the element about to be read: head offset of the current line plus the column.
95 startpos = lineL.head + line_pos;
96 ie = getInputElement();
97 if (ie instanceof DocumentationComment)
98 comment = ((Comment)ie).getComment();
99 } while (!(ie instanceof Token));
// consume() leaves line_pos one past the last character read, hence the -1.
100 endpos = lineL.head + line_pos - 1;
102 // System.out.println(ie.toString()); // uncomment to debug lexer.
103 java_cup.runtime.Symbol sym = ((Token)ie).token();
104 // fix up left/right positions.
105 sym.left = startpos; sym.right = endpos;
// Debugging aid: reads one input element, prints it to System.out, and returns false when
// that element is an EOF and true otherwise.
109 public boolean debug_lex() throws java.io.IOException {
110 InputElement ie = getInputElement();
111 System.out.println(ie);
112 return !(ie instanceof EOF);
// NOTE(review): body not visible in this excerpt; presumably returns the documentation
// comment text that _nextToken() stores in comment -- confirm.
116 public String lastComment() {
// NOTE(review): body not visible in this excerpt; presumably resets the stored comment
// text -- confirm.
119 public void clearComment() {
// Returns the next InputElement at line_pos. In the part visible here, a whitespace character
// is consumed and returned as a WhiteSpace element; callers additionally test the result for
// Token, DocumentationComment and EOF.
123 InputElement getInputElement() throws java.io.IOException {
128 if (line.length()<=line_pos) { // end of line.
134 switch (line.charAt(line_pos)) {
137 case ' ': // ASCII SP
138 case '\t': // ASCII HT
139 case '\f': // ASCII FF
140 case '\n': // LineTerminator
141 return new WhiteSpace(consume());
// NOTE(review): octal 020 is decimal 16 (ASCII DLE), whereas ASCII SUB is decimal 26,
// i.e. octal 032. Either the literal or the trailing comment is wrong -- confirm against
// the Sub input element in JLS 3.5.
144 case '\020': // ASCII SUB
// Called with a slash at line_pos and dispatches on the following character: a second slash
// yields an EndOfLineComment, a star starts a traditional or documentation comment that
// snarfComment() collects, and anything else is handled as a token.
157 // May get Token instead of Comment.
158 InputElement getComment() throws java.io.IOException {
160 // line.charAt(line_pos+0) is '/'
161 switch (line.charAt(line_pos+1)) {
162 case '/': // EndOfLineComment
// Everything after the two slashes (including the newline that nextLine() appends)
// becomes the comment text, and line_pos jumps to the end of the line.
163 comment = line.substring(line_pos+2);
164 line_pos = line.length();
165 return new EndOfLineComment(comment);
167 case '*': // TraditionalComment or DocumentationComment
// A star at line_pos at this point selects DocumentationComment.
// NOTE(review): the statement that moves line_pos past the opener is not visible here.
169 if (line.charAt(line_pos)=='*') { // DocumentationComment
170 return snarfComment(new DocumentationComment());
171 } else { // TraditionalComment
172 return snarfComment(new TraditionalComment());
175 default: // it's a token, not a comment.
// Collects the body of a multi-line comment into c via appendLine() and returns c once the
// closing star-slash pair has been skipped.
// Throws IOException when the input ends before the comment is closed.
180 Comment snarfComment(Comment c) throws java.io.IOException {
181 StringBuffer text=new StringBuffer();
182 while(true) { // Grab CommentTail
183 while (line.charAt(line_pos)!='*') { // Add NotStar to comment.
184 int star_pos = line.indexOf('*', line_pos);
// Flush the rest of the current line into c and park line_pos at the end of the line
// (presumably the branch taken when indexOf found no star; the guard is not visible here).
186 text.append(line.substring(line_pos));
187 c.appendLine(text.toString()); text.setLength(0);
188 line_pos = line.length();
191 throw new IOException("Unterminated comment at end of file.");
// Copy the text that precedes the star found on this line.
193 text.append(line.substring(line_pos, star_pos));
197 // At this point, line.charAt(line_pos)=='*'
198 // Grab CommentTailStar starting at line_pos+1.
199 if (line.charAt(line_pos+1)=='/') { // safe because line ends with '\n'
// End of comment: hand over the pending text and step past the two closing characters.
200 c.appendLine(text.toString()); line_pos+=2; return c;
202 text.append(line.charAt(line_pos++)); // add the '*'
// Recognises one token starting at line_pos, dispatching on its first character to the
// separator, operator, character-literal, string-literal, numeric-literal and identifier
// scanners. Throws IOException for a character that starts none of these.
206 Token getToken() throws java.io.IOException {
207 // Tokens are: Identifiers, Keywords, Literals, Separators, Operators.
208 switch (line.charAt(line_pos)) {
209 // Separators: (period is a special case)
219 return new Separator(consume());
237 return getOperator();
240 return getCharLiteral();
243 return getStringLiteral();
245 // a period is a special case:
// A period followed by a decimal digit starts a numeric literal.
247 if (Character.digit(line.charAt(line_pos+1),10)!=-1)
248 return getNumericLiteral();
// Three consecutive periods are consumed together and reported as one ellipsis separator.
250 line.charAt(line_pos+1)=='.' &&
251 line.charAt(line_pos+2)=='.') {
252 consume(); consume(); consume();
253 return new Separator('\u2026'); // unicode ellipsis character.
254 } else return new Separator(consume());
// Fallback classification by character class.
259 if (Character.isJavaIdentifierStart(line.charAt(line_pos)))
260 return getIdentifier();
261 if (Character.isDigit(line.charAt(line_pos)))
262 return getNumericLiteral();
263 throw new IOException("Illegal character on line "+line_num);
// Reserved words recognised by getIdentifier(), which binary-searches this array using
// String.compareTo; the entries must therefore stay in ascending compareTo order.
266 static final String[] keywords = new String[] {
267 "abstract", "assert", "atomic", "boolean", "break", "byte", "case", "catch", "char",
268 "class", "const", "continue",
269 "default", "disjoint", "do", "double",
271 "extends", "external", "final", "finally",
272 "flag", //keyword for failure aware computation
273 "float", "for", "genreach", "getoffset", "global", "goto", "if",
275 "import", "instanceof", "int",
279 "native", "new", "newflag", "optional", "package", "private", "protected", "public",
281 "scratch", "sese", "short", "static", "strictfp", "super", "switch", "synchronized",
282 "tag", "task", "taskexit", //keywords for failure aware computation
283 "this", "throw", "throws", "transient", "try", "void",
// Scans a run of Java identifier characters at line_pos and classifies it as NullLiteral,
// BooleanLiteral, Keyword or Identifier.
// Throws IOException when the first character is not a valid identifier start.
286 Token getIdentifier() throws java.io.IOException {
288 StringBuffer sb = new StringBuffer().append(consume());
290 if (!Character.isJavaIdentifierStart(sb.charAt(0)))
291 throw new IOException("Invalid Java Identifier on line "+line_num);
292 while (Character.isJavaIdentifierPart(line.charAt(line_pos)))
293 sb.append(consume());
294 String s = sb.toString();
295 // Now check against boolean literals and null literal.
296 if (s.equals("null")) return new NullLiteral();
297 if (s.equals("true")) return new BooleanLiteral(true);
298 if (s.equals("false")) return new BooleanLiteral(false);
299 // Check against keywords.
300 // pre-java 1.5 compatibility:
301 //if (!isJava15 && s.equals("enum")) return new Identifier(s);
302 // pre-java 1.4 compatibility:
// NOTE(review): the checks directly below concern the dsm and task language extensions, not
// Java 1.4; the 1.4 case is the assert check further down. With an extension switched off,
// its words are returned as plain identifiers.
303 if (!dsmExt && s.equals("global")) return new Identifier(s);
305 if (!taskExt && s.equals("taskexit")) return new Identifier(s);
306 if (!taskExt && s.equals("tag")) return new Identifier(s);
307 if (!taskExt && s.equals("flag")) return new Identifier(s);
308 if (!taskExt && s.equals("newflag")) return new Identifier(s);
310 if (!isJava14 && s.equals("assert")) return new Identifier(s);
311 // pre-java 1.2 compatibility:
312 if (!isJava12 && s.equals("strictfp")) return new Identifier(s);
313 // use binary search.
// Search the half-open index range l..r (r exclusive); relies on keywords being sorted.
314 for (int l=0, r=keywords.length; r > l; ) {
315 int x = (l+r)/2, cmp = s.compareTo(keywords[x]);
316 if (cmp < 0) r=x; else l=x+1;
317 if (cmp== 0) return new Keyword(s);
// Not a keyword: ordinary identifier.
320 return new Identifier(s);
// Classifies the numeric literal at line_pos by its prefix and by the first character after
// its leading decimal digits, then hands off to the floating-point or integer scanner.
322 NumericLiteral getNumericLiteral() throws java.io.IOException {
324 // leading decimal indicates float.
325 if (line.charAt(line_pos)=='.')
326 return getFloatingPointLiteral();
// A 0x or 0X prefix is skipped and the digits that follow are read in base 16.
328 if (line.charAt(line_pos)=='0' &&
329 (line.charAt(line_pos+1)=='x' ||
330 line.charAt(line_pos+1)=='X')) {
331 line_pos+=2; return getIntegerLiteral(/*base*/ 16);
333 // otherwise scan to first non-numeric
// i is a scratch index, so line_pos itself stays at the start of the literal.
334 for (i=line_pos; Character.digit(line.charAt(i),10)!=-1; )
336 switch(line.charAt(i)) { // discriminate based on first non-numeric
344 return getFloatingPointLiteral();
// Integer literal: a leading zero selects base 8, anything else base 10.
349 if (line.charAt(line_pos)=='0')
350 return getIntegerLiteral(/*base*/ 8);
351 return getIntegerLiteral(/*base*/ 10);
// Reads digits in the given radix into val; returns a LongLiteral when an l or L suffix
// follows, otherwise an IntegerLiteral after a range check.
// Throws IOException when the value fails the int range check.
// NOTE(review): the accumulation has no overflow guard, so a literal with enough digits
// presumably wraps silently (the declaration of val is not visible here) -- confirm.
354 NumericLiteral getIntegerLiteral(int radix) throws java.io.IOException {
356 while (Character.digit(line.charAt(line_pos),radix)!=-1)
357 val = (val*radix) + Character.digit(consume(),radix);
358 if (line.charAt(line_pos) == 'l' ||
359 line.charAt(line_pos) == 'L') {
361 return new LongLiteral(val);
363 // we compare MAX_VALUE against val/2 to allow constants like
364 // 0xFFFF0000 to get past the test. (unsigned long->signed int)
// NOTE(review): this check does not look at radix, so a decimal literal above
// Integer.MAX_VALUE but no larger than 0xFFFFFFFF also passes and is narrowed by the cast
// in the return statement.
365 if ((val/2) > Integer.MAX_VALUE ||
366 val < Integer.MIN_VALUE)
367 throw new IOException("Constant does not fit in integer on line "+line_num);
368 return new IntegerLiteral((int)val);
// Assembles the text of a floating-point literal in rep (digits, optional fraction, optional
// exponent) and converts it with Float.valueOf or Double.valueOf, chosen by the switch on the
// character that follows. A NumberFormatException is rethrown as an IOException.
370 NumericLiteral getFloatingPointLiteral() throws java.io.IOException {
371 String rep = getDigits();
372 if (line.charAt(line_pos)=='.')
// char + String is string concatenation here: the period followed by the fraction digits.
373 rep+=consume() + getDigits();
374 if (line.charAt(line_pos)=='e' ||
375 line.charAt(line_pos)=='E') {
// Optional sign of the exponent.
377 if (line.charAt(line_pos)=='+' ||
378 line.charAt(line_pos)=='-')
383 switch (line.charAt(line_pos)) {
387 return new FloatLiteral(Float.valueOf(rep).floatValue());
395 return new DoubleLiteral(Double.valueOf(rep).doubleValue());
397 } catch (NumberFormatException e) {
398 throw new IOException("Illegal floating-point on line "+line_num+": "+e);
// NOTE(review): the method header is not visible in this excerpt; this is presumably the body
// of getDigits(), which getFloatingPointLiteral() calls.
// Consumes consecutive decimal digits at line_pos and returns them as a string, which is
// empty when the current character is not a digit.
402 StringBuffer sb = new StringBuffer();
403 while (Character.digit(line.charAt(line_pos),10)!=-1)
404 sb.append(consume());
405 return sb.toString();
// Scans an operator starting at line_pos: consumes the first character, peeks at the second
// without consuming it, and builds a one-, two-, three- or four-character Operator.
// NOTE(review): the case labels that select each branch are not visible in this excerpt.
408 Operator getOperator() {
409 char first = consume();
410 char second= line.charAt(line_pos);
413 // single-character operators.
417 return new Operator(new String(new char[] {first}));
425 return new Operator(new String(new char[] {first, consume()}));
430 // Check for trailing '='
432 return new Operator(new String(new char[] {first, consume()}));
434 // Special-case '<<', '>>' and '>>>'
435 if ((first=='<' && second=='<') || // <<
436 (first=='>' && second=='>')) { // >>
// Start from the two-character shift and extend it below as further characters match.
437 String op = new String(new char[] {first, consume()});
438 if (first=='>' && line.charAt(line_pos)=='>') // >>>
440 if (line.charAt(line_pos)=='=') // <<=, >>=, >>>=
442 return new Operator(op);
445 // Otherwise return single operator.
446 return new Operator(new String(new char[] {first}));
// Scans a character literal: opening quote, one character or escape sequence, closing quote.
// Throws IOException with the current line number when the literal is malformed.
449 CharacterLiteral getCharLiteral() throws java.io.IOException {
450 char firstquote = consume();
452 switch (line.charAt(line_pos)) {
454 val = getEscapeSequence();
458 throw new IOException("Invalid character literal on line "+line_num);
461 throw new IOException("Invalid character literal on line "+line_num);
467 char secondquote = consume();
// Both delimiters must be single quotes.
468 if (firstquote != '\'' || secondquote != '\'')
469 throw new IOException("Invalid character literal on line "+line_num);
470 return new CharacterLiteral(val);
// Scans a string literal: consumes the opening quote, accumulates characters and decoded
// escape sequences up to the closing quote, and returns the interned text as a StringLiteral.
// Throws IOException with the current line number when the literal is malformed.
472 StringLiteral getStringLiteral() throws java.io.IOException {
473 char openquote = consume();
474 StringBuffer val = new StringBuffer();
475 while (line.charAt(line_pos)!='\"') {
476 switch(line.charAt(line_pos)) {
478 val.append(getEscapeSequence());
482 throw new IOException("Invalid string literal on line " + line_num);
485 val.append(consume());
489 char closequote = consume();
// Both delimiters must be double quotes.
490 if (openquote != '\"' || closequote != '\"')
491 throw new IOException("Invalid string literal on line " + line_num);
493 return new StringLiteral(val.toString().intern());
// Decodes one backslash escape at line_pos and returns the character it denotes. The visible
// branches cover the single-character escapes and octal escapes read through getOctal().
// Throws IOException when the sequence does not start with a backslash or is not recognised.
496 char getEscapeSequence() throws java.io.IOException {
497 if (consume() != '\\')
498 throw new IOException("Invalid escape sequence on line " + line_num);
499 switch(line.charAt(line_pos)) {
501 consume(); return '\b';
504 consume(); return '\t';
507 consume(); return '\n';
510 consume(); return '\f';
513 consume(); return '\r';
516 consume(); return '\"';
519 consume(); return '\'';
522 consume(); return '\\';
// Octal escapes: up to three digits on one branch, up to two on the other.
528 return (char) getOctal(3);
534 return (char) getOctal(2);
537 throw new IOException("Invalid escape sequence on line " + line_num);
// Reads at most maxlength octal digits at line_pos, accumulating their value in val.
// Throws IOException when i is 0 after the loop or when val exceeds 0xFF.
// NOTE(review): the loop exit for a non-digit and the return statement are not visible in
// this excerpt; presumably val is returned -- confirm.
540 int getOctal(int maxlength) throws java.io.IOException {
542 for (i=0; i<maxlength; i++)
543 if (Character.digit(line.charAt(line_pos), 8)!=-1) {
544 val = (8*val) + Character.digit(consume(), 8);
546 if ((i==0) || (val>0xFF)) // impossible.
547 throw new IOException("Invalid octal escape sequence in line " + line_num);
// NOTE(review): the method header is not visible in this excerpt; presumably the body of
// consume(). Returns the character at line_pos and then advances line_pos by one.
552 return line.charAt(line_pos++);
// Reads the next line from reader into line. readLine() strips the terminator, so a newline
// is appended again whenever a line was actually read; line stays null at end of input.
554 void nextLine() throws java.io.IOException {
555 line=reader.readLine();
556 if (line!=null) line=line+'\n';
// Record the new head offset (previous head plus the current line_pos) on top of the chain.
557 lineL = new LineList(lineL.head+line_pos, lineL); // for error reporting
// Prints msg to System.err followed by the line number on which info starts.
562 // Deal with error messages.
563 public void errorMsg(String msg, java_cup.runtime.Symbol info) {
564 int n=line_num, c=info.left-lineL.head;
// Walk the LineList chain from the newest record backwards, decrementing n at each step,
// until a record whose head offset is not beyond info.left is found.
565 for (LineList p = lineL; p!=null; p=p.tail, n--)
566 if (p.head<=info.left) {
567 c=info.left-p.head; break;
// NOTE(review): the column c is computed above but does not appear in this message.
569 System.err.println(msg+" at line "+n);
// Error counter, initially zero.
// NOTE(review): neither the place where it is incremented nor the body of numErrors() is
// visible in this excerpt; presumably numErrors() returns this counter -- confirm.
572 private int num_errors = 0;
573 public int numErrors() {
// Creates one record of the line chain: head is the offset stored for a line (nextLine()
// passes the previous head plus line_pos) and tail links to the previously created record.
580 LineList(int head, LineList tail) {
581 this.head = head; this.tail = tail;