3 import java.io.IOException;
5 import java.io.LineNumberReader;
9 * Copyright (C) 2002 C. Scott Ananian <cananian@alumni.princeton.edu>
10 * This program is released under the terms of the GPL; see the file
11 * COPYING for more details. There is NO WARRANTY on this code.
// Source of input lines; the two-argument constructor assigns a LineNumberReader here.
15 LineNumberReader reader;
// Line counter, initially 0; the value is embedded in the lexer's error messages.
22 public int line_num = 0;
// Most recent node of the linked list of line records that nextLine() extends
// and errorMsg() walks; it starts out as a single sentinel node.
23 LineList lineL = new LineList(-line_pos, null); // sentinel for line #0
25 public Lexer(Reader reader) {
/**
 * Creates a lexer over {@code reader}. The reader is wrapped in an
 * EscapedUnicodeReader and then in a LineNumberReader, which becomes the
 * lexer's line source.
 *
 * @param reader character source to tokenize
 * @param task   NOTE(review): its use is not visible in this excerpt; presumably
 *               it enables the task-extension keywords -- confirm.
 */
29 public Lexer(Reader reader, boolean task) {
30 this.reader = new LineNumberReader(new EscapedUnicodeReader(reader));
/**
 * Fetches the next symbol: when no lookahead queue exists the symbol is lexed
 * directly with _nextToken(), otherwise it is taken from the queue.
 * (The remainder of the method is not visible in this excerpt.)
 *
 * @throws java.io.IOException propagated from the lexing calls
 */
36 public java_cup.runtime.Symbol nextToken() throws java.io.IOException {
37 java_cup.runtime.Symbol sym =
38 lookahead==null ? _nextToken() : lookahead.get();
/**
 * Decides whether the current LT should be changed to a PLT by peeking at
 * upcoming symbols through the lookahead queue.
 * The peeked symbols are expected to form IDENTIFIER (DOT IDENTIFIER)*,
 * optionally followed by (LBRACK RBRACK)*; the symbol after that run is then
 * examined by the final switch.
 *
 * @throws java.io.IOException if lexing a lookahead symbol fails
 */
42 private boolean shouldBePLT() throws java.io.IOException {
43 // look ahead to see if this LT should be changed to a PLT
// The check only applies when the previous symbol exists and is an IDENTIFIER.
44 if (last==null || last.sym!=Sym.IDENTIFIER)
// Create the lookahead queue on first use; it obtains symbols from _nextToken().
46 if (lookahead==null) lookahead = new FIFO(new FIFO.Getter() {
47 java_cup.runtime.Symbol next() throws java.io.IOException
48 { return _nextToken(); }
51 // skip past IDENTIFIER (DOT IDENTIFIER)*
52 if (lookahead.peek(i++).sym != Sym.IDENTIFIER)
54 while (lookahead.peek(i).sym == Sym.DOT) {
56 if (lookahead.peek(i++).sym != Sym.IDENTIFIER)
59 // skip past (LBRACK RBRACK)*
60 while (lookahead.peek(i).sym == Sym.LBRACK) {
62 if (lookahead.peek(i++).sym != Sym.RBRACK)
65 // now the next sym has to be one of LT GT COMMA EXTENDS IMPLEMENTS
66 switch(lookahead.peek(i).sym) {
// Symbol that shouldBePLT() inspects as the previous symbol; initially null.
77 private java_cup.runtime.Symbol last = null;
// Lookahead queue, created on demand by shouldBePLT(); while it is null,
// nextToken() lexes directly with _nextToken().
78 private FIFO lookahead = null;
/**
 * Lexes the next Token, reading input elements until one is a Token and
 * discarding the others. When a discarded element is a DocumentationComment
 * its text is stored in {@code comment}. The resulting symbol's left/right
 * fields are overwritten with offsets derived from lineL.head + line_pos,
 * sampled before the element is read and after the loop ends.
 *
 * @throws java.io.IOException propagated from getInputElement()
 */
79 public java_cup.runtime.Symbol _nextToken() throws java.io.IOException {
81 * Identifiers/Keywords/true/false/null (start with java letter)
82 * numeric literal (start with number)
83 * character literal (start with single quote)
84 * string (start with double quote)
85 * separator (parens, braces, brackets, semicolon, comma, period)
86 * operator (equals, plus, minus, etc)
88 * comment (start with slash)
93 startpos = lineL.head + line_pos;
94 ie = getInputElement();
95 if (ie instanceof DocumentationComment)
96 comment = ((Comment)ie).getComment();
97 } while (!(ie instanceof Token));
// The end offset is inclusive, hence the -1.
98 endpos = lineL.head + line_pos - 1;
100 // System.out.println(ie.toString()); // uncomment to debug lexer.
101 java_cup.runtime.Symbol sym = ((Token)ie).token();
102 // fix up left/right positions.
103 sym.left = startpos; sym.right = endpos;
/**
 * Debugging aid: reads one input element, prints it to standard output, and
 * reports whether more input may follow.
 *
 * @return false when the element just read is an EOF, true otherwise
 * @throws java.io.IOException propagated from getInputElement()
 */
107 public boolean debug_lex() throws java.io.IOException {
108 InputElement ie = getInputElement();
109 System.out.println(ie);
110 return !(ie instanceof EOF);
114 public String lastComment() {
117 public void clearComment() {
/**
 * Reads the input element starting at the current position in the current
 * line. White space characters (space, tab, form feed, line terminator) are
 * returned as a WhiteSpace element; the ASCII SUB character gets its own
 * case. (Handling for the remaining characters is not visible in this excerpt.)
 *
 * @throws java.io.IOException propagated from reading further input
 */
121 InputElement getInputElement() throws java.io.IOException {
126 if (line.length()<=line_pos) { // end of line.
132 switch (line.charAt(line_pos)) {
135 case ' ': // ASCII SP
136 case '\t': // ASCII HT
137 case '\f': // ASCII FF
138 case '\n': // LineTerminator
139 return new WhiteSpace(consume());
// ASCII SUB (Ctrl-Z) is code point 26, which is octal 032. The previous
// label '\020' is octal for 16 (DLE), so a real SUB was never matched here.
142 case '\032': // ASCII SUB
155 // May get Token instead of Comment.
/**
 * Handles input starting with '/': dispatches on the following character.
 * "//" yields an EndOfLineComment holding the rest of the line and moves the
 * position to the end of the line; "/*" yields a TraditionalComment or, when
 * the inspected character after the opener is another '*', a
 * DocumentationComment, both collected by snarfComment(). Any other
 * character means the slash begins a token rather than a comment.
 *
 * @throws java.io.IOException propagated from snarfComment()
 */
156 InputElement getComment() throws java.io.IOException {
158 // line.charAt(line_pos+0) is '/'
159 switch (line.charAt(line_pos+1)) {
160 case '/': // EndOfLineComment
161 comment = line.substring(line_pos+2);
162 line_pos = line.length();
163 return new EndOfLineComment(comment);
165 case '*': // TraditionalComment or DocumentationComment
167 if (line.charAt(line_pos)=='*') { // DocumentationComment
168 return snarfComment(new DocumentationComment());
169 } else { // TraditionalComment
170 return snarfComment(new TraditionalComment());
173 default: // it's a token, not a comment.
/**
 * Collects the body of a block comment into {@code c}, one appendLine() call
 * per source line, and returns {@code c} once the closing star-slash has been
 * consumed.
 *
 * @param c comment object that receives the text
 * @return the same object {@code c}
 * @throws java.io.IOException with "Unterminated comment at end of file."
 *         when the input ends before the comment is closed
 */
178 Comment snarfComment(Comment c) throws java.io.IOException {
179 StringBuffer text=new StringBuffer();
180 while(true) { // Grab CommentTail
181 while (line.charAt(line_pos)!='*') { // Add NotStar to comment.
182 int star_pos = line.indexOf('*', line_pos);
// No star case: take the rest of the line, flush it to the comment, and
// mark the line as fully consumed.
184 text.append(line.substring(line_pos));
185 c.appendLine(text.toString()); text.setLength(0);
186 line_pos = line.length();
189 throw new IOException("Unterminated comment at end of file.");
// Star found: copy everything before it.
191 text.append(line.substring(line_pos, star_pos));
195 // At this point, line.charAt(line_pos)=='*'
196 // Grab CommentTailStar starting at line_pos+1.
197 if (line.charAt(line_pos+1)=='/') { // safe because line ends with '\n'
198 c.appendLine(text.toString()); line_pos+=2; return c;
200 text.append(line.charAt(line_pos++)); // add the '*'
/**
 * Lexes one token starting at the current character, dispatching on that
 * character to the separator, operator, character-literal, string-literal,
 * numeric-literal or identifier routines.
 *
 * @throws java.io.IOException with "Illegal character on line N" when the
 *         character starts neither an identifier nor a number, or as
 *         propagated from the routines called
 */
204 Token getToken() throws java.io.IOException {
205 // Tokens are: Identifiers, Keywords, Literals, Separators, Operators.
206 switch (line.charAt(line_pos)) {
207 // Separators: (period is a special case)
217 return new Separator(consume());
235 return getOperator();
238 return getCharLiteral();
241 return getStringLiteral();
243 // a period is a special case:
// A period followed by a decimal digit starts a numeric literal.
245 if (Character.digit(line.charAt(line_pos+1),10)!=-1)
246 return getNumericLiteral();
// Three periods in a row are consumed together and returned as one separator.
248 line.charAt(line_pos+1)=='.' &&
249 line.charAt(line_pos+2)=='.') {
250 consume(); consume(); consume();
251 return new Separator('\u2026'); // unicode ellipsis character.
252 } else return new Separator(consume());
// Fallback: identifier start, then digit, otherwise the character is rejected.
257 if (Character.isJavaIdentifierStart(line.charAt(line_pos)))
258 return getIdentifier();
259 if (Character.isDigit(line.charAt(line_pos)))
260 return getNumericLiteral();
261 throw new IOException("Illegal character on line "+line_num);
// Reserved words recognised by the lexer. getIdentifier() looks names up in
// this array with a binary search based on String.compareTo, so the entries
// must stay in ascending compareTo order when new keywords are added.
264 static final String[] keywords = new String[] {
265 "abstract", "assert", "atomic", "boolean", "break", "byte", "case", "catch", "char",
266 "class", "const", "continue",
267 "default", "disjoint", "do", "double",
269 "extends", "external", "final", "finally",
270 "flag", //keyword for failure aware computation
271 "float", "for", "genreach", "getoffset", "global", "goto", "if",
273 "import", "instanceof", "int",
277 "native", "new", "newflag", "optional", "package", "private", "protected", "public",
279 "scratch", "sese", "short", "static", "strictfp", "super", "switch", "synchronized",
280 "tag", "task", "taskexit", //keywords for failure aware computation
281 "this", "throw", "throws", "transient", "try", "void",
/**
 * Lexes an identifier-shaped word and classifies it. The word is the first
 * character plus every following Java identifier-part character. It is
 * returned as a NullLiteral, a BooleanLiteral, a Keyword (when found in
 * {@code keywords}) or otherwise an Identifier. Some keywords are demoted to
 * plain identifiers when the corresponding feature flag (taskExt, isJava14,
 * isJava12 -- declared outside this excerpt) is off.
 *
 * @throws java.io.IOException with "Invalid Java Identifier on line N" when
 *         the first character cannot start a Java identifier
 */
284 Token getIdentifier() throws java.io.IOException {
286 StringBuffer sb = new StringBuffer().append(consume());
288 if (!Character.isJavaIdentifierStart(sb.charAt(0)))
289 throw new IOException("Invalid Java Identifier on line "+line_num);
290 while (Character.isJavaIdentifierPart(line.charAt(line_pos)))
291 sb.append(consume());
292 String s = sb.toString();
293 // Now check against boolean literals and null literal.
294 if (s.equals("null")) return new NullLiteral();
295 if (s.equals("true")) return new BooleanLiteral(true);
296 if (s.equals("false")) return new BooleanLiteral(false);
297 // Check against keywords.
298 // pre-java 1.5 compatibility:
299 //if (!isJava15 && s.equals("enum")) return new Identifier(s);
300 // pre-java 1.4 compatibility:
// Task-extension words are ordinary identifiers unless taskExt is set.
302 if (!taskExt && s.equals("taskexit")) return new Identifier(s);
303 if (!taskExt && s.equals("tag")) return new Identifier(s);
304 if (!taskExt && s.equals("flag")) return new Identifier(s);
305 if (!taskExt && s.equals("newflag")) return new Identifier(s);
307 if (!isJava14 && s.equals("assert")) return new Identifier(s);
308 // pre-java 1.2 compatibility:
309 if (!isJava12 && s.equals("strictfp")) return new Identifier(s);
310 // use binary search.
// Half-open search window [l, r) over the keywords array.
311 for (int l=0, r=keywords.length; r > l; ) {
312 int x = (l+r)/2, cmp = s.compareTo(keywords[x]);
313 if (cmp < 0) r=x;else l=x+1;
314 if (cmp== 0) return new Keyword(s);
317 return new Identifier(s);
/**
 * Lexes a numeric literal, choosing the concrete routine from its prefix:
 * a leading '.' goes to the floating-point routine, a leading "0x"/"0X" to
 * the integer routine with radix 16, and otherwise the first character after
 * the leading decimal digits selects between floating point and integer
 * (radix 8 when the literal starts with '0', radix 10 otherwise).
 *
 * @throws java.io.IOException propagated from the routines called
 */
319 NumericLiteral getNumericLiteral() throws java.io.IOException {
321 // leading decimal indicates float.
322 if (line.charAt(line_pos)=='.')
323 return getFloatingPointLiteral();
// Hexadecimal: skip the two-character prefix before reading digits.
325 if (line.charAt(line_pos)=='0' &&
326 (line.charAt(line_pos+1)=='x' ||
327 line.charAt(line_pos+1)=='X')) {
328 line_pos+=2; return getIntegerLiteral(/*base*/ 16);
330 // otherwise scan to first non-numeric
331 for (i=line_pos; Character.digit(line.charAt(i),10)!=-1; )
333 switch(line.charAt(i)) { // discriminate based on first non-numeric
341 return getFloatingPointLiteral();
346 if (line.charAt(line_pos)=='0')
347 return getIntegerLiteral(/*base*/ 8);
348 return getIntegerLiteral(/*base*/ 10);
/**
 * Lexes an integer literal in the given radix, accumulating the digits into
 * {@code val}. A trailing 'l' or 'L' produces a LongLiteral; otherwise the
 * value is range-checked and returned as an IntegerLiteral.
 *
 * @param radix numeric base used to interpret the digits
 * @throws java.io.IOException with "Constant does not fit in integer on line N"
 *         when the range check fails
 */
351 NumericLiteral getIntegerLiteral(int radix) throws java.io.IOException {
353 while (Character.digit(line.charAt(line_pos),radix)!=-1)
354 val = (val*radix) + Character.digit(consume(),radix);
355 if (line.charAt(line_pos) == 'l' ||
356 line.charAt(line_pos) == 'L') {
358 return new LongLiteral(val);
360 // we compare MAX_VALUE against val/2 to allow constants like
361 // 0xFFFF0000 to get past the test. (unsigned long->signed int)
362 if ((val/2) > Integer.MAX_VALUE ||
363 val < Integer.MIN_VALUE)
364 throw new IOException("Constant does not fit in integer on line "+line_num);
// The narrowing cast keeps the low 32 bits, so accepted values above
// Integer.MAX_VALUE become negative ints.
365 return new IntegerLiteral((int)val);
/**
 * Lexes a floating-point literal. The textual form is assembled in
 * {@code rep} from a digit run, an optional '.' plus digit run, and an
 * optional exponent introduced by 'e'/'E' with an optional sign. The text is
 * then converted to a FloatLiteral or a DoubleLiteral depending on the
 * character found at the end.
 *
 * @throws java.io.IOException with "Illegal floating-point on line N: ..."
 *         when the assembled text cannot be parsed as a number
 */
367 NumericLiteral getFloatingPointLiteral() throws java.io.IOException {
368 String rep = getDigits();
369 if (line.charAt(line_pos)=='.')
370 rep+=consume() + getDigits();
371 if (line.charAt(line_pos)=='e' ||
372 line.charAt(line_pos)=='E') {
374 if (line.charAt(line_pos)=='+' ||
375 line.charAt(line_pos)=='-')
380 switch (line.charAt(line_pos)) {
384 return new FloatLiteral(Float.valueOf(rep).floatValue());
392 return new DoubleLiteral(Double.valueOf(rep).doubleValue());
// Parsing failures are reported as IOException, like the lexer's other errors.
394 } catch (NumberFormatException e) {
395 throw new IOException("Illegal floating-point on line "+line_num+": "+e);
399 StringBuffer sb = new StringBuffer();
400 while (Character.digit(line.charAt(line_pos),10)!=-1)
401 sb.append(consume());
402 return sb.toString();
/**
 * Lexes an operator. The first character is consumed and the next one is
 * inspected to decide between a single-character operator, a two-character
 * operator, a compound assignment with trailing '=', or one of the shift
 * operators "<<", ">>", ">>>" and their '=' forms.
 */
405 Operator getOperator() {
406 char first = consume();
// Peek only; `second` is consumed later if it turns out to belong to the operator.
407 char second= line.charAt(line_pos);
410 // single-character operators.
414 return new Operator(new String(new char[] {first}));
422 return new Operator(new String(new char[] {first, consume()}));
427 // Check for trailing '='
429 return new Operator(new String(new char[] {first, consume()}));
431 // Special-case '<<', '>>' and '>>>'
432 if ((first=='<' && second=='<') || // <<
433 (first=='>' && second=='>')) { // >>
434 String op = new String(new char[] {first, consume()});
435 if (first=='>' && line.charAt(line_pos)=='>') // >>>
437 if (line.charAt(line_pos)=='=') // <<=, >>=, >>>=
439 return new Operator(op);
442 // Otherwise return single operator.
443 return new Operator(new String(new char[] {first}));
/**
 * Lexes a character literal: an opening quote, one character or escape
 * sequence, and a closing quote.
 *
 * @throws java.io.IOException with "Invalid character literal on line N" for
 *         a rejected content character or when either delimiter is not a
 *         single quote
 */
446 CharacterLiteral getCharLiteral() throws java.io.IOException {
447 char firstquote = consume();
449 switch (line.charAt(line_pos)) {
451 val = getEscapeSequence();
455 throw new IOException("Invalid character literal on line "+line_num);
458 throw new IOException("Invalid character literal on line "+line_num);
// Both delimiters are validated together once the content has been read.
464 char secondquote = consume();
465 if (firstquote != '\'' || secondquote != '\'')
466 throw new IOException("Invalid character literal on line "+line_num);
467 return new CharacterLiteral(val);
/**
 * Lexes a string literal: consumes the opening quote, accumulates characters
 * and decoded escape sequences until the next double quote, then consumes
 * the closing quote. The resulting text is interned before being wrapped in
 * a StringLiteral.
 *
 * @throws java.io.IOException with "Invalid string literal on line N" for a
 *         rejected content character or when either delimiter is not a
 *         double quote
 */
469 StringLiteral getStringLiteral() throws java.io.IOException {
470 char openquote = consume();
471 StringBuffer val = new StringBuffer();
472 while (line.charAt(line_pos)!='\"') {
473 switch(line.charAt(line_pos)) {
475 val.append(getEscapeSequence());
479 throw new IOException("Invalid string literal on line " + line_num);
482 val.append(consume());
486 char closequote = consume();
487 if (openquote != '\"' || closequote != '\"')
488 throw new IOException("Invalid string literal on line " + line_num);
490 return new StringLiteral(val.toString().intern());
/**
 * Decodes one backslash escape sequence at the current position and returns
 * the character it denotes. Handles the single-character escapes for
 * backspace, tab, newline, form feed, carriage return, double quote, single
 * quote and backslash, plus octal escapes read by getOctal() with a maximum
 * length of 3 or 2 digits.
 *
 * @throws java.io.IOException with "Invalid escape sequence on line N" when
 *         the sequence does not start with a backslash or is not recognised
 */
493 char getEscapeSequence() throws java.io.IOException {
494 if (consume() != '\\')
495 throw new IOException("Invalid escape sequence on line " + line_num);
496 switch(line.charAt(line_pos)) {
498 consume(); return '\b';
501 consume(); return '\t';
504 consume(); return '\n';
507 consume(); return '\f';
510 consume(); return '\r';
513 consume(); return '\"';
516 consume(); return '\'';
519 consume(); return '\\';
525 return (char) getOctal(3);
531 return (char) getOctal(2);
534 throw new IOException("Invalid escape sequence on line " + line_num);
/**
 * Reads up to {@code maxlength} octal digits from the current position and
 * accumulates their value.
 *
 * @param maxlength maximum number of digits to read
 * @throws java.io.IOException with "Invalid octal escape sequence in line N"
 *         when no digit was read or the value exceeds 0xFF
 */
537 int getOctal(int maxlength) throws java.io.IOException {
539 for (i=0; i<maxlength; i++)
540 if (Character.digit(line.charAt(line_pos), 8)!=-1) {
541 val = (8*val) + Character.digit(consume(), 8);
543 if ((i==0) || (val>0xFF)) // impossible.
544 throw new IOException("Invalid octal escape sequence in line " + line_num);
549 return line.charAt(line_pos++);
/**
 * Advances to the next line of input and records a new LineList node for it.
 * {@code line} becomes null once the reader is exhausted.
 *
 * @throws java.io.IOException propagated from the reader
 */
551 void nextLine() throws java.io.IOException {
552 line=reader.readLine();
// readLine() strips the line terminator; a '\n' is appended so the scanning
// code can rely on every non-null line ending with one.
553 if (line!=null) line=line+'\n';
// The new node's head is the previous head plus the position reached in the
// previous line.
554 lineL = new LineList(lineL.head+line_pos, lineL); // for error reporting
559 // Deal with error messages.
/**
 * Reports an error for the given symbol on standard error, including the
 * line number on which the symbol starts.
 *
 * @param msg  text of the error message
 * @param info symbol whose {@code left} offset locates the error
 */
560 public void errorMsg(String msg, java_cup.runtime.Symbol info) {
561 int n=line_num, c=info.left-lineL.head;
// Walk the line records from newest to oldest, decrementing the line number,
// until reaching a record whose head is at or before the symbol's offset;
// c is then the offset of the symbol relative to that record's head.
562 for (LineList p = lineL; p!=null; p=p.tail, n--)
563 if (p.head<=info.left) {
564 c=info.left-p.head; break;
566 System.err.println(msg+" at line "+n);
569 private int num_errors = 0;
570 public int numErrors() {
577 LineList(int head, LineList tail) {
578 this.head = head; this.tail = tail;