3 import java.io.IOException;
5 import java.io.LineNumberReader;
9 * Copyright (C) 2002 C. Scott Ananian <cananian@alumni.princeton.edu>
10 * This program is released under the terms of the GPL; see the file
11 * COPYING for more details. There is NO WARRANTY on this code.
// Line-based reader that supplies the source text; assigned in the constructor.
15 LineNumberReader reader;
// Line number reported in the lexer's error messages; starts at 0.
21 public int line_num = 0;
// Newest node of a linked list of per-line records (int head offset + tail link).
// nextLine() pushes a new node whose head is lineL.head+line_pos, and errorMsg()
// walks the list to map a symbol's character offset back to a line.
22 LineList lineL = new LineList(-line_pos, null); // sentinel for line #0
/**
 * Creates a lexer that reads program text from the given stream.
 * The stream is wrapped first in an EscapedUnicodeReader and then in a
 * LineNumberReader, which nextLine() reads from one line at a time.
 *
 * @param reader character source to tokenize
 */
24 public Lexer(Reader reader) {
25 this.reader = new LineNumberReader(new EscapedUnicodeReader(reader));
/**
 * Returns the next token.
 * When a lookahead queue exists (shouldBePLT creates it lazily) the symbol is
 * taken from that queue; otherwise it is scanned directly with _nextToken().
 * NOTE(review): the remainder of this method is not visible in this excerpt.
 *
 * @throws java.io.IOException propagated from the underlying scan
 */
30 public java_cup.runtime.Symbol nextToken() throws java.io.IOException {
31 java_cup.runtime.Symbol sym =
32 lookahead==null ? _nextToken() : lookahead.get();
/**
 * Decides, by peeking ahead in the token stream, whether the current LT
 * should be changed to a PLT.
 * Peeked symbols are held in the lookahead FIFO, which is created on first
 * use and filled through _nextToken().
 * NOTE(review): the return statements are not visible in this excerpt, so the
 * result of each test below is not documented here.
 *
 * @throws java.io.IOException propagated from scanning ahead
 */
36 private boolean shouldBePLT() throws java.io.IOException {
37 // look ahead to see if this LT should be changed to a PLT
// First test: is `last` missing or something other than an IDENTIFIER?
38 if (last==null || last.sym!=Sym.IDENTIFIER)
// Lazily create the lookahead queue; its getter pulls fresh symbols from the scanner.
40 if (lookahead==null) lookahead = new FIFO(new FIFO.Getter() {
41 java_cup.runtime.Symbol next() throws java.io.IOException
42 { return _nextToken(); }
45 // skip past IDENTIFIER (DOT IDENTIFIER)*
46 if (lookahead.peek(i++).sym != Sym.IDENTIFIER)
48 while (lookahead.peek(i).sym == Sym.DOT) {
50 if (lookahead.peek(i++).sym != Sym.IDENTIFIER)
53 // skip past (LBRACK RBRACK)*
54 while (lookahead.peek(i).sym == Sym.LBRACK) {
56 if (lookahead.peek(i++).sym != Sym.RBRACK)
59 // now the next sym has to be one of LT GT COMMA EXTENDS IMPLEMENTS
// The symbol at peek index i, just past the skipped type name, decides the outcome.
60 switch(lookahead.peek(i).sym) {
// Symbol consulted by shouldBePLT(); initially null.
// NOTE(review): presumably the most recently returned symbol; the assignment is not visible here.
71 private java_cup.runtime.Symbol last = null;
// Queue of peeked-ahead symbols; stays null until shouldBePLT() first creates it.
72 private FIFO lookahead = null;
/**
 * Scans forward to the next real token and returns its parser symbol.
 * Input elements are read with getInputElement() until one is a Token; the
 * text of any DocumentationComment met on the way is stored in `comment`.
 * The symbol's left/right fields are overwritten with positions computed as
 * lineL.head + line_pos before and after the scan.
 *
 * @throws java.io.IOException propagated from getInputElement()
 */
73 public java_cup.runtime.Symbol _nextToken() throws java.io.IOException {
75 * Identifiers/Keywords/true/false/null (start with java letter)
76 * numeric literal (start with number)
77 * character literal (start with single quote)
78 * string (start with double quote)
79 * separator (parens, braces, brackets, semicolon, comma, period)
80 * operator (equals, plus, minus, etc)
82 * comment (start with slash)
87 startpos = lineL.head + line_pos;
88 ie = getInputElement();
89 if (ie instanceof DocumentationComment)
90 comment = ((Comment)ie).getComment();
91 } while (!(ie instanceof Token));
// The end position is the last character consumed, hence the -1.
92 endpos = lineL.head + line_pos - 1;
94 // System.out.println(ie.toString()); // uncomment to debug lexer.
95 java_cup.runtime.Symbol sym = ((Token)ie).token();
96 // fix up left/right positions.
97 sym.left = startpos; sym.right = endpos;
/**
 * Debugging aid: reads one input element, prints it to standard output and
 * reports whether more input may follow.
 *
 * @return false when the element just read is an EOF, true otherwise
 * @throws java.io.IOException propagated from getInputElement()
 */
101 public boolean debug_lex() throws java.io.IOException {
102 InputElement ie = getInputElement();
103 System.out.println(ie);
104 return !(ie instanceof EOF);
// NOTE(review): body not visible in this excerpt; presumably returns the
// documentation-comment text captured by _nextToken() - confirm.
108 public String lastComment() {
// NOTE(review): body not visible in this excerpt; presumably resets the
// stored documentation-comment text - confirm.
111 public void clearComment() {
/**
 * Reads the next raw input element at the current position of the current
 * line. White-space characters (space, tab, form feed, line terminator) are
 * each consumed and returned as a WhiteSpace element; the ASCII SUB
 * character gets its own case.
 * NOTE(review): the remaining cases and the end-of-line handling are not
 * visible in this excerpt.
 *
 * @throws java.io.IOException propagated from reading further input
 */
115 InputElement getInputElement() throws java.io.IOException {
120 if (line.length()<=line_pos) { // end of line.
126 switch (line.charAt(line_pos)) {
129 case ' ': // ASCII SP
130 case '\t': // ASCII HT
131 case '\f': // ASCII FF
132 case '\n': // LineTerminator
133 return new WhiteSpace(consume());
// ASCII SUB (control-Z) is code point 26, i.e. the octal escape '\032'.
// The former label '\020' is octal 16 (DLE) and therefore never matched SUB.
136 case '\032': // ASCII SUB
// Called with line.charAt(line_pos) == '/'. Dispatches on the following character:
//   '/'  -> an EndOfLineComment holding the rest of the line; line_pos jumps to the line end
//   '*'  -> a DocumentationComment or TraditionalComment, collected by snarfComment()
//   else -> not a comment after all, so a token is produced instead
149 // May get Token instead of Comment.
150 InputElement getComment() throws java.io.IOException {
152 // line.charAt(line_pos+0) is '/'
153 switch (line.charAt(line_pos+1)) {
154 case '/': // EndOfLineComment
// The substring runs to the end of the line, so it includes the '\n' that nextLine() appends.
155 comment = line.substring(line_pos+2);
156 line_pos = line.length();
157 return new EndOfLineComment(comment);
159 case '*': // TraditionalComment or DocumentationComment
// NOTE(review): the line that advances line_pos before this test is not visible here.
161 if (line.charAt(line_pos)=='*') { // DocumentationComment
162 return snarfComment(new DocumentationComment());
163 } else { // TraditionalComment
164 return snarfComment(new TraditionalComment());
167 default: // it's a token, not a comment.
// Collects the body of a block comment into c and returns c once the closing
// star-slash pair has been consumed. Text is gathered in a buffer and handed to
// c.appendLine() at each line end and at the terminator. Throws IOException
// ("Unterminated comment at end of file.") when the input ends first.
// NOTE(review): the tests guarding the end-of-line and end-of-file branches are
// not visible in this excerpt.
172 Comment snarfComment(Comment c) throws java.io.IOException {
173 StringBuffer text=new StringBuffer();
174 while(true) { // Grab CommentTail
175 while (line.charAt(line_pos)!='*') { // Add NotStar to comment.
176 int star_pos = line.indexOf('*', line_pos);
// Rest of the line belongs to the comment: flush it as one comment line.
178 text.append(line.substring(line_pos));
179 c.appendLine(text.toString()); text.setLength(0);
180 line_pos = line.length();
183 throw new IOException("Unterminated comment at end of file.");
// Copy the text up to (not including) the next '*'.
185 text.append(line.substring(line_pos, star_pos));
189 // At this point, line.charAt(line_pos)=='*'
190 // Grab CommentTailStar starting at line_pos+1.
191 if (line.charAt(line_pos+1)=='/') { // safe because line ends with '\n'
// Terminator found: flush pending text and skip both terminator characters.
192 c.appendLine(text.toString()); line_pos+=2; return c;
194 text.append(line.charAt(line_pos++)); // add the '*'
/**
 * Produces the token that starts at line.charAt(line_pos), dispatching on
 * that first character to the separator, operator, character-literal,
 * string-literal, numeric-literal or identifier scanners.
 * NOTE(review): the case labels of the switch are not visible in this excerpt.
 *
 * @throws java.io.IOException with "Illegal character on line N" when the
 *         character starts neither an identifier nor a number
 */
198 Token getToken() throws java.io.IOException {
199 // Tokens are: Identifiers, Keywords, Literals, Separators, Operators.
200 switch (line.charAt(line_pos)) {
201 // Separators: (period is a special case)
211 return new Separator(consume());
229 return getOperator();
232 return getCharLiteral();
235 return getStringLiteral();
237 // a period is a special case:
// A period followed by a decimal digit starts a numeric literal.
239 if (Character.digit(line.charAt(line_pos+1),10)!=-1)
240 return getNumericLiteral();
// Three consecutive periods collapse into one separator carrying U+2026.
242 line.charAt(line_pos+1)=='.' &&
243 line.charAt(line_pos+2)=='.') {
244 consume(); consume(); consume();
245 return new Separator('\u2026'); // unicode ellipsis character.
246 } else return new Separator(consume());
// Fallbacks once the switch has not matched: identifier start, then digit, else error.
251 if (Character.isJavaIdentifierStart(line.charAt(line_pos)))
252 return getIdentifier();
253 if (Character.isDigit(line.charAt(line_pos)))
254 return getNumericLiteral();
255 throw new IOException("Illegal character on line "+line_num);
// Reserved words recognized by getIdentifier(), which binary-searches this
// array using String.compareTo. Entries must therefore stay in ascending
// lexicographic order; a misplaced entry would silently stop being a keyword.
258 static final String[] keywords = new String[] {
259 "abstract", "assert", "atomic", "boolean", "break", "byte", "case", "catch", "char",
260 "class", "const", "continue",
261 "default", "disjoint", "do", "double",
263 "extends", "external", "final", "finally",
264 "flag", //keyword for failure aware computation
265 "float", "for", "genreach", "getoffset", "global", "goto", "if",
267 "import", "instanceof", "int",
271 "native", "new", "optional", "package", "private", "protected", "public",
273 "scratch", "sese", "short", "static", "strictfp", "super", "switch", "synchronized",
274 "tag", "task", "taskexit", //keywords for failure aware computation
275 "this", "throw", "throws", "transient", "try", "void",
/**
 * Scans an identifier-shaped word and classifies it.
 * The word is returned as a NullLiteral, a BooleanLiteral, a Keyword (when
 * found in the keywords array) or otherwise an Identifier. "assert" and
 * "strictfp" are treated as plain identifiers when the isJava14 / isJava12
 * flags are off.
 *
 * @throws java.io.IOException with "Invalid Java Identifier on line N" when
 *         the first character cannot start a Java identifier
 */
278 Token getIdentifier() throws java.io.IOException {
280 StringBuffer sb = new StringBuffer().append(consume());
282 if (!Character.isJavaIdentifierStart(sb.charAt(0)))
283 throw new IOException("Invalid Java Identifier on line "+line_num);
// Greedily take every following identifier-part character.
284 while (Character.isJavaIdentifierPart(line.charAt(line_pos)))
285 sb.append(consume());
286 String s = sb.toString();
287 // Now check against boolean literals and null literal.
288 if (s.equals("null")) return new NullLiteral();
289 if (s.equals("true")) return new BooleanLiteral(true);
290 if (s.equals("false")) return new BooleanLiteral(false);
291 // Check against keywords.
292 // pre-java 1.5 compatibility:
293 //if (!isJava15 && s.equals("enum")) return new Identifier(s);
294 // pre-java 1.4 compatibility:
295 if (!isJava14 && s.equals("assert")) return new Identifier(s);
296 // pre-java 1.2 compatibility:
297 if (!isJava12 && s.equals("strictfp")) return new Identifier(s);
298 // use binary search.
// Half-open interval [l, r) over the sorted keywords array.
299 for (int l=0, r=keywords.length; r > l; ) {
300 int x = (l+r)/2, cmp = s.compareTo(keywords[x]);
301 if (cmp < 0) r=x;else l=x+1;
302 if (cmp== 0) return new Keyword(s);
305 return new Identifier(s);
/**
 * Chooses which numeric scanner handles the literal starting at line_pos.
 * A leading '.' goes to the floating-point scanner; a "0x"/"0X" prefix is
 * skipped and scanned as base 16. Otherwise the first non-digit character
 * decides between floating point and integer, where a leading '0' selects
 * base 8 and anything else base 10.
 * NOTE(review): the case labels of the discriminating switch are not visible
 * in this excerpt.
 *
 * @throws java.io.IOException propagated from the chosen scanner
 */
307 NumericLiteral getNumericLiteral() throws java.io.IOException {
309 // leading decimal indicates float.
310 if (line.charAt(line_pos)=='.')
311 return getFloatingPointLiteral();
313 if (line.charAt(line_pos)=='0' &&
314 (line.charAt(line_pos+1)=='x' ||
315 line.charAt(line_pos+1)=='X')) {
316 line_pos+=2; return getIntegerLiteral(/*base*/ 16);
318 // otherwise scan to first non-numeric
// i only peeks ahead; line_pos itself is not moved by this loop header.
319 for (i=line_pos; Character.digit(line.charAt(i),10)!=-1; )
321 switch(line.charAt(i)) { // discriminate based on first non-numeric
329 return getFloatingPointLiteral();
334 if (line.charAt(line_pos)=='0')
335 return getIntegerLiteral(/*base*/ 8);
336 return getIntegerLiteral(/*base*/ 10);
/**
 * Scans the digits of an integer literal in the given radix and builds the
 * literal. A trailing 'l' or 'L' yields a LongLiteral; otherwise the value
 * is range-checked and returned as an IntegerLiteral.
 *
 * @param radix numeric base of the digits (callers pass 8, 10 or 16)
 * @throws java.io.IOException with "Constant does not fit in integer on line N"
 *         when the range check fails
 */
339 NumericLiteral getIntegerLiteral(int radix) throws java.io.IOException {
// Accumulate digit by digit while the next character is a valid digit in this radix.
341 while (Character.digit(line.charAt(line_pos),radix)!=-1)
342 val = (val*radix) + Character.digit(consume(),radix);
343 if (line.charAt(line_pos) == 'l' ||
344 line.charAt(line_pos) == 'L') {
346 return new LongLiteral(val);
348 // we compare MAX_VALUE against val/2 to allow constants like
349 // 0xFFFF0000 to get past the test. (unsigned long->signed int)
350 if ((val/2) > Integer.MAX_VALUE ||
351 val < Integer.MIN_VALUE)
352 throw new IOException("Constant does not fit in integer on line "+line_num);
// Narrowing cast: values above Integer.MAX_VALUE that passed the check wrap to negative ints.
353 return new IntegerLiteral((int)val);
/**
 * Scans a floating-point literal: digits, an optional fraction introduced by
 * '.', and an optional exponent introduced by 'e' or 'E' with an optional
 * sign. The collected text is converted with Float.valueOf or Double.valueOf
 * depending on the character that follows.
 * NOTE(review): the suffix case labels and the exponent-digit handling are
 * not visible in this excerpt.
 *
 * @throws java.io.IOException with "Illegal floating-point on line N: ..."
 *         when the collected text cannot be parsed
 */
355 NumericLiteral getFloatingPointLiteral() throws java.io.IOException {
356 String rep = getDigits();
357 if (line.charAt(line_pos)=='.')
358 rep+=consume() + getDigits();
359 if (line.charAt(line_pos)=='e' ||
360 line.charAt(line_pos)=='E') {
362 if (line.charAt(line_pos)=='+' ||
363 line.charAt(line_pos)=='-')
368 switch (line.charAt(line_pos)) {
372 return new FloatLiteral(Float.valueOf(rep).floatValue());
380 return new DoubleLiteral(Double.valueOf(rep).doubleValue());
// Parsing failures are reported as IOException, like the lexer's other errors.
382 } catch (NumberFormatException e) {
383 throw new IOException("Illegal floating-point on line "+line_num+": "+e);
// NOTE(review): the method header is not visible in this excerpt; presumably
// this is the body of getDigits(), which getFloatingPointLiteral() calls - confirm.
// Consumes consecutive decimal digits at line_pos and returns them as a
// string (empty when the current character is not a digit).
387 StringBuffer sb = new StringBuffer();
388 while (Character.digit(line.charAt(line_pos),10)!=-1)
389 sb.append(consume());
390 return sb.toString();
/**
 * Scans an operator starting at line_pos and returns it as an Operator
 * whose text is one or more characters long.
 * The first character is consumed up front; the second is only peeked and is
 * consumed when it turns out to belong to the operator. Shift operators are
 * extended to three characters for the unsigned right shift and by a trailing
 * '=' for the shift-assignment forms.
 * NOTE(review): the case labels and several conditions are not visible in
 * this excerpt.
 */
393 Operator getOperator() {
394 char first = consume();
395 char second= line.charAt(line_pos);
398 // single-character operators.
402 return new Operator(new String(new char[] {first}));
410 return new Operator(new String(new char[] {first, consume()}));
415 // Check for trailing '='
417 return new Operator(new String(new char[] {first, consume()}));
419 // Special-case '<<', '>>' and '>>>'
420 if ((first=='<' && second=='<') || // <<
421 (first=='>' && second=='>')) { // >>
422 String op = new String(new char[] {first, consume()});
423 if (first=='>' && line.charAt(line_pos)=='>') // >>>
425 if (line.charAt(line_pos)=='=') // <<=, >>=, >>>=
427 return new Operator(op);
430 // Otherwise return single operator.
431 return new Operator(new String(new char[] {first}));
/**
 * Scans a character literal: an opening single quote, one character or one
 * escape sequence, and a closing single quote.
 * NOTE(review): the case labels that select each branch are not visible in
 * this excerpt.
 *
 * @throws java.io.IOException with "Invalid character literal on line N"
 *         when the content or either quote is not as expected
 */
434 CharacterLiteral getCharLiteral() throws java.io.IOException {
435 char firstquote = consume();
437 switch (line.charAt(line_pos)) {
439 val = getEscapeSequence();
443 throw new IOException("Invalid character literal on line "+line_num);
446 throw new IOException("Invalid character literal on line "+line_num);
452 char secondquote = consume();
// Both delimiters are verified only after the content has been read.
453 if (firstquote != '\'' || secondquote != '\'')
454 throw new IOException("Invalid character literal on line "+line_num);
455 return new CharacterLiteral(val);
/**
 * Scans a string literal delimited by double quotes, decoding escape
 * sequences through getEscapeSequence() and copying every other character
 * verbatim. The resulting value is interned before being wrapped in a
 * StringLiteral.
 * NOTE(review): the case labels that select each branch are not visible in
 * this excerpt.
 *
 * @throws java.io.IOException with "Invalid string literal on line N" when
 *         the content or either quote is not as expected
 */
457 StringLiteral getStringLiteral() throws java.io.IOException {
458 char openquote = consume();
459 StringBuffer val = new StringBuffer();
460 while (line.charAt(line_pos)!='\"') {
461 switch(line.charAt(line_pos)) {
463 val.append(getEscapeSequence());
467 throw new IOException("Invalid string literal on line " + line_num);
470 val.append(consume());
474 char closequote = consume();
475 if (openquote != '\"' || closequote != '\"')
476 throw new IOException("Invalid string literal on line " + line_num);
478 return new StringLiteral(val.toString().intern());
/**
 * Decodes one escape sequence starting at the backslash at line_pos and
 * returns the character it denotes. Handles the single-character escapes for
 * backspace, tab, newline, form feed, carriage return, double quote, single
 * quote and backslash, plus octal escapes read via getOctal() with at most
 * three or two digits.
 * NOTE(review): the case labels, including which leading digits select the
 * three-digit versus two-digit octal form, are not visible in this excerpt.
 *
 * @throws java.io.IOException with "Invalid escape sequence on line N" when
 *         the sequence does not start with a backslash or is not recognized
 */
481 char getEscapeSequence() throws java.io.IOException {
482 if (consume() != '\\')
483 throw new IOException("Invalid escape sequence on line " + line_num);
484 switch(line.charAt(line_pos)) {
486 consume(); return '\b';
489 consume(); return '\t';
492 consume(); return '\n';
495 consume(); return '\f';
498 consume(); return '\r';
501 consume(); return '\"';
504 consume(); return '\'';
507 consume(); return '\\';
513 return (char) getOctal(3);
519 return (char) getOctal(2);
522 throw new IOException("Invalid escape sequence on line " + line_num);
/**
 * Reads up to maxlength octal digits at line_pos and returns their value.
 *
 * @param maxlength maximum number of digits to consume
 * @throws java.io.IOException with "Invalid octal escape sequence in line N"
 *         when no digit was read or the value exceeds 0xFF
 * NOTE(review): the early exit from the loop on a non-octal character and the
 * return statement are not visible in this excerpt.
 */
525 int getOctal(int maxlength) throws java.io.IOException {
527 for (i=0; i<maxlength; i++)
528 if (Character.digit(line.charAt(line_pos), 8)!=-1) {
529 val = (8*val) + Character.digit(consume(), 8);
531 if ((i==0) || (val>0xFF)) // impossible.
532 throw new IOException("Invalid octal escape sequence in line " + line_num);
// NOTE(review): the method header is not visible in this excerpt; presumably
// this is the body of consume(), used throughout the lexer - confirm.
// Returns the character at line_pos and then advances line_pos by one.
537 return line.charAt(line_pos++);
/**
 * Loads the next line of input into `line`.
 * readLine() strips the line terminator, so a '\n' is appended again; the
 * scanners rely on every non-null line ending with '\n'. At end of input
 * `line` becomes null. A new LineList node is pushed whose head is the
 * previous head plus the current line_pos.
 * NOTE(review): the statements that follow (presumably resetting line_pos and
 * advancing line_num) are not visible in this excerpt.
 *
 * @throws java.io.IOException propagated from the reader
 */
539 void nextLine() throws java.io.IOException {
540 line=reader.readLine();
541 if (line!=null) line=line+'\n';
542 lineL = new LineList(lineL.head+line_pos, lineL); // for error reporting
547 // Deal with error messages.
// Reports msg on standard error together with the line that contains the
// symbol's left position. Starting from the newest line record and the current
// line number, the list is walked backwards (decrementing n) until a record
// whose head offset is <= info.left is found; c is the offset into that line.
// NOTE(review): any further output, such as the use of c, is not visible here.
548 public void errorMsg(String msg, java_cup.runtime.Symbol info) {
549 int n=line_num, c=info.left-lineL.head;
550 for (LineList p = lineL; p!=null; p=p.tail, n--)
551 if (p.head<=info.left) {
552 c=info.left-p.head; break;
554 System.err.println(msg+" at line "+n);
// Error counter, starting at 0.
// NOTE(review): the place where it is incremented is not visible in this excerpt.
557 private int num_errors = 0;
// NOTE(review): body not visible in this excerpt; presumably returns num_errors - confirm.
558 public int numErrors() {
// Builds one node of the line-record list: head is an int offset (nextLine()
// passes lineL.head+line_pos) and tail links to the previously created node,
// or null for the sentinel.
565 LineList(int head, LineList tail) {
566 this.head = head; this.tail = tail;