3 import java.io.IOException;
5 import java.io.LineNumberReader;
9 * Copyright (C) 2002 C. Scott Ananian <cananian@alumni.princeton.edu>
10 * This program is released under the terms of the GPL; see the file
11 * COPYING for more details. There is NO WARRANTY on this code.
// Line source for the lexer; the constructor assigns it and nextLine() reads from it.
15 LineNumberReader reader;
// Line number quoted in the lexer's error messages; starts at 0.
21 public int line_num = 0;
// Newest node of the linked list of per-line start offsets: nextLine() prepends
// a node for each line read and errorMsg() walks the list to locate a symbol.
22 LineList lineL = new LineList(-line_pos, null); // sentinel for line #0
// Creates a lexer over the given character stream. The stream is wrapped in an
// EscapedUnicodeReader and then in a LineNumberReader, which is stored in the
// `reader` field.
24 public Lexer(Reader reader) {
25 this.reader = new LineNumberReader(new EscapedUnicodeReader(reader));
// Fetches the next symbol. While no lookahead FIFO exists (lookahead == null)
// the symbol comes straight from _nextToken(); once one has been created the
// symbol is taken from the FIFO via lookahead.get().
30 public java_cup.runtime.Symbol nextToken() throws java.io.IOException {
31 java_cup.runtime.Symbol sym =
32 lookahead==null ? _nextToken() : lookahead.get();
// Peeks ahead in the token stream to decide whether an LT should be changed to
// a PLT. The checks, in order:
//   1. `last` must be non-null and an IDENTIFIER;
//   2. the lookahead FIFO is created on first use, pulling from _nextToken();
//   3. the upcoming symbols must match IDENTIFIER (DOT IDENTIFIER)*;
//   4. followed by (LBRACK RBRACK)*;
//   5. the symbol after that is examined by the final switch.
// Only lookahead.peek(i) is used, with an advancing index i.
36 private boolean shouldBePLT() throws java.io.IOException {
37 // look ahead to see if this LT should be changed to a PLT
38 if (last==null || last.sym!=Sym.IDENTIFIER)
40 if (lookahead==null) lookahead = new FIFO(new FIFO.Getter() {
41 java_cup.runtime.Symbol next() throws java.io.IOException
42 { return _nextToken(); }
45 // skip past IDENTIFIER (DOT IDENTIFIER)*
46 if (lookahead.peek(i++).sym != Sym.IDENTIFIER)
48 while (lookahead.peek(i).sym == Sym.DOT) {
50 if (lookahead.peek(i++).sym != Sym.IDENTIFIER)
53 // skip past (LBRACK RBRACK)*
54 while (lookahead.peek(i).sym == Sym.LBRACK) {
56 if (lookahead.peek(i++).sym != Sym.RBRACK)
59 // now the next sym has to be one of LT GT COMMA EXTENDS IMPLEMENTS
60 switch(lookahead.peek(i).sym) {
// Symbol that shouldBePLT() inspects to see whether an IDENTIFIER came before
// the current token. NOTE(review): presumably the previously returned symbol;
// confirm where it is assigned.
71 private java_cup.runtime.Symbol last = null;
// Lookahead buffer; stays null until shouldBePLT() first needs to peek ahead.
72 private FIFO lookahead = null;
// Reads input elements with getInputElement() until one is a Token, then
// converts that token to a java_cup Symbol. Along the way:
//   - startpos is re-sampled before each element, so it ends up as the
//     absolute offset (lineL.head + line_pos) where the returned token begins;
//   - the text of any DocumentationComment encountered is stored in `comment`;
//   - endpos is the absolute offset of the token's last character.
// The symbol's left/right fields are overwritten with startpos/endpos.
73 public java_cup.runtime.Symbol _nextToken() throws java.io.IOException {
75 * Identifiers/Keywords/true/false/null (start with java letter)
76 * numeric literal (start with number)
77 * character literal (start with single quote)
78 * string (start with double quote)
79 * separator (parens, braces, brackets, semicolon, comma, period)
80 * operator (equals, plus, minus, etc)
82 * comment (start with slash)
87 startpos = lineL.head + line_pos;
88 ie = getInputElement();
89 if (ie instanceof DocumentationComment)
90 comment = ((Comment)ie).getComment();
91 } while (!(ie instanceof Token));
92 endpos = lineL.head + line_pos - 1;
94 // System.out.println(ie.toString()); // uncomment to debug lexer.
95 java_cup.runtime.Symbol sym = ((Token)ie).token();
96 // fix up left/right positions.
97 sym.left = startpos; sym.right = endpos;
// Debugging aid: reads a single input element, prints it to System.out, and
// returns true unless that element was the EOF marker, so a caller can loop
// on the return value to dump the whole input.
101 public boolean debug_lex() throws java.io.IOException {
102 InputElement ie = getInputElement();
103 System.out.println(ie);
104 return !(ie instanceof EOF);
108 public String lastComment() {
111 public void clearComment() {
// Returns the next input element found at line_pos in the current line.
// Visible cases: reaching the end of the line is handled first; a space, tab,
// form feed or newline is consumed and returned as a WhiteSpace element; the
// ASCII SUB (Ctrl-Z) character gets its own case.
// Throws java.io.IOException when reading further input fails.
115 InputElement getInputElement() throws java.io.IOException {
120 if (line.length()<=line_pos) { // end of line.
126 switch (line.charAt(line_pos)) {
129 case ' ': // ASCII SP
130 case '\t': // ASCII HT
131 case '\f': // ASCII FF
132 case '\n': // LineTerminator
133 return new WhiteSpace(consume());
// ASCII SUB is character code 26, which is octal 032. The label used to be
// '\020' (octal 020 = 16 = DLE), so a real Ctrl-Z never reached this case.
136 case '\032': // ASCII SUB
// Called with line.charAt(line_pos) == '/'. Looks at the following character:
//   '/'  -> the rest of the line becomes an EndOfLineComment and line_pos is
//           moved to the end of the line;
//   '*'  -> the comment is collected by snarfComment() as a
//           DocumentationComment when another '*' is found at line_pos,
//           otherwise as a TraditionalComment;
//   else -> the '/' starts a token rather than a comment.
149 // May get Token instead of Comment.
150 InputElement getComment() throws java.io.IOException {
152 // line.charAt(line_pos+0) is '/'
153 switch (line.charAt(line_pos+1)) {
154 case '/': // EndOfLineComment
155 comment = line.substring(line_pos+2);
156 line_pos = line.length();
157 return new EndOfLineComment(comment);
159 case '*': // TraditionalComment or DocumentationComment
161 if (line.charAt(line_pos)=='*') { // DocumentationComment
162 return snarfComment(new DocumentationComment());
163 } else { // TraditionalComment
164 return snarfComment(new TraditionalComment());
167 default: // it's a token, not a comment.
// Collects the body of a block comment into `c` and returns `c` once the
// closing "*/" has been consumed.
// Text is buffered in `text` up to the next '*'. When a line holds no further
// '*', the remainder is appended to `c` as a finished line via appendLine()
// and the buffer is reset. At a '*', the following character decides: '/'
// ends the comment, anything else means the '*' is ordinary comment text.
// Throws IOException("Unterminated comment at end of file.") when the input
// ends before the comment is closed.
172 Comment snarfComment(Comment c) throws java.io.IOException {
173 StringBuffer text=new StringBuffer();
174 while(true) { // Grab CommentTail
175 while (line.charAt(line_pos)!='*') { // Add NotStar to comment.
176 int star_pos = line.indexOf('*', line_pos);
178 text.append(line.substring(line_pos));
179 c.appendLine(text.toString()); text.setLength(0);
180 line_pos = line.length();
183 throw new IOException("Unterminated comment at end of file.");
185 text.append(line.substring(line_pos, star_pos));
189 // At this point, line.charAt(line_pos)=='*'
190 // Grab CommentTailStar starting at line_pos+1.
191 if (line.charAt(line_pos+1)=='/') { // safe because line ends with '\n'
192 c.appendLine(text.toString()); line_pos+=2; return c;
194 text.append(line.charAt(line_pos++)); // add the '*'
// Reads one token starting at line_pos, dispatching on the current character
// to the separator, operator, character-literal and string-literal readers.
// A period is special-cased:
//   - followed by a decimal digit, it starts a numeric literal;
//   - followed by two more periods, all three are consumed and returned as a
//     single Separator holding the ellipsis character U+2026;
//   - otherwise it is an ordinary Separator.
// Characters with no explicit case fall through to the checks at the end: a
// Java identifier start goes to getIdentifier(), a digit goes to
// getNumericLiteral(), and anything else throws IOException("Illegal
// character on line N").
198 Token getToken() throws java.io.IOException {
199 // Tokens are: Identifiers, Keywords, Literals, Separators, Operators.
200 switch (line.charAt(line_pos)) {
201 // Separators: (period is a special case)
210 return new Separator(consume());
228 return getOperator();
231 return getCharLiteral();
234 return getStringLiteral();
236 // a period is a special case:
238 if (Character.digit(line.charAt(line_pos+1),10)!=-1)
239 return getNumericLiteral();
241 line.charAt(line_pos+1)=='.' &&
242 line.charAt(line_pos+2)=='.') {
243 consume(); consume(); consume();
244 return new Separator('\u2026'); // unicode ellipsis character.
245 } else return new Separator(consume());
250 if (Character.isJavaIdentifierStart(line.charAt(line_pos)))
251 return getIdentifier();
252 if (Character.isDigit(line.charAt(line_pos)))
253 return getNumericLiteral();
254 throw new IOException("Illegal character on line "+line_num);
// Table of reserved words. getIdentifier() binary-searches this array using
// String.compareTo, so entries MUST be kept in ascending compareTo order;
// a word inserted out of order will silently stop being recognized.
257 static final String[] keywords = new String[] {
258 "abstract", "assert", "atomic", "boolean", "break", "byte", "case", "catch", "char",
259 "class", "const", "continue",
260 "default", "disjoint", "do", "double",
262 "extends", "external", "final", "finally",
263 "flag", //keyword for failure aware computation
264 "float", "for","getoffset", "global", "goto", "if",
266 "import", "instanceof", "int",
270 "native", "new", "optional", "package", "private", "protected", "public",
271 "return", "scratch", "sese", "short", "static", "strictfp", "super", "switch", "synchronized",
272 "tag", "task", "taskexit", //keywords for failure aware computation
273 "this", "throw", "throws", "transient", "try", "void",
// Reads an identifier-shaped word and classifies it.
// The first character is consumed and must be a Java identifier start
// (otherwise IOException "Invalid Java Identifier on line N"); further
// identifier-part characters are appended. The word is then classified in
// this order:
//   1. "null" / "true" / "false"  -> NullLiteral / BooleanLiteral;
//   2. "enum", "assert", "strictfp" are returned as plain Identifiers when the
//      matching isJava15 / isJava14 / isJava12 flag is false;
//   3. a binary search of `keywords`; a hit yields a Keyword;
//   4. anything else is an Identifier.
276 Token getIdentifier() throws java.io.IOException {
278 StringBuffer sb = new StringBuffer().append(consume());
280 if (!Character.isJavaIdentifierStart(sb.charAt(0)))
281 throw new IOException("Invalid Java Identifier on line "+line_num);
282 while (Character.isJavaIdentifierPart(line.charAt(line_pos)))
283 sb.append(consume());
284 String s = sb.toString();
285 // Now check against boolean literals and null literal.
286 if (s.equals("null")) return new NullLiteral();
287 if (s.equals("true")) return new BooleanLiteral(true);
288 if (s.equals("false")) return new BooleanLiteral(false);
289 // Check against keywords.
290 // pre-java 1.5 compatibility:
291 if (!isJava15 && s.equals("enum")) return new Identifier(s);
292 // pre-java 1.4 compatibility:
293 if (!isJava14 && s.equals("assert")) return new Identifier(s);
294 // pre-java 1.2 compatibility:
295 if (!isJava12 && s.equals("strictfp")) return new Identifier(s);
296 // use binary search.
297 for (int l=0, r=keywords.length; r > l; ) {
298 int x = (l+r)/2, cmp = s.compareTo(keywords[x]);
299 if (cmp < 0) r=x;else l=x+1;
300 if (cmp== 0) return new Keyword(s);
303 return new Identifier(s);
// Decides which kind of numeric literal starts at line_pos and delegates:
//   - a leading '.' means a floating-point literal;
//   - "0x" / "0X" is skipped and the rest is read as a base-16 integer;
//   - otherwise the decimal digits are scanned (without consuming them) and
//     the first non-digit character is examined; one outcome of that switch
//     is a floating-point literal;
//   - remaining integers are octal when they start with '0', else decimal.
305 NumericLiteral getNumericLiteral() throws java.io.IOException {
307 // leading decimal indicates float.
308 if (line.charAt(line_pos)=='.')
309 return getFloatingPointLiteral();
311 if (line.charAt(line_pos)=='0' &&
312 (line.charAt(line_pos+1)=='x' ||
313 line.charAt(line_pos+1)=='X')) {
314 line_pos+=2; return getIntegerLiteral(/*base*/ 16);
316 // otherwise scan to first non-numeric
317 for (i=line_pos; Character.digit(line.charAt(i),10)!=-1; )
319 switch(line.charAt(i)) { // discriminate based on first non-numeric
327 return getFloatingPointLiteral();
332 if (line.charAt(line_pos)=='0')
333 return getIntegerLiteral(/*base*/ 8);
334 return getIntegerLiteral(/*base*/ 10);
// Reads an integer literal in the given radix, starting at line_pos.
// Digits valid in `radix` are consumed and accumulated into `val`. If the
// next character is 'l' or 'L' the result is a LongLiteral. Otherwise the
// value is range-checked and narrowed with an (int) cast into an
// IntegerLiteral; the check uses val/2 so that constants such as 0xFFFF0000
// are accepted and wrap to a negative int.
// Throws IOException("Constant does not fit in integer on line N") when the
// range check fails.
// NOTE(review): no overflow check is visible on the accumulation itself;
// confirm how literals too large for `val` are meant to behave.
337 NumericLiteral getIntegerLiteral(int radix) throws java.io.IOException {
339 while (Character.digit(line.charAt(line_pos),radix)!=-1)
340 val = (val*radix) + Character.digit(consume(),radix);
341 if (line.charAt(line_pos) == 'l' ||
342 line.charAt(line_pos) == 'L') {
344 return new LongLiteral(val);
346 // we compare MAX_VALUE against val/2 to allow constants like
347 // 0xFFFF0000 to get past the test. (unsigned long->signed int)
348 if ((val/2) > Integer.MAX_VALUE ||
349 val < Integer.MIN_VALUE)
350 throw new IOException("Constant does not fit in integer on line "+line_num);
351 return new IntegerLiteral((int)val);
// Reads a floating-point literal. The textual form is assembled in `rep`:
// leading digits, then an optional '.' with more digits, then an optional
// exponent introduced by 'e'/'E' with an optional '+'/'-' sign. The character
// after that selects the result type: the text is parsed with Float.valueOf
// into a FloatLiteral or with Double.valueOf into a DoubleLiteral.
// A NumberFormatException from parsing is rethrown as
// IOException("Illegal floating-point on line N: ...").
353 NumericLiteral getFloatingPointLiteral() throws java.io.IOException {
354 String rep = getDigits();
355 if (line.charAt(line_pos)=='.')
356 rep+=consume() + getDigits();
357 if (line.charAt(line_pos)=='e' ||
358 line.charAt(line_pos)=='E') {
360 if (line.charAt(line_pos)=='+' ||
361 line.charAt(line_pos)=='-')
366 switch (line.charAt(line_pos)) {
370 return new FloatLiteral(Float.valueOf(rep).floatValue());
378 return new DoubleLiteral(Double.valueOf(rep).doubleValue());
380 } catch (NumberFormatException e) {
381 throw new IOException("Illegal floating-point on line "+line_num+": "+e);
385 StringBuffer sb = new StringBuffer();
386 while (Character.digit(line.charAt(line_pos),10)!=-1)
387 sb.append(consume());
388 return sb.toString();
// Reads an operator using the longest match. The first character is consumed
// and the one after it (`second`) is only peeked at. Depending on the pair,
// the result is a one-character operator, a two-character operator (`second`
// is then consumed), or an operator with a trailing '='. Shifts are handled
// separately: "<<" or ">>" is built first, then line_pos is checked for a
// further '>' (">>>") and for a trailing '=' ("<<=", ">>=", ">>>=").
// Anything unmatched is returned as the single-character operator.
391 Operator getOperator() {
392 char first = consume();
393 char second= line.charAt(line_pos);
396 // single-character operators.
400 return new Operator(new String(new char[] {first}));
408 return new Operator(new String(new char[] {first, consume()}));
413 // Check for trailing '='
415 return new Operator(new String(new char[] {first, consume()}));
417 // Special-case '<<', '>>' and '>>>'
418 if ((first=='<' && second=='<') || // <<
419 (first=='>' && second=='>')) { // >>
420 String op = new String(new char[] {first, consume()});
421 if (first=='>' && line.charAt(line_pos)=='>') // >>>
423 if (line.charAt(line_pos)=='=') // <<=, >>=, >>>=
425 return new Operator(op);
428 // Otherwise return single operator.
429 return new Operator(new String(new char[] {first}));
// Reads a character literal. The opening quote is consumed, the content is
// decoded (a backslash sequence goes through getEscapeSequence()), and the
// closing quote is consumed. Both quotes must be '\''.
// Throws IOException("Invalid character literal on line N") for content the
// switch rejects or when either delimiter is not a single quote.
432 CharacterLiteral getCharLiteral() throws java.io.IOException {
433 char firstquote = consume();
435 switch (line.charAt(line_pos)) {
437 val = getEscapeSequence();
441 throw new IOException("Invalid character literal on line "+line_num);
444 throw new IOException("Invalid character literal on line "+line_num);
450 char secondquote = consume();
451 if (firstquote != '\'' || secondquote != '\'')
452 throw new IOException("Invalid character literal on line "+line_num);
453 return new CharacterLiteral(val);
// Reads a string literal. After the opening quote is consumed, characters are
// accumulated until the next '"': escape sequences are decoded through
// getEscapeSequence(), other accepted characters are appended as-is. The
// closing quote is then consumed and both delimiters must be '"'.
// The resulting text is interned before being wrapped in a StringLiteral.
// Throws IOException("Invalid string literal on line N") for content the
// switch rejects or for a bad delimiter.
455 StringLiteral getStringLiteral() throws java.io.IOException {
456 char openquote = consume();
457 StringBuffer val = new StringBuffer();
458 while (line.charAt(line_pos)!='\"') {
459 switch(line.charAt(line_pos)) {
461 val.append(getEscapeSequence());
465 throw new IOException("Invalid string literal on line " + line_num);
468 val.append(consume());
472 char closequote = consume();
473 if (openquote != '\"' || closequote != '\"')
474 throw new IOException("Invalid string literal on line " + line_num);
476 return new StringLiteral(val.toString().intern());
// Decodes one backslash escape sequence and returns the character it denotes.
// The backslash itself is consumed first and must be present. The following
// character selects the result: backspace, tab, newline, form feed, carriage
// return, double quote, single quote or backslash, each consuming that one
// selector character. Octal escapes are delegated to getOctal() with a
// maximum length of 3 or 2 digits, depending on the leading digit.
// Throws IOException("Invalid escape sequence on line N") when the backslash
// is missing or the selector is not recognized.
479 char getEscapeSequence() throws java.io.IOException {
480 if (consume() != '\\')
481 throw new IOException("Invalid escape sequence on line " + line_num);
482 switch(line.charAt(line_pos)) {
484 consume(); return '\b';
487 consume(); return '\t';
490 consume(); return '\n';
493 consume(); return '\f';
496 consume(); return '\r';
499 consume(); return '\"';
502 consume(); return '\'';
505 consume(); return '\\';
511 return (char) getOctal(3);
517 return (char) getOctal(2);
520 throw new IOException("Invalid escape sequence on line " + line_num);
// Reads up to `maxlength` octal digits starting at line_pos, accumulating
// them into `val` most-significant digit first.
// Throws IOException("Invalid octal escape sequence in line N") when no digit
// was read (i == 0) or the value exceeds 0xFF.
// NOTE(review): presumably returns `val`; confirm against the full method.
523 int getOctal(int maxlength) throws java.io.IOException {
525 for (i=0; i<maxlength; i++)
526 if (Character.digit(line.charAt(line_pos), 8)!=-1) {
527 val = (8*val) + Character.digit(consume(), 8);
529 if ((i==0) || (val>0xFF)) // impossible.
530 throw new IOException("Invalid octal escape sequence in line " + line_num);
535 return line.charAt(line_pos++);
// Loads the next line of input into `line`.
// readLine() strips the line terminator, so '\n' is appended again whenever
// a line was actually read; `line` stays null at end of input. A new LineList
// node is pushed whose head is the previous head plus the current line_pos,
// i.e. a running character offset for the new line.
537 void nextLine() throws java.io.IOException {
538 line=reader.readLine();
539 if (line!=null) line=line+'\n';
540 lineL = new LineList(lineL.head+line_pos, lineL); // for error reporting
545 // Deal with error messages.
// Reports `msg` on System.err together with the line on which `info` starts.
// info.left is an absolute character offset. The LineList chain is walked
// from the newest line backwards, decrementing the candidate line number n
// (starting at line_num) at each step, until a node whose head offset is
// <= info.left is found; c is then the offset of the symbol within that line.
546 public void errorMsg(String msg, java_cup.runtime.Symbol info) {
547 int n=line_num, c=info.left-lineL.head;
548 for (LineList p = lineL; p!=null; p=p.tail, n--)
549 if (p.head<=info.left) {
550 c=info.left-p.head; break;
552 System.err.println(msg+" at line "+n);
555 private int num_errors = 0;
556 public int numErrors() {
// Creates a list node holding the offset `head` and linking to the existing
// chain `tail` (null for the last node).
563 LineList(int head, LineList tail) {
564 this.head = head; this.tail = tail;