3 import java.io.IOException;
5 import java.io.LineNumberReader;
9 * Copyright (C) 2002 C. Scott Ananian <cananian@alumni.princeton.edu>
10 * This program is released under the terms of the GPL; see the file
11 * COPYING for more details. There is NO WARRANTY on this code.
// Line-oriented character source; assigned in the constructor and read by nextLine().
15 LineNumberReader reader;
// Public line counter, initially 0; it is quoted in the lexer's error messages.
21 public int line_num = 0;
// Newest entry of a linked list of LineList records. nextLine() pushes one
// record per call, and errorMsg() walks the list to locate a position.
22 LineList lineL = new LineList(-line_pos, null); // sentinel for line #0
// Creates a lexer over the given character stream. The stream is wrapped in an
// EscapedUnicodeReader and then in a LineNumberReader, which is kept in this.reader.
24 public Lexer(Reader reader) {
25 this.reader = new LineNumberReader(new EscapedUnicodeReader(reader));
// Produces the next symbol. When no lookahead FIFO exists the symbol is scanned
// directly with _nextToken(); otherwise it is taken from the FIFO.
// Throws java.io.IOException if scanning fails.
30 public java_cup.runtime.Symbol nextToken() throws java.io.IOException {
31 java_cup.runtime.Symbol sym =
32 lookahead==null ? _nextToken() : lookahead.get();
// Decides whether an LT should be changed to a PLT by inspecting upcoming
// symbols through lookahead.peek(i); peek() is used throughout, never get().
// Throws java.io.IOException if scanning ahead fails.
36 private boolean shouldBePLT() throws java.io.IOException {
37 // look ahead to see if this LT should be changed to a PLT
// The symbol held in `last` must be an IDENTIFIER for the check to go on
// (presumably the answer is false otherwise -- confirm).
38 if (last==null || last.sym!=Sym.IDENTIFIER)
// The FIFO is created on first use; its Getter obtains symbols from _nextToken().
40 if (lookahead==null) lookahead = new FIFO(new FIFO.Getter() {
41 java_cup.runtime.Symbol next() throws java.io.IOException
42 { return _nextToken(); }
45 // skip past IDENTIFIER (DOT IDENTIFIER)*
46 if (lookahead.peek(i++).sym != Sym.IDENTIFIER)
48 while (lookahead.peek(i).sym == Sym.DOT) {
50 if (lookahead.peek(i++).sym != Sym.IDENTIFIER)
53 // skip past (LBRACK RBRACK)*
54 while (lookahead.peek(i).sym == Sym.LBRACK) {
56 if (lookahead.peek(i++).sym != Sym.RBRACK)
59 // now the next sym has to be one of LT GT COMMA EXTENDS IMPLEMENTS
60 switch(lookahead.peek(i).sym) {
// Symbol consulted by shouldBePLT(), which requires it to be an IDENTIFIER.
// NOTE(review): presumably the most recently returned symbol -- confirm where it is assigned.
71 private java_cup.runtime.Symbol last = null;
// Buffer of peeked symbols; stays null until shouldBePLT() creates it, and
// nextToken() reads from it once it exists.
72 private FIFO lookahead = null;
// Scans one token straight from the input. Input elements are read until one
// is a Token; the text of any DocumentationComment met on the way is stored in
// `comment`. The returned symbol's left/right fields are overwritten with
// offsets computed from lineL.head + line_pos.
// Throws java.io.IOException if reading an input element fails.
73 public java_cup.runtime.Symbol _nextToken() throws java.io.IOException {
75 * Identifiers/Keywords/true/false/null (start with java letter)
76 * numeric literal (start with number)
77 * character literal (start with single quote)
78 * string (start with double quote)
79 * separator (parens, braces, brackets, semicolon, comma, period)
80 * operator (equals, plus, minus, etc)
82 * comment (start with slash)
// Offset at which the element about to be read begins.
87 startpos = lineL.head + line_pos;
88 ie = getInputElement();
89 if (ie instanceof DocumentationComment)
90 comment = ((Comment)ie).getComment();
91 } while (!(ie instanceof Token));
// One less than the offset following the token, i.e. its last character.
92 endpos = lineL.head + line_pos - 1;
94 // System.out.println(ie.toString()); // uncomment to debug lexer.
95 java_cup.runtime.Symbol sym = ((Token)ie).token();
96 // fix up left/right positions.
97 sym.left = startpos; sym.right = endpos;
// Debugging aid: reads one input element, prints it to System.out, and
// returns false only when that element is an EOF.
// Throws java.io.IOException if reading the element fails.
101 public boolean debug_lex() throws java.io.IOException {
102 InputElement ie = getInputElement();
103 System.out.println(ie);
104 return !(ie instanceof EOF);
// NOTE(review): presumably returns the text that _nextToken() stored in
// `comment` for the last DocumentationComment -- confirm against the body.
108 public String lastComment() {
// NOTE(review): presumably resets the stored documentation-comment text -- confirm against the body.
111 public void clearComment() {
// Classifies the input at the current position: handles the end-of-line case
// first, then dispatches on the character at line_pos.
// Throws java.io.IOException on read or lexical errors.
115 InputElement getInputElement() throws java.io.IOException {
120 if (line.length()<=line_pos) { // end of line.
126 switch (line.charAt(line_pos)) {
// The four characters below are consumed one at a time as WhiteSpace elements.
129 case ' ': // ASCII SP
130 case '\t': // ASCII HT
131 case '\f': // ASCII FF
132 case '\n': // LineTerminator
133 return new WhiteSpace(consume());
// NOTE(review): '\020' is an octal escape for U+0010 (DLE). ASCII SUB is
// U+001A, which would be written '\032' -- confirm which one is intended.
136 case '\020': // ASCII SUB
149 // May get Token instead of Comment.
// Called when the current character is '/'; the character after it selects
// between an end-of-line comment, a block comment, and an ordinary token.
// Throws java.io.IOException if the comment cannot be read.
150 InputElement getComment() throws java.io.IOException {
152 // line.charAt(line_pos+0) is '/'
153 switch (line.charAt(line_pos+1)) {
154 case '/': // EndOfLineComment
// Everything after the two slashes becomes the comment text, and the position
// moves to the end of the line.
155 comment = line.substring(line_pos+2);
156 line_pos = line.length();
157 return new EndOfLineComment(comment);
159 case '*': // TraditionalComment or DocumentationComment
// A '*' at line_pos here selects DocumentationComment
// (presumably line_pos has already moved past the opening "/*" -- confirm).
161 if (line.charAt(line_pos)=='*') { // DocumentationComment
162 return snarfComment(new DocumentationComment());
163 } else { // TraditionalComment
164 return snarfComment(new TraditionalComment());
167 default: // it's a token, not a comment.
// Collects the body of a block comment into c and returns c once the closing
// "*/" has been consumed. Text is buffered in `text` and handed to
// c.appendLine() whenever a line is exhausted and when the terminator is found.
// Throws IOException("Unterminated comment at end of file.") when the input
// runs out before the terminator.
172 Comment snarfComment(Comment c) throws java.io.IOException {
173 StringBuffer text=new StringBuffer();
174 while(true) { // Grab CommentTail
175 while (line.charAt(line_pos)!='*') { // Add NotStar to comment.
176 int star_pos = line.indexOf('*', line_pos);
// No star case: the rest of the line is appended and flushed to c.
178 text.append(line.substring(line_pos));
179 c.appendLine(text.toString()); text.setLength(0);
180 line_pos = line.length();
183 throw new IOException("Unterminated comment at end of file.");
// Star found case: everything before the star is buffered.
185 text.append(line.substring(line_pos, star_pos));
189 // At this point, line.charAt(line_pos)=='*'
190 // Grab CommentTailStar starting at line_pos+1.
191 if (line.charAt(line_pos+1)=='/') { // safe because line ends with '\n'
192 c.appendLine(text.toString()); line_pos+=2; return c;
194 text.append(line.charAt(line_pos++)); // add the '*'
// Reads one token, choosing the specialised reader from the character at
// line_pos. Characters not handled by the switch fall through to the
// identifier and digit checks at the end.
// Throws IOException("Illegal character on line N") when nothing matches.
198 Token getToken() throws java.io.IOException {
199 // Tokens are: Identifiers, Keywords, Literals, Separators, Operators.
200 switch (line.charAt(line_pos)) {
201 // Separators: (period is a special case)
210 return new Separator(consume());
228 return getOperator();
231 return getCharLiteral();
234 return getStringLiteral();
236 // a period is a special case:
// A digit straight after the period means a numeric literal with a leading decimal point.
238 if (Character.digit(line.charAt(line_pos+1),10)!=-1)
239 return getNumericLiteral();
// Two more periods make an ellipsis, returned as a single Separator.
241 line.charAt(line_pos+1)=='.' &&
242 line.charAt(line_pos+2)=='.') {
243 consume(); consume(); consume();
244 return new Separator('\u2026'); // unicode ellipsis character.
245 } else return new Separator(consume());
250 if (Character.isJavaIdentifierStart(line.charAt(line_pos)))
251 return getIdentifier();
252 if (Character.isDigit(line.charAt(line_pos)))
253 return getNumericLiteral();
254 throw new IOException("Illegal character on line "+line_num);
// Reserved words recognised by getIdentifier(). That method binary-searches
// this array using String.compareTo, so entries must stay in ascending
// compareTo order; inserting a word out of order makes lookups miss.
257 static final String[] keywords = new String[] {
258 "abstract", "assert", "atomic", "boolean", "break", "byte", "case", "catch", "char",
259 "class", "const", "continue",
260 "default", "disjoint", "do", "double",
262 "extends", "external", "final", "finally",
263 "flag", //keyword for failure aware computation
264 "float", "for", "genreach", "getoffset", "global", "goto", "if",
266 "import", "instanceof", "int",
270 "native", "new", "optional", "package", "private", "protected", "public",
272 "scratch", "sese", "short", "static", "strictfp", "super", "switch", "synchronized",
273 "tag", "task", "taskexit", //keywords for failure aware computation
274 "this", "throw", "throws", "transient", "try", "void",
// Reads a run of identifier characters and classifies it, in this order:
// null literal, boolean literal, version-dependent identifier, keyword,
// plain identifier.
// Throws IOException("Invalid Java Identifier on line N") if the first
// character is not a valid identifier start.
277 Token getIdentifier() throws java.io.IOException {
279 StringBuffer sb = new StringBuffer().append(consume());
281 if (!Character.isJavaIdentifierStart(sb.charAt(0)))
282 throw new IOException("Invalid Java Identifier on line "+line_num);
283 while (Character.isJavaIdentifierPart(line.charAt(line_pos)))
284 sb.append(consume());
285 String s = sb.toString();
286 // Now check against boolean literals and null literal.
287 if (s.equals("null")) return new NullLiteral();
288 if (s.equals("true")) return new BooleanLiteral(true);
289 if (s.equals("false")) return new BooleanLiteral(false);
290 // Check against keywords.
// With the matching isJava flag off, these three words are returned as
// plain identifiers before the keyword table is consulted.
291 // pre-java 1.5 compatibility:
292 if (!isJava15 && s.equals("enum")) return new Identifier(s);
293 // pre-java 1.4 compatibility:
294 if (!isJava14 && s.equals("assert")) return new Identifier(s);
295 // pre-java 1.2 compatibility:
296 if (!isJava12 && s.equals("strictfp")) return new Identifier(s);
297 // use binary search.
// Half-open interval [l, r) over `keywords`; correct only while that array is
// sorted in String.compareTo order.
298 for (int l=0, r=keywords.length; r > l; ) {
299 int x = (l+r)/2, cmp = s.compareTo(keywords[x]);
300 if (cmp < 0) r=x;else l=x+1;
301 if (cmp== 0) return new Keyword(s);
304 return new Identifier(s);
// Works out which kind of numeric literal starts at line_pos and delegates to
// getFloatingPointLiteral() or getIntegerLiteral(radix).
// Throws java.io.IOException if the delegate rejects the literal.
306 NumericLiteral getNumericLiteral() throws java.io.IOException {
308 // leading decimal indicates float.
309 if (line.charAt(line_pos)=='.')
310 return getFloatingPointLiteral();
// A "0x" or "0X" prefix is skipped and the rest is read as base 16.
312 if (line.charAt(line_pos)=='0' &&
313 (line.charAt(line_pos+1)=='x' ||
314 line.charAt(line_pos+1)=='X')) {
315 line_pos+=2; return getIntegerLiteral(/*base*/ 16);
317 // otherwise scan to first non-numeric
// The scan uses a separate index i, so line_pos is left untouched.
318 for (i=line_pos; Character.digit(line.charAt(i),10)!=-1; )
320 switch(line.charAt(i)) { // discriminate based on first non-numeric
328 return getFloatingPointLiteral();
// Integer literal: a leading '0' selects base 8, anything else base 10.
333 if (line.charAt(line_pos)=='0')
334 return getIntegerLiteral(/*base*/ 8);
335 return getIntegerLiteral(/*base*/ 10);
// Reads digits valid in `radix` starting at line_pos and accumulates them into
// val. Returns a LongLiteral when an 'l' or 'L' suffix follows, otherwise an
// IntegerLiteral holding (int)val.
// Throws IOException("Constant does not fit in integer on line N") when the
// range check below fails.
338 NumericLiteral getIntegerLiteral(int radix) throws java.io.IOException {
// NOTE(review): nothing here guards val*radix against overflow for very long
// digit runs -- val is presumably a long; confirm.
340 while (Character.digit(line.charAt(line_pos),radix)!=-1)
341 val = (val*radix) + Character.digit(consume(),radix);
342 if (line.charAt(line_pos) == 'l' ||
343 line.charAt(line_pos) == 'L') {
345 return new LongLiteral(val);
347 // we compare MAX_VALUE against val/2 to allow constants like
348 // 0xFFFF0000 to get past the test. (unsigned long->signed int)
// NOTE(review): the test does not look at radix, so decimal and octal values
// up to 0xFFFFFFFF are accepted too and wrap in the (int) cast. val is built
// from non-negative digits, so the MIN_VALUE comparison can only trigger
// after overflow -- confirm both are intended.
349 if ((val/2) > Integer.MAX_VALUE ||
350 val < Integer.MIN_VALUE)
351 throw new IOException("Constant does not fit in integer on line "+line_num);
352 return new IntegerLiteral((int)val);
// Assembles the text of a floating-point literal in `rep` (digits, optional
// fraction, optional exponent with sign) and converts it with Float.valueOf
// or Double.valueOf, chosen by the character at line_pos after the number.
// Throws IOException("Illegal floating-point on line N: ...") when the
// conversion raises NumberFormatException.
354 NumericLiteral getFloatingPointLiteral() throws java.io.IOException {
355 String rep = getDigits();
356 if (line.charAt(line_pos)=='.')
357 rep+=consume() + getDigits();
358 if (line.charAt(line_pos)=='e' ||
359 line.charAt(line_pos)=='E') {
361 if (line.charAt(line_pos)=='+' ||
362 line.charAt(line_pos)=='-')
367 switch (line.charAt(line_pos)) {
371 return new FloatLiteral(Float.valueOf(rep).floatValue());
379 return new DoubleLiteral(Double.valueOf(rep).doubleValue());
// The NumberFormatException is folded into the message text; it is not
// attached as a cause.
381 } catch (NumberFormatException e) {
382 throw new IOException("Illegal floating-point on line "+line_num+": "+e);
// Consumes consecutive decimal digits starting at line_pos and returns them as
// a String, which is empty when the current character is not a digit.
// NOTE(review): presumably the body of getDigits(), which
// getFloatingPointLiteral() calls -- confirm.
386 StringBuffer sb = new StringBuffer();
387 while (Character.digit(line.charAt(line_pos),10)!=-1)
388 sb.append(consume());
389 return sb.toString();
// Reads an operator of one or more characters. The first character is
// consumed up front; the second is only looked at and is consumed when it
// turns out to belong to the operator.
392 Operator getOperator() {
393 char first = consume();
394 char second= line.charAt(line_pos);
397 // single-character operators.
401 return new Operator(new String(new char[] {first}));
409 return new Operator(new String(new char[] {first, consume()}));
414 // Check for trailing '='
416 return new Operator(new String(new char[] {first, consume()}));
418 // Special-case '<<', '>>' and '>>>'
419 if ((first=='<' && second=='<') || // <<
420 (first=='>' && second=='>')) { // >>
// The two-character shift is built first and then extended for a third '>'
// and for a trailing '='.
421 String op = new String(new char[] {first, consume()});
422 if (first=='>' && line.charAt(line_pos)=='>') // >>>
424 if (line.charAt(line_pos)=='=') // <<=, >>=, >>>=
426 return new Operator(op);
429 // Otherwise return single operator.
430 return new Operator(new String(new char[] {first}));
// Reads a character literal: opening quote, one character or escape sequence,
// closing quote. Both quotes are consumed first and validated afterwards.
// Throws IOException("Invalid character literal on line N") for malformed input.
433 CharacterLiteral getCharLiteral() throws java.io.IOException {
434 char firstquote = consume();
436 switch (line.charAt(line_pos)) {
438 val = getEscapeSequence();
442 throw new IOException("Invalid character literal on line "+line_num);
445 throw new IOException("Invalid character literal on line "+line_num);
451 char secondquote = consume();
452 if (firstquote != '\'' || secondquote != '\'')
453 throw new IOException("Invalid character literal on line "+line_num);
454 return new CharacterLiteral(val);
// Reads a string literal up to the closing double quote, decoding escape
// sequences through getEscapeSequence(). The resulting text is interned before
// being wrapped in a StringLiteral.
// Throws IOException("Invalid string literal on line N") for malformed input.
456 StringLiteral getStringLiteral() throws java.io.IOException {
457 char openquote = consume();
458 StringBuffer val = new StringBuffer();
459 while (line.charAt(line_pos)!='\"') {
460 switch(line.charAt(line_pos)) {
462 val.append(getEscapeSequence());
466 throw new IOException("Invalid string literal on line " + line_num);
469 val.append(consume());
473 char closequote = consume();
474 if (openquote != '\"' || closequote != '\"')
475 throw new IOException("Invalid string literal on line " + line_num);
477 return new StringLiteral(val.toString().intern());
// Decodes one backslash escape at line_pos and returns the character it
// denotes. Single-character escapes consume one character after the
// backslash; octal escapes are read through getOctal(3) or getOctal(2).
// Throws IOException("Invalid escape sequence on line N") when the backslash
// is missing or the escape is not recognised.
480 char getEscapeSequence() throws java.io.IOException {
481 if (consume() != '\\')
482 throw new IOException("Invalid escape sequence on line " + line_num);
483 switch(line.charAt(line_pos)) {
485 consume(); return '\b';
488 consume(); return '\t';
491 consume(); return '\n';
494 consume(); return '\f';
497 consume(); return '\r';
500 consume(); return '\"';
503 consume(); return '\'';
506 consume(); return '\\';
512 return (char) getOctal(3);
518 return (char) getOctal(2);
521 throw new IOException("Invalid escape sequence on line " + line_num);
// Reads at most `maxlength` octal digits starting at line_pos and accumulates
// their value in val.
// Throws IOException("Invalid octal escape sequence in line N") when no digit
// was read (i==0) or the value exceeds 0xFF.
524 int getOctal(int maxlength) throws java.io.IOException {
526 for (i=0; i<maxlength; i++)
527 if (Character.digit(line.charAt(line_pos), 8)!=-1) {
528 val = (8*val) + Character.digit(consume(), 8);
530 if ((i==0) || (val>0xFF)) // impossible.
531 throw new IOException("Invalid octal escape sequence in line " + line_num);
// Returns the character at line_pos and then advances line_pos by one.
// NOTE(review): presumably the body of consume() -- confirm.
536 return line.charAt(line_pos++);
// Fetches the next line from the reader. readLine() strips the terminator, so
// '\n' is appended again to every non-null line. `line` stays null when the
// reader has no more input.
// Throws java.io.IOException if the underlying read fails.
538 void nextLine() throws java.io.IOException {
539 line=reader.readLine();
540 if (line!=null) line=line+'\n';
// Pushes a record whose head is the previous record's head plus the current line_pos.
541 lineL = new LineList(lineL.head+line_pos, lineL); // for error reporting
546 // Deal with error messages.
// Prints msg to System.err together with the line number on which info.left
// falls. The LineList chain is walked from the newest record backwards,
// decrementing n each step, until a record with head <= info.left is found.
// c is then the offset of info.left within that line.
547 public void errorMsg(String msg, java_cup.runtime.Symbol info) {
548 int n=line_num, c=info.left-lineL.head;
549 for (LineList p = lineL; p!=null; p=p.tail, n--)
550 if (p.head<=info.left) {
551 c=info.left-p.head; break;
553 System.err.println(msg+" at line "+n);
// Error counter, initially 0.
// NOTE(review): presumably incremented by errorMsg() and returned by numErrors() -- confirm.
556 private int num_errors = 0;
557 public int numErrors() {
// Creates one list cell: `head` is the integer payload for this entry and
// `tail` links to the previously created entry (null for the first one).
564 LineList(int head, LineList tail) {
565 this.head = head; this.tail = tail;