4 import java.io.LineNumberReader;
8 * Copyright (C) 2002 C. Scott Ananian <cananian@alumni.princeton.edu>
9 * This program is released under the terms of the GPL; see the file
10 * COPYING for more details. There is NO WARRANTY on this code.
// Line-oriented view of the input; the constructor assigns it and nextLine() reads from it.
14 LineNumberReader reader;
// Newest node of the per-line offset list: nextLine() pushes one node per line
// read and errorMsg() walks the list to turn a symbol position into a line.
21 LineList lineL = new LineList(-line_pos, null); // sentinel for line #0
/**
 * Creates a lexer that reads from {@code reader}.
 * The input is wrapped in an EscapedUnicodeReader and then in a
 * LineNumberReader, which is stored in the {@code reader} field.
 *
 * @param reader character source to tokenize
 */
23 public Lexer(Reader reader) {
24 this.reader = new LineNumberReader(new EscapedUnicodeReader(reader));
/**
 * Obtains the next symbol of the token stream.
 * When a lookahead FIFO exists the symbol is requested from it; otherwise it
 * is lexed directly with _nextToken().
 *
 * @throws java.io.IOException declared because the lexing calls made here can throw it
 */
29 public java_cup.runtime.Symbol nextToken() throws java.io.IOException {
30 java_cup.runtime.Symbol sym =
31 lookahead==null ? _nextToken() : lookahead.get();
/**
 * Decides, by peeking at upcoming tokens, whether the current LT should be
 * turned into a PLT.
 * It tests {@code last} for being an IDENTIFIER and then matches the peeked
 * tokens against IDENTIFIER (DOT IDENTIFIER)* (LBRACK RBRACK)* followed by one
 * of LT, GT, COMMA, EXTENDS or IMPLEMENTS.
 * NOTE(review): PLT presumably marks the '<' that opens a type-argument
 * list -- confirm against the grammar.
 *
 * @throws java.io.IOException declared because peeking may lex further tokens
 */
35 private boolean shouldBePLT() throws java.io.IOException {
36 // look ahead to see if this LT should be changed to a PLT
37 if (last==null || last.sym!=Sym.IDENTIFIER)
// Create the lookahead FIFO on first use; its Getter supplies tokens from _nextToken().
39 if (lookahead==null) lookahead = new FIFO(new FIFO.Getter() {
40 java_cup.runtime.Symbol next() throws java.io.IOException
41 { return _nextToken(); }
44 // skip past IDENTIFIER (DOT IDENTIFIER)*
// i indexes into the lookahead buffer; i++ steps past each token once it has been tested.
45 if (lookahead.peek(i++).sym != Sym.IDENTIFIER)
47 while (lookahead.peek(i).sym == Sym.DOT) {
49 if (lookahead.peek(i++).sym != Sym.IDENTIFIER)
52 // skip past (LBRACK RBRACK)*
53 while (lookahead.peek(i).sym == Sym.LBRACK) {
55 if (lookahead.peek(i++).sym != Sym.RBRACK)
58 // now the next sym has to be one of LT GT COMMA EXTENDS IMPLEMENTS
59 switch(lookahead.peek(i).sym) {
// Token that shouldBePLT() tests for being an IDENTIFIER; null initially.
69 private java_cup.runtime.Symbol last = null;
// Buffer of peeked tokens; null until created (shouldBePLT() creates it on demand).
70 private FIFO lookahead = null;
/**
 * Lexes the next token directly from the input.
 * Input elements are fetched with getInputElement() until one is a Token;
 * whenever a DocumentationComment is seen its text is stored in {@code comment}.
 * The symbol produced by that token has its left/right fields replaced with
 * offsets computed from lineL.head + line_pos.
 *
 * @throws java.io.IOException declared because getInputElement() can throw it
 */
71 public java_cup.runtime.Symbol _nextToken() throws java.io.IOException {
73 * Identifiers/Keywords/true/false/null (start with java letter)
74 * numeric literal (start with number)
75 * character literal (start with single quote)
76 * string (start with double quote)
77 * separator (parens, braces, brackets, semicolon, comma, period)
78 * operator (equals, plus, minus, etc)
80 * comment (start with slash)
// Re-taken before every input element, so on loop exit it belongs to the
// element that turned out to be the Token.
85 startpos = lineL.head + line_pos;
86 ie = getInputElement();
87 if (ie instanceof DocumentationComment)
88 comment = ((Comment)ie).getComment();
89 } while (!(ie instanceof Token));
// Taken after the token was read; the -1 presumably makes this the offset of
// the token's last character rather than of the character after it.
90 endpos = lineL.head + line_pos - 1;
92 // System.out.println(ie.toString()); // uncomment to debug lexer.
93 java_cup.runtime.Symbol sym = ((Token)ie).token();
94 // fix up left/right positions.
95 sym.left = startpos; sym.right = endpos;
/**
 * Debugging aid: reads one input element and prints it to System.out.
 *
 * @return false when the element read is an EOF, true otherwise
 * @throws java.io.IOException declared because getInputElement() can throw it
 */
99 public boolean debug_lex() throws java.io.IOException {
100 InputElement ie = getInputElement();
101 System.out.println(ie);
102 return !(ie instanceof EOF);
106 public String lastComment() { return comment; }
107 public void clearComment() { comment=""; }
/**
 * Reads the input element that starts at line_pos.
 * The character at line_pos selects the kind of element; SP, HT, FF and the
 * line terminator are consumed one character at a time and returned as
 * WhiteSpace.
 *
 * Fix: the case labelled "ASCII SUB" used the literal '\020', which is octal
 * 20 = U+0010 (DLE). ASCII SUB (control-Z), the character the Java language
 * specification singles out at the end of input, is U+001A, i.e. octal 032,
 * so the label is now '\032'.
 *
 * @throws java.io.IOException declared because reading further input can throw it
 */
109 InputElement getInputElement() throws java.io.IOException {
// line_pos at or beyond the end of the current line means the line is used up.
114 if (line.length()<=line_pos) { // end of line.
120 switch (line.charAt(line_pos)) {
123 case ' ': // ASCII SP
124 case '\t': // ASCII HT
125 case '\f': // ASCII FF
126 case '\n': // LineTerminator
127 return new WhiteSpace(consume());
130 case '\032': // ASCII SUB
// Handles input that starts with '/': the second character decides between an
// end-of-line comment, a block comment (traditional or documentation), or --
// in the default case -- an ordinary token.
143 // May get Token instead of Comment.
144 InputElement getComment() throws java.io.IOException {
146 // line.charAt(line_pos+0) is '/'
// Reading line_pos+1 is in range: nextLine() appends '\n' to every line, so
// '/' is never the last character.
147 switch (line.charAt(line_pos+1)) {
148 case '/': // EndOfLineComment
// The comment text is everything after the two slashes up to the end of the
// line (including the appended '\n'); the rest of the line is then skipped.
// NOTE(review): if `comment` here is the same field that lastComment()
// returns, every end-of-line comment overwrites the stored documentation
// comment -- confirm whether a local variable of this name is declared.
149 comment = line.substring(line_pos+2);
150 line_pos = line.length();
151 return new EndOfLineComment(comment);
152 case '*': // TraditionalComment or DocumentationComment
// NOTE(review): if line_pos points just past "/*" at this test, the empty
// comment "/**/" also takes the DocumentationComment branch -- confirm.
154 if (line.charAt(line_pos)=='*') { // DocumentationComment
155 return snarfComment(new DocumentationComment());
156 } else { // TraditionalComment
157 return snarfComment(new TraditionalComment());
159 default: // it's a token, not a comment.
// Collects the body of a block comment into c, starting at line_pos.
// Text is gathered in a buffer and handed to c.appendLine(...) one line at a
// time; when "*/" is found the pending text is appended, line_pos is moved
// past the terminator and c is returned. A '*' that is not followed by '/' is
// kept as ordinary comment text.
// Throws Error("Unterminated comment at end of file.") when the input ends first.
164 Comment snarfComment(Comment c) throws java.io.IOException {
165 StringBuffer text=new StringBuffer();
166 while(true) { // Grab CommentTail
167 while (line.charAt(line_pos)!='*') { // Add NotStar to comment.
168 int star_pos = line.indexOf('*', line_pos);
// Remainder of the line is comment text: flush it to c as one line, reset the
// buffer and mark the line as fully consumed.
170 text.append(line.substring(line_pos));
171 c.appendLine(text.toString()); text.setLength(0);
172 line_pos = line.length();
175 throw new Error("Unterminated comment at end of file.");
// Text in front of the star found on this line.
177 text.append(line.substring(line_pos, star_pos));
181 // At this point, line.charAt(line_pos)=='*'
182 // Grab CommentTailStar starting at line_pos+1.
183 if (line.charAt(line_pos+1)=='/') { // safe because line ends with '\n'
184 c.appendLine(text.toString()); line_pos+=2; return c;
186 text.append(line.charAt(line_pos++)); // add the '*'
// Dispatches on the character at line_pos to the lexer for the matching kind
// of token: separators are consumed directly, while operators, character
// literals, string literals, numbers and identifiers are delegated to
// getOperator(), getCharLiteral(), getStringLiteral(), getNumericLiteral()
// and getIdentifier().
191 // Tokens are: Identifiers, Keywords, Literals, Separators, Operators.
192 switch (line.charAt(line_pos)) {
193 // Separators: (period is a special case)
202 return new Separator(consume());
220 return getOperator();
222 return getCharLiteral();
224 return getStringLiteral();
226 // a period is a special case:
// A period directly followed by a decimal digit starts a numeric literal.
228 if (Character.digit(line.charAt(line_pos+1),10)!=-1)
229 return getNumericLiteral();
// Three periods in a row become one Separator carrying U+2026; line_pos+2 is
// only read after line_pos+1 was found to be '.', so it stays inside the
// '\n'-terminated line.
231 line.charAt(line_pos+1)=='.' &&
232 line.charAt(line_pos+2)=='.') {
233 consume(); consume(); consume();
234 return new Separator('\u2026'); // unicode ellipsis character.
235 } else return new Separator(consume());
// Fallback: classify by character class; anything that starts neither an
// identifier nor a number is rejected with an Error.
239 if (Character.isJavaIdentifierStart(line.charAt(line_pos)))
240 return getIdentifier();
241 if (Character.isDigit(line.charAt(line_pos)))
242 return getNumericLiteral();
243 throw new Error("Illegal character on line "+line_num);
// Words that getIdentifier() turns into Keyword tokens. getIdentifier()
// binary-searches this array with String.compareTo, so the entries must stay
// in ascending compareTo order when words are added.
246 static final String[] keywords = new String[] {
247 "abstract", "assert", "atomic", "boolean", "break", "byte", "case", "catch", "char",
248 "class", "const", "continue", "default", "do", "double", "else", "enum",
249 "extends", "external", "final", "finally",
250 "flag", //keyword for failure aware computation
251 "float", "for", "global", "goto", "if",
252 "implements", "import", "instanceof", "int", "interface", "isavailable",
254 "native", "new", "optional", "package", "private", "protected", "public",
255 "return", "short", "static", "strictfp", "super", "switch", "synchronized",
256 "tag", "task", "taskexit", //keywords for failure aware computation
257 "this", "throw", "throws", "transient", "try", "void",
258 "volatile", "while"};
/**
 * Lexes the identifier-shaped word starting at line_pos and classifies it.
 * "null", "true" and "false" become literal tokens; "enum", "assert" and
 * "strictfp" are returned as plain identifiers when the isJava15, isJava14 or
 * isJava12 flag, respectively, is off; any other word found in
 * {@code keywords} becomes a Keyword and everything else an Identifier.
 *
 * @return the literal, keyword or identifier token for the word
 * @throws Error if the first character is not a Java identifier start
 */
259 Token getIdentifier() {
261 StringBuffer sb = new StringBuffer().append(consume());
// The first character was consumed unconditionally above, so it is validated afterwards.
263 if (!Character.isJavaIdentifierStart(sb.charAt(0)))
264 throw new Error("Invalid Java Identifier on line "+line_num);
265 while (Character.isJavaIdentifierPart(line.charAt(line_pos)))
266 sb.append(consume());
267 String s = sb.toString();
268 // Now check against boolean literals and null literal.
269 if (s.equals("null")) return new NullLiteral();
270 if (s.equals("true")) return new BooleanLiteral(true);
271 if (s.equals("false")) return new BooleanLiteral(false);
272 // Check against keywords.
273 // pre-java 1.5 compatibility:
274 if (!isJava15 && s.equals("enum")) return new Identifier(s);
275 // pre-java 1.4 compatibility:
276 if (!isJava14 && s.equals("assert")) return new Identifier(s);
277 // pre-java 1.2 compatibility:
278 if (!isJava12 && s.equals("strictfp")) return new Identifier(s);
279 // use binary search.
// Search interval is half-open [l, r): a smaller word keeps the left half,
// otherwise the right half; an exact match returns at once.
280 for (int l=0, r=keywords.length; r > l; ) {
281 int x = (l+r)/2, cmp = s.compareTo(keywords[x]);
282 if (cmp < 0) r=x; else l=x+1;
283 if (cmp== 0) return new Keyword(s);
286 return new Identifier(s);
/**
 * Chooses the numeric-literal lexer for the text at line_pos.
 * A leading '.' goes to getFloatingPointLiteral(); a "0x" or "0X" prefix is
 * skipped and the remainder lexed as a base-16 integer; otherwise the first
 * character after the leading decimal digits decides between a floating-point
 * and an integer literal, where a leading '0' selects base 8 and anything
 * else base 10.
 */
288 NumericLiteral getNumericLiteral() {
290 // leading decimal indicates float.
291 if (line.charAt(line_pos)=='.')
292 return getFloatingPointLiteral();
294 if (line.charAt(line_pos)=='0' &&
295 (line.charAt(line_pos+1)=='x' ||
296 line.charAt(line_pos+1)=='X')) {
297 line_pos+=2; return getIntegerLiteral(/*base*/16);
299 // otherwise scan to first non-numeric
// The scan uses a separate index i, so line_pos still points at the first digit.
300 for (i=line_pos; Character.digit(line.charAt(i),10)!=-1; )
302 switch(line.charAt(i)) { // discriminate based on first non-numeric
310 return getFloatingPointLiteral();
// NOTE(review): with a leading '0' the base-8 lexer stops at the first digit
// that is not octal, so input such as "09" presumably yields 0 followed by a
// separate token instead of an error -- confirm this is acceptable.
314 if (line.charAt(line_pos)=='0')
315 return getIntegerLiteral(/*base*/8);
316 return getIntegerLiteral(/*base*/10);
/**
 * Lexes an integer literal in the given radix starting at line_pos.
 * Digits valid in {@code radix} are accumulated into val; a following 'l' or
 * 'L' produces a LongLiteral, otherwise the value is range-checked and
 * narrowed to an IntegerLiteral.
 *
 * @param radix base used to interpret the digits (callers pass 8, 10 or 16)
 * @throws Error if the value without a long suffix fails the range check
 */
319 NumericLiteral getIntegerLiteral(int radix) {
// NOTE(review): nothing in this loop checks val for overflow, so a literal
// with too many digits presumably wraps silently -- confirm.
321 while (Character.digit(line.charAt(line_pos),radix)!=-1)
322 val = (val*radix) + Character.digit(consume(),radix);
323 if (line.charAt(line_pos) == 'l' ||
324 line.charAt(line_pos) == 'L') {
326 return new LongLiteral(val);
328 // we compare MAX_VALUE against val/2 to allow constants like
329 // 0xFFFF0000 to get past the test. (unsigned long->signed int)
// NOTE(review): the relaxed upper bound does not depend on radix, so decimal
// constants above Integer.MAX_VALUE (up to 4294967295) also pass and are
// narrowed by the cast below -- confirm this is intended.
330 if ((val/2) > Integer.MAX_VALUE ||
331 val < Integer.MIN_VALUE)
332 throw new Error("Constant does not fit in integer on line "+line_num);
333 return new IntegerLiteral((int)val);
/**
 * Lexes a floating-point literal starting at line_pos.
 * The textual form is assembled in rep from the leading digits, an optional
 * '.' with further digits, and an optional exponent introduced by 'e' or 'E'
 * (a '+' or '-' sign is recognized after it). A switch on the next character
 * then chooses between a FloatLiteral parsed with Float.valueOf and a
 * DoubleLiteral parsed with Double.valueOf.
 *
 * @throws Error wrapping the NumberFormatException if rep cannot be parsed
 */
335 NumericLiteral getFloatingPointLiteral() {
336 String rep = getDigits();
337 if (line.charAt(line_pos)=='.')
// char + String is string concatenation here: the '.' followed by the fraction digits.
338 rep+=consume() + getDigits();
339 if (line.charAt(line_pos)=='e' ||
340 line.charAt(line_pos)=='E') {
342 if (line.charAt(line_pos)=='+' ||
343 line.charAt(line_pos)=='-')
348 switch (line.charAt(line_pos)) {
352 return new FloatLiteral(Float.valueOf(rep).floatValue());
358 return new DoubleLiteral(Double.valueOf(rep).doubleValue());
360 } catch (NumberFormatException e) {
361 throw new Error("Illegal floating-point on line "+line_num+": "+e);
// Consumes the run of decimal digits at line_pos and returns it as a String;
// the result is empty when the character at line_pos is not a digit.
365 StringBuffer sb = new StringBuffer();
366 while (Character.digit(line.charAt(line_pos),10)!=-1)
367 sb.append(consume());
368 return sb.toString();
/**
 * Lexes an operator starting at line_pos.
 * The first character is consumed immediately and the following one is only
 * inspected; depending on the pair the result is a one-character operator, a
 * two-character operator, or one of the shift forms "<<", ">>", ">>>" with an
 * optional trailing '='.
 *
 * @return an Operator holding the operator's text
 */
371 Operator getOperator() {
372 char first = consume();
// Look-ahead only: second is consumed later, and only if it belongs to the operator.
373 char second= line.charAt(line_pos);
376 // single-character operators.
380 return new Operator(new String(new char[] {first}));
387 return new Operator(new String(new char[] {first, consume()}));
391 // Check for trailing '='
393 return new Operator(new String(new char[] {first, consume()}));
395 // Special-case '<<', '>>' and '>>>'
396 if ((first=='<' && second=='<') || // <<
397 (first=='>' && second=='>')) { // >>
398 String op = new String(new char[] {first, consume()});
399 if (first=='>' && line.charAt(line_pos)=='>') // >>>
401 if (line.charAt(line_pos)=='=') // <<=, >>=, >>>=
403 return new Operator(op);
406 // Otherwise return single operator.
407 return new Operator(new String(new char[] {first}));
/**
 * Lexes a character literal starting at line_pos.
 * The opening quote is consumed, the character value is read (escape
 * sequences go through getEscapeSequence()), and the closing quote is consumed.
 *
 * @return a CharacterLiteral holding the decoded character
 * @throws Error if the content is invalid or either delimiter is not a single quote
 */
410 CharacterLiteral getCharLiteral() {
411 char firstquote = consume();
413 switch (line.charAt(line_pos)) {
415 val = getEscapeSequence();
418 throw new Error("Invalid character literal on line "+line_num);
420 throw new Error("Invalid character literal on line "+line_num);
// Both delimiters are validated together once the closing one has been consumed.
425 char secondquote = consume();
426 if (firstquote != '\'' || secondquote != '\'')
427 throw new Error("Invalid character literal on line "+line_num);
428 return new CharacterLiteral(val);
/**
 * Lexes a string literal starting at line_pos.
 * Characters are collected up to the closing double quote; escape sequences
 * are decoded with getEscapeSequence(), other characters are copied as they are.
 *
 * @return a StringLiteral holding the decoded, interned text
 * @throws Error if the content is invalid or either delimiter is not a double quote
 */
430 StringLiteral getStringLiteral() {
431 char openquote = consume();
432 StringBuffer val = new StringBuffer();
433 while (line.charAt(line_pos)!='\"') {
434 switch(line.charAt(line_pos)) {
436 val.append(getEscapeSequence());
439 throw new Error("Invalid string literal on line " + line_num);
441 val.append(consume());
445 char closequote = consume();
446 if (openquote != '\"' || closequote != '\"')
447 throw new Error("Invalid string literal on line " + line_num);
// The decoded text is interned before it is wrapped in the token.
449 return new StringLiteral(val.toString().intern());
/**
 * Decodes one escape sequence starting at line_pos.
 * The leading backslash is consumed first; the next character then selects
 * either one of the single-character escapes (backspace, tab, newline, form
 * feed, carriage return, double quote, single quote, backslash) or an octal
 * escape, which is read by getOctal() with a limit of 3 or 2 digits.
 * NOTE(review): which leading digits get the 3-digit limit is decided by the
 * case labels -- presumably 0-3, as in the Java language rules; confirm.
 *
 * @return the character the escape sequence denotes
 * @throws Error if the sequence does not start with a backslash or is not recognized
 */
452 char getEscapeSequence() {
453 if (consume() != '\\')
454 throw new Error("Invalid escape sequence on line " + line_num);
455 switch(line.charAt(line_pos)) {
457 consume(); return '\b';
459 consume(); return '\t';
461 consume(); return '\n';
463 consume(); return '\f';
465 consume(); return '\r';
467 consume(); return '\"';
469 consume(); return '\'';
471 consume(); return '\\';
476 return (char) getOctal(3);
481 return (char) getOctal(2);
483 throw new Error("Invalid escape sequence on line " + line_num);
/**
 * Reads up to {@code maxlength} octal digits at line_pos and accumulates
 * their value.
 *
 * @param maxlength maximum number of digits to read
 * @throws Error if no digit was read or the value exceeds 0xFF
 */
486 int getOctal(int maxlength) {
488 for (i=0; i<maxlength; i++)
489 if (Character.digit(line.charAt(line_pos), 8)!=-1) {
490 val = (8*val) + Character.digit(consume(), 8);
// i counts the digits read: zero digits or a value above 0xFF is rejected.
492 if ((i==0) || (val>0xFF)) // impossible.
493 throw new Error("Invalid octal escape sequence in line " + line_num);
497 char consume() { return line.charAt(line_pos++); }
/**
 * Fetches the next line of input into {@code line}.
 * A line that was read gets '\n' appended, which is what lets the lexing
 * methods look one character ahead without a bounds check; when readLine()
 * returns null, {@code line} stays null.
 * A new LineList node is pushed whose head is the previous head plus line_pos.
 *
 * @throws java.io.IOException if reading from the underlying reader fails
 */
498 void nextLine() throws java.io.IOException {
499 line=reader.readLine();
500 if (line!=null) line=line+'\n';
// line_pos still refers to the line just finished, so the sum is presumably
// the running character offset at which the new line starts.
501 lineL = new LineList(lineL.head+line_pos, lineL); // for error reporting
// Reports an error for the symbol info on System.err as msg + " at line " + n.
// The line number n starts at line_num and is decremented while walking the
// LineList from the newest node backwards, stopping at the first node whose
// head is not greater than info.left; c is info.left relative to that node's
// head (presumably the column within the line).
506 // Deal with error messages.
507 public void errorMsg(String msg, java_cup.runtime.Symbol info) {
508 int n=line_num, c=info.left-lineL.head;
509 for (LineList p = lineL; p!=null; p=p.tail, n--)
510 if (p.head<=info.left) { c=info.left-p.head; break; }
511 System.err.println(msg+" at line "+n);
514 private int num_errors = 0;
515 public int numErrors() { return num_errors; }
/**
 * Builds a list node.
 *
 * @param head value stored in this node's head field
 * @param tail following node, stored in the tail field; may be null (the
 *             sentinel node is created that way)
 */
LineList(int head, LineList tail) {
    this.tail = tail;
    this.head = head;
}