4 import java.io.LineNumberReader;
8 * Copyright (C) 2002 C. Scott Ananian <cananian@alumni.princeton.edu>
9 * This program is released under the terms of the GPL; see the file
10 * COPYING for more details. There is NO WARRANTY on this code.
// Line reader that supplies the source text; nextLine() pulls one line at a
// time from it via readLine().
14 LineNumberReader reader;
// Newest node of a linked list of per-line records. nextLine() pushes a node
// for every line it reads, and errorMsg() walks the list to map a symbol
// offset back to a line. The initial node has head = -line_pos and no tail.
21 LineList lineL = new LineList(-line_pos, null); // sentinel for line #0
// Creates a lexer over the given character stream. The stream is wrapped in
// an EscapedUnicodeReader and then in the LineNumberReader that nextLine()
// reads from.
// NOTE(review): EscapedUnicodeReader presumably decodes backslash-u escapes
// before lexing - confirm against that class.
23 public Lexer(Reader reader) {
24 this.reader = new LineNumberReader(new EscapedUnicodeReader(reader));
// Parser-facing entry point for fetching a symbol. While no lookahead FIFO
// exists the symbol is lexed directly with _nextToken(); once a FIFO has been
// created (see shouldBePLT) symbols are taken from it instead.
29 public java_cup.runtime.Symbol nextToken() throws java.io.IOException {
30 java_cup.runtime.Symbol sym =
31 lookahead==null ? _nextToken() : lookahead.get();
// Peeks ahead in the token stream, without consuming anything, to decide
// whether an LT symbol should be reported as PLT. The shape checked is
//   IDENTIFIER (DOT IDENTIFIER)* (LBRACK RBRACK)*
// followed by one of LT GT COMMA EXTENDS IMPLEMENTS.
35 private boolean shouldBePLT() throws java.io.IOException {
36 // look ahead to see if this LT should be changed to a PLT
// Only considered when the previously recorded symbol was an IDENTIFIER.
37 if (last==null || last.sym!=Sym.IDENTIFIER)
// The lookahead FIFO is created lazily on first use; it refills itself by
// calling _nextToken() through the Getter callback.
39 if (lookahead==null) lookahead = new FIFO(new FIFO.Getter() {
40 java_cup.runtime.Symbol next() throws java.io.IOException
41 { return _nextToken(); }
// i indexes into the FIFO; peek(i) inspects the i-th pending symbol.
44 // skip past IDENTIFIER (DOT IDENTIFIER)*
45 if (lookahead.peek(i++).sym != Sym.IDENTIFIER)
47 while (lookahead.peek(i).sym == Sym.DOT) {
49 if (lookahead.peek(i++).sym != Sym.IDENTIFIER)
52 // skip past (LBRACK RBRACK)*
53 while (lookahead.peek(i).sym == Sym.LBRACK) {
55 if (lookahead.peek(i++).sym != Sym.RBRACK)
58 // now the next sym has to be one of LT GT COMMA EXTENDS IMPLEMENTS
59 switch(lookahead.peek(i).sym) {
// Symbol that shouldBePLT() compares against Sym.IDENTIFIER; null initially.
// NOTE(review): presumably the symbol most recently handed out by
// nextToken() - confirm where it is assigned.
70 private java_cup.runtime.Symbol last = null;
// Queue of symbols lexed ahead of the parser. Stays null until shouldBePLT()
// creates it; nextToken() reads from it whenever it is non-null.
71 private FIFO lookahead = null;
// Lexes the next token and returns it as a CUP symbol. Input elements are
// fetched with getInputElement() until one of them is a Token; the others are
// skipped, except that the text of a DocumentationComment is kept in comment.
72 public java_cup.runtime.Symbol _nextToken() throws java.io.IOException {
74 * Identifiers/Keywords/true/false/null (start with java letter)
75 * numeric literal (start with number)
76 * character literal (start with single quote)
77 * string (start with double quote)
78 * separator (parens, braces, brackets, semicolon, comma, period)
79 * operator (equals, plus, minus, etc)
81 * comment (start with slash)
// startpos is recomputed before every element, so after the loop it marks the
// start of the element that turned out to be the Token. A position is the
// head offset of the current LineList node plus the column line_pos.
86 startpos = lineL.head + line_pos;
87 ie = getInputElement();
88 if (ie instanceof DocumentationComment)
89 comment = ((Comment)ie).getComment();
90 } while (!(ie instanceof Token));
// line_pos now sits one past the token, hence the -1 for an inclusive end.
91 endpos = lineL.head + line_pos - 1;
93 // System.out.println(ie.toString()); // uncomment to debug lexer.
94 java_cup.runtime.Symbol sym = ((Token)ie).token();
95 // fix up left/right positions.
96 sym.left = startpos; sym.right = endpos;
// Debugging aid: reads one input element, prints it to standard output and
// reports whether lexing can continue (false once the element is an EOF).
100 public boolean debug_lex() throws java.io.IOException {
101 InputElement ie = getInputElement();
102 System.out.println(ie);
103 return !(ie instanceof EOF);
// Accessor for comment text as a String.
// NOTE(review): presumably returns the documentation-comment text stored by
// _nextToken() - confirm against the method body.
107 public String lastComment() {
// NOTE(review): presumably discards the comment text that lastComment()
// exposes - confirm against the method body.
110 public void clearComment() {
// Returns the input element that starts at line_pos in the buffered line.
// Callers test the result against Token, DocumentationComment and EOF.
114 InputElement getInputElement() throws java.io.IOException {
// The cursor has run off the end of the buffered line.
119 if (line.length()<=line_pos) { // end of line.
// Dispatch on the character under the cursor.
125 switch (line.charAt(line_pos)) {
// Each white-space character is consumed and returned as its own element.
128 case ' ': // ASCII SP
129 case '\t': // ASCII HT
130 case '\f': // ASCII FF
131 case '\n': // LineTerminator
132 return new WhiteSpace(consume());
// NOTE(review): the octal escape \020 has the value 16 (ASCII DLE), whereas
// ASCII SUB (Ctrl-Z) is 26, which would be written \032. Either the literal
// or the trailing comment looks wrong - confirm the intended character.
135 case '\020': // ASCII SUB
// Handles input whose first character is a slash and chooses between an
// end-of-line comment, a block comment and an ordinary token.
148 // May get Token instead of Comment.
149 InputElement getComment() throws java.io.IOException {
151 // line.charAt(line_pos+0) is '/'
152 switch (line.charAt(line_pos+1)) {
153 case '/': // EndOfLineComment
// The comment text is everything after the two slashes up to the end of the
// buffered line, including the newline that nextLine() appends. The cursor
// then jumps to the end of the line.
154 comment = line.substring(line_pos+2);
155 line_pos = line.length();
156 return new EndOfLineComment(comment);
158 case '*': // TraditionalComment or DocumentationComment
// A further star at the cursor selects the documentation-comment flavour.
// NOTE(review): this reads line.charAt(line_pos), so the cursor has
// presumably been advanced past the two opening characters just before this
// test - confirm.
160 if (line.charAt(line_pos)=='*') { // DocumentationComment
161 return snarfComment(new DocumentationComment());
162 } else { // TraditionalComment
163 return snarfComment(new TraditionalComment());
166 default: // it's a token, not a comment.
// Collects the body of a block comment into c and returns c once the closing
// star-slash pair has been consumed. Text is handed to c through appendLine().
171 Comment snarfComment(Comment c) throws java.io.IOException {
// text buffers the part of the current line gathered so far.
172 StringBuffer text=new StringBuffer();
173 while(true) { // Grab CommentTail
174 while (line.charAt(line_pos)!='*') { // Add NotStar to comment.
175 int star_pos = line.indexOf('*', line_pos);
// The rest of the line is comment text: flush it to c as one line, reset the
// buffer and move the cursor to the end of the line.
// NOTE(review): presumably the branch taken when indexOf found no star
// (star_pos negative) - confirm against the guarding condition.
177 text.append(line.substring(line_pos));
178 c.appendLine(text.toString()); text.setLength(0);
179 line_pos = line.length();
182 throw new Error("Unterminated comment at end of file.");
// Copy the text that precedes the star located by indexOf.
184 text.append(line.substring(line_pos, star_pos));
188 // At this point, line.charAt(line_pos)=='*'
189 // Grab CommentTailStar starting at line_pos+1.
190 if (line.charAt(line_pos+1)=='/') { // safe because line ends with '\n'
// End of comment: flush the pending text and skip both closing characters.
191 c.appendLine(text.toString()); line_pos+=2; return c;
// A star that does not close the comment is ordinary comment text.
193 text.append(line.charAt(line_pos++)); // add the '*'
// Token dispatch: inspects the character under the cursor and delegates to
// the matching sub-lexer (separator, operator, character literal, string
// literal, numeric literal or identifier).
198 // Tokens are: Identifiers, Keywords, Literals, Separators, Operators.
199 switch (line.charAt(line_pos)) {
200 // Separators: (period is a special case)
209 return new Separator(consume());
227 return getOperator();
230 return getCharLiteral();
233 return getStringLiteral();
235 // a period is a special case:
// A period directly followed by a decimal digit starts a numeric literal.
237 if (Character.digit(line.charAt(line_pos+1),10)!=-1)
238 return getNumericLiteral();
// Three periods in a row are consumed together and reported as a single
// Separator carrying U+2026; any other period is a plain separator.
// NOTE(review): a leading condition may further gate the three-period case -
// confirm against the full if-statement.
240 line.charAt(line_pos+1)=='.' &&
241 line.charAt(line_pos+2)=='.') {
242 consume(); consume(); consume();
243 return new Separator('\u2026'); // unicode ellipsis character.
244 } else return new Separator(consume());
// Characters not matched above: identifier start, then digit, else an error.
249 if (Character.isJavaIdentifierStart(line.charAt(line_pos)))
250 return getIdentifier();
251 if (Character.isDigit(line.charAt(line_pos)))
252 return getNumericLiteral();
253 throw new Error("Illegal character on line "+line_num);
// Reserved words recognised by getIdentifier().
// getIdentifier() binary-searches this array using String.compareTo, so the
// entries MUST stay in ascending compareTo order. Insert new keywords at
// their sorted position; an out-of-order entry can make a keyword lex as a
// plain identifier.
256 static final String[] keywords = new String[] {
257 "abstract", "assert", "atomic", "boolean", "break", "byte", "case", "catch", "char",
258 "class", "const", "continue", "default", "do", "double", "else", "enum",
259 "extends", "external", "final", "finally",
260 "flag", //keyword for failure aware computation
261 "float", "for","getoffset", "global", "goto", "if",
262 "implements", "import", "instanceof", "int", "interface", "isavailable",
264 "native", "new", "optional", "package", "private", "protected", "public",
265 "return", "sese", "short", "static", "strictfp", "super", "switch", "synchronized",
266 "tag", "task", "taskexit", //keywords for failure aware computation
267 "this", "throw", "throws", "transient", "try", "void",
// Lexes an identifier-shaped word and classifies it as a null literal, a
// boolean literal, a Keyword or an Identifier.
// Throws Error when the first character cannot start a Java identifier.
270 Token getIdentifier() {
// The first character is consumed unconditionally and validated afterwards.
272 StringBuffer sb = new StringBuffer().append(consume());
274 if (!Character.isJavaIdentifierStart(sb.charAt(0)))
275 throw new Error("Invalid Java Identifier on line "+line_num);
276 while (Character.isJavaIdentifierPart(line.charAt(line_pos)))
277 sb.append(consume());
278 String s = sb.toString();
279 // Now check against boolean literals and null literal.
280 if (s.equals("null")) return new NullLiteral();
281 if (s.equals("true")) return new BooleanLiteral(true);
282 if (s.equals("false")) return new BooleanLiteral(false);
283 // Check against keywords.
// Words that became keywords in later language versions are downgraded to
// identifiers when the corresponding isJavaNN flag is off.
284 // pre-java 1.5 compatibility:
285 if (!isJava15 && s.equals("enum")) return new Identifier(s);
286 // pre-java 1.4 compatibility:
287 if (!isJava14 && s.equals("assert")) return new Identifier(s);
288 // pre-java 1.2 compatibility:
289 if (!isJava12 && s.equals("strictfp")) return new Identifier(s);
290 // use binary search.
// Half-open search window [l, r) over the sorted keywords array: x is the
// midpoint, a smaller word moves r down, anything else moves l past x, and an
// exact match returns immediately.
291 for (int l=0, r=keywords.length; r > l; ) {
292 int x = (l+r)/2, cmp = s.compareTo(keywords[x]);
293 if (cmp < 0) r=x;else l=x+1;
294 if (cmp== 0) return new Keyword(s);
// The search window emptied without a match.
297 return new Identifier(s);
// Classifies the numeric literal at the cursor by its prefix and by the first
// non-digit character, then delegates to the integer or floating-point lexer.
299 NumericLiteral getNumericLiteral() {
301 // leading decimal indicates float.
302 if (line.charAt(line_pos)=='.')
303 return getFloatingPointLiteral();
// A 0x or 0X prefix is skipped and the rest is read as hexadecimal.
305 if (line.charAt(line_pos)=='0' &&
306 (line.charAt(line_pos+1)=='x' ||
307 line.charAt(line_pos+1)=='X')) {
308 line_pos+=2; return getIntegerLiteral(/*base*/ 16);
310 // otherwise scan to first non-numeric
// i runs ahead of the cursor; nothing is consumed by this scan.
311 for (i=line_pos; Character.digit(line.charAt(i),10)!=-1; )
313 switch(line.charAt(i)) { // discriminate based on first non-numeric
321 return getFloatingPointLiteral();
// Integer literal: a leading zero selects octal, anything else decimal.
326 if (line.charAt(line_pos)=='0')
327 return getIntegerLiteral(/*base*/ 8);
328 return getIntegerLiteral(/*base*/ 10);
// Reads digits in the given radix and returns a LongLiteral when the literal
// carries an l or L suffix, otherwise an IntegerLiteral.
// Throws Error when an unsuffixed value fails the int range test below.
331 NumericLiteral getIntegerLiteral(int radix) {
// Accumulate the value digit by digit.
// NOTE(review): the accumulation is not overflow-checked, so a literal with
// too many digits would wrap silently - confirm whether that matters.
333 while (Character.digit(line.charAt(line_pos),radix)!=-1)
334 val = (val*radix) + Character.digit(consume(),radix);
335 if (line.charAt(line_pos) == 'l' ||
336 line.charAt(line_pos) == 'L') {
338 return new LongLiteral(val);
340 // we compare MAX_VALUE against val/2 to allow constants like
341 // 0xFFFF0000 to get past the test. (unsigned long->signed int)
// NOTE(review): assuming val is a long, this test accepts every value up to
// 4294967295 whatever the radix, so an oversized decimal literal is accepted
// too and then narrowed by the cast below - confirm this is intended.
342 if ((val/2) > Integer.MAX_VALUE ||
343 val < Integer.MIN_VALUE)
344 throw new Error("Constant does not fit in integer on line "+line_num);
345 return new IntegerLiteral((int)val);
// Lexes a floating-point literal. The textual form is gathered into rep
// (digits, optional fraction, optional exponent) and converted by
// Float.valueOf or Double.valueOf depending on the character that follows.
// Throws Error when the gathered text does not parse as a number.
347 NumericLiteral getFloatingPointLiteral() {
348 String rep = getDigits();
// Optional fraction: the period plus any digits after it.
349 if (line.charAt(line_pos)=='.')
350 rep+=consume() + getDigits();
// Optional exponent marker, which may be followed by a sign.
351 if (line.charAt(line_pos)=='e' ||
352 line.charAt(line_pos)=='E') {
354 if (line.charAt(line_pos)=='+' ||
355 line.charAt(line_pos)=='-')
// The character after the number selects the literal type.
360 switch (line.charAt(line_pos)) {
364 return new FloatLiteral(Float.valueOf(rep).floatValue());
372 return new DoubleLiteral(Double.valueOf(rep).doubleValue());
374 } catch (NumberFormatException e) {
375 throw new Error("Illegal floating-point on line "+line_num+": "+e);
// Consumes the run of decimal digits at the cursor and returns it as a
// string; the result is empty when the cursor is not on a digit.
// NOTE(review): presumably the body of getDigits(), which
// getFloatingPointLiteral() calls - confirm.
379 StringBuffer sb = new StringBuffer();
380 while (Character.digit(line.charAt(line_pos),10)!=-1)
381 sb.append(consume());
382 return sb.toString();
// Lexes an operator of one to four characters and returns it as an Operator
// holding the operator text.
385 Operator getOperator() {
// first is consumed; second is only peeked and consumed later if it turns
// out to belong to the operator.
386 char first = consume();
387 char second= line.charAt(line_pos);
390 // single-character operators.
394 return new Operator(new String(new char[] {first}));
// Two-character operator: the peeked character is consumed as well.
402 return new Operator(new String(new char[] {first, consume()}));
407 // Check for trailing '='
409 return new Operator(new String(new char[] {first, consume()}));
411 // Special-case '<<', '>>' and '>>>'
412 if ((first=='<' && second=='<') || // <<
413 (first=='>' && second=='>')) { // >>
414 String op = new String(new char[] {first, consume()});
// A third greater-than sign extends the right shift to the unsigned form,
// and a trailing equals sign turns any shift into its assignment form.
415 if (first=='>' && line.charAt(line_pos)=='>') // >>>
417 if (line.charAt(line_pos)=='=') // <<=, >>=, >>>=
419 return new Operator(op);
422 // Otherwise return single operator.
423 return new Operator(new String(new char[] {first}));
// Lexes a character literal: an opening quote, one character or escape
// sequence, and a closing quote.
// Throws Error when the literal is malformed.
426 CharacterLiteral getCharLiteral() {
427 char firstquote = consume();
429 switch (line.charAt(line_pos)) {
// Escaped character. getEscapeSequence() itself requires a backslash at the
// cursor, so this is presumably the backslash case - confirm.
431 val = getEscapeSequence();
435 throw new Error("Invalid character literal on line "+line_num);
438 throw new Error("Invalid character literal on line "+line_num);
// Both delimiters are checked only after the closing one has been consumed.
444 char secondquote = consume();
445 if (firstquote != '\'' || secondquote != '\'')
446 throw new Error("Invalid character literal on line "+line_num);
447 return new CharacterLiteral(val);
// Lexes a string literal and returns its decoded, interned value.
// Throws Error when the literal is malformed.
449 StringLiteral getStringLiteral() {
450 char openquote = consume();
451 StringBuffer val = new StringBuffer();
// Gather characters until the closing double quote is under the cursor.
452 while (line.charAt(line_pos)!='\"') {
453 switch(line.charAt(line_pos)) {
// Escape sequences are decoded to the character they denote.
455 val.append(getEscapeSequence());
459 throw new Error("Invalid string literal on line " + line_num);
// Any other character is copied through unchanged.
462 val.append(consume());
466 char closequote = consume();
467 if (openquote != '\"' || closequote != '\"')
468 throw new Error("Invalid string literal on line " + line_num);
// intern() makes equal literal values share one String instance.
470 return new StringLiteral(val.toString().intern());
// Decodes the escape sequence at the cursor and returns the character it
// denotes. The cursor must be on a backslash.
// Throws Error when the backslash is missing or the escape is not recognised.
473 char getEscapeSequence() {
474 if (consume() != '\\')
475 throw new Error("Invalid escape sequence on line " + line_num);
// Dispatch on the character that follows the backslash.
476 switch(line.charAt(line_pos)) {
478 consume(); return '\b';
481 consume(); return '\t';
484 consume(); return '\n';
487 consume(); return '\f';
490 consume(); return '\r';
493 consume(); return '\"';
496 consume(); return '\'';
499 consume(); return '\\';
// Octal escapes: up to three digits in one case, up to two in the other.
// NOTE(review): presumably a leading digit 0-3 allows three digits and 4-7
// only two, which keeps the value within 0xFF - confirm the case labels.
505 return (char) getOctal(3);
511 return (char) getOctal(2);
514 throw new Error("Invalid escape sequence on line " + line_num);
// Reads at most maxlength octal digits at the cursor and returns their value.
// Throws Error when no digit was read or the value exceeds 0xFF.
517 int getOctal(int maxlength) {
// i counts the digits consumed so far.
519 for (i=0; i<maxlength; i++)
520 if (Character.digit(line.charAt(line_pos), 8)!=-1) {
521 val = (8*val) + Character.digit(consume(), 8);
523 if ((i==0) || (val>0xFF)) // impossible.
524 throw new Error("Invalid octal escape sequence in line " + line_num);
// Returns the character under the cursor and advances line_pos by one.
529 return line.charAt(line_pos++);
// Loads the next source line into line and records where it starts.
531 void nextLine() throws java.io.IOException {
// readLine() returns the line without its terminator, or null at end of
// input. A newline is re-appended so every non-null line ends with one,
// which the lookahead reads at line_pos+1 elsewhere in the lexer rely on.
532 line=reader.readLine();
533 if (line!=null) line=line+'\n';
// The new node starts at the previous node head plus the column reached in
// the previous line.
534 lineL = new LineList(lineL.head+line_pos, lineL); // for error reporting
539 // Deal with error messages.
// Prints msg to standard error together with the line on which the symbol
// info starts. The LineList chain is walked from the newest line backwards,
// decrementing n each step, until a line whose start offset is not beyond
// info.left is found; c is then the column of the symbol within that line.
540 public void errorMsg(String msg, java_cup.runtime.Symbol info) {
541 int n=line_num, c=info.left-lineL.head;
542 for (LineList p = lineL; p!=null; p=p.tail, n--)
543 if (p.head<=info.left) {
544 c=info.left-p.head; break;
546 System.err.println(msg+" at line "+n);
// Error counter, initially zero.
549 private int num_errors = 0;
// NOTE(review): presumably returns num_errors - confirm against the body.
550 public int numErrors() {
// Builds one node of the per-line linked list: head is the offset at which
// the line starts and tail is the node of the line read before it (null for
// the first node).
557 LineList(int head, LineList tail) {
558 this.head = head; this.tail = tail;