4 import java.io.LineNumberReader;
8 * Copyright (C) 2002 C. Scott Ananian <cananian@alumni.princeton.edu>
9 * This program is released under the terms of the GPL; see the file
10 * COPYING for more details. There is NO WARRANTY on this code.
// Line-oriented source of input text; nextLine() calls readLine() on it.
14 LineNumberReader reader;
// Newest node of a linked list with one node per line read. Each node's head is the
// previous head plus the cursor position at the time nextLine() ran, i.e. a running
// character offset. errorMsg() walks the list to map a symbol position back to a line.
21 LineList lineL = new LineList(-line_pos, null); // sentinel for line #0
// Creates a lexer over the given character stream.
// Only the first statement of the constructor is visible in this excerpt.
23 public Lexer(Reader reader) {
// Wrap the input twice: EscapedUnicodeReader (defined elsewhere; presumably decodes
// Unicode escapes - confirm) and then LineNumberReader so whole lines can be read.
24 this.reader = new LineNumberReader(new EscapedUnicodeReader(reader));
// Returns the next symbol for the parser.
// When no lookahead queue has been created the symbol is scanned directly with
// _nextToken(); otherwise it is taken from the queue via lookahead.get().
// The remainder of the method is not visible in this excerpt.
29 public java_cup.runtime.Symbol nextToken() throws java.io.IOException {
30 java_cup.runtime.Symbol sym =
31 lookahead==null ? _nextToken() : lookahead.get();
// Decides, by peeking at upcoming tokens, whether the current LT symbol should be
// reported as a PLT instead. (PLT is declared elsewhere; presumably a distinct "<"
// symbol used for type parameters - confirm against Sym.)
// Several lines of this method, including the declaration of i and every return
// statement, are not visible in this excerpt.
35 private boolean shouldBePLT() throws java.io.IOException {
36 // look ahead to see if this LT should be changed to a PLT
// Guard: the rest of the check only applies when the previous symbol was an IDENTIFIER.
37 if (last==null || last.sym!=Sym.IDENTIFIER)
// Create the lookahead queue on first use; it pulls further symbols from _nextToken().
39 if (lookahead==null) lookahead = new FIFO(new FIFO.Getter() {
40 java_cup.runtime.Symbol next() throws java.io.IOException
41 { return _nextToken(); }
44 // skip past IDENTIFIER (DOT IDENTIFIER)*
// peek(i) inspects the i-th queued symbol; i advances past each symbol that matches.
45 if (lookahead.peek(i++).sym != Sym.IDENTIFIER)
47 while (lookahead.peek(i).sym == Sym.DOT) {
49 if (lookahead.peek(i++).sym != Sym.IDENTIFIER)
52 // skip past (LBRACK RBRACK)*
53 while (lookahead.peek(i).sym == Sym.LBRACK) {
55 if (lookahead.peek(i++).sym != Sym.RBRACK)
58 // now the next sym has to be one of LT GT COMMA EXTENDS IMPLEMENTS
// The case labels and their results are not visible in this excerpt.
59 switch(lookahead.peek(i).sym) {
// Symbol consulted by shouldBePLT() to see whether an IDENTIFIER came just before.
// Presumably the most recently returned symbol; the assignment is not visible here.
70 private java_cup.runtime.Symbol last = null;
// Queue of symbols scanned ahead of the parser. Stays null until shouldBePLT()
// creates it; nextToken() reads from it once it exists.
71 private FIFO lookahead = null;
// Scans input elements until one of them is a Token and returns that token's symbol
// with its left/right character positions filled in. Non-token elements are skipped;
// the text of a DocumentationComment is remembered in the comment field on the way.
// Declarations, the opening of the loop and the return are not visible in this excerpt.
72 public java_cup.runtime.Symbol _nextToken() throws java.io.IOException {
74 * Identifiers/Keywords/true/false/null (start with java letter)
75 * numeric literal (start with number)
76 * character literal (start with single quote)
77 * string (start with double quote)
78 * separator (parens, braces, brackets, semicolon, comma, period)
79 * operator (equals, plus, minus, etc)
81 * comment (start with slash)
// Position of the element about to be read: offset of the current line plus the
// cursor within it. Recomputed on every pass, so it ends up describing the token.
86 startpos = lineL.head + line_pos;
87 ie = getInputElement();
// Keep the latest documentation comment so lastComment() can hand it out.
88 if (ie instanceof DocumentationComment)
89 comment = ((Comment)ie).getComment();
90 } while (!(ie instanceof Token));
// The cursor now sits just past the token, hence the -1 for an inclusive end position.
91 endpos = lineL.head + line_pos - 1;
93 //System.out.println(ie.toString()); // uncomment to debug lexer.
94 java_cup.runtime.Symbol sym = ((Token)ie).token();
95 // fix up left/right positions.
96 sym.left = startpos; sym.right = endpos;
// Debugging aid: reads a single input element, prints it to standard output, and
// returns true unless that element is an EOF (so a caller can loop until false).
100 public boolean debug_lex() throws java.io.IOException {
101 InputElement ie = getInputElement();
102 System.out.println(ie);
103 return !(ie instanceof EOF);
107 public String lastComment() { return comment; }
108 public void clearComment() { comment=""; }
// Returns the next input element starting at the cursor, chosen by looking at the
// current character. Only the end-of-line test, the white-space cases and the first
// label of a further case are visible in this excerpt.
110 InputElement getInputElement() throws java.io.IOException {
// Cursor has run off the current line; the handling code is not visible here.
115 if (line.length()<=line_pos) { // end of line.
121 switch (line.charAt(line_pos)) {
// All four white-space characters are consumed one at a time as a WhiteSpace element.
124 case ' ': // ASCII SP
125 case '\t': // ASCII HT
126 case '\f': // ASCII FF
127 case '\n': // LineTerminator
128 return new WhiteSpace(consume());
// NOTE(review): '\020' is an octal escape, i.e. code point 16 (DLE). ASCII SUB
// (control-Z) is code point 26, which would be written '\032'. The label and the
// comment disagree - confirm which character is intended.
131 case '\020': // ASCII SUB
// Called with the cursor on a '/'. Looks at the following character to tell an
// end-of-line comment, a traditional or documentation comment, and a plain token apart.
144 // May get Token instead of Comment.
145 InputElement getComment() throws java.io.IOException {
147 // line.charAt(line_pos+0) is '/'
148 switch (line.charAt(line_pos+1)) {
149 case '/': // EndOfLineComment
// Everything after the two slashes up to the end of the line is the comment text.
// nextLine() appends '\n' to each line, so that terminator is part of the text.
// NOTE(review): the declaration of comment is not visible; presumably a local. If
// it were the field read by lastComment(), this would overwrite it - confirm.
150 comment = line.substring(line_pos+2);
// Move the cursor to the end of the line so the next call sees end-of-line.
151 line_pos = line.length();
152 return new EndOfLineComment(comment);
153 case '*': // TraditionalComment or DocumentationComment
// A line between the case label and this test is not visible; presumably it moves
// the cursor past the opening slash and star - confirm.
155 if (line.charAt(line_pos)=='*') { // DocumentationComment
156 return snarfComment(new DocumentationComment());
157 } else { // TraditionalComment
158 return snarfComment(new TraditionalComment());
160 default: // it's a token, not a comment.
// Collects the body of a block comment into c, one appendLine() call per source line,
// and returns c once the closing star-slash has been consumed.
// Throws Error("Unterminated comment at end of file.") from a branch whose condition
// is not visible in this excerpt. Several control-flow lines are missing.
165 Comment snarfComment(Comment c) throws java.io.IOException {
// Accumulates the text of the current line of the comment.
166 StringBuffer text=new StringBuffer();
167 while(true) { // Grab CommentTail
168 while (line.charAt(line_pos)!='*') { // Add NotStar to comment.
169 int star_pos = line.indexOf('*', line_pos);
// Next four lines: the rest of the line is flushed to c as one comment line and the
// cursor is moved to the end of the line. The guarding condition is not visible;
// presumably this is the no-star-on-this-line case - confirm.
171 text.append(line.substring(line_pos));
172 c.appendLine(text.toString()); text.setLength(0);
173 line_pos = line.length();
176 throw new Error("Unterminated comment at end of file.");
// Copy the text in front of the star found on this line.
178 text.append(line.substring(line_pos, star_pos));
182 // At this point, line.charAt(line_pos)=='*'
183 // Grab CommentTailStar starting at line_pos+1.
184 if (line.charAt(line_pos+1)=='/') { // safe because line ends with '\n'
// End of comment: flush the pending text and skip the two closing characters.
185 c.appendLine(text.toString()); line_pos+=2; return c;
187 text.append(line.charAt(line_pos++)); // add the '*'
// Token dispatch: picks the scanner for the token that starts at the cursor.
// The method header and most case labels are not visible in this excerpt.
192 // Tokens are: Identifiers, Keywords, Literals, Separators, Operators.
193 switch (line.charAt(line_pos)) {
194 // Separators: (period is a special case)
203 return new Separator(consume());
221 return getOperator();
223 return getCharLiteral();
225 return getStringLiteral();
227 // a period is a special case:
// A period followed by a decimal digit starts a numeric literal.
229 if (Character.digit(line.charAt(line_pos+1),10)!=-1)
230 return getNumericLiteral();
// Three periods in a row become one Separator carrying the ellipsis character.
// Index line_pos+2 is only read after line_pos+1 matched a period.
232 line.charAt(line_pos+1)=='.' &&
233 line.charAt(line_pos+2)=='.') {
234 consume(); consume(); consume();
235 return new Separator('\u2026'); // unicode ellipsis character.
236 } else return new Separator(consume());
// Fallback for characters without a dedicated case: identifier, then number, else error.
240 if (Character.isJavaIdentifierStart(line.charAt(line_pos)))
241 return getIdentifier();
242 if (Character.isDigit(line.charAt(line_pos)))
243 return getNumericLiteral();
244 throw new Error("Illegal character on line "+line_num);
247 static final String[] keywords = new String[] {
248 "abstract", "assert", "boolean", "break", "byte", "case", "catch", "char",
249 "class", "const", "continue", "default", "do", "double", "else", "enum",
250 "extends", "final", "finally", "float", "for", "goto", "if",
251 "implements", "import", "instanceof", "int", "interface", "long",
252 "native", "new", "package", "private", "protected", "public",
253 "return", "short", "static", "strictfp", "super", "switch",
254 "synchronized", "this", "throw", "throws", "transient", "try", "void",
255 "volatile", "while" };
// Scans an identifier-shaped word at the cursor and classifies it as a null literal,
// a boolean literal, a Keyword, or a plain Identifier.
// Throws Error if the first character cannot start a Java identifier.
256 Token getIdentifier() {
// The first character is consumed unconditionally and validated afterwards.
258 StringBuffer sb = new StringBuffer().append(consume());
260 if (!Character.isJavaIdentifierStart(sb.charAt(0)))
261 throw new Error("Invalid Java Identifier on line "+line_num);
262 while (Character.isJavaIdentifierPart(line.charAt(line_pos)))
263 sb.append(consume());
264 String s = sb.toString();
265 // Now check against boolean literals and null literal.
266 if (s.equals("null")) return new NullLiteral();
267 if (s.equals("true")) return new BooleanLiteral(true);
268 if (s.equals("false")) return new BooleanLiteral(false);
269 // Check against keywords.
// The three words below are in the keywords table, but are returned as plain
// identifiers when the matching isJava* flag (set elsewhere) is false.
270 // pre-java 1.5 compatibility:
271 if (!isJava15 && s.equals("enum")) return new Identifier(s);
272 // pre-java 1.4 compatibility:
273 if (!isJava14 && s.equals("assert")) return new Identifier(s);
274 // pre-java 1.2 compatibility:
275 if (!isJava12 && s.equals("strictfp")) return new Identifier(s);
276 // use binary search.
// Half-open search range [l, r) over keywords, which must be sorted for this to work.
277 for (int l=0, r=keywords.length; r > l; ) {
278 int x = (l+r)/2, cmp = s.compareTo(keywords[x]);
279 if (cmp < 0) r=x; else l=x+1;
280 if (cmp== 0) return new Keyword(s);
// Reached when the search range is empty, i.e. s is not a keyword.
283 return new Identifier(s);
// Decides which kind of numeric literal starts at the cursor and delegates to the
// floating-point scanner or to the integer scanner with radix 16, 8 or 10.
// The declaration of i, the loop body and the case labels are not visible here.
285 NumericLiteral getNumericLiteral() {
287 // leading decimal indicates float.
288 if (line.charAt(line_pos)=='.')
289 return getFloatingPointLiteral();
// A leading 0x or 0X selects hexadecimal; the two-character prefix is skipped first.
291 if (line.charAt(line_pos)=='0' &&
292 (line.charAt(line_pos+1)=='x' ||
293 line.charAt(line_pos+1)=='X')) {
294 line_pos+=2; return getIntegerLiteral(/*base*/16);
296 // otherwise scan to first non-numeric
// This scan uses the separate index i, so the cursor itself does not move.
297 for (i=line_pos; Character.digit(line.charAt(i),10)!=-1; )
299 switch(line.charAt(i)) { // discriminate based on first non-numeric
307 return getFloatingPointLiteral();
// Not floating point: a leading zero selects octal, anything else decimal.
311 if (line.charAt(line_pos)=='0')
312 return getIntegerLiteral(/*base*/8);
313 return getIntegerLiteral(/*base*/10);
// Scans digits valid in the given radix, accumulates their value, and returns a
// LongLiteral when an l or L suffix follows, otherwise an IntegerLiteral.
// Throws Error when a value without the suffix fails the range test below.
// The declaration of val (presumably a long - confirm) is not visible here.
316 NumericLiteral getIntegerLiteral(int radix) {
318 while (Character.digit(line.charAt(line_pos),radix)!=-1)
319 val = (val*radix) + Character.digit(consume(),radix);
320 if (line.charAt(line_pos) == 'l' ||
321 line.charAt(line_pos) == 'L') {
323 return new LongLiteral(val);
325 // we compare MAX_VALUE against val/2 to allow constants like
326 // 0xFFFF0000 to get past the test. (unsigned long->signed int)
// NOTE(review): the test does not look at radix, so a decimal literal between
// 2147483648 and 4294967295 also passes and wraps in the (int) cast - confirm
// whether that is acceptable.
327 if ((val/2) > Integer.MAX_VALUE ||
328 val < Integer.MIN_VALUE)
329 throw new Error("Constant does not fit in integer on line "+line_num);
330 return new IntegerLiteral((int)val);
// Builds the textual form of a floating-point literal (digits, optional fraction,
// optional exponent) in rep and converts it with Float.valueOf or Double.valueOf.
// A NumberFormatException from the conversion is rethrown as an Error with the line.
// The exponent handling and the suffix case labels are not visible in this excerpt.
332 NumericLiteral getFloatingPointLiteral() {
333 String rep = getDigits();
// Optional fraction: the period itself and any digits after it.
334 if (line.charAt(line_pos)=='.')
335 rep+=consume() + getDigits();
// Optional exponent marker, followed by an optional sign.
336 if (line.charAt(line_pos)=='e' ||
337 line.charAt(line_pos)=='E') {
339 if (line.charAt(line_pos)=='+' ||
340 line.charAt(line_pos)=='-')
// The character after the number selects the result type (labels not visible).
345 switch (line.charAt(line_pos)) {
349 return new FloatLiteral(Float.valueOf(rep).floatValue());
355 return new DoubleLiteral(Double.valueOf(rep).doubleValue());
357 } catch (NumberFormatException e) {
358 throw new Error("Illegal floating-point on line "+line_num+": "+e);
// Consumes a run of decimal digits at the cursor and returns them as a String, which
// is empty when the cursor is not on a digit. The method header is not visible in
// this excerpt; presumably this is the getDigits() used by the floating-point scanner.
362 StringBuffer sb = new StringBuffer();
363 while (Character.digit(line.charAt(line_pos),10)!=-1)
364 sb.append(consume());
365 return sb.toString();
// Scans one operator starting at the cursor and returns it as an Operator built from
// its spelling. The first character is consumed up front; the second is only peeked
// so it can be consumed later if it turns out to belong to the operator.
// The conditions selecting each branch are mostly not visible in this excerpt.
368 Operator getOperator() {
369 char first = consume();
370 char second= line.charAt(line_pos);
373 // single-character operators.
377 return new Operator(new String(new char[] {first}));
// Two-character operator: the peeked second character is consumed as well.
384 return new Operator(new String(new char[] {first, consume()}));
388 // Check for trailing '='
390 return new Operator(new String(new char[] {first, consume()}));
392 // Special-case '<<', '>>' and '>>>'
393 if ((first=='<' && second=='<') || // <<
394 (first=='>' && second=='>')) { // >>
395 String op = new String(new char[] {first, consume()});
// The statements that extend op for the next two tests are not visible here.
396 if (first=='>' && line.charAt(line_pos)=='>') // >>>
398 if (line.charAt(line_pos)=='=') // <<=, >>=, >>>=
400 return new Operator(op);
403 // Otherwise return single operator.
404 return new Operator(new String(new char[] {first}));
// Scans a character literal: opening quote, one character or escape sequence, closing
// quote. Throws Error("Invalid character literal ...") for the rejected forms.
// The case labels and the declaration of val are not visible in this excerpt.
407 CharacterLiteral getCharLiteral() {
408 char firstquote = consume();
410 switch (line.charAt(line_pos)) {
// Escape sequences are decoded by getEscapeSequence(), which expects a backslash.
412 val = getEscapeSequence();
415 throw new Error("Invalid character literal on line "+line_num);
417 throw new Error("Invalid character literal on line "+line_num);
// Both delimiters are validated only here, after the content has been read.
422 char secondquote = consume();
423 if (firstquote != '\'' || secondquote != '\'')
424 throw new Error("Invalid character literal on line "+line_num);
425 return new CharacterLiteral(val);
// Scans a string literal and returns a StringLiteral holding the decoded, interned
// text. Throws Error("Invalid string literal ...") for the rejected forms.
// The case labels inside the loop are not visible in this excerpt.
427 StringLiteral getStringLiteral() {
428 char openquote = consume();
429 StringBuffer val = new StringBuffer();
// Read until the closing double quote; escapes are decoded, other characters copied.
430 while (line.charAt(line_pos)!='\"') {
431 switch(line.charAt(line_pos)) {
433 val.append(getEscapeSequence());
436 throw new Error("Invalid string literal on line " + line_num);
438 val.append(consume());
// Both delimiters are validated only here, after the content has been read.
442 char closequote = consume();
443 if (openquote != '\"' || closequote != '\"')
444 throw new Error("Invalid string literal on line " + line_num);
446 return new StringLiteral(val.toString().intern());
// Decodes one backslash escape at the cursor and returns the character it denotes.
// Throws Error("Invalid escape sequence ...") when the cursor is not on a backslash
// or the escape is rejected. The case labels are not visible in this excerpt.
449 char getEscapeSequence() {
450 if (consume() != '\\')
451 throw new Error("Invalid escape sequence on line " + line_num);
452 switch(line.charAt(line_pos)) {
// Single-character escapes: consume the selector and return the mapped character.
454 consume(); return '\b';
456 consume(); return '\t';
458 consume(); return '\n';
460 consume(); return '\f';
462 consume(); return '\r';
464 consume(); return '\"';
466 consume(); return '\'';
468 consume(); return '\\';
// Octal escapes: up to three or up to two digits depending on the (unseen) case label.
473 return (char) getOctal(3);
478 return (char) getOctal(2);
480 throw new Error("Invalid escape sequence on line " + line_num);
// Reads at most maxlength octal digits at the cursor and accumulates their value.
// Throws Error when no digit was read or the value exceeds 0xFF.
// Declarations, the non-digit branch and the return are not visible in this excerpt.
483 int getOctal(int maxlength) {
485 for (i=0; i<maxlength; i++)
486 if (Character.digit(line.charAt(line_pos), 8)!=-1) {
487 val = (8*val) + Character.digit(consume(), 8);
// i counts the digits consumed, so i==0 means there was no octal digit at all.
489 if ((i==0) || (val>0xFF)) // impossible.
490 throw new Error("Invalid octal escape sequence in line " + line_num);
494 char consume() { return line.charAt(line_pos++); }
// Loads the next line of input into line and records a new LineList node for it.
// The remaining statements of the method are not visible in this excerpt.
495 void nextLine() throws java.io.IOException {
// readLine() returns the line without its terminator, or null at end of input.
496 line=reader.readLine();
// Re-append a terminator so scanners can safely look one character past the text;
// line is left null at end of input.
497 if (line!=null) line=line+'\n';
// New head = previous head + cursor position at this moment (still the old line's).
498 lineL = new LineList(lineL.head+line_pos, lineL); // for error reporting
503 // Deal with error messages.
// Prints msg followed by the line number of the symbol info to standard error.
// The line is found by walking the LineList from the newest node backwards.
// The end of the method is not visible in this excerpt.
504 public void errorMsg(String msg, java_cup.runtime.Symbol info) {
// n starts at the current line; c is the offset of info.left within the matched line.
// c is not used by any line visible in this excerpt.
505 int n=line_num, c=info.left-lineL.head;
// Step back one line per node until a node's head is at or before info.left.
506 for (LineList p = lineL; p!=null; p=p.tail, n--)
507 if (p.head<=info.left) { c=info.left-p.head; break; }
508 System.err.println(msg+" at line "+n);
// Error counter exposed through numErrors(); where it is incremented is not visible
// in this excerpt.
511 private int num_errors = 0;
512 public int numErrors() { return num_errors; }
/**
 * Creates a list node.
 *
 * @param head value stored in this node
 * @param tail the node this one links to; {@code null} for the sentinel node
 */
LineList(int head, LineList tail) {
  this.tail = tail;
  this.head = head;
}