4 import java.io.LineNumberReader;
7 * Copyright (C) 2002 C. Scott Ananian <cananian@alumni.princeton.edu>
8 * This program is released under the terms of the GPL; see the file
9 * COPYING for more details. There is NO WARRANTY on this code.
/**
 * Lexer that implements Parse.Lexer. Input is pulled one line at a time
 * from a LineNumberReader and turned into java_cup.runtime.Symbol tokens.
 */
12 public class Lexer implements Parse.Lexer {
// Line source; the two-argument constructor wraps the caller's Reader in it.
13 LineNumberReader reader;
// Newest cell of a linked list of line-start offsets. nextLine() pushes a
// cell per line read and errorMsg() walks the list to find a line number.
20 LineList lineL = new LineList(-line_pos, null); // sentinel for line #0
/**
 * Creates a lexer with the default language level by delegating to the
 * two-argument constructor with a minor version of 2.
 *
 * @param reader source of the characters to tokenize
 */
22 public Lexer(Reader reader) {
23 this(reader, 2); // by default, use a Java 1.2-compatible lexer.
/**
 * Creates a lexer for a given Java 1.N language level.
 *
 * @param reader source text; it is wrapped in an EscapedUnicodeReader and
 *        then in a LineNumberReader
 * @param java_minor_version the N of "Java 1.N"; values of at least 2, 4 and
 *        5 turn on isJava12, isJava14 and isJava15 respectively
 */
25 public Lexer(Reader reader, int java_minor_version) {
26 this.reader = new LineNumberReader(new EscapedUnicodeReader(reader));
// The flags are cumulative thresholds. getIdentifier() consults them to
// decide whether "strictfp", "assert" and "enum" are keywords or identifiers.
27 this.isJava12 = java_minor_version >= 2;
28 this.isJava14 = java_minor_version >= 4;
29 this.isJava15 = java_minor_version >= 5;
/**
 * Returns the next token. While no lookahead queue exists the token comes
 * straight from _nextToken(); once the queue has been created it is always
 * taken from the queue.
 *
 * The body opens a block comment describing an older LT-to-PLT hack as
 * unnecessary. NOTE(review): the end of that comment is not visible in this
 * excerpt, so whether the isJava15 test below is live code or commented out
 * must be checked against the full file.
 *
 * @return the next symbol
 * @throws java.io.IOException propagated from _nextToken() or the lookahead queue
 */
32 public java_cup.runtime.Symbol nextToken() throws java.io.IOException {
33 java_cup.runtime.Symbol sym =
34 lookahead==null ? _nextToken() : lookahead.get();
35 /* Old "smart lexer" hack to parse JSR-14 syntax. New, better, grammar
36 * makes this unnecessary. (Credit to Eric Blake for its discovery.)
38 if (isJava15 && sym.sym==Sym.LT && shouldBePLT())
/**
 * Peeks ahead in the token stream, without consuming anything, to decide
 * whether an LT token should be changed to a PLT. The pattern examined is
 * IDENTIFIER (DOT IDENTIFIER)*, then (LBRACK RBRACK)*, then one symbol
 * that is switched on.
 *
 * NOTE(review): the return statements and the declaration of the index i
 * are on lines not visible in this excerpt.
 *
 * @return whether the LT should become a PLT
 * @throws java.io.IOException propagated from reading ahead
 */
44 private boolean shouldBePLT() throws java.io.IOException {
45 // look ahead to see if this LT should be changed to a PLT
// Only considered when the token recorded in last is an IDENTIFIER.
46 if (last==null || last.sym!=Sym.IDENTIFIER)
// Create the lookahead queue on first use; it refills itself by calling
// _nextToken() through the Getter callback.
48 if (lookahead==null) lookahead = new FIFO(new FIFO.Getter() {
49 java_cup.runtime.Symbol next() throws java.io.IOException
50 { return _nextToken(); }
53 // skip past IDENTIFIER (DOT IDENTIFIER)*
54 if (lookahead.peek(i++).sym != Sym.IDENTIFIER)
56 while (lookahead.peek(i).sym == Sym.DOT) {
58 if (lookahead.peek(i++).sym != Sym.IDENTIFIER)
61 // skip past (LBRACK RBRACK)*
62 while (lookahead.peek(i).sym == Sym.LBRACK) {
64 if (lookahead.peek(i++).sym != Sym.RBRACK)
67 // now the next sym has to be one of LT GT COMMA EXTENDS IMPLEMENTS
68 switch(lookahead.peek(i).sym) {
// Token consulted by shouldBePLT(); presumably the previously returned
// token, but its assignment is not visible in this excerpt. Starts as null.
79 private java_cup.runtime.Symbol last = null;
// Queue of tokens read ahead; stays null until shouldBePLT() creates it,
// after which nextToken() reads from it.
80 private FIFO lookahead = null;
/**
 * Reads input elements until one of them is a Token and returns that
 * token's symbol, with left and right set to the character positions where
 * the token starts and ends. The text of any DocumentationComment met on
 * the way is stored in the comment field.
 *
 * @return the symbol of the next Token input element
 * @throws java.io.IOException propagated from getInputElement()
 */
81 public java_cup.runtime.Symbol _nextToken() throws java.io.IOException {
83 * Identifiers/Keywords/true/false/null (start with java letter)
84 * numeric literal (start with number)
85 * character literal (start with single quote)
86 * string (start with double quote)
87 * separator (parens, braces, brackets, semicolon, comma, period)
88 * operator (equals, plus, minus, etc)
90 * comment (start with slash)
// startpos is recomputed for every element, so when the loop ends it holds
// the position at which the returned token began.
95 startpos = lineL.head + line_pos;
96 ie = getInputElement();
// Remember documentation comments so lastComment() can hand them out.
97 if (ie instanceof DocumentationComment)
98 comment = ((Comment)ie).getComment();
99 } while (!(ie instanceof Token));
// line_pos now points just past the token, hence the -1.
100 endpos = lineL.head + line_pos - 1;
102 //System.out.println(ie.toString()); // uncomment to debug lexer.
103 java_cup.runtime.Symbol sym = ((Token)ie).token();
104 // fix up left/right positions.
105 sym.left = startpos; sym.right = endpos;
/**
 * Debugging aid: reads a single input element and prints it to System.out.
 *
 * @return false once the element read is an EOF, true otherwise
 * @throws java.io.IOException propagated from getInputElement()
 */
109 public boolean debug_lex() throws java.io.IOException {
110 InputElement ie = getInputElement();
111 System.out.println(ie);
112 return !(ie instanceof EOF);
/**
 * Returns the current value of the comment field, which _nextToken() sets
 * to the text of each DocumentationComment it passes.
 */
116 public String lastComment() { return comment; }
/** Resets the stored comment text to the empty string (not to null). */
117 public void clearComment() { comment=""; }
/**
 * Returns the next input element found at line_pos in the current line.
 * The visible cases are the end-of-line check and single whitespace
 * characters; the remaining branches are on lines not visible in this
 * excerpt.
 *
 * @return the next input element
 * @throws java.io.IOException propagated from reading further input
 */
119 InputElement getInputElement() throws java.io.IOException {
// Nothing is left on the current line once line_pos reaches its length.
124 if (line.length()<=line_pos) { // end of line.
130 switch (line.charAt(line_pos)) {
// Each whitespace character becomes its own WhiteSpace element.
133 case ' ': // ASCII SP
134 case '\t': // ASCII HT
135 case '\f': // ASCII FF
136 case '\n': // LineTerminator
137 return new WhiteSpace(consume());
// NOTE(review): the octal escape '\020' is decimal 16 (ASCII DLE), whereas
// ASCII SUB is decimal 26, i.e. '\032'. The label and its comment disagree;
// confirm which character is intended.
140 case '\020': // ASCII SUB
153 // May get Token instead of Comment.
/**
 * Handles input starting with a slash at line_pos by inspecting the
 * character after it: a second slash yields an EndOfLineComment, a star
 * yields a TraditionalComment or DocumentationComment via snarfComment(),
 * and anything else is treated as a token rather than a comment.
 *
 * @return the comment element, or a token when the slash does not start a comment
 * @throws java.io.IOException propagated from snarfComment()
 */
154 InputElement getComment() throws java.io.IOException {
156 // line.charAt(line_pos+0) is '/'
157 switch (line.charAt(line_pos+1)) {
158 case '/': // EndOfLineComment
// The text is everything after the two slashes. nextLine() appends '\n' to
// every line, so that terminator is part of the substring taken here.
159 comment = line.substring(line_pos+2);
160 line_pos = line.length();
161 return new EndOfLineComment(comment);
162 case '*': // TraditionalComment or DocumentationComment
// A further star at line_pos marks a documentation comment.
// NOTE(review): the statement that advances line_pos before this test is
// not visible in this excerpt.
164 if (line.charAt(line_pos)=='*') { // DocumentationComment
165 return snarfComment(new DocumentationComment());
166 } else { // TraditionalComment
167 return snarfComment(new TraditionalComment());
169 default: // it's a token, not a comment.
/**
 * Collects the body of a block comment into c, handing the text over with
 * appendLine(), until a star immediately followed by a slash is found.
 *
 * @param c the comment object to fill; the same object is returned
 * @return c, with line_pos left just past the closing delimiter
 * @throws java.io.IOException propagated from reading further lines
 * @throws Error if the input ends before the comment is closed
 */
174 Comment snarfComment(Comment c) throws java.io.IOException {
175 StringBuffer text=new StringBuffer();
176 while(true) { // Grab CommentTail
177 while (line.charAt(line_pos)!='*') { // Add NotStar to comment.
178 int star_pos = line.indexOf('*', line_pos);
// No star visible: take the rest of this line, flush it to the comment as
// one line and mark the current line as used up.
180 text.append(line.substring(line_pos));
181 c.appendLine(text.toString()); text.setLength(0);
182 line_pos = line.length();
185 throw new Error("Unterminated comment at end of file.");
// A star was found further along: keep everything before it.
187 text.append(line.substring(line_pos, star_pos));
191 // At this point, line.charAt(line_pos)=='*'
192 // Grab CommentTailStar starting at line_pos+1.
193 if (line.charAt(line_pos+1)=='/') { // safe because line ends with '\n'
194 c.appendLine(text.toString()); line_pos+=2; return c;
// A star that does not close the comment is ordinary comment text.
196 text.append(line.charAt(line_pos++)); // add the '*'
// NOTE(review): the header of the method containing this body is on a line
// not included in this excerpt. The body dispatches on the character at
// line_pos to the matching token reader.
201 // Tokens are: Identifiers, Keywords, Literals, Separators, Operators.
202 switch (line.charAt(line_pos)) {
203 // Separators: (period is a special case)
212 return new Separator(consume());
230 return getOperator();
232 return getCharLiteral();
234 return getStringLiteral();
236 // a period is a special case:
// A period directly followed by a decimal digit starts a numeric literal.
238 if (Character.digit(line.charAt(line_pos+1),10)!=-1)
239 return getNumericLiteral();
241 line.charAt(line_pos+1)=='.' &&
242 line.charAt(line_pos+2)=='.') {
// Three consecutive periods are consumed and reported as one Separator
// carrying the ellipsis character.
243 consume(); consume(); consume();
244 return new Separator('\u2026'); // unicode ellipsis character.
245 } else return new Separator(consume());
// Fallbacks once the switch has not matched: identifier start, then digit;
// any other character is rejected.
249 if (Character.isJavaIdentifierStart(line.charAt(line_pos)))
250 return getIdentifier();
251 if (Character.isDigit(line.charAt(line_pos)))
252 return getNumericLiteral();
253 throw new Error("Illegal character on line "+line_num);
/**
 * Keyword table searched by getIdentifier() with a binary search using
 * String.compareTo, so the entries must stay in ascending compareTo order
 * (as they are now). Insert any new keyword at its sorted position.
 */
256 static final String[] keywords = new String[] {
257 "abstract", "assert", "boolean", "break", "byte", "case", "catch", "char",
258 "class", "const", "continue", "default", "do", "double", "else", "enum",
259 "extends", "final", "finally", "float", "for", "goto", "if",
260 "implements", "import", "instanceof", "int", "interface", "long",
261 "native", "new", "package", "private", "protected", "public",
262 "return", "short", "static", "strictfp", "super", "switch",
263 "synchronized", "this", "throw", "throws", "transient", "try", "void",
264 "volatile", "while" };
/**
 * Reads a run of Java identifier characters starting at line_pos and
 * classifies it as a NullLiteral, a BooleanLiteral, a Keyword or an
 * Identifier.
 *
 * @return the token for the word read
 * @throws Error if the first character cannot start a Java identifier
 */
265 Token getIdentifier() {
267 StringBuffer sb = new StringBuffer().append(consume());
269 if (!Character.isJavaIdentifierStart(sb.charAt(0)))
270 throw new Error("Invalid Java Identifier on line "+line_num);
271 while (Character.isJavaIdentifierPart(line.charAt(line_pos)))
272 sb.append(consume());
273 String s = sb.toString();
274 // Now check against boolean literals and null literal.
275 if (s.equals("null")) return new NullLiteral();
276 if (s.equals("true")) return new BooleanLiteral(true);
277 if (s.equals("false")) return new BooleanLiteral(false);
278 // Check against keywords.
// Words that only became keywords in a later language level are returned
// as plain identifiers when the lexer is configured for an older level.
279 // pre-java 1.5 compatibility:
280 if (!isJava15 && s.equals("enum")) return new Identifier(s);
281 // pre-java 1.4 compatibility:
282 if (!isJava14 && s.equals("assert")) return new Identifier(s);
283 // pre-java 1.2 compatibility:
284 if (!isJava12 && s.equals("strictfp")) return new Identifier(s);
285 // use binary search.
// Half-open search window [l, r) over the sorted keywords table.
286 for (int l=0, r=keywords.length; r > l; ) {
287 int x = (l+r)/2, cmp = s.compareTo(keywords[x]);
// The window is narrowed before the equality test; on a match the update is
// harmless because the method returns immediately afterwards.
288 if (cmp < 0) r=x; else l=x+1;
289 if (cmp== 0) return new Keyword(s);
// Not found in the table: an ordinary identifier.
292 return new Identifier(s);
/**
 * Decides which kind of numeric literal starts at line_pos and delegates
 * to getFloatingPointLiteral() or to getIntegerLiteral() with radix 16, 8
 * or 10.
 *
 * NOTE(review): the case labels of the switch on the first non-digit
 * character are on lines not visible in this excerpt.
 *
 * @return the literal token read
 */
294 NumericLiteral getNumericLiteral() {
296 // leading decimal indicates float.
297 if (line.charAt(line_pos)=='.')
298 return getFloatingPointLiteral();
// A "0x" or "0X" prefix is skipped here, so getIntegerLiteral sees only the
// hexadecimal digits.
300 if (line.charAt(line_pos)=='0' &&
301 (line.charAt(line_pos+1)=='x' ||
302 line.charAt(line_pos+1)=='X')) {
303 line_pos+=2; return getIntegerLiteral(/*base*/16);
305 // otherwise scan to first non-numeric
// The scan uses a separate index i; line_pos itself is not moved, so the
// delegate called below re-reads the digits from the start.
306 for (i=line_pos; Character.digit(line.charAt(i),10)!=-1; )
308 switch(line.charAt(i)) { // discriminate based on first non-numeric
316 return getFloatingPointLiteral();
// An integer with a leading zero is read as octal, otherwise as decimal.
320 if (line.charAt(line_pos)=='0')
321 return getIntegerLiteral(/*base*/8);
322 return getIntegerLiteral(/*base*/10);
/**
 * Reads the digits of an integer literal in the given radix, starting at
 * line_pos, and returns a LongLiteral when an 'l' or 'L' suffix follows,
 * or an IntegerLiteral otherwise.
 *
 * NOTE(review): the declaration of val is not visible in this excerpt; it
 * is presumably a long, given the cast to int and the LongLiteral result.
 * No overflow check on the accumulation itself is visible here.
 *
 * @param radix numeric base of the digits (callers in this file pass 8, 10 or 16)
 * @return the literal token
 * @throws Error if a literal without a long suffix fails the range check below
 */
325 NumericLiteral getIntegerLiteral(int radix) {
327 while (Character.digit(line.charAt(line_pos),radix)!=-1)
328 val = (val*radix) + Character.digit(consume(),radix);
329 if (line.charAt(line_pos) == 'l' ||
330 line.charAt(line_pos) == 'L') {
332 return new LongLiteral(val);
334 // we compare MAX_VALUE against val/2 to allow constants like
335 // 0xFFFF0000 to get past the test. (unsigned long->signed int)
// For a long val this accepts everything up to 0xFFFFFFFF; the cast to int
// below then reinterprets the upper half of that range as negative values.
// NOTE(review): the test does not look at radix, so decimal literals in
// that range appear to pass as well - confirm that this is intended.
336 if ((val/2) > Integer.MAX_VALUE ||
337 val < Integer.MIN_VALUE)
338 throw new Error("Constant does not fit in integer on line "+line_num);
339 return new IntegerLiteral((int)val);
/**
 * Reads a floating-point literal starting at line_pos by assembling its
 * textual form in rep (digits, optional fraction, optional exponent) and
 * converting it with Float.valueOf or Double.valueOf.
 *
 * NOTE(review): the statements that copy the exponent into rep and the
 * case labels of the suffix switch are on lines not visible in this excerpt.
 *
 * @return a FloatLiteral or a DoubleLiteral
 * @throws Error if the assembled text is rejected with a NumberFormatException
 */
341 NumericLiteral getFloatingPointLiteral() {
342 String rep = getDigits();
343 if (line.charAt(line_pos)=='.')
// char + String is string concatenation, so the period and the fraction
// digits are appended as text.
344 rep+=consume() + getDigits();
345 if (line.charAt(line_pos)=='e' ||
346 line.charAt(line_pos)=='E') {
// Optional sign of the exponent.
348 if (line.charAt(line_pos)=='+' ||
349 line.charAt(line_pos)=='-')
// The character after the number selects the result type.
354 switch (line.charAt(line_pos)) {
358 return new FloatLiteral(Float.valueOf(rep).floatValue());
364 return new DoubleLiteral(Double.valueOf(rep).doubleValue());
// A parse failure is reported as an Error carrying the line number.
366 } catch (NumberFormatException e) {
367 throw new Error("Illegal floating-point on line "+line_num+": "+e);
// NOTE(review): the header of the method containing this body is on a line
// not included in this excerpt; presumably it is the getDigits() helper
// called by getFloatingPointLiteral().
// Consumes consecutive decimal digits at line_pos and returns them as a
// string, which is empty when the current character is not a digit.
371 StringBuffer sb = new StringBuffer();
372 while (Character.digit(line.charAt(line_pos),10)!=-1)
373 sb.append(consume());
374 return sb.toString();
/**
 * Reads an operator starting at line_pos. The first character is consumed
 * up front and the second is only peeked at; it is consumed later if it
 * turns out to belong to the operator.
 *
 * NOTE(review): the conditions guarding the single-character, doubled and
 * trailing-equals returns, and the statements extending op, are on lines
 * not visible in this excerpt.
 *
 * @return the operator token, holding the operator's text
 */
377 Operator getOperator() {
378 char first = consume();
379 char second= line.charAt(line_pos);
382 // single-character operators.
386 return new Operator(new String(new char[] {first}));
// Two-character operator: the peeked second character is consumed here.
393 return new Operator(new String(new char[] {first, consume()}));
397 // Check for trailing '='
399 return new Operator(new String(new char[] {first, consume()}));
401 // Special-case '<<', '>>' and '>>>'
402 if ((first=='<' && second=='<') || // <<
403 (first=='>' && second=='>')) { // >>
404 String op = new String(new char[] {first, consume()});
// After the two-character shift, a third '>' and then '=' are looked for.
405 if (first=='>' && line.charAt(line_pos)=='>') // >>>
407 if (line.charAt(line_pos)=='=') // <<=, >>=, >>>=
409 return new Operator(op);
412 // Otherwise return single operator.
413 return new Operator(new String(new char[] {first}));
/**
 * Reads a character literal starting at the opening single quote at
 * line_pos. An escape is decoded by getEscapeSequence().
 *
 * NOTE(review): the case labels of the switch and the declaration of val
 * are on lines not visible in this excerpt.
 *
 * @return the literal token holding the character value
 * @throws Error if the literal is malformed or either quote is not a single quote
 */
416 CharacterLiteral getCharLiteral() {
417 char firstquote = consume();
419 switch (line.charAt(line_pos)) {
421 val = getEscapeSequence();
424 throw new Error("Invalid character literal on line "+line_num);
426 throw new Error("Invalid character literal on line "+line_num);
// Both delimiters are checked only after the value has been read.
431 char secondquote = consume();
432 if (firstquote != '\'' || secondquote != '\'')
433 throw new Error("Invalid character literal on line "+line_num);
434 return new CharacterLiteral(val);
/**
 * Reads a string literal starting at the opening double quote at line_pos,
 * decoding escapes with getEscapeSequence() and copying other characters
 * verbatim until the closing double quote.
 *
 * NOTE(review): the case labels of the switch are on lines not visible in
 * this excerpt.
 *
 * @return the literal token; its text is interned
 * @throws Error if the literal is malformed or either quote is not a double quote
 */
436 StringLiteral getStringLiteral() {
437 char openquote = consume();
438 StringBuffer val = new StringBuffer();
439 while (line.charAt(line_pos)!='\"') {
440 switch(line.charAt(line_pos)) {
442 val.append(getEscapeSequence());
445 throw new Error("Invalid string literal on line " + line_num);
447 val.append(consume());
451 char closequote = consume();
452 if (openquote != '\"' || closequote != '\"')
453 throw new Error("Invalid string literal on line " + line_num);
455 return new StringLiteral(val.toString().intern());
/**
 * Reads one escape sequence starting at the backslash at line_pos and
 * returns the character it denotes. Single-character escapes consume one
 * more character; octal escapes are delegated to getOctal().
 *
 * NOTE(review): the case labels are on lines not visible in this excerpt.
 * The getOctal(3) and getOctal(2) calls presumably belong to leading digits
 * 0-3 and 4-7 respectively.
 *
 * @return the decoded character
 * @throws Error if the input does not start with a backslash or the escape is unknown
 */
458 char getEscapeSequence() {
459 if (consume() != '\\')
460 throw new Error("Invalid escape sequence on line " + line_num);
461 switch(line.charAt(line_pos)) {
463 consume(); return '\b';
465 consume(); return '\t';
467 consume(); return '\n';
469 consume(); return '\f';
471 consume(); return '\r';
473 consume(); return '\"';
475 consume(); return '\'';
477 consume(); return '\\';
482 return (char) getOctal(3);
487 return (char) getOctal(2);
489 throw new Error("Invalid escape sequence on line " + line_num);
/**
 * Reads up to maxlength octal digits starting at line_pos and returns
 * their value.
 *
 * NOTE(review): the declarations of i and val, and whatever ends the loop
 * on a non-octal character, are on lines not visible in this excerpt.
 *
 * @param maxlength maximum number of digits to read
 * @return the value of the digits read
 * @throws Error if no digit was read or the value exceeds 0xFF
 */
492 int getOctal(int maxlength) {
494 for (i=0; i<maxlength; i++)
495 if (Character.digit(line.charAt(line_pos), 8)!=-1) {
496 val = (8*val) + Character.digit(consume(), 8);
// i counts the digits consumed; zero digits or a value above one byte is
// rejected.
498 if ((i==0) || (val>0xFF)) // impossible.
499 throw new Error("Invalid octal escape sequence in line " + line_num);
/**
 * Returns the character at line_pos in the current line and advances
 * line_pos by one. No bounds check is made here.
 */
503 char consume() { return line.charAt(line_pos++); }
/**
 * Reads the next line from the reader into line and records where it
 * starts.
 *
 * NOTE(review): any reset of line_pos or update of line_num is on lines
 * not visible in this excerpt.
 *
 * @throws java.io.IOException propagated from reader.readLine()
 */
504 void nextLine() throws java.io.IOException {
505 line=reader.readLine();
// readLine() strips the terminator; putting '\n' back guarantees every
// non-null line ends with it. line stays null at end of input.
506 if (line!=null) line=line+'\n';
// Push a cell whose head is the previous line's start offset plus the
// position reached within that line.
507 lineL = new LineList(lineL.head+line_pos, lineL); // for error reporting
512 // Deal with error messages.
/**
 * Prints msg to System.err followed by the line number on which the given
 * symbol starts.
 *
 * NOTE(review): the column c is computed but not used on the lines visible
 * in this excerpt; the rest of the method is not shown.
 *
 * @param msg text of the error message
 * @param info symbol whose left position locates the error
 */
513 public void errorMsg(String msg, java_cup.runtime.Symbol info) {
514 int n=line_num, c=info.left-lineL.head;
// Walk from the newest line-start cell towards older ones, decrementing the
// line number at each step, until a line starting at or before info.left is
// found.
515 for (LineList p = lineL; p!=null; p=p.tail, n--)
516 if (p.head<=info.left) { c=info.left-p.head; break; }
517 System.err.println(msg+" at line "+n);
// Error counter, starting at zero. NOTE(review): the code that increments
// it is not visible in this excerpt.
520 private int num_errors = 0;
/** Returns the current value of the error counter. */
521 public int numErrors() { return num_errors; }
/**
 * Creates a list cell.
 *
 * @param head value stored in this cell; callers in this file pass the
 *        character offset at which a line starts
 * @param tail the following cell, or null for the last one
 */
526 LineList(int head, LineList tail) { this.head = head; this.tail = tail; }