4 import java.io.LineNumberReader;
8 * Copyright (C) 2002 C. Scott Ananian <cananian@alumni.princeton.edu>
9 * This program is released under the terms of the GPL; see the file
10 * COPYING for more details. There is NO WARRANTY on this code.
14 LineNumberReader reader;
21 LineList lineL = new LineList(-line_pos, null); // sentinel for line #0
/**
 * Builds a lexer over the given character stream. The stream is wrapped in an
 * EscapedUnicodeReader and then in a LineNumberReader, which is kept in the
 * reader field.
 *
 * @param reader the underlying character source
 */
23 public Lexer(Reader reader) {
24 this.reader = new LineNumberReader(new EscapedUnicodeReader(reader));
/**
 * Fetches the next symbol: directly from _nextToken() while no lookahead
 * queue exists, otherwise from that queue via lookahead.get().
 *
 * @throws java.io.IOException declared here; presumably propagated from the underlying reads
 */
29 public java_cup.runtime.Symbol nextToken() throws java.io.IOException {
30 java_cup.runtime.Symbol sym =
31 lookahead==null ? _nextToken() : lookahead.get();
/**
 * Peeks ahead in the token stream to decide whether the LT just seen should
 * be changed to a PLT. The visible checks are: the previous symbol (last)
 * must be an IDENTIFIER, and the upcoming symbols must match
 * IDENTIFIER (DOT IDENTIFIER)* (LBRACK RBRACK)* before the final switch on
 * the symbol that follows.
 */
35 private boolean shouldBePLT() throws java.io.IOException {
36 // look ahead to see if this LT should be changed to a PLT
37 if (last==null || last.sym!=Sym.IDENTIFIER)
// Create the lookahead queue on first use; its Getter pulls tokens from _nextToken().
39 if (lookahead==null) lookahead = new FIFO(new FIFO.Getter() {
40 java_cup.runtime.Symbol next() throws java.io.IOException
41 { return _nextToken(); }
44 // skip past IDENTIFIER (DOT IDENTIFIER)*
45 if (lookahead.peek(i++).sym != Sym.IDENTIFIER)
47 while (lookahead.peek(i).sym == Sym.DOT) {
49 if (lookahead.peek(i++).sym != Sym.IDENTIFIER)
52 // skip past (LBRACK RBRACK)*
53 while (lookahead.peek(i).sym == Sym.LBRACK) {
55 if (lookahead.peek(i++).sym != Sym.RBRACK)
58 // now the next sym has to be one of LT GT COMMA EXTENDS IMPLEMENTS
59 switch(lookahead.peek(i).sym) {
69 private java_cup.runtime.Symbol last = null;
70 private FIFO lookahead = null;
/**
 * Reads input elements until one of them is a Token and takes that token's
 * symbol. The text of a DocumentationComment encountered on the way is
 * stored in the comment field. The symbol's left and right fields are
 * overwritten with positions computed from lineL.head + line_pos.
 */
71 public java_cup.runtime.Symbol _nextToken() throws java.io.IOException {
73 * Identifiers/Keywords/true/false/null (start with java letter)
74 * numeric literal (start with number)
75 * character literal (start with single quote)
76 * string (start with double quote)
77 * separator (parens, braces, brackets, semicolon, comma, period)
78 * operator (equals, plus, minus, etc)
80 * comment (start with slash)
// Position before the element is read; presumably re-sampled on every loop pass — confirm.
85 startpos = lineL.head + line_pos;
86 ie = getInputElement();
87 if (ie instanceof DocumentationComment)
88 comment = ((Comment)ie).getComment();
89 } while (!(ie instanceof Token));
// One before the current position, i.e. the last character consumed.
90 endpos = lineL.head + line_pos - 1;
92 //System.out.println(ie.toString()); // uncomment to debug lexer.
93 java_cup.runtime.Symbol sym = ((Token)ie).token();
94 // fix up left/right positions.
95 sym.left = startpos; sym.right = endpos;
/**
 * Debugging aid: reads a single input element and prints it to System.out.
 *
 * @return false once the element read is an EOF, true otherwise
 */
99 public boolean debug_lex() throws java.io.IOException {
100 InputElement ie = getInputElement();
101 System.out.println(ie);
102 return !(ie instanceof EOF);
/** Returns the comment text currently held in the comment field. */
106 public String lastComment() { return comment; }
/** Resets the held comment text to the empty string. */
107 public void clearComment() { comment=""; }
/**
 * Produces the next input element starting at line_pos in the current line.
 * Only part of the dispatch is visible here: the end-of-line test and the
 * whitespace cases, which consume one character into a WhiteSpace element.
 */
109 InputElement getInputElement() throws java.io.IOException {
114 if (line.length()<=line_pos) { // end of line.
120 switch (line.charAt(line_pos)) {
123 case ' ': // ASCII SP
124 case '\t': // ASCII HT
125 case '\f': // ASCII FF
126 case '\n': // LineTerminator
127 return new WhiteSpace(consume());
// NOTE(review): '\020' is octal 20 = decimal 16 (ASCII DLE); ASCII SUB is
// decimal 26, i.e. '\032' or '\u001a'. Confirm which character is intended.
130 case '\020': // ASCII SUB
143 // May get Token instead of Comment.
/**
 * Entered with line.charAt(line_pos) == '/'. The following character tells
 * apart an end-of-line comment, a traditional or documentation comment, and
 * an ordinary token.
 */
144 InputElement getComment() throws java.io.IOException {
146 // line.charAt(line_pos+0) is '/'
147 switch (line.charAt(line_pos+1)) {
148 case '/': // EndOfLineComment
// The comment text is everything after the two slashes; the rest of the line is skipped.
149 comment = line.substring(line_pos+2);
150 line_pos = line.length();
151 return new EndOfLineComment(comment);
152 case '*': // TraditionalComment or DocumentationComment
// A '*' at the current position selects a DocumentationComment.
154 if (line.charAt(line_pos)=='*') { // DocumentationComment
155 return snarfComment(new DocumentationComment());
156 } else { // TraditionalComment
157 return snarfComment(new TraditionalComment());
159 default: // it's a token, not a comment.
/**
 * Reads the remainder of a block comment into c and returns c once the
 * closing star-slash has been passed. Text is handed to c via appendLine.
 * An Error is thrown for a comment still open at end of file (the guarding
 * condition is not among the visible lines).
 *
 * @param c the comment object to fill
 * @return the same object c
 */
164 Comment snarfComment(Comment c) throws java.io.IOException {
165 StringBuffer text=new StringBuffer();
166 while(true) { // Grab CommentTail
167 while (line.charAt(line_pos)!='*') { // Add NotStar to comment.
168 int star_pos = line.indexOf('*', line_pos);
// Rest of the line goes into the comment; the buffer is flushed and emptied.
170 text.append(line.substring(line_pos));
171 c.appendLine(text.toString()); text.setLength(0);
172 line_pos = line.length();
175 throw new Error("Unterminated comment at end of file.");
// Copy the text in front of the star found on this line.
177 text.append(line.substring(line_pos, star_pos));
181 // At this point, line.charAt(line_pos)=='*'
182 // Grab CommentTailStar starting at line_pos+1.
183 if (line.charAt(line_pos+1)=='/') { // safe because line ends with '\n'
184 c.appendLine(text.toString()); line_pos+=2; return c;
186 text.append(line.charAt(line_pos++)); // add the '*'
191 // Tokens are: Identifiers, Keywords, Literals, Separators, Operators.
192 switch (line.charAt(line_pos)) {
193 // Separators: (period is a special case)
202 return new Separator(consume());
220 return getOperator();
222 return getCharLiteral();
224 return getStringLiteral();
226 // a period is a special case:
228 if (Character.digit(line.charAt(line_pos+1),10)!=-1)
229 return getNumericLiteral();
231 line.charAt(line_pos+1)=='.' &&
232 line.charAt(line_pos+2)=='.') {
233 consume(); consume(); consume();
234 return new Separator('\u2026'); // unicode ellipsis character.
235 } else return new Separator(consume());
239 if (Character.isJavaIdentifierStart(line.charAt(line_pos)))
240 return getIdentifier();
241 if (Character.isDigit(line.charAt(line_pos)))
242 return getNumericLiteral();
243 throw new Error("Illegal character on line "+line_num);
/**
 * Reserved words recognised by getIdentifier(). The entries must stay in
 * ascending String.compareTo order, because getIdentifier() locates them
 * with a binary search.
 */
246 static final String[] keywords = new String[] {
247 "abstract", "assert", "boolean", "break", "byte", "case", "catch", "char",
248 "class", "const", "continue", "default", "do", "double", "else", "enum",
249 "extends", "final", "finally", "float", "for", "goto", "if",
250 "implements", "import", "instanceof", "int", "interface", "long",
251 "native", "new", "package", "private", "protected", "public",
252 "return", "short", "static", "strictfp", "super", "switch",
253 "synchronized", "this", "throw", "throws", "transient", "try", "void",
254 "volatile", "while" };
/**
 * Scans an identifier-shaped lexeme starting at line_pos and classifies it:
 * null, true and false become literals, reserved words become a Keyword, and
 * anything else an Identifier. When isJava15, isJava14 or isJava12 is false,
 * "enum", "assert" or "strictfp" respectively is returned as an Identifier.
 *
 * @throws Error if the first character is not a Java identifier start
 */
255 Token getIdentifier() {
257 StringBuffer sb = new StringBuffer().append(consume());
259 if (!Character.isJavaIdentifierStart(sb.charAt(0)))
260 throw new Error("Invalid Java Identifier on line "+line_num);
261 while (Character.isJavaIdentifierPart(line.charAt(line_pos)))
262 sb.append(consume());
263 String s = sb.toString();
264 // Now check against boolean literals and null literal.
265 if (s.equals("null")) return new NullLiteral();
266 if (s.equals("true")) return new BooleanLiteral(true);
267 if (s.equals("false")) return new BooleanLiteral(false);
268 // Check against keywords.
269 // pre-java 1.5 compatibility:
270 if (!isJava15 && s.equals("enum")) return new Identifier(s);
271 // pre-java 1.4 compatibility:
272 if (!isJava14 && s.equals("assert")) return new Identifier(s);
273 // pre-java 1.2 compatibility:
274 if (!isJava12 && s.equals("strictfp")) return new Identifier(s);
275 // use binary search.
// Half-open search interval [l, r) over the keywords array.
276 for (int l=0, r=keywords.length; r > l; ) {
277 int x = (l+r)/2, cmp = s.compareTo(keywords[x]);
278 if (cmp < 0) r=x; else l=x+1;
279 if (cmp== 0) return new Keyword(s);
282 return new Identifier(s);
/**
 * Chooses the scanner for a numeric literal at line_pos. A leading '.' means
 * floating point and a 0x or 0X prefix means a base-16 integer. Otherwise the
 * first character after the leading decimal digits decides. Among the visible
 * outcomes, a leading '0' yields base 8 and anything else base 10.
 */
284 NumericLiteral getNumericLiteral() {
286 // leading decimal indicates float.
287 if (line.charAt(line_pos)=='.')
288 return getFloatingPointLiteral();
290 if (line.charAt(line_pos)=='0' &&
291 (line.charAt(line_pos+1)=='x' ||
292 line.charAt(line_pos+1)=='X')) {
// Step over the two-character prefix before the hex digits are read.
293 line_pos+=2; return getIntegerLiteral(/*base*/16);
295 // otherwise scan to first non-numeric
296 for (i=line_pos; Character.digit(line.charAt(i),10)!=-1; )
298 switch(line.charAt(i)) { // discriminate based on first non-numeric
306 return getFloatingPointLiteral();
310 if (line.charAt(line_pos)=='0')
311 return getIntegerLiteral(/*base*/8);
312 return getIntegerLiteral(/*base*/10);
/**
 * Reads digits of the given radix starting at line_pos and builds either a
 * LongLiteral (when an 'l' or 'L' follows the digits) or an IntegerLiteral.
 *
 * @param radix numeric base passed to Character.digit
 * @throws Error if a non-long constant fails the range check below
 */
315 NumericLiteral getIntegerLiteral(int radix) {
317 while (Character.digit(line.charAt(line_pos),radix)!=-1)
318 val = (val*radix) + Character.digit(consume(),radix);
319 if (line.charAt(line_pos) == 'l' ||
320 line.charAt(line_pos) == 'L') {
322 return new LongLiteral(val);
324 // we compare MAX_VALUE against val/2 to allow constants like
325 // 0xFFFF0000 to get past the test. (unsigned long->signed int)
// NOTE(review): the val/2 test is applied for every radix, so (assuming val is
// a long) a decimal literal between 2^31 and 2^32-1 also passes and is then
// narrowed by the (int) cast below — confirm this is intended.
326 if ((val/2) > Integer.MAX_VALUE ||
327 val < Integer.MIN_VALUE)
328 throw new Error("Constant does not fit in integer on line "+line_num);
329 return new IntegerLiteral((int)val);
/**
 * Collects the textual form of a floating-point literal (digits, optional
 * fraction, optional exponent with sign) and converts it with Float.valueOf
 * or Double.valueOf, depending on the switch over the character that follows.
 *
 * @throws Error if the conversion raises a NumberFormatException
 */
331 NumericLiteral getFloatingPointLiteral() {
332 String rep = getDigits();
333 if (line.charAt(line_pos)=='.')
// char + String concatenation: appends the '.' followed by the fraction digits.
334 rep+=consume() + getDigits();
335 if (line.charAt(line_pos)=='e' ||
336 line.charAt(line_pos)=='E') {
338 if (line.charAt(line_pos)=='+' ||
339 line.charAt(line_pos)=='-')
344 switch (line.charAt(line_pos)) {
348 return new FloatLiteral(Float.valueOf(rep).floatValue());
354 return new DoubleLiteral(Double.valueOf(rep).doubleValue());
356 } catch (NumberFormatException e) {
357 throw new Error("Illegal floating-point on line "+line_num+": "+e);
361 StringBuffer sb = new StringBuffer();
362 while (Character.digit(line.charAt(line_pos),10)!=-1)
363 sb.append(consume());
364 return sb.toString();
/**
 * Reads an operator starting at line_pos. The first character is consumed
 * unconditionally; the second is only peeked, and consumed when it turns out
 * to belong to the operator. Shifts are special-cased: after a doubled '<'
 * or '>', a third '>' and a trailing '=' are checked.
 */
367 Operator getOperator() {
368 char first = consume();
369 char second= line.charAt(line_pos);
372 // single-character operators.
376 return new Operator(new String(new char[] {first}));
383 return new Operator(new String(new char[] {first, consume()}));
387 // Check for trailing '='
389 return new Operator(new String(new char[] {first, consume()}));
391 // Special-case '<<', '>>' and '>>>'
392 if ((first=='<' && second=='<') || // <<
393 (first=='>' && second=='>')) { // >>
394 String op = new String(new char[] {first, consume()});
395 if (first=='>' && line.charAt(line_pos)=='>') // >>>
397 if (line.charAt(line_pos)=='=') // <<=, >>=, >>>=
399 return new Operator(op);
402 // Otherwise return single operator.
403 return new Operator(new String(new char[] {first}));
/**
 * Reads a character literal: an opening quote, a value (escape sequences are
 * decoded via getEscapeSequence()), and a closing quote.
 *
 * @throws Error if the literal is malformed, including when either delimiter is not a single quote
 */
406 CharacterLiteral getCharLiteral() {
407 char firstquote = consume();
409 switch (line.charAt(line_pos)) {
411 val = getEscapeSequence();
414 throw new Error("Invalid character literal on line "+line_num);
416 throw new Error("Invalid character literal on line "+line_num);
421 char secondquote = consume();
422 if (firstquote != '\'' || secondquote != '\'')
423 throw new Error("Invalid character literal on line "+line_num);
424 return new CharacterLiteral(val);
/**
 * Reads a string literal up to the closing double quote on the current line,
 * decoding escape sequences via getEscapeSequence().
 *
 * @return a StringLiteral holding the interned contents
 * @throws Error if the literal is malformed, including when either delimiter is not a double quote
 */
426 StringLiteral getStringLiteral() {
427 char openquote = consume();
428 StringBuffer val = new StringBuffer();
429 while (line.charAt(line_pos)!='\"') {
430 switch(line.charAt(line_pos)) {
432 val.append(getEscapeSequence());
435 throw new Error("Invalid string literal on line " + line_num);
437 val.append(consume());
441 char closequote = consume();
442 if (openquote != '\"' || closequote != '\"')
443 throw new Error("Invalid string literal on line " + line_num);
445 return new StringLiteral(val.toString().intern());
/**
 * Decodes one escape sequence. The leading backslash is consumed first; the
 * next character selects the result. Octal escapes are delegated to
 * getOctal() with a maximum length of 3 or 2 digits.
 *
 * @return the character the escape stands for
 * @throws Error if the sequence does not start with a backslash or is otherwise invalid
 */
448 char getEscapeSequence() {
449 if (consume() != '\\')
450 throw new Error("Invalid escape sequence on line " + line_num);
451 switch(line.charAt(line_pos)) {
453 consume(); return '\b';
455 consume(); return '\t';
457 consume(); return '\n';
459 consume(); return '\f';
461 consume(); return '\r';
463 consume(); return '\"';
465 consume(); return '\'';
467 consume(); return '\\';
472 return (char) getOctal(3);
477 return (char) getOctal(2);
479 throw new Error("Invalid escape sequence on line " + line_num);
/**
 * Reads octal digits starting at line_pos, at most maxlength loop passes,
 * and accumulates their value.
 *
 * @param maxlength bound on the digit-reading loop
 * @throws Error if i is still 0 after the loop or the value exceeds 0xFF
 */
482 int getOctal(int maxlength) {
484 for (i=0; i<maxlength; i++)
485 if (Character.digit(line.charAt(line_pos), 8)!=-1) {
486 val = (8*val) + Character.digit(consume(), 8);
488 if ((i==0) || (val>0xFF)) // impossible.
489 throw new Error("Invalid octal escape sequence in line " + line_num);
/** Returns the character at line_pos in the current line and advances line_pos by one. */
493 char consume() { return line.charAt(line_pos++); }
/**
 * Reads the next line from reader into line. A non-null line gets '\n'
 * appended, since readLine() returns the text without its line terminator.
 * A new LineList node is then pushed whose head is the previous head plus
 * line_pos.
 */
494 void nextLine() throws java.io.IOException {
495 line=reader.readLine();
496 if (line!=null) line=line+'\n';
497 lineL = new LineList(lineL.head+line_pos, lineL); // for error reporting
502 // Deal with error messages.
/**
 * Reports msg on System.err together with a line number. Starting from the
 * current line (line_num, lineL), the LineList chain is walked through its
 * tail links, decrementing the line number, until a node with
 * head <= info.left is found.
 *
 * @param msg  the message text
 * @param info the symbol whose left position locates the error
 */
503 public void errorMsg(String msg, java_cup.runtime.Symbol info) {
504 int n=line_num, c=info.left-lineL.head;
505 for (LineList p = lineL; p!=null; p=p.tail, n--)
506 if (p.head<=info.left) { c=info.left-p.head; break; }
507 System.err.println(msg+" at line "+n);
/** Error counter exposed through numErrors(); starts at zero. */
510 private int num_errors = 0;
/** Returns the current value of num_errors. */
511 public int numErrors() { return num_errors; }
/** Creates a list node with the given head value and tail link. */
516 LineList(int head, LineList tail) { this.head = head; this.tail = tail; }