--- /dev/null
+/*\r
+\r
+ Derby - Class org.apache.derby.impl.tools.ij.StatementFinder\r
+\r
+ Licensed to the Apache Software Foundation (ASF) under one or more\r
+ contributor license agreements. See the NOTICE file distributed with\r
+ this work for additional information regarding copyright ownership.\r
+ The ASF licenses this file to You under the Apache License, Version 2.0\r
+ (the "License"); you may not use this file except in compliance with\r
+ the License. You may obtain a copy of the License at\r
+\r
+ http://www.apache.org/licenses/LICENSE-2.0\r
+\r
+ Unless required by applicable law or agreed to in writing, software\r
+ distributed under the License is distributed on an "AS IS" BASIS,\r
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\r
+ See the License for the specific language governing permissions and\r
+ limitations under the License.\r
+\r
+ */\r
+\r
+package org.apache.derby.impl.tools.ij;\r
+\r
+import java.io.IOException;\r
+import java.io.Reader;\r
+\r
+/**\r
+ StatementFinder looks through an input stream for\r
+ the next JSQL statement. A statement is considered to\r
+ be any tokens up to the next semicolon or EOF.\r
+ <p>\r
+ Semicolons inside comments, strings, and delimited identifiers\r
+ are not considered to be statement terminators but to be\r
+ part of those tokens.\r
+ <p>\r
+ The only comment form currently recognized is the SQL comment,\r
+ which begins with "--" and ends at the next EOL.\r
+ <p>\r
+ Strings and delimited identifiers are permitted to contain\r
+ newlines; the actual IJ or JSQL parsers will report errors when\r
+ those cases occur.\r
+ <p>\r
+ There are no escaped characters, i.e. "\n" is considered to\r
+ be two characters, '\' and 'n'.\r
+\r
+ */\r
+\r
+public class StatementFinder {\r
+\r
+    // characters that either change the scanner state or count as whitespace\r
+    private static final char MINUS = '-';\r
+    private static final char SINGLEQUOTE = '\'';\r
+    private static final char DOUBLEQUOTE = '\"';\r
+    private static final char SEMICOLON = ';';\r
+    private static final char NEWLINE = '\n';\r
+    private static final char RETURN = '\r';\r
+    private static final char SPACE = ' ';\r
+    private static final char TAB = '\t';\r
+    private static final char FORMFEED = '\f';\r
+\r
+    // scanner states\r
+    private static final int IN_STATEMENT = 0;\r
+    private static final int IN_STRING = 1;\r
+    private static final int IN_SQLCOMMENT = 2;\r
+    private static final int END_OF_STATEMENT = 3;\r
+    private static final int END_OF_INPUT = 4;\r
+\r
+    // where the characters come from\r
+    private Reader source;\r
+    // text of the statement currently being collected\r
+    private StringBuffer statement = new StringBuffer();\r
+    // one of the scanner state constants above\r
+    private int state;\r
+    // true once readChar() has consumed the end of the input\r
+    private boolean atEOF = false;\r
+    // true when the most recent peekChar() hit the end of the input\r
+    private boolean peekEOF = false;\r
+    // the character obtained by the most recent peekChar()\r
+    private char peekChar;\r
+    // true while peekChar holds a character readChar() has not consumed yet\r
+    private boolean peeked = false;\r
+\r
+    /**\r
+        Create a finder that reads statements from the given reader.\r
+        The reader is stored as it is; this class adds no wrapping\r
+        or buffering of its own.\r
+\r
+        @param s the reader to take statements from.\r
+     */\r
+    public StatementFinder(Reader s) {\r
+        this.source = s;\r
+    }\r
+\r
+    /**\r
+        Point the finder at a different reader. The reader used so far\r
+        is closed first (a failure to close it is ignored), and the\r
+        scanner state, both EOF flags and the peek flag are reset, so a\r
+        character that was peeked but not yet consumed is dropped.\r
+\r
+        @param s the reader to take statements from.\r
+     */\r
+    public void ReInit(Reader s) {\r
+        try {\r
+            source.close();\r
+        } catch (IOException ioe) {\r
+            // the old reader may already be gone; stay quiet about it\r
+        }\r
+        source = s;\r
+        peeked = false;\r
+        peekEOF = false;\r
+        atEOF = false;\r
+        state = IN_STATEMENT;\r
+    }\r
+\r
+    /**\r
+        Close the reader statements are currently taken from.\r
+\r
+        @exception IOException if closing the reader fails.\r
+     */\r
+    public void close() throws IOException {\r
+        source.close();\r
+    }\r
+\r
+    /**\r
+        Fetch the next statement from the input. Leading whitespace is\r
+        skipped and the terminating semicolon, when present, is not part\r
+        of the result. When the input ends before a semicolon is seen,\r
+        whatever was collected so far is returned. Once the end of the\r
+        input has been reached, null is returned.\r
+\r
+        @return the next statement, or null if there is none.\r
+     */\r
+    public String nextStatement() {\r
+        // the buffer is emptied even when there is nothing left to return\r
+        statement.setLength(0);\r
+        if (state == END_OF_INPUT) {\r
+            return null;\r
+        }\r
+        state = IN_STATEMENT;\r
+\r
+        // Every peekChar() call pulls a fresh character from the reader,\r
+        // so peeking repeatedly throws the leading whitespace away and\r
+        // leaves the first significant character pending for readChar().\r
+        char current = peekChar();\r
+        while (!peekEOF() && whiteSpace(current)) {\r
+            current = peekChar();\r
+        }\r
+        if (peekEOF()) {\r
+            state = END_OF_INPUT;\r
+            return null;\r
+        }\r
+\r
+        boolean sawSemicolon = false;\r
+        while (!(state == END_OF_STATEMENT || state == END_OF_INPUT)) {\r
+            current = readChar();\r
+            if (atEOF()) {\r
+                state = END_OF_INPUT;\r
+                break;\r
+            }\r
+\r
+            if (current == SEMICOLON) {\r
+                sawSemicolon = true;\r
+                state = END_OF_STATEMENT;\r
+            } else if (current == SINGLEQUOTE || current == DOUBLEQUOTE) {\r
+                readString(current);\r
+            } else if (current == MINUS) {\r
+                readSingleLineComment(current);\r
+            }\r
+            // anything else is an ordinary character; keep scanning\r
+        }\r
+\r
+        if (sawSemicolon) {\r
+            // the semicolon was appended last; cut it off again\r
+            statement.setLength(statement.length() - 1);\r
+        }\r
+        return statement.toString();\r
+    }\r
+\r
+    /**\r
+        Tell whether a character counts as whitespace here: space, tab,\r
+        carriage return, newline or form feed.\r
+\r
+        @param c the character to test\r
+        @return true if the character is whitespace\r
+     */\r
+    private boolean whiteSpace(char c) {\r
+        switch (c) {\r
+            case SPACE:\r
+            case TAB:\r
+            case RETURN:\r
+            case NEWLINE:\r
+            case FORMFEED:\r
+                return true;\r
+            default:\r
+                return false;\r
+        }\r
+    }\r
+\r
+    /**\r
+        Called after the first character of a possible single line\r
+        comment has been read. If the following character repeats it,\r
+        the rest of the comment is consumed up to and including the\r
+        first newline or carriage return, or up to (but not including)\r
+        the end of the input. Otherwise nothing more is consumed; the\r
+        character that was looked at stays pending for the next read.\r
+\r
+        @param commentChar the character whose duplication signifies\r
+            the start of the comment.\r
+     */\r
+    private void readSingleLineComment(char commentChar) {\r
+        char lookahead = peekChar();\r
+\r
+        // end of input, or a lone comment character: nothing special\r
+        if (peekEOF() || lookahead != commentChar) {\r
+            return;\r
+        }\r
+\r
+        // it really is a comment; take the second comment character\r
+        readChar();\r
+        state = IN_SQLCOMMENT;\r
+\r
+        while (true) {\r
+            lookahead = peekChar();\r
+            if (peekEOF()) {\r
+                // leave the EOF for the caller to process\r
+                break;\r
+            }\r
+            readChar();\r
+            if (lookahead == NEWLINE || lookahead == RETURN) {\r
+                // the line end belongs to the comment and finishes it\r
+                break;\r
+            }\r
+        }\r
+        state = IN_STATEMENT;\r
+    }\r
+\r
+    /**\r
+        Consume the remainder of a string or delimited identifier whose\r
+        opening delimiter has already been read. Reading stops after the\r
+        next occurrence of the same delimiter. A doubled delimiter inside\r
+        a string, as in 'Mary''s lamb', is therefore seen as two adjacent\r
+        strings, 'Mary' and 's lamb'. The other kind of quote is just an\r
+        ordinary character inside the string.\r
+        <p>\r
+        If the input ends before the closing delimiter shows up, the\r
+        state becomes END_OF_INPUT and the string is treated as ended.\r
+\r
+        @param stringDelimiter the starting and ending character\r
+            for the string being read.\r
+     */\r
+    private void readString(char stringDelimiter) {\r
+        state = IN_STRING;\r
+\r
+        char current = readChar();\r
+        while (!atEOF() && current != stringDelimiter) {\r
+            current = readChar();\r
+        }\r
+\r
+        state = atEOF() ? END_OF_INPUT : IN_STATEMENT;\r
+    }\r
+\r
+    /**\r
+        @return true if the last readChar() ran into the end of the input.\r
+     */\r
+    private boolean atEOF() {\r
+        return atEOF;\r
+    }\r
+\r
+    /**\r
+        @return true if the last peekChar() ran into the end of the input.\r
+     */\r
+    private boolean peekEOF() {\r
+        return peekEOF;\r
+    }\r
+\r
+    /**\r
+        Consume the next character of the input. A character that was\r
+        peeked earlier is used if one is pending; otherwise one is\r
+        fetched now. Unless the end of the input was reached, the\r
+        character is appended to the statement buffer.\r
+\r
+        @return the character consumed.\r
+     */\r
+    private char readChar() {\r
+        if (!peeked) {\r
+            peekChar();\r
+        }\r
+\r
+        // the pending character is now used up\r
+        peeked = false;\r
+        atEOF = peekEOF;\r
+\r
+        if (atEOF) {\r
+            return peekChar;\r
+        }\r
+        statement.append(peekChar);\r
+        return peekChar;\r
+    }\r
+\r
+    /**\r
+        Fetch one character from the reader and hold it as the pending\r
+        character without adding it to the statement buffer. Every call\r
+        reads from the reader, so a character that was pending before\r
+        the call is replaced. At the end of the input the pending\r
+        character is the NUL character and the peek EOF flag is set.\r
+        An IOException from the reader is rethrown as the exception\r
+        built by ijException.iOException.\r
+\r
+        @return the character fetched.\r
+     */\r
+    private char peekChar() {\r
+        peeked = true;\r
+\r
+        int raw;\r
+        try {\r
+            // REMIND: this is assuming a flat ascii source file.\r
+            // will need to beef it up at some future point to\r
+            // understand whether the stream is ascii or something else.\r
+            raw = source.read();\r
+        } catch (IOException ie) {\r
+            throw ijException.iOException(ie);\r
+        }\r
+\r
+        peekEOF = (raw == -1);\r
+        peekChar = peekEOF ? '\00' : (char) raw;\r
+        return peekChar;\r
+    }\r
+}\r