Adding JMCR-Stable version
[Benchmarks_CSolver.git] / JMCR-Stable / real-world application / MyDerby-10.3 / java / engine / org / apache / derby / iapi / util / UTF8Util.java
diff --git a/JMCR-Stable/real-world application/MyDerby-10.3/java/engine/org/apache/derby/iapi/util/UTF8Util.java b/JMCR-Stable/real-world application/MyDerby-10.3/java/engine/org/apache/derby/iapi/util/UTF8Util.java
new file mode 100644 (file)
index 0000000..79d37d6
--- /dev/null
@@ -0,0 +1,202 @@
+/*\r
+\r
+   Derby - Class org.apache.derby.iapi.util.UTF8Util\r
+\r
+   Licensed to the Apache Software Foundation (ASF) under one\r
+   or more contributor license agreements.  See the NOTICE file\r
+   distributed with this work for additional information\r
+   regarding copyright ownership.  The ASF licenses this file\r
+   to you under the Apache License, Version 2.0 (the\r
+   "License"); you may not use this file except in compliance\r
+   with the License.  You may obtain a copy of the License at\r
+\r
+     http://www.apache.org/licenses/LICENSE-2.0\r
+\r
+   Unless required by applicable law or agreed to in writing,\r
+   software distributed under the License is distributed on an\r
+   "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\r
+   KIND, either express or implied.  See the License for the\r
+   specific language governing permissions and limitations\r
+   under the License.\r
+\r
+ */\r
+package org.apache.derby.iapi.util;\r
+\r
+import java.io.EOFException;\r
+import java.io.IOException;\r
+import java.io.InputStream;\r
+import java.io.UTFDataFormatException;\r
+\r
/**
 * Utility methods for handling UTF-8 encoded byte streams.
 * <p>
 * The streams are expected to hold modified UTF-8 as described in
 * {@link java.io.DataInput}, with characters of one, two or three bytes.
 * <p>
 * Note that when the <code>skip</code> methods mention detection of invalid
 * UTF-8 encodings, only the first byte of a character is checked. For
 * multibyte encodings, the second and third byte are not checked for
 * correctness, just skipped and ignored.
 *
 * @see java.io.DataInput
 */
//@ThreadSafe
public final class UTF8Util {

    /**
     * First byte of the three byte sequence (<code>0xE0 0x00 0x00</code>)
     * that is treated as the Derby end-of-stream marker.
     */
    private static final int DERBY_EOF_FIRST_BYTE = 0xe0;

    /** This class cannot be instantiated. */
    private UTF8Util() {}

    /**
     * Skip until the end-of-stream is reached.
     * <p>
     * Skipping also stops when the Derby end-of-stream marker is found; the
     * marker itself is not counted as a character.
     *
     * @param in byte stream with UTF-8 encoded characters
     * @return The number of characters skipped.
     * @throws IOException if reading from the stream fails
     * @throws UTFDataFormatException if an invalid UTF-8 encoding is detected
     */
    public static final long skipUntilEOF(InputStream in)
            throws IOException {
        // No need to do the skip in a loop, as InputStream.read() returning
        // -1 means EOF has been reached.
        // Note that a loop should be used if skip is used instead of read.
        return internalSkip(in, Long.MAX_VALUE).chars;
    }

    /**
     * Skip the requested number of characters from the stream.
     * <p>
     * Because the number of characters actually skipped is compared with the
     * requested number, a negative <code>charsToSkip</code> can never be
     * satisfied and results in an <code>EOFException</code> as well.
     *
     * @param in byte stream with UTF-8 encoded characters
     * @param charsToSkip number of characters to skip
     * @return The number of bytes skipped.
     * @throws EOFException if end-of-stream is reached before the requested
     *      number of characters are skipped
     * @throws IOException if reading from the stream fails
     * @throws UTFDataFormatException if an invalid UTF-8 encoding is detected
     */
    public static final long skipFully(InputStream in, long charsToSkip)
            throws EOFException, IOException {
        SkipCount count = internalSkip(in, charsToSkip);
        if (count.chars != charsToSkip) {
            throw new EOFException("Reached end-of-stream prematurely at " +
                "character/byte position " + count.chars + "/" +
                count.bytes + ", trying to skip " + charsToSkip);
        }
        return count.bytes;
    }

    /**
     * Skip characters in the stream.
     * <p>
     * Note that a smaller number than requested might be skipped if the
     * end-of-stream (or the Derby end-of-stream marker) is reached before the
     * specified number of characters has been decoded. It is up to the caller
     * to decide if this is an error or not. For instance, when determining
     * the character length of a stream, <code>Long.MAX_VALUE</code> could be
     * passed as the requested number of characters to skip.
     * <p>
     * The three bytes of the Derby end-of-stream marker are consumed from the
     * stream, but they are included in neither of the returned counts.
     *
     * @param in byte stream with UTF-8 encoded characters
     * @param charsToSkip the number of characters to skip
     * @return The number of characters and bytes skipped. Note that the number
     *      of characters skipped may be smaller than the requested number.
     * @throws IOException if reading from the stream fails
     * @throws UTFDataFormatException if an invalid UTF-8 encoding is detected
     */
    private static final SkipCount internalSkip(final InputStream in,
                                                final long charsToSkip)
            throws IOException {
        long charsSkipped = 0;
        long bytesSkipped = 0;
        // Decoding routine for modified UTF-8.
        // See java.io.DataInput
        while (charsSkipped < charsToSkip) {
            final int c = in.read();
            if (c == -1) {
                break;
            }
            // The character is counted before it is validated. In the error
            // messages below the char position therefore includes the
            // offending character, whereas the byte position is the number of
            // bytes in the characters preceding it.
            charsSkipped++;
            if ((c & 0x80) == 0x00) { // 0xxxxxxx: top bit clear
                // Found char of one byte width.
                bytesSkipped++;
            } else if ((c & 0xe0) == 0xc0) { // 110xxxxx
                // Found char of two byte width.
                if (skipPersistent(in, 1L) != 1L) {
                    // No second byte present.
                    throw new UTFDataFormatException(
                        "Second byte in two byte character missing; byte pos " +
                        bytesSkipped + " ; char pos " + charsSkipped);
                }
                bytesSkipped += 2;
            } else if ((c & 0xf0) == 0xe0) { // 1110xxxx
                // Found char of three byte width.
                long tailBytes = 0;
                if (c == DERBY_EOF_FIRST_BYTE) {
                    // Check for Derby EOF marker. The two trailing bytes have
                    // to be read (not skipped) to be able to inspect them.
                    final int c1 = in.read();
                    final int c2 = in.read();
                    if (c1 == 0x00 && c2 == 0x00) {
                        // Found Derby EOF marker, exit loop.
                        charsSkipped--; // The marker is not a character.
                        break;
                    }
                    // Do some rudimentary error checking.
                    // Allow everything except EOF, which is the same as done
                    // in normal processing (skipPersistent below).
                    if (c1 != -1 && c2 != -1) {
                        tailBytes = 2;
                    }
                } else {
                    tailBytes = skipPersistent(in, 2L);
                }
                if (tailBytes != 2) {
                    // No second or third byte present
                    throw new UTFDataFormatException(
                        "Second or third byte in three byte character " +
                        "missing; byte pos " + bytesSkipped + " ; char pos " +
                        charsSkipped);
                }
                bytesSkipped += 3;
            } else {
                // Either a continuation byte (10xxxxxx) in first position or
                // a first byte announcing more than three bytes (1111xxxx).
                // The position is reported the same way as for the other
                // encoding errors.
                throw new UTFDataFormatException(
                    "Invalid UTF-8 encoding encountered: (decimal) " + c +
                    "; byte pos " + bytesSkipped + " ; char pos " +
                    charsSkipped);
            }
        }
        // We don't close the stream, since it might be reused. One example of
        // this is use of Resetable streams.
        return new SkipCount(charsSkipped, bytesSkipped);
    }

    /**
     * Tries harder to skip the requested number of bytes.
     * <p>
     * Whenever <code>InputStream.skip</code> reports that nothing was skipped,
     * a single byte is read instead to tell a stalled skip apart from
     * end-of-stream.
     * <p>
     * Note that even if the method fails to skip the requested number of bytes,
     * it will not throw an exception. If this happens, the caller can be sure
     * that end-of-stream has been reached.
     *
     * @param in byte stream
     * @param bytesToSkip the number of bytes to skip
     * @return The number of bytes skipped.
     * @throws IOException if reading from the stream fails
     */
    private static final long skipPersistent(InputStream in, long bytesToSkip)
            throws IOException {
        long skipped = 0;
        while (skipped < bytesToSkip) {
            long skippedNow = in.skip(bytesToSkip - skipped);
            if (skippedNow <= 0) {
                if (in.read() == -1) {
                    // EOF, return what we have and leave it up to caller to
                    // decide what to do about it.
                    break;
                }
                // A byte was consumed by the read; count it below.
                skippedNow = 1;
            }
            skipped += skippedNow;
        }
        return skipped;
    }

    /**
     * Immutable holder for the outcome of a skip operation.
     */
    private static final class SkipCount {
        /** Number of characters skipped. */
        final long chars;
        /** Number of bytes making up the skipped characters. */
        final long bytes;

        SkipCount(long chars, long bytes) {
            this.chars = chars;
            this.bytes = bytes;
        }
    }
} // End class UTF8Util