1 /* InputStreamReader.java -- Reader that transforms bytes to chars
2 Copyright (C) 1998, 1999, 2001, 2003, 2004, 2005, 2006
3 Free Software Foundation, Inc.
5 This file is part of GNU Classpath.
7 GNU Classpath is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
12 GNU Classpath is distributed in the hope that it will be useful, but
13 WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GNU Classpath; see the file COPYING. If not, write to the
19 Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
22 Linking this library statically or dynamically with other modules is
23 making a combined work based on this library. Thus, the terms and
24 conditions of the GNU General Public License cover the whole
27 As a special exception, the copyright holders of this library give you
28 permission to link this library with independent modules to produce an
29 executable, regardless of the license terms of these independent
30 modules, and to copy and distribute the resulting executable under
31 terms of your choice, provided that you also meet, for each linked
32 independent module, the terms and conditions of the license of that
33 module. An independent module is a module which is not derived from
34 or based on this library. If you modify this library, you may extend
35 this exception to your version of the library, but you are not
36 obligated to do so. If you do not wish to do so, delete this
37 exception statement from your version. */
43 * This class reads characters from a byte input stream. The characters
44 * read are converted from bytes in the underlying stream by a
45 * decoding layer. The decoding layer transforms bytes to chars according
46 * to an encoding standard. There are many available encodings to choose
47 * from. The desired encoding can either be specified by name, or if no
48 * encoding is selected, the system default encoding will be used. The
49 * system default encoding name is determined from the system property
50 * <code>file.encoding</code>. The only encodings that are guaranteed to
51 * be available are "8859_1" (the Latin-1 character set) and "UTF8".
52 * Unfortunately, Java does not provide a mechanism for listing the
53 * encodings that are supported in a given implementation.
55 * Here is a list of standard encoding names that may be available:
58 * <li>8859_1 (ISO-8859-1/Latin-1)</li>
59 * <li>8859_2 (ISO-8859-2/Latin-2)</li>
60 * <li>8859_3 (ISO-8859-3/Latin-3)</li>
61 * <li>8859_4 (ISO-8859-4/Latin-4)</li>
62 * <li>8859_5 (ISO-8859-5/Cyrillic)</li>
63 * <li>8859_6 (ISO-8859-6/Arabic)</li>
64 * <li>8859_7 (ISO-8859-7/Greek)</li>
65 * <li>8859_8 (ISO-8859-8/Hebrew)</li>
66 * <li>8859_9 (ISO-8859-9/Latin-5)</li>
67 * <li>ASCII (7-bit ASCII)</li>
68 * <li>UTF8 (UCS Transformation Format-8)</li>
72 * It is recommended that applications do not use
73 * <code>InputStreamReader</code> objects
74 * directly. Rather, for efficiency purposes, an object of this class
75 * should be wrapped by a <code>BufferedReader</code>.
77 * Due to a deficiency in the Java class library design, there is no standard
78 * way for an application to install its own byte-character encoding.
83 * @author Robert Schuster
84 * @author Aaron M. Renn (arenn@urbanophile.com)
85 * @author Per Bothner (bothner@cygnus.com)
86 * @date April 22, 1998.
88 public class InputStreamReader extends Reader
93 private InputStream in;
96 * The charset decoder.
98 //private CharsetDecoder decoder;
101 * End of stream reached.
103 private boolean isDone = false;
108 //private float maxBytesPerChar;
111 * Buffer holding surplus loaded bytes (if any)
113 //private ByteBuffer byteBuffer;
116 * java.io canonical name of the encoding.
118 //private String encoding;
121 * We might decode to a 2-char UTF-16 surrogate, which won't fit in the
122 * output buffer. In this case we need to save the surrogate char.
124 //private char savedSurrogate;
125 //private boolean hasSavedSurrogate = false;
128 * A byte array to be reused in read(char[], int, int).
130 //private byte[] bytesCache;
133 * Locks the bytesCache above in read(char[], int, int).
135 //private Object cacheLock = new Object();
138 * This method initializes a new instance of <code>InputStreamReader</code>
139 * to read from the specified stream using the default encoding.
141 * @param in The <code>InputStream</code> to read from
143 public InputStreamReader(InputStream in)
146 throw new /*NullPointer*/Exception("NullPointerException");
150 encoding = SystemProperties.getProperty("file.encoding");
151 // Don't use NIO if avoidable
152 if(EncodingHelper.isISOLatin1(encoding))
154 encoding = "ISO8859_1";
155 maxBytesPerChar = 1f;
159 Charset cs = EncodingHelper.getCharset(encoding);
160 decoder = cs.newDecoder();
161 encoding = EncodingHelper.getOldCanonical(cs.name());
163 maxBytesPerChar = cs.newEncoder().maxBytesPerChar();
164 } catch(UnsupportedOperationException _){
165 maxBytesPerChar = 1f;
167 decoder.onMalformedInput(CodingErrorAction.REPLACE);
168 decoder.onUnmappableCharacter(CodingErrorAction.REPLACE);
170 } catch(RuntimeException e) {
171 encoding = "ISO8859_1";
172 maxBytesPerChar = 1f;
174 } catch(UnsupportedEncodingException e) {
175 encoding = "ISO8859_1";
176 maxBytesPerChar = 1f;
182 * This method initializes a new instance of <code>InputStreamReader</code>
183 * to read from the specified stream using a caller supplied character
184 * encoding scheme. Note that due to a deficiency in the Java language
185 * design, there is no way to determine which encodings are supported.
187 * @param in The <code>InputStream</code> to read from
188 * @param encoding_name The name of the encoding scheme to use
190 * @exception UnsupportedEncodingException If the encoding scheme
191 * requested is not available.
193 /*public InputStreamReader(InputStream in, String encoding_name)
194 throws UnsupportedEncodingException
197 || encoding_name == null)
198 throw new NullPointerException();
201 // Don't use NIO if avoidable
202 if(EncodingHelper.isISOLatin1(encoding_name))
204 encoding = "ISO8859_1";
205 maxBytesPerChar = 1f;
210 Charset cs = EncodingHelper.getCharset(encoding_name);
212 maxBytesPerChar = cs.newEncoder().maxBytesPerChar();
213 } catch(UnsupportedOperationException _){
214 maxBytesPerChar = 1f;
217 decoder = cs.newDecoder();
218 decoder.onMalformedInput(CodingErrorAction.REPLACE);
219 decoder.onUnmappableCharacter(CodingErrorAction.REPLACE);
222 // The encoding should be the old name, if such exists.
223 encoding = EncodingHelper.getOldCanonical(cs.name());
224 } catch(RuntimeException e) {
225 encoding = "ISO8859_1";
226 maxBytesPerChar = 1f;
232 * Creates an InputStreamReader that uses a decoder of the given
233 * charset to decode the bytes in the InputStream into
238 /*public InputStreamReader(InputStream in, Charset charset) {
240 throw new NullPointerException();
242 decoder = charset.newDecoder();
245 maxBytesPerChar = charset.newEncoder().maxBytesPerChar();
246 } catch(UnsupportedOperationException _){
247 maxBytesPerChar = 1f;
250 decoder.onMalformedInput(CodingErrorAction.REPLACE);
251 decoder.onUnmappableCharacter(CodingErrorAction.REPLACE);
253 encoding = EncodingHelper.getOldCanonical(charset.name());
257 * Creates an InputStreamReader that uses the given charset decoder
258 * to decode the bytes in the InputStream into characters.
262 /*public InputStreamReader(InputStream in, CharsetDecoder decoder) {
264 throw new NullPointerException();
266 this.decoder = decoder;
268 Charset charset = decoder.charset();
271 maxBytesPerChar = 1f;
273 maxBytesPerChar = charset.newEncoder().maxBytesPerChar();
274 } catch(UnsupportedOperationException _){
275 maxBytesPerChar = 1f;
278 decoder.onMalformedInput(CodingErrorAction.REPLACE);
279 decoder.onUnmappableCharacter(CodingErrorAction.REPLACE);
282 encoding = "US-ASCII";
284 encoding = EncodingHelper.getOldCanonical(decoder.charset().name());
288 * This method closes this stream, as well as the underlying
289 * <code>InputStream</code>.
291 * @exception IOException If an error occurs
293 /*public void close() //throws IOException
297 // Makes sure all intermediate data is released by the decoder.
309 * This method returns the name of the encoding that is currently in use
310 * by this object. If the stream has been closed, this method is allowed
311 * to return <code>null</code>.
313 * @return The current encoding name
315 /*public String getEncoding()
317 return in != null ? encoding : null;
321 * This method checks to see if the stream is ready to be read. It
322 * will return <code>true</code> if it is, or <code>false</code> if it is not.
323 * If the stream is not ready to be read, it could (although is not required
324 * to) block on the next read attempt.
326 * @return <code>true</code> if the stream is ready to be read,
327 * <code>false</code> otherwise
329 * @exception IOException If an error occurs
331 /*public boolean ready() throws IOException
334 throw new IOException("Reader has been closed");
336 return in.available() != 0;
340 * This method reads up to <code>length</code> characters from the stream into
341 * the specified array starting at index <code>offset</code> into the
344 * @param buf The character array to receive the data read
345 * @param offset The offset into the array to start storing characters
346 * @param length The requested number of characters to read.
348 * @return The actual number of characters read, or -1 if end of stream.
350 * @exception IOException If an error occurs
352 /*public int read(char[] buf, int offset, int length) throws IOException
355 throw new IOException("Reader has been closed");
360 int totalBytes = (int)((double) length * maxBytesPerChar);
361 if (byteBuffer != null)
362 totalBytes = Math.max(totalBytes, byteBuffer.remaining());
364 // Fetch cached bytes array if available and big enough.
365 synchronized(cacheLock)
368 if (bytes == null || bytes.length < totalBytes)
369 bytes = new byte[totalBytes];
375 if(byteBuffer != null)
377 remaining = byteBuffer.remaining();
378 byteBuffer.get(bytes, 0, remaining);
381 if(totalBytes - remaining > 0)
383 read = in.read(bytes, remaining, totalBytes - remaining);
391 byteBuffer = ByteBuffer.wrap(bytes, 0, read);
392 CharBuffer cb = CharBuffer.wrap(buf, offset, length);
393 int startPos = cb.position();
395 if(hasSavedSurrogate){
396 hasSavedSurrogate = false;
397 cb.put(savedSurrogate);
401 CoderResult cr = decoder.decode(byteBuffer, cb, isDone);
403 // 1 char remains which is the first half of a surrogate pair.
404 if(cr.isOverflow() && cb.hasRemaining()){
405 CharBuffer overflowbuf = CharBuffer.allocate(2);
406 cr = decoder.decode(byteBuffer, overflowbuf, isDone);
408 if(overflowbuf.hasRemaining())
410 cb.put(overflowbuf.get());
411 savedSurrogate = overflowbuf.get();
412 hasSavedSurrogate = true;
417 if(byteBuffer.hasRemaining()) {
418 byteBuffer.compact();
424 read = cb.position() - startPos;
426 // Put cached bytes array back if we are finished and the cache
427 // is null or smaller than the used bytes array.
428 synchronized (cacheLock)
430 if (byteBuffer == null
431 && (bytesCache == null || bytesCache.length < bytes.length))
434 return (read <= 0) ? -1 : read;
439 // Fetch cached bytes array if available and big enough.
440 synchronized (cacheLock)
443 if (bytes == null || length < bytes.length)
444 bytes = new byte[length];
449 int read = in.read(bytes);
450 for(int i=0;i<read;i++)
451 buf[offset+i] = (char)(bytes[i]&0xFF);
453 // Put back byte array into cache if appropriate.
454 synchronized (cacheLock)
456 if (bytesCache == null || bytesCache.length < bytes.length)
464 * Reads a char from the input stream and returns it
465 * as an int in the range of 0-65535. This method also will return -1 if
466 * the end of the stream has been reached.
468 * This method will block until the char can be read.
470 * @return The char read or -1 if end of stream
472 * @exception IOException If an error occurs
474 /*public int read() throws IOException
476 char[] buf = new char[1];
477 int count = read(buf, 0, 1);
478 return count > 0 ? buf[0] : -1;
482 * Skips the specified number of chars in the stream. It
483 * returns the actual number of chars skipped, which may be less than the
486 * @param count The requested number of chars to skip
488 * @return The actual number of chars skipped.
490 * @exception IOException If an error occurs
492 /*public long skip(long count) throws IOException
495 throw new IOException("Reader has been closed");
497 return super.skip(count);