Uploaded image for project: 'JDK'
  1. JDK
  2. JDK-4391896

UTF16 Decoder Broken

XMLWordPrintable

    • Icon: Bug Bug
    • Resolution: Fixed
    • Icon: P3 P3
    • 1.4.0
    • 1.3.0
    • core-libs



      Name: yyT116575 Date: 11/22/2000


      java version "1.3.0"
      Java(TM) 2 Runtime Environment, Standard Edition (build 1.3.0-C)
      Java HotSpot(TM) Client VM (build 1.3.0-C, mixed mode)


      The problem with the UTF16 decoder is more readily noticeable
      in Java 1.1.x. It appears that some fixes were made in
      version 1.2.x, but the problem is not completely gone. I'll explain...

      Despite requesting a single character from the reader,
      the UTF16 decoder automatically buffers 8K of bytes! (This
      part of the problem has NOT changed from JDK version to
      version.) And what's worse is that the decoder throws an
      internal Error if not supplied with "enough" bytes to do
      its decoding. Not an Exception but an Error.

      In other words, it assumes that it can always access a set
      number of bytes in a block read and is unable to handle
      insufficient bytes returned from a block read of the
      underlying input stream. A simple example that illustrates
      this problem is an input stream (such as a socket
      connection) that returns only 1 byte at a time, even in
      block reads, because more data is not available at that
      time.

      In JDK 1.1.x the decoder always needs both bytes of every
      character in order to perform the decoding operation,
      otherwise it throws the Error. This has been partially
      fixed in JDK 1.2.x so that it *can* handle block reads of
      only a single character. However, this bug *still* remains
      for the UTF16 BOM. The entire BOM has to be returned within
      a single block read or the decoder fails.

      /* Test case. */
      import java.io.ByteArrayInputStream;
      import java.io.FilterInputStream;
      import java.io.FilterReader;
      import java.io.InputStream;
      import java.io.InputStreamReader;
      import java.io.IOException;
      import java.io.Reader;

      /**
       * Test case for the UTF-16 ("UnicodeLittle") decoder behind
       * {@link InputStreamReader}.
       *
       * <p>The program reads a single character from a small little-endian
       * UTF-16 byte array in three ways: with unrestricted block reads, with
       * block reads capped at 1..N bytes, and with capped block reads on the
       * same data minus its byte order mark. Every call that passes through
       * the reader and the underlying stream is logged to {@code System.out}
       * so the amount of data requested by the decoder is visible. An
       * {@link Error} raised by the decoder is printed and the next, larger
       * cap is tried.
       */
      public class BrokenUTF16 {

          /** Encoding name handed to every {@link InputStreamReader} under test. */
          private static final String ENCODING = "UnicodeLittle";

          // MAIN

          /**
           * Runs the three read scenarios and prints a trace of each.
           *
           * @param argv ignored
           * @throws Exception if the encoding is unsupported or a stream fails
           */
          public static void main(String[] argv) throws Exception {
              final byte[] bytes = {
                  (byte)0xFF, (byte)0xFE, // BOM
                  (byte)0xE5, (byte)0x65, (byte)0x2C, (byte)0x67
              };

              printHeading("Byte array");
              for (int i = 0; i < bytes.length; i++) {
                  int unsigned = bytes[i] & 0x00FF;
                  System.out.println("byte["+i+"]: 0x"+Integer.toHexString(unsigned));
              }

              printHeading("Reading single byte: new InputStreamReader(bytes,\"UnicodeLittle\")");
              Reader unrestricted = new ReaderReporter(
                  new InputStreamReader(
                      new InputStreamReporter(new ByteArrayInputStream(bytes)), ENCODING));
              try {
                  unrestricted.read();
              }
              finally {
                  // Close even when the decoder fails, so the reader never leaks.
                  unrestricted.close();
              }

              printHeading("Limited block reads");
              runLimitedReads(bytes, 0, bytes.length);

              // Same data without the two leading BOM bytes.
              printHeading("Limited block reads, skipping BOM");
              runLimitedReads(bytes, 2, bytes.length - 2);

              printHeading("Done.");
          }

          /** Prints a three-line banner: "#", "# " + title, "#". */
          private static void printHeading(String title) {
              System.out.println("#");
              System.out.println("# "+title);
              System.out.println("#");
          }

          /**
           * Tries block-read caps of 1, 2, ... {@code length} bytes on the given
           * slice and stops at the first cap for which reading one character
           * does not raise an {@link Error}.
           */
          private static void runLimitedReads(byte[] data, int offset, int length) throws IOException {
              for (int limit = 1; limit <= length; limit++) {
                  if (readOneCharLimited(data, offset, length, limit)) {
                      break;
                  }
              }
          }

          /**
           * Reads one character from the slice through a stream whose block
           * reads return at most {@code limit} bytes.
           *
           * @return {@code true} if the read completed, {@code false} if the
           *         decoder threw an {@link Error} (its stack trace is printed)
           * @throws IOException if the encoding is unsupported or a stream fails
           */
          private static boolean readOneCharLimited(byte[] data, int offset, int length, int limit)
                  throws IOException {
              System.out.println("# block reads limited to "+limit+" byte"+(limit!=1?"s":""));
              InputStream source = new ByteArrayInputStream(data, offset, length);
              InputStream limited = new LimitedInputStream(source, limit);
              InputStream reported = new InputStreamReporter(limited);
              Reader reader = new ReaderReporter(new InputStreamReader(reported, ENCODING));
              try {
                  reader.read();
                  return true;
              }
              catch (Error error) {
                  // The decoder signals "not enough bytes" with an Error, not an Exception.
                  error.printStackTrace(System.out);
                  return false;
              }
              finally {
                  reader.close();
              }
          }

          /**
           * Formats the result of a single-unit read: hexadecimal with a
           * {@code 0x} prefix, or the bare word {@code EOF} for -1.
           */
          private static String describe(int value) {
              return value != -1 ? "0x"+Integer.toHexString(value) : "EOF";
          }

          // Classes

          /** Reader wrapper that logs every read call and its result to {@code System.out}. */
          static class ReaderReporter extends FilterReader {

              // Constructors

              /** @param reader the reader whose reads are logged */
              public ReaderReporter(Reader reader) {
                  super(reader);
              }

              // Reader methods

              /** Reads one character from the wrapped reader and logs it. */
              public int read() throws IOException {
                  int c = in.read();
                  System.out.println("Reader.read(): "+describe(c));
                  return c;
              }

              /** Delegates the block read to the wrapped reader and logs the count returned. */
              public int read(char[] buffer, int offset, int length) throws IOException {
                  int count = in.read(buffer, offset, length);
                  System.out.println("Reader.read(char[],"+offset+','+length+"): "+count);
                  return count;
              }

          } // class ReaderReporter

          /** Stream wrapper that logs every read call and its result to {@code System.out}. */
          static class InputStreamReporter extends FilterInputStream {

              // Constructors

              /** @param stream the stream whose reads are logged */
              public InputStreamReporter(InputStream stream) {
                  super(stream);
              }

              // InputStream methods

              /** Reads one byte from the wrapped stream and logs it. */
              public int read() throws IOException {
                  int c = in.read();
                  System.out.println("InputStream.read(): "+describe(c));
                  return c;
              }

              /** Delegates the block read to the wrapped stream and logs the count returned. */
              public int read(byte[] buffer, int offset, int length) throws IOException {
                  int count = in.read(buffer, offset, length);
                  System.out.println("InputStream.read(byte[],"+offset+','+length+"): "+count);
                  return count;
              }

          } // class InputStreamReporter

          /**
           * Stream wrapper that caps the number of bytes requested by any block
           * read, imitating a source (such as a socket) that delivers only a
           * few bytes at a time. Single-byte reads are passed through unchanged.
           */
          static class LimitedInputStream extends FilterInputStream {

              // Data

              /** Largest length ever forwarded to the wrapped stream's block read. */
              private final int limit;

              // Constructors

              /** Wraps {@code stream} with a block-read cap of 2 bytes. */
              public LimitedInputStream(InputStream stream) {
                  this(stream, 2);
              }

              /**
               * @param stream the stream to wrap
               * @param limit  maximum bytes per block read; must be at least 1
               * @throws IllegalArgumentException if {@code limit} is less than 1
               */
              public LimitedInputStream(InputStream stream, int limit) {
                  super(stream);
                  // A cap below 1 would turn every block read into a zero-length
                  // or negative-length request, which no caller can make progress on.
                  if (limit < 1) {
                      throw new IllegalArgumentException("limit must be at least 1: "+limit);
                  }
                  this.limit = limit;
              }

              // InputStream methods

              /**
               * Forwards the block read with {@code length} reduced to the cap
               * when it exceeds it, and logs the effective length and the count
               * returned.
               */
              public int read(byte[] buffer, int offset, int length) throws IOException {
                  int capped = Math.min(length, limit);
                  int count = in.read(buffer, offset, capped);
                  System.out.println("LimitedInputStream.read(byte[],"+offset+','+capped+"): "+count);
                  return count;
              }

          } // class LimitedInputStream

      } // class BrokenUTF16
      (Review ID: 112650)
      ======================================================================

            ilittlesunw Ian Little (Inactive)
            yyoungsunw Yung-ching Young (Inactive)
            Votes:
            0 Vote for this issue
            Watchers:
            1 Start watching this issue

              Created:
              Updated:
              Resolved:
              Imported:
              Indexed: