Uploaded image for project: 'JDK'
  1. JDK
  2. JDK-6392670

COMPOUND_TEXT decoder does not work (1) in "incremental" mode and (2) with direct CharBuffers

XMLWordPrintable

    • Icon: Bug Bug
    • Resolution: Won't Fix
    • Icon: P4 P4
    • None
    • 6
    • core-libs
    • None

      COMPOUND_TEXT appears to work when decoding an entire input buffer,
      but if the buffer is made available incrementally, it fails,
      even for such simple input as a newline.

      Here is the test case:
      ----------------------------------------------
      import java.util.*;
      import java.nio.*;
      import java.nio.charset.*;

      /**
       * Decodes the bytes given on the command line with the named charset in
       * three different ways and fails if the three results disagree:
       * all input at once, input revealed one byte at a time, and output space
       * granted one char at a time.
       */
      public class Decode2 {
          /** Reports whether {@code c} is a 7-bit ASCII character. */
          private static boolean isAscii(char c) {
              return c <= 0x7f;
          }

          /** Reports whether {@code c} is a visible ASCII character (space and DEL are excluded). */
          private static boolean isPrintable(char c) {
              return c > 0x20 && c < 0x7f;
          }

          /**
           * Renders {@code a} as space-separated tokens: printable chars as
           * themselves, the escape char as {@code ESC}, and everything else as
           * a four-digit hexadecimal escape.
           */
          private static String string(char[] a) {
              StringBuilder text = new StringBuilder();
              String separator = "";
              for (char ch : a) {
                  text.append(separator);
                  separator = " ";
                  if (isPrintable(ch)) {
                      text.append(ch);
                  } else if (ch == 0x1b) {
                      text.append("ESC");
                  } else {
                      text.append(String.format("\\u%04x", (int) ch));
                  }
              }
              return text.toString();
          }

          /**
           * Converts one command-line token to a byte. Accepted forms are a
           * single ASCII character, one of the mnemonics ESC, SO, SI, SS2, SS3,
           * or a hexadecimal number with or without a leading {@code 0x}.
           */
          private static byte parseByte(String arg) {
              if (arg.length() == 1 && isAscii(arg.charAt(0))) {
                  return (byte) arg.charAt(0);
              }
              if (arg.equals("ESC")) {
                  return 0x1b;
              }
              if (arg.equals("SO")) {
                  return 0x0e;
              }
              if (arg.equals("SI")) {
                  return 0x0f;
              }
              if (arg.equals("SS2")) {
                  return (byte) 0x8e;
              }
              if (arg.equals("SS3")) {
                  return (byte) 0x8f;
              }
              String hex = arg.matches("0x.*") ? arg : "0x" + arg;
              return Integer.decode(hex).byteValue();
          }

          /** Copies out the chars written to {@code buffer} so far, flipping it in the process. */
          private static char[] drain(CharBuffer buffer) {
              char[] written = new char[buffer.position()];
              buffer.flip();
              buffer.get(written);
              return written;
          }

          /**
           * Usage: CHARSET BYTE [BYTE ...]. Prints the coder result and decoded
           * chars of each of the three decoding strategies, then throws an
           * {@link Error} if the incremental runs differ from the one-shot run.
           */
          public static void main(String[] args) throws Throwable {
              Charset cs;
              try {
                  cs = Charset.forName(args[0]);
              } catch (Throwable t) {
                  System.out.println("Usage: java Decode CHARSET BYTE [BYTE ...]");
                  throw t;
              }

              byte[] bytes = new byte[args.length - 1];
              for (int k = 0; k < bytes.length; k++) {
                  bytes[k] = parseByte(args[k + 1]);
              }

              ByteBuffer input = ByteBuffer.wrap(bytes);
              CharBuffer output = CharBuffer.allocate(100);

              // Run 1: the decoder sees all of the input and plenty of output space.
              CoderResult cr1 = cs.newDecoder().decode(input, output, false);
              char[] a1 = drain(output);

              // Run 2: the input limit grows by one byte per decode call.
              input.clear();
              output.clear();
              CoderResult cr2 = null;
              CharsetDecoder byteAtATime = cs.newDecoder();
              for (int visible = 0; visible <= input.capacity(); visible++) {
                  input.limit(visible);
                  cr2 = byteAtATime.decode(input, output, false);
                  if (!cr2.isUnderflow()) {
                      break;
                  }
              }
              char[] a2 = drain(output);

              // Run 3: the output limit grows by one char per decode call.
              input.clear();
              output.clear();
              CoderResult cr3 = null;
              CharsetDecoder charAtATime = cs.newDecoder();
              for (int room = 0; room <= output.capacity(); room++) {
                  output.limit(room);
                  cr3 = charAtATime.decode(input, output, false);
                  if (!cr3.isOverflow()) {
                      break;
                  }
              }
              char[] a3 = drain(output);

              System.out.printf("%s %s%n%s %s%n%s %s%n",
                                cr1, string(a1),
                                cr2, string(a2),
                                cr3, string(a3));

              boolean inputRunAgrees = cr1 == cr2 && Arrays.equals(a1, a2);
              boolean outputRunAgrees = cr1 == cr3 && Arrays.equals(a1, a3);
              if (!(inputRunAgrees && outputRunAgrees)) {
                  throw new Error("Mismatch!");
              }
          }
      }
      ----------------------------------------------

       jr Decode2 COMPOUND_TEXT 0a
      ==> javac -source 1.6 -Xlint:all Decode2.java
      ==> java -esa -ea Decode2 COMPOUND_TEXT 0a
      UNDERFLOW \u000a
      UNDERFLOW \u000a
      UNDERFLOW
      Exception in thread "main" java.lang.Error: Mismatch!
      at Decode2.main(Decode2.java:89)
      Command java -esa -ea Decode2 COMPOUND_TEXT 0a failed: rc=1
      Copied from #6381697:
      The test below fails on COMPOUND_TEXT as follows:

      Unexpected exception charset=COMPOUND_TEXT direct=true char=\u0000
      java.lang.UnsupportedOperationException
      at java.nio.CharBuffer.array(CharBuffer.java:939)
      at sun.nio.cs.ext.COMPOUND_TEXT_Encoder.encodeLoop(COMPOUND_TEXT_Encoder.java:75)
      at java.nio.charset.CharsetEncoder.encode(CharsetEncoder.java:544)
      at java.nio.charset.CharsetEncoder.encode(CharsetEncoder.java:766)
      at FindOneCharEncoderBugs.convert(FindOneCharEncoderBugs.java:51)
      at FindOneCharEncoderBugs.testChar(FindOneCharEncoderBugs.java:71)
      at FindOneCharEncoderBugs.testCharset(FindOneCharEncoderBugs.java:115)
      at FindOneCharEncoderBugs.realMain(FindOneCharEncoderBugs.java:121)
      at FindOneCharEncoderBugs.main(FindOneCharEncoderBugs.java:137)


      ----------------------------------------------------------------------------
      /* @test %W% %E%
         @bug 5058133
         @summary Check that all one-char sequences can be encoded by all charsets
         @run main/timeout=1200 FindOneCharEncoderBugs
         @author Martin Buchholz
       */

      import java.util.*;
      import java.nio.*;
      import java.nio.charset.*;

      /**
       * For every available charset that can encode, encodes each single char
       * from U+0000 up to U+FFFE with a fresh encoder, once from a heap
       * CharBuffer and once from a direct CharBuffer, and compares the result
       * with the bytes produced by {@code String.getBytes(charsetName)}.
       */
      public class FindOneCharEncoderBugs {
          /** Names of charsets that {@link #testCharset} skips without testing. */
          final static String[] brokenCharsets = {
              // Delete the following lines when these charsets are fixed!
              "x-IBM933",
              // "x-IBM949",
              //"x-IBM949C",
              //"x-IBM970",
              // "COMPOUND_TEXT",
          };

          /**
           * Compares {@code ba} with the bytes of {@code bb} at indices
           * 0 .. limit-1 (absolute reads; the buffer position is not consulted).
           */
          private static boolean equals(byte[] ba, ByteBuffer bb) {
              if (ba.length != bb.limit()) {
                  return false;
              }
              for (int i = 0; i < ba.length; i++) {
                  if (ba[i] != bb.get(i)) {
                      return false;
                  }
              }
              return true;
          }

          /**
           * Appends {@code b} to {@code sb} as exactly two lowercase hex digits,
           * preceded by a space unless {@code sb} is still empty.
           */
          private static void appendHex(StringBuilder sb, byte b) {
              if (sb.length() != 0) {
                  sb.append(' ');
              }
              // Mask to 0..255: formatting a sign-extended int would print
              // bytes >= 0x80 as eight digits (ffffff80 instead of 80).
              sb.append(String.format("%02x", b & 0xff));
          }

          /** Formats {@code bytes} as space-separated two-digit hex values. */
          private static String toString(byte[] bytes) {
              final StringBuilder sb = new StringBuilder();
              for (byte b : bytes) {
                  appendHex(sb, b);
              }
              return sb.toString();
          }

          /** Formats the bytes of {@code bb} at indices 0 .. limit-1 as space-separated two-digit hex values. */
          private static String toString(ByteBuffer bb) {
              final StringBuilder sb = new StringBuilder();
              for (int i = 0; i < bb.limit(); i++) {
                  appendHex(sb, bb.get(i));
              }
              return sb.toString();
          }

          /**
           * Loads {@code c} into {@code cb} and encodes it with a new encoder of
           * {@code cs} that replaces unmappable and malformed input.
           */
          private static ByteBuffer convert(Charset cs, char c, CharBuffer cb) throws Throwable {
              cb.clear(); cb.put(c); cb.flip();
              return cs.newEncoder()
                  .onUnmappableCharacter(CodingErrorAction.REPLACE)
                  .onMalformedInput(CodingErrorAction.REPLACE)
                  .encode(cb);
          }

          /** Returns a direct CharBuffer with the same capacity as ordinary CharBuffer ocb */
          private static CharBuffer directCharBuffer(CharBuffer ocb) {
              final CharBuffer dcb =
                  ByteBuffer.allocateDirect(ocb.capacity() * Character.SIZE / Byte.SIZE)
                  .asCharBuffer();
              check(! ocb.isDirect());
              check(  dcb.isDirect());
              equal(ocb.capacity(), dcb.capacity());
              return dcb;
          }

          /**
           * Encodes {@code c} through {@code cb} and compares the result with
           * {@code expected}, printing a diagnostic on a mismatch or exception.
           *
           * @return the number of problems found (0 or 1); the same number is
           *         also added to {@code failed}
           */
          private static int testChar(byte[] expected, CharBuffer cb, Charset cs, char c) {
              int oopses = 0;
              try {
                  final ByteBuffer bb = convert(cs, c, cb);
                  if (! equals(expected, bb)) {
                      System.out.printf("bytes differ charset=%s direct=%s char=\\u%04x%n%s%n%s%n",
                                        cs, cb.isDirect(), (int)c,
                                        toString(expected), toString(bb));
                      oopses++;
                  }
              } catch (Throwable t) {
                  System.out.printf("Unexpected exception charset=%s direct=%s char=\\u%04x%n",
                                    cs, cb.isDirect(), (int)c);
                  // unexpected() bumps `failed` too, so an exception is counted
                  // twice in the final tally; kept as in the original test.
                  unexpected(t);
                  oopses++;
              }
              failed += oopses;
              return oopses;
          }

          /**
           * Runs the one-char comparison for every char against {@code cs},
           * unless the charset cannot encode or is listed in
           * {@link #brokenCharsets}. Gives up on the charset after three failures.
           */
          private static void testCharset(Charset cs) throws Throwable {
              if (! cs.canEncode()) {
                  return;
              }

              final String csn = cs.name();

              for (String n : brokenCharsets) {
                  if (csn.equals(n)) {
                      System.out.printf("Skipping possibly broken charset %s%n", csn);
                      return;
                  }
              }
              System.out.println(csn);

              final char[] theChar = new char[1];
              final CharBuffer ocb = CharBuffer.allocate(1);
              final CharBuffer dcb = directCharBuffer(ocb);
              int maxFailures = 3;

              // The c+1 guard stops the loop before the char wraps around to 0;
              // as written it also means U+FFFF itself is never tested.
              for (char c = '\u0000'; c+1 != 0x10000 && maxFailures > 0; c++) {
                  theChar[0] = c;
                  byte[] bytes = new String(theChar).getBytes(csn);
                  if (bytes.length == 0) {
                      System.out.printf("Empty output?! charset=%s char=\\u%04x%n",
                                        cs, (int)c);
                      maxFailures--; failed++;
                  }
                  maxFailures -= testChar(bytes, ocb, cs, c);
                  maxFailures -= testChar(bytes, dcb, cs, c);
              }
          }

          /** Tests every available charset, recording rather than propagating any exception. */
          private static void realMain(String[] args) {
              for (Charset cs : Charset.availableCharsets().values()) {
                  try { testCharset(cs); }
                  catch (Throwable t) { unexpected(t); }
              }
          }

          //--------------------- Infrastructure ---------------------------
          /** Running counts of passed and failed checks, reported by {@link #main}. */
          static volatile int passed = 0, failed = 0;

          /** Records one passed check. */
          static void pass() {passed++;}

          /** Records one failed check and dumps the current stack. */
          static void fail() {failed++; Thread.dumpStack();}

          /** Prints {@code msg}, then records one failed check. */
          static void fail(String msg) {System.out.println(msg); fail();}

          /** Records one failed check and prints the stack trace of {@code t}. */
          static void unexpected(Throwable t) {failed++; t.printStackTrace();}

          /** Records a pass if {@code cond} holds, otherwise a failure. */
          static void check(boolean cond) {if (cond) pass(); else fail();}

          /** Records a pass if {@code x} and {@code y} are both null or equal, otherwise a failure. */
          static void equal(Object x, Object y) {
              if (x == null ? y == null : x.equals(y)) pass();
              else fail(x + " not equal to " + y);
          }

          /** Runs all tests, prints the tally, and throws if any check failed. */
          public static void main(String[] args) throws Throwable {
              try {realMain(args);} catch (Throwable t) {unexpected(t);}
              System.out.printf("%nPassed = %d, failed = %d%n%n", passed, failed);
              if (failed > 0) throw new AssertionError("Some tests failed");
          }

          /** A piece of code that may throw, for use with {@link #THROWS}. */
          private static abstract class Fun {abstract void f() throws Throwable;}

          /** Checks that each of {@code fs} throws an exception assignable to {@code k}. */
          static void THROWS(Class<? extends Throwable> k, Fun... fs) {
              for (Fun f : fs) {
                  try { f.f(); fail("Expected " + k.getName() + " not thrown"); }
                  catch (Throwable t) {
                      if (k.isAssignableFrom(t.getClass())) pass();
                      else unexpected(t);
                  }
              }
          }

          /** A thread whose body may throw; anything thrown is recorded via {@link #unexpected}. */
          private static abstract class CheckedThread extends Thread {
              abstract void realRun() throws Throwable;
              public void run() {
                  try {realRun();} catch (Throwable t) {unexpected(t);}
              }
          }
      }
      ----------------------------------------------------------------------------
      *** (#1 of 1): 2006-02-06 10:52:53 PST ###@###.###

            sherman Xueming Shen
            martin Martin Buchholz
            Votes:
            0 Vote for this issue
            Watchers:
            0 Start watching this issue

              Created:
              Updated:
              Resolved:
              Imported:
              Indexed: