-
Bug
-
Resolution: Won't Fix
-
P4
-
None
-
6
-
None
-
generic
-
generic
COMPOUND_TEXT appears to work when decoding an entire input buffer,
but if the buffer is made available incrementally, it fails,
even for such simple input as a newline.
Here is the test case:
----------------------------------------------
import java.util.*;
import java.nio.*;
import java.nio.charset.*;
/**
 * Command-line harness that decodes the same byte sequence three ways and
 * insists that all three agree:
 * once with everything available up front, once with the input revealed one
 * byte at a time, and once with the output space revealed one char at a time.
 *
 * Arguments: a charset name followed by the bytes to decode (see parseByte).
 */
public class Decode2 {
    /** Tells whether {@code c} lies in the 7-bit ASCII range. */
    private static boolean isAscii(char c) {
        return c <= 0x7f;
    }

    /** Tells whether {@code c} is a visible ASCII character; space and DEL do not qualify. */
    private static boolean isPrintable(char c) {
        return c > 0x20 && c < 0x7f;
    }

    /**
     * Formats decoded chars for display, separated by single spaces.
     * Printable ASCII is shown as is, the escape character as "ESC", and
     * anything else as a four-digit hex escape.
     */
    private static String string(char[] a) {
        StringBuilder out = new StringBuilder();
        String separator = "";
        for (char ch : a) {
            out.append(separator);
            separator = " ";
            if (isPrintable(ch)) {
                out.append(ch);
            } else if (ch == 0x1b) {
                out.append("ESC");
            } else {
                out.append(String.format("\\u%04x", (int) ch));
            }
        }
        return out.toString();
    }

    /**
     * Turns one command-line token into a byte.
     * A single ASCII character stands for its own code; ESC, SO, SI, SS2 and
     * SS3 are symbolic names; everything else is read as hex, with or without
     * a leading "0x".
     */
    private static byte parseByte(String arg) {
        if (arg.length() == 1 && isAscii(arg.charAt(0))) {
            return (byte) arg.charAt(0);
        }
        if (arg.equals("ESC")) {
            return 0x1b;
        }
        if (arg.equals("SO")) {
            return 0x0e;
        }
        if (arg.equals("SI")) {
            return 0x0f;
        }
        if (arg.equals("SS2")) {
            return (byte) 0x8e;
        }
        if (arg.equals("SS3")) {
            return (byte) 0x8f;
        }
        String hex = arg.matches("0x.*") ? arg : "0x" + arg;
        return Integer.decode(hex).byteValue();
    }

    /** Copies out everything written to {@code buf} so far, leaving it flipped and fully read. */
    private static char[] drain(CharBuffer buf) {
        char[] written = new char[buf.position()];
        buf.flip();
        buf.get(written);
        return written;
    }

    public static void main(String[] args) throws Throwable {
        Charset cs;
        try {
            cs = Charset.forName(args[0]);
        } catch (Throwable t) {
            // Covers both a missing argument and an unknown charset name.
            System.out.println("Usage: java Decode CHARSET BYTE [BYTE ...]");
            throw t;
        }

        byte[] input = new byte[args.length - 1];
        for (int k = 0; k < input.length; k++) {
            input[k] = parseByte(args[k + 1]);
        }

        ByteBuffer ib = ByteBuffer.wrap(input);
        CharBuffer ob = CharBuffer.allocate(100);

        // Pass 1: whole input and whole output buffer available in one call.
        CoderResult cr1 = cs.newDecoder().decode(ib, ob, false);
        char[] a1 = drain(ob);

        // Pass 2: same decoder instance throughout, input limit raised one byte per call.
        ib.clear();
        ob.clear();
        CharsetDecoder inputStepper = cs.newDecoder();
        CoderResult cr2;
        int inputLimit = 0;
        do {
            ib.limit(inputLimit++);
            cr2 = inputStepper.decode(ib, ob, false);
        } while (cr2.isUnderflow() && inputLimit <= ib.capacity());
        char[] a2 = drain(ob);

        // Pass 3: whole input available, output limit raised one char per call.
        ib.clear();
        ob.clear();
        CharsetDecoder outputStepper = cs.newDecoder();
        CoderResult cr3;
        int outputLimit = 0;
        do {
            ob.limit(outputLimit++);
            cr3 = outputStepper.decode(ib, ob, false);
        } while (cr3.isOverflow() && outputLimit <= ob.capacity());
        char[] a3 = drain(ob);

        System.out.printf("%s %s%n%s %s%n%s %s%n",
                          cr1, string(a1),
                          cr2, string(a2),
                          cr3, string(a3));

        boolean inputSteppingAgrees = cr1 == cr2 && Arrays.equals(a1, a2);
        boolean outputSteppingAgrees = cr1 == cr3 && Arrays.equals(a1, a3);
        if (!inputSteppingAgrees || !outputSteppingAgrees) {
            throw new Error("Mismatch!");
        }
    }
}
----------------------------------------------
jr Decode2 COMPOUND_TEXT 0a
==> javac -source 1.6 -Xlint:all Decode2.java
==> java -esa -ea Decode2 COMPOUND_TEXT 0a
UNDERFLOW \u000a
UNDERFLOW \u000a
UNDERFLOW
Exception in thread "main" java.lang.Error: Mismatch!
at Decode2.main(Decode2.java:89)
Command java -esa -ea Decode2 COMPOUND_TEXT 0a failed: rc=1
Copied from #6381697:
The test below fails on COMPOUND_TEXT as follows:
Unexpected exception charset=COMPOUND_TEXT direct=true char=\u0000
java.lang.UnsupportedOperationException
at java.nio.CharBuffer.array(CharBuffer.java:939)
at sun.nio.cs.ext.COMPOUND_TEXT_Encoder.encodeLoop(COMPOUND_TEXT_Encoder.java:75)
at java.nio.charset.CharsetEncoder.encode(CharsetEncoder.java:544)
at java.nio.charset.CharsetEncoder.encode(CharsetEncoder.java:766)
at FindOneCharEncoderBugs.convert(FindOneCharEncoderBugs.java:51)
at FindOneCharEncoderBugs.testChar(FindOneCharEncoderBugs.java:71)
at FindOneCharEncoderBugs.testCharset(FindOneCharEncoderBugs.java:115)
at FindOneCharEncoderBugs.realMain(FindOneCharEncoderBugs.java:121)
at FindOneCharEncoderBugs.main(FindOneCharEncoderBugs.java:137)
----------------------------------------------------------------------------
/* @test %W% %E%
@bug 5058133
@summary Check that all one-char sequences can be encoded by all charsets
@run main/timeout=1200 FindOneCharEncoderBugs
@author Martin Buchholz
*/
import java.util.*;
import java.nio.*;
import java.nio.charset.*;
/**
 * For every installed charset that can encode, encodes each single char
 * through a CharsetEncoder fed from a heap CharBuffer and from a direct
 * CharBuffer, and compares the bytes with what String.getBytes produces
 * for the same char.
 */
public class FindOneCharEncoderBugs {
    /** Canonical names of charsets that testCharset skips. */
    final static String[] brokenCharsets = {
        // Delete the following lines when these charsets are fixed!
        "x-IBM933",
        // "x-IBM949",
        //"x-IBM949C",
        //"x-IBM970",
        // "COMPOUND_TEXT",
    };

    /**
     * Compares an expected byte array with the contents of a buffer.
     * The buffer is read with absolute gets from index 0 up to its limit,
     * so its position is neither consulted nor changed.
     */
    private static boolean equals(byte[] ba, ByteBuffer bb) {
        if (ba.length != bb.limit())
            return false;
        for (int i = 0; i < ba.length; i++)
            if (ba[i] != bb.get(i))
                return false;
        return true;
    }

    /** Renders bytes as space-separated two-digit lowercase hex. */
    private static String toString(byte[] bytes) {
        final StringBuilder sb = new StringBuilder();
        for (byte b : bytes) {
            if (sb.length() != 0) sb.append(' ');
            // Mask to 0..255: widening a negative byte to int sign-extends,
            // which would print eight hex digits (ffffffXX) instead of two.
            sb.append(String.format("%02x", b & 0xff));
        }
        return sb.toString();
    }

    /** Renders the bytes at indices 0..limit-1 of a buffer as space-separated two-digit hex. */
    private static String toString(ByteBuffer bb) {
        final StringBuilder sb = new StringBuilder();
        for (int i = 0; i < bb.limit(); i++) {
            if (sb.length() != 0) sb.append(' ');
            // Same masking as the byte[] overload, for the same reason.
            sb.append(String.format("%02x", bb.get(i) & 0xff));
        }
        return sb.toString();
    }

    /**
     * Encodes the single char {@code c} with a fresh encoder of {@code cs},
     * using {@code cb} as the input buffer. Unmappable and malformed input
     * is replaced rather than reported.
     */
    private static ByteBuffer convert(Charset cs, char c, CharBuffer cb) throws Throwable {
        cb.clear(); cb.put(c); cb.flip();
        return cs.newEncoder()
            .onUnmappableCharacter(CodingErrorAction.REPLACE)
            .onMalformedInput(CodingErrorAction.REPLACE)
            .encode(cb);
    }

    /** Returns a direct CharBuffer with the same capacity as ordinary CharBuffer ocb */
    private static CharBuffer directCharBuffer(CharBuffer ocb) {
        final CharBuffer dcb =
            ByteBuffer.allocateDirect(ocb.capacity() * Character.SIZE / Byte.SIZE)
            .asCharBuffer();
        check(! ocb.isDirect());
        check(  dcb.isDirect());
        equal(ocb.capacity(), dcb.capacity());
        return dcb;
    }

    /**
     * Encodes {@code c} via {@code cb} and compares the result with
     * {@code expected}.
     *
     * @return the number of problems found (0 or 1); the same amount is
     *         added to the global failure count
     */
    private static int testChar(byte[] expected, CharBuffer cb, Charset cs, char c) {
        int oopses = 0;
        try {
            final ByteBuffer bb = convert(cs, c, cb);
            if (! equals(expected, bb)) {
                System.out.printf("bytes differ charset=%s direct=%s char=\\u%04x%n%s%n%s%n",
                                  cs, cb.isDirect(), (int)c,
                                  toString(expected), toString(bb));
                oopses++;
            }
        } catch (Throwable t) {
            System.out.printf("Unexpected exception charset=%s direct=%s char=\\u%04x%n",
                              cs, cb.isDirect(), (int)c);
            // NOTE(review): unexpected() already increments failed, so an
            // exception ends up counted twice in the total below.
            unexpected(t);
            oopses++;
        }
        failed += oopses;
        return oopses;
    }

    /**
     * Runs the one-char comparison for a single charset, giving up on that
     * charset once three failures have accumulated. Charsets that cannot
     * encode, or that are listed in brokenCharsets, are skipped.
     */
    private static void testCharset(Charset cs) throws Throwable {
        if (! cs.canEncode())
            return;
        final String csn = cs.name();
        for (String n : brokenCharsets)
            if (csn.equals(n)) {
                System.out.printf("Skipping possibly broken charset %s%n", csn);
                return;
            }
        System.out.println(csn);
        final char[] theChar = new char[1];
        final CharBuffer ocb = CharBuffer.allocate(1);
        final CharBuffer dcb = directCharBuffer(ocb);
        int maxFailures = 3;
        // The bound is written as c+1 != 0x10000 because a char cannot exceed
        // 0xffff; as a consequence the loop stops before testing U+FFFF itself.
        for (char c = '\u0000'; c+1 != 0x10000 && maxFailures > 0; c++) {
            theChar[0] = c;
            byte[] bytes = new String(theChar).getBytes(csn);
            if (bytes.length == 0) {
                System.out.printf("Empty output?! charset=%s char=\\u%04x%n",
                                  cs, (int)c);
                maxFailures--; failed++;
            }
            maxFailures -= testChar(bytes, ocb, cs, c);
            maxFailures -= testChar(bytes, dcb, cs, c);
        }
    }

    /** Tests every available charset; a failure in one does not stop the others. */
    private static void realMain(String[] args) {
        for (Charset cs : Charset.availableCharsets().values()) {
            try { testCharset(cs); }
            catch (Throwable t) { unexpected(t); }
        }
    }

    //--------------------- Infrastructure ---------------------------
    static volatile int passed = 0, failed = 0;
    static void pass() {passed++;}
    static void fail() {failed++; Thread.dumpStack();}
    static void fail(String msg) {System.out.println(msg); fail();}
    static void unexpected(Throwable t) {failed++; t.printStackTrace();}
    static void check(boolean cond) {if (cond) pass(); else fail();}
    static void equal(Object x, Object y) {
        if (x == null ? y == null : x.equals(y)) pass();
        else fail(x + " not equal to " + y);}
    /** Runs the test, prints the pass/fail totals, and throws if anything failed. */
    public static void main(String[] args) throws Throwable {
        try {realMain(args);} catch (Throwable t) {unexpected(t);}
        System.out.printf("%nPassed = %d, failed = %d%n%n", passed, failed);
        if (failed > 0) throw new AssertionError("Some tests failed");}
    private static abstract class Fun {abstract void f() throws Throwable;}
    static void THROWS(Class<? extends Throwable> k, Fun... fs) {
        for (Fun f : fs)
            try { f.f(); fail("Expected " + k.getName() + " not thrown"); }
            catch (Throwable t) {
                if (k.isAssignableFrom(t.getClass())) pass();
                else unexpected(t);}}
    private static abstract class CheckedThread extends Thread {
        abstract void realRun() throws Throwable;
        public void run() {
            try {realRun();} catch (Throwable t) {unexpected(t);}}}
}
----------------------------------------------------------------------------
*** (#1 of 1): 2006-02-06 10:52:53 PST ###@###.###
COMPOUND_TEXT appears to work when decoding an entire input buffer,
but if the buffer is made available incrementally, it fails,
even for such simple input as a newline.
Here is the test case:
----------------------------------------------
import java.util.*;
import java.nio.*;
import java.nio.charset.*;
/**
 * Command-line harness that decodes the same byte sequence three ways and
 * insists that all three agree:
 * once with everything available up front, once with the input revealed one
 * byte at a time, and once with the output space revealed one char at a time.
 *
 * Arguments: a charset name followed by the bytes to decode (see parseByte).
 */
public class Decode2 {
    /** Tells whether {@code c} lies in the 7-bit ASCII range. */
    private static boolean isAscii(char c) {
        return c <= 0x7f;
    }

    /** Tells whether {@code c} is a visible ASCII character; space and DEL do not qualify. */
    private static boolean isPrintable(char c) {
        return c > 0x20 && c < 0x7f;
    }

    /**
     * Formats decoded chars for display, separated by single spaces.
     * Printable ASCII is shown as is, the escape character as "ESC", and
     * anything else as a four-digit hex escape.
     */
    private static String string(char[] a) {
        StringBuilder out = new StringBuilder();
        String separator = "";
        for (char ch : a) {
            out.append(separator);
            separator = " ";
            if (isPrintable(ch)) {
                out.append(ch);
            } else if (ch == 0x1b) {
                out.append("ESC");
            } else {
                out.append(String.format("\\u%04x", (int) ch));
            }
        }
        return out.toString();
    }

    /**
     * Turns one command-line token into a byte.
     * A single ASCII character stands for its own code; ESC, SO, SI, SS2 and
     * SS3 are symbolic names; everything else is read as hex, with or without
     * a leading "0x".
     */
    private static byte parseByte(String arg) {
        if (arg.length() == 1 && isAscii(arg.charAt(0))) {
            return (byte) arg.charAt(0);
        }
        if (arg.equals("ESC")) {
            return 0x1b;
        }
        if (arg.equals("SO")) {
            return 0x0e;
        }
        if (arg.equals("SI")) {
            return 0x0f;
        }
        if (arg.equals("SS2")) {
            return (byte) 0x8e;
        }
        if (arg.equals("SS3")) {
            return (byte) 0x8f;
        }
        String hex = arg.matches("0x.*") ? arg : "0x" + arg;
        return Integer.decode(hex).byteValue();
    }

    /** Copies out everything written to {@code buf} so far, leaving it flipped and fully read. */
    private static char[] drain(CharBuffer buf) {
        char[] written = new char[buf.position()];
        buf.flip();
        buf.get(written);
        return written;
    }

    public static void main(String[] args) throws Throwable {
        Charset cs;
        try {
            cs = Charset.forName(args[0]);
        } catch (Throwable t) {
            // Covers both a missing argument and an unknown charset name.
            System.out.println("Usage: java Decode CHARSET BYTE [BYTE ...]");
            throw t;
        }

        byte[] input = new byte[args.length - 1];
        for (int k = 0; k < input.length; k++) {
            input[k] = parseByte(args[k + 1]);
        }

        ByteBuffer ib = ByteBuffer.wrap(input);
        CharBuffer ob = CharBuffer.allocate(100);

        // Pass 1: whole input and whole output buffer available in one call.
        CoderResult cr1 = cs.newDecoder().decode(ib, ob, false);
        char[] a1 = drain(ob);

        // Pass 2: same decoder instance throughout, input limit raised one byte per call.
        ib.clear();
        ob.clear();
        CharsetDecoder inputStepper = cs.newDecoder();
        CoderResult cr2;
        int inputLimit = 0;
        do {
            ib.limit(inputLimit++);
            cr2 = inputStepper.decode(ib, ob, false);
        } while (cr2.isUnderflow() && inputLimit <= ib.capacity());
        char[] a2 = drain(ob);

        // Pass 3: whole input available, output limit raised one char per call.
        ib.clear();
        ob.clear();
        CharsetDecoder outputStepper = cs.newDecoder();
        CoderResult cr3;
        int outputLimit = 0;
        do {
            ob.limit(outputLimit++);
            cr3 = outputStepper.decode(ib, ob, false);
        } while (cr3.isOverflow() && outputLimit <= ob.capacity());
        char[] a3 = drain(ob);

        System.out.printf("%s %s%n%s %s%n%s %s%n",
                          cr1, string(a1),
                          cr2, string(a2),
                          cr3, string(a3));

        boolean inputSteppingAgrees = cr1 == cr2 && Arrays.equals(a1, a2);
        boolean outputSteppingAgrees = cr1 == cr3 && Arrays.equals(a1, a3);
        if (!inputSteppingAgrees || !outputSteppingAgrees) {
            throw new Error("Mismatch!");
        }
    }
}
----------------------------------------------
jr Decode2 COMPOUND_TEXT 0a
==> javac -source 1.6 -Xlint:all Decode2.java
==> java -esa -ea Decode2 COMPOUND_TEXT 0a
UNDERFLOW \u000a
UNDERFLOW \u000a
UNDERFLOW
Exception in thread "main" java.lang.Error: Mismatch!
at Decode2.main(Decode2.java:89)
Command java -esa -ea Decode2 COMPOUND_TEXT 0a failed: rc=1
Copied from #6381697:
The test below fails on COMPOUND_TEXT as follows:
Unexpected exception charset=COMPOUND_TEXT direct=true char=\u0000
java.lang.UnsupportedOperationException
at java.nio.CharBuffer.array(CharBuffer.java:939)
at sun.nio.cs.ext.COMPOUND_TEXT_Encoder.encodeLoop(COMPOUND_TEXT_Encoder.java:75)
at java.nio.charset.CharsetEncoder.encode(CharsetEncoder.java:544)
at java.nio.charset.CharsetEncoder.encode(CharsetEncoder.java:766)
at FindOneCharEncoderBugs.convert(FindOneCharEncoderBugs.java:51)
at FindOneCharEncoderBugs.testChar(FindOneCharEncoderBugs.java:71)
at FindOneCharEncoderBugs.testCharset(FindOneCharEncoderBugs.java:115)
at FindOneCharEncoderBugs.realMain(FindOneCharEncoderBugs.java:121)
at FindOneCharEncoderBugs.main(FindOneCharEncoderBugs.java:137)
----------------------------------------------------------------------------
/* @test %W% %E%
@bug 5058133
@summary Check that all one-char sequences can be encoded by all charsets
@run main/timeout=1200 FindOneCharEncoderBugs
@author Martin Buchholz
*/
import java.util.*;
import java.nio.*;
import java.nio.charset.*;
/**
 * For every installed charset that can encode, encodes each single char
 * through a CharsetEncoder fed from a heap CharBuffer and from a direct
 * CharBuffer, and compares the bytes with what String.getBytes produces
 * for the same char.
 */
public class FindOneCharEncoderBugs {
    /** Canonical names of charsets that testCharset skips. */
    final static String[] brokenCharsets = {
        // Delete the following lines when these charsets are fixed!
        "x-IBM933",
        // "x-IBM949",
        //"x-IBM949C",
        //"x-IBM970",
        // "COMPOUND_TEXT",
    };

    /**
     * Compares an expected byte array with the contents of a buffer.
     * The buffer is read with absolute gets from index 0 up to its limit,
     * so its position is neither consulted nor changed.
     */
    private static boolean equals(byte[] ba, ByteBuffer bb) {
        if (ba.length != bb.limit())
            return false;
        for (int i = 0; i < ba.length; i++)
            if (ba[i] != bb.get(i))
                return false;
        return true;
    }

    /** Renders bytes as space-separated two-digit lowercase hex. */
    private static String toString(byte[] bytes) {
        final StringBuilder sb = new StringBuilder();
        for (byte b : bytes) {
            if (sb.length() != 0) sb.append(' ');
            // Mask to 0..255: widening a negative byte to int sign-extends,
            // which would print eight hex digits (ffffffXX) instead of two.
            sb.append(String.format("%02x", b & 0xff));
        }
        return sb.toString();
    }

    /** Renders the bytes at indices 0..limit-1 of a buffer as space-separated two-digit hex. */
    private static String toString(ByteBuffer bb) {
        final StringBuilder sb = new StringBuilder();
        for (int i = 0; i < bb.limit(); i++) {
            if (sb.length() != 0) sb.append(' ');
            // Same masking as the byte[] overload, for the same reason.
            sb.append(String.format("%02x", bb.get(i) & 0xff));
        }
        return sb.toString();
    }

    /**
     * Encodes the single char {@code c} with a fresh encoder of {@code cs},
     * using {@code cb} as the input buffer. Unmappable and malformed input
     * is replaced rather than reported.
     */
    private static ByteBuffer convert(Charset cs, char c, CharBuffer cb) throws Throwable {
        cb.clear(); cb.put(c); cb.flip();
        return cs.newEncoder()
            .onUnmappableCharacter(CodingErrorAction.REPLACE)
            .onMalformedInput(CodingErrorAction.REPLACE)
            .encode(cb);
    }

    /** Returns a direct CharBuffer with the same capacity as ordinary CharBuffer ocb */
    private static CharBuffer directCharBuffer(CharBuffer ocb) {
        final CharBuffer dcb =
            ByteBuffer.allocateDirect(ocb.capacity() * Character.SIZE / Byte.SIZE)
            .asCharBuffer();
        check(! ocb.isDirect());
        check(  dcb.isDirect());
        equal(ocb.capacity(), dcb.capacity());
        return dcb;
    }

    /**
     * Encodes {@code c} via {@code cb} and compares the result with
     * {@code expected}.
     *
     * @return the number of problems found (0 or 1); the same amount is
     *         added to the global failure count
     */
    private static int testChar(byte[] expected, CharBuffer cb, Charset cs, char c) {
        int oopses = 0;
        try {
            final ByteBuffer bb = convert(cs, c, cb);
            if (! equals(expected, bb)) {
                System.out.printf("bytes differ charset=%s direct=%s char=\\u%04x%n%s%n%s%n",
                                  cs, cb.isDirect(), (int)c,
                                  toString(expected), toString(bb));
                oopses++;
            }
        } catch (Throwable t) {
            System.out.printf("Unexpected exception charset=%s direct=%s char=\\u%04x%n",
                              cs, cb.isDirect(), (int)c);
            // NOTE(review): unexpected() already increments failed, so an
            // exception ends up counted twice in the total below.
            unexpected(t);
            oopses++;
        }
        failed += oopses;
        return oopses;
    }

    /**
     * Runs the one-char comparison for a single charset, giving up on that
     * charset once three failures have accumulated. Charsets that cannot
     * encode, or that are listed in brokenCharsets, are skipped.
     */
    private static void testCharset(Charset cs) throws Throwable {
        if (! cs.canEncode())
            return;
        final String csn = cs.name();
        for (String n : brokenCharsets)
            if (csn.equals(n)) {
                System.out.printf("Skipping possibly broken charset %s%n", csn);
                return;
            }
        System.out.println(csn);
        final char[] theChar = new char[1];
        final CharBuffer ocb = CharBuffer.allocate(1);
        final CharBuffer dcb = directCharBuffer(ocb);
        int maxFailures = 3;
        // The bound is written as c+1 != 0x10000 because a char cannot exceed
        // 0xffff; as a consequence the loop stops before testing U+FFFF itself.
        for (char c = '\u0000'; c+1 != 0x10000 && maxFailures > 0; c++) {
            theChar[0] = c;
            byte[] bytes = new String(theChar).getBytes(csn);
            if (bytes.length == 0) {
                System.out.printf("Empty output?! charset=%s char=\\u%04x%n",
                                  cs, (int)c);
                maxFailures--; failed++;
            }
            maxFailures -= testChar(bytes, ocb, cs, c);
            maxFailures -= testChar(bytes, dcb, cs, c);
        }
    }

    /** Tests every available charset; a failure in one does not stop the others. */
    private static void realMain(String[] args) {
        for (Charset cs : Charset.availableCharsets().values()) {
            try { testCharset(cs); }
            catch (Throwable t) { unexpected(t); }
        }
    }

    //--------------------- Infrastructure ---------------------------
    static volatile int passed = 0, failed = 0;
    static void pass() {passed++;}
    static void fail() {failed++; Thread.dumpStack();}
    static void fail(String msg) {System.out.println(msg); fail();}
    static void unexpected(Throwable t) {failed++; t.printStackTrace();}
    static void check(boolean cond) {if (cond) pass(); else fail();}
    static void equal(Object x, Object y) {
        if (x == null ? y == null : x.equals(y)) pass();
        else fail(x + " not equal to " + y);}
    /** Runs the test, prints the pass/fail totals, and throws if anything failed. */
    public static void main(String[] args) throws Throwable {
        try {realMain(args);} catch (Throwable t) {unexpected(t);}
        System.out.printf("%nPassed = %d, failed = %d%n%n", passed, failed);
        if (failed > 0) throw new AssertionError("Some tests failed");}
    private static abstract class Fun {abstract void f() throws Throwable;}
    static void THROWS(Class<? extends Throwable> k, Fun... fs) {
        for (Fun f : fs)
            try { f.f(); fail("Expected " + k.getName() + " not thrown"); }
            catch (Throwable t) {
                if (k.isAssignableFrom(t.getClass())) pass();
                else unexpected(t);}}
    private static abstract class CheckedThread extends Thread {
        abstract void realRun() throws Throwable;
        public void run() {
            try {realRun();} catch (Throwable t) {unexpected(t);}}}
}
----------------------------------------------------------------------------
*** (#1 of 1): 2006-02-06 10:52:53 PST ###@###.###
- duplicates
-
JDK-6381697 COMPOUND_TEXT does not work with direct CharBuffers
-
- Closed
-