-
Bug
-
Resolution: Unresolved
-
P4
-
None
-
6u11
-
Cause Known
-
sparc
-
solaris_2.5.1
OPERATING SYSTEM(S):
--------------------
Windows XP SP2 (Japanese)
FULL JDK VERSION(S):
-------------------
java version "1.6.0_11"
Java(TM) SE Runtime Environment (build 1.6.0_11-b03)
Java HotSpot(TM) Client VM (build 11.0-b16, mixed mode, sharing)
Also reproducible using latest JDK 7-ea-b41.
DESCRIPTION:
------------
I tested some of the Japanese PC code converters.
It seems that the PCK converter returns unexpected results.
Please run the following test case:
============================================================
import java.nio.*;
import java.nio.charset.*;
/**
 * Reproducer that decodes fixed byte sequences with the PCK, Windows-31J and
 * Cp943c charsets and prints every decoded UTF-16 code unit as a
 * {@code \}{@code uXXXX} escape, one output line per charset.
 */
class PCKTest {
    /** Charsets whose decoders are compared, in output order. */
    private static final String[] CHARSET_NAMES = {"PCK", "Windows-31J", "Cp943c"};

    /** Input for the first test: the byte pairs 0xEEF9, 0xFA54 and 0xFA5B. */
    private static final byte[] WRONG_CONVERSION_INPUT = {
        (byte) 0xee, (byte) 0xf9, (byte) 0xfa, (byte) 0x54, (byte) 0xfa, (byte) 0x5b
    };

    /** First bytes that the second test pairs, one at a time, with a 0x7f second byte. */
    private static final byte[] LEAD_BYTES = {
        (byte) 0x81, (byte) 0x83, (byte) 0x84, (byte) 0x87, (byte) 0x89,
        (byte) 0x8a, (byte) 0x8b, (byte) 0x8c, (byte) 0x8d, (byte) 0x8e,
        (byte) 0x8f, (byte) 0x90, (byte) 0x91, (byte) 0x92, (byte) 0x93,
        (byte) 0x94, (byte) 0x95, (byte) 0x96, (byte) 0x97, (byte) 0x99,
        (byte) 0x9a, (byte) 0x9b, (byte) 0x9c, (byte) 0x9d, (byte) 0x9e,
        (byte) 0x9f, (byte) 0xe0, (byte) 0xe1, (byte) 0xe2, (byte) 0xe3,
        (byte) 0xe4, (byte) 0xe5, (byte) 0xe6, (byte) 0xe7, (byte) 0xe8,
        (byte) 0xe9, (byte) 0xea, (byte) 0xed, (byte) 0xee, (byte) 0xfa,
        (byte) 0xfb
    };

    /**
     * Runs both tests and writes their results to standard output.
     *
     * @param args ignored
     * @throws Exception if a charset cannot be obtained or the first test's
     *         input cannot be decoded
     */
    public static void main(String[] args) throws Exception {
        runWrongConversionTest();
        runTrailByte7fTest();
    }

    /** Decodes {@link #WRONG_CONVERSION_INPUT} once per charset and prints the result. */
    private static void runWrongConversionTest() throws Exception {
        System.out.println("<<< Wrong Conversion Test >>>");
        for (int i = 0; i < CHARSET_NAMES.length; i++) {
            String name = CHARSET_NAMES[i];
            CharsetDecoder decoder = Charset.forName(name).newDecoder();
            // Decode before printing the label: a failure here leaves no partial line.
            CharBuffer decoded = decoder.decode(ByteBuffer.wrap(WRONG_CONVERSION_INPUT));
            System.out.print(name + ":\t");
            printEscaped(decoded);
            System.out.println();
        }
    }

    /**
     * For each charset, decodes every {@code (lead byte, 0x7f)} pair and prints
     * whatever characters the decoder produces for it.
     */
    private static void runTrailByte7fTest() {
        System.out.println("<<< 0xXX7f Conversion Test >>>");
        for (int i = 0; i < CHARSET_NAMES.length; i++) {
            String name = CHARSET_NAMES[i];
            CharsetDecoder decoder = Charset.forName(name).newDecoder();
            decoder.onUnmappableCharacter(CodingErrorAction.REPORT);
            // Only the first byte changes between iterations; the second stays 0x7f.
            byte[] pair = new byte[] {(byte) 0x0, (byte) 0x7f};
            System.out.print(name + ":\t");
            for (int j = 0; j < LEAD_BYTES.length; j++) {
                pair[0] = LEAD_BYTES[j];
                try {
                    decoder.reset();
                    printEscaped(decoder.decode(ByteBuffer.wrap(pair)));
                } catch (UnmappableCharacterException uce) {
                    // Deliberately silent: an unmappable pair contributes nothing to the line.
                } catch (Exception e) {
                    e.printStackTrace();
                }
            }
            System.out.println();
        }
    }

    /** Prints each code unit of {@code decoded} as a backslash-u escape with four hex digits. */
    private static void printEscaped(CharBuffer decoded) {
        String text = decoded.toString();
        for (int k = 0; k < text.length(); k++) {
            System.out.printf("\\u%04x", (int) text.charAt(k));
        }
    }
}
============================================================
Output is as follows
============================================================
> java PCKTest
<<< Wrong Conversion Test >>>
PCK: \u3052\u3052\u306e
Windows-31J: \uffe2\uffe2\u2235
Cp943c: \uffe2\uffe2\u2235
<<< 0xXX7f Conversion Test >>>
PCK: \u00f7\u30e0\u043e\u301d\u5712\u6a7f\u673d\u5287\u9805\u6b7b\u6e96\u9017\u64cd\u9010\u51cd\u6973\u65a7\u6469\u6c83\u51f0\u566b\u5b83\u6019\u6369\u68b3\u9ebe\u70d9\u75fc\u7ab6\u7e32\u8262\u86df\u8ae4\u8f62\u95a0\u9a3e\u9ef4\ufa10\u856b\u5164\ufa1a
Windows-31J:
Cp943c: \u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000
============================================================
For "Wrong Conversion Test", I think the PCK converter's mapping table is not correct.
For "0xXX7f Conversion Test", the 0xXX7f byte pairs are not valid Shift_JIS characters, so the PCK and Cp943c decoders should not convert them to characters.
--------------------
Windows XP SP2 (Japanese)
FULL JDK VERSION(S):
-------------------
java version "1.6.0_11"
Java(TM) SE Runtime Environment (build 1.6.0_11-b03)
Java HotSpot(TM) Client VM (build 11.0-b16, mixed mode, sharing)
Also reproducible using latest JDK 7-ea-b41.
DESCRIPTION:
------------
I tested some of the Japanese PC code converters.
It seems that the PCK converter returns unexpected results.
Please run the following test case:
============================================================
import java.nio.*;
import java.nio.charset.*;
/**
 * Reproducer that decodes fixed byte sequences with the PCK, Windows-31J and
 * Cp943c charsets and prints every decoded UTF-16 code unit as a
 * {@code \}{@code uXXXX} escape, one output line per charset.
 */
class PCKTest {
    /** Charsets whose decoders are compared, in output order. */
    private static final String[] CHARSET_NAMES = {"PCK", "Windows-31J", "Cp943c"};

    /** Input for the first test: the byte pairs 0xEEF9, 0xFA54 and 0xFA5B. */
    private static final byte[] WRONG_CONVERSION_INPUT = {
        (byte) 0xee, (byte) 0xf9, (byte) 0xfa, (byte) 0x54, (byte) 0xfa, (byte) 0x5b
    };

    /** First bytes that the second test pairs, one at a time, with a 0x7f second byte. */
    private static final byte[] LEAD_BYTES = {
        (byte) 0x81, (byte) 0x83, (byte) 0x84, (byte) 0x87, (byte) 0x89,
        (byte) 0x8a, (byte) 0x8b, (byte) 0x8c, (byte) 0x8d, (byte) 0x8e,
        (byte) 0x8f, (byte) 0x90, (byte) 0x91, (byte) 0x92, (byte) 0x93,
        (byte) 0x94, (byte) 0x95, (byte) 0x96, (byte) 0x97, (byte) 0x99,
        (byte) 0x9a, (byte) 0x9b, (byte) 0x9c, (byte) 0x9d, (byte) 0x9e,
        (byte) 0x9f, (byte) 0xe0, (byte) 0xe1, (byte) 0xe2, (byte) 0xe3,
        (byte) 0xe4, (byte) 0xe5, (byte) 0xe6, (byte) 0xe7, (byte) 0xe8,
        (byte) 0xe9, (byte) 0xea, (byte) 0xed, (byte) 0xee, (byte) 0xfa,
        (byte) 0xfb
    };

    /**
     * Runs both tests and writes their results to standard output.
     *
     * @param args ignored
     * @throws Exception if a charset cannot be obtained or the first test's
     *         input cannot be decoded
     */
    public static void main(String[] args) throws Exception {
        runWrongConversionTest();
        runTrailByte7fTest();
    }

    /** Decodes {@link #WRONG_CONVERSION_INPUT} once per charset and prints the result. */
    private static void runWrongConversionTest() throws Exception {
        System.out.println("<<< Wrong Conversion Test >>>");
        for (int i = 0; i < CHARSET_NAMES.length; i++) {
            String name = CHARSET_NAMES[i];
            CharsetDecoder decoder = Charset.forName(name).newDecoder();
            // Decode before printing the label: a failure here leaves no partial line.
            CharBuffer decoded = decoder.decode(ByteBuffer.wrap(WRONG_CONVERSION_INPUT));
            System.out.print(name + ":\t");
            printEscaped(decoded);
            System.out.println();
        }
    }

    /**
     * For each charset, decodes every {@code (lead byte, 0x7f)} pair and prints
     * whatever characters the decoder produces for it.
     */
    private static void runTrailByte7fTest() {
        System.out.println("<<< 0xXX7f Conversion Test >>>");
        for (int i = 0; i < CHARSET_NAMES.length; i++) {
            String name = CHARSET_NAMES[i];
            CharsetDecoder decoder = Charset.forName(name).newDecoder();
            decoder.onUnmappableCharacter(CodingErrorAction.REPORT);
            // Only the first byte changes between iterations; the second stays 0x7f.
            byte[] pair = new byte[] {(byte) 0x0, (byte) 0x7f};
            System.out.print(name + ":\t");
            for (int j = 0; j < LEAD_BYTES.length; j++) {
                pair[0] = LEAD_BYTES[j];
                try {
                    decoder.reset();
                    printEscaped(decoder.decode(ByteBuffer.wrap(pair)));
                } catch (UnmappableCharacterException uce) {
                    // Deliberately silent: an unmappable pair contributes nothing to the line.
                } catch (Exception e) {
                    e.printStackTrace();
                }
            }
            System.out.println();
        }
    }

    /** Prints each code unit of {@code decoded} as a backslash-u escape with four hex digits. */
    private static void printEscaped(CharBuffer decoded) {
        String text = decoded.toString();
        for (int k = 0; k < text.length(); k++) {
            System.out.printf("\\u%04x", (int) text.charAt(k));
        }
    }
}
============================================================
Output is as follows
============================================================
> java PCKTest
<<< Wrong Conversion Test >>>
PCK: \u3052\u3052\u306e
Windows-31J: \uffe2\uffe2\u2235
Cp943c: \uffe2\uffe2\u2235
<<< 0xXX7f Conversion Test >>>
PCK: \u00f7\u30e0\u043e\u301d\u5712\u6a7f\u673d\u5287\u9805\u6b7b\u6e96\u9017\u64cd\u9010\u51cd\u6973\u65a7\u6469\u6c83\u51f0\u566b\u5b83\u6019\u6369\u68b3\u9ebe\u70d9\u75fc\u7ab6\u7e32\u8262\u86df\u8ae4\u8f62\u95a0\u9a3e\u9ef4\ufa10\u856b\u5164\ufa1a
Windows-31J:
Cp943c: \u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000
============================================================
For "Wrong Conversion Test", I think the PCK converter's mapping table is not correct.
For "0xXX7f Conversion Test", the 0xXX7f byte pairs are not valid Shift_JIS characters, so the PCK and Cp943c decoders should not convert them to characters.