-
Bug
-
Resolution: Fixed
-
P4
-
9
-
None
-
b131
-
b141
-
x86
-
linux_redhat_6.0
===================================================================
SYNOPSIS
--------
Unexpected code conversion by HKSCS converters
OPERATING SYSTEM(S)
-------------------
RHEL6 x86_64
FULL JDK VERSION(S)
-------------------
java version "9-ea"
Java(TM) SE Runtime Environment (build 9-ea+131)
Java HotSpot(TM) 64-Bit Server VM (build 9-ea+131, mixed mode)
PROBLEM DESCRIPTION
-------------------
Convert U+FFFD or U+2FFFD to byte data using the following HKSCS-related converters:
Big5-HKSCS
x-Big5-HKSCS-2001
x-MS950-HKSCS
x-MS950-HKSCS-XP
The result should be the replacement character (\x3F), but an unexpected byte sequence is produced instead.
TESTCASE
--------
/**
 * Reproducer: encodes a single code point with a named charset and prints
 * the UTF-16 code units followed by the encoded bytes.
 *
 * Usage: java c2b &lt;charset-name&gt; &lt;code-point-in-hex&gt;
 *
 * The output line starts with "*" when Character.isDefined reports the code
 * point as undefined, and with " " otherwise.
 */
public class c2b {
    public static void main(String[] args) throws Exception {
        // args[1] is the code point, written in hexadecimal.
        final int codePoint = Integer.parseInt(args[1], 16);
        final char[] units = Character.toChars(codePoint);

        if (Character.isDefined(codePoint)) {
            System.out.print(" ");
        } else {
            System.out.print("*");
        }

        // One \\uXXXX escape per UTF-16 code unit (two for supplementary code points).
        for (int k = 0; k < units.length; k++) {
            System.out.printf("\\u%04X", (int) units[k]);
        }
        System.out.print(" -> ");

        // args[0] is the charset name; the bytes are printed as unsigned hex.
        final byte[] encoded = String.valueOf(units).getBytes(args[0]);
        for (int k = 0; k < encoded.length; k++) {
            System.out.printf("\\x%02X", encoded[k] & 0xFF);
        }
        System.out.println();
    }
}
REPRODUCTION INSTRUCTIONS
-------------------------
Compile and run testcase with FFFD or 2FFFD
$ ~/jdk9/jdk-9/bin/java c2b Big5-HKSCS FFFD
\uFFFD -> \xFE\xFD
$ ~/jdk9/jdk-9/bin/java c2b Big5-HKSCS 2FFFD
*\uD87F\uDFFD -> \xFE\xFE
$ ~/jdk9/jdk-9/bin/java c2b x-Big5-HKSCS-2001 FFFD
\uFFFD -> \xFE\xFD
$ ~/jdk9/jdk-9/bin/java c2b x-Big5-HKSCS-2001 2FFFD
*\uD87F\uDFFD -> \xFE\xFE
$ ~/jdk9/jdk-9/bin/java c2b x-MS950-HKSCS FFFD
\uFFFD -> \xFE\xFD
$ ~/jdk9/jdk-9/bin/java c2b x-MS950-HKSCS 2FFFD
*\uD87F\uDFFD -> \xFE\xFE
$ ~/jdk9/jdk-9/bin/java c2b x-MS950-HKSCS-XP FFFD
\uFFFD -> \xFE\xDD
WORKAROUND
----------
N/A
SUGGESTED FIX
-------------
--- a/src/jdk.charsets/share/classes/sun/nio/cs/ext/Big5_HKSCS.java.template Tue Sep 06 13:09:29 2016 -0400
+++ b/src/jdk.charsets/share/classes/sun/nio/cs/ext/Big5_HKSCS.java.template Tue Jul 05 15:58:17 2016 +0800
@@ -81,7 +81,9 @@
static char[][] c2bSupp = new char[0x100][];
static {
initc2b(c2bBmp, HKSCSMapping.b2cBmpStr, HKSCSMapping.pua);
+ c2bBmp[0xFF][0xFD] = (char)UNMAPPABLE_ENCODING; // reset U+FFFD
initc2b(c2bSupp, HKSCSMapping.b2cSuppStr, null);
+ c2bSupp[0xFF][0xFD] = (char)UNMAPPABLE_ENCODING; // reset U+2FFFD
}
private Encoder(Charset cs) {
--- a/src/jdk.charsets/share/classes/sun/nio/cs/ext/Big5_HKSCS_2001.java Tue Sep 06 13:09:29 2016 -0400
+++ b/src/jdk.charsets/share/classes/sun/nio/cs/ext/Big5_HKSCS_2001.java Wed Sep 14 15:29:17 2016 +0900
@@ -77,7 +77,9 @@
static {
initc2b(c2bBmp, HKSCS2001Mapping.b2cBmpStr,
HKSCS2001Mapping.pua);
+ c2bBmp[0xFF][0xFD] = (char)UNMAPPABLE_ENCODING; // reset U+FFFD
initc2b(c2bSupp, HKSCS2001Mapping.b2cSuppStr, null);
+ c2bSupp[0xFF][0xFD] = (char)UNMAPPABLE_ENCODING; // reset U+2FFFD
}
private Encoder(Charset cs) {
--- a/src/jdk.charsets/share/classes/sun/nio/cs/ext/MS950_HKSCS.java Tue Sep 06 13:09:29 2016 -0400
+++ b/src/jdk.charsets/share/classes/sun/nio/cs/ext/MS950_HKSCS.java Tue Jul 05 15:58:17 2016 +0800
@@ -80,7 +80,9 @@
static char[][] c2bSupp = new char[0x100][];
static {
initc2b(c2bBmp, HKSCSMapping.b2cBmpStr, HKSCSMapping.pua);
+ c2bBmp[0xFF][0xFD] = (char)UNMAPPABLE_ENCODING; // reset U+FFFD
initc2b(c2bSupp, HKSCSMapping.b2cSuppStr, null);
+ c2bSupp[0xFF][0xFD] = (char)UNMAPPABLE_ENCODING; // reset U+2FFFD
}
private Encoder(Charset cs) {
--- a/src/jdk.charsets/share/classes/sun/nio/cs/ext/MS950_HKSCS_XP.java.template Tue Sep 06 13:09:29 2016 -0400
+++ b/src/jdk.charsets/share/classes/sun/nio/cs/ext/MS950_HKSCS_XP.java.template Tue Jul 05 15:58:17 2016 +0800
@@ -91,6 +91,7 @@
static char[][] c2bBmp = new char[0x100][];
static {
initc2b(c2bBmp, HKSCS_XPMapping.b2cBmpStr, null);
+ c2bBmp[0xFF][0xFD] = (char)UNMAPPABLE_ENCODING; // reset U+FFFD
}
public int encodeSupp(int cp) {
===================================================================
SYNOPSIS
--------
Unexpected code conversion by HKSCS converters
OPERATING SYSTEM(S)
-------------------
RHEL6 x86_64
FULL JDK VERSION(S)
-------------------
java version "9-ea"
Java(TM) SE Runtime Environment (build 9-ea+131)
Java HotSpot(TM) 64-Bit Server VM (build 9-ea+131, mixed mode)
PROBLEM DESCRIPTION
-------------------
Convert U+FFFD or U+2FFFD to byte data using the following HKSCS-related converters:
Big5-HKSCS
x-Big5-HKSCS-2001
x-MS950-HKSCS
x-MS950-HKSCS-XP
The result should be the replacement character (\x3F), but an unexpected byte sequence is produced instead.
TESTCASE
--------
/**
 * Reproducer: encodes a single code point with a named charset and prints
 * the UTF-16 code units followed by the encoded bytes.
 *
 * Usage: java c2b &lt;charset-name&gt; &lt;code-point-in-hex&gt;
 *
 * The output line starts with "*" when Character.isDefined reports the code
 * point as undefined, and with " " otherwise.
 */
public class c2b {
    public static void main(String[] args) throws Exception {
        // args[1] is the code point, written in hexadecimal.
        final int codePoint = Integer.parseInt(args[1], 16);
        final char[] units = Character.toChars(codePoint);

        if (Character.isDefined(codePoint)) {
            System.out.print(" ");
        } else {
            System.out.print("*");
        }

        // One \\uXXXX escape per UTF-16 code unit (two for supplementary code points).
        for (int k = 0; k < units.length; k++) {
            System.out.printf("\\u%04X", (int) units[k]);
        }
        System.out.print(" -> ");

        // args[0] is the charset name; the bytes are printed as unsigned hex.
        final byte[] encoded = String.valueOf(units).getBytes(args[0]);
        for (int k = 0; k < encoded.length; k++) {
            System.out.printf("\\x%02X", encoded[k] & 0xFF);
        }
        System.out.println();
    }
}
REPRODUCTION INSTRUCTIONS
-------------------------
Compile and run testcase with FFFD or 2FFFD
$ ~/jdk9/jdk-9/bin/java c2b Big5-HKSCS FFFD
\uFFFD -> \xFE\xFD
$ ~/jdk9/jdk-9/bin/java c2b Big5-HKSCS 2FFFD
*\uD87F\uDFFD -> \xFE\xFE
$ ~/jdk9/jdk-9/bin/java c2b x-Big5-HKSCS-2001 FFFD
\uFFFD -> \xFE\xFD
$ ~/jdk9/jdk-9/bin/java c2b x-Big5-HKSCS-2001 2FFFD
*\uD87F\uDFFD -> \xFE\xFE
$ ~/jdk9/jdk-9/bin/java c2b x-MS950-HKSCS FFFD
\uFFFD -> \xFE\xFD
$ ~/jdk9/jdk-9/bin/java c2b x-MS950-HKSCS 2FFFD
*\uD87F\uDFFD -> \xFE\xFE
$ ~/jdk9/jdk-9/bin/java c2b x-MS950-HKSCS-XP FFFD
\uFFFD -> \xFE\xDD
WORKAROUND
----------
N/A
SUGGESTED FIX
-------------
--- a/src/jdk.charsets/share/classes/sun/nio/cs/ext/Big5_HKSCS.java.template Tue Sep 06 13:09:29 2016 -0400
+++ b/src/jdk.charsets/share/classes/sun/nio/cs/ext/Big5_HKSCS.java.template Tue Jul 05 15:58:17 2016 +0800
@@ -81,7 +81,9 @@
static char[][] c2bSupp = new char[0x100][];
static {
initc2b(c2bBmp, HKSCSMapping.b2cBmpStr, HKSCSMapping.pua);
+ c2bBmp[0xFF][0xFD] = (char)UNMAPPABLE_ENCODING; // reset U+FFFD
initc2b(c2bSupp, HKSCSMapping.b2cSuppStr, null);
+ c2bSupp[0xFF][0xFD] = (char)UNMAPPABLE_ENCODING; // reset U+2FFFD
}
private Encoder(Charset cs) {
--- a/src/jdk.charsets/share/classes/sun/nio/cs/ext/Big5_HKSCS_2001.java Tue Sep 06 13:09:29 2016 -0400
+++ b/src/jdk.charsets/share/classes/sun/nio/cs/ext/Big5_HKSCS_2001.java Wed Sep 14 15:29:17 2016 +0900
@@ -77,7 +77,9 @@
static {
initc2b(c2bBmp, HKSCS2001Mapping.b2cBmpStr,
HKSCS2001Mapping.pua);
+ c2bBmp[0xFF][0xFD] = (char)UNMAPPABLE_ENCODING; // reset U+FFFD
initc2b(c2bSupp, HKSCS2001Mapping.b2cSuppStr, null);
+ c2bSupp[0xFF][0xFD] = (char)UNMAPPABLE_ENCODING; // reset U+2FFFD
}
private Encoder(Charset cs) {
--- a/src/jdk.charsets/share/classes/sun/nio/cs/ext/MS950_HKSCS.java Tue Sep 06 13:09:29 2016 -0400
+++ b/src/jdk.charsets/share/classes/sun/nio/cs/ext/MS950_HKSCS.java Tue Jul 05 15:58:17 2016 +0800
@@ -80,7 +80,9 @@
static char[][] c2bSupp = new char[0x100][];
static {
initc2b(c2bBmp, HKSCSMapping.b2cBmpStr, HKSCSMapping.pua);
+ c2bBmp[0xFF][0xFD] = (char)UNMAPPABLE_ENCODING; // reset U+FFFD
initc2b(c2bSupp, HKSCSMapping.b2cSuppStr, null);
+ c2bSupp[0xFF][0xFD] = (char)UNMAPPABLE_ENCODING; // reset U+2FFFD
}
private Encoder(Charset cs) {
--- a/src/jdk.charsets/share/classes/sun/nio/cs/ext/MS950_HKSCS_XP.java.template Tue Sep 06 13:09:29 2016 -0400
+++ b/src/jdk.charsets/share/classes/sun/nio/cs/ext/MS950_HKSCS_XP.java.template Tue Jul 05 15:58:17 2016 +0800
@@ -91,6 +91,7 @@
static char[][] c2bBmp = new char[0x100][];
static {
initc2b(c2bBmp, HKSCS_XPMapping.b2cBmpStr, null);
+ c2bBmp[0xFF][0xFD] = (char)UNMAPPABLE_ENCODING; // reset U+FFFD
}
public int encodeSupp(int cp) {
===================================================================