-
Bug
-
Resolution: Unresolved
-
P4
-
9
-
Cause Known
-
b131
-
x86
-
windows_7
====================================================================
SYNOPSIS
--------
MS1361 NIO converter does not behave the same as the Windows CP1361 implementation
OPERATING SYSTEM(S)
-------------------
Windows 7 x64 (Japanese)
FULL JDK VERSION(S)
-------------------
java version "9-ea"
Java(TM) SE Runtime Environment (build 9-ea+131)
Java HotSpot(TM) 64-Bit Server VM (build 9-ea+131, mixed mode)
java version "1.8.0_102"
Java(TM) SE Runtime Environment (build 1.8.0_102-b14)
Java HotSpot(TM) 64-Bit Server VM (build 25.102-b14, mixed mode)
PROBLEM DESCRIPTION
-------------------
It seems that the conversion results of Java's MS1361 are not the same as those of Windows' CP1361.
One difference is that Java's MS1361 maps to the "Hangul Compatibility Jamo" Unicode block,
whereas Windows' CP1361 maps to the "Hangul Jamo" Unicode block.
Java's MS1361 appears to be based on the following table:
ftp://ftp.unicode.org/Public/MAPPINGS/OBSOLETE/EASTASIA/KSC/JOHAB.TXT
Windows' CP1361 appears to be based on the following table:
ftp://ftp.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WindowsBestFit/bestfit1361.txt
In addition, Java's MS1361 does not support the Private Use Area.
TESTCASE
--------
b2c.java
====================================================
import java.nio.*;
/**
 * Bytes-to-chars probe: decodes a hex-encoded byte sequence with a named charset and prints
 * the input bytes followed by the UTF-16 code units of the decoded string.
 *
 * <p>Usage: {@code java b2c <charsetName> <hexBytes>}, for example {@code java b2c MS1361 8444}.
 */
public class b2c {
    /**
     * Prints a single line: each input byte as a two-digit hex escape, the separator
     * {@code " -> "}, then each decoded code unit as a four-digit hex escape.
     *
     * @param args {@code args[0]} is the charset name used for decoding; {@code args[1]} is the
     *     byte sequence written as pairs of hex digits (a trailing unpaired digit is dropped)
     * @throws Exception if the named charset is not supported
     */
    public static void main(String[] args) throws Exception {
        final String hex = args[1];
        // Integer division drops a trailing unpaired hex digit.
        final byte[] raw = new byte[hex.length() / 2];
        for (int idx = 0; idx < raw.length; idx++) {
            final String pair = hex.substring(2 * idx, 2 * idx + 2);
            raw[idx] = (byte) Integer.parseInt(pair, 16);
        }

        // Echo the input bytes; the mask keeps bytes >= 0x80 from printing sign-extended.
        for (int idx = 0; idx < raw.length; idx++) {
            System.out.printf("\\x%02X", raw[idx] & 0xFF);
        }
        System.out.print(" -> ");

        final String decoded = new String(raw, args[0]);
        for (int idx = 0; idx < decoded.length(); idx++) {
            System.out.printf("\\u%04X", (int) decoded.charAt(idx));
        }
        System.out.println();
    }
}
====================================================
c2b.java
====================================================
/**
 * Chars-to-bytes probe: encodes a single Unicode code point with a named charset and prints
 * the code point's UTF-16 code units followed by the encoded bytes.
 *
 * <p>Usage: {@code java c2b <charsetName> <hexCodePoint>}, for example
 * {@code java c2b MS1361 3133}.
 */
public class c2b {
    /**
     * Prints a single line: a marker ({@code " "} when the code point is defined, {@code "*"}
     * otherwise), the code units as four-digit hex escapes, the separator {@code " -> "}, and
     * the encoded bytes as two-digit hex escapes.
     *
     * @param args {@code args[0]} is the charset name used for encoding; {@code args[1]} is the
     *     code point in hexadecimal
     * @throws Exception if the named charset is not supported
     */
    public static void main(String[] args) throws Exception {
        final int codePoint = Integer.parseInt(args[1], 16);
        // A supplementary code point expands to a surrogate pair here.
        final char[] units = Character.toChars(codePoint);

        final String marker = Character.isDefined(codePoint) ? " " : "*";
        System.out.print(marker);
        for (int k = 0; k < units.length; k++) {
            System.out.printf("\\u%04X", (int) units[k]);
        }
        System.out.print(" -> ");

        final byte[] encoded = String.valueOf(units).getBytes(args[0]);
        for (int k = 0; k < encoded.length; k++) {
            // The mask keeps bytes >= 0x80 from printing sign-extended.
            System.out.printf("\\x%02X", encoded[k] & 0xFF);
        }
        System.out.println();
    }
}
====================================================
b2c_1.ps1
====================================================
# Bytes-to-chars probe for .NET encodings.
#   $code : numeric code page passed to [Text.Encoding]::GetEncoding (e.g. 1361)
#   $hex  : byte sequence written as pairs of hex digits (e.g. 8444)
# Emits the decoded characters as concatenated four-digit uppercase hex values.
param($code, $hex)
$hexText = [string]$hex
$encoding = [Text.Encoding]::GetEncoding([int]$code)
[byte[]]$bytes = @()
# Consume the hex text two digits at a time, appending one byte per pair.
$pos = 0
while ($pos -lt $hexText.length) {
    $bytes += ([System.Convert]::ToInt32($hexText.SubString($pos, 2), 16))
    $pos += 2
}
$result = ""
foreach ($ch in $encoding.GetChars($bytes)) {
    $result += [System.Convert]::ToInt32($ch).ToString("X4")
}
$result
====================================================
c2b_1.ps1
====================================================
# Chars-to-bytes probe for .NET encodings.
#   $code : numeric code page passed to [Text.Encoding]::GetEncoding (e.g. 1361)
#   $hex  : a single UTF-16 code unit in hexadecimal (e.g. 11AA)
# Emits the encoded bytes as concatenated two-digit uppercase hex values.
param($code, $hex)
$encoding = [Text.Encoding]::GetEncoding([int]$code)
[char[]]$chars = @()
# The typed array converts the parsed integer into a [char] on append.
$chars += ([System.Convert]::ToInt32([string]$hex, 16))
$result = ""
foreach ($b in $encoding.GetBytes($chars)) {
    $result += [System.Convert]::ToInt32($b).ToString("X2")
}
$result
====================================================
REPRODUCTION INSTRUCTIONS
-------------------------
\x84\x44 is converted to \u3133 by Java, but to \u11AA by Windows
>jdk9\jdk-9\bin\java b2c MS1361 8444
\x84\x44 -> \u3133
>powershell -NoProfile -ExecutionPolicy Unrestricted .\b2c_1.ps1 1361 8444
11AA
Java's MS1361 can handle \u3133, but it cannot handle \u11AA
>jdk9\jdk-9\bin\java c2b MS1361 3133
\u3133 -> \x84\x44
>jdk9\jdk-9\bin\java c2b MS1361 11AA
\u11AA -> \x3F
Windows' CP1361 can handle \u11AA and \u3133
>powershell -NoProfile -ExecutionPolicy Unrestricted .\c2b_1.ps1 1361 11AA
8444
>powershell -NoProfile -ExecutionPolicy Unrestricted .\c2b_1.ps1 1361 3133
8444
According to bestfit1361.txt, \uFFA3 should be converted to \x84\x44
>powershell -NoProfile -ExecutionPolicy Unrestricted .\c2b_1.ps1 1361 FFA3
8444
Also, Java's MS1361 does not support the Private Use Area
>jdk9\jdk-9\bin\java c2b MS1361 E000
\uE000 -> \x3F
>powershell -NoProfile -ExecutionPolicy Unrestricted .\c2b_1.ps1 1361 E000
D831
WORKAROUND
----------
Unknown
SUGGESTED FIX
-------------
N/A
====================================================================
SYNOPSIS
--------
MS1361 NIO converter does not behave the same as the Windows CP1361 implementation
OPERATING SYSTEM(S)
-------------------
Windows 7 x64 (Japanese)
FULL JDK VERSION(S)
-------------------
java version "9-ea"
Java(TM) SE Runtime Environment (build 9-ea+131)
Java HotSpot(TM) 64-Bit Server VM (build 9-ea+131, mixed mode)
java version "1.8.0_102"
Java(TM) SE Runtime Environment (build 1.8.0_102-b14)
Java HotSpot(TM) 64-Bit Server VM (build 25.102-b14, mixed mode)
PROBLEM DESCRIPTION
-------------------
It seems that the conversion results of Java's MS1361 are not the same as those of Windows' CP1361.
One difference is that Java's MS1361 maps to the "Hangul Compatibility Jamo" Unicode block,
whereas Windows' CP1361 maps to the "Hangul Jamo" Unicode block.
Java's MS1361 appears to be based on the following table:
ftp://ftp.unicode.org/Public/MAPPINGS/OBSOLETE/EASTASIA/KSC/JOHAB.TXT
Windows' CP1361 appears to be based on the following table:
ftp://ftp.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WindowsBestFit/bestfit1361.txt
In addition, Java's MS1361 does not support the Private Use Area.
TESTCASE
--------
b2c.java
====================================================
import java.nio.*;
/**
 * Bytes-to-chars probe: decodes a hex-encoded byte sequence with a named charset and prints
 * the input bytes followed by the UTF-16 code units of the decoded string.
 *
 * <p>Usage: {@code java b2c <charsetName> <hexBytes>}, for example {@code java b2c MS1361 8444}.
 */
public class b2c {
    /**
     * Prints a single line: each input byte as a two-digit hex escape, the separator
     * {@code " -> "}, then each decoded code unit as a four-digit hex escape.
     *
     * @param args {@code args[0]} is the charset name used for decoding; {@code args[1]} is the
     *     byte sequence written as pairs of hex digits (a trailing unpaired digit is dropped)
     * @throws Exception if the named charset is not supported
     */
    public static void main(String[] args) throws Exception {
        final String hex = args[1];
        // Integer division drops a trailing unpaired hex digit.
        final byte[] raw = new byte[hex.length() / 2];
        for (int idx = 0; idx < raw.length; idx++) {
            final String pair = hex.substring(2 * idx, 2 * idx + 2);
            raw[idx] = (byte) Integer.parseInt(pair, 16);
        }

        // Echo the input bytes; the mask keeps bytes >= 0x80 from printing sign-extended.
        for (int idx = 0; idx < raw.length; idx++) {
            System.out.printf("\\x%02X", raw[idx] & 0xFF);
        }
        System.out.print(" -> ");

        final String decoded = new String(raw, args[0]);
        for (int idx = 0; idx < decoded.length(); idx++) {
            System.out.printf("\\u%04X", (int) decoded.charAt(idx));
        }
        System.out.println();
    }
}
====================================================
c2b.java
====================================================
/**
 * Chars-to-bytes probe: encodes a single Unicode code point with a named charset and prints
 * the code point's UTF-16 code units followed by the encoded bytes.
 *
 * <p>Usage: {@code java c2b <charsetName> <hexCodePoint>}, for example
 * {@code java c2b MS1361 3133}.
 */
public class c2b {
    /**
     * Prints a single line: a marker ({@code " "} when the code point is defined, {@code "*"}
     * otherwise), the code units as four-digit hex escapes, the separator {@code " -> "}, and
     * the encoded bytes as two-digit hex escapes.
     *
     * @param args {@code args[0]} is the charset name used for encoding; {@code args[1]} is the
     *     code point in hexadecimal
     * @throws Exception if the named charset is not supported
     */
    public static void main(String[] args) throws Exception {
        final int codePoint = Integer.parseInt(args[1], 16);
        // A supplementary code point expands to a surrogate pair here.
        final char[] units = Character.toChars(codePoint);

        final String marker = Character.isDefined(codePoint) ? " " : "*";
        System.out.print(marker);
        for (int k = 0; k < units.length; k++) {
            System.out.printf("\\u%04X", (int) units[k]);
        }
        System.out.print(" -> ");

        final byte[] encoded = String.valueOf(units).getBytes(args[0]);
        for (int k = 0; k < encoded.length; k++) {
            // The mask keeps bytes >= 0x80 from printing sign-extended.
            System.out.printf("\\x%02X", encoded[k] & 0xFF);
        }
        System.out.println();
    }
}
====================================================
b2c_1.ps1
====================================================
# Bytes-to-chars probe for .NET encodings.
#   $code : numeric code page passed to [Text.Encoding]::GetEncoding (e.g. 1361)
#   $hex  : byte sequence written as pairs of hex digits (e.g. 8444)
# Emits the decoded characters as concatenated four-digit uppercase hex values.
param($code, $hex)
$hexText = [string]$hex
$encoding = [Text.Encoding]::GetEncoding([int]$code)
[byte[]]$bytes = @()
# Consume the hex text two digits at a time, appending one byte per pair.
$pos = 0
while ($pos -lt $hexText.length) {
    $bytes += ([System.Convert]::ToInt32($hexText.SubString($pos, 2), 16))
    $pos += 2
}
$result = ""
foreach ($ch in $encoding.GetChars($bytes)) {
    $result += [System.Convert]::ToInt32($ch).ToString("X4")
}
$result
====================================================
c2b_1.ps1
====================================================
# Chars-to-bytes probe for .NET encodings.
#   $code : numeric code page passed to [Text.Encoding]::GetEncoding (e.g. 1361)
#   $hex  : a single UTF-16 code unit in hexadecimal (e.g. 11AA)
# Emits the encoded bytes as concatenated two-digit uppercase hex values.
param($code, $hex)
$encoding = [Text.Encoding]::GetEncoding([int]$code)
[char[]]$chars = @()
# The typed array converts the parsed integer into a [char] on append.
$chars += ([System.Convert]::ToInt32([string]$hex, 16))
$result = ""
foreach ($b in $encoding.GetBytes($chars)) {
    $result += [System.Convert]::ToInt32($b).ToString("X2")
}
$result
====================================================
REPRODUCTION INSTRUCTIONS
-------------------------
\x84\x44 is converted to \u3133 by Java, but to \u11AA by Windows
>jdk9\jdk-9\bin\java b2c MS1361 8444
\x84\x44 -> \u3133
>powershell -NoProfile -ExecutionPolicy Unrestricted .\b2c_1.ps1 1361 8444
11AA
Java's MS1361 can handle \u3133, but it cannot handle \u11AA
>jdk9\jdk-9\bin\java c2b MS1361 3133
\u3133 -> \x84\x44
>jdk9\jdk-9\bin\java c2b MS1361 11AA
\u11AA -> \x3F
Windows' CP1361 can handle \u11AA and \u3133
>powershell -NoProfile -ExecutionPolicy Unrestricted .\c2b_1.ps1 1361 11AA
8444
>powershell -NoProfile -ExecutionPolicy Unrestricted .\c2b_1.ps1 1361 3133
8444
According to bestfit1361.txt, \uFFA3 should be converted to \x84\x44
>powershell -NoProfile -ExecutionPolicy Unrestricted .\c2b_1.ps1 1361 FFA3
8444
Also, Java's MS1361 does not support the Private Use Area
>jdk9\jdk-9\bin\java c2b MS1361 E000
\uE000 -> \x3F
>powershell -NoProfile -ExecutionPolicy Unrestricted .\c2b_1.ps1 1361 E000
D831
WORKAROUND
----------
Unknown
SUGGESTED FIX
-------------
N/A
====================================================================