-
Bug
-
Resolution: Not an Issue
-
P3
-
None
-
1.4.1
-
x86
-
windows_2000
Name: nt126004 Date: 04/23/2003
FULL PRODUCT VERSION :
java version "1.4.1_02"
Java(TM) 2 Runtime Environment, Standard Edition (build 1.4.1_02-b06)
Java HotSpot(TM) Client VM (build 1.4.1_02-b06, mixed mode)
FULL OS VERSION :
Microsoft Windows 2000 [Version 5.00.2195]
A DESCRIPTION OF THE PROBLEM :
The ByteToChar (and CharToByte) converters map shift-jis character 0x8160 (known as the "wave dash" or "full width tilde") to unicode U+301C. However, the Unicode spec (http://www.unicode.org/charts/PDF/U3000.pdf) states that the mapping should be to unicode U+FF5E. Referring to character U+301C it says:
This character was encoded to match JIS C-6226 1978 1-33 "wave dash".
Subsequent revisions of the JIS standard and industry practice have
settled on JIS 1-33 as being the fullwidth tilde character.
Further, the Windows-31J (aka MS932) converter and deconverter map 0x8160 to U+FF5E.
STEPS TO FOLLOW TO REPRODUCE THE PROBLEM :
run test program below
EXPECTED VERSUS ACTUAL BEHAVIOR :
Expected:
SJIS： ～ (81 60) → 0xff5e(～) (fullwidth tilde)
MS932： ～ (81 60) → 0xff5e(～) (fullwidth tilde)
Actual:
SJIS： ～ (81 60) → 0x301c(?) (wave dash)
MS932： ～ (81 60) → 0xff5e(～) (fullwidth tilde)
REPRODUCIBILITY :
This bug can be reproduced always.
---------- BEGIN SOURCE ----------
/*
* bake.java
* Test code comparing Java's MS932 and SJIS converters
* Created on 2003/04/10, 18:37
*/
import java.io.*;
/**
 * bake - decodes sample byte sequences with the SJIS, MS932 and EUC-JP
 * converters and prints the Unicode code point each converter produces for
 * the first character, so the mappings can be compared side by side.
 *
 * @author wkeese
 */
public class bake {

    /** Creates a new instance of bake */
    public bake() {
    }

    /**
     * Formats a byte array as space-separated lowercase hexadecimal values.
     *
     * <p>Each byte is rendered as its unsigned value (0..ff) without zero
     * padding, e.g. <code>{0x81, 0x60}</code> gives <code>"81 60"</code>.
     *
     * @param ary the bytes to format
     * @return the hexadecimal rendering; empty when <code>ary</code> is empty
     */
    static String toHexString(byte[] ary) {
        // StringBuffer rather than String concatenation in the loop; it is
        // also available on the 1.4.1 runtime this program was written for.
        StringBuffer out = new StringBuffer();
        for (int i = 0; i < ary.length; i++) {
            if (i > 0) {
                out.append(' ');
            }
            // Mask, do not take a remainder: "% 0xff" turned the byte 0xff
            // into 0 because the sign-extended value 0xffff is a multiple of 255.
            out.append(Integer.toString(ary[i] & 0xff, 16));
        }
        return out.toString();
    }

    /**
     * Decodes the first character of <code>array</code> with the given
     * encoding and prints one line describing the result: the reader's
     * encoding name, the description, the input bytes in hex, the decoded
     * code point in hex, the decoded character, and a label for the code
     * points this program knows about.
     *
     * <p>If the encoding is unsupported or reading fails, a short message is
     * printed instead.
     *
     * @param desc     label printed for the character being decoded
     * @param array    the encoded bytes
     * @param encoding name of the encoding used to decode <code>array</code>
     */
    public static void doit(String desc, byte[] array, String encoding) {
        InputStreamReader isr;
        try {
            isr = new InputStreamReader(new ByteArrayInputStream(array), encoding);
        } catch (UnsupportedEncodingException uce) {
            System.out.println("Unsupported coding exception for " + encoding);
            return;
        }

        // Capture the name now: getEncoding() returns null once the reader is closed.
        String encodingName = isr.getEncoding();
        int myChar;
        try {
            myChar = isr.read();
        } catch (IOException ioe) {
            System.out.println("IOException");
            return;
        } finally {
            try {
                isr.close();
            } catch (IOException ignored) {
                // Nothing useful to do: the reader only wraps an in-memory array.
            }
        }

        System.out.print(encodingName + "?F " + desc + " ("
                + toHexString(array) + ") ?? 0x" + Integer.toString(myChar, 16)
                + "(" + (char) myChar + ") ");
        switch (myChar) {
            case 0x301c:
                System.out.println("(wave dash)");
                break;
            case 0xff5e:
                System.out.println("(fullwidth tilde)");
                break;
            case 0xfffd:
                System.out.println("(unknown character)");
                break;
            case 0x2460:
                System.out.println("(circled digit 1)");
                break;
            default:
                // Still terminate the line so the next report starts on its own line.
                System.out.println();
                break;
        }
    }

    /**
     * Prints the Java version, the default encoding, the names the "SJIS" and
     * "shift_jis" aliases resolve to, and then the decoding results for the
     * sample characters.
     *
     * @param args the command line arguments
     */
    public static void main(String[] args) {
        System.out.println("java.version = " + System.getProperty("java.version"));
        System.out.println("java.vm.version = " + System.getProperty("java.vm.version"));

        // Default encoding
        byte[] dummy = { 0x01 };
        InputStream is = new ByteArrayInputStream(dummy);
        InputStreamReader isr2 = new InputStreamReader(is);
        System.out.println("?f?t?@???g???G???R?f?B???O?F" + isr2.getEncoding());

        // What is Shift_JIS an alias for?
        try {
            isr2 = new InputStreamReader(is, "SJIS");
            System.out.println("Alias: SJIS ??" + isr2.getEncoding());
        } catch (UnsupportedEncodingException uce) {
            System.out.println("Unsupported coding exception SJIS");
        }
        try {
            isr2 = new InputStreamReader(is, "shift_jis");
            System.out.println("Alias: shift_jis ??" + isr2.getEncoding());
        } catch (UnsupportedEncodingException uce) {
            System.out.println("Unsupported coding exception for shift_jis ");
        }

        // Windows codes for the wave dash character (0x8160) and maru-ichi,
        // the circled digit one (0x8740)
        byte[] wave = { (byte) 0x81, 0x60 };
        byte[] maruichi = { (byte) 0x87, 0x40 };
        // The same two characters as passed to the EUC-JP decoder below
        byte[] eucwave = { (byte) 0xa1, (byte) 0xc1 };
        byte[] eucmaruichi = { (byte) 0xad, (byte) 0xa1 };
        // Only referenced by the disabled ISO-2022-JP case at the end
        byte[] iso2022maruichi = { 0x1b, 0x24, 0x42, 0x2d, 0x21, 0x1b, 0x28, 0x42 };

        doit("?`", wave, "SJIS");
        doit("?`", wave, "MS932");
        doit("?`", eucwave, "EUC-JP");
        doit("?@", maruichi, "SJIS");
        doit("?@", maruichi, "MS932");
        doit("?@", eucmaruichi, "EUC-JP");
        //doit("?@",iso2022maruichi, "ISO-2022-JP");
    }
}
---------- END SOURCE ----------
(Review ID: 184107)
======================================================================
FULL PRODUCT VERSION :
java version "1.4.1_02"
Java(TM) 2 Runtime Environment, Standard Edition (build 1.4.1_02-b06)
Java HotSpot(TM) Client VM (build 1.4.1_02-b06, mixed mode)
FULL OS VERSION :
Microsoft Windows 2000 [Version 5.00.2195]
A DESCRIPTION OF THE PROBLEM :
The ByteToChar (and CharToByte) converters map shift-jis character 0x8160 (known as the "wave dash" or "full width tilde") to unicode U+301C. However, the Unicode spec (http://www.unicode.org/charts/PDF/U3000.pdf) states that the mapping should be to unicode U+FF5E. Referring to character U+301C it says:
This character was encoded to match JIS C-6226 1978 1-33 "wave dash".
Subsequent revisions of the JIS standard and industry practice have
settled on JIS 1-33 as being the fullwidth tilde character.
Further, the Windows-31J (aka MS932) converter and deconverter map 0x8160 to U+FF5E.
STEPS TO FOLLOW TO REPRODUCE THE PROBLEM :
run test program below
EXPECTED VERSUS ACTUAL BEHAVIOR :
Expected:
SJIS： ～ (81 60) → 0xff5e(～) (fullwidth tilde)
MS932： ～ (81 60) → 0xff5e(～) (fullwidth tilde)
Actual:
SJIS： ～ (81 60) → 0x301c(?) (wave dash)
MS932： ～ (81 60) → 0xff5e(～) (fullwidth tilde)
REPRODUCIBILITY :
This bug can be reproduced always.
---------- BEGIN SOURCE ----------
/*
* bake.java
* Test code comparing Java's MS932 and SJIS converters
* Created on 2003/04/10, 18:37
*/
import java.io.*;
/**
 * bake - decodes sample byte sequences with the SJIS, MS932 and EUC-JP
 * converters and prints the Unicode code point each converter produces for
 * the first character, so the mappings can be compared side by side.
 *
 * @author wkeese
 */
public class bake {

    /** Creates a new instance of bake */
    public bake() {
    }

    /**
     * Formats a byte array as space-separated lowercase hexadecimal values.
     *
     * <p>Each byte is rendered as its unsigned value (0..ff) without zero
     * padding, e.g. <code>{0x81, 0x60}</code> gives <code>"81 60"</code>.
     *
     * @param ary the bytes to format
     * @return the hexadecimal rendering; empty when <code>ary</code> is empty
     */
    static String toHexString(byte[] ary) {
        // StringBuffer rather than String concatenation in the loop; it is
        // also available on the 1.4.1 runtime this program was written for.
        StringBuffer out = new StringBuffer();
        for (int i = 0; i < ary.length; i++) {
            if (i > 0) {
                out.append(' ');
            }
            // Mask, do not take a remainder: "% 0xff" turned the byte 0xff
            // into 0 because the sign-extended value 0xffff is a multiple of 255.
            out.append(Integer.toString(ary[i] & 0xff, 16));
        }
        return out.toString();
    }

    /**
     * Decodes the first character of <code>array</code> with the given
     * encoding and prints one line describing the result: the reader's
     * encoding name, the description, the input bytes in hex, the decoded
     * code point in hex, the decoded character, and a label for the code
     * points this program knows about.
     *
     * <p>If the encoding is unsupported or reading fails, a short message is
     * printed instead.
     *
     * @param desc     label printed for the character being decoded
     * @param array    the encoded bytes
     * @param encoding name of the encoding used to decode <code>array</code>
     */
    public static void doit(String desc, byte[] array, String encoding) {
        InputStreamReader isr;
        try {
            isr = new InputStreamReader(new ByteArrayInputStream(array), encoding);
        } catch (UnsupportedEncodingException uce) {
            System.out.println("Unsupported coding exception for " + encoding);
            return;
        }

        // Capture the name now: getEncoding() returns null once the reader is closed.
        String encodingName = isr.getEncoding();
        int myChar;
        try {
            myChar = isr.read();
        } catch (IOException ioe) {
            System.out.println("IOException");
            return;
        } finally {
            try {
                isr.close();
            } catch (IOException ignored) {
                // Nothing useful to do: the reader only wraps an in-memory array.
            }
        }

        System.out.print(encodingName + "?F " + desc + " ("
                + toHexString(array) + ") ?? 0x" + Integer.toString(myChar, 16)
                + "(" + (char) myChar + ") ");
        switch (myChar) {
            case 0x301c:
                System.out.println("(wave dash)");
                break;
            case 0xff5e:
                System.out.println("(fullwidth tilde)");
                break;
            case 0xfffd:
                System.out.println("(unknown character)");
                break;
            case 0x2460:
                System.out.println("(circled digit 1)");
                break;
            default:
                // Still terminate the line so the next report starts on its own line.
                System.out.println();
                break;
        }
    }

    /**
     * Prints the Java version, the default encoding, the names the "SJIS" and
     * "shift_jis" aliases resolve to, and then the decoding results for the
     * sample characters.
     *
     * @param args the command line arguments
     */
    public static void main(String[] args) {
        System.out.println("java.version = " + System.getProperty("java.version"));
        System.out.println("java.vm.version = " + System.getProperty("java.vm.version"));

        // Default encoding
        byte[] dummy = { 0x01 };
        InputStream is = new ByteArrayInputStream(dummy);
        InputStreamReader isr2 = new InputStreamReader(is);
        System.out.println("?f?t?@???g???G???R?f?B???O?F" + isr2.getEncoding());

        // What is Shift_JIS an alias for?
        try {
            isr2 = new InputStreamReader(is, "SJIS");
            System.out.println("Alias: SJIS ??" + isr2.getEncoding());
        } catch (UnsupportedEncodingException uce) {
            System.out.println("Unsupported coding exception SJIS");
        }
        try {
            isr2 = new InputStreamReader(is, "shift_jis");
            System.out.println("Alias: shift_jis ??" + isr2.getEncoding());
        } catch (UnsupportedEncodingException uce) {
            System.out.println("Unsupported coding exception for shift_jis ");
        }

        // Windows codes for the wave dash character (0x8160) and maru-ichi,
        // the circled digit one (0x8740)
        byte[] wave = { (byte) 0x81, 0x60 };
        byte[] maruichi = { (byte) 0x87, 0x40 };
        // The same two characters as passed to the EUC-JP decoder below
        byte[] eucwave = { (byte) 0xa1, (byte) 0xc1 };
        byte[] eucmaruichi = { (byte) 0xad, (byte) 0xa1 };
        // Only referenced by the disabled ISO-2022-JP case at the end
        byte[] iso2022maruichi = { 0x1b, 0x24, 0x42, 0x2d, 0x21, 0x1b, 0x28, 0x42 };

        doit("?`", wave, "SJIS");
        doit("?`", wave, "MS932");
        doit("?`", eucwave, "EUC-JP");
        doit("?@", maruichi, "SJIS");
        doit("?@", maruichi, "MS932");
        doit("?@", eucmaruichi, "EUC-JP");
        //doit("?@",iso2022maruichi, "ISO-2022-JP");
    }
}
---------- END SOURCE ----------
(Review ID: 184107)
======================================================================