Uploaded image for project: 'JDK'
  1. JDK
  2. JDK-4852917

ByteToChar for Shift-JIS wave-dash characters violates unicode spec suggestion

XMLWordPrintable

    • Icon: Bug Bug
    • Resolution: Not an Issue
    • Icon: P3 P3
    • None
    • 1.4.1
    • core-libs

      Name: nt126004 Date: 04/23/2003


      FULL PRODUCT VERSION :
      java version "1.4.1_02"
      Java(TM) 2 Runtime Environment, Standard Edition (build 1.4.1_02-b06)
      Java HotSpot(TM) Client VM (build 1.4.1_02-b06, mixed mode)


      FULL OS VERSION :
      Microsoft Windows 2000 [Version 5.00.2195]

      A DESCRIPTION OF THE PROBLEM :
      The ByteToChar (and CharToByte) converters map the Shift-JIS character 0x8160 (known as the "wave dash" or "fullwidth tilde") to Unicode U+301C. However, the Unicode code chart (http://www.unicode.org/charts/PDF/U3000.pdf) indicates that the mapping should be to U+FF5E. Its annotation for character U+301C says:

         This character was encoded to match JIS C-6226 1978 1-33 "wave dash".
         Subsequent revisions of the JIS standard and industry practice have
         settled on JIS 1-33 as being the fullwidth tilde character.

      Further, the Windows-31J (aka MS932) converter and deconverter map 0x8160 to U+FF5E.


      STEPS TO FOLLOW TO REPRODUCE THE PROBLEM :
      run test program below

      EXPECTED VERSUS ACTUAL BEHAVIOR :
      Expected:
      SJIS?F ?` (81 60) ?? 0xff5e(?`) (fullwidth tilde)
      MS932?F ?` (81 60) ?? 0xff5e(?`) (fullwidth tilde)

      Actual:
      SJIS?F ?` (81 60) ?? 0x301c(?) (wave dash)
      MS932?F ?` (81 60) ?? 0xff5e(?`) (fullwidth tilde)

      REPRODUCIBILITY :
      This bug can be reproduced always.

      ---------- BEGIN SOURCE ----------
      /*
       * bake.java
       * Test code for the Java MS932 and SJIS converters (the original Japanese comment was mis-encoded)
       * Created on 2003/04/10, 18:37
       */

      import java.io.*;

      /**
       * bake - small diagnostic program that decodes a handful of fixed byte
       * sequences with several Japanese encodings (SJIS, MS932, EUC-JP) and
       * prints the first decoded character of each, so the converters' results
       * can be compared side by side.
       *
       * @author wkeese
       */
      public class bake {

          /** Creates a new instance of bake */
          public bake() {
          }

          /**
           * Formats a byte array as space-separated, lower-case hexadecimal
           * values (no zero padding), e.g. {@code {0x81, 0x60}} becomes
           * {@code "81 60"}.
           *
           * @param ary the bytes to format
           * @return the hexadecimal rendering; empty string for an empty array
           */
          static String toHexString(byte[] ary) {
              StringBuffer out = new StringBuffer();
              for (int i = 0; i < ary.length; i++) {
                  if (i > 0) {
                      out.append(' ');
                  }
                  // Mask (not modulo) to get the unsigned byte value: the former
                  // "% 0xff" on the char-widened byte turned 0xff into 0.
                  out.append(Integer.toString(ary[i] & 0xff, 16));
              }
              return out.toString();
          }

          /**
           * Decodes the first character of {@code array} using {@code encoding}
           * and prints one report line to standard output: the encoding name
           * reported by the reader, the description, the input bytes in hex, the
           * decoded value in hex, the decoded character, and a label for the
           * values this program knows about.
           *
           * <p>If the encoding is unsupported or reading fails, a short message is
           * printed instead and the method returns.
           *
           * @param desc     text describing the character under test
           * @param array    the encoded bytes to decode
           * @param encoding the name of the encoding to decode with
           */
          public static void doit(String desc, byte[] array, String encoding) {
              InputStream is = new ByteArrayInputStream(array);

              InputStreamReader isr;
              try {
                  isr = new InputStreamReader(is, encoding);
              } catch (UnsupportedEncodingException uce) {
                  System.out.println("Unsupported coding exception for " + encoding);
                  return;
              }

              int myChar;
              try {
                  myChar = isr.read();
              } catch (IOException ioe) {
                  System.out.println("IOException");
                  return;
              }

              System.out.print(isr.getEncoding() + "?F " + desc + " (" +
                  toHexString(array) + ") ?? 0x" + Integer.toString(myChar, 16)
                  + "(" + (char) myChar + ") ");

              switch (myChar) {
                  case 0x301c: System.out.println("(wave dash)"); break;
                  case 0xff5e: System.out.println("(fullwidth tilde)"); break;
                  case 0xfffd: System.out.println("(unknown character)"); break;
                  case 0x2460: System.out.println("(circled digit 1)"); break;
                  // Any other value: still terminate the line so that the next
                  // report does not run onto this one.
                  default: System.out.println(); break;
              }
          }

          /**
           * Prints the Java version, the default encoding, the names the
           * "SJIS" and "shift_jis" aliases resolve to, and then the decoding
           * results for the test byte sequences.
           *
           * @param args the command line arguments
           */
          public static void main(String[] args) {

              System.out.println("java.version = " + System.getProperty("java.version"));
              System.out.println("java.vm.version = " + System.getProperty("java.vm.version"));

              // Report the default encoding (reader created without a charset name)
              byte[] dummy = { 0x01 };
              InputStream is = new ByteArrayInputStream(dummy);
              InputStreamReader isr2 = new InputStreamReader(is);
              System.out.println("?f?t?@???g???G???R?f?B???O?F" + isr2.getEncoding());

              // What is Shift_JIS an alias for?
              try {
                  isr2 = new InputStreamReader(is, "SJIS");
                  System.out.println("Alias: SJIS ??" + isr2.getEncoding());
              } catch (UnsupportedEncodingException uce) {
                  System.out.println("Unsupported coding exception SJIS");
              }
              try {
                  isr2 = new InputStreamReader(is, "shift_jis");
                  System.out.println("Alias: shift_jis ??" + isr2.getEncoding());
              } catch (UnsupportedEncodingException uce) {
                  System.out.println("Unsupported coding exception for shift_jis ");
              }

              // Windows byte codes for the wave dash character and for maru-ichi
              // (circled digit one), followed by the EUC-JP and ISO-2022-JP forms.
              byte[] wave = { (byte) 0x81, 0x60 };
              byte[] maruichi = { (byte) 0x87, 0x40 };
              byte[] eucwave = { (byte) 0xa1, (byte) 0xc1 };
              byte[] eucmaruichi = { (byte) 0xad, (byte) 0xa1 };
              // Only used by the disabled ISO-2022-JP check below.
              byte[] iso2022maruichi = { 0x1b, 0x24, 0x42, 0x2d, 0x21, 0x1b, 0x28, 0x42 };
              doit("?`", wave, "SJIS");
              doit("?`", wave, "MS932");
              doit("?`", eucwave, "EUC-JP");
              doit("?@", maruichi, "SJIS");
              doit("?@", maruichi, "MS932");
              doit("?@", eucmaruichi, "EUC-JP");
              //doit("?@",iso2022maruichi, "ISO-2022-JP");
          }

      }

      ---------- END SOURCE ----------
      (Review ID: 184107)
      ======================================================================

            ilittlesunw Ian Little (Inactive)
            nthompsosunw Nathanael Thompson (Inactive)
            Votes:
            0 Vote for this issue
            Watchers:
            0 Start watching this issue

              Created:
              Updated:
              Resolved:
              Imported:
              Indexed: