Uploaded image for project: 'JDK'
  1. JDK
  2. JDK-4628881

String.getBytes("UTF-8") incorrectly encodes certain characters

XMLWordPrintable

    • Icon: Bug Bug
    • Resolution: Not an Issue
    • Icon: P4 P4
    • None
    • 1.4.0
    • core-libs

      Name: gm110360 Date: 01/25/2002


      java version "1.4.0-beta3"
      Java(TM) 2 Runtime Environment, Standard Edition (build 1.4.0-beta3-b84)
      Java HotSpot(TM) Client VM (build 1.4.0-beta3-b84, mixed mode)


      When converting a String to bytes in UTF-8 format and back, the translation
      fails for certain character strings. See code below.

      import java.io.ByteArrayInputStream;
      import java.io.ByteArrayOutputStream;
      import java.io.DataInputStream;
      import java.io.DataOutputStream;

      /*
       * Copyright (c) 2001 Kenamea, Inc.
       */

      /**
       * Round-trip test for string encoding.
       *
       * <p>Each sample string is pushed through two encode/decode paths and the
       * result is compared with the input:
       * <ol>
       *   <li>{@code String.getBytes("UTF-8")} followed by
       *       {@code new String(bytes, "UTF-8")};</li>
       *   <li>{@code DataOutputStream.writeUTF} followed by
       *       {@code DataInputStream.readUTF}.</li>
       * </ol>
       * One line is printed per check, showing the input as hexadecimal UTF-16
       * code units and either {@code ok} or the code units that came back.
       *
       * <p>Most samples are single {@code char} values at or next to the
       * boundaries of the surrogate range (0xD800-0xDFFF). NOTE(review): a string
       * holding an unpaired surrogate is not well-formed UTF-16, so a lossy result
       * on the {@code getBytes} path for those samples is presumably the expected
       * encoder behavior rather than a defect; confirm against the charset
       * documentation.
       */
      public class UTFTest {

        /**
         * Runs both round-trip checks on an ASCII string and on single-character
         * strings around the surrogate range boundaries.
         *
         * @param argv ignored
         * @throws Exception if either encode/decode path throws
         */
        public static void main(String[] argv) throws Exception {

          checkConversions("hello, world");
          checkConversions(new String(new char[] { 0xd800 - 1 }));
          checkConversions(new String(new char[] { 0xd800 }));
          checkConversions(new String(new char[] { 0xd900 }));
          checkConversions(new String(new char[] { 0xda00 }));
          checkConversions(new String(new char[] { 0xdb00 }));
          checkConversions(new String(new char[] { 0xdc00 - 1 }));
          checkConversions(new String(new char[] { 0xdc00 }));
          checkConversions(new String(new char[] { 0xffff }));
        }

        /**
         * Runs the String-based check and then the data-stream check on one input.
         *
         * @param in the string to round-trip
         * @throws Exception if either encode/decode path throws
         */
        private static void checkConversions(String in)
          throws Exception
        {
          checkStringConversion(in);
          checkDataInputOutputStream(in);
        }

        /**
         * Encodes {@code in} with {@code String.getBytes("UTF-8")}, decodes the
         * bytes back with the {@code "UTF-8"} charset name, and reports the
         * comparison under the label {@code "string"}.
         *
         * @param in the string to round-trip
         * @throws Exception if the charset name is rejected
         */
        private static void checkStringConversion(String in)
          throws Exception
        {
          byte[] bytes = in.getBytes("UTF-8");
          String out = new String(bytes, 0, bytes.length, "UTF-8");

          checkStrings("string", in, out);
        }

        /**
         * Writes {@code in} with {@code DataOutputStream.writeUTF} into an
         * in-memory buffer, reads it back with {@code DataInputStream.readUTF},
         * and reports the comparison under the label {@code "data io"}.
         *
         * @param in the string to round-trip
         * @throws Exception if writing or reading fails
         */
        private static void checkDataInputOutputStream(String in)
          throws Exception
        {
          ByteArrayOutputStream baos = new ByteArrayOutputStream();
          DataOutputStream dos = new DataOutputStream(baos);
          dos.writeUTF(in);
          ByteArrayInputStream bais = new ByteArrayInputStream(baos.toByteArray());
          DataInputStream dis = new DataInputStream(bais);

          String out = dis.readUTF();

          checkStrings("data io", in, out);
        }

        /**
         * Prints one result line: the label, the input as hex code units, and
         * either {@code ok} or {@code bad} followed by the hex code units of the
         * string that was actually read back.
         *
         * @param label short name of the conversion path being reported
         * @param in    the original string
         * @param out   the string obtained after the round trip
         */
        private static void checkStrings(String label, String in, String out) {
          String fullLabel = label + " '" + hexString(in) + "': ";
          if (in.equals(out)) {
            say(fullLabel + "ok");
          } else {
            say(fullLabel + "bad; got '" + hexString(out) + "'");
          }
        }

        /**
         * Formats a string as space-separated, zero-padded, four-digit lowercase
         * hexadecimal UTF-16 code units, e.g. {@code "hi"} becomes
         * {@code "0068 0069"}. An empty string yields an empty result.
         *
         * @param s the string to format
         * @return the hex rendering, with no leading or trailing space
         */
        private static String hexString(String s) {
          StringBuffer buf = new StringBuffer();
          char[] chars = s.toCharArray();
          for (int i = 0; i < chars.length; i++) {
            // Separator goes between code units only; the previous guard
            // (i <= chars.length) was always true and left a trailing space.
            if (i > 0) {
              buf.append(' ');
            }
            String hex = Integer.toHexString(chars[i]);
            // Left-pad to four digits so every code unit has the same width.
            for (int j = hex.length(); j < 4; j++) {
              buf.append('0');
            }
            buf.append(hex);
          }
          return buf.toString();
        }

        /** Prints {@code s} followed by a line terminator to standard output. */
        private static void say(String s) { System.out.println(s); }
      }
      (Review ID: 137295)
      ======================================================================

            sherman Xueming Shen
            gmanwanisunw Girish Manwani (Inactive)
            Votes:
            0 Vote for this issue
            Watchers:
            0 Start watching this issue

              Created:
              Updated:
              Resolved:
              Imported:
              Indexed: