-
Bug
-
Resolution: Not an Issue
-
P4
-
None
-
1.4.0
-
generic
-
generic
Name: gm110360 Date: 01/25/2002
java version "1.4.0-beta3"
Java(TM) 2 Runtime Environment, Standard Edition (build 1.4.0-beta3-b84)
Java HotSpot(TM) Client VM (build 1.4.0-beta3-b84, mixed mode)
When converting a String to bytes in UTF-8 format and back, the translation
fails for certain character strings. See code below.
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
/*
* Copyright (c) 2001 Kenamea, Inc.
*/
/**
 * Test program that seems to show a problem with the UTF-8 encoding
 * for Strings.
 *
 * <p>Each sample string is round-tripped two ways: through
 * {@code String.getBytes("UTF-8")} followed by the {@code String} constructor
 * that decodes UTF-8, and through {@code DataOutputStream.writeUTF} followed by
 * {@code DataInputStream.readUTF}. For every round trip one line is printed to
 * standard output, reporting {@code ok} when the result equals the input and
 * {@code bad} (with the hex dump of what came back) otherwise.
 */
public class UTFTest {

    /**
     * Runs both round-trip checks on a plain ASCII string and on
     * single-character strings at and around 0xd800 and 0xdc00 (the UTF-16
     * surrogate range), plus 0xffff.
     *
     * @param argv ignored
     * @throws Exception if either conversion path fails with an exception
     */
    public static void main(String[] argv) throws Exception {
        checkConversions("hello, world");
        checkConversions(new String(new char[] { 0xd800 - 1 }));
        checkConversions(new String(new char[] { 0xd800 }));
        checkConversions(new String(new char[] { 0xd900 }));
        checkConversions(new String(new char[] { 0xda00 }));
        checkConversions(new String(new char[] { 0xdb00 }));
        checkConversions(new String(new char[] { 0xdc00 - 1 }));
        checkConversions(new String(new char[] { 0xdc00 }));
        checkConversions(new String(new char[] { 0xffff }));
    }

    /**
     * Applies both round-trip checks to one input string.
     *
     * @param in the string to round-trip
     * @throws Exception if either conversion path fails with an exception
     */
    private static void checkConversions(String in)
        throws Exception
    {
        checkStringConversion(in);
        checkDataInputOutputStream(in);
    }

    /**
     * Encodes the string to UTF-8 bytes with {@code String.getBytes}, decodes
     * the bytes back into a string, and reports the comparison under the
     * label {@code "string"}.
     *
     * @param in the string to round-trip
     * @throws Exception if the "UTF-8" charset name is rejected
     */
    private static void checkStringConversion(String in)
        throws Exception
    {
        byte[] bytes = in.getBytes("UTF-8");
        String out = new String(bytes, 0, bytes.length, "UTF-8");
        checkStrings("string", in, out);
    }

    /**
     * Writes the string with {@code DataOutputStream.writeUTF} into an
     * in-memory buffer, reads it back with {@code DataInputStream.readUTF},
     * and reports the comparison under the label {@code "data io"}.
     *
     * @param in the string to round-trip
     * @throws Exception if writing or reading the in-memory stream fails
     */
    private static void checkDataInputOutputStream(String in)
        throws Exception
    {
        ByteArrayOutputStream baos = new ByteArrayOutputStream();
        DataOutputStream dos = new DataOutputStream(baos);
        dos.writeUTF(in);
        // Make sure everything written has reached the byte array before it is copied.
        dos.flush();

        ByteArrayInputStream bais = new ByteArrayInputStream(baos.toByteArray());
        DataInputStream dis = new DataInputStream(bais);
        String out = dis.readUTF();
        checkStrings("data io", in, out);
    }

    /**
     * Prints one result line of the form
     * {@code <label> '<hex of in>': ok} or
     * {@code <label> '<hex of in>': bad; got '<hex of out>'}.
     *
     * @param label name of the conversion path being reported
     * @param in    the original string
     * @param out   the string obtained after the round trip
     */
    private static void checkStrings(String label, String in, String out) {
        String fullLabel = label + " '" + hexString(in) + "': ";
        if (in.equals(out)) {
            say(fullLabel + "ok");
        } else {
            say(fullLabel + "bad; got '" + hexString(out) + "'");
        }
    }

    /**
     * Renders every UTF-16 code unit of the string as four lowercase hex
     * digits, with a single space between consecutive units and no leading
     * or trailing space.
     *
     * @param s the string to dump
     * @return the hex dump; the empty string when {@code s} is empty
     */
    private static String hexString(String s) {
        StringBuffer buf = new StringBuffer();
        char[] chars = s.toCharArray();
        for (int i = 0; i < chars.length; i++) {
            String hex = Integer.toHexString(chars[i] & 0xffff);
            // Left-pad to a fixed width of four digits.
            for (int j = hex.length(); j < 4; j++) {
                buf.append('0');
            }
            buf.append(hex);
            // Separator goes between units only; the previous guard
            // (i <= chars.length) was always true and left a trailing space.
            if (i < chars.length - 1) {
                buf.append(' ');
            }
        }
        return buf.toString();
    }

    /**
     * Prints one line to standard output.
     *
     * @param s the line to print
     */
    private static void say(String s) {
        System.out.println(s);
    }
}
(Review ID: 137295)
======================================================================
java version "1.4.0-beta3"
Java(TM) 2 Runtime Environment, Standard Edition (build 1.4.0-beta3-b84)
Java HotSpot(TM) Client VM (build 1.4.0-beta3-b84, mixed mode)
When converting a String to bytes in UTF-8 format and back, the translation
fails for certain character strings. See code below.
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
/*
* Copyright (c) 2001 Kenamea, Inc.
*/
/**
 * Test program that seems to show a problem with the UTF-8 encoding
 * for Strings.
 *
 * <p>Each sample string is round-tripped two ways: through
 * {@code String.getBytes("UTF-8")} followed by the {@code String} constructor
 * that decodes UTF-8, and through {@code DataOutputStream.writeUTF} followed by
 * {@code DataInputStream.readUTF}. For every round trip one line is printed to
 * standard output, reporting {@code ok} when the result equals the input and
 * {@code bad} (with the hex dump of what came back) otherwise.
 */
public class UTFTest {

    /**
     * Runs both round-trip checks on a plain ASCII string and on
     * single-character strings at and around 0xd800 and 0xdc00 (the UTF-16
     * surrogate range), plus 0xffff.
     *
     * @param argv ignored
     * @throws Exception if either conversion path fails with an exception
     */
    public static void main(String[] argv) throws Exception {
        checkConversions("hello, world");
        checkConversions(new String(new char[] { 0xd800 - 1 }));
        checkConversions(new String(new char[] { 0xd800 }));
        checkConversions(new String(new char[] { 0xd900 }));
        checkConversions(new String(new char[] { 0xda00 }));
        checkConversions(new String(new char[] { 0xdb00 }));
        checkConversions(new String(new char[] { 0xdc00 - 1 }));
        checkConversions(new String(new char[] { 0xdc00 }));
        checkConversions(new String(new char[] { 0xffff }));
    }

    /**
     * Applies both round-trip checks to one input string.
     *
     * @param in the string to round-trip
     * @throws Exception if either conversion path fails with an exception
     */
    private static void checkConversions(String in)
        throws Exception
    {
        checkStringConversion(in);
        checkDataInputOutputStream(in);
    }

    /**
     * Encodes the string to UTF-8 bytes with {@code String.getBytes}, decodes
     * the bytes back into a string, and reports the comparison under the
     * label {@code "string"}.
     *
     * @param in the string to round-trip
     * @throws Exception if the "UTF-8" charset name is rejected
     */
    private static void checkStringConversion(String in)
        throws Exception
    {
        byte[] bytes = in.getBytes("UTF-8");
        String out = new String(bytes, 0, bytes.length, "UTF-8");
        checkStrings("string", in, out);
    }

    /**
     * Writes the string with {@code DataOutputStream.writeUTF} into an
     * in-memory buffer, reads it back with {@code DataInputStream.readUTF},
     * and reports the comparison under the label {@code "data io"}.
     *
     * @param in the string to round-trip
     * @throws Exception if writing or reading the in-memory stream fails
     */
    private static void checkDataInputOutputStream(String in)
        throws Exception
    {
        ByteArrayOutputStream baos = new ByteArrayOutputStream();
        DataOutputStream dos = new DataOutputStream(baos);
        dos.writeUTF(in);
        // Make sure everything written has reached the byte array before it is copied.
        dos.flush();

        ByteArrayInputStream bais = new ByteArrayInputStream(baos.toByteArray());
        DataInputStream dis = new DataInputStream(bais);
        String out = dis.readUTF();
        checkStrings("data io", in, out);
    }

    /**
     * Prints one result line of the form
     * {@code <label> '<hex of in>': ok} or
     * {@code <label> '<hex of in>': bad; got '<hex of out>'}.
     *
     * @param label name of the conversion path being reported
     * @param in    the original string
     * @param out   the string obtained after the round trip
     */
    private static void checkStrings(String label, String in, String out) {
        String fullLabel = label + " '" + hexString(in) + "': ";
        if (in.equals(out)) {
            say(fullLabel + "ok");
        } else {
            say(fullLabel + "bad; got '" + hexString(out) + "'");
        }
    }

    /**
     * Renders every UTF-16 code unit of the string as four lowercase hex
     * digits, with a single space between consecutive units and no leading
     * or trailing space.
     *
     * @param s the string to dump
     * @return the hex dump; the empty string when {@code s} is empty
     */
    private static String hexString(String s) {
        StringBuffer buf = new StringBuffer();
        char[] chars = s.toCharArray();
        for (int i = 0; i < chars.length; i++) {
            String hex = Integer.toHexString(chars[i] & 0xffff);
            // Left-pad to a fixed width of four digits.
            for (int j = hex.length(); j < 4; j++) {
                buf.append('0');
            }
            buf.append(hex);
            // Separator goes between units only; the previous guard
            // (i <= chars.length) was always true and left a trailing space.
            if (i < chars.length - 1) {
                buf.append(' ');
            }
        }
        return buf.toString();
    }

    /**
     * Prints one line to standard output.
     *
     * @param s the line to print
     */
    private static void say(String s) {
        System.out.println(s);
    }
}
(Review ID: 137295)
======================================================================