-
Bug
-
Resolution: Unresolved
-
P4
-
None
-
6
-
Cause Known
-
x86
-
windows_xp
FULL PRODUCT VERSION :
1.6.0_10-rc2-b32
ADDITIONAL OS VERSION INFORMATION :
Windows XP SR-2
A DESCRIPTION OF THE PROBLEM :
I have discovered 4 charsets that do not completely contain the charsets which their contains() method reports them as containing:
- ISO-8859-15 should contain ISO-8859-1
- UTF-8 should contain ISO-8859-1
- EUC-JP should contain JIS-X-0208
- ISO-2022-JP should contain US-ASCII
I assume there are more that don't match.
STEPS TO FOLLOW TO REPRODUCE THE PROBLEM :
Run the JUnit test below.
REPRODUCIBILITY :
This bug can always be reproduced.
---------- BEGIN SOURCE ----------
import java.nio.*;
import java.nio.charset.*;
import java.util.*;
import org.junit.*;
import static org.junit.Assert.*;
/**
 * Checks, for several charset pairs, that when {@link Charset#contains(Charset)}
 * reports containment, both charsets also decode the same input bytes to the
 * same characters.
 *
 * <p>Every test decodes a fixed byte sequence with the "contained" charset and
 * with the "containing" charset, prints both results as {@code \\uXXXX,} lists
 * and then asserts that the decoded characters are equal.
 *
 * @author Ulf.Zibis @ CoSoCo.de
 */
public class CharsetContainsTest {

    // test parameters:
    private static final int BYTE_RANGE = 1 << Byte.SIZE;

    /** Every byte value 0x00..0xFF, in ascending (unsigned) order. */
    private static final byte[] IN_BYTES = new byte[BYTE_RANGE];

    static {
        for (int b = 0; b < BYTE_RANGE; b++) {
            IN_BYTES[b] = (byte) b;
        }
    }

    // parameters:
    /** Input for the current test; re-created by {@link #setUp()} before each test. */
    private ByteBuffer inBytes;

    public CharsetContainsTest() {}

    /**
     * Wraps the full byte range as the default input and prints a blank line
     * to separate the output of consecutive tests.
     */
    @Before
    public void setUp() throws Exception {
        inBytes = ByteBuffer.wrap(IN_BYTES);
        System.out.println();
    }

    /** ISO-8859-15 versus ISO-8859-1 over all 256 byte values. */
    @Test
    public void testISO8859_15_ISO8859_1() throws CharacterCodingException {
        inBytes = ByteBuffer.wrap(IN_BYTES);
        checkContainment("ISO-8859-15", "ISO8859_15Chars: ",
                "ISO-8859-1", "ISO8859_1Chars: ");
    }

    /** UTF-8 versus ISO-8859-1 over all 256 byte values. */
    @Test
    public void testUTF_8_ISO8859_1() throws CharacterCodingException {
        inBytes = ByteBuffer.wrap(IN_BYTES);
        checkContainment("UTF-8", "UTF_8Chars: ",
                "ISO-8859-1", "ISO8859_1Chars: ");
    }

    /** EUC-JP versus x-JIS0208 over all 256 byte values. */
    @Test
    public void testEUC_JP_JIS_X0208() throws CharacterCodingException {
        inBytes = ByteBuffer.wrap(IN_BYTES);
        checkContainment("EUC-JP", "EUC-JP: ",
                "x-JIS0208", "JIS_X0208: ");
    }

    /** ISO-2022-JP versus US-ASCII over the 7-bit range 0x00..0x7F only. */
    @Test
    public void testISO2022_JP_US_ASCII() throws CharacterCodingException {
        inBytes = ByteBuffer.wrap(Arrays.copyOf(IN_BYTES, 0x80));
        checkContainment("ISO-2022-JP", "ISO2022_JPChars: ",
                "US-ASCII", "US_ASCIIChars: ");
    }

    /**
     * Asserts that {@code containerName} reports containing {@code containedName},
     * then decodes {@link #inBytes} with both charsets, prints both results
     * (contained charset first) and asserts that they are equal.
     *
     * @param containerName  name of the charset expected to contain the other
     * @param containerLabel prefix printed before the container's decoded chars
     * @param containedName  name of the charset expected to be contained
     * @param containedLabel prefix printed before the contained charset's decoded chars
     */
    private void checkContainment(String containerName, String containerLabel,
            String containedName, String containedLabel) {
        Charset contained = Charset.forName(containedName);
        Charset container = Charset.forName(containerName);
        assertTrue(container.contains(contained));

        char[] containedChars = decodeAll(contained, inBytes);
        char[] containerChars = decodeAll(container, inBytes);

        dump(containedLabel, containedChars);
        dump(containerLabel, containerChars);
        assertArrayEquals(containedChars, containerChars);
    }

    /**
     * Decodes {@code input} with {@code charset} and returns exactly the
     * decoded characters.
     *
     * <p>The result is taken from the buffer's content (position to limit)
     * rather than from {@code CharBuffer.array()}: the backing array may be
     * larger than the decoded content, and its unused slots must neither be
     * printed nor compared. A duplicate of {@code input} is decoded so the
     * caller's buffer position is left untouched.
     */
    private static char[] decodeAll(Charset charset, ByteBuffer input) {
        return charset.decode(input.duplicate()).toString().toCharArray();
    }

    /** Prints {@code label} followed by each char as {@code \\uXXXX,} and a newline. */
    private static void dump(String label, char[] chars) {
        System.out.print(label);
        for (char c : chars) {
            System.out.printf("\\u%04X,", (int) c);
        }
        System.out.println();
    }
}
---------- END SOURCE ----------
1.6.0_10-rc2-b32
ADDITIONAL OS VERSION INFORMATION :
Windows XP SR-2
A DESCRIPTION OF THE PROBLEM :
I have discovered 4 charsets that do not completely contain the charsets which their contains() method reports them as containing:
- ISO-8859-15 should contain ISO-8859-1
- UTF-8 should contain ISO-8859-1
- EUC-JP should contain JIS-X-0208
- ISO-2022-JP should contain US-ASCII
I assume there are more that don't match.
STEPS TO FOLLOW TO REPRODUCE THE PROBLEM :
Run the JUnit test below.
REPRODUCIBILITY :
This bug can always be reproduced.
---------- BEGIN SOURCE ----------
import java.nio.*;
import java.nio.charset.*;
import java.util.*;
import org.junit.*;
import static org.junit.Assert.*;
/**
 * Checks, for several charset pairs, that when {@link Charset#contains(Charset)}
 * reports containment, both charsets also decode the same input bytes to the
 * same characters.
 *
 * <p>Every test decodes a fixed byte sequence with the "contained" charset and
 * with the "containing" charset, prints both results as {@code \\uXXXX,} lists
 * and then asserts that the decoded characters are equal.
 *
 * @author Ulf.Zibis @ CoSoCo.de
 */
public class CharsetContainsTest {

    // test parameters:
    private static final int BYTE_RANGE = 1 << Byte.SIZE;

    /** Every byte value 0x00..0xFF, in ascending (unsigned) order. */
    private static final byte[] IN_BYTES = new byte[BYTE_RANGE];

    static {
        for (int b = 0; b < BYTE_RANGE; b++) {
            IN_BYTES[b] = (byte) b;
        }
    }

    // parameters:
    /** Input for the current test; re-created by {@link #setUp()} before each test. */
    private ByteBuffer inBytes;

    public CharsetContainsTest() {}

    /**
     * Wraps the full byte range as the default input and prints a blank line
     * to separate the output of consecutive tests.
     */
    @Before
    public void setUp() throws Exception {
        inBytes = ByteBuffer.wrap(IN_BYTES);
        System.out.println();
    }

    /** ISO-8859-15 versus ISO-8859-1 over all 256 byte values. */
    @Test
    public void testISO8859_15_ISO8859_1() throws CharacterCodingException {
        inBytes = ByteBuffer.wrap(IN_BYTES);
        checkContainment("ISO-8859-15", "ISO8859_15Chars: ",
                "ISO-8859-1", "ISO8859_1Chars: ");
    }

    /** UTF-8 versus ISO-8859-1 over all 256 byte values. */
    @Test
    public void testUTF_8_ISO8859_1() throws CharacterCodingException {
        inBytes = ByteBuffer.wrap(IN_BYTES);
        checkContainment("UTF-8", "UTF_8Chars: ",
                "ISO-8859-1", "ISO8859_1Chars: ");
    }

    /** EUC-JP versus x-JIS0208 over all 256 byte values. */
    @Test
    public void testEUC_JP_JIS_X0208() throws CharacterCodingException {
        inBytes = ByteBuffer.wrap(IN_BYTES);
        checkContainment("EUC-JP", "EUC-JP: ",
                "x-JIS0208", "JIS_X0208: ");
    }

    /** ISO-2022-JP versus US-ASCII over the 7-bit range 0x00..0x7F only. */
    @Test
    public void testISO2022_JP_US_ASCII() throws CharacterCodingException {
        inBytes = ByteBuffer.wrap(Arrays.copyOf(IN_BYTES, 0x80));
        checkContainment("ISO-2022-JP", "ISO2022_JPChars: ",
                "US-ASCII", "US_ASCIIChars: ");
    }

    /**
     * Asserts that {@code containerName} reports containing {@code containedName},
     * then decodes {@link #inBytes} with both charsets, prints both results
     * (contained charset first) and asserts that they are equal.
     *
     * @param containerName  name of the charset expected to contain the other
     * @param containerLabel prefix printed before the container's decoded chars
     * @param containedName  name of the charset expected to be contained
     * @param containedLabel prefix printed before the contained charset's decoded chars
     */
    private void checkContainment(String containerName, String containerLabel,
            String containedName, String containedLabel) {
        Charset contained = Charset.forName(containedName);
        Charset container = Charset.forName(containerName);
        assertTrue(container.contains(contained));

        char[] containedChars = decodeAll(contained, inBytes);
        char[] containerChars = decodeAll(container, inBytes);

        dump(containedLabel, containedChars);
        dump(containerLabel, containerChars);
        assertArrayEquals(containedChars, containerChars);
    }

    /**
     * Decodes {@code input} with {@code charset} and returns exactly the
     * decoded characters.
     *
     * <p>The result is taken from the buffer's content (position to limit)
     * rather than from {@code CharBuffer.array()}: the backing array may be
     * larger than the decoded content, and its unused slots must neither be
     * printed nor compared. A duplicate of {@code input} is decoded so the
     * caller's buffer position is left untouched.
     */
    private static char[] decodeAll(Charset charset, ByteBuffer input) {
        return charset.decode(input.duplicate()).toString().toCharArray();
    }

    /** Prints {@code label} followed by each char as {@code \\uXXXX,} and a newline. */
    private static void dump(String label, char[] chars) {
        System.out.print(label);
        for (char c : chars) {
            System.out.printf("\\u%04X,", (int) c);
        }
        System.out.println();
    }
}
---------- END SOURCE ----------