Uploaded image for project: 'JDK'
  1. JDK
  2. JDK-6761481

Charset#contains() should be examined

XMLWordPrintable

    • Icon: Bug Bug
    • Resolution: Unresolved
    • Icon: P4 P4
    • None
    • 6
    • core-libs

      FULL PRODUCT VERSION :
      1.6.0_10-rc2-b32

      ADDITIONAL OS VERSION INFORMATION :
      Windows XP SR-2

      A DESCRIPTION OF THE PROBLEM :
      I have found 4 charsets that do not completely contain the charsets which method contains() reports them as containing:

      - ISO-8859-15 should contain ISO-8859-1
      - UTF-8 should contain ISO-8859-1
      - EUC-JP should contain JIS-X-0208
      - ISO-2022-JP should contain US-ASCII

      I assume there are more that don't match.


      STEPS TO FOLLOW TO REPRODUCE THE PROBLEM :
      Run the JUnit test below.


      REPRODUCIBILITY :
      This bug can be reproduced always.

      ---------- BEGIN SOURCE ----------
      import java.nio.*;
      import java.nio.charset.*;
      import java.util.*;
      import org.junit.*;
      import static org.junit.Assert.*;

      /**
       *
       * @author Ulf.Zibis @ CoSoCo.de
       */
      public class CharsetContainsTest {

          // test parameters:
          private static final int BYTE_RANGE = 1 << Byte.SIZE;
          private static final byte[] IN_BYTES = new byte[BYTE_RANGE];
          static {
              for (int b = 0; b < BYTE_RANGE;)
                  IN_BYTES[b] = (byte)b++;
          }
          // parameters:
          private ByteBuffer inBytes;

          public CharsetContainsTest() {}

          @Before
          public void setUp() throws Exception {
              inBytes = ByteBuffer.wrap(IN_BYTES);
              System.out.println();
          }

          @Test
          public void testISO8859_15_ISO8859_1() throws CharacterCodingException {
              inBytes = ByteBuffer.wrap(IN_BYTES);
              Charset ISO8859_1 = Charset.forName("ISO-8859-1");
              Charset ISO8859_15 = Charset.forName("ISO-8859-15");
              assertTrue(ISO8859_15.contains(ISO8859_1));
              CharBuffer ISO8859_1Chars = ISO8859_1.decode(inBytes);
              CharBuffer ISO8859_15Chars = ISO8859_15.decode((ByteBuffer)inBytes.rewind());
              System.out.printf("ISO8859_1Chars: ");
              for (char c : ISO8859_1Chars.array())
                  System.out.printf("\\u%04X,", (int)c);
              System.out.println();
              System.out.printf("ISO8859_15Chars: ");
              for (char c : ISO8859_15Chars.array())
                  System.out.printf("\\u%04X,", (int)c);
              System.out.println();
              assertArrayEquals(ISO8859_1Chars.array(), ISO8859_15Chars.array());
          }

          @Test
          public void testUTF_8_ISO8859_1() throws CharacterCodingException {
              inBytes = ByteBuffer.wrap(IN_BYTES);
              Charset ISO8859_1 = Charset.forName("ISO-8859-1");
              Charset UTF_8 = Charset.forName("UTF-8");
              assertTrue(UTF_8.contains(ISO8859_1));
              CharBuffer ISO8859_1Chars = ISO8859_1.decode(inBytes);
              CharBuffer UTF_8Chars = UTF_8.decode((ByteBuffer)inBytes.rewind());
              System.out.printf("ISO8859_1Chars: ");
              for (char c : ISO8859_1Chars.array())
                  System.out.printf("\\u%04X,", (int)c);
              System.out.println();
              System.out.printf("UTF_8Chars: ");
              for (char c : UTF_8Chars.array())
                  System.out.printf("\\u%04X,", (int)c);
              System.out.println();
              assertArrayEquals(ISO8859_1Chars.array(), UTF_8Chars.array());
          }

          @Test
          public void testEUC_JP_JIS_X0208() throws CharacterCodingException {
              inBytes = ByteBuffer.wrap(IN_BYTES);
              Charset JIS_X0208 = Charset.forName("x-JIS0208");
              Charset EUC_JP = Charset.forName("EUC-JP");
              assertTrue(EUC_JP.contains(JIS_X0208));
              CharBuffer JIS_X0208Chars = JIS_X0208.decode(inBytes);
              CharBuffer EUC_JPChars = EUC_JP.decode((ByteBuffer)inBytes.rewind());
              System.out.printf("JIS_X0208: ");
              for (char c : JIS_X0208Chars.array())
                  System.out.printf("\\u%04X,", (int)c);
              System.out.println();
              System.out.printf("EUC-JP: ");
              for (char c : EUC_JPChars.array())
                  System.out.printf("\\u%04X,", (int)c);
              System.out.println();
              assertArrayEquals(JIS_X0208Chars.array(), EUC_JPChars.array());
          }

          @Test
          public void testISO2022_JP_US_ASCII() throws CharacterCodingException {
              inBytes = ByteBuffer.wrap(Arrays.copyOf(IN_BYTES, 0x80));
              Charset US_ASCII = Charset.forName("US-ASCII");
              Charset ISO2022_JP = Charset.forName("ISO-2022-JP");
              assertTrue(ISO2022_JP.contains(US_ASCII));
              CharBuffer US_ASCIIChars = US_ASCII.decode(inBytes);
              CharBuffer ISO2022_JPChars = ISO2022_JP.decode((ByteBuffer)inBytes.rewind());
              System.out.printf("US_ASCIIChars: ");
              for (char c : US_ASCIIChars.array())
                  System.out.printf("\\u%04X,", (int)c);
              System.out.println();
              System.out.printf("ISO2022_JPChars: ");
              for (char c : ISO2022_JPChars.array())
                  System.out.printf("\\u%04X,", (int)c);
              System.out.println();
              assertArrayEquals(US_ASCIIChars.array(), ISO2022_JPChars.array());
          }
      }

      ---------- END SOURCE ----------

            sherman Xueming Shen
            ndcosta Nelson Dcosta (Inactive)
            Votes:
            0 Vote for this issue
            Watchers:
            0 Start watching this issue

              Created:
              Updated:
              Imported:
              Indexed: