Uploaded image for project: 'JDK'
  1. JDK
  2. JDK-6378295

ASCII range encoder bugs in Greek charset (ISO-8859-7)

XMLWordPrintable

    • Icon: Bug Bug
    • Resolution: Fixed
    • Icon: P4 P4
    • 6
    • 6
    • core-libs

      Here's a program that finds all the non-ASCII-compatible encodings in
      the JDK. It correctly finds the bug in KOI8-U, and also finds
      apparently serious bugs in

      ISO-8859-7
      ISO-2022-KR

      src/toy/tt.java

      The test (incorrectly) assumes all encodings are ASCII-compatible.
      Please turn this test into a real regression test, and file bugs
      against various charsets.

      As I always say, the charset code is crawling with bugs...
      import java.io.*;
      import java.util.*;
      import java.nio.charset.*;
      import java.nio.*;

      /**
       * Scans every charset installed in the running JDK and reports those that
       * do not map the 128 US-ASCII code points to the identical byte values
       * (and back).
       *
       * <p>For each charset its name is printed first. A following line
       * {@code "<name> -> bytes"} means encoding the ASCII string did not yield
       * the bytes 0x00..0x7F; {@code "<name> -> chars"} means decoding those
       * bytes did not yield the ASCII string. A report is not necessarily a
       * bug: charsets that are not ASCII-compatible by design are reported too.
       */
      public class tt {
          /** The byte values 0x00..0x7F, in order. Never modified after class init. */
          private static final byte[] ASCII_BYTES = new byte[0x80];

          /** The characters U+0000..U+007F, in order. */
          private static final String ASCII_STRING;

          static {
              char[] asciiChars = new char[0x80];
              for (int i = 0; i < 0x80; i++) {
                  ASCII_BYTES[i] = (byte) i;
                  asciiChars[i] = (char) i;
              }
              ASCII_STRING = new String(asciiChars);
          }

          /**
           * Tells whether {@code cs} encodes U+0000..U+007F as exactly the
           * bytes 0x00..0x7F.
           *
           * @param cs the charset to probe; must support encoding
           * @return {@code true} if the encoded form equals the ASCII bytes
           */
          public static boolean encodesAsciiIdentically(Charset cs) {
              return Arrays.equals(ASCII_STRING.getBytes(cs), ASCII_BYTES);
          }

          /**
           * Tells whether {@code cs} decodes the bytes 0x00..0x7F as exactly
           * U+0000..U+007F.
           *
           * @param cs the charset to probe
           * @return {@code true} if the decoded form equals the ASCII string
           */
          public static boolean decodesAsciiIdentically(Charset cs) {
              return new String(ASCII_BYTES, cs).equals(ASCII_STRING);
          }

          /**
           * Runs the scan over {@link Charset#availableCharsets()} and prints
           * the findings to standard output.
           *
           * @param args ignored
           * @throws Exception declared for compatibility; failures of individual
           *         charsets are caught and printed, not propagated
           */
          public static void main(String[] args) throws Exception {
              for (Map.Entry<String,Charset> e
                       : Charset.availableCharsets().entrySet()) {
                  try {
                      String csn = e.getKey();
                      Charset cs = e.getValue();
                      System.out.println(csn);
                      if (csn.equals("x-IBM933")) continue; // hangs!

                      // Decode-only charsets cannot take part in the encode check.
                      if (! cs.canEncode()) continue;

                      if (! encodesAsciiIdentically(cs)) {
                          System.out.printf("%s -> bytes%n", csn);
                      }
                      if (! decodesAsciiIdentically(cs)) {
                          System.out.printf("%s -> chars%n", csn);
                      }
                  } catch (Throwable t) {
                      // Deliberately broad: a broken coder may fail with an Error
                      // (e.g. AssertionError under -ea) as well as an exception,
                      // and one bad charset must not stop the scan of the rest.
                      t.printStackTrace();
                  }
              }
          }
      }

      Martin

       $ /home/mb29450/bin/sun/jver /net/cremina/export/sherman/ws/rc/ jr tt
      ==> javac -source 1.6 -Xlint:all tt.java
      ==> java -esa -ea tt
      Big5
      Big5-HKSCS
      COMPOUND_TEXT
      COMPOUND_TEXT -> bytes
      COMPOUND_TEXT -> chars
      EUC-JP
      EUC-KR
      GB18030
      GB2312
      GBK
      IBM-Thai
      IBM-Thai -> bytes
      IBM-Thai -> chars
      IBM00858
      IBM01140
      IBM01140 -> bytes
      IBM01140 -> chars
      IBM01141
      IBM01141 -> bytes
      IBM01141 -> chars
      IBM01142
      IBM01142 -> bytes
      IBM01142 -> chars
      IBM01143
      IBM01143 -> bytes
      IBM01143 -> chars
      IBM01144
      IBM01144 -> bytes
      IBM01144 -> chars
      IBM01145
      IBM01145 -> bytes
      IBM01145 -> chars
      IBM01146
      IBM01146 -> bytes
      IBM01146 -> chars
      IBM01147
      IBM01147 -> bytes
      IBM01147 -> chars
      IBM01148
      IBM01148 -> bytes
      IBM01148 -> chars
      IBM01149
      IBM01149 -> bytes
      IBM01149 -> chars
      IBM037
      IBM037 -> bytes
      IBM037 -> chars
      IBM1026
      IBM1026 -> bytes
      IBM1026 -> chars
      IBM1047
      IBM1047 -> bytes
      IBM1047 -> chars
      IBM273
      IBM273 -> bytes
      IBM273 -> chars
      IBM277
      IBM277 -> bytes
      IBM277 -> chars
      IBM278
      IBM278 -> bytes
      IBM278 -> chars
      IBM280
      IBM280 -> bytes
      IBM280 -> chars
      IBM284
      IBM284 -> bytes
      IBM284 -> chars
      IBM285
      IBM285 -> bytes
      IBM285 -> chars
      IBM297
      IBM297 -> bytes
      IBM297 -> chars
      IBM420
      IBM420 -> bytes
      IBM420 -> chars
      IBM424
      IBM424 -> bytes
      IBM424 -> chars
      IBM437
      IBM500
      IBM500 -> bytes
      IBM500 -> chars
      IBM775
      IBM850
      IBM852
      IBM855
      IBM857
      IBM860
      IBM861
      IBM862
      IBM863
      IBM864
      IBM864 -> bytes
      IBM864 -> chars
      IBM865
      IBM866
      IBM868
      IBM869
      IBM870
      IBM870 -> bytes
      IBM870 -> chars
      IBM871
      IBM871 -> bytes
      IBM871 -> chars
      IBM918
      IBM918 -> bytes
      IBM918 -> chars
      ISO-2022-CN
      ISO-2022-JP
      ISO-2022-JP -> chars
      ISO-2022-JP-2
      ISO-2022-JP-2 -> chars
      ISO-2022-KR
      java.lang.NullPointerException
      at sun.nio.cs.ext.ISO2022$Decoder.decodeArrayLoop(ISO2022.java:174)
      at sun.nio.cs.ext.ISO2022$Decoder.decodeLoop(ISO2022.java:384)
      at java.nio.charset.CharsetDecoder.decode(CharsetDecoder.java:544)
      at java.lang.StringCoding$StringDecoder.decode(StringCoding.java:136)
      at java.lang.StringCoding.decode(StringCoding.java:169)
      at java.lang.String.<init>(String.java:401)
      at java.lang.String.<init>(String.java:429)
      at tt.main(tt.java:30)
      ISO-8859-1
      ISO-8859-13
      ISO-8859-15
      ISO-8859-2
      ISO-8859-3
      ISO-8859-4
      ISO-8859-5
      ISO-8859-6
      ISO-8859-7
      ISO-8859-7 -> bytes
      ISO-8859-7 -> chars
      ISO-8859-8
      ISO-8859-9
      JIS_X0201
      JIS_X0212-1990
      JIS_X0212-1990 -> bytes
      JIS_X0212-1990 -> chars
      KOI8-R
      KOI8-U
      KOI8-U -> bytes
      KOI8-U -> chars
      Shift_JIS
      TIS-620
      US-ASCII
      UTF-16
      UTF-16 -> bytes
      UTF-16 -> chars
      UTF-16BE
      UTF-16BE -> bytes
      UTF-16BE -> chars
      UTF-16LE
      UTF-16LE -> bytes
      UTF-16LE -> chars
      UTF-32
      UTF-32 -> bytes
      UTF-32 -> chars
      UTF-32BE
      UTF-32BE -> bytes
      UTF-32BE -> chars
      UTF-32LE
      UTF-32LE -> bytes
      UTF-32LE -> chars
      UTF-8
      windows-1250
      windows-1251
      windows-1252
      windows-1253
      windows-1254
      windows-1255
      windows-1256
      windows-1257
      windows-1258
      windows-31j
      x-Big5-Solaris
      x-euc-jp-linux
      x-EUC-TW
      x-eucJP-Open
      x-IBM1006
      x-IBM1025
      x-IBM1025 -> bytes
      x-IBM1025 -> chars
      x-IBM1046
      x-IBM1097
      x-IBM1097 -> bytes
      x-IBM1097 -> chars
      x-IBM1098
      x-IBM1112
      x-IBM1112 -> bytes
      x-IBM1112 -> chars
      x-IBM1122
      x-IBM1122 -> bytes
      x-IBM1122 -> chars
      x-IBM1123
      x-IBM1123 -> bytes
      x-IBM1123 -> chars
      x-IBM1124
      x-IBM1381
      x-IBM1383
      x-IBM33722
      x-IBM33722 -> bytes
      x-IBM33722 -> chars
      x-IBM737
      x-IBM856
      x-IBM874
      x-IBM875
      x-IBM875 -> bytes
      x-IBM875 -> chars
      x-IBM921
      x-IBM922
      x-IBM930
      x-IBM930 -> bytes
      x-IBM930 -> chars
      x-IBM933
      x-IBM935
      x-IBM935 -> bytes
      x-IBM935 -> chars
      x-IBM937
      x-IBM937 -> bytes
      x-IBM937 -> chars
      x-IBM939
      x-IBM939 -> bytes
      x-IBM939 -> chars
      x-IBM942
      x-IBM942 -> bytes
      x-IBM942 -> chars
      x-IBM942C
      x-IBM943
      x-IBM943 -> bytes
      x-IBM943 -> chars
      x-IBM943C
      x-IBM948
      x-IBM949
      java.lang.AssertionError
      at sun.nio.cs.Surrogate$Parser.parse(Surrogate.java:205)
      at sun.nio.cs.ext.IBM949$Encoder.encodeArrayLoop(IBM949.java:1554)
      at sun.nio.cs.ext.IBM949$Encoder.encodeLoop(IBM949.java:5239)
      at java.nio.charset.CharsetEncoder.encode(CharsetEncoder.java:544)
      at java.lang.StringCoding$StringEncoder.encode(StringCoding.java:230)
      at java.lang.StringCoding.encode(StringCoding.java:262)
      at java.lang.String.getBytes(String.java:820)
      at tt.main(tt.java:26)
      x-IBM949C
      java.lang.AssertionError
      at sun.nio.cs.Surrogate$Parser.parse(Surrogate.java:205)
      at sun.nio.cs.ext.IBM949$Encoder.encodeArrayLoop(IBM949.java:1554)
      at sun.nio.cs.ext.IBM949$Encoder.encodeLoop(IBM949.java:5239)
      at java.nio.charset.CharsetEncoder.encode(CharsetEncoder.java:544)
      at java.lang.StringCoding$StringEncoder.encode(StringCoding.java:230)
      at java.lang.StringCoding.encode(StringCoding.java:262)
      at java.lang.String.getBytes(String.java:820)
      at tt.main(tt.java:26)
      x-IBM950
      x-IBM964
      x-IBM970
      java.lang.AssertionError
      at sun.nio.cs.Surrogate$Parser.parse(Surrogate.java:205)
      at sun.nio.cs.ext.IBM970$Encoder.encodeArrayLoop(IBM970.java:1272)
      at sun.nio.cs.ext.IBM970$Encoder.encodeLoop(IBM970.java:1643)
      at java.nio.charset.CharsetEncoder.encode(CharsetEncoder.java:544)
      at java.lang.StringCoding$StringEncoder.encode(StringCoding.java:230)
      at java.lang.StringCoding.encode(StringCoding.java:262)
      at java.lang.String.getBytes(String.java:820)
      at tt.main(tt.java:26)
      x-ISCII91
      x-ISO-2022-CN-CNS
      x-ISO-2022-CN-CNS -> chars
      x-ISO-2022-CN-GB
      x-ISO-2022-CN-GB -> chars
      x-iso-8859-11
      x-JIS0208
      x-JIS0208 -> bytes
      x-JIS0208 -> chars
      x-JISAutoDetect
      x-Johab
      x-MacArabic
      x-MacCentralEurope
      x-MacCroatian
      x-MacCyrillic
      x-MacDingbat
      x-MacDingbat -> bytes
      x-MacDingbat -> chars
      x-MacGreek
      x-MacHebrew
      x-MacIceland
      x-MacRoman
      x-MacRomania
      x-MacSymbol
      x-MacSymbol -> bytes
      x-MacSymbol -> chars
      x-MacThai
      x-MacTurkish
      x-MacUkraine
      x-MS950-HKSCS
      x-mswin-936
      x-PCK
      x-UTF-16LE-BOM
      x-UTF-16LE-BOM -> bytes
      x-UTF-16LE-BOM -> chars
      X-UTF-32BE-BOM
      X-UTF-32BE-BOM -> bytes
      X-UTF-32BE-BOM -> chars
      X-UTF-32LE-BOM
      X-UTF-32LE-BOM -> bytes
      X-UTF-32LE-BOM -> chars
      x-windows-50220
      x-windows-50220 -> chars
      x-windows-50221
      x-windows-50221 -> chars
      x-windows-874
      x-windows-949
      x-windows-950
      x-windows-iso2022jp
      x-windows-iso2022jp -> chars

            sherman Xueming Shen
            sherman Xueming Shen
            Votes:
            0 Vote for this issue
            Watchers:
            0 Start watching this issue

              Created:
              Updated:
              Resolved:
              Imported:
              Indexed: