Uploaded image for project: 'JDK'
  1. JDK
  2. JDK-5065777

CharsetEncoder canEncode() methods often incorrectly return false

XMLWordPrintable

    • Type: Bug
    • Resolution: Fixed
    • Priority: P3
    • 7
    • 6
    • core-libs

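      The following program should print no output, because for every
      charset and every char, canEncode(c) should agree with whether
      actually encoding c succeeds: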

      --------------------------------------------------------------------
      import java.io.*;
      import java.util.*;
      import java.nio.charset.*;
      import java.nio.*;

      /**
       * Reproducer that cross-checks two ways of asking whether a
       * {@link CharsetEncoder} can encode a single char: the encoder's own
       * {@code canEncode(char)} answer versus actually attempting the encoding.
       * For each available charset it reports the first char on which the two
       * answers differ.
       */
      public class FindNonEncodableChars {

          /**
           * Reports what the encoder itself claims about {@code c}.
           *
           * @param enc the encoder to query
           * @param c   the char to test
           * @return the result of {@code enc.canEncode(c)}
           */
          static boolean encodable1(CharsetEncoder enc, char c) {
              final boolean claimed = enc.canEncode(c);
              return claimed;
          }

          /**
           * Determines encodability by actually encoding a one-char buffer.
           *
           * @param enc the encoder to exercise
           * @param c   the char to test
           * @return {@code true} if encoding completed, {@code false} if it
           *         threw a {@link CharacterCodingException}
           */
          static boolean encodable2(CharsetEncoder enc, char c) {
              final CharBuffer single = CharBuffer.wrap(new char[]{c});
              try {
                  enc.encode(single);
              } catch (CharacterCodingException e) {
                  // A coding failure is the "not encodable" answer here.
                  return false;
              }
              return true;
          }

          /**
           * Scans chars upward from U+0000, stopping before U+FFFF (which the
           * loop bound excludes), for the first one on which
           * {@link #encodable1} and {@link #encodable2} disagree.
           *
           * @param enc the encoder to probe
           * @return the disagreeing char as an int, or -1 if none was found
           */
          private static int firstDisagreement(CharsetEncoder enc) {
              char probe = '\u0000';
              while (probe < '\uffff') {
                  if (encodable1(enc, probe) != encodable2(enc, probe)) {
                      return probe;
                  }
                  probe++;
              }
              return -1;
          }

          /**
           * Walks every available charset and prints one line per charset whose
           * encoder gives inconsistent answers.
           *
           * @param args ignored
           * @throws Exception declared for convenience; nothing is caught here
           */
          public static void main(String[] args) throws Exception {
              for (Map.Entry<String,Charset> entry
                       : Charset.availableCharsets().entrySet()) {
                  final String name = entry.getKey();
                  final Charset charset = entry.getValue();

                  // x-IBM933 is skipped because it was noted to hang; charsets
                  // that cannot encode at all are skipped as well.
                  if (name.equals("x-IBM933") || !charset.canEncode()) {
                      continue;
                  }

                  final int mismatch = firstDisagreement(charset.newEncoder());
                  if (mismatch >= 0) {
                      System.out.printf("charset=\"%s\" char=\'\\u%04x\'%n",
                                        name, mismatch);
                  }
              }
          }
      }
      --------------------------------------------------------------------

      Instead, it prints the following (only the first disagreeing
      character is reported for each affected charset):

      charset="Big5-HKSCS" char='\u0000'
      charset="GB18030" char='\ud800'
      charset="IBM-Thai" char='\u0000'
      charset="IBM00858" char='\u0000'
      charset="IBM01140" char='\u0000'
      charset="IBM01141" char='\u0000'
      charset="IBM01142" char='\u0000'
      charset="IBM01143" char='\u0000'
      charset="IBM01144" char='\u0000'
      charset="IBM01145" char='\u0000'
      charset="IBM01146" char='\u0000'
      charset="IBM01147" char='\u0000'
      charset="IBM01148" char='\u0000'
      charset="IBM01149" char='\u0000'
      charset="IBM037" char='\u0000'
      charset="IBM1026" char='\u0000'
      charset="IBM1047" char='\u0000'
      charset="IBM273" char='\u0000'
      charset="IBM277" char='\u0000'
      charset="IBM278" char='\u0000'
      charset="IBM280" char='\u0000'
      charset="IBM284" char='\u0000'
      charset="IBM285" char='\u0000'
      charset="IBM297" char='\u0000'
      charset="IBM420" char='\u0000'
      charset="IBM424" char='\u0000'
      charset="IBM437" char='\u0000'
      charset="IBM500" char='\u0000'
      charset="IBM775" char='\u0000'
      charset="IBM850" char='\u0000'
      charset="IBM852" char='\u0000'
      charset="IBM855" char='\u0000'
      charset="IBM857" char='\u0000'
      charset="IBM860" char='\u0000'
      charset="IBM861" char='\u0000'
      charset="IBM862" char='\u0000'
      charset="IBM863" char='\u0000'
      charset="IBM864" char='\u0000'
      charset="IBM865" char='\u0000'
      charset="IBM866" char='\u0000'
      charset="IBM868" char='\u0000'
      charset="IBM869" char='\u0000'
      charset="IBM870" char='\u0000'
      charset="IBM871" char='\u0000'
      charset="IBM918" char='\u0000'
      charset="ISO-2022-KR" char='\u0080'
      charset="ISO-8859-13" char='\u0000'
      charset="ISO-8859-15" char='\u0000'
      charset="ISO-8859-2" char='\u0000'
      charset="ISO-8859-3" char='\u0000'
      charset="ISO-8859-4" char='\u0000'
      charset="ISO-8859-5" char='\u0000'
      charset="ISO-8859-6" char='\u0000'
      charset="ISO-8859-7" char='\u0000'
      charset="ISO-8859-8" char='\u0000'
      charset="ISO-8859-9" char='\u0000'
      charset="JIS_X0201" char='\u0000'
      charset="KOI8-R" char='\u0000'
      charset="TIS-620" char='\u0000'
      charset="windows-1250" char='\u0000'
      charset="windows-1251" char='\u0000'
      charset="windows-1252" char='\u0000'
      charset="windows-1253" char='\u0000'
      charset="windows-1254" char='\u0000'
      charset="windows-1255" char='\u0000'
      charset="windows-1256" char='\u0000'
      charset="windows-1257" char='\u0000'
      charset="windows-1258" char='\u0000'
      charset="x-Big5-Solaris" char='\u0000'
      charset="x-EUC-TW" char='\u0000'
      charset="x-IBM1006" char='\u0000'
      charset="x-IBM1025" char='\u0000'
      charset="x-IBM1046" char='\u0000'
      charset="x-IBM1097" char='\u0000'
      charset="x-IBM1098" char='\u0000'
      charset="x-IBM1112" char='\u0000'
      charset="x-IBM1122" char='\u0000'
      charset="x-IBM1123" char='\u0000'
      charset="x-IBM1124" char='\u0000'
      charset="x-IBM737" char='\u0000'
      charset="x-IBM856" char='\u0000'
      charset="x-IBM874" char='\u0000'
      charset="x-IBM875" char='\u0000'
      charset="x-IBM921" char='\u0000'
      charset="x-IBM922" char='\u0000'
      charset="x-IBM949" char='\u0080'
      charset="x-IBM949C" char='\u0080'
      charset="x-IBM970" char='\u1119'
      charset="x-ISCII91" char='\u0900'
      charset="x-ISO-2022-CN-CNS" char='\ufffd'
      charset="x-ISO-2022-CN-GB" char='\ufffd'
      charset="x-iso-8859-11" char='\u0000'
      charset="x-MacArabic" char='\u0000'
      charset="x-MacCentralEurope" char='\u0000'
      charset="x-MacCroatian" char='\u0000'
      charset="x-MacCyrillic" char='\u0000'
      charset="x-MacDingbat" char='\u0000'
      charset="x-MacGreek" char='\u0000'
      charset="x-MacHebrew" char='\u0000'
      charset="x-MacIceland" char='\u0000'
      charset="x-MacRoman" char='\u0000'
      charset="x-MacRomania" char='\u0000'
      charset="x-MacSymbol" char='\u0000'
      charset="x-MacThai" char='\u0000'
      charset="x-MacTurkish" char='\u0000'
      charset="x-MacUkraine" char='\u0000'
      charset="x-MS950-HKSCS" char='\u0000'
      charset="x-PCK" char='\u0080'
      charset="x-windows-874" char='\u0000'

      ###@###.### 2004-06-19

            sherman Xueming Shen
            martin Martin Buchholz
            Votes:
            0 Vote for this issue
            Watchers:
            0 Start watching this issue

              Created:
              Updated:
              Resolved:
              Imported:
              Indexed: