-
Bug
-
Resolution: Fixed
-
P3
-
6
-
b62
-
generic
-
generic
--------------------------------------------------------------------
import java.io.*;
import java.util.*;
import java.nio.charset.*;
import java.nio.*;
/**
 * Reproduction program: for every available charset that supports encoding,
 * reports the first char for which {@link CharsetEncoder#canEncode(char)}
 * disagrees with an actual attempt to encode that char.
 *
 * <p>One line is printed per disagreeing charset, in the form
 * {@code charset="NAME" char='\\uXXXX'}. A charset whose two answers agree for
 * every char produces no output.
 */
public class FindNonEncodableChars {

    /**
     * Asks the encoder whether it claims to be able to encode {@code c}.
     *
     * @param enc the encoder to query
     * @param c   the char to test
     * @return the result of {@code enc.canEncode(c)}
     */
    static boolean encodable1(CharsetEncoder enc, char c) {
        return enc.canEncode(c);
    }

    /**
     * Determines encodability by actually encoding {@code c} on its own.
     *
     * @param enc the encoder to use
     * @param c   the char to test
     * @return {@code true} if encoding completed, {@code false} if it threw
     *         {@link CharacterCodingException}
     */
    static boolean encodable2(CharsetEncoder enc, char c) {
        try {
            enc.encode(CharBuffer.wrap(new char[] {c}));
            return true;
        } catch (CharacterCodingException e) {
            return false;
        } finally {
            // A failed encode(CharBuffer) leaves the encoding operation
            // unfinished; without a reset, a later canEncode call on the same
            // encoder may throw IllegalStateException.
            enc.reset();
        }
    }

    /**
     * Scans the whole char range, U+0000 through U+FFFF inclusive, for the
     * first char on which {@link #encodable1} and {@link #encodable2} differ.
     *
     * @param enc the encoder to test
     * @return the first disagreeing char value, or {@code -1} if there is none
     */
    private static int firstMismatch(CharsetEncoder enc) {
        // Iterate with an int: a char counter can never exceed '\uffff', so an
        // inclusive upper bound on a char loop variable would wrap forever.
        for (int i = Character.MIN_VALUE; i <= Character.MAX_VALUE; i++) {
            char c = (char) i;
            if (encodable1(enc, c) != encodable2(enc, c)) {
                return i;
            }
        }
        return -1;
    }

    /**
     * Runs the scan over every charset returned by
     * {@link Charset#availableCharsets()} and prints the first mismatch found
     * for each one.
     *
     * @param args ignored
     * @throws Exception if any unexpected failure occurs during the scan
     */
    public static void main(String[] args) throws Exception {
        for (Map.Entry<String, Charset> e : Charset.availableCharsets().entrySet()) {
            String csn = e.getKey();
            Charset cs = e.getValue();
            if (csn.equals("x-IBM933")) {
                continue; // hangs!
            }
            if (!cs.canEncode()) {
                continue; // charset does not support encoding; nothing to compare
            }
            int mismatch = firstMismatch(cs.newEncoder());
            if (mismatch >= 0) {
                System.out.printf("charset=\"%s\" char=\'\\u%04x\'%n", csn, mismatch);
            }
        }
    }
}
--------------------------------------------------------------------
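The program above is expected to print nothing, because canEncode(c) and an actual encode of c should agree for every char. Instead, it prints one line per charset, showing the first char at which the two disagree: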
charset="Big5-HKSCS" char='\u0000'
charset="GB18030" char='\ud800'
charset="IBM-Thai" char='\u0000'
charset="IBM00858" char='\u0000'
charset="IBM01140" char='\u0000'
charset="IBM01141" char='\u0000'
charset="IBM01142" char='\u0000'
charset="IBM01143" char='\u0000'
charset="IBM01144" char='\u0000'
charset="IBM01145" char='\u0000'
charset="IBM01146" char='\u0000'
charset="IBM01147" char='\u0000'
charset="IBM01148" char='\u0000'
charset="IBM01149" char='\u0000'
charset="IBM037" char='\u0000'
charset="IBM1026" char='\u0000'
charset="IBM1047" char='\u0000'
charset="IBM273" char='\u0000'
charset="IBM277" char='\u0000'
charset="IBM278" char='\u0000'
charset="IBM280" char='\u0000'
charset="IBM284" char='\u0000'
charset="IBM285" char='\u0000'
charset="IBM297" char='\u0000'
charset="IBM420" char='\u0000'
charset="IBM424" char='\u0000'
charset="IBM437" char='\u0000'
charset="IBM500" char='\u0000'
charset="IBM775" char='\u0000'
charset="IBM850" char='\u0000'
charset="IBM852" char='\u0000'
charset="IBM855" char='\u0000'
charset="IBM857" char='\u0000'
charset="IBM860" char='\u0000'
charset="IBM861" char='\u0000'
charset="IBM862" char='\u0000'
charset="IBM863" char='\u0000'
charset="IBM864" char='\u0000'
charset="IBM865" char='\u0000'
charset="IBM866" char='\u0000'
charset="IBM868" char='\u0000'
charset="IBM869" char='\u0000'
charset="IBM870" char='\u0000'
charset="IBM871" char='\u0000'
charset="IBM918" char='\u0000'
charset="ISO-2022-KR" char='\u0080'
charset="ISO-8859-13" char='\u0000'
charset="ISO-8859-15" char='\u0000'
charset="ISO-8859-2" char='\u0000'
charset="ISO-8859-3" char='\u0000'
charset="ISO-8859-4" char='\u0000'
charset="ISO-8859-5" char='\u0000'
charset="ISO-8859-6" char='\u0000'
charset="ISO-8859-7" char='\u0000'
charset="ISO-8859-8" char='\u0000'
charset="ISO-8859-9" char='\u0000'
charset="JIS_X0201" char='\u0000'
charset="KOI8-R" char='\u0000'
charset="TIS-620" char='\u0000'
charset="windows-1250" char='\u0000'
charset="windows-1251" char='\u0000'
charset="windows-1252" char='\u0000'
charset="windows-1253" char='\u0000'
charset="windows-1254" char='\u0000'
charset="windows-1255" char='\u0000'
charset="windows-1256" char='\u0000'
charset="windows-1257" char='\u0000'
charset="windows-1258" char='\u0000'
charset="x-Big5-Solaris" char='\u0000'
charset="x-EUC-TW" char='\u0000'
charset="x-IBM1006" char='\u0000'
charset="x-IBM1025" char='\u0000'
charset="x-IBM1046" char='\u0000'
charset="x-IBM1097" char='\u0000'
charset="x-IBM1098" char='\u0000'
charset="x-IBM1112" char='\u0000'
charset="x-IBM1122" char='\u0000'
charset="x-IBM1123" char='\u0000'
charset="x-IBM1124" char='\u0000'
charset="x-IBM737" char='\u0000'
charset="x-IBM856" char='\u0000'
charset="x-IBM874" char='\u0000'
charset="x-IBM875" char='\u0000'
charset="x-IBM921" char='\u0000'
charset="x-IBM922" char='\u0000'
charset="x-IBM949" char='\u0080'
charset="x-IBM949C" char='\u0080'
charset="x-IBM970" char='\u1119'
charset="x-ISCII91" char='\u0900'
charset="x-ISO-2022-CN-CNS" char='\ufffd'
charset="x-ISO-2022-CN-GB" char='\ufffd'
charset="x-iso-8859-11" char='\u0000'
charset="x-MacArabic" char='\u0000'
charset="x-MacCentralEurope" char='\u0000'
charset="x-MacCroatian" char='\u0000'
charset="x-MacCyrillic" char='\u0000'
charset="x-MacDingbat" char='\u0000'
charset="x-MacGreek" char='\u0000'
charset="x-MacHebrew" char='\u0000'
charset="x-MacIceland" char='\u0000'
charset="x-MacRoman" char='\u0000'
charset="x-MacRomania" char='\u0000'
charset="x-MacSymbol" char='\u0000'
charset="x-MacThai" char='\u0000'
charset="x-MacTurkish" char='\u0000'
charset="x-MacUkraine" char='\u0000'
charset="x-MS950-HKSCS" char='\u0000'
charset="x-PCK" char='\u0080'
charset="x-windows-874" char='\u0000'
###@###.### 2004-06-19
- relates to
-
JDK-5066874 cs.canEncode(c) incorrectly returns false when cs is Big5-like and c is ASCII
-
- Resolved
-
-
JDK-5066884 GB18030 canEncode returns incorrect result for Surrogates
-
- Resolved
-
-
JDK-6184158 Refactor duplicated canEncode method in UTF-16* encoders
-
- Resolved
-
-
JDK-5066863 Most single-byte encodings incorrectly return canEncode('\u0000') -> false
-
- Resolved
-
-
JDK-5066867 ISCII canEncode() returns true for some unmappable chars in the Devanagari block
-
- Resolved
-
-
JDK-5066879 cs.canEncode(c) incorrectly returns false when cs is EUC-TW and c <= 0xff
-
- Resolved
-
-
JDK-5066887 PCK encoder canEncode method returns incorrect values for some characters
-
- Resolved
-
-
JDK-6184166 Remove stray canEncode method from EUC_JP_Open.Decoder
-
- Resolved
-