-
Bug
-
Resolution: Unresolved
-
P3
-
11, 17, 18, 19, 20
-
None
-
generic
-
generic
Platform: Windows Server 2016 MUI with Arabic setting
The default code page of the command prompt on Arabic Windows appears to be 720.
However, code page 720 is not supported by OpenJDK,
so I used code page 864 (Arabic (864)) instead.
I ran the following commands in the command prompt:
===================================
>chcp 864
Active code page: 864
>type cpname.java
/**
 * Reproducer: prints the name of the stdout encoding, then the single byte
 * 0x25 decoded with that encoding.
 */
public class cpname {
    public static void main(String[] args) throws Exception {
        // Use "sun.stdout.encoding" when it is set; otherwise fall back to "stdout.encoding".
        String encodingName = System.getProperty(
                "sun.stdout.encoding", System.getProperty("stdout.encoding"));
        System.out.println(encodingName);

        // Decode the lone byte 0x25 using the encoding named above.
        byte[] singleByte = { 0x25 };
        String decoded = new String(singleByte, encodingName);
        System.out.println(decoded);
    }
}
>jdk-20\bin\java cpname.java
cp864
ﻋﺕ
===================================
Code page 864 is supported by OpenJDK:
* when decoding, 0x25 is converted to U+066A
* when encoding, U+066A is converted to 0x25
So the expected result is "%".
I changed the test program to also print the charset of System.out and ran it again:
===================================
>type cpname.java
/**
 * Reproducer: prints the name of the stdout encoding, the charset actually
 * used by {@code System.out}, and the single byte 0x25 decoded with the
 * named encoding.
 */
public class cpname {
    public static void main(String[] args) throws Exception {
        // Use "sun.stdout.encoding" when it is set; otherwise fall back to "stdout.encoding".
        String encodingName = System.getProperty(
                "sun.stdout.encoding", System.getProperty("stdout.encoding"));
        System.out.println(encodingName);

        // Show which charset System.out really uses, for comparison with the name above.
        System.out.println(System.out.charset());

        // Decode the lone byte 0x25 using the encoding named above.
        byte[] singleByte = { 0x25 };
        String decoded = new String(singleByte, encodingName);
        System.out.println(decoded);
    }
}
>jdk-20\bin\java cpname.java
cp864
UTF-8
ﻋﺕ
===================================
System.out reports UTF-8 rather than cp864, which means the 864 charset is not in the java.base module.
To check the other code pages, I generated a list of code pages with the following PowerShell script and embedded it in another test program.
===================================
>type encodinglist.ps1
# Prints the code page number of every encoding returned by
# [System.Text.Encoding]::GetEncodings() as a quoted, comma-separated list on one line.
# NOTE(review): the result of the next line is discarded by Out-Null; it looks like a
# type hint for $EncodingInfo with no effect on the output - confirm whether it is needed.
[System.Text.EncodingInfo]$EncodingInfo | Out-Null
# Collect the encodings reported by the runtime.
$Encodings = [System.Text.Encoding]::GetEncodings()
# Emit each code page as "N", with -NoNewLine so the whole list stays on a single line.
foreach($EncodingInfo in $Encodings ) {Write-Host ('"'+$EncodingInfo.CodePage+'", ') -NoNewLine}
>powershell -command .\encodinglist.ps1
"37", "437", "500", "708", "720", "737", "775", "850", "852", "855", "857", "858", "860", "861", "862", "863", "864", "865", "866", "869", "870", "874", "875", "932", "936", "949", "950", "1026", "1047", "1140", "1141", "1142", "1143", "1144", "1145", "1146", "1147", "1148", "1149", "1200", "1201", "1250", "1251", "1252", "1253", "1254", "1255", "1256", "1257", "1258", "1361", "10000", "10001", "10002", "10003", "10004", "10005", "10006", "10007", "10008", "10010", "10017", "10021", "10029", "10079", "10081", "10082", "12000", "12001", "20000", "20001", "20002", "20003", "20004", "20005", "20105", "20106", "20107", "20108", "20127", "20261", "20269", "20273", "20277", "20278", "20280", "20284", "20285", "20290", "20297", "20420", "20423", "20424", "20833", "20838", "20866", "20871", "20880", "20905", "20924", "20932", "20936", "20949", "21025", "21866", "28591", "28592", "28593", "28594", "28595", "28596", "28597", "28598", "28599", "28603", "28605", "29001", "38598", "50220", "50221", "50222", "50225", "50227", "51932", "51936", "51949", "52936", "54936", "57002", "57003", "57004", "57005", "57006", "57007", "57008", "57009", "57010", "57011", "65000", "65001",
>type chkcp.java
import java.nio.charset.*;
import java.util.*;
/**
 * Scans a fixed list of Windows code page numbers and prints every one whose
 * charset is supported, has an encoder with at most one byte per char, encodes
 * a space as 0x20, and is implemented by a class outside the java.base module.
 */
public class ckcp {
    /** Code page numbers to probe (the list printed by encodinglist.ps1). */
    final static String[] cplist = {
        "37", "437", "500", "708", "720", "737", "775", "850", "852", "855",
        "857", "858", "860", "861", "862", "863", "864", "865", "866", "869",
        "870", "874", "875", "932", "936", "949", "950", "1026", "1047",
        "1140", "1141", "1142", "1143", "1144", "1145", "1146", "1147",
        "1148", "1149", "1200", "1201", "1250", "1251", "1252", "1253",
        "1254", "1255", "1256", "1257", "1258", "1361", "10000", "10001",
        "10002", "10003", "10004", "10005", "10006", "10007", "10008",
        "10010", "10017", "10021", "10029", "10079", "10081", "10082",
        "12000", "12001", "20000", "20001", "20002", "20003", "20004",
        "20005", "20105", "20106", "20107", "20108", "20127", "20261",
        "20269", "20273", "20277", "20278", "20280", "20284", "20285",
        "20290", "20297", "20420", "20423", "20424", "20833", "20838",
        "20866", "20871", "20880", "20905", "20924", "20932", "20936",
        "20949", "21025", "21866", "28591", "28592", "28593", "28594",
        "28595", "28596", "28597", "28598", "28599", "28603", "28605",
        "29001", "38598", "50220", "50221", "50222", "50225", "50227",
        "51932", "51936", "51949", "52936", "54936", "57002", "57003",
        "57004", "57005", "57006", "57007", "57008", "57009", "57010",
        "57011", "65000", "65001", };

    /** Expected encoding of a single space in an ASCII-based charset. */
    final static byte[] space = new byte[] { 0x20 };

    /**
     * Probes each entry of {@link #cplist} and prints "&lt;code page&gt; &lt;class simple name&gt;"
     * for the charsets that pass every filter.
     *
     * @param args ignored
     */
    public static void main(String[] args) throws Exception {
        for (String codePage : cplist) {
            // Code pages with fewer than three digits are looked up with a leading zero ("cp037").
            String prefix = codePage.length() < 3 ? "cp0" : "cp";
            String charsetName = prefix + codePage;
            if (!Charset.isSupported(charsetName)) {
                continue;
            }

            Charset charset = Charset.forName(charsetName);
            CharsetEncoder encoder = charset.newEncoder();

            // Keep single-byte charsets only.
            boolean singleByte = encoder.maxBytesPerChar() == 1;
            if (!singleByte) {
                continue;
            }

            // Keep ASCII-based charsets only: a space must encode to the byte 0x20.
            byte[] encodedSpace = " ".getBytes(charset);
            if (!Arrays.equals(space, encodedSpace)) {
                continue;
            }

            // Report only charsets whose implementation class is not in java.base.
            Class<?> implementation = charset.getClass();
            String moduleName = implementation.getModule().getName();
            if ("java.base".equals(moduleName)) {
                continue;
            }
            System.out.println(codePage + " " + implementation.getSimpleName());
        }
    }
}
>jdk-20\bin\java chkcp.java
860 IBM860
861 IBM861
863 IBM863
864 IBM864
865 IBM865
869 IBM869
===================================
The above 6 charsets should be moved from the jdk.charsets module to the java.base module on the Windows platform.
The default code page of the command prompt on Arabic Windows appears to be 720.
However, code page 720 is not supported by OpenJDK,
so I used code page 864 (Arabic (864)) instead.
I ran the following commands in the command prompt:
===================================
>chcp 864
Active code page: 864
>type cpname.java
/**
 * Reproducer: prints the name of the stdout encoding, then the single byte
 * 0x25 decoded with that encoding.
 */
public class cpname {
    public static void main(String[] args) throws Exception {
        // Use "sun.stdout.encoding" when it is set; otherwise fall back to "stdout.encoding".
        String encodingName = System.getProperty(
                "sun.stdout.encoding", System.getProperty("stdout.encoding"));
        System.out.println(encodingName);

        // Decode the lone byte 0x25 using the encoding named above.
        byte[] singleByte = { 0x25 };
        String decoded = new String(singleByte, encodingName);
        System.out.println(decoded);
    }
}
>jdk-20\bin\java cpname.java
cp864
ﻋﺕ
===================================
Code page 864 is supported by OpenJDK:
* when decoding, 0x25 is converted to U+066A
* when encoding, U+066A is converted to 0x25
So the expected result is "%".
I changed the test program to also print the charset of System.out and ran it again:
===================================
>type cpname.java
/**
 * Reproducer: prints the name of the stdout encoding, the charset actually
 * used by {@code System.out}, and the single byte 0x25 decoded with the
 * named encoding.
 */
public class cpname {
    public static void main(String[] args) throws Exception {
        // Use "sun.stdout.encoding" when it is set; otherwise fall back to "stdout.encoding".
        String encodingName = System.getProperty(
                "sun.stdout.encoding", System.getProperty("stdout.encoding"));
        System.out.println(encodingName);

        // Show which charset System.out really uses, for comparison with the name above.
        System.out.println(System.out.charset());

        // Decode the lone byte 0x25 using the encoding named above.
        byte[] singleByte = { 0x25 };
        String decoded = new String(singleByte, encodingName);
        System.out.println(decoded);
    }
}
>jdk-20\bin\java cpname.java
cp864
UTF-8
ﻋﺕ
===================================
System.out reports UTF-8 rather than cp864, which means the 864 charset is not in the java.base module.
To check the other code pages, I generated a list of code pages with the following PowerShell script and embedded it in another test program.
===================================
>type encodinglist.ps1
# Prints the code page number of every encoding returned by
# [System.Text.Encoding]::GetEncodings() as a quoted, comma-separated list on one line.
# NOTE(review): the result of the next line is discarded by Out-Null; it looks like a
# type hint for $EncodingInfo with no effect on the output - confirm whether it is needed.
[System.Text.EncodingInfo]$EncodingInfo | Out-Null
# Collect the encodings reported by the runtime.
$Encodings = [System.Text.Encoding]::GetEncodings()
# Emit each code page as "N", with -NoNewLine so the whole list stays on a single line.
foreach($EncodingInfo in $Encodings ) {Write-Host ('"'+$EncodingInfo.CodePage+'", ') -NoNewLine}
>powershell -command .\encodinglist.ps1
"37", "437", "500", "708", "720", "737", "775", "850", "852", "855", "857", "858", "860", "861", "862", "863", "864", "865", "866", "869", "870", "874", "875", "932", "936", "949", "950", "1026", "1047", "1140", "1141", "1142", "1143", "1144", "1145", "1146", "1147", "1148", "1149", "1200", "1201", "1250", "1251", "1252", "1253", "1254", "1255", "1256", "1257", "1258", "1361", "10000", "10001", "10002", "10003", "10004", "10005", "10006", "10007", "10008", "10010", "10017", "10021", "10029", "10079", "10081", "10082", "12000", "12001", "20000", "20001", "20002", "20003", "20004", "20005", "20105", "20106", "20107", "20108", "20127", "20261", "20269", "20273", "20277", "20278", "20280", "20284", "20285", "20290", "20297", "20420", "20423", "20424", "20833", "20838", "20866", "20871", "20880", "20905", "20924", "20932", "20936", "20949", "21025", "21866", "28591", "28592", "28593", "28594", "28595", "28596", "28597", "28598", "28599", "28603", "28605", "29001", "38598", "50220", "50221", "50222", "50225", "50227", "51932", "51936", "51949", "52936", "54936", "57002", "57003", "57004", "57005", "57006", "57007", "57008", "57009", "57010", "57011", "65000", "65001",
>type chkcp.java
import java.nio.charset.*;
import java.util.*;
/**
 * Scans a fixed list of Windows code page numbers and prints every one whose
 * charset is supported, has an encoder with at most one byte per char, encodes
 * a space as 0x20, and is implemented by a class outside the java.base module.
 */
public class ckcp {
    /** Code page numbers to probe (the list printed by encodinglist.ps1). */
    final static String[] cplist = {
        "37", "437", "500", "708", "720", "737", "775", "850", "852", "855",
        "857", "858", "860", "861", "862", "863", "864", "865", "866", "869",
        "870", "874", "875", "932", "936", "949", "950", "1026", "1047",
        "1140", "1141", "1142", "1143", "1144", "1145", "1146", "1147",
        "1148", "1149", "1200", "1201", "1250", "1251", "1252", "1253",
        "1254", "1255", "1256", "1257", "1258", "1361", "10000", "10001",
        "10002", "10003", "10004", "10005", "10006", "10007", "10008",
        "10010", "10017", "10021", "10029", "10079", "10081", "10082",
        "12000", "12001", "20000", "20001", "20002", "20003", "20004",
        "20005", "20105", "20106", "20107", "20108", "20127", "20261",
        "20269", "20273", "20277", "20278", "20280", "20284", "20285",
        "20290", "20297", "20420", "20423", "20424", "20833", "20838",
        "20866", "20871", "20880", "20905", "20924", "20932", "20936",
        "20949", "21025", "21866", "28591", "28592", "28593", "28594",
        "28595", "28596", "28597", "28598", "28599", "28603", "28605",
        "29001", "38598", "50220", "50221", "50222", "50225", "50227",
        "51932", "51936", "51949", "52936", "54936", "57002", "57003",
        "57004", "57005", "57006", "57007", "57008", "57009", "57010",
        "57011", "65000", "65001", };

    /** Expected encoding of a single space in an ASCII-based charset. */
    final static byte[] space = new byte[] { 0x20 };

    /**
     * Probes each entry of {@link #cplist} and prints "&lt;code page&gt; &lt;class simple name&gt;"
     * for the charsets that pass every filter.
     *
     * @param args ignored
     */
    public static void main(String[] args) throws Exception {
        for (String codePage : cplist) {
            // Code pages with fewer than three digits are looked up with a leading zero ("cp037").
            String prefix = codePage.length() < 3 ? "cp0" : "cp";
            String charsetName = prefix + codePage;
            if (!Charset.isSupported(charsetName)) {
                continue;
            }

            Charset charset = Charset.forName(charsetName);
            CharsetEncoder encoder = charset.newEncoder();

            // Keep single-byte charsets only.
            boolean singleByte = encoder.maxBytesPerChar() == 1;
            if (!singleByte) {
                continue;
            }

            // Keep ASCII-based charsets only: a space must encode to the byte 0x20.
            byte[] encodedSpace = " ".getBytes(charset);
            if (!Arrays.equals(space, encodedSpace)) {
                continue;
            }

            // Report only charsets whose implementation class is not in java.base.
            Class<?> implementation = charset.getClass();
            String moduleName = implementation.getModule().getName();
            if ("java.base".equals(moduleName)) {
                continue;
            }
            System.out.println(codePage + " " + implementation.getSimpleName());
        }
    }
}
>jdk-20\bin\java chkcp.java
860 IBM860
861 IBM861
863 IBM863
864 IBM864
865 IBM865
869 IBM869
===================================
The above 6 charsets should be moved from the jdk.charsets module to the java.base module on the Windows platform.
- links to
-
Review openjdk/jdk/9761