During testing and development of JDK-8181147, it was discovered that the code that converts Strings to native char* in jni_util.c is inconsistent with String.getBytes("cp1252"): codepoints in the 0x80-0x9F range are erroneously converted to a Cp1252-specific character even though they are undefined in that encoding.
diff -r abd077475d3b src/java.base/share/native/libjava/jni_util.c
--- a/src/java.base/share/native/libjava/jni_util.c Fri Jun 16 16:49:43 2017 +0200
+++ b/src/java.base/share/native/libjava/jni_util.c Mon Jun 19 09:18:34 2017 +0200
@@ -639,9 +639,13 @@
for (i=0; i<len; i++) {
jchar c = str[i];
- if (c < 256)
- result[i] = (char)c;
- else switch(c) {
+ if (c < 256) {
+ if ((c >= 0x80) && (c <= 0x9f)) {
+ result[i] = '?';
+ } else {
+ result[i] = (char)c;
+ }
+ } else switch(c) {
case 0x20AC: result[i] = (char)0x80; break;
case 0x201A: result[i] = (char)0x82; break;
case 0x0192: result[i] = (char)0x83; break;
diff -r abd077475d3b src/java.base/share/native/libjava/jni_util.c
--- a/src/java.base/share/native/libjava/jni_util.c Fri Jun 16 16:49:43 2017 +0200
+++ b/src/java.base/share/native/libjava/jni_util.c Mon Jun 19 09:18:34 2017 +0200
@@ -639,9 +639,13 @@
for (i=0; i<len; i++) {
jchar c = str[i];
- if (c < 256)
- result[i] = (char)c;
- else switch(c) {
+ if (c < 256) {
+ if ((c >= 0x80) && (c <= 0x9f)) {
+ result[i] = '?';
+ } else {
+ result[i] = (char)c;
+ }
+ } else switch(c) {
case 0x20AC: result[i] = (char)0x80; break;
case 0x201A: result[i] = (char)0x82; break;
case 0x0192: result[i] = (char)0x83; break;