-
Bug
-
Resolution: Cannot Reproduce
-
P4
-
None
-
11, 13, 14
-
x86_64
-
windows
ADDITIONAL SYSTEM INFORMATION :
Processor: Intel(R) Core(TM) i7-4710HQ CPU @ 2.50GHz
Installed memory (RAM): 12.0GB
System type: 64-bit Operating System, x64 based processor
Windows 8.1
$ java -version
java version "12.0.2" 2019-07-16
Java(TM) SE Runtime Environment (build 12.0.2+10)
Java HotSpot(TM) 64-Bit Server VM (build 12.0.2+10, mixed mode, sharing)
A DESCRIPTION OF THE PROBLEM :
When strings contain just LATIN-1 characters, using the default mode of the JVM, that is with compact strings, String.charAt is much slower that if the strings contain characters above 0xFF.
When not using compact strings, the timings for LATIN-1 strings and non LATIN-1 strings are comparable and a little faster, which is the expected result.
STEPS TO FOLLOW TO REPRODUCE THE PROBLEM :
Compile and run the attached CharAtTest.java.
The output from the commands
java -XX:+CompactStrings CharAtTtest LATIN-1
will run a series of loops performing String.charAt on a string that contains LATIN-1 characters using compact strings.
java -XX:+CompactStrings CharAtTtest UTF-16
will run the test doing String.charAt on a string that contains characters above 0xFF.
Rates are output for a test that measures the rate per second of the number of loops of 1000 times looping through a string of 100 characters doing charAt.
ACTUAL -
Following shows the rates for the java commands running a single test.
java -XX:+CompactStrings CharAtTtest LATIN-1 9,398
java -XX:+CompactStrings CharAtTtest UTF-16 42,147
java -XX:-CompactStrings CharAtTtest LATIN-1 50,114
java -XX:-CompactStrings CharAtTtest UTF-16 49,971
That is, LATIN-1 is much slower than UTF-16 when using compact strings.
Not using compact strings is a bit faster and similar rates for LATIN-1 and UTF-16 strings.
Also interesting is running different combinations of the tests in the same run of the JVM.
java -XX:+CompactStrings CharAtTtest LATIN-1 UTF-16 9,399 2,977
java -XX:+CompactStrings CharAtTtest UTF-16 LATIN-1 42,159 43,134
java -XX:-CompactStrings CharAtTtest LATIN-1 UTF-16 49,950 49,924
java -XX:-CompactStrings CharAtTtest UTF-16 LATIN-1 50,011 49,899
With compact strings, running LATIN-1 first then UTF-16, UTF-16 is really slow.
However running in the other order, UTF-16 then LATIN-1, the LATIN-1 loops are as fast as UTF-16.
In either order, without compact strings they produce similar rates.
---------- BEGIN SOURCE ----------
import java.util.ArrayList;
import java.util.List;
public class CharAtTtest {
private static final int TARGET_TEST_TIME = 1_000;
private static final int INITIAL_COUNT = 1;
private static final int MINIMUM_TRIAL_TIME = 3_000;
private static final int MAXIMUM_TRIAL_TIME = 5_000;
private static final int ZERO_TIME_INCREASE = 1_000;
private static final int MINIMUM_TOLERANCE = 50;
private static final int TEST_COUNT = 4;
private final List<TimedTestResult> testResults = new ArrayList<>();
public static void main(String[] args) {
if (args.length == 0) {
usage();
}
for (String arg : args) {
if (!"LATIN-1|UTF-16".contains(arg)) {
usage();
}
}
for (String arg : args) {
new CharAtTtest().run(arg);
}
}
private static void usage() {
System.out.println("Usage CharAtTtest type...");
System.out.println("where type is LATIN-1 or UTF-16");
System.exit(1);
}
public void run(String testType) {
System.out.println(testType);
TimedTest timedTest = new TimedTest(testType);
TimedTestResult benchmark = calibrate(timedTest);
System.out.println("");
if (benchmark != null) {
int testCount = benchmark.getCount();
for (int test = 0; test < TEST_COUNT; test++) {
int time = timeRunMillis(timedTest, testCount);
TimedTestResult result = new TimedTestResult(testCount, time);
testResults.add(result);
System.out.println("Test " + (test + 1) + " " + result.toString());
}
System.out.println("");
System.out.println(testType + " average rate: "
+ String.format("%.2f", calculateAverageRate()));
System.out.println("");
}
}
private TimedTestResult calibrate(TimedTest timedTest) {
int count = INITIAL_COUNT;
int totalTime = 0;
TimedTestResult result = null;
for (int trial = 0; totalTime < MAXIMUM_TRIAL_TIME && trial < 100; trial++) {
int time = timeRunMillis(timedTest, count);
totalTime += time;
result = new TimedTestResult(count, time);
System.out.println("Trial " + (trial + 1) + " " + result.toString());
if (time <= 10) {
long nextCount = (long) count * ZERO_TIME_INCREASE;
if (nextCount > Integer.MAX_VALUE) {
break;
}
count = (int) nextCount;
} else {
int tolerance = time / count + MINIMUM_TOLERANCE;
if (totalTime >= MINIMUM_TRIAL_TIME
&& time <= TARGET_TEST_TIME + tolerance
&& time >= TARGET_TEST_TIME - tolerance) {
break;
} else {
long nextCount = (long) count * TARGET_TEST_TIME / time;
if (nextCount == 0) {
nextCount = 1;
} else if (nextCount > Integer.MAX_VALUE) {
break;
}
count = (int) nextCount;
}
}
}
return result;
}
private int timeRunMillis(TimedTest timedTest, int count) {
long startTime = System.nanoTime();
timedTest.run(count);
return (int) ((System.nanoTime() - startTime + 500_000) / 1_000_000);
}
private double calculateAverageRate() {
Double total = 0.0;
for (TimedTestResult result : testResults) {
total += result.getRate();
}
return total / (testResults.size());
}
public static class TimedTestResult {
private final int count;
private final int millis;
public TimedTestResult(int count, int millis) {
this.count = count;
this.millis = millis;
}
public int getCount() {
return count;
}
public int getTimeMillis() {
return millis;
}
public double getRate() {
if (millis == 0) {
return 0.0;
}
return (double) count / millis * 1000;
}
@Override
public String toString() {
return "count " + count + ", timeMillis " + millis
+ ", rate " + String.format("%.2f", getRate());
}
}
public static class TimedTest {
private final int appendCount = 100;
private final String appendChars;
public TimedTest(String type) {
String chars = "abcdefghijklmnopqrstuvwxyz";
char[] wrk = new char[appendCount];
for (int k = 0; k < appendCount; k++) {
wrk[k] = chars.charAt(k % chars.length());
}
if ("UTF-16".equals(type)) {
for (int i = 0; i < wrk.length; i++) {
wrk[i] = (char) (wrk[i] + 0xff);
}
}
appendChars = new String(wrk);
}
private void testChar(char ch) {
if (ch < ' ') {
throw new RuntimeException();
}
}
public void run(int count) {
for (int i = 0; i < count; i++) {
for (int j = 0; j < 1000; j++) {
for (int k = 0; k < appendCount; k++) {
testChar(appendChars.charAt(k));
}
}
}
}
}
}
---------- END SOURCE ----------
CUSTOMER SUBMITTED WORKAROUND :
Don't use compact strings.
FREQUENCY : always
Processor: Intel(R) Core(TM) i7-4710HQ CPU @ 2.50GHz
Installed memory (RAM): 12.0GB
System type: 64-bit Operating System, x64 based processor
Windows 8.1
$ java -version
java version "12.0.2" 2019-07-16
Java(TM) SE Runtime Environment (build 12.0.2+10)
Java HotSpot(TM) 64-Bit Server VM (build 12.0.2+10, mixed mode, sharing)
A DESCRIPTION OF THE PROBLEM :
When strings contain just LATIN-1 characters, using the default mode of the JVM, that is with compact strings, String.charAt is much slower that if the strings contain characters above 0xFF.
When not using compact strings, the timings for LATIN-1 strings and non LATIN-1 strings are comparable and a little faster, which is the expected result.
STEPS TO FOLLOW TO REPRODUCE THE PROBLEM :
Compile and run the attached CharAtTest.java.
The output from the commands
java -XX:+CompactStrings CharAtTtest LATIN-1
will run a series of loops performing String.charAt on a string that contains LATIN-1 characters using compact strings.
java -XX:+CompactStrings CharAtTtest UTF-16
will run the test doing String.charAt on a string that contains characters above 0xFF.
Rates are output for a test that measures the rate per second of the number of loops of 1000 times looping through a string of 100 characters doing charAt.
ACTUAL -
Following shows the rates for the java commands running a single test.
java -XX:+CompactStrings CharAtTtest LATIN-1 9,398
java -XX:+CompactStrings CharAtTtest UTF-16 42,147
java -XX:-CompactStrings CharAtTtest LATIN-1 50,114
java -XX:-CompactStrings CharAtTtest UTF-16 49,971
That is, LATIN-1 is much slower than UTF-16 when using compact strings.
Not using compact strings is a bit faster and similar rates for LATIN-1 and UTF-16 strings.
Also interesting is running different combinations of the tests in the same run of the JVM.
java -XX:+CompactStrings CharAtTtest LATIN-1 UTF-16 9,399 2,977
java -XX:+CompactStrings CharAtTtest UTF-16 LATIN-1 42,159 43,134
java -XX:-CompactStrings CharAtTtest LATIN-1 UTF-16 49,950 49,924
java -XX:-CompactStrings CharAtTtest UTF-16 LATIN-1 50,011 49,899
With compact strings, running LATIN-1 first then UTF-16, UTF-16 is really slow.
However running in the other order, UTF-16 then LATIN-1, the LATIN-1 loops are as fast as UTF-16.
In either order, without compact strings they produce similar rates.
---------- BEGIN SOURCE ----------
import java.util.ArrayList;
import java.util.List;
public class CharAtTtest {
private static final int TARGET_TEST_TIME = 1_000;
private static final int INITIAL_COUNT = 1;
private static final int MINIMUM_TRIAL_TIME = 3_000;
private static final int MAXIMUM_TRIAL_TIME = 5_000;
private static final int ZERO_TIME_INCREASE = 1_000;
private static final int MINIMUM_TOLERANCE = 50;
private static final int TEST_COUNT = 4;
private final List<TimedTestResult> testResults = new ArrayList<>();
public static void main(String[] args) {
if (args.length == 0) {
usage();
}
for (String arg : args) {
if (!"LATIN-1|UTF-16".contains(arg)) {
usage();
}
}
for (String arg : args) {
new CharAtTtest().run(arg);
}
}
private static void usage() {
System.out.println("Usage CharAtTtest type...");
System.out.println("where type is LATIN-1 or UTF-16");
System.exit(1);
}
public void run(String testType) {
System.out.println(testType);
TimedTest timedTest = new TimedTest(testType);
TimedTestResult benchmark = calibrate(timedTest);
System.out.println("");
if (benchmark != null) {
int testCount = benchmark.getCount();
for (int test = 0; test < TEST_COUNT; test++) {
int time = timeRunMillis(timedTest, testCount);
TimedTestResult result = new TimedTestResult(testCount, time);
testResults.add(result);
System.out.println("Test " + (test + 1) + " " + result.toString());
}
System.out.println("");
System.out.println(testType + " average rate: "
+ String.format("%.2f", calculateAverageRate()));
System.out.println("");
}
}
private TimedTestResult calibrate(TimedTest timedTest) {
int count = INITIAL_COUNT;
int totalTime = 0;
TimedTestResult result = null;
for (int trial = 0; totalTime < MAXIMUM_TRIAL_TIME && trial < 100; trial++) {
int time = timeRunMillis(timedTest, count);
totalTime += time;
result = new TimedTestResult(count, time);
System.out.println("Trial " + (trial + 1) + " " + result.toString());
if (time <= 10) {
long nextCount = (long) count * ZERO_TIME_INCREASE;
if (nextCount > Integer.MAX_VALUE) {
break;
}
count = (int) nextCount;
} else {
int tolerance = time / count + MINIMUM_TOLERANCE;
if (totalTime >= MINIMUM_TRIAL_TIME
&& time <= TARGET_TEST_TIME + tolerance
&& time >= TARGET_TEST_TIME - tolerance) {
break;
} else {
long nextCount = (long) count * TARGET_TEST_TIME / time;
if (nextCount == 0) {
nextCount = 1;
} else if (nextCount > Integer.MAX_VALUE) {
break;
}
count = (int) nextCount;
}
}
}
return result;
}
private int timeRunMillis(TimedTest timedTest, int count) {
long startTime = System.nanoTime();
timedTest.run(count);
return (int) ((System.nanoTime() - startTime + 500_000) / 1_000_000);
}
private double calculateAverageRate() {
Double total = 0.0;
for (TimedTestResult result : testResults) {
total += result.getRate();
}
return total / (testResults.size());
}
public static class TimedTestResult {
private final int count;
private final int millis;
public TimedTestResult(int count, int millis) {
this.count = count;
this.millis = millis;
}
public int getCount() {
return count;
}
public int getTimeMillis() {
return millis;
}
public double getRate() {
if (millis == 0) {
return 0.0;
}
return (double) count / millis * 1000;
}
@Override
public String toString() {
return "count " + count + ", timeMillis " + millis
+ ", rate " + String.format("%.2f", getRate());
}
}
public static class TimedTest {
private final int appendCount = 100;
private final String appendChars;
public TimedTest(String type) {
String chars = "abcdefghijklmnopqrstuvwxyz";
char[] wrk = new char[appendCount];
for (int k = 0; k < appendCount; k++) {
wrk[k] = chars.charAt(k % chars.length());
}
if ("UTF-16".equals(type)) {
for (int i = 0; i < wrk.length; i++) {
wrk[i] = (char) (wrk[i] + 0xff);
}
}
appendChars = new String(wrk);
}
private void testChar(char ch) {
if (ch < ' ') {
throw new RuntimeException();
}
}
public void run(int count) {
for (int i = 0; i < count; i++) {
for (int j = 0; j < 1000; j++) {
for (int k = 0; k < appendCount; k++) {
testChar(appendChars.charAt(k));
}
}
}
}
}
}
---------- END SOURCE ----------
CUSTOMER SUBMITTED WORKAROUND :
Don't use compact strings.
FREQUENCY : always