-
Bug
-
Resolution: Fixed
-
P2
-
6
5005831: String constructors and method which take Charset rather than String as argument
introduced new constructors for String that take a Charset.
One would expect that these would be
uniformly faster than the equivalent constructors that take a String,
since the Charset lookup can be elided.
However, it appears that special String-based name optimizations in StringCoding foil that.
For the particular important case of ASCII or Latin-1 text,
we want to discourage the use of the deprecated constructors,
but the benchmark numbers cannot support such a recommendation.
The slowdown is only for small strings, of course.
Here's a microbenchmark, and a sample run:
----------------------------------------------------
import java.nio.*;
import java.nio.charset.*;
import java.util.*;
import java.util.concurrent.*;
/**
 * Microbenchmark comparing several ways of turning ISO-8859-1 (Latin-1)
 * bytes into a {@code String}.
 *
 * <p>Usage: {@code java Latin1StringMicroBenchmark [length [iterations]]}
 * <ul>
 * <li>{@code length} - number of random input bytes to decode
 *     (default 1000, must be non-negative);
 * <li>{@code iterations} - number of decodes performed per call to
 *     {@link Job#work()} (default chosen so that roughly 100,000,000 bytes
 *     are decoded per call, must be positive).
 * </ul>
 */
public class Latin1StringMicroBenchmark {

    /** A named piece of work whose running time is measured. */
    abstract static class Job {
        private final String name;

        public Job(String name) { this.name = name; }

        /** Returns the label printed for this job in the result table. */
        public String name() { return name; }

        /** Performs one timed unit of work. */
        public abstract void work() throws Throwable;
    }

    /** Nanoseconds in one second. */
    private static final long SECOND = 1000L*1000L*1000L;

    /** Nanoseconds in one millisecond. */
    private static final long NANOS_PER_MILLI = 1000L * 1000L;

    /** Approximate number of bytes decoded per call to {@link Job#work()} by default. */
    private static final long DEFAULT_BYTES_PER_RUN = 100000L * 1000L;

    /**
     * Requests garbage collection and finalization twice, pausing briefly
     * after each request, so that each job starts from a similar heap state.
     *
     * @throws Error wrapping the InterruptedException if the pause is
     *         interrupted; the thread's interrupt status is restored first
     */
    private static void collectAllGarbage() {
        try {
            for (int i = 0; i < 2; i++) {
                System.gc();
                Thread.sleep(10);
                System.runFinalization();
                Thread.sleep(10);
            }
        } catch (InterruptedException e) {
            // Preserve the interrupt for anyone further up the stack.
            Thread.currentThread().interrupt();
            throw new Error(e);
        }
    }

    /**
     * Runs each job for at least 10 seconds.
     * Returns array of average times per job per run, in nanoseconds.
     */
    private static long[] time0(Job ... jobs) throws Throwable {
        long[] nanoss = new long[jobs.length];
        for (int i = 0; i < jobs.length; i++) {
            collectAllGarbage();
            long t0 = System.nanoTime();
            long t;
            int j = 0;
            do {
                jobs[i].work();
                j++;
            } while ((t = System.nanoTime() - t0) < 10L * SECOND);
            nanoss[i] = t/j;
        }
        return nanoss;
    }

    /**
     * Times all jobs twice (the first pass is a warm-up whose results are
     * discarded) and prints a table of milliseconds per run together with
     * the ratio of each job's time to that of the first job.
     */
    private static void time(Job ... jobs) throws Throwable {
        time0(jobs);                  // Warm up run; results intentionally discarded
        long[] nanoss = time0(jobs);  // Real timing run

        final String nameHeader = "Method";
        int nameWidth = nameHeader.length();
        for (Job job : jobs) {
            nameWidth = Math.max(nameWidth, job.name().length());
        }

        final String millisHeader = "Millis";
        int millisWidth = millisHeader.length();
        for (long nanos : nanoss) {
            millisWidth =
                Math.max(millisWidth,
                         String.format("%d", nanos/NANOS_PER_MILLI).length());
        }

        final String ratioHeader = "Ratio";
        int ratioWidth = ratioHeader.length();

        // Build the row/header formats with column widths computed above.
        String format = String.format("%%-%ds %%%dd %%.3f%%n",
                                      nameWidth, millisWidth);
        String headerFormat = String.format("%%-%ds %%-%ds %%-%ds%%n",
                                            nameWidth, millisWidth, ratioWidth);
        System.out.printf(headerFormat, nameHeader, millisHeader, ratioHeader);

        // Print out absolute and relative times, calibrated against first job
        for (int i = 0; i < jobs.length; i++) {
            long millis = nanoss[i]/NANOS_PER_MILLI;
            double ratio = (double)nanoss[i] / (double)nanoss[0];
            System.out.printf(format, jobs[i].name(), millis, ratio);
        }
    }

    /**
     * Returns {@code args[i]} parsed as an int, or {@code defaultValue}
     * when fewer than {@code i + 1} arguments were supplied.
     */
    private static int intArg(String[] args, int i, int defaultValue) {
        return args.length > i ? Integer.parseInt(args[i]) : defaultValue;
    }

    /**
     * Returns the default number of decodes per run for the given input
     * length. Never divides by zero (a zero length is treated as one byte)
     * and never returns less than one.
     */
    private static int defaultIterations(int length) {
        long perRun = DEFAULT_BYTES_PER_RUN / Math.max(1, length);
        return (int) Math.max(1L, perRun);
    }

    /**
     * Entry point; see the class comment for the accepted arguments.
     *
     * @throws IllegalArgumentException if length is negative or
     *         iterations is not positive
     * @throws NumberFormatException if an argument is not an integer
     */
    public static void main(String[] args) throws Throwable {
        final int length = intArg(args, 0, 1000);
        if (length < 0) {
            throw new IllegalArgumentException(
                "length must be non-negative: " + length);
        }
        final int iterations = intArg(args, 1, defaultIterations(length));
        if (iterations <= 0) {
            // With no iterations the jobs would never produce a result
            // and the verification below would fail confusingly.
            throw new IllegalArgumentException(
                "iterations must be positive: " + iterations);
        }

        final byte[] latin1Bytes = new byte[length];
        new Random().nextBytes(latin1Bytes);
        final String expected = new String(latin1Bytes, "ISO-8859-1");

        // Single-element holder so the anonymous jobs can publish their
        // result; pre-filled with a sentinel that each job overwrites.
        final String[] out = new String[1];
        out[0] = "poopie";

        time(
            new Job("String(byte[], int hibyte)") {
                @Override
                @SuppressWarnings("deprecation")
                public void work() throws Throwable {
                    for (int i = 0; i < iterations; i++) {
                        out[0] = new String(latin1Bytes, 0);
                    }
                    if (! out[0].equals(expected)) {
                        throw new Error(name() + ": result does not match expected string");
                    }
                }},
            new Job("String(char[], int offset, int length)") {
                @Override
                public void work() throws Throwable {
                    char[] chars = new char[2*length];
                    for (int i = 0; i < iterations; i++) {
                        // Widen each byte to an unsigned char by hand.
                        for (int j = 0; j < latin1Bytes.length; j++) {
                            chars[j] = (char) (latin1Bytes[j] & 0xff);
                        }
                        out[0] = new String(chars, 0, latin1Bytes.length);
                    }
                    if (! out[0].equals(expected)) {
                        throw new Error(name() + ": result does not match expected string");
                    }
                }},
            new Job("String(byte[], Charset cs)") {
                @Override
                public void work() throws Throwable {
                    Charset cs = Charset.forName("ISO-8859-1");
                    for (int i = 0; i < iterations; i++) {
                        out[0] = new String(latin1Bytes, cs);
                    }
                    if (! out[0].equals(expected)) {
                        throw new Error(name() + ": result does not match expected string");
                    }
                }},
            new Job("String(byte[], String csn)") {
                @Override
                public void work() throws Throwable {
                    for (int i = 0; i < iterations; i++) {
                        out[0] = new String(latin1Bytes, "ISO-8859-1");
                    }
                    if (! out[0].equals(expected)) {
                        throw new Error(name() + ": result does not match expected string");
                    }
                }},
            new Job("CharsetDecoder.decode(ByteBuffer, CharBuffer, true)") {
                @Override
                public void work() throws Throwable {
                    CharBuffer cb = CharBuffer.allocate(2*length);
                    CharsetDecoder coder =
                        Charset.forName("ISO-8859-1").newDecoder();
                    for (int i = 0; i < iterations; i++) {
                        ByteBuffer bb = ByteBuffer.wrap(latin1Bytes);
                        cb.clear();
                        coder.decode(bb, cb, true);
                        cb.flip();
                        out[0] = cb.toString();
                    }
                    if (! out[0].equals(expected)) {
                        throw new Error(name() + ": result does not match expected string");
                    }
                }}
            );
    }
}
-------------------------------------------------------
~/src/toy $ for size in 1 10 100 1000; do echo $size -----; jver mustang jr Latin1StringMicroBenchmark $size; done
1 -----
==> javac -source 1.6 -Xlint:all Latin1StringMicroBenchmark.java
==> java -esa -ea Latin1StringMicroBenchmark 1
Method Millis Ratio
String(byte[], int hibyte) 12413 1.000
String(char[], int offset, int length) 12360 0.996
String(byte[], Charset cs) 111204 8.959
String(byte[], String csn) 63524 5.118
CharsetDecoder.decode(ByteBuffer, CharBuffer, true) 47278 3.809
10 -----
==> javac -source 1.6 -Xlint:all Latin1StringMicroBenchmark.java
==> java -esa -ea Latin1StringMicroBenchmark 10
Method Millis Ratio
String(byte[], int hibyte) 1805 1.000
String(char[], int offset, int length) 2622 1.452
String(byte[], Charset cs) 11342 6.282
String(byte[], String csn) 6688 3.704
CharsetDecoder.decode(ByteBuffer, CharBuffer, true) 5118 2.834
100 -----
==> javac -source 1.6 -Xlint:all Latin1StringMicroBenchmark.java
==> java -esa -ea Latin1StringMicroBenchmark 100
Method Millis Ratio
String(byte[], int hibyte) 1061 1.000
String(char[], int offset, int length) 1183 1.114
String(byte[], Charset cs) 1964 1.850
String(byte[], String csn) 1471 1.386
CharsetDecoder.decode(ByteBuffer, CharBuffer, true) 1461 1.377
1000 -----
==> javac -source 1.6 -Xlint:all Latin1StringMicroBenchmark.java
==> java -esa -ea Latin1StringMicroBenchmark 1000
Method Millis Ratio
String(byte[], int hibyte) 1066 1.000
String(char[], int offset, int length) 1000 0.938
String(byte[], Charset cs) 1044 0.979
String(byte[], String csn) 963 0.903
CharsetDecoder.decode(ByteBuffer, CharBuffer, true) 1058 0.992
JDK-5005831 (String constructors and method which take Charset rather than String as argument) introduced new constructors for String that take a Charset.
One would expect that these would be
uniformly faster than the equivalent constructors that take a String,
since the Charset lookup can be elided.
However, it appears that special String-based name optimizations in StringCoding foil that.
For the particular important case of ASCII or Latin-1 text,
we want to discourage the use of the deprecated constructors,
but the benchmark numbers cannot support such a recommendation.
The slowdown is only for small strings, of course.
Here's a microbenchmark, and a sample run:
----------------------------------------------------
import java.nio.*;
import java.nio.charset.*;
import java.util.*;
import java.util.concurrent.*;
/**
 * Microbenchmark comparing several ways of turning ISO-8859-1 (Latin-1)
 * bytes into a {@code String}.
 *
 * <p>Usage: {@code java Latin1StringMicroBenchmark [length [iterations]]}
 * <ul>
 * <li>{@code length} - number of random input bytes to decode
 *     (default 1000, must be non-negative);
 * <li>{@code iterations} - number of decodes performed per call to
 *     {@link Job#work()} (default chosen so that roughly 100,000,000 bytes
 *     are decoded per call, must be positive).
 * </ul>
 */
public class Latin1StringMicroBenchmark {

    /** A named piece of work whose running time is measured. */
    abstract static class Job {
        private final String name;

        public Job(String name) { this.name = name; }

        /** Returns the label printed for this job in the result table. */
        public String name() { return name; }

        /** Performs one timed unit of work. */
        public abstract void work() throws Throwable;
    }

    /** Nanoseconds in one second. */
    private static final long SECOND = 1000L*1000L*1000L;

    /** Nanoseconds in one millisecond. */
    private static final long NANOS_PER_MILLI = 1000L * 1000L;

    /** Approximate number of bytes decoded per call to {@link Job#work()} by default. */
    private static final long DEFAULT_BYTES_PER_RUN = 100000L * 1000L;

    /**
     * Requests garbage collection and finalization twice, pausing briefly
     * after each request, so that each job starts from a similar heap state.
     *
     * @throws Error wrapping the InterruptedException if the pause is
     *         interrupted; the thread's interrupt status is restored first
     */
    private static void collectAllGarbage() {
        try {
            for (int i = 0; i < 2; i++) {
                System.gc();
                Thread.sleep(10);
                System.runFinalization();
                Thread.sleep(10);
            }
        } catch (InterruptedException e) {
            // Preserve the interrupt for anyone further up the stack.
            Thread.currentThread().interrupt();
            throw new Error(e);
        }
    }

    /**
     * Runs each job for at least 10 seconds.
     * Returns array of average times per job per run, in nanoseconds.
     */
    private static long[] time0(Job ... jobs) throws Throwable {
        long[] nanoss = new long[jobs.length];
        for (int i = 0; i < jobs.length; i++) {
            collectAllGarbage();
            long t0 = System.nanoTime();
            long t;
            int j = 0;
            do {
                jobs[i].work();
                j++;
            } while ((t = System.nanoTime() - t0) < 10L * SECOND);
            nanoss[i] = t/j;
        }
        return nanoss;
    }

    /**
     * Times all jobs twice (the first pass is a warm-up whose results are
     * discarded) and prints a table of milliseconds per run together with
     * the ratio of each job's time to that of the first job.
     */
    private static void time(Job ... jobs) throws Throwable {
        time0(jobs);                  // Warm up run; results intentionally discarded
        long[] nanoss = time0(jobs);  // Real timing run

        final String nameHeader = "Method";
        int nameWidth = nameHeader.length();
        for (Job job : jobs) {
            nameWidth = Math.max(nameWidth, job.name().length());
        }

        final String millisHeader = "Millis";
        int millisWidth = millisHeader.length();
        for (long nanos : nanoss) {
            millisWidth =
                Math.max(millisWidth,
                         String.format("%d", nanos/NANOS_PER_MILLI).length());
        }

        final String ratioHeader = "Ratio";
        int ratioWidth = ratioHeader.length();

        // Build the row/header formats with column widths computed above.
        String format = String.format("%%-%ds %%%dd %%.3f%%n",
                                      nameWidth, millisWidth);
        String headerFormat = String.format("%%-%ds %%-%ds %%-%ds%%n",
                                            nameWidth, millisWidth, ratioWidth);
        System.out.printf(headerFormat, nameHeader, millisHeader, ratioHeader);

        // Print out absolute and relative times, calibrated against first job
        for (int i = 0; i < jobs.length; i++) {
            long millis = nanoss[i]/NANOS_PER_MILLI;
            double ratio = (double)nanoss[i] / (double)nanoss[0];
            System.out.printf(format, jobs[i].name(), millis, ratio);
        }
    }

    /**
     * Returns {@code args[i]} parsed as an int, or {@code defaultValue}
     * when fewer than {@code i + 1} arguments were supplied.
     */
    private static int intArg(String[] args, int i, int defaultValue) {
        return args.length > i ? Integer.parseInt(args[i]) : defaultValue;
    }

    /**
     * Returns the default number of decodes per run for the given input
     * length. Never divides by zero (a zero length is treated as one byte)
     * and never returns less than one.
     */
    private static int defaultIterations(int length) {
        long perRun = DEFAULT_BYTES_PER_RUN / Math.max(1, length);
        return (int) Math.max(1L, perRun);
    }

    /**
     * Entry point; see the class comment for the accepted arguments.
     *
     * @throws IllegalArgumentException if length is negative or
     *         iterations is not positive
     * @throws NumberFormatException if an argument is not an integer
     */
    public static void main(String[] args) throws Throwable {
        final int length = intArg(args, 0, 1000);
        if (length < 0) {
            throw new IllegalArgumentException(
                "length must be non-negative: " + length);
        }
        final int iterations = intArg(args, 1, defaultIterations(length));
        if (iterations <= 0) {
            // With no iterations the jobs would never produce a result
            // and the verification below would fail confusingly.
            throw new IllegalArgumentException(
                "iterations must be positive: " + iterations);
        }

        final byte[] latin1Bytes = new byte[length];
        new Random().nextBytes(latin1Bytes);
        final String expected = new String(latin1Bytes, "ISO-8859-1");

        // Single-element holder so the anonymous jobs can publish their
        // result; pre-filled with a sentinel that each job overwrites.
        final String[] out = new String[1];
        out[0] = "poopie";

        time(
            new Job("String(byte[], int hibyte)") {
                @Override
                @SuppressWarnings("deprecation")
                public void work() throws Throwable {
                    for (int i = 0; i < iterations; i++) {
                        out[0] = new String(latin1Bytes, 0);
                    }
                    if (! out[0].equals(expected)) {
                        throw new Error(name() + ": result does not match expected string");
                    }
                }},
            new Job("String(char[], int offset, int length)") {
                @Override
                public void work() throws Throwable {
                    char[] chars = new char[2*length];
                    for (int i = 0; i < iterations; i++) {
                        // Widen each byte to an unsigned char by hand.
                        for (int j = 0; j < latin1Bytes.length; j++) {
                            chars[j] = (char) (latin1Bytes[j] & 0xff);
                        }
                        out[0] = new String(chars, 0, latin1Bytes.length);
                    }
                    if (! out[0].equals(expected)) {
                        throw new Error(name() + ": result does not match expected string");
                    }
                }},
            new Job("String(byte[], Charset cs)") {
                @Override
                public void work() throws Throwable {
                    Charset cs = Charset.forName("ISO-8859-1");
                    for (int i = 0; i < iterations; i++) {
                        out[0] = new String(latin1Bytes, cs);
                    }
                    if (! out[0].equals(expected)) {
                        throw new Error(name() + ": result does not match expected string");
                    }
                }},
            new Job("String(byte[], String csn)") {
                @Override
                public void work() throws Throwable {
                    for (int i = 0; i < iterations; i++) {
                        out[0] = new String(latin1Bytes, "ISO-8859-1");
                    }
                    if (! out[0].equals(expected)) {
                        throw new Error(name() + ": result does not match expected string");
                    }
                }},
            new Job("CharsetDecoder.decode(ByteBuffer, CharBuffer, true)") {
                @Override
                public void work() throws Throwable {
                    CharBuffer cb = CharBuffer.allocate(2*length);
                    CharsetDecoder coder =
                        Charset.forName("ISO-8859-1").newDecoder();
                    for (int i = 0; i < iterations; i++) {
                        ByteBuffer bb = ByteBuffer.wrap(latin1Bytes);
                        cb.clear();
                        coder.decode(bb, cb, true);
                        cb.flip();
                        out[0] = cb.toString();
                    }
                    if (! out[0].equals(expected)) {
                        throw new Error(name() + ": result does not match expected string");
                    }
                }}
            );
    }
}
-------------------------------------------------------
~/src/toy $ for size in 1 10 100 1000; do echo $size -----; jver mustang jr Latin1StringMicroBenchmark $size; done
1 -----
==> javac -source 1.6 -Xlint:all Latin1StringMicroBenchmark.java
==> java -esa -ea Latin1StringMicroBenchmark 1
Method Millis Ratio
String(byte[], int hibyte) 12413 1.000
String(char[], int offset, int length) 12360 0.996
String(byte[], Charset cs) 111204 8.959
String(byte[], String csn) 63524 5.118
CharsetDecoder.decode(ByteBuffer, CharBuffer, true) 47278 3.809
10 -----
==> javac -source 1.6 -Xlint:all Latin1StringMicroBenchmark.java
==> java -esa -ea Latin1StringMicroBenchmark 10
Method Millis Ratio
String(byte[], int hibyte) 1805 1.000
String(char[], int offset, int length) 2622 1.452
String(byte[], Charset cs) 11342 6.282
String(byte[], String csn) 6688 3.704
CharsetDecoder.decode(ByteBuffer, CharBuffer, true) 5118 2.834
100 -----
==> javac -source 1.6 -Xlint:all Latin1StringMicroBenchmark.java
==> java -esa -ea Latin1StringMicroBenchmark 100
Method Millis Ratio
String(byte[], int hibyte) 1061 1.000
String(char[], int offset, int length) 1183 1.114
String(byte[], Charset cs) 1964 1.850
String(byte[], String csn) 1471 1.386
CharsetDecoder.decode(ByteBuffer, CharBuffer, true) 1461 1.377
1000 -----
==> javac -source 1.6 -Xlint:all Latin1StringMicroBenchmark.java
==> java -esa -ea Latin1StringMicroBenchmark 1000
Method Millis Ratio
String(byte[], int hibyte) 1066 1.000
String(char[], int offset, int length) 1000 0.938
String(byte[], Charset cs) 1044 0.979
String(byte[], String csn) 963 0.903
CharsetDecoder.decode(ByteBuffer, CharBuffer, true) 1058 0.992
- relates to
-
JDK-5005831 String constructors and method which take Charset rather than String as argument
-
- Resolved
-
-
JDK-6405064 Undeprecate hibyte String constructors
-
- Closed
-