Uploaded image for project: 'JDK'
  1. JDK
  2. JDK-6402819

String(a, charset) slower than String(a, charsetname)

XMLWordPrintable

    • b77
    • 6
    • b87
    • generic
    • generic

      5005831: String constructors and method which take Charset rather than String as argument

      introduced new constructors for String that take a Charset.
      One would expect that these would be
      uniformly faster than the equivalent constructors that take a String,
      since the Charset lookup can be elided.

      However, it appears that StringCoding has special optimizations for the case where the charset is given by name (as a String), and these defeat that expectation.

      For the particular important case of ASCII or Latin-1 text,
      we want to discourage the use of the deprecated constructors,
      but the benchmark numbers cannot support such a recommendation.

      The slowdown is only for small strings, of course.

      Here's a microbenchmark, and a sample run:
      ----------------------------------------------------
      import java.nio.*;
      import java.nio.charset.*;
      import java.util.*;
      import java.util.concurrent.*;

      public class Latin1StringMicroBenchmark {
          abstract static class Job {
      private final String name;
      public Job(String name) { this.name = name; }
      public String name() { return name; }
      public abstract void work() throws Throwable;
          }

          private static final long SECOND = 1000L*1000L*1000L;

          private static void collectAllGarbage() {
      try {
      for (int i = 0; i < 2; i++) {
      System.gc();
      Thread.sleep(10);
      System.runFinalization();
      Thread.sleep(10);
      }
      } catch (InterruptedException e) { throw new Error(e); }
          }

          /**
           * Runs each job for at least 10 seconds.
           * Returns array of average times per job per run.
           */
          private static long[] time0(Job ... jobs) throws Throwable {
      long[] nanoss = new long[jobs.length];
      for (int i = 0; i < jobs.length; i++) {
      collectAllGarbage();
      long t0 = System.nanoTime();
      long t;
      int j = 0;
      do { jobs[i].work(); j++; }
      while ((t = System.nanoTime() - t0) < 10L * SECOND);
      nanoss[i] = t/j;
      }
      return nanoss;
          }

          private static void time(Job ... jobs) throws Throwable {

      long[] warmup = time0(jobs); // Warm up run
      long[] nanoss = time0(jobs); // Real timing run

      final String nameHeader = "Method";
      int nameWidth = nameHeader.length();
      for (Job job : jobs)
      nameWidth = Math.max(nameWidth, job.name().length());

      final String millisHeader = "Millis";
      int millisWidth = millisHeader.length();
      for (long nanos : nanoss)
      millisWidth =
      Math.max(millisWidth,
      String.format("%d", nanos/(1000L * 1000L)).length());

      final String ratioHeader = "Ratio";
      int ratioWidth = ratioHeader.length();

      String format = String.format("%%-%ds %%%dd %%.3f%%n",
      nameWidth, millisWidth);
      String headerFormat = String.format("%%-%ds %%-%ds %%-%ds%%n",
      nameWidth, millisWidth, ratioWidth);
      System.out.printf(headerFormat, "Method", "Millis", "Ratio");

      // Print out absolute and relative times, calibrated against first job
      for (int i = 0; i < jobs.length; i++) {
      long millis = nanoss[i]/(1000L * 1000L);
      double ratio = (double)nanoss[i] / (double)nanoss[0];
      System.out.printf(format, jobs[i].name(), millis, ratio);
      }
          }

          private static int intArg(String[] args, int i, int defaultValue) {
      return args.length > i ? Integer.parseInt(args[i]) : defaultValue;
          }

          public static void main(String[] args) throws Throwable {
      final int length = intArg(args, 0, 1000);
      final int iterations = intArg(args, 1, (int) (100000L * 1000L/length));

      final byte[] latin1Bytes = new byte[length];
      new Random().nextBytes(latin1Bytes);
      final String expected = new String(latin1Bytes, "ISO-8859-1");
      final String[] out = new String[1];
      out[0] = "poopie";

      time(
      new Job("String(byte[], int hibyte)") {
      @SuppressWarnings("deprecation")
      public void work() throws Throwable {
      for (int i = 0; i < iterations; i++) {
      out[0] = new String(latin1Bytes, 0);
      }
      if (! out[0].equals(expected)) throw new Error();
      }},
      new Job("String(char[], int offset, int length)") {
      public void work() throws Throwable {
      char[] chars = new char[2*length];
      for (int i = 0; i < iterations; i++) {
      for (int j = 0; j < latin1Bytes.length; j++)
      chars[j] = (char) (latin1Bytes[j] & 0xff);
      out[0] = new String(chars, 0, latin1Bytes.length);
      }
      if (! out[0].equals(expected)) throw new Error();
      }},
      new Job("String(byte[], Charset cs)") {
      public void work() throws Throwable {
      Charset cs = Charset.forName("ISO-8859-1");
      for (int i = 0; i < iterations; i++) {
      out[0] = new String(latin1Bytes, cs);
      }
      if (! out[0].equals(expected)) throw new Error();
      }},
      new Job("String(byte[], String csn)") {
      public void work() throws Throwable {
      for (int i = 0; i < iterations; i++) {
      out[0] = new String(latin1Bytes, "ISO-8859-1");
      }
      if (! out[0].equals(expected)) throw new Error();
      }},
      new Job("CharsetDecoder.decode(ByteBuffer, CharBuffer, true)") {
      public void work() throws Throwable {
      CharBuffer cb = CharBuffer.allocate(2*length);
      CharsetDecoder coder =
      Charset.forName("ISO-8859-1").newDecoder();
      for (int i = 0; i < iterations; i++) {
      ByteBuffer bb = ByteBuffer.wrap(latin1Bytes);
      cb.clear();
      coder.decode(bb, cb, true);
      cb.flip();
      out[0] = cb.toString();
      }
      if (! out[0].equals(expected)) throw new Error();
      }}
      );
          }
      }
      -------------------------------------------------------
       ~/src/toy $ for size in 1 10 100 1000; do echo $size -----; jver mustang jr Latin1StringMicroBenchmark $size; done
      1 -----
      ==> javac -source 1.6 -Xlint:all Latin1StringMicroBenchmark.java
      ==> java -esa -ea Latin1StringMicroBenchmark 1
      Method Millis Ratio
      String(byte[], int hibyte) 12413 1.000
      String(char[], int offset, int length) 12360 0.996
      String(byte[], Charset cs) 111204 8.959
      String(byte[], String csn) 63524 5.118
      CharsetDecoder.decode(ByteBuffer, CharBuffer, true) 47278 3.809
      10 -----
      ==> javac -source 1.6 -Xlint:all Latin1StringMicroBenchmark.java
      ==> java -esa -ea Latin1StringMicroBenchmark 10
      Method Millis Ratio
      String(byte[], int hibyte) 1805 1.000
      String(char[], int offset, int length) 2622 1.452
      String(byte[], Charset cs) 11342 6.282
      String(byte[], String csn) 6688 3.704
      CharsetDecoder.decode(ByteBuffer, CharBuffer, true) 5118 2.834
      100 -----
      ==> javac -source 1.6 -Xlint:all Latin1StringMicroBenchmark.java
      ==> java -esa -ea Latin1StringMicroBenchmark 100
      Method Millis Ratio
      String(byte[], int hibyte) 1061 1.000
      String(char[], int offset, int length) 1183 1.114
      String(byte[], Charset cs) 1964 1.850
      String(byte[], String csn) 1471 1.386
      CharsetDecoder.decode(ByteBuffer, CharBuffer, true) 1461 1.377
      1000 -----
      ==> javac -source 1.6 -Xlint:all Latin1StringMicroBenchmark.java
      ==> java -esa -ea Latin1StringMicroBenchmark 1000
      Method Millis Ratio
      String(byte[], int hibyte) 1066 1.000
      String(char[], int offset, int length) 1000 0.938
      String(byte[], Charset cs) 1044 0.979
      String(byte[], String csn) 963 0.903
      CharsetDecoder.decode(ByteBuffer, CharBuffer, true) 1058 0.992

            iris Iris Clark
            martin Martin Buchholz
            Votes:
            0 Vote for this issue
            Watchers:
            0 Start watching this issue

              Created:
              Updated:
              Resolved:
              Imported:
              Indexed: