Uploaded image for project: 'JDK'
  1. JDK
  2. JDK-8257850

ByteBuffer VarHandle views are slower than direct BB access

XMLWordPrintable

    • Icon: Bug Bug
    • Resolution: Unresolved
    • Icon: P4 P4
    • tbd
    • 15, 16
    • core-libs
    • None

      This benchmark:

      import org.openjdk.jmh.annotations.Benchmark;
      import org.openjdk.jmh.annotations.BenchmarkMode;
      import org.openjdk.jmh.annotations.Fork;
      import org.openjdk.jmh.annotations.Measurement;
      import org.openjdk.jmh.annotations.Mode;
      import org.openjdk.jmh.annotations.OutputTimeUnit;
      import org.openjdk.jmh.annotations.Setup;
      import org.openjdk.jmh.annotations.State;
      import org.openjdk.jmh.annotations.TearDown;
      import org.openjdk.jmh.annotations.Warmup;
      import sun.misc.Unsafe;

      import java.lang.invoke.MethodHandles;
      import java.lang.invoke.VarHandle;
      import java.nio.ByteBuffer;
      import java.nio.ByteOrder;
      import java.util.concurrent.TimeUnit;

      import static jdk.incubator.foreign.MemoryLayouts.JAVA_INT;

      @BenchmarkMode(Mode.AverageTime)
      @Warmup(iterations = 5, time = 500, timeUnit = TimeUnit.MILLISECONDS)
      @Measurement(iterations = 10, time = 500, timeUnit = TimeUnit.MILLISECONDS)
      @State(org.openjdk.jmh.annotations.Scope.Thread)
      @OutputTimeUnit(TimeUnit.MILLISECONDS)
      @Fork(value = 3, jvmArgsAppend = { "--add-modules=jdk.incubator.foreign" })
      public class LoopOverPolluted {

          static final int ELEM_SIZE = 1_000_000;
          static final int CARRIER_SIZE = (int) JAVA_INT.byteSize();
          static final int ALLOC_SIZE = ELEM_SIZE * CARRIER_SIZE;

          static final Unsafe unsafe = Utils.unsafe;

          ByteBuffer heapBuffer = ByteBuffer.allocate(ALLOC_SIZE).order(ByteOrder.nativeOrder());
          ByteBuffer directBuffer = ByteBuffer.allocateDirect(ALLOC_SIZE).order(ByteOrder.nativeOrder());

          static final VarHandle VH_int = MethodHandles.byteBufferViewVarHandle(int[].class, ByteOrder.nativeOrder());
          static final VarHandle VH_float = MethodHandles.byteBufferViewVarHandle(float[].class, ByteOrder.nativeOrder());
          byte[] arr;
          long addr;


          @Setup
          public void setup() {
              addr = unsafe.allocateMemory(ALLOC_SIZE);
              for (int i = 0; i < ELEM_SIZE; i++) {
                  unsafe.putInt(addr + (i * 4), i);
              }
              arr = new byte[ALLOC_SIZE];
              for (int i = 0; i < ELEM_SIZE; i++) {
                  unsafe.putInt(arr, Unsafe.ARRAY_BYTE_BASE_OFFSET + (i * 4), i);
              }
              for (int i = 0; i < ELEM_SIZE; i++) {
                  VH_int.set(directBuffer, i, i);
                  VH_float.set(directBuffer, i, i);
                  VH_int.set(heapBuffer, i, i);
                  VH_float.set(heapBuffer, i, i);
              }
          }

          @TearDown
          public void tearDown() {
              unsafe.invokeCleaner(directBuffer);
              heapBuffer = null;
              arr = null;
              unsafe.freeMemory(addr);
          }

          @Benchmark
          public int native_buffer() {
              int sum = 0;
              for (int k = 0; k < ELEM_SIZE; k++) {
                  VH_int.set(directBuffer, k, k + 1);
                  int v = (int)VH_int.get(directBuffer, k);
                  sum += v;
              }
              return sum;
          }

          @Benchmark
          public int heap_buffer() {
              int sum = 0;
              for (int k = 0; k < ELEM_SIZE; k++) {
                  VH_int.set(heapBuffer, k, k + 1);
                  int v = (int)VH_int.get(heapBuffer, k);
                  sum += v;
              }
              return sum;
          }

          @Benchmark
          public int heap_buffer_floats() {
              int sum = 0;
              for (int k = 0; k < ELEM_SIZE; k++) {
                  VH_float.set(heapBuffer, k, k + 1);
                  float v = (float)VH_float.get(heapBuffer, k);
                  sum += (int)v;
              }
              return sum;
          }

          @Benchmark
          public int heap_unsafe() {
              int sum = 0;
              for (int k = 0; k < ALLOC_SIZE; k += 4) {
                  unsafe.putInt(arr, k + Unsafe.ARRAY_BYTE_BASE_OFFSET, k + 1);
                  int v = unsafe.getInt(arr, k + Unsafe.ARRAY_BYTE_BASE_OFFSET);
                  sum += v;
              }
              return sum;
          }

          @Benchmark
          public int native_unsafe() {
              int sum = 0;
              for (int k = 0; k < ALLOC_SIZE; k += 4) {
                  unsafe.putInt(addr + k, k + 1);
                  int v = unsafe.getInt(addr + k);
                  sum += v;
              }
              return sum;
          }
      }


      Shows that ByteBuffer VH views are significantly slower than equivalent unsafe access. This is likely caused by a lack of precise type information when passing the byte buffer base to the Unsafe call in the VH impl.

            chegar Chris Hegarty
            mcimadamore Maurizio Cimadamore
            Votes:
            0 Vote for this issue
            Watchers:
            4 Start watching this issue

              Created:
              Updated: