Loading...

Type: Bug
Resolution: Unresolved
Priority: P4
Fix Version/s: tbd
Affects Version/s: 15, 16
Component/s: core-libs
Labels:
None

This benchmark:

import org.openjdk.jmh.annotations.Benchmark;
import org.openjdk.jmh.annotations.BenchmarkMode;
import org.openjdk.jmh.annotations.Fork;
import org.openjdk.jmh.annotations.Measurement;
import org.openjdk.jmh.annotations.Mode;
import org.openjdk.jmh.annotations.OutputTimeUnit;
import org.openjdk.jmh.annotations.Setup;
import org.openjdk.jmh.annotations.State;
import org.openjdk.jmh.annotations.TearDown;
import org.openjdk.jmh.annotations.Warmup;
import sun.misc.Unsafe;

import java.lang.invoke.MethodHandles;
import java.lang.invoke.VarHandle;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import java.util.concurrent.TimeUnit;

import static jdk.incubator.foreign.MemoryLayouts.JAVA_INT;

/**
 * JMH benchmark comparing plain int/float read-write loops over heap and direct
 * {@link ByteBuffer}s (through byte-buffer view {@link VarHandle}s) against the
 * equivalent loops written directly with {@code Unsafe}.
 *
 * <p>{@link #setup()} deliberately drives both the int and the float view handle
 * over both the heap and the direct buffer before any benchmark method runs, so
 * every handle has seen both buffer kinds.
 *
 * <p>Note: the index coordinate of a handle produced by
 * {@link MethodHandles#byteBufferViewVarHandle} is a <em>byte</em> offset, not an
 * element index. All VarHandle accesses below therefore scale the element index
 * by {@link #CARRIER_SIZE}, so that they touch the same aligned, non-overlapping
 * 4-byte slots as the {@code Unsafe} loops.
 */
@BenchmarkMode(Mode.AverageTime)
@Warmup(iterations = 5, time = 500, timeUnit = TimeUnit.MILLISECONDS)
@Measurement(iterations = 10, time = 500, timeUnit = TimeUnit.MILLISECONDS)
@State(org.openjdk.jmh.annotations.Scope.Thread)
@OutputTimeUnit(TimeUnit.MILLISECONDS)
@Fork(value = 3, jvmArgsAppend = { "--add-modules=jdk.incubator.foreign" })
public class LoopOverPolluted {

    /** Number of 4-byte elements processed by each benchmark loop. */
    static final int ELEM_SIZE = 1_000_000;
    /** Size in bytes of one element, taken from the {@code JAVA_INT} layout. */
    static final int CARRIER_SIZE = (int) JAVA_INT.byteSize();
    /** Total size in bytes of every buffer / array / native block used here. */
    static final int ALLOC_SIZE = ELEM_SIZE * CARRIER_SIZE;

    static final Unsafe unsafe = Utils.unsafe;

    ByteBuffer heapBuffer = ByteBuffer.allocate(ALLOC_SIZE).order(ByteOrder.nativeOrder());
    ByteBuffer directBuffer = ByteBuffer.allocateDirect(ALLOC_SIZE).order(ByteOrder.nativeOrder());

    static final VarHandle VH_int = MethodHandles.byteBufferViewVarHandle(int[].class, ByteOrder.nativeOrder());
    static final VarHandle VH_float = MethodHandles.byteBufferViewVarHandle(float[].class, ByteOrder.nativeOrder());
    byte[] arr;
    long addr;

    /**
     * Allocates the native block and the byte array, fills them through
     * {@code Unsafe}, and then writes every element of both byte buffers through
     * both view handles.
     */
    @Setup
    public void setup() {
        addr = unsafe.allocateMemory(ALLOC_SIZE);
        for (int i = 0; i < ELEM_SIZE; i++) {
            unsafe.putInt(addr + (i * CARRIER_SIZE), i);
        }
        arr = new byte[ALLOC_SIZE];
        for (int i = 0; i < ELEM_SIZE; i++) {
            unsafe.putInt(arr, Unsafe.ARRAY_BYTE_BASE_OFFSET + (i * CARRIER_SIZE), i);
        }
        for (int i = 0; i < ELEM_SIZE; i++) {
            // The VarHandle index is a byte offset: scale the element index.
            int offset = i * CARRIER_SIZE;
            VH_int.set(directBuffer, offset, i);
            VH_float.set(directBuffer, offset, i);
            VH_int.set(heapBuffer, offset, i);
            VH_float.set(heapBuffer, offset, i);
        }
    }

    /**
     * Releases the direct buffer via its cleaner, drops the heap references and
     * frees the native block allocated in {@link #setup()}.
     */
    @TearDown
    public void tearDown() {
        unsafe.invokeCleaner(directBuffer);
        heapBuffer = null;
        arr = null;
        unsafe.freeMemory(addr);
    }

    /**
     * Writes then reads back every int of the direct buffer through {@link #VH_int}.
     *
     * @return the (wrapping) sum of the values read, returned so the loop is not dead code
     */
    @Benchmark
    public int native_buffer() {
        int sum = 0;
        for (int k = 0; k < ELEM_SIZE; k++) {
            int offset = k * CARRIER_SIZE; // byte offset of element k
            VH_int.set(directBuffer, offset, k + 1);
            int v = (int)VH_int.get(directBuffer, offset);
            sum += v;
        }
        return sum;
    }

    /**
     * Writes then reads back every int of the heap buffer through {@link #VH_int}.
     *
     * @return the (wrapping) sum of the values read, returned so the loop is not dead code
     */
    @Benchmark
    public int heap_buffer() {
        int sum = 0;
        for (int k = 0; k < ELEM_SIZE; k++) {
            int offset = k * CARRIER_SIZE; // byte offset of element k
            VH_int.set(heapBuffer, offset, k + 1);
            int v = (int)VH_int.get(heapBuffer, offset);
            sum += v;
        }
        return sum;
    }

    /**
     * Writes then reads back every float of the heap buffer through {@link #VH_float}.
     *
     * @return the (wrapping) sum of the values read, each truncated to int
     */
    @Benchmark
    public int heap_buffer_floats() {
        int sum = 0;
        for (int k = 0; k < ELEM_SIZE; k++) {
            int offset = k * CARRIER_SIZE; // byte offset of element k
            VH_float.set(heapBuffer, offset, k + 1);
            float v = (float)VH_float.get(heapBuffer, offset);
            sum += (int)v;
        }
        return sum;
    }

    /**
     * Baseline: writes then reads back every int of the byte array directly
     * through {@code Unsafe}; here {@code k} is already a byte offset.
     *
     * @return the (wrapping) sum of the values read
     */
    @Benchmark
    public int heap_unsafe() {
        int sum = 0;
        for (int k = 0; k < ALLOC_SIZE; k += 4) {
            unsafe.putInt(arr, k + Unsafe.ARRAY_BYTE_BASE_OFFSET, k + 1);
            int v = unsafe.getInt(arr, k + Unsafe.ARRAY_BYTE_BASE_OFFSET);
            sum += v;
        }
        return sum;
    }

    /**
     * Baseline: writes then reads back every int of the native block directly
     * through {@code Unsafe}; here {@code k} is already a byte offset.
     *
     * @return the (wrapping) sum of the values read
     */
    @Benchmark
    public int native_unsafe() {
        int sum = 0;
        for (int k = 0; k < ALLOC_SIZE; k += 4) {
            unsafe.putInt(addr + k, k + 1);
            int v = unsafe.getInt(addr + k);
            sum += v;
        }
        return sum;
    }
}

Shows that ByteBuffer VH views are significantly slower than equivalent unsafe access. This is likely caused by a lack of precise type information when passing the byte buffer base to the Unsafe call in the VH impl.

relates to

JDK-8257692 Using both heap and native segments can degrade performance

Resolved

Details

Description

Attachments

Issue Links

Activity

People

Dates