Uploaded image for project: 'JDK'
  1. JDK
  2. JDK-8316275

C2 profiling for switch-lookup is hurting performance with a common dataset

XMLWordPrintable

    • generic
    • generic

      ADDITIONAL SYSTEM INFORMATION :
      JDK-17

      A DESCRIPTION OF THE PROBLEM :
      Run the following JMH benchmark with JDK 17. It iterates over a dataset of HTTP status codes whose distribution simulates a common workload:

      import org.openjdk.jmh.annotations.Benchmark;
      import org.openjdk.jmh.annotations.BenchmarkMode;
      import org.openjdk.jmh.annotations.Fork;
      import org.openjdk.jmh.annotations.Level;
      import org.openjdk.jmh.annotations.Measurement;
      import org.openjdk.jmh.annotations.Mode;
      import org.openjdk.jmh.annotations.OperationsPerInvocation;
      import org.openjdk.jmh.annotations.OutputTimeUnit;
      import org.openjdk.jmh.annotations.Scope;
      import org.openjdk.jmh.annotations.Setup;
      import org.openjdk.jmh.annotations.State;
      import org.openjdk.jmh.annotations.Warmup;
      import org.openjdk.jmh.infra.Blackhole;
      import org.openjdk.jmh.runner.Runner;
      import org.openjdk.jmh.runner.RunnerException;
      import org.openjdk.jmh.runner.options.Options;
      import org.openjdk.jmh.runner.options.OptionsBuilder;
      import java.util.SplittableRandom;
      import java.util.concurrent.TimeUnit;

      /**
       * JMH throughput benchmark that runs the same {@code switch} over a randomly
       * generated array of HTTP-like status codes twice: once with the JVM's default
       * settings and once in a fork started with {@code -XX:-UseSwitchProfiling}.
       *
       * <p>Each benchmark method processes {@link #DATA_SIZE} elements per call and is
       * annotated with {@code @OperationsPerInvocation(DATA_SIZE)}, so the reported score
       * is per array element, expressed in operations per microsecond.
       *
       * <p>Run with JDK-17.
       */
      @BenchmarkMode(Mode.Throughput)
      @Warmup(iterations = 5, time = 1)
      @Measurement(iterations = 5, time = 1)
      @OutputTimeUnit(TimeUnit.MICROSECONDS)
      @Fork(1)
      @State(Scope.Thread)
      public class SwitchBenchmark {
          /** Number of status codes held in {@link #DATA} and processed per benchmark call. */
          private static final int DATA_SIZE = 10000;
          // The data set is static, i.e. one array shared by every instance of this state
          // class even though the state scope is Scope.Thread.
          // NOTE(review): presumably fine because the benchmark is run single-threaded — confirm.
          private static final int[] DATA = new int[DATA_SIZE];
          // Unseeded generator: every run (and every refill) produces a different data set.
          private static final SplittableRandom RANDOM = new SplittableRandom();

          /**
           * Launches the benchmarks of this class through the JMH runner, passing
           * {@code -server} to the forked JVM.
           *
           * @param args command-line arguments (not used)
           * @throws RunnerException if the JMH runner fails
           */
          public static void main(String[] args) throws RunnerException {
              Options opt = new OptionsBuilder()
                      .include(SwitchBenchmark.class.getSimpleName())
                      .jvmArgs("-server")
                      .build();
              new Runner(opt).run();
          }

          /**
           * Refills {@link #DATA} with freshly generated status codes before every single
           * benchmark method call.
           */
          // NOTE(review): JMH's Level.Invocation documentation cautions against using it
          // for short-running benchmark methods because of its bookkeeping overhead —
          // confirm that this does not distort the comparison.
          @Setup(Level.Invocation)
          public void setup() {
              fillHttpStatusCodes(DATA);
          }

          /**
           * Baseline: switches on the hundreds digit of every code in {@link #DATA} with
           * the JVM's default flags (no switch-profiling flag is passed for this method).
           *
           * @param bh sink that consumes each value so the loop body is not eliminated
           */
          @Benchmark
          @OperationsPerInvocation(DATA_SIZE)
          public void switch_profiling(Blackhole bh) {
              for (int i : DATA) {
                  // Integer division maps a code to its class, e.g. 2xx -> 2; values in
                  // (-100, 100) yield 0 and fall through to the default branch.
                  switch (i / 100) {
                      case 1:
                          // 1XX HTTP code
                          bh.consume(i); break;
                      case 2:
                          // 2XX HTTP code
                          bh.consume(i); break;
                      case 3:
                          // 3XX HTTP code
                          bh.consume(i); break;
                      case 4:
                          // 4XX HTTP code
                          bh.consume(i); break;
                      case 5:
                          // 5XX HTTP code
                          bh.consume(i); break;
                      default:
                          // Unknown HTTP code
                          bh.consume(i);
                  }
              }
          }

          /**
           * Same loop and switch as {@link #switch_profiling(Blackhole)}; the only
           * difference is the method-level {@code @Fork}, which appends
           * {@code -XX:+UnlockDiagnosticVMOptions -XX:-UseSwitchProfiling} to the forked
           * JVM's arguments.
           *
           * @param bh sink that consumes each value so the loop body is not eliminated
           */
          @Benchmark
          @OperationsPerInvocation(DATA_SIZE)
          @Fork(jvmArgsAppend = {"-XX:+UnlockDiagnosticVMOptions", "-XX:-UseSwitchProfiling"})
          public void switch_profiling_disabled(Blackhole bh) {
              for (int i : DATA) {
                  // Integer division maps a code to its class, e.g. 2xx -> 2; values in
                  // (-100, 100) yield 0 and fall through to the default branch.
                  switch (i / 100) {
                      case 1:
                          // 1XX HTTP code
                          bh.consume(i); break;
                      case 2:
                          // 2XX HTTP code
                          bh.consume(i); break;
                      case 3:
                          // 3XX HTTP code
                          bh.consume(i); break;
                      case 4:
                          // 4XX HTTP code
                          bh.consume(i); break;
                      case 5:
                          // 5XX HTTP code
                          bh.consume(i); break;
                      default:
                          // Unknown HTTP code
                          bh.consume(i);
                  }
              }
          }

          /**
           * Fills every slot of the given array with a random status code. A first draw
           * in [0, 100) selects the code class with fixed weights (38% 1XX, 30% 2XX,
           * 15% 3XX, 10% 4XX, 5% 5XX, 2% unknown); a second draw picks the concrete
           * value inside that class's range.
           *
           * @param benchMarkData array to overwrite in place
           */
          private static void fillHttpStatusCodes(int[] benchMarkData) {
              // The index is advanced inside the body (i++ on each store), hence the
              // empty update clause of the for statement.
              for (int i = 0; i < benchMarkData.length;) {
                  // Random scope: [0, 100)
                  int code = RANDOM.nextInt(0, 100);
                  // Proportion: 38%
                  if (code < 38) {
                      // 1XX HTTP code
                      benchMarkData[i++] = RANDOM.nextInt(100, 200);
                      continue;
                  }
                  // Proportion: 30%
                  if (code < 68) {
                      // 2XX HTTP code
                      benchMarkData[i++] = RANDOM.nextInt(200, 300);
                      continue;
                  }
                  // Proportion: 15%
                  if (code < 83) {
                      // 3XX HTTP code
                      benchMarkData[i++] = RANDOM.nextInt(300, 400);
                      continue;
                  }
                  // Proportion: 10%
                  if (code < 93) {
                      // 4XX HTTP code
                      benchMarkData[i++] = RANDOM.nextInt(400, 500);
                      continue;
                  }
                  // Proportion: 5%
                  if (code < 98) {
                      // 5XX HTTP code
                      benchMarkData[i++] = RANDOM.nextInt(500, 600);
                      continue;
                  }
                  // Proportion: 2%
                  // Unknown HTTP code: a value in [-50, 50), which divides by 100 to 0
                  benchMarkData[i++] = RANDOM.nextInt(-50, 50);
              }
          }
      }

      The benchmark results are:
      Benchmark Mode Cnt Score Error Units
      SwitchBenchmark.switch_profiling thrpt 5 144.712 ± 0.856 ops/us
      SwitchBenchmark.switch_profiling_disabled thrpt 5 154.381 ± 0.111 ops/us

      As the results show, throughput is about 6.7% higher with C2 switch profiling disabled (154.381 vs. 144.712 ops/us), which indicates that the default C2 switch profiling hurts performance for this dataset.

      The pom.xml dependencies:
      <dependencies>
              <dependency>
                  <groupId>org.openjdk.jmh</groupId>
                  <artifactId>jmh-core</artifactId>
                  <version>1.37</version>
              </dependency>
              <dependency>
                  <groupId>org.openjdk.jmh</groupId>
                  <artifactId>jmh-generator-annprocess</artifactId>
                  <version>1.37</version>
              </dependency>
      </dependencies>



            Unassigned Unassigned
            webbuggrp Webbug Group
            Votes:
            0 Vote for this issue
            Watchers:
            6 Start watching this issue

              Created:
              Updated: