package org.openjdk;

import org.openjdk.jmh.annotations.*;
import org.openjdk.jmh.infra.Blackhole;

import java.util.concurrent.TimeUnit;

@Warmup(iterations = 3, time = 1, timeUnit = TimeUnit.SECONDS)
@Measurement(iterations = 3, time = 1, timeUnit = TimeUnit.SECONDS)
@Fork(value = 1, jvmArgsAppend = {"-XX:LoopMaxUnroll=1", "-XX:-UseCompressedOops"})
@BenchmarkMode(Mode.AverageTime)
@OutputTimeUnit(TimeUnit.NANOSECONDS)
@State(Scope.Benchmark)
public class MultiTypeBench {

    // Abstract base class extended by every Impl* type below; target of the class casts.
    abstract static class C {}
    // First marker interface implemented by every Impl* type below.
    interface I1 {}
    // Second marker interface implemented by every Impl* type below.
    interface I2 {}

    /**
     * Creates a fresh instance of the {@code Impl<idx>} class.
     *
     * <p>The mapping is spelled out as an explicit switch so that every one of the
     * 100 implementation classes is referenced directly by name.
     *
     * @param idx index of the implementation type to instantiate, in the range 0..99
     * @return a new instance of the selected implementation class
     * @throws IllegalArgumentException if {@code idx} is outside 0..99
     */
    public Object create(int idx) {
        switch (idx) {
            case 0: return new Impl0();
            case 1: return new Impl1();
            case 2: return new Impl2();
            case 3: return new Impl3();
            case 4: return new Impl4();
            case 5: return new Impl5();
            case 6: return new Impl6();
            case 7: return new Impl7();
            case 8: return new Impl8();
            case 9: return new Impl9();
            case 10: return new Impl10();
            case 11: return new Impl11();
            case 12: return new Impl12();
            case 13: return new Impl13();
            case 14: return new Impl14();
            case 15: return new Impl15();
            case 16: return new Impl16();
            case 17: return new Impl17();
            case 18: return new Impl18();
            case 19: return new Impl19();
            case 20: return new Impl20();
            case 21: return new Impl21();
            case 22: return new Impl22();
            case 23: return new Impl23();
            case 24: return new Impl24();
            case 25: return new Impl25();
            case 26: return new Impl26();
            case 27: return new Impl27();
            case 28: return new Impl28();
            case 29: return new Impl29();
            case 30: return new Impl30();
            case 31: return new Impl31();
            case 32: return new Impl32();
            case 33: return new Impl33();
            case 34: return new Impl34();
            case 35: return new Impl35();
            case 36: return new Impl36();
            case 37: return new Impl37();
            case 38: return new Impl38();
            case 39: return new Impl39();
            case 40: return new Impl40();
            case 41: return new Impl41();
            case 42: return new Impl42();
            case 43: return new Impl43();
            case 44: return new Impl44();
            case 45: return new Impl45();
            case 46: return new Impl46();
            case 47: return new Impl47();
            case 48: return new Impl48();
            case 49: return new Impl49();
            case 50: return new Impl50();
            case 51: return new Impl51();
            case 52: return new Impl52();
            case 53: return new Impl53();
            case 54: return new Impl54();
            case 55: return new Impl55();
            case 56: return new Impl56();
            case 57: return new Impl57();
            case 58: return new Impl58();
            case 59: return new Impl59();
            case 60: return new Impl60();
            case 61: return new Impl61();
            case 62: return new Impl62();
            case 63: return new Impl63();
            case 64: return new Impl64();
            case 65: return new Impl65();
            case 66: return new Impl66();
            case 67: return new Impl67();
            case 68: return new Impl68();
            case 69: return new Impl69();
            case 70: return new Impl70();
            case 71: return new Impl71();
            case 72: return new Impl72();
            case 73: return new Impl73();
            case 74: return new Impl74();
            case 75: return new Impl75();
            case 76: return new Impl76();
            case 77: return new Impl77();
            case 78: return new Impl78();
            case 79: return new Impl79();
            case 80: return new Impl80();
            case 81: return new Impl81();
            case 82: return new Impl82();
            case 83: return new Impl83();
            case 84: return new Impl84();
            case 85: return new Impl85();
            case 86: return new Impl86();
            case 87: return new Impl87();
            case 88: return new Impl88();
            case 89: return new Impl89();
            case 90: return new Impl90();
            case 91: return new Impl91();
            case 92: return new Impl92();
            case 93: return new Impl93();
            case 94: return new Impl94();
            case 95: return new Impl95();
            case 96: return new Impl96();
            case 97: return new Impl97();
            case 98: return new Impl98();
            case 99: return new Impl99();
            default:
                // Report the offending value so a bad index is easy to diagnose.
                throw new IllegalArgumentException(
                        "Unsupported type index: " + idx + " (expected 0..99)");
        }
    }

    static class Impl0 extends C implements I1, I2 {};
    static class Impl1 extends C implements I1, I2 {};
    static class Impl2 extends C implements I1, I2 {};
    static class Impl3 extends C implements I1, I2 {};
    static class Impl4 extends C implements I1, I2 {};
    static class Impl5 extends C implements I1, I2 {};
    static class Impl6 extends C implements I1, I2 {};
    static class Impl7 extends C implements I1, I2 {};
    static class Impl8 extends C implements I1, I2 {};
    static class Impl9 extends C implements I1, I2 {};
    static class Impl10 extends C implements I1, I2 {};
    static class Impl11 extends C implements I1, I2 {};
    static class Impl12 extends C implements I1, I2 {};
    static class Impl13 extends C implements I1, I2 {};
    static class Impl14 extends C implements I1, I2 {};
    static class Impl15 extends C implements I1, I2 {};
    static class Impl16 extends C implements I1, I2 {};
    static class Impl17 extends C implements I1, I2 {};
    static class Impl18 extends C implements I1, I2 {};
    static class Impl19 extends C implements I1, I2 {};
    static class Impl20 extends C implements I1, I2 {};
    static class Impl21 extends C implements I1, I2 {};
    static class Impl22 extends C implements I1, I2 {};
    static class Impl23 extends C implements I1, I2 {};
    static class Impl24 extends C implements I1, I2 {};
    static class Impl25 extends C implements I1, I2 {};
    static class Impl26 extends C implements I1, I2 {};
    static class Impl27 extends C implements I1, I2 {};
    static class Impl28 extends C implements I1, I2 {};
    static class Impl29 extends C implements I1, I2 {};
    static class Impl30 extends C implements I1, I2 {};
    static class Impl31 extends C implements I1, I2 {};
    static class Impl32 extends C implements I1, I2 {};
    static class Impl33 extends C implements I1, I2 {};
    static class Impl34 extends C implements I1, I2 {};
    static class Impl35 extends C implements I1, I2 {};
    static class Impl36 extends C implements I1, I2 {};
    static class Impl37 extends C implements I1, I2 {};
    static class Impl38 extends C implements I1, I2 {};
    static class Impl39 extends C implements I1, I2 {};
    static class Impl40 extends C implements I1, I2 {};
    static class Impl41 extends C implements I1, I2 {};
    static class Impl42 extends C implements I1, I2 {};
    static class Impl43 extends C implements I1, I2 {};
    static class Impl44 extends C implements I1, I2 {};
    static class Impl45 extends C implements I1, I2 {};
    static class Impl46 extends C implements I1, I2 {};
    static class Impl47 extends C implements I1, I2 {};
    static class Impl48 extends C implements I1, I2 {};
    static class Impl49 extends C implements I1, I2 {};
    static class Impl50 extends C implements I1, I2 {};
    static class Impl51 extends C implements I1, I2 {};
    static class Impl52 extends C implements I1, I2 {};
    static class Impl53 extends C implements I1, I2 {};
    static class Impl54 extends C implements I1, I2 {};
    static class Impl55 extends C implements I1, I2 {};
    static class Impl56 extends C implements I1, I2 {};
    static class Impl57 extends C implements I1, I2 {};
    static class Impl58 extends C implements I1, I2 {};
    static class Impl59 extends C implements I1, I2 {};
    static class Impl60 extends C implements I1, I2 {};
    static class Impl61 extends C implements I1, I2 {};
    static class Impl62 extends C implements I1, I2 {};
    static class Impl63 extends C implements I1, I2 {};
    static class Impl64 extends C implements I1, I2 {};
    static class Impl65 extends C implements I1, I2 {};
    static class Impl66 extends C implements I1, I2 {};
    static class Impl67 extends C implements I1, I2 {};
    static class Impl68 extends C implements I1, I2 {};
    static class Impl69 extends C implements I1, I2 {};
    static class Impl70 extends C implements I1, I2 {};
    static class Impl71 extends C implements I1, I2 {};
    static class Impl72 extends C implements I1, I2 {};
    static class Impl73 extends C implements I1, I2 {};
    static class Impl74 extends C implements I1, I2 {};
    static class Impl75 extends C implements I1, I2 {};
    static class Impl76 extends C implements I1, I2 {};
    static class Impl77 extends C implements I1, I2 {};
    static class Impl78 extends C implements I1, I2 {};
    static class Impl79 extends C implements I1, I2 {};
    static class Impl80 extends C implements I1, I2 {};
    static class Impl81 extends C implements I1, I2 {};
    static class Impl82 extends C implements I1, I2 {};
    static class Impl83 extends C implements I1, I2 {};
    static class Impl84 extends C implements I1, I2 {};
    static class Impl85 extends C implements I1, I2 {};
    static class Impl86 extends C implements I1, I2 {};
    static class Impl87 extends C implements I1, I2 {};
    static class Impl88 extends C implements I1, I2 {};
    static class Impl89 extends C implements I1, I2 {};
    static class Impl90 extends C implements I1, I2 {};
    static class Impl91 extends C implements I1, I2 {};
    static class Impl92 extends C implements I1, I2 {};
    static class Impl93 extends C implements I1, I2 {};
    static class Impl94 extends C implements I1, I2 {};
    static class Impl95 extends C implements I1, I2 {};
    static class Impl96 extends C implements I1, I2 {};
    static class Impl97 extends C implements I1, I2 {};
    static class Impl98 extends C implements I1, I2 {};
    static class Impl99 extends C implements I1, I2 {};

    // Number of distinct Impl* types cycled through when filling `objects`
    // (used as the modulus in setup()); benchmark parameter with values 1, 10 and 100.
    @Param({"1", "10", "100"})
    int typeCount;

    // Instances the benchmark methods iterate over; rebuilt by setup().
    Object[] objects;

    /**
     * Allocates the 100-element {@code objects} array and fills it round-robin:
     * slot {@code i} receives a new instance of type index {@code i % typeCount}.
     */
    @Setup
    public void setup() {
        final Object[] pool = new Object[100];
        // Publish the array before filling it, matching the original assignment order.
        objects = pool;
        int slot = 0;
        while (slot < pool.length) {
            final int typeIndex = slot % typeCount;
            pool[slot] = create(typeIndex);
            slot++;
        }
    }

    /*
                 Benchmark                    (typeCount)  Mode  Cnt    Score    Error  Units
         JDK 17: MultiTypeBench.flipflop              100  avgt    3  1672.534 ± 76.977  ns/op
         JDK 21: MultiTypeBench.flipflop              100  avgt    3   311.402 ±  3.118  ns/op
         JDK 25: MultiTypeBench.flipflop              100  avgt    3   367.831 ± 18.509  ns/op
       mainline: MultiTypeBench.flipflop              100  avgt    3   356.224 ± 29.400  ns/op

       This is JDK-8180450 working as intended: when types are flip-flopping, the single-slot
       secondary supers cache is actively worse. But you can also see that JDK 21 is quite a bit
       faster than other releases. This will be explained in later tests.
     */

    /**
     * Casts every element of {@code objects} to {@code C}, then {@code I1}, then {@code I2},
     * so consecutive type checks on the same object alternate between different target types.
     * Each cast result is handed to the blackhole.
     *
     * <p>The loop body is kept exactly as measured; the results quoted in the surrounding
     * comments were taken with this shape.
     *
     * @param bh JMH blackhole that receives each cast result
     */
    @Benchmark
    @CompilerControl(CompilerControl.Mode.DONT_INLINE)
    public void flipflop(Blackhole bh) {
        for (Object o : objects) {
            bh.consume((C)o);
            bh.consume((I1)o);
            bh.consume((I2)o);
        }
    }

    /*
                 Benchmark                    (typeCount)  Mode  Cnt    Score    Error  Units
        JDK 17: MultiTypeBench.steady_class          100  avgt    3    75.600 ±  9.434  ns/op
        JDK 21: MultiTypeBench.steady_class          100  avgt    3    79.603 ±  3.871  ns/op
        JDK 25: MultiTypeBench.steady_class          100  avgt    3    79.915 ±  7.711  ns/op
      mainline: MultiTypeBench.steady_class          100  avgt    3    78.293 ±  1.260  ns/op

      For abstract class queries, the performance is on par across all releases.
     */

    /**
     * Casts every element of {@code objects} to the abstract class {@code C} three times
     * in a row, so the target type of the check never changes. Each cast result is handed
     * to the blackhole.
     *
     * @param bh JMH blackhole that receives each cast result
     */
    @Benchmark
    @CompilerControl(CompilerControl.Mode.DONT_INLINE)
    public void steady_class(Blackhole bh) {
        for (Object o : objects) {
            bh.consume((C)o);
            bh.consume((C)o);
            bh.consume((C)o);
        }
    }

    /*
                Benchmark                    (typeCount)  Mode  Cnt    Score    Error  Units
        JDK 17: MultiTypeBench.steady_intf           100  avgt    3    84.176 ±  1.953  ns/op
        JDK 21: MultiTypeBench.steady_intf           100  avgt    3    84.812 ±  2.313  ns/op ; <---- HAAAA!
        JDK 25: MultiTypeBench.steady_intf           100  avgt    3   208.186 ±  3.447  ns/op
      mainline: MultiTypeBench.steady_intf           100  avgt    3   202.927 ± 28.108  ns/op

      NOW, THIS IS CURIOUS. JDK 21 is on par with JDK 17, and both are significantly ahead of JDK 25 and mainline.

      This is JDK 17 perfasm:
                   │↗   0x000073a2dc81ee40:   inc    %r9d                         ;*iinc {reexecute=0 rethrow=0 return_oop=0}
                   ││                                                             ; - org.openjdk.MultiTypeBench::steady_intf@50 (line 286)
                   ││   0x000073a2dc81ee43:   cmp    %r8d,%r9d
                   ││   0x000073a2dc81ee46:   jge    0x000073a2dc81ee0e           ;*aload_2 {reexecute=0 rethrow=0 return_oop=0}
                   ││                                                             ; - org.openjdk.MultiTypeBench::steady_intf@17 (line 286)
                   ↘│   0x000073a2dc81ee48:   mov    0x10(%r11,%r9,8),%rdx        ;*aaload {reexecute=0 rethrow=0 return_oop=0}
                    │                                                             ; - org.openjdk.MultiTypeBench::steady_intf@20 (line 286)
                    │   0x000073a2dc81ee4d:   mov    0x8(%rdx),%edi               ; implicit exception: dispatches to 0x000073a2dc81eebc
           6.97%    │   0x000073a2dc81ee50:   movabs $0x739a4f000000,%rsi
                    │   0x000073a2dc81ee5a:   add    %rdi,%rsi
           1.49%    │   0x000073a2dc81ee5d:   mov    0x20(%rsi),%rcx            ; <---- secondary super cache slot
          44.11%    │   0x000073a2dc81ee61:   cmp    %rax,%rcx
          33.11%    ╰   0x000073a2dc81ee64:   je     0x000073a2dc81ee40

      It shows the type check using the single slot cache. This is expected behavior pre-JDK-8180450.

      This is JDK 21 perfasm:
                    │↗  0x00007497a3c26050:   inc    %r10d                        ;*iinc {reexecute=0 rethrow=0 return_oop=0}
                    ││                                                            ; - org.openjdk.MultiTypeBench::steady_intf@50 (line 286)
                    ││  0x00007497a3c26053:   cmp    %r9d,%r10d
                    ││  0x00007497a3c26056:   jge    0x00007497a3c26017           ;*aload_2 {reexecute=0 rethrow=0 return_oop=0}
                    ││                                                            ; - org.openjdk.MultiTypeBench::steady_intf@17 (line 286)
                    ↘│  0x00007497a3c26058:   mov    0x10(%r12,%r10,8),%rbp       ;*aaload {reexecute=0 rethrow=0 return_oop=0}
                     │                                                            ; - org.openjdk.MultiTypeBench::steady_intf@20 (line 286)
            0.07%    │  0x00007497a3c2605d:   mov    0x8(%rbp),%ecx               ; implicit exception: dispatches to 0x00007497a3c26100
            7.30%    │  0x00007497a3c26060:   movabs $0x748f3b000000,%rsi
                     │  0x00007497a3c2606a:   add    %rcx,%rsi
            2.15%    │  0x00007497a3c2606d:   mov    0x20(%rsi),%r11              ; <---- secondary super cache slot
           48.50%    │  0x00007497a3c26071:   cmp    %rax,%r11
           33.52%    ╰  0x00007497a3c26074:   je     0x00007497a3c26050

      Surprise! JDK 21 is also using the single-slot cache. My suspicion is that the absence of
      JDK-8331341 (implementing interpreter and C1 support) makes interpreter/C1 populate that single-slot cache.
      C2 type checking code still reads it, somewhere in Phase::gen_subtype_check.

      For comparison, this is JDK 25 perfasm:
                   │↗    ↗  0x00007be2ef818420:   inc    %r10d                        ;*iinc {reexecute=0 rethrow=0 return_oop=0}
                   ││    │                                                            ; - org.openjdk.MultiTypeBench::steady_intf@50 (line 286)
           0.14%   ││    │  0x00007be2ef818423:   cmp    %r9d,%r10d
           0.53%   ││    │  0x00007be2ef818426:   jge    0x00007be2ef8183e7           ;*aload_2 {reexecute=0 rethrow=0 return_oop=0}
                   ││    │                                                            ; - org.openjdk.MultiTypeBench::steady_intf@17 (line 286)
           0.04%   ↘│    │  0x00007be2ef818428:   mov    0x10(%r12,%r10,8),%rbp       ;*aaload {reexecute=0 rethrow=0 return_oop=0}
                    │    │                                                            ; - org.openjdk.MultiTypeBench::steady_intf@20 (line 286)
           0.57%    │    │  0x00007be2ef81842d:   mov    0x8(%rbp),%ecx               ; implicit exception: dispatches to 0x00007be2ef8184c8
           2.42%    │    │  0x00007be2ef818430:   mov    $0x7a000000,%esi
           2.14%    │    │  0x00007be2ef818435:   add    %rcx,%rsi
           3.24%    │    │  0x00007be2ef818438:   mov    0x20(%rsi),%r11              ; <---- secondary super cache slot
          14.07%    │    │  0x00007be2ef81843c:   cmp    %rax,%r11
           1.14%    ╰    │  0x00007be2ef81843f:   je     0x00007be2ef818420
                         │  0x00007be2ef818441:   xor    %rdi,%rdi
           0.11%         │  0x00007be2ef818444:   mov    0xa8(%rsi),%r11
           3.21%         │  0x00007be2ef81844b:   mov    %r11,%rdx
           0.82%         │  0x00007be2ef81844e:   shl    $0x26,%rdx
           1.57%     ╭   │  0x00007be2ef818452:   jns    0x00007be2ef818479
           2.85%     │   │  0x00007be2ef818458:   popcnt %rdx,%rdx
           3.35%     │   │  0x00007be2ef81845d:   mov    0x28(%rsi),%rbx
           5.81%     │   │  0x00007be2ef818461:   cmp    (%rbx,%rdx,8),%rax
          53.87%     │╭  │  0x00007be2ef818465:   je     0x00007be2ef81847c
                     ││  │  0x00007be2ef818467:   bt     $0x1a,%r11
                     ││╭ │  0x00007be2ef81846c:   jae    0x00007be2ef818479
                     │││ │  0x00007be2ef81846e:   ror    $0x19,%r11
                     │││ │  0x00007be2ef818472:   call   Stub::lookup_secondary_supers_table_slow_path
                     │││ │                                                            ;   {runtime_call Stub::lookup_secondary_supers_table_slow_path}
                     │││╭│  0x00007be2ef818477:   jmp    0x00007be2ef81847c
                     ↘│↘││  0x00007be2ef818479:   inc    %rdi
                      ↘ ↘│  0x00007be2ef81847c:   test   %rdi,%rdi
                         ╰  0x00007be2ef81847f:   je     0x00007be2ef818420

      Now that the single-slot cache is never populated, the cache check always misses and we fall through to the bitmap search.
     */

    /**
     * Casts every element of {@code objects} to the interface {@code I1} three times in a
     * row, so the target type of the check never changes. Each cast result is handed to
     * the blackhole.
     *
     * <p>The loop body is kept exactly as measured; the perfasm listings in the comment
     * above refer to this shape.
     *
     * @param bh JMH blackhole that receives each cast result
     */
    @Benchmark
    @CompilerControl(CompilerControl.Mode.DONT_INLINE)
    public void steady_intf(Blackhole bh) {
        for (Object o : objects) {
            bh.consume((I1)o);
            bh.consume((I1)o);
            bh.consume((I1)o);
        }
    }

}
