Uploaded image for project: 'JDK'
  1. JDK
  2. JDK-8024327

Possible improvement of the Lookup Switch implementation in C2

XMLWordPrintable

      The C2 lookup switch implementation (using a binary search) seems to have unnecessary compare instructions. Below is a switch case with 16 cases, looking like this in Java code:

      public int switch_calculation() {
          int choice = ThreadLocalRandom.current().nextInt(16) + 1;
          switch (choice) {
            case 1: return 5;
            case 2: return 1;
            case 3: return 2;
            case 4: return 16;
            case 5: return 11;
            case 6: return 13;
            case 7: return 10;
            case 8: return 14;
            case 9: return 3;
            case 10: return 4;
            case 11: return 6;
            case 12: return 15;
            case 13: return 10;
            case 14: return 8;
            case 15: return 9;
            case 16: return 7;
            default: return 0;
          }
        }


      And the x64 assembly, I've pointed out some of the places for improvement by comment ";HERE".

      [Verified Entry Point]
        0x00007f34fd071700: mov %eax,-0x14000(%rsp)
        0x00007f34fd071707: push %rbp

        0x00007f34fd071708: sub $0x10,%rsp

        0x00007f34fd07170c: mov 0x1b8(%r15),%rcx ;*invokestatic currentThread
                                                      ; - java.util.concurrent.ThreadLocalRandom::current@3 (line 177)
                                                      ; - org.sample.MyBenchmark::switch_calculation@0 (line 37)
                                        
        0x00007f34fd071713: mov 0xf0(%rcx), %r10d

        0x00007f34fd07171a: test %r10d,%r10d
        0x00007f34fd07171d: je 0x00007f34fd071912 ;*getstatic instance
                                                      ; - java.util.concurrent.ThreadLocalRandom::current@18 (line 179)
                                                      ; - org.sample.MyBenchmark::switch_calculation@0 (line 37)


        0x00007f34fd071723: mov $0x5deece66d,%r10
        0x00007f34fd07172d: imul 0xe8(%rcx),%r10
        0x00007f34fd071735: add $0xb,%r10 ;*ladd
                                                      ; - java.util.concurrent.ThreadLocalRandom::next@28 (line 198)
                                                      ; - java.util.Random::nextInt@40 (line 311)
                                                      ; - org.sample.MyBenchmark::switch_calculation@5 (line 37)

        0x00007f34fd071739: mov %r10,%r11
        0x00007f34fd07173c: shr $0x11,%r11
        0x00007f34fd071740: and $0x7fffffff,%r11
        0x00007f34fd071747: mov %r11d,%ebx ;*l2i ; - java.util.concurrent.ThreadLocalRandom::next@44 (line 199)
                                                      ; - java.util.Random::nextInt@40 (line 311)
                                                      ; - org.sample.MyBenchmark::switch_calculation@5 (line 37)

        0x00007f34fd07174a: movslq %ebx,%r11
        0x00007f34fd07174d: mov %ebx,%r8d
        0x00007f34fd071750: sar $0x1f,%r8d
        0x00007f34fd071754: imul $0x78787879,%r11,%r11
        0x00007f34fd07175b: sar $0x23,%r11
        0x00007f34fd07175f: mov %r11d,%r11d
        0x00007f34fd071762: sub %r8d,%r11d
        0x00007f34fd071765: mov %r11d,%r8d
        0x00007f34fd071768: shl $0x4,%r8d
        0x00007f34fd07176c: add %r11d,%r8d
        0x00007f34fd07176f: mov %ebx,%r9d
        0x00007f34fd071772: sub %r8d,%r9d ;*irem
                                                      ; - java.util.Random::nextInt@46 (line 312)
                                                      ; - org.sample.MyBenchmark::switch_calculation@5 (line 37)

        0x00007f34fd071775: sub %r9d,%ebx
        0x00007f34fd071778: add $0x10,%ebx
        0x00007f34fd07177b: mov $0xffffffffffff,%rdi
        0x00007f34fd071785: and %rdi,%r10
        0x00007f34fd071788: mov %r10,0xe8(%rcx) ;*invokevirtual putLong
                                                      ; - java.util.concurrent.ThreadLocalRandom::next@35 (line 197)
                                                      ; - java.util.Random::nextInt@40 (line 311)
                                                      ; - org.sample.MyBenchmark::switch_calculation@5 (line 37)

        0x00007f34fd07178f: test %ebx,%ebx
        0x00007f34fd071791: jl 0x00007f34fd0718a0 ;*iload_3
                                                      ; - java.util.Random::nextInt@58 (line 314)
                                                      ; - org.sample.MyBenchmark::switch_calculation@5 (line 37)

        0x00007f34fd071797: inc %r9d ;*iadd
                                                      ; - org.sample.MyBenchmark::switch_calculation@9 (line 37)
        
        0x00007f34fd07179a: cmp $0x9,%r9d
        0x00007f34fd07179e: je 0x00007f34fd071888

        0x00007f34fd0717a4: mov $0xa,%eax

        0x00007f34fd0717a9: cmp $0x9,%r9d
        0x00007f34fd0717ad: jg 0x00007f34fd071827

        0x00007f34fd0717af: cmp $0x4,%r9d
        0x00007f34fd0717b3: je 0x00007f34fd071820
        ; HERE
        0x00007f34fd0717b5: cmp $0x4,%r9d
        0x00007f34fd0717b9: jg 0x00007f34fd0717f2

        0x00007f34fd0717bb: cmp $0x2,%r9d
        0x00007f34fd0717bf: je 0x00007f34fd0717e8
        ; HERE
        0x00007f34fd0717c1: cmp $0x2,%r9d
        0x00007f34fd0717c5: jg 0x00007f34fd0717de

        0x00007f34fd0717c7: cmp $0x1,%r9d
        0x00007f34fd0717cb: jne 0x00007f34fd0717d7 ;*tableswitch
                                                      ; - org.sample.MyBenchmark::switch_calculation@12 (line 39)

        0x00007f34fd0717cd: mov $0x5,%eax
        0x00007f34fd0717d2: jmpq 0x00007f34fd07188d ;*iconst_0
                                                      ; - org.sample.MyBenchmark::switch_calculation@142 (line 58)
       
        0x00007f34fd0717d7: xor %eax,%eax
        0x00007f34fd0717d9: jmpq 0x00007f34fd07188d

        0x00007f34fd0717de: mov $0x2,%eax
        0x00007f34fd0717e3: jmpq 0x00007f34fd07188d
        0x00007f34fd0717e8: mov $0x1,%eax
        0x00007f34fd0717ed: jmpq 0x00007f34fd07188d
        
        0x00007f34fd0717f2: cmp $0x7,%r9d
        0x00007f34fd0717f6: je 0x00007f34fd07188d
        ; HERE
        0x00007f34fd0717fc: cmp $0x7,%r9d

        0x00007f34fd071800: jle 0x00007f34fd07180c
        0x00007f34fd071802: mov $0xe,%eax
        0x00007f34fd071807: jmpq 0x00007f34fd07188d
        0x00007f34fd07180c: cmp $0x6,%r9d
        0x00007f34fd071810: jne 0x00007f34fd071819
        0x00007f34fd071812: mov $0x11,%eax
        0x00007f34fd071817: jmp 0x00007f34fd07188d
        0x00007f34fd071819: mov $0xb,%eax
        0x00007f34fd07181e: jmp 0x00007f34fd07188d
        0x00007f34fd071820: mov $0x10,%eax
        0x00007f34fd071825: jmp 0x00007f34fd07188d
        0x00007f34fd071827: cmp $0xe,%r9d
        0x00007f34fd07182b: je 0x00007f34fd071881
        ; HERE
        0x00007f34fd07182d: cmp $0xe,%r9d

        0x00007f34fd071831: jle 0x00007f34fd07185a
        0x00007f34fd071833: cmp $0x11,%r9d
        0x00007f34fd071837: je 0x00007f34fd071853
          ; HERE
        0x00007f34fd071839: cmp $0x11,%r9d
        0x00007f34fd07183d: jg 0x00007f34fd0717d7
        0x00007f34fd07183f: cmp $0x10,%r9d
        0x00007f34fd071843: jne 0x00007f34fd07184c
        0x00007f34fd071845: mov $0x7,%eax
        0x00007f34fd07184a: jmp 0x00007f34fd07188d
        0x00007f34fd07184c: mov $0x9,%eax
        0x00007f34fd071851: jmp 0x00007f34fd07188d
        0x00007f34fd071853: mov $0xd,%eax
        0x00007f34fd071858: jmp 0x00007f34fd07188d

        0x00007f34fd07185a: cmp $0xc,%r9d
        0x00007f34fd07185e: je 0x00007f34fd07187a
        ; HERE
        0x00007f34fd071860: cmp $0xc,%r9d
        0x00007f34fd071864: jg 0x00007f34fd07188d

        0x00007f34fd071866: cmp $0xb,%r9d
        0x00007f34fd07186a: jne 0x00007f34fd071873 ;*tableswitch
                                                      ; - org.sample.MyBenchmark::switch_calculation@12 (line 39)

        0x00007f34fd07186c: mov $0x6,%eax
        0x00007f34fd071871: jmp 0x00007f34fd07188d
        0x00007f34fd071873: mov $0x4,%eax
        0x00007f34fd071878: jmp 0x00007f34fd07188d
        0x00007f34fd07187a: mov $0xf,%eax
        0x00007f34fd07187f: jmp 0x00007f34fd07188d
        0x00007f34fd071881: mov $0x8,%eax
        0x00007f34fd071886: jmp 0x00007f34fd07188d
        0x00007f34fd071888: mov $0x3,%eax ;*invokestatic currentThread
                                                      ; - java.util.concurrent.ThreadLocalRandom::current@3 (line 177)
                                                      ; - org.sample.MyBenchmark::switch_calculation@0 (line 37)

        0x00007f34fd07188d: add $0x10,%rsp
        0x00007f34fd071891: pop %rbp
        0x00007f34fd071892: test %eax,0xbfb0768(%rip) # 0x00007f3509022000 ; {poll_return}
        0x00007f34fd071898: retq


        0x00007f34fd071899: nopl 0x0(%rax) ;*aload_0
                                                      ; - java.util.Random::nextInt@37 (line 311)
                                                      ; - org.sample.MyBenchmark::switch_calculation@5 (line 37)

        0x00007f34fd0718a0: mov $0x5deece66d,%r10
        0x00007f34fd0718aa: imul 0xe8(%rcx),%r10
        0x00007f34fd0718b2: add $0xb,%r10 ;*ladd
                                                      ; - java.util.concurrent.ThreadLocalRandom::next@28 (line 198)
                                                      ; - java.util.Random::nextInt@40 (line 311)
                                                      ; - org.sample.MyBenchmark::switch_calculation@5 (line 37)

        0x00007f34fd0718b6: mov %r10,%r11
        0x00007f34fd0718b9: and %rdi,%r11
        0x00007f34fd0718bc: mov %r11,0xe8(%rcx) ;*iflt
                                                      ; - java.util.Random::nextInt@55 (line 313)
                                                      ; - org.sample.MyBenchmark::switch_calculation@5 (line 37)

        0x00007f34fd0718c3: shr $0x11,%r10
        0x00007f34fd0718c7: and $0x7fffffff,%r10
        0x00007f34fd0718ce: mov %r10d,%r11d ;*l2i ; - java.util.concurrent.ThreadLocalRandom::next@44 (line 199)
                                                      ; - java.util.Random::nextInt@40 (line 311)
                                                      ; - org.sample.MyBenchmark::switch_calculation@5 (line 37)

        0x00007f34fd0718d1: movslq %r11d,%r10
        0x00007f34fd0718d4: mov %r11d,%r8d
        0x00007f34fd0718d7: sar $0x1f,%r8d
        0x00007f34fd0718db: imul $0x78787879,%r10,%r10
        0x00007f34fd0718e2: sar $0x23,%r10
        0x00007f34fd0718e6: mov %r10d,%r10d
        0x00007f34fd0718e9: sub %r8d,%r10d
        0x00007f34fd0718ec: mov %r10d,%ebx
        0x00007f34fd0718ef: shl $0x4,%ebx
        0x00007f34fd0718f2: add %r10d,%ebx
        0x00007f34fd0718f5: mov %r11d,%r9d
        0x00007f34fd0718f8: sub %ebx,%r9d ;*irem
                                                      ; - java.util.Random::nextInt@46 (line 312)
                                                      ; - org.sample.MyBenchmark::switch_calculation@5 (line 37)

        0x00007f34fd0718fb: sub %r9d,%r11d
        0x00007f34fd0718fe: add $0x10,%r11d ; OopMap{rcx=Oop off=546}
                                                      ;*iflt
                                                      ; - java.util.Random::nextInt@55 (line 313)
                                                      ; - org.sample.MyBenchmark::switch_calculation@5 (line 37)

        0x00007f34fd071902: test %eax,0xbfb06f8(%rip) # 0x00007f3509022000
                                                      ; {poll}
        0x00007f34fd071908: test %r11d,%r11d
        0x00007f34fd07190b: jl 0x00007f34fd0718a0 ;*iflt
                                                      ; - java.util.Random::nextInt@55 (line 313)
                                                      ; - org.sample.MyBenchmark::switch_calculation@5 (line 37)

        0x00007f34fd07190d: jmpq 0x00007f34fd071797
        0x00007f34fd071912: nop
        0x00007f34fd071913: callq 0x00007f34fd037f60 ; OopMap{off=568}
                                                      ;*invokestatic localInit
                                                      ; - java.util.concurrent.ThreadLocalRandom::current@15 (line 178)
                                                      ; - org.sample.MyBenchmark::switch_calculation@0 (line 37)
                                                      ; {static_call}
        0x00007f34fd071918: mov 0x1b8(%r15),%rcx ;*invokestatic currentThread
                                                      ; - java.util.concurrent.ThreadLocalRandom::next@3 (line 197)
                                                      ; - java.util.Random::nextInt@40 (line 311)
                                                      ; - org.sample.MyBenchmark::switch_calculation@5 (line 37)

        0x00007f34fd07191f: jmpq 0x00007f34fd071723 ;*invokestatic localInit
                                                      ; - java.util.concurrent.ThreadLocalRandom::current@15 (line 178)
                                                      ; - org.sample.MyBenchmark::switch_calculation@0 (line 37)

        0x00007f34fd071924: mov %rax,%rsi
        0x00007f34fd071927: add $0x10,%rsp
        0x00007f34fd07192b: pop %rbp
        0x00007f34fd07192c: jmpq 0x00007f34fd0638a0 ; {runtime_call}


      This has only been tested for x86, SPARC might also suffer from this.

            Unassigned Unassigned
            adlertz Niclas Adlertz (Inactive)
            Votes:
            0 Vote for this issue
            Watchers:
            3 Start watching this issue

              Created:
              Updated:
              Resolved: