Uploaded image for project: 'JDK'
  1. JDK
  2. JDK-8176513

Poor code quality for ByteBuffers

XMLWordPrintable

    • b163
    • generic
    • generic

        At some point in the JDK 9 project, code generation for ByteBuffers dramatically worsened. Loops are not vectorized and stores are not pipelined. This is an important use case.

        The attached test case is a simple loop storing ints into a ByteBuffer.

            void floss(ByteBuffer b, int n) {
                for (int i = 0; i < b.capacity(); i++) {
                    b.putInt(i<<2, n);
                }
            }

        Its inner loop looks like this:

         ;; B15: # B30 B16 <- B14 B17 Loop: B15-B17 inner post of N217 Freq: 0.996032

          0x00007f4f3122b6f0: mov 0x1c(%rsi),%ebx
          0x00007f4f3122b6f3: mov 0x2c(%rsi),%r9d ;*getfield hb {reexecute=0 rethrow=0 return_oop=0}
                                                        ; - java.nio.HeapByteBuffer::putInt@4 (line 421)
                                                        ; - ByteBufferTest::floss@16 (line 19)

          0x00007f4f3122b6f7: mov %r10d,%edx
          0x00007f4f3122b6fa: shl $0x2,%edx ;*ishl {reexecute=0 rethrow=0 return_oop=0}
                                                        ; - ByteBufferTest::floss@14 (line 19)

          0x00007f4f3122b6fd: sub %edx,%ebx ;*isub {reexecute=0 rethrow=0 return_oop=0}
                                                        ; - java.nio.Buffer::checkIndex@10 (line 674)
                                                        ; - java.nio.HeapByteBuffer::putInt@11 (line 421)
                                                        ; - ByteBufferTest::floss@16 (line 19)

          0x00007f4f3122b6ff: cmp $0x4,%ebx
          0x00007f4f3122b702: jl 0x00007f4f3122b833 ;*if_icmple {reexecute=0 rethrow=0 return_oop=0}
                                                        ; - java.nio.Buffer::checkIndex@11 (line 674)
                                                        ; - java.nio.HeapByteBuffer::putInt@11 (line 421)
                                                        ; - ByteBufferTest::floss@16 (line 19)

         ;; B16: # B25 B17 <- B15 Freq: 0.996031

          0x00007f4f3122b708: movzbl 0x29(%rsi),%edi ;*getfield bigEndian {reexecute=0 rethrow=0 return_oop=0}
                                                        ; - java.nio.HeapByteBuffer::putInt@20 (line 421)
                                                        ; - ByteBufferTest::floss@16 (line 19)

          0x00007f4f3122b70c: movslq %edx,%rax
          0x00007f4f3122b70f: add 0x10(%rsi),%rax ;*ladd {reexecute=0 rethrow=0 return_oop=0}
                                                        ; - java.nio.HeapByteBuffer::byteOffset@5 (line 170)
                                                        ; - java.nio.HeapByteBuffer::putInt@15 (line 421)
                                                        ; - ByteBufferTest::floss@16 (line 19)

          0x00007f4f3122b713: test %edi,%edi
          0x00007f4f3122b715: jne 0x00007f4f3122b7f1

         ;; B17: # B15 B18 <- B16 Freq: 0.996031

          0x00007f4f3122b71b: mov %r9,%r11
          0x00007f4f3122b7a6: shl $0x3,%r11
          0x00007f4f3122b7aa: mov %ecx,(%r11,%rax,1) ;*invokevirtual putIntUnaligned {reexecute=0 rethrow=0 return_oop=0}
                                                        ; - jdk.internal.misc.Unsafe::putIntUnaligned@10 (line 3548)
                                                        ; - java.nio.HeapByteBuffer::putInt@23 (line 421)
                                                        ; - ByteBufferTest::floss@16 (line 19)

          0x00007f4f3122b7ae: inc %r10d ;*iinc {reexecute=0 rethrow=0 return_oop=0}
                                                        ; - ByteBufferTest::floss@20 (line 18)

          0x00007f4f3122b7b1: cmp $0x400,%r10d
          0x00007f4f3122b7b8: jl 0x00007f4f3122b6f0 ;*if_icmpge {reexecute=0 rethrow=0 return_oop=0}
                                                        ; - ByteBufferTest::floss@6 (line 18)

        whereas it should look like this:

        ;; B11: # B11 B12 <- B10 B11 Loop: B11-B11 inner main of N108 Freq: 5.37e+08

          0x00007f83252938a0: mov %r10d,%r11d
          0x00007f83252938a3: shl $0x2,%r11d
          0x00007f83252938a7: movslq %r11d,%r11 ;*i2l {reexecute=0 rethrow=0 return_oop=0}
                                                        ; - java.nio.HeapByteBuffer::putInt@14 (line 421)
                                                        ; - ByteBufferTest::floss@16 (line 19)
                                                        ; - ByteBufferTest::main@19 (line 31)

          0x00007f83252938aa: mov %rsi,%r8
          0x00007f83252938ad: add %r11,%r8
          0x00007f83252938b0: vmovdqu %xmm0,(%r8)
          0x00007f83252938b5: vmovdqu %xmm0,0x10(%r8)
          0x00007f83252938bb: vmovdqu %xmm0,0x20(%r8)
          0x00007f83252938c1: vmovdqu %xmm0,0x30(%r8) ;*invokevirtual putIntUnaligned {reexecute=0 rethrow=0 return_oop=0}
                                                        ; - jdk.internal.misc.Unsafe::putIntUnaligned@10 (line 3548)
                                                        ; - java.nio.HeapByteBuffer::putInt@23 (line 421)
                                                        ; - ByteBufferTest::floss@16 (line 19)
                                                        ; - ByteBufferTest::main@19 (line 31)

          0x00007f83252938c7: add $0x10,%r10d ;*iinc {reexecute=0 rethrow=0 return_oop=0}
                                                        ; - ByteBufferTest::floss@20 (line 18)
                                                        ; - ByteBufferTest::main@19 (line 31)

          0x00007f83252938cb: cmp %eax,%r10d
          0x00007f83252938ce: jl 0x00007f83252938a0 ;*if_icmpge {reexecute=0 rethrow=0 return_oop=0}
                                                        ; - ByteBufferTest::floss@6 (line 18)
                                                        ; - ByteBufferTest::main@19 (line 31)

              roland Roland Westrelin
              aph Andrew Haley
              Votes:
              0 Vote for this issue
              Watchers:
              10 Start watching this issue

                Created:
                Updated:
                Resolved: