- Type: Bug
- Resolution: Fixed
- Priority: P3
- Affects Version/s: 9, 10
- Resolved In Build: b163
- OS: generic
- CPU: generic
Issue | Fix Version | Assignee | Priority | Status | Resolution | Resolved In Build |
---|---|---|---|---|---|---|
JDK-8178206 | 10 | Roland Westrelin | P3 | Resolved | Fixed | b04 |
At some point during the JDK 9 project, code generation for ByteBuffer accesses became dramatically worse: loops are no longer vectorized and stores are no longer pipelined. Loops over ByteBuffers are an important use case, so this regression matters.
The attached test case is a simple loop storing ints into a ByteBuffer.
void floss(ByteBuffer b, int n) {
for (int i = 0; i < b.capacity(); i++) {
buf.putInt(i<<2, n);
}
}
Its inner loop looks like this:
;; B15: # B30 B16 <- B14 B17 Loop: B15-B17 inner post of N217 Freq: 0.996032
0x00007f4f3122b6f0: mov 0x1c(%rsi),%ebx
0x00007f4f3122b6f3: mov 0x2c(%rsi),%r9d ;*getfield hb {reexecute=0 rethrow=0 return_oop=0}
; - java.nio.HeapByteBuffer::putInt@4 (line 421)
; - ByteBufferTest::floss@16 (line 19)
0x00007f4f3122b6f7: mov %r10d,%edx
0x00007f4f3122b6fa: shl $0x2,%edx ;*ishl {reexecute=0 rethrow=0 return_oop=0}
; - ByteBufferTest::floss@14 (line 19)
0x00007f4f3122b6fd: sub %edx,%ebx ;*isub {reexecute=0 rethrow=0 return_oop=0}
; - java.nio.Buffer::checkIndex@10 (line 674)
; - java.nio.HeapByteBuffer::putInt@11 (line 421)
; - ByteBufferTest::floss@16 (line 19)
0x00007f4f3122b6ff: cmp $0x4,%ebx
0x00007f4f3122b702: jl 0x00007f4f3122b833 ;*if_icmple {reexecute=0 rethrow=0 return_oop=0}
; - java.nio.Buffer::checkIndex@11 (line 674)
; - java.nio.HeapByteBuffer::putInt@11 (line 421)
; - ByteBufferTest::floss@16 (line 19)
;; B16: # B25 B17 <- B15 Freq: 0.996031
0x00007f4f3122b708: movzbl 0x29(%rsi),%edi ;*getfield bigEndian {reexecute=0 rethrow=0 return_oop=0}
; - java.nio.HeapByteBuffer::putInt@20 (line 421)
; - ByteBufferTest::floss@16 (line 19)
0x00007f4f3122b70c: movslq %edx,%rax
0x00007f4f3122b70f: add 0x10(%rsi),%rax ;*ladd {reexecute=0 rethrow=0 return_oop=0}
; - java.nio.HeapByteBuffer::byteOffset@5 (line 170)
; - java.nio.HeapByteBuffer::putInt@15 (line 421)
; - ByteBufferTest::floss@16 (line 19)
0x00007f4f3122b713: test %edi,%edi
0x00007f4f3122b715: jne 0x00007f4f3122b7f1
;; B17: # B15 B18 <- B16 Freq: 0.996031
0x00007f4f3122b71b: mov %r9,%r11
0x00007f4f3122b7a6: shl $0x3,%r11
0x00007f4f3122b7aa: mov %ecx,(%r11,%rax,1) ;*invokevirtual putIntUnaligned {reexecute=0 rethrow=0 return_oop=0}
; - jdk.internal.misc.Unsafe::putIntUnaligned@10 (line 3548)
; - java.nio.HeapByteBuffer::putInt@23 (line 421)
; - ByteBufferTest::floss@16 (line 19)
0x00007f4f3122b7ae: inc %r10d ;*iinc {reexecute=0 rethrow=0 return_oop=0}
; - ByteBufferTest::floss@20 (line 18)
0x00007f4f3122b7b1: cmp $0x400,%r10d
0x00007f4f3122b7b8: jl 0x00007f4f3122b6f0 ;*if_icmpge {reexecute=0 rethrow=0 return_oop=0}
; - ByteBufferTest::floss@6 (line 18)
whereas it should look like this, with the stores vectorized and no per-iteration bounds check or field reloads:
;; B11: # B11 B12 <- B10 B11 Loop: B11-B11 inner main of N108 Freq: 5.37e+08
0x00007f83252938a0: mov %r10d,%r11d
0x00007f83252938a3: shl $0x2,%r11d
0x00007f83252938a7: movslq %r11d,%r11 ;*i2l {reexecute=0 rethrow=0 return_oop=0}
; - java.nio.HeapByteBuffer::putInt@14 (line 421)
; - ByteBufferTest::floss@16 (line 19)
; - ByteBufferTest::main@19 (line 31)
0x00007f83252938aa: mov %rsi,%r8
0x00007f83252938ad: add %r11,%r8
0x00007f83252938b0: vmovdqu %xmm0,(%r8)
0x00007f83252938b5: vmovdqu %xmm0,0x10(%r8)
0x00007f83252938bb: vmovdqu %xmm0,0x20(%r8)
0x00007f83252938c1: vmovdqu %xmm0,0x30(%r8) ;*invokevirtual putIntUnaligned {reexecute=0 rethrow=0 return_oop=0}
; - jdk.internal.misc.Unsafe::putIntUnaligned@10 (line 3548)
; - java.nio.HeapByteBuffer::putInt@23 (line 421)
; - ByteBufferTest::floss@16 (line 19)
; - ByteBufferTest::main@19 (line 31)
0x00007f83252938c7: add $0x10,%r10d ;*iinc {reexecute=0 rethrow=0 return_oop=0}
; - ByteBufferTest::floss@20 (line 18)
; - ByteBufferTest::main@19 (line 31)
0x00007f83252938cb: cmp %eax,%r10d
0x00007f83252938ce: jl 0x00007f83252938a0 ;*if_icmpge {reexecute=0 rethrow=0 return_oop=0}
; - ByteBufferTest::floss@6 (line 18)
; - ByteBufferTest::main@19 (line 31)
The attached test case is a simple loop storing ints into a ByteBuffer.
void floss(ByteBuffer b, int n) {
for (int i = 0; i < b.capacity(); i++) {
buf.putInt(i<<2, n);
}
}
Its inner loop looks like this:
;; B15: # B30 B16 <- B14 B17 Loop: B15-B17 inner post of N217 Freq: 0.996032
0x00007f4f3122b6f0: mov 0x1c(%rsi),%ebx
0x00007f4f3122b6f3: mov 0x2c(%rsi),%r9d ;*getfield hb {reexecute=0 rethrow=0 return_oop=0}
; - java.nio.HeapByteBuffer::putInt@4 (line 421)
; - ByteBufferTest::floss@16 (line 19)
0x00007f4f3122b6f7: mov %r10d,%edx
0x00007f4f3122b6fa: shl $0x2,%edx ;*ishl {reexecute=0 rethrow=0 return_oop=0}
; - ByteBufferTest::floss@14 (line 19)
0x00007f4f3122b6fd: sub %edx,%ebx ;*isub {reexecute=0 rethrow=0 return_oop=0}
; - java.nio.Buffer::checkIndex@10 (line 674)
; - java.nio.HeapByteBuffer::putInt@11 (line 421)
; - ByteBufferTest::floss@16 (line 19)
0x00007f4f3122b6ff: cmp $0x4,%ebx
0x00007f4f3122b702: jl 0x00007f4f3122b833 ;*if_icmple {reexecute=0 rethrow=0 return_oop=0}
; - java.nio.Buffer::checkIndex@11 (line 674)
; - java.nio.HeapByteBuffer::putInt@11 (line 421)
; - ByteBufferTest::floss@16 (line 19)
;; B16: # B25 B17 <- B15 Freq: 0.996031
0x00007f4f3122b708: movzbl 0x29(%rsi),%edi ;*getfield bigEndian {reexecute=0 rethrow=0 return_oop=0}
; - java.nio.HeapByteBuffer::putInt@20 (line 421)
; - ByteBufferTest::floss@16 (line 19)
0x00007f4f3122b70c: movslq %edx,%rax
0x00007f4f3122b70f: add 0x10(%rsi),%rax ;*ladd {reexecute=0 rethrow=0 return_oop=0}
; - java.nio.HeapByteBuffer::byteOffset@5 (line 170)
; - java.nio.HeapByteBuffer::putInt@15 (line 421)
; - ByteBufferTest::floss@16 (line 19)
0x00007f4f3122b713: test %edi,%edi
0x00007f4f3122b715: jne 0x00007f4f3122b7f1
;; B17: # B15 B18 <- B16 Freq: 0.996031
0x00007f4f3122b71b: mov %r9,%r11
0x00007f4f3122b7a6: shl $0x3,%r11
0x00007f4f3122b7aa: mov %ecx,(%r11,%rax,1) ;*invokevirtual putIntUnaligned {reexecute=0 rethrow=0 return_oop=0}
; - jdk.internal.misc.Unsafe::putIntUnaligned@10 (line 3548)
; - java.nio.HeapByteBuffer::putInt@23 (line 421)
; - ByteBufferTest::floss@16 (line 19)
0x00007f4f3122b7ae: inc %r10d ;*iinc {reexecute=0 rethrow=0 return_oop=0}
; - ByteBufferTest::floss@20 (line 18)
0x00007f4f3122b7b1: cmp $0x400,%r10d
0x00007f4f3122b7b8: jl 0x00007f4f3122b6f0 ;*if_icmpge {reexecute=0 rethrow=0 return_oop=0}
; - ByteBufferTest::floss@6 (line 18)
whereas it should look like this, with the stores vectorized and no per-iteration bounds check or field reloads:
;; B11: # B11 B12 <- B10 B11 Loop: B11-B11 inner main of N108 Freq: 5.37e+08
0x00007f83252938a0: mov %r10d,%r11d
0x00007f83252938a3: shl $0x2,%r11d
0x00007f83252938a7: movslq %r11d,%r11 ;*i2l {reexecute=0 rethrow=0 return_oop=0}
; - java.nio.HeapByteBuffer::putInt@14 (line 421)
; - ByteBufferTest::floss@16 (line 19)
; - ByteBufferTest::main@19 (line 31)
0x00007f83252938aa: mov %rsi,%r8
0x00007f83252938ad: add %r11,%r8
0x00007f83252938b0: vmovdqu %xmm0,(%r8)
0x00007f83252938b5: vmovdqu %xmm0,0x10(%r8)
0x00007f83252938bb: vmovdqu %xmm0,0x20(%r8)
0x00007f83252938c1: vmovdqu %xmm0,0x30(%r8) ;*invokevirtual putIntUnaligned {reexecute=0 rethrow=0 return_oop=0}
; - jdk.internal.misc.Unsafe::putIntUnaligned@10 (line 3548)
; - java.nio.HeapByteBuffer::putInt@23 (line 421)
; - ByteBufferTest::floss@16 (line 19)
; - ByteBufferTest::main@19 (line 31)
0x00007f83252938c7: add $0x10,%r10d ;*iinc {reexecute=0 rethrow=0 return_oop=0}
; - ByteBufferTest::floss@20 (line 18)
; - ByteBufferTest::main@19 (line 31)
0x00007f83252938cb: cmp %eax,%r10d
0x00007f83252938ce: jl 0x00007f83252938a0 ;*if_icmpge {reexecute=0 rethrow=0 return_oop=0}
; - ByteBufferTest::floss@6 (line 18)
; - ByteBufferTest::main@19 (line 31)
Issue links:
- backported by: JDK-8178206 Poor code quality for ByteBuffers (Resolved)
- relates to: JDK-8177346 hotspot change for 8176513 breaks jdk9 build on Ubuntu 16.04 (Closed)