-
Bug
-
Resolution: Fixed
-
P3
-
10
-
b37
-
x86_64
Issue | Fix Version | Assignee | Priority | Status | Resolution | Resolved In Build |
---|---|---|---|---|---|---|
JDK-8194586 | 11 | Vivek Deshpande | P3 | Resolved | Fixed | b01 |
I traced various regressions in microbenchmarks on my Haswell-generation workstation (Intel(R) Xeon(R) CPU E5-2630 v3) to the vzeroupper instructions that are emitted for performance reasons since JDK-8178811. We need to examine whether the optimization in JDK-8178811 is profitable on pre-Skylake CPUs and, if so, whether we can improve when and where the vzeroupper instructions are emitted.
Example:
Benchmark Mode Cnt Score Error Units
Clazz.getClassLoader avgt 5 11.605 ± 0.350 ns/op # 10
Clazz.getClassLoader avgt 5 7.802 ± 0.323 ns/op # 9 GA (similar score with -XX:UseAVX=0)
package org.openjdk;
import org.openjdk.jmh.annotations.Benchmark;
import org.openjdk.jmh.annotations.BenchmarkMode;
import org.openjdk.jmh.annotations.Mode;
import org.openjdk.jmh.annotations.OutputTimeUnit;
import org.openjdk.jmh.annotations.CompilerControl;
import java.util.concurrent.TimeUnit;
/**
 * JMH microbenchmark used in this report to reproduce the regression.
 * It reports the average time, in nanoseconds, of a single
 * {@code Class.getClassLoader()} call on a class held in a static field.
 */
@BenchmarkMode(Mode.AverageTime)
@OutputTimeUnit(TimeUnit.NANOSECONDS)
public class Clazz {
// Class whose loader is queried. The field is static and non-final; the
// perfasm listings show it being read with getstatic inside the benchmark method.
public static Class<?> c1 = Clazz.class;
/**
 * Benchmark body: returns the class loader of {@link #c1}.
 * DONT_INLINE requests that this method not be inlined, so it shows up as
 * its own compiled method in the perfasm output.
 *
 * @return the result of {@code c1.getClassLoader()}
 */
@Benchmark
@CompilerControl(CompilerControl.Mode.DONT_INLINE)
public ClassLoader getClassLoader() {
return c1.getClassLoader();
}
}
-prof perfasm on 9 or with -XX:UseAVX=0:
# {method} {0x00007f5804680aa0} 'getClassLoader' '()Ljava/lang/ClassLoader;' in 'org/openjdk/Clazz'
# [sp+0x30] (sp of caller)
0x00007f590176c7a0: mov 0x8(%rsi),%r10d
0x00007f590176c7a4: shl $0x3,%r10
0x00007f590176c7a8: cmp %r10,%rax
0x00007f590176c7ab: jne 0x00007f58f9b94000 ; {runtime_call ic_miss_stub}
0x00007f590176c7b1: xchg %ax,%ax
0x00007f590176c7b4: nopl 0x0(%rax,%rax,1)
0x00007f590176c7bc: xchg %ax,%ax
[Verified Entry Point]
2.29% 2.33% 0x00007f590176c7c0: mov %eax,-0x14000(%rsp)
4.86% 4.19% 0x00007f590176c7c7: push %rbp
1.10% 1.20% 0x00007f590176c7c8: sub $0x20,%rsp ;*synchronization entry
; - org.openjdk.Clazz::getClassLoader@-1 (line 56)
2.16% 2.24% 0x00007f590176c7cc: mov $0x5e95a20b8,%r10 ; {oop(a 'java/lang/Class'{0x00000005e95a20b8} = 'org/openjdk/Clazz')}
2.55% 2.29% 0x00007f590176c7d6: mov 0x70(%r10),%r11d ;*getstatic c1 {reexecute=0 rethrow=0 return_oop=0}
; - org.openjdk.Clazz::getClassLoader@0 (line 56)
0.88% 1.01% 0x00007f590176c7da: mov 0x1c(%r12,%r11,8),%r11d ;*getfield classLoader {reexecute=0 rethrow=0 return_oop=0}
; - java.lang.Class::getClassLoader0@1 (line 811)
; - java.lang.Class::getClassLoader@1 (line 800)
; - org.openjdk.Clazz::getClassLoader@3 (line 56)
; implicit exception: dispatches to 0x00007f590176c81d
2.68% 2.77% 0x00007f590176c7df: test %r11d,%r11d
╭ 0x00007f590176c7e2: je 0x00007f590176c806 ;*ifnonnull {reexecute=0 rethrow=0 return_oop=0}
│ ; - java.lang.Class::getClassLoader@6 (line 801)
│ ; - org.openjdk.Clazz::getClassLoader@3 (line 56)
1.51% 1.64% │ 0x00007f590176c7e4: mov $0x5e9a00d60,%r10 ; {oop(a 'java/lang/Class'{0x00000005e9a00d60} = 'java/lang/System')}
2.68% 2.85% │ 0x00007f590176c7ee: mov 0x7c(%r10),%ebp ;*getstatic security {reexecute=0 rethrow=0 return_oop=0}
│ ; - java.lang.System::getSecurityManager@0 (line 366)
│ ; - java.lang.Class::getClassLoader@11 (line 803)
│ ; - org.openjdk.Clazz::getClassLoader@3 (line 56)
0.76% 0.85% │ 0x00007f590176c7f2: test %ebp,%ebp
│╭ 0x00007f590176c7f4: jne 0x00007f590176c80a ;*ifnull {reexecute=0 rethrow=0 return_oop=0}
││ ; - java.lang.Class::getClassLoader@16 (line 804)
││ ; - org.openjdk.Clazz::getClassLoader@3 (line 56)
1.48% 1.59% ││ 0x00007f590176c7f6: lea (%r12,%r11,8),%rax ;*invokevirtual getClassLoader {reexecute=0 rethrow=0 return_oop=0}
││ ; - org.openjdk.Clazz::getClassLoader@3 (line 56)
1.13% 0.97% ││↗ 0x00007f590176c7fa: add $0x20,%rsp
2.37% 2.18% │││ 0x00007f590176c7fe: pop %rbp
2.28% 2.50% │││ 0x00007f590176c7ff: test %eax,0x1bd9b7fb(%rip) # 0x00007f591d508000
│││ ; {poll_return}
1.87% 2.13% │││ 0x00007f590176c805: retq
↘││ 0x00007f590176c806: xor %eax,%eax
│╰ 0x00007f590176c808: jmp 0x00007f590176c7fa
↘ 0x00007f590176c80a: mov $0xffffff5d,%esi
0x00007f590176c80f: mov %r11d,(%rsp)
0x00007f590176c813: callq 0x00007f58f9b95c00 ; ImmutableOopMap{rbp=NarrowOop [0]=NarrowOop }
;*ifnull {reexecute=1 rethrow=0 return_oop=0}
; - java.lang.Class::getClassLoader@16 (line 804)
; - org.openjdk.Clazz::getClassLoader@3 (line 56)
; {runtime_call UncommonTrapBlob}
10 ASM:
# {method} {0x00007f3f6d8aa640} 'getClassLoader' '()Ljava/lang/ClassLoader;' in 'org/openjdk/Clazz'
# [sp+0x30] (sp of caller)
0x00007f40658f14a0: mov 0x8(%rsi),%r10d
0x00007f40658f14a4: shl $0x3,%r10
0x00007f40658f14a8: cmp %r10,%rax
0x00007f40658f14ab: jne 0x00007f405dcfa400 ; {runtime_call ic_miss_stub}
0x00007f40658f14b1: xchg %ax,%ax
0x00007f40658f14b4: nopl 0x0(%rax,%rax,1)
0x00007f40658f14bc: xchg %ax,%ax
[Verified Entry Point]
2.53% 3.10% 0x00007f40658f14c0: mov %eax,-0x14000(%rsp)
4.92% 5.72% 0x00007f40658f14c7: push %rbp
2.25% 3.08% 0x00007f40658f14c8: sub $0x20,%rsp ;*synchronization entry
; - org.openjdk.Clazz::getClassLoader@-1 (line 56)
1.35% 2.12% 0x00007f40658f14cc: mov $0x5e95d34d0,%r10 ; {oop(a 'java/lang/Class'{0x00000005e95d34d0} = 'org/openjdk/Clazz')}
1.65% 2.14% 0x00007f40658f14d6: mov 0x70(%r10),%r11d ;*getstatic c1 {reexecute=0 rethrow=0 return_oop=0}
; - org.openjdk.Clazz::getClassLoader@0 (line 56)
0.95% 1.27% 0x00007f40658f14da: mov 0x1c(%r12,%r11,8),%r11d ;*getfield classLoader {reexecute=0 rethrow=0 return_oop=0}
; - java.lang.Class::getClassLoader0@1 (line 814)
; - java.lang.Class::getClassLoader@1 (line 803)
; - org.openjdk.Clazz::getClassLoader@3 (line 56)
; implicit exception: dispatches to 0x00007f40658f1522
2.30% 3.11% 0x00007f40658f14df: test %r11d,%r11d
╭ 0x00007f40658f14e2: je 0x00007f40658f1509 ;*ifnonnull {reexecute=0 rethrow=0 return_oop=0}
│ ; - java.lang.Class::getClassLoader@6 (line 804)
│ ; - org.openjdk.Clazz::getClassLoader@3 (line 56)
1.23% 1.35% │ 0x00007f40658f14e4: mov $0x5e9a00d60,%r10 ; {oop(a 'java/lang/Class'{0x00000005e9a00d60} = 'java/lang/System')}
1.29% 1.44% │ 0x00007f40658f14ee: mov 0x7c(%r10),%ebp ;*getstatic security {reexecute=0 rethrow=0 return_oop=0}
│ ; - java.lang.System::getSecurityManager@0 (line 370)
│ ; - java.lang.Class::getClassLoader@11 (line 806)
│ ; - org.openjdk.Clazz::getClassLoader@3 (line 56)
1.21% 1.14% │ 0x00007f40658f14f2: test %ebp,%ebp
│╭ 0x00007f40658f14f4: jne 0x00007f40658f150d ;*ifnull {reexecute=0 rethrow=0 return_oop=0}
││ ; - java.lang.Class::getClassLoader@16 (line 807)
││ ; - org.openjdk.Clazz::getClassLoader@3 (line 56)
0.94% 0.99% ││ 0x00007f40658f14f6: lea (%r12,%r11,8),%rax ;*invokevirtual getClassLoader {reexecute=0 rethrow=0 return_oop=0}
││ ; - org.openjdk.Clazz::getClassLoader@3 (line 56)
1.06% 1.71% ││↗ 0x00007f40658f14fa: vzeroupper
4.46% 4.13% │││ 0x00007f40658f14fd: add $0x20,%rsp
1.24% 0.74% │││ 0x00007f40658f1501: pop %rbp
1.78% 1.66% │││ 0x00007f40658f1502: test %eax,0x1bde8af8(%rip) # 0x00007f40816da000
│││ ; {poll_return}
1.01% 1.24% │││ 0x00007f40658f1508: retq
↘││ 0x00007f40658f1509: xor %eax,%eax
│╰ 0x00007f40658f150b: jmp 0x00007f40658f14fa
↘ 0x00007f40658f150d: mov $0xffffff55,%esi
0x00007f40658f1512: mov %r11d,(%rsp)
0x00007f40658f1516: xchg %ax,%ax
0x00007f40658f1518: vzeroupper
0x00007f40658f151b: callq 0x00007f405dcfc080 ; ImmutableOopMap{rbp=NarrowOop [0]=NarrowOop }
;*ifnull {reexecute=1 rethrow=0 return_oop=0}
; - java.lang.Class::getClassLoader@16 (line 807)
Example:
Benchmark Mode Cnt Score Error Units
Clazz.getClassLoader avgt 5 11.605 ± 0.350 ns/op # 10
Clazz.getClassLoader avgt 5 7.802 ± 0.323 ns/op # 9 GA (similar score with -XX:UseAVX=0)
package org.openjdk;
import org.openjdk.jmh.annotations.Benchmark;
import org.openjdk.jmh.annotations.BenchmarkMode;
import org.openjdk.jmh.annotations.Mode;
import org.openjdk.jmh.annotations.OutputTimeUnit;
import org.openjdk.jmh.annotations.CompilerControl;
import java.util.concurrent.TimeUnit;
/**
 * JMH microbenchmark used in this report to reproduce the regression.
 * It reports the average time, in nanoseconds, of a single
 * {@code Class.getClassLoader()} call on a class held in a static field.
 */
@BenchmarkMode(Mode.AverageTime)
@OutputTimeUnit(TimeUnit.NANOSECONDS)
public class Clazz {
// Class whose loader is queried. The field is static and non-final; the
// perfasm listings show it being read with getstatic inside the benchmark method.
public static Class<?> c1 = Clazz.class;
/**
 * Benchmark body: returns the class loader of {@link #c1}.
 * DONT_INLINE requests that this method not be inlined, so it shows up as
 * its own compiled method in the perfasm output.
 *
 * @return the result of {@code c1.getClassLoader()}
 */
@Benchmark
@CompilerControl(CompilerControl.Mode.DONT_INLINE)
public ClassLoader getClassLoader() {
return c1.getClassLoader();
}
}
-prof perfasm on 9 or with -XX:UseAVX=0:
# {method} {0x00007f5804680aa0} 'getClassLoader' '()Ljava/lang/ClassLoader;' in 'org/openjdk/Clazz'
# [sp+0x30] (sp of caller)
0x00007f590176c7a0: mov 0x8(%rsi),%r10d
0x00007f590176c7a4: shl $0x3,%r10
0x00007f590176c7a8: cmp %r10,%rax
0x00007f590176c7ab: jne 0x00007f58f9b94000 ; {runtime_call ic_miss_stub}
0x00007f590176c7b1: xchg %ax,%ax
0x00007f590176c7b4: nopl 0x0(%rax,%rax,1)
0x00007f590176c7bc: xchg %ax,%ax
[Verified Entry Point]
2.29% 2.33% 0x00007f590176c7c0: mov %eax,-0x14000(%rsp)
4.86% 4.19% 0x00007f590176c7c7: push %rbp
1.10% 1.20% 0x00007f590176c7c8: sub $0x20,%rsp ;*synchronization entry
; - org.openjdk.Clazz::getClassLoader@-1 (line 56)
2.16% 2.24% 0x00007f590176c7cc: mov $0x5e95a20b8,%r10 ; {oop(a 'java/lang/Class'{0x00000005e95a20b8} = 'org/openjdk/Clazz')}
2.55% 2.29% 0x00007f590176c7d6: mov 0x70(%r10),%r11d ;*getstatic c1 {reexecute=0 rethrow=0 return_oop=0}
; - org.openjdk.Clazz::getClassLoader@0 (line 56)
0.88% 1.01% 0x00007f590176c7da: mov 0x1c(%r12,%r11,8),%r11d ;*getfield classLoader {reexecute=0 rethrow=0 return_oop=0}
; - java.lang.Class::getClassLoader0@1 (line 811)
; - java.lang.Class::getClassLoader@1 (line 800)
; - org.openjdk.Clazz::getClassLoader@3 (line 56)
; implicit exception: dispatches to 0x00007f590176c81d
2.68% 2.77% 0x00007f590176c7df: test %r11d,%r11d
╭ 0x00007f590176c7e2: je 0x00007f590176c806 ;*ifnonnull {reexecute=0 rethrow=0 return_oop=0}
│ ; - java.lang.Class::getClassLoader@6 (line 801)
│ ; - org.openjdk.Clazz::getClassLoader@3 (line 56)
1.51% 1.64% │ 0x00007f590176c7e4: mov $0x5e9a00d60,%r10 ; {oop(a 'java/lang/Class'{0x00000005e9a00d60} = 'java/lang/System')}
2.68% 2.85% │ 0x00007f590176c7ee: mov 0x7c(%r10),%ebp ;*getstatic security {reexecute=0 rethrow=0 return_oop=0}
│ ; - java.lang.System::getSecurityManager@0 (line 366)
│ ; - java.lang.Class::getClassLoader@11 (line 803)
│ ; - org.openjdk.Clazz::getClassLoader@3 (line 56)
0.76% 0.85% │ 0x00007f590176c7f2: test %ebp,%ebp
│╭ 0x00007f590176c7f4: jne 0x00007f590176c80a ;*ifnull {reexecute=0 rethrow=0 return_oop=0}
││ ; - java.lang.Class::getClassLoader@16 (line 804)
││ ; - org.openjdk.Clazz::getClassLoader@3 (line 56)
1.48% 1.59% ││ 0x00007f590176c7f6: lea (%r12,%r11,8),%rax ;*invokevirtual getClassLoader {reexecute=0 rethrow=0 return_oop=0}
││ ; - org.openjdk.Clazz::getClassLoader@3 (line 56)
1.13% 0.97% ││↗ 0x00007f590176c7fa: add $0x20,%rsp
2.37% 2.18% │││ 0x00007f590176c7fe: pop %rbp
2.28% 2.50% │││ 0x00007f590176c7ff: test %eax,0x1bd9b7fb(%rip) # 0x00007f591d508000
│││ ; {poll_return}
1.87% 2.13% │││ 0x00007f590176c805: retq
↘││ 0x00007f590176c806: xor %eax,%eax
│╰ 0x00007f590176c808: jmp 0x00007f590176c7fa
↘ 0x00007f590176c80a: mov $0xffffff5d,%esi
0x00007f590176c80f: mov %r11d,(%rsp)
0x00007f590176c813: callq 0x00007f58f9b95c00 ; ImmutableOopMap{rbp=NarrowOop [0]=NarrowOop }
;*ifnull {reexecute=1 rethrow=0 return_oop=0}
; - java.lang.Class::getClassLoader@16 (line 804)
; - org.openjdk.Clazz::getClassLoader@3 (line 56)
; {runtime_call UncommonTrapBlob}
10 ASM:
# {method} {0x00007f3f6d8aa640} 'getClassLoader' '()Ljava/lang/ClassLoader;' in 'org/openjdk/Clazz'
# [sp+0x30] (sp of caller)
0x00007f40658f14a0: mov 0x8(%rsi),%r10d
0x00007f40658f14a4: shl $0x3,%r10
0x00007f40658f14a8: cmp %r10,%rax
0x00007f40658f14ab: jne 0x00007f405dcfa400 ; {runtime_call ic_miss_stub}
0x00007f40658f14b1: xchg %ax,%ax
0x00007f40658f14b4: nopl 0x0(%rax,%rax,1)
0x00007f40658f14bc: xchg %ax,%ax
[Verified Entry Point]
2.53% 3.10% 0x00007f40658f14c0: mov %eax,-0x14000(%rsp)
4.92% 5.72% 0x00007f40658f14c7: push %rbp
2.25% 3.08% 0x00007f40658f14c8: sub $0x20,%rsp ;*synchronization entry
; - org.openjdk.Clazz::getClassLoader@-1 (line 56)
1.35% 2.12% 0x00007f40658f14cc: mov $0x5e95d34d0,%r10 ; {oop(a 'java/lang/Class'{0x00000005e95d34d0} = 'org/openjdk/Clazz')}
1.65% 2.14% 0x00007f40658f14d6: mov 0x70(%r10),%r11d ;*getstatic c1 {reexecute=0 rethrow=0 return_oop=0}
; - org.openjdk.Clazz::getClassLoader@0 (line 56)
0.95% 1.27% 0x00007f40658f14da: mov 0x1c(%r12,%r11,8),%r11d ;*getfield classLoader {reexecute=0 rethrow=0 return_oop=0}
; - java.lang.Class::getClassLoader0@1 (line 814)
; - java.lang.Class::getClassLoader@1 (line 803)
; - org.openjdk.Clazz::getClassLoader@3 (line 56)
; implicit exception: dispatches to 0x00007f40658f1522
2.30% 3.11% 0x00007f40658f14df: test %r11d,%r11d
╭ 0x00007f40658f14e2: je 0x00007f40658f1509 ;*ifnonnull {reexecute=0 rethrow=0 return_oop=0}
│ ; - java.lang.Class::getClassLoader@6 (line 804)
│ ; - org.openjdk.Clazz::getClassLoader@3 (line 56)
1.23% 1.35% │ 0x00007f40658f14e4: mov $0x5e9a00d60,%r10 ; {oop(a 'java/lang/Class'{0x00000005e9a00d60} = 'java/lang/System')}
1.29% 1.44% │ 0x00007f40658f14ee: mov 0x7c(%r10),%ebp ;*getstatic security {reexecute=0 rethrow=0 return_oop=0}
│ ; - java.lang.System::getSecurityManager@0 (line 370)
│ ; - java.lang.Class::getClassLoader@11 (line 806)
│ ; - org.openjdk.Clazz::getClassLoader@3 (line 56)
1.21% 1.14% │ 0x00007f40658f14f2: test %ebp,%ebp
│╭ 0x00007f40658f14f4: jne 0x00007f40658f150d ;*ifnull {reexecute=0 rethrow=0 return_oop=0}
││ ; - java.lang.Class::getClassLoader@16 (line 807)
││ ; - org.openjdk.Clazz::getClassLoader@3 (line 56)
0.94% 0.99% ││ 0x00007f40658f14f6: lea (%r12,%r11,8),%rax ;*invokevirtual getClassLoader {reexecute=0 rethrow=0 return_oop=0}
││ ; - org.openjdk.Clazz::getClassLoader@3 (line 56)
1.06% 1.71% ││↗ 0x00007f40658f14fa: vzeroupper
4.46% 4.13% │││ 0x00007f40658f14fd: add $0x20,%rsp
1.24% 0.74% │││ 0x00007f40658f1501: pop %rbp
1.78% 1.66% │││ 0x00007f40658f1502: test %eax,0x1bde8af8(%rip) # 0x00007f40816da000
│││ ; {poll_return}
1.01% 1.24% │││ 0x00007f40658f1508: retq
↘││ 0x00007f40658f1509: xor %eax,%eax
│╰ 0x00007f40658f150b: jmp 0x00007f40658f14fa
↘ 0x00007f40658f150d: mov $0xffffff55,%esi
0x00007f40658f1512: mov %r11d,(%rsp)
0x00007f40658f1516: xchg %ax,%ax
0x00007f40658f1518: vzeroupper
0x00007f40658f151b: callq 0x00007f405dcfc080 ; ImmutableOopMap{rbp=NarrowOop [0]=NarrowOop }
;*ifnull {reexecute=1 rethrow=0 return_oop=0}
; - java.lang.Class::getClassLoader@16 (line 807)
- backported by
-
JDK-8194586 Regressions on Haswell Xeon due to JDK-8178811
-
- Resolved
-
- relates to
-
JDK-8178811 Minimize the AVX <-> SSE transition penalty through generation of vzeroupper instruction on x86
-
- Resolved
-