-
Bug
-
Resolution: Fixed
-
P3
-
repo-valhalla
-
x86
I'd like to describe a set of issues that exist in 'acmp' code generation.
Benchmarks: http://cr.openjdk.java.net/~skuksenko/valhalla/cleanup/
(Params, FieldsO, FieldsI)
1. Clearing the ArrayStorageProperties bits in the class pointer.
This one is obvious. We check type equality of both arguments and, to do so, clear the flattened_array and non_null array bits.
That clearing is not required, because we already know that one argument is not an array (and is therefore free of both bits).
2. When null is never passed as an argument to the comparison, explicit null checks are not generated; traps that lead to recompilation are used instead.
In that case the load of the first class pointer is always moved ahead of the mark-word check for the always-locked pattern. That load is useless for all arguments that are not inline types:
-XX:-UseCompressedOops:
0.29% 0x00007f70001be624: cmp %rdx,%rbp
╭ 0x00007f70001be627: je 0x00007f70001be671
0.40% │ 0x00007f70001be629: mov 0x8(%rbp),%r10 ; implicit exception: dispatches to 0x00007f70001be80f
4.77% │ 0x00007f70001be62d: shl $0x3,%r10
3.80% │ 0x00007f70001be631: sar $0x3,%r10
SK: ^^ that load should be moved to the basic block where it is required (and the clearing of the array-properties bits should move there as well).
3.48% │ 0x00007f70001be635: mov $0x405,%r11d
│ 0x00007f70001be63b: and 0x0(%rbp),%r11
0.25% │ 0x00007f70001be63f: cmp $0x405,%r11
│╭ 0x00007f70001be646: jne 0x00007f70001be66d
││ 0x00007f70001be648: mov 0x8(%rdx),%r11 ; implicit exception: dispatches to 0x00007f70001be893
││ 0x00007f70001be64c: shl $0x3,%r11
││ 0x00007f70001be650: sar $0x3,%r11
││ 0x00007f70001be654: cmp %r11,%r10
││╭ 0x00007f70001be657: jne 0x00007f70001be66d ;*if_acmpne {reexecute=0 rethrow=0 return_oop=0 return_vt=0}
-XX:+UseCompressedOops:
0x00007f0d241bf543: cmp %r9d,%ebp
0x00007f0d241bf546: je 0x00007f0d241bf599 ;*if_acmpne {reexecute=0 rethrow=0 return_oop=0 return_vt=0}
; - acmp.FieldsO::cmp0@8 (line 34)
0x00007f0d241bf548: mov 0x8(%r12,%rbp,8),%r10d ; implicit exception: dispatches to 0x00007f0d241bf737
0x00007f0d241bf54d: lea (%r12,%rbp,8),%rsi ;*getfield f {reexecute=0 rethrow=0 return_oop=0 return_vt=0}
; - acmp.FieldsO::cmp0@5 (line 34)
0x00007f0d241bf551: mov $0x405,%r11d
0x00007f0d241bf557: and (%rsi),%r11
0x00007f0d241bf55a: cmp $0x405,%r11
0x00007f0d241bf561: jne 0x00007f0d241bf595
0x00007f0d241bf563: mov 0x8(%r12,%r9,8),%r11d ; implicit exception: dispatches to 0x00007f0d241bf7bb
0x00007f0d241bf568: mov %r11d,%r8d
0x00007f0d241bf56b: and $0x1fffffff,%r8d
0x00007f0d241bf572: and $0x1fffffff,%r10d
0x00007f0d241bf579: mov %r10d,%r11d
0x00007f0d241bf57c: cmp %r8d,%r11d
0x00007f0d241bf57f: jne 0x00007f0d241bf595 ;*if_acmpne {reexecute=0 rethrow=0 return_oop=0 return_vt=0}
; - acmp.FieldsO::cmp0@8 (line 34)
0x00007f0d241bf581: lea (%r12,%r9,8),%rdx ;*getfield f {reexecute=0 rethrow=0 return_oop=0 return_vt=0}
; - acmp.FieldsO::cmp0@1 (line 34)
0x00007f0d241bf585: mov %r9d,(%rsp)
0x00007f0d241bf589: xchg %ax,%ax
0x00007f0d241bf58b: callq 0x00007f0d1c717e80 ; ImmutableOopMap {rbp=NarrowOop [0]=NarrowOop }
In the case of compressed oops the picture is the same. Besides, there are open questions about why the uncompressed pointer is sometimes stored in a register, while in other cases a load with a complex addressing mode is used.
Benchmarks: http://cr.openjdk.java.net/~skuksenko/valhalla/cleanup/
(Params, FieldsO, FieldsI)
1. Clearing the ArrayStorageProperties bits in the class pointer.
This one is obvious. We check type equality of both arguments and, to do so, clear the flattened_array and non_null array bits.
That clearing is not required, because we already know that one argument is not an array (and is therefore free of both bits).
2. When null is never passed as an argument to the comparison, explicit null checks are not generated; traps that lead to recompilation are used instead.
In that case the load of the first class pointer is always moved ahead of the mark-word check for the always-locked pattern. That load is useless for all arguments that are not inline types:
-XX:-UseCompressedOops:
0.29% 0x00007f70001be624: cmp %rdx,%rbp
╭ 0x00007f70001be627: je 0x00007f70001be671
0.40% │ 0x00007f70001be629: mov 0x8(%rbp),%r10 ; implicit exception: dispatches to 0x00007f70001be80f
4.77% │ 0x00007f70001be62d: shl $0x3,%r10
3.80% │ 0x00007f70001be631: sar $0x3,%r10
SK: ^^ that load should be moved to the basic block where it is required (and the clearing of the array-properties bits should move there as well).
3.48% │ 0x00007f70001be635: mov $0x405,%r11d
│ 0x00007f70001be63b: and 0x0(%rbp),%r11
0.25% │ 0x00007f70001be63f: cmp $0x405,%r11
│╭ 0x00007f70001be646: jne 0x00007f70001be66d
││ 0x00007f70001be648: mov 0x8(%rdx),%r11 ; implicit exception: dispatches to 0x00007f70001be893
││ 0x00007f70001be64c: shl $0x3,%r11
││ 0x00007f70001be650: sar $0x3,%r11
││ 0x00007f70001be654: cmp %r11,%r10
││╭ 0x00007f70001be657: jne 0x00007f70001be66d ;*if_acmpne {reexecute=0 rethrow=0 return_oop=0 return_vt=0}
-XX:+UseCompressedOops:
0x00007f0d241bf543: cmp %r9d,%ebp
0x00007f0d241bf546: je 0x00007f0d241bf599 ;*if_acmpne {reexecute=0 rethrow=0 return_oop=0 return_vt=0}
; - acmp.FieldsO::cmp0@8 (line 34)
0x00007f0d241bf548: mov 0x8(%r12,%rbp,8),%r10d ; implicit exception: dispatches to 0x00007f0d241bf737
0x00007f0d241bf54d: lea (%r12,%rbp,8),%rsi ;*getfield f {reexecute=0 rethrow=0 return_oop=0 return_vt=0}
; - acmp.FieldsO::cmp0@5 (line 34)
0x00007f0d241bf551: mov $0x405,%r11d
0x00007f0d241bf557: and (%rsi),%r11
0x00007f0d241bf55a: cmp $0x405,%r11
0x00007f0d241bf561: jne 0x00007f0d241bf595
0x00007f0d241bf563: mov 0x8(%r12,%r9,8),%r11d ; implicit exception: dispatches to 0x00007f0d241bf7bb
0x00007f0d241bf568: mov %r11d,%r8d
0x00007f0d241bf56b: and $0x1fffffff,%r8d
0x00007f0d241bf572: and $0x1fffffff,%r10d
0x00007f0d241bf579: mov %r10d,%r11d
0x00007f0d241bf57c: cmp %r8d,%r11d
0x00007f0d241bf57f: jne 0x00007f0d241bf595 ;*if_acmpne {reexecute=0 rethrow=0 return_oop=0 return_vt=0}
; - acmp.FieldsO::cmp0@8 (line 34)
0x00007f0d241bf581: lea (%r12,%r9,8),%rdx ;*getfield f {reexecute=0 rethrow=0 return_oop=0 return_vt=0}
; - acmp.FieldsO::cmp0@1 (line 34)
0x00007f0d241bf585: mov %r9d,(%rsp)
0x00007f0d241bf589: xchg %ax,%ax
0x00007f0d241bf58b: callq 0x00007f0d1c717e80 ; ImmutableOopMap {rbp=NarrowOop [0]=NarrowOop }
In the case of compressed oops the picture is the same. Besides, there are open questions about why the uncompressed pointer is sometimes stored in a register, while in other cases a load with a complex addressing mode is used.
- relates to
-
JDK-8265726 [lworld] C2 compilation fails with assert "uses must be dominated by definitions"
-
- Resolved
-
-
JDK-8271531 [lworld] Implicit null check optimization does not hoist constant load input
-
- Resolved
-