-
Bug
-
Resolution: Fixed
-
P4
-
openjdk8u292, 11, 13
-
b19
-
aarch64
-
generic
Issue | Fix Version | Assignee | Priority | Status | Resolution | Resolved In Build |
---|---|---|---|---|---|---|
JDK-8263783 | 11.0.12 | Fei Yang | P4 | Resolved | Fixed | b01 |
Currently, two instructions are emitted for each minI_rReg/maxI_rReg pattern: cmpw + cselw.
Because these two instructions are always emitted together as a single unit, the GCM (Global Code Motion) phase
is not able to schedule them independently.
Example test case:
public class BCE {
final static int MAX = 1024 * 16;
private static int total = 0;
private static final int[] numbers = new int[32];
public static void main(String[] args) throws Exception {
for (int i = 0; i < MAX; i++) {
try { addAll(-16, 16); } catch (Exception e) {}
try { addAll(0, 32); } catch (Exception e) {}
try { addAll(16, 48); } catch (Exception e) {}
}
Thread.sleep(4000);
System.out.println("total = " + total);
}
public static void addAll(int x, int y) {
for (int i = x; i < y; i++) {
total += numbers[i];
}
}
}
$ java -XX:-TieredCompilation -XX:CompileCommand=compileonly,BCE.addAll -XX:+PrintAssembly BCE
For BCE.addAll method, we see the following C2 JITed code snippet:
# {method} {0x0000ffff55d67610} 'addAll' '(II)V' in 'BCE'
# parm0: c_rarg1 = int
# parm1: c_rarg2 = int
# [sp+0x20] (sp of caller)
;; N1: # B1 <- B16 B14 Freq: 1
;; B1: # B14 B2 <- BLOCK HEAD IS JUNK Freq: 1
0x0000ffffa34148c0: nop
0x0000ffffa34148c4: sub x9, sp, #0x19, lsl #12
0x0000ffffa34148c8: str xzr, [x9]
0x0000ffffa34148cc: sub sp, sp, #0x20
0x0000ffffa34148d0: stp x29, x30, [sp,#16] ;*synchronization entry
; - BCE::addAll@-1 (line 19)
0x0000ffffa34148d4: cmp w1, w2
0x0000ffffa34148d8: b.ge 0x0000ffffa34149f0 ;*if_icmpge {reexecute=0 rethrow=0 return_oop=0}
; - BCE::addAll@4 (line 19)
;; B2: # B3 <- B1 Freq: 0.98039
;; 0x280A108
0x0000ffffa34148dc: mov x10, #0x0 // #0
; {oop(a 'java/lang/ArrayIndexOutOfBoundsException'{0x0000000101e00000})}
0x0000ffffa34148e0: movk x10, #0x1e0, lsl #16
0x0000ffffa34148e4: movk x10, #0x1, lsl #32
0x0000ffffa34148e8: ldr w16, [x10,#16]
0x0000ffffa34148ec: add w10, w1, #0x1
0x0000ffffa34148f0: mov w12, wzr <========
0x0000ffffa34148f4: cmp w10, w12 <========
0x0000ffffa34148f8: csel w10, w10, w12, gt <========
0x0000ffffa34148fc: cmp w10, w2 <========
0x0000ffffa3414900: csel w10, w10, w2, lt <========
;; 0x280A100
0x0000ffffa3414904: mov x18, #0xcde0 // #52704
; {oop([I{0x0000000101c4cde0})}
0x0000ffffa3414908: movk x18, #0x1c4, lsl #16
0x0000ffffa341490c: movk x18, #0x1, lsl #32
;; 0x280A0F8
0x0000ffffa3414910: mov x3, #0xc428 // #50216
; {oop(a 'java/lang/Class'{0x0000000101c4c428} = 'BCE')}
0x0000ffffa3414914: movk x3, #0x1c4, lsl #16
0x0000ffffa3414918: movk x3, #0x1, lsl #32 ;*getstatic total {reexecute=0 rethrow=0 return_oop=0}
After the minI_rReg and maxI_rReg patterns are expanded into two separate instructions, the final csel is no longer tied to its cmp and is scheduled after the intervening mov/movk sequences:
# {method} {0x0000ffff4eb9c610} 'addAll' '(II)V' in 'BCE'
# parm0: c_rarg1 = int
# parm1: c_rarg2 = int
# [sp+0x20] (sp of caller)
;; N1: # B1 <- B16 B14 Freq: 1
;; B1: # B14 B2 <- BLOCK HEAD IS JUNK Freq: 1
0x0000ffff9c249940: nop
0x0000ffff9c249944: sub x9, sp, #0x19, lsl #12
0x0000ffff9c249948: str xzr, [x9]
0x0000ffff9c24994c: sub sp, sp, #0x20
0x0000ffff9c249950: stp x29, x30, [sp,#16] ;*synchronization entry
; - BCE::addAll@-1 (line 19)
0x0000ffff9c249954: cmp w1, w2
0x0000ffff9c249958: b.ge 0x0000ffff9c249a74 ;*if_icmpge {reexecute=0 rethrow=0 return_oop=0}
; - BCE::addAll@4 (line 19)
;; B2: # B3 <- B1 Freq: 0.980392
;; 0x3359E588
0x0000ffff9c24995c: mov x10, #0x0 // #0
; {oop(a 'java/lang/ArrayIndexOutOfBoundsException'{0x0000000101e00000})}
0x0000ffff9c249960: movk x10, #0x1e0, lsl #16
0x0000ffff9c249964: movk x10, #0x1, lsl #32
0x0000ffff9c249968: add w12, w1, #0x1
0x0000ffff9c24996c: ldr w11, [x10,#16]
0x0000ffff9c249970: mov w10, wzr <========
0x0000ffff9c249974: cmp w12, w10 <========
0x0000ffff9c249978: csel w10, w12, w10, gt <========
0x0000ffff9c24997c: cmp w10, w2 <========
;; 0x3359E580
0x0000ffff9c249980: mov x16, #0xcda8 // #52648
; {oop([I{0x0000000101c4cda8})}
0x0000ffff9c249984: movk x16, #0x1c4, lsl #16
0x0000ffff9c249988: movk x16, #0x1, lsl #32
;; 0x3359E578
0x0000ffff9c24998c: mov x18, #0xc3f0 // #50160
; {oop(a 'java/lang/Class'{0x0000000101c4c3f0} = 'BCE')}
0x0000ffff9c249990: movk x18, #0x1c4, lsl #16
0x0000ffff9c249994: movk x18, #0x1, lsl #32
0x0000ffff9c249998: csel w10, w10, w2, lt <======== ;*getstatic total {reexecute=0 rethrow=0 return_oop=0}
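Currently, two instructions are emitted for each minI_rReg/maxI_rReg pattern: cmpw + cselw.
Because these two instructions are always emitted together as a single unit, the GCM (Global Code Motion) phase
is not able to schedule them independently.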
Example test case:
public class BCE {
final static int MAX = 1024 * 16;
private static int total = 0;
private static final int[] numbers = new int[32];
public static void main(String[] args) throws Exception {
for (int i = 0; i < MAX; i++) {
try { addAll(-16, 16); } catch (Exception e) {}
try { addAll(0, 32); } catch (Exception e) {}
try { addAll(16, 48); } catch (Exception e) {}
}
Thread.sleep(4000);
System.out.println("total = " + total);
}
public static void addAll(int x, int y) {
for (int i = x; i < y; i++) {
total += numbers[i];
}
}
}
$ java -XX:-TieredCompilation -XX:CompileCommand=compileonly,BCE.addAll -XX:+PrintAssembly BCE
For BCE.addAll method, we see the following C2 JITed code snippet:
# {method} {0x0000ffff55d67610} 'addAll' '(II)V' in 'BCE'
# parm0: c_rarg1 = int
# parm1: c_rarg2 = int
# [sp+0x20] (sp of caller)
;; N1: # B1 <- B16 B14 Freq: 1
;; B1: # B14 B2 <- BLOCK HEAD IS JUNK Freq: 1
0x0000ffffa34148c0: nop
0x0000ffffa34148c4: sub x9, sp, #0x19, lsl #12
0x0000ffffa34148c8: str xzr, [x9]
0x0000ffffa34148cc: sub sp, sp, #0x20
0x0000ffffa34148d0: stp x29, x30, [sp,#16] ;*synchronization entry
; - BCE::addAll@-1 (line 19)
0x0000ffffa34148d4: cmp w1, w2
0x0000ffffa34148d8: b.ge 0x0000ffffa34149f0 ;*if_icmpge {reexecute=0 rethrow=0 return_oop=0}
; - BCE::addAll@4 (line 19)
;; B2: # B3 <- B1 Freq: 0.98039
;; 0x280A108
0x0000ffffa34148dc: mov x10, #0x0 // #0
; {oop(a 'java/lang/ArrayIndexOutOfBoundsException'{0x0000000101e00000})}
0x0000ffffa34148e0: movk x10, #0x1e0, lsl #16
0x0000ffffa34148e4: movk x10, #0x1, lsl #32
0x0000ffffa34148e8: ldr w16, [x10,#16]
0x0000ffffa34148ec: add w10, w1, #0x1
0x0000ffffa34148f0: mov w12, wzr <========
0x0000ffffa34148f4: cmp w10, w12 <========
0x0000ffffa34148f8: csel w10, w10, w12, gt <========
0x0000ffffa34148fc: cmp w10, w2 <========
0x0000ffffa3414900: csel w10, w10, w2, lt <========
;; 0x280A100
0x0000ffffa3414904: mov x18, #0xcde0 // #52704
; {oop([I{0x0000000101c4cde0})}
0x0000ffffa3414908: movk x18, #0x1c4, lsl #16
0x0000ffffa341490c: movk x18, #0x1, lsl #32
;; 0x280A0F8
0x0000ffffa3414910: mov x3, #0xc428 // #50216
; {oop(a 'java/lang/Class'{0x0000000101c4c428} = 'BCE')}
0x0000ffffa3414914: movk x3, #0x1c4, lsl #16
0x0000ffffa3414918: movk x3, #0x1, lsl #32 ;*getstatic total {reexecute=0 rethrow=0 return_oop=0}
After the minI_rReg and maxI_rReg patterns are expanded into two separate instructions, the final csel is no longer tied to its cmp and is scheduled after the intervening mov/movk sequences:
# {method} {0x0000ffff4eb9c610} 'addAll' '(II)V' in 'BCE'
# parm0: c_rarg1 = int
# parm1: c_rarg2 = int
# [sp+0x20] (sp of caller)
;; N1: # B1 <- B16 B14 Freq: 1
;; B1: # B14 B2 <- BLOCK HEAD IS JUNK Freq: 1
0x0000ffff9c249940: nop
0x0000ffff9c249944: sub x9, sp, #0x19, lsl #12
0x0000ffff9c249948: str xzr, [x9]
0x0000ffff9c24994c: sub sp, sp, #0x20
0x0000ffff9c249950: stp x29, x30, [sp,#16] ;*synchronization entry
; - BCE::addAll@-1 (line 19)
0x0000ffff9c249954: cmp w1, w2
0x0000ffff9c249958: b.ge 0x0000ffff9c249a74 ;*if_icmpge {reexecute=0 rethrow=0 return_oop=0}
; - BCE::addAll@4 (line 19)
;; B2: # B3 <- B1 Freq: 0.980392
;; 0x3359E588
0x0000ffff9c24995c: mov x10, #0x0 // #0
; {oop(a 'java/lang/ArrayIndexOutOfBoundsException'{0x0000000101e00000})}
0x0000ffff9c249960: movk x10, #0x1e0, lsl #16
0x0000ffff9c249964: movk x10, #0x1, lsl #32
0x0000ffff9c249968: add w12, w1, #0x1
0x0000ffff9c24996c: ldr w11, [x10,#16]
0x0000ffff9c249970: mov w10, wzr <========
0x0000ffff9c249974: cmp w12, w10 <========
0x0000ffff9c249978: csel w10, w12, w10, gt <========
0x0000ffff9c24997c: cmp w10, w2 <========
;; 0x3359E580
0x0000ffff9c249980: mov x16, #0xcda8 // #52648
; {oop([I{0x0000000101c4cda8})}
0x0000ffff9c249984: movk x16, #0x1c4, lsl #16
0x0000ffff9c249988: movk x16, #0x1, lsl #32
;; 0x3359E578
0x0000ffff9c24998c: mov x18, #0xc3f0 // #50160
; {oop(a 'java/lang/Class'{0x0000000101c4c3f0} = 'BCE')}
0x0000ffff9c249990: movk x18, #0x1c4, lsl #16
0x0000ffff9c249994: movk x18, #0x1, lsl #32
0x0000ffff9c249998: csel w10, w10, w2, lt <======== ;*getstatic total {reexecute=0 rethrow=0 return_oop=0}
- backported by
-
JDK-8263783 aarch64: expand minI_rReg and maxI_rReg patterns into separate instructions
- Resolved
- relates to
-
JDK-8153837 AArch64: Handle special cases for MaxINode & MinINode
- Resolved