-
Enhancement
-
Resolution: Unresolved
-
P4
-
25
-
generic
The following SIMD kernel makes heavy use of broadcast constants.
import jdk.incubator.vector.*;
/**
 * Reproducer for the missing constant folding of broadcast vector constants.
 *
 * <p>{@link #micro(int)} combines three broadcast constants and one broadcast
 * scalar argument lanewise, and {@link #main(String[])} calls it in two
 * identical loops, timing only the second one.
 */
public class test {
// Vector species used for every broadcast in micro().
public static final VectorSpecies<Integer> ISP = IntVector.SPECIES_PREFERRED;
/**
 * Computes {@code (10 + 20) * 30 - i} lanewise on broadcast vectors and
 * returns the value held in lane 1.
 *
 * @param i scalar subtracted from every lane of the intermediate product
 * @return lane 1 of the resulting vector
 */
public static int micro(int i) {
// All three operands below are compile-time constants replicated across the lanes.
IntVector vec1 = IntVector.broadcast(ISP, 10);
IntVector vec2 = IntVector.broadcast(ISP, 20);
IntVector vec3 = IntVector.broadcast(ISP, 30);
// (vec1 + vec2) * vec3 - i, evaluated lane by lane; only lane 1 is read back.
return vec1.lanewise(VectorOperators.ADD, vec2)
.lanewise(VectorOperators.MUL, vec3)
.lanewise(VectorOperators.SUB, i)
.lane(1);
}
/**
 * Runs {@link #micro(int)} 100000 times untimed, then 100000 times timed,
 * and prints the elapsed milliseconds of the timed loop together with the
 * accumulated result.
 *
 * @param args unused
 */
public static void main(String [] args) {
int res = 0;
// Untimed pass; presumably a warm-up so the timed pass runs JIT-compiled code.
for (int i = 0 ; i < 100000; i++) {
res += micro(i);
}
// Timed pass over the same inputs.
long t1 = System.currentTimeMillis();
for (int i = 0 ; i < 100000; i++) {
res += micro(i);
}
long t2 = System.currentTimeMillis();
// res is printed alongside the time, so the result of every micro() call is observable.
System.out.println("[time] " + (t2-t1) + " ms [res] " + res);
}
}
There is an opportunity to constant-fold the IR generated for the above kernel, as follows.
Existing Ideal Graph:
AFTER: BEFORE_MATCHING
0 Root === 0 2 [[ 0 1 9 21 20 17 11 ]] inner
2 Return === 3 4 5 6 7 returns 8 [[ 0 ]]
3 Parm === 9 [[ 2 ]] Control !jvms: test::micro @ bci:-1 (line 8)
4 Parm === 9 [[ 2 ]] I_O !jvms: test::micro @ bci:-1 (line 8)
5 Parm === 9 [[ 2 ]] Memory Memory: @BotPTR *+bot, idx=Bot; !jvms: test::micro @ bci:-1 (line 8)
6 Parm === 9 [[ 2 ]] FramePtr !jvms: test::micro @ bci:-1 (line 8)
7 Parm === 9 [[ 2 ]] ReturnAdr !jvms: test::micro @ bci:-1 (line 8)
8 ExtractI === _ 10 11 [[ 2 ]] !orig=[1981] !jvms: Int512Vector::laneHelper @ bci:16 (line 543) Int512Vector::lane @ bci:88 (line 522) test::micro @ bci:50 (line 15)
9 Start === 9 0 [[ 9 3 4 5 6 7 16 ]] #{0:control, 1:abIO, 2:memory, 3:rawptr:BotPTR, 4:return_address, 5:int}
10 SubVI === _ 12 13 [[ 8 ]] #vectorz<I,16> !jvms: IntVector::lanewiseTemplate @ bci:154 (line 798) Int512Vector::lanewise @ bci:3 (line 278) Int512Vector::lanewise @ bci:3 (line 43) IntVector::lanewise @ bci:43 (line 944) test::micro @ bci:46 (line 14)
11 ConI === 0 [[ 8 ]] #int:1
12 MulVI === _ 14 15 [[ 10 ]] #vectorz<I,16> !jvms: IntVector::lanewiseTemplate @ bci:154 (line 798) Int512Vector::lanewise @ bci:3 (line 278) Int512Vector::lanewise @ bci:3 (line 43) test::micro @ bci:39 (line 13)
13 Replicate === _ 16 [[ 10 ]] #vectorz<I,16> !jvms: IntVector$IntSpecies::broadcastBits @ bci:19 (line 3884) IntVector$IntSpecies::broadcast @ bci:5 (line 3893) IntVector::broadcastTemplate @ bci:7 (line 631) Int512Vector::broadcast @ bci:2 (line 127) Int512Vector::broadcast @ bci:2 (line 43) IntVector::lanewise @ bci:40 (line 944) test::micro @ bci:46 (line 14)
14 Replicate === _ 17 [[ 12 ]] #vectorz<I,16> !jvms: IntVector$IntSpecies::broadcastBits @ bci:19 (line 3884) IntVector$IntSpecies::broadcast @ bci:5 (line 3893) IntVector::broadcast @ bci:7 (line 624) test::micro @ bci:23 (line 10)
15 AddVI === _ 18 19 [[ 12 ]] #vectorz<I,16> !jvms: IntVector::lanewiseTemplate @ bci:154 (line 798) Int512Vector::lanewise @ bci:3 (line 278) Int512Vector::lanewise @ bci:3 (line 43) test::micro @ bci:32 (line 12)
16 Parm === 9 [[ 13 ]] Parm0: int !jvms: test::micro @ bci:-1 (line 8)
17 ConI === 0 [[ 14 ]] #int:30
18 Replicate === _ 20 [[ 15 ]] #vectorz<I,16> !jvms: IntVector$IntSpecies::broadcastBits @ bci:19 (line 3884) IntVector$IntSpecies::broadcast @ bci:5 (line 3893) IntVector::broadcast @ bci:7 (line 624) test::micro @ bci:5 (line 8)
19 Replicate === _ 21 [[ 15 ]] #vectorz<I,16> !jvms: IntVector$IntSpecies::broadcastBits @ bci:19 (line 3884) IntVector$IntSpecies::broadcast @ bci:5 (line 3893) IntVector::broadcast @ bci:7 (line 624) test::micro @ bci:14 (line 9)
20 ConI === 0 [[ 18 ]] #int:10
21 ConI === 0 [[ 19 ]] #int:20
[time] 90 ms [res] -1229965408
Proposed IR Graph:
AFTER: BEFORE_MATCHING
0 Root === 0 2 [[ 0 1 9 13 11 ]] inner
2 Return === 3 4 5 6 7 returns 8 [[ 0 ]]
3 Parm === 9 [[ 2 ]] Control !jvms: test::micro @ bci:-1 (line 8)
4 Parm === 9 [[ 2 ]] I_O !jvms: test::micro @ bci:-1 (line 8)
5 Parm === 9 [[ 2 ]] Memory Memory: @BotPTR *+bot, idx=Bot; !jvms: test::micro @ bci:-1 (line 8)
6 Parm === 9 [[ 2 ]] FramePtr !jvms: test::micro @ bci:-1 (line 8)
7 Parm === 9 [[ 2 ]] ReturnAdr !jvms: test::micro @ bci:-1 (line 8)
8 ExtractI === _ 10 11 [[ 2 ]] !orig=[1981] !jvms: Int512Vector::laneHelper @ bci:16 (line 543) Int512Vector::lane @ bci:88 (line 522) test::micro @ bci:50 (line 15)
9 Start === 9 0 [[ 9 3 4 5 6 7 14 ]] #{0:control, 1:abIO, 2:memory, 3:rawptr:BotPTR, 4:return_address, 5:int}
10 Replicate === _ 12 [[ 8 ]] #vectorz<I,16> !jvms: IntVector::lanewiseTemplate @ bci:154 (line 798) Int512Vector::lanewise @ bci:3 (line 278) Int512Vector::lanewise @ bci:3 (line 43) IntVector::lanewise @ bci:43 (line 944) test::micro @ bci:46 (line 14)
11 ConI === 0 [[ 8 ]] #int:1
12 SubI === _ 13 14 [[ 10 ]] !jvms: IntVector::lanewiseTemplate @ bci:154 (line 798) Int512Vector::lanewise @ bci:3 (line 278) Int512Vector::lanewise @ bci:3 (line 43) IntVector::lanewise @ bci:43 (line 944) test::micro @ bci:46 (line 14)
13 ConI === 0 [[ 12 ]] #int:900
14 Parm === 9 [[ 12 ]] Parm0: int !jvms: test::micro @ bci:-1 (line 8)
[time] 73 ms [res] -1229965408
import jdk.incubator.vector.*;
/**
 * Reproducer for the missing constant folding of broadcast vector constants.
 *
 * <p>{@link #micro(int)} combines three broadcast constants and one broadcast
 * scalar argument lanewise, and {@link #main(String[])} calls it in two
 * identical loops, timing only the second one.
 */
public class test {
// Vector species used for every broadcast in micro().
public static final VectorSpecies<Integer> ISP = IntVector.SPECIES_PREFERRED;
/**
 * Computes {@code (10 + 20) * 30 - i} lanewise on broadcast vectors and
 * returns the value held in lane 1.
 *
 * @param i scalar subtracted from every lane of the intermediate product
 * @return lane 1 of the resulting vector
 */
public static int micro(int i) {
// All three operands below are compile-time constants replicated across the lanes.
IntVector vec1 = IntVector.broadcast(ISP, 10);
IntVector vec2 = IntVector.broadcast(ISP, 20);
IntVector vec3 = IntVector.broadcast(ISP, 30);
// (vec1 + vec2) * vec3 - i, evaluated lane by lane; only lane 1 is read back.
return vec1.lanewise(VectorOperators.ADD, vec2)
.lanewise(VectorOperators.MUL, vec3)
.lanewise(VectorOperators.SUB, i)
.lane(1);
}
/**
 * Runs {@link #micro(int)} 100000 times untimed, then 100000 times timed,
 * and prints the elapsed milliseconds of the timed loop together with the
 * accumulated result.
 *
 * @param args unused
 */
public static void main(String [] args) {
int res = 0;
// Untimed pass; presumably a warm-up so the timed pass runs JIT-compiled code.
for (int i = 0 ; i < 100000; i++) {
res += micro(i);
}
// Timed pass over the same inputs.
long t1 = System.currentTimeMillis();
for (int i = 0 ; i < 100000; i++) {
res += micro(i);
}
long t2 = System.currentTimeMillis();
// res is printed alongside the time, so the result of every micro() call is observable.
System.out.println("[time] " + (t2-t1) + " ms [res] " + res);
}
}
There is an opportunity to constant-fold the IR generated for the above kernel, as follows.
Existing Ideal Graph:
AFTER: BEFORE_MATCHING
0 Root === 0 2 [[ 0 1 9 21 20 17 11 ]] inner
2 Return === 3 4 5 6 7 returns 8 [[ 0 ]]
3 Parm === 9 [[ 2 ]] Control !jvms: test::micro @ bci:-1 (line 8)
4 Parm === 9 [[ 2 ]] I_O !jvms: test::micro @ bci:-1 (line 8)
5 Parm === 9 [[ 2 ]] Memory Memory: @BotPTR *+bot, idx=Bot; !jvms: test::micro @ bci:-1 (line 8)
6 Parm === 9 [[ 2 ]] FramePtr !jvms: test::micro @ bci:-1 (line 8)
7 Parm === 9 [[ 2 ]] ReturnAdr !jvms: test::micro @ bci:-1 (line 8)
8 ExtractI === _ 10 11 [[ 2 ]] !orig=[1981] !jvms: Int512Vector::laneHelper @ bci:16 (line 543) Int512Vector::lane @ bci:88 (line 522) test::micro @ bci:50 (line 15)
9 Start === 9 0 [[ 9 3 4 5 6 7 16 ]] #{0:control, 1:abIO, 2:memory, 3:rawptr:BotPTR, 4:return_address, 5:int}
10 SubVI === _ 12 13 [[ 8 ]] #vectorz<I,16> !jvms: IntVector::lanewiseTemplate @ bci:154 (line 798) Int512Vector::lanewise @ bci:3 (line 278) Int512Vector::lanewise @ bci:3 (line 43) IntVector::lanewise @ bci:43 (line 944) test::micro @ bci:46 (line 14)
11 ConI === 0 [[ 8 ]] #int:1
12 MulVI === _ 14 15 [[ 10 ]] #vectorz<I,16> !jvms: IntVector::lanewiseTemplate @ bci:154 (line 798) Int512Vector::lanewise @ bci:3 (line 278) Int512Vector::lanewise @ bci:3 (line 43) test::micro @ bci:39 (line 13)
13 Replicate === _ 16 [[ 10 ]] #vectorz<I,16> !jvms: IntVector$IntSpecies::broadcastBits @ bci:19 (line 3884) IntVector$IntSpecies::broadcast @ bci:5 (line 3893) IntVector::broadcastTemplate @ bci:7 (line 631) Int512Vector::broadcast @ bci:2 (line 127) Int512Vector::broadcast @ bci:2 (line 43) IntVector::lanewise @ bci:40 (line 944) test::micro @ bci:46 (line 14)
14 Replicate === _ 17 [[ 12 ]] #vectorz<I,16> !jvms: IntVector$IntSpecies::broadcastBits @ bci:19 (line 3884) IntVector$IntSpecies::broadcast @ bci:5 (line 3893) IntVector::broadcast @ bci:7 (line 624) test::micro @ bci:23 (line 10)
15 AddVI === _ 18 19 [[ 12 ]] #vectorz<I,16> !jvms: IntVector::lanewiseTemplate @ bci:154 (line 798) Int512Vector::lanewise @ bci:3 (line 278) Int512Vector::lanewise @ bci:3 (line 43) test::micro @ bci:32 (line 12)
16 Parm === 9 [[ 13 ]] Parm0: int !jvms: test::micro @ bci:-1 (line 8)
17 ConI === 0 [[ 14 ]] #int:30
18 Replicate === _ 20 [[ 15 ]] #vectorz<I,16> !jvms: IntVector$IntSpecies::broadcastBits @ bci:19 (line 3884) IntVector$IntSpecies::broadcast @ bci:5 (line 3893) IntVector::broadcast @ bci:7 (line 624) test::micro @ bci:5 (line 8)
19 Replicate === _ 21 [[ 15 ]] #vectorz<I,16> !jvms: IntVector$IntSpecies::broadcastBits @ bci:19 (line 3884) IntVector$IntSpecies::broadcast @ bci:5 (line 3893) IntVector::broadcast @ bci:7 (line 624) test::micro @ bci:14 (line 9)
20 ConI === 0 [[ 18 ]] #int:10
21 ConI === 0 [[ 19 ]] #int:20
[time] 90 ms [res] -1229965408
Proposed IR Graph:
AFTER: BEFORE_MATCHING
0 Root === 0 2 [[ 0 1 9 13 11 ]] inner
2 Return === 3 4 5 6 7 returns 8 [[ 0 ]]
3 Parm === 9 [[ 2 ]] Control !jvms: test::micro @ bci:-1 (line 8)
4 Parm === 9 [[ 2 ]] I_O !jvms: test::micro @ bci:-1 (line 8)
5 Parm === 9 [[ 2 ]] Memory Memory: @BotPTR *+bot, idx=Bot; !jvms: test::micro @ bci:-1 (line 8)
6 Parm === 9 [[ 2 ]] FramePtr !jvms: test::micro @ bci:-1 (line 8)
7 Parm === 9 [[ 2 ]] ReturnAdr !jvms: test::micro @ bci:-1 (line 8)
8 ExtractI === _ 10 11 [[ 2 ]] !orig=[1981] !jvms: Int512Vector::laneHelper @ bci:16 (line 543) Int512Vector::lane @ bci:88 (line 522) test::micro @ bci:50 (line 15)
9 Start === 9 0 [[ 9 3 4 5 6 7 14 ]] #{0:control, 1:abIO, 2:memory, 3:rawptr:BotPTR, 4:return_address, 5:int}
10 Replicate === _ 12 [[ 8 ]] #vectorz<I,16> !jvms: IntVector::lanewiseTemplate @ bci:154 (line 798) Int512Vector::lanewise @ bci:3 (line 278) Int512Vector::lanewise @ bci:3 (line 43) IntVector::lanewise @ bci:43 (line 944) test::micro @ bci:46 (line 14)
11 ConI === 0 [[ 8 ]] #int:1
12 SubI === _ 13 14 [[ 10 ]] !jvms: IntVector::lanewiseTemplate @ bci:154 (line 798) Int512Vector::lanewise @ bci:3 (line 278) Int512Vector::lanewise @ bci:3 (line 43) IntVector::lanewise @ bci:43 (line 944) test::micro @ bci:46 (line 14)
13 ConI === 0 [[ 12 ]] #int:900
14 Parm === 9 [[ 12 ]] Parm0: int !jvms: test::micro @ bci:-1 (line 8)
[time] 73 ms [res] -1229965408