Keeping different rules in two files, with vReg operands in one and vecX/vecD operands in the other, makes the code hard to maintain. We can try to use vReg for all NEON rules and merge the SVE and NEON rules. For example:
In aarch64_sve.ad:
instruct vaddI(vReg dst, vReg src1, vReg src2) %{
  predicate(UseSVE > 0);
  match(Set dst (AddVI src1 src2));
  ins_cost(SVE_COST);
  format %{ "sve_add $dst, $src1, $src2\t # vector (sve) (S)" %}
  ins_encode %{
    __ sve_add(as_FloatRegister($dst$$reg), __ S,
               as_FloatRegister($src1$$reg),
               as_FloatRegister($src2$$reg));
  %}
  ins_pipe(pipe_slow);
%}
In aarch64_neon.ad:
instruct vadd2I(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AddVI src1 src2));
  ins_cost(INSN_COST);
  format %{ "addv $dst,$src1,$src2\t# vector (2S)" %}
  ins_encode %{
    __ addv(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop64);
%}

instruct vadd4I(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (AddVI src1 src2));
  ins_cost(INSN_COST);
  format %{ "addv $dst,$src1,$src2\t# vector (4S)" %}
  ins_encode %{
    __ addv(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop128);
%}
These rules can be merged into a single one, like:
instruct vaddI(vReg dst, vReg src1, vReg src2) %{
  match(Set dst (AddVI src1 src2));
  ins_cost(SVE_COST);
  format %{ "vadd $dst, $src1, $src2\t # vector (I)" %}
  ins_encode %{
    uint size = Matcher::vector_length(this);
    if (size <= 4) {
      // 2 or 4 ints fit in a 64-bit/128-bit NEON register.
      __ addv(as_FloatRegister($dst$$reg), (size == 4) ? __ T4S : __ T2S,
              as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
    } else {
      // Longer vectors are only possible with SVE.
      assert(UseSVE > 0, "should be sve");
      __ sve_add(as_FloatRegister($dst$$reg), __ S,
                 as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
    }
  %}
  ins_pipe(pipe_slow);
%}
To make vReg match both VecA and VecD/VecX, we can use the generic vector operand approach:
const uint Matcher::vector_ideal_reg(int len) {
  if (UseSVE > 0 && 16 < len && len <= 256) {
    return Op_VecA;
  }
  switch (len) {
    // For 16-bit/32-bit mask vector, reuse VecD.
    case  2:
    case  4:
    case  8: return Op_VecD;
    case 16: return Op_VecX;
  }
  ShouldNotReachHere();
  return 0;
}
MachOper* Matcher::pd_specialize_generic_vector_operand(MachOper* original_opnd, uint ideal_reg, bool is_temp) {
  switch (ideal_reg) {
    case Op_VecA: return new vecAOper();
    case Op_VecX: return new vecXOper();
    case Op_VecD: return new vecDOper();
  }
  ShouldNotReachHere();
  return NULL;
}
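For the merged rule to match, the vReg operand itself also has to cover all of VecA, VecD and VecX. A minimal sketch of such an operand declaration is shown below; the register class name passed to ALLOC_IN_RC is illustrative and would depend on how the vector register classes are defined for NEON-only vs. SVE builds:

operand vReg()
%{
  constraint(ALLOC_IN_RC(dynamic)); // illustrative register class spanning NEON and SVE vector registers
  match(VecA);
  match(VecD);
  match(VecX);

  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}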
Relates to: JDK-8282875 AArch64: [vectorapi] Optimize Vector.reduceLane for SVE 64/128 vector size