```
// Matcher rule for a masked vector load: matches (LoadVectorMasked mem pg) and
// sets dst. The governing mask pg is a predicate-register operand (pRegGov).
// The rule is currently guarded by the predicate UseSVE > 0.
instruct loadV_masked(vReg dst, vmemA mem, pRegGov pg) %{
predicate(UseSVE > 0);
match(Set dst (LoadVectorMasked mem pg));
format %{ "loadV_masked $dst, $pg, $mem" %}
ins_encode %{
// Element type of the vector produced by this node.
BasicType bt = Matcher::vector_element_basic_type(this);
// Delegate to the shared predicated load/store helper with is_store == false,
// passing the decomposed address (base, index, scale, disp) of mem.
// NOTE(review): bt is passed for two separate type parameters; the helper's
// signature is not visible here -- confirm what each one means.
loadStoreA_predicated(C2_MacroAssembler(&cbuf), /* is_store */ false, $dst$$FloatRegister,
$pg$$PRegister, bt, bt, $mem->opcode(),
as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
%}
ins_pipe(pipe_slow);
%}
```
For rules that take a mask input, the predicate `UseSVE > 0` is unnecessary, because we should already guarantee that this kind of node pattern cannot be generated on non-SVE machines.
```
// Matcher rule for VectorMaskTrueCount when the mask is held in a vector
// register (vReg src). Guarded by the predicate UseSVE == 0. tmp is declared
// as a TEMP vector register and is overwritten by the encoding.
instruct vmask_truecount_neon(iRegINoSp dst, vReg src, vReg tmp) %{
predicate(UseSVE == 0);
match(Set dst (VectorMaskTrueCount src));
effect(TEMP tmp);
format %{ "vmask_truecount_neon $dst, $src\t# KILL $tmp" %}
ins_encode %{
// Input "src" is a vector of boolean represented as bytes with
// 0x00/0x01 as element values.
BasicType bt = Matcher::vector_element_basic_type(this, $src);
assert(bt == T_BOOLEAN, "unsupported type");
uint length_in_bytes = Matcher::vector_length_in_bytes(this, $src);
// Add all byte lanes of src into tmp. Because every lane is 0x00 or 0x01,
// the sum equals the number of true lanes. A 16-byte vector uses the T16B
// arrangement; any other length uses T8B.
__ addv($tmp$$FloatRegister, length_in_bytes == 16 ? __ T16B : __ T8B,
$src$$FloatRegister);
// Move byte lane 0 of tmp (the sum) into the general-purpose result register.
__ umov($dst$$Register, $tmp$$FloatRegister, __ B, 0);
%}
ins_pipe(pipe_slow);
%}
// Matcher rule for VectorMaskTrueCount when the mask is held in a predicate
// register (pReg src). Guarded by the predicate UseSVE > 0.
instruct vmask_truecount_sve(iRegINoSp dst, pReg src) %{
predicate(UseSVE > 0);
match(Set dst (VectorMaskTrueCount src));
format %{ "vmask_truecount_sve $dst, $src" %}
ins_encode %{
BasicType bt = Matcher::vector_element_basic_type(this, $src);
// Count the true elements of src at the element size derived from bt and
// write the count to dst.
// NOTE(review): ptrue is presumably the all-true governing predicate -- confirm.
__ sve_cntp($dst$$Register, __ elemType_to_regVariant(bt),
ptrue, $src$$PRegister);
%}
ins_pipe(pipe_slow);
%}
```
For rules that differ in their input, one taking a mask input and the other a vector input, neither `UseSVE > 0` nor `UseSVE == 0` is needed as a predicate: they are different rules, and the matcher can tell them apart on its own.
Removing these extra predicates could help reduce lib code size.
```
// Matcher rule for a masked vector load: matches (LoadVectorMasked mem pg) and
// sets dst. The governing mask pg is a predicate-register operand (pRegGov).
// The rule is currently guarded by the predicate UseSVE > 0.
instruct loadV_masked(vReg dst, vmemA mem, pRegGov pg) %{
predicate(UseSVE > 0);
match(Set dst (LoadVectorMasked mem pg));
format %{ "loadV_masked $dst, $pg, $mem" %}
ins_encode %{
// Element type of the vector produced by this node.
BasicType bt = Matcher::vector_element_basic_type(this);
// Delegate to the shared predicated load/store helper with is_store == false,
// passing the decomposed address (base, index, scale, disp) of mem.
// NOTE(review): bt is passed for two separate type parameters; the helper's
// signature is not visible here -- confirm what each one means.
loadStoreA_predicated(C2_MacroAssembler(&cbuf), /* is_store */ false, $dst$$FloatRegister,
$pg$$PRegister, bt, bt, $mem->opcode(),
as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
%}
ins_pipe(pipe_slow);
%}
```
For rules that take a mask input, the predicate `UseSVE > 0` is unnecessary, because we should already guarantee that this kind of node pattern cannot be generated on non-SVE machines.
```
// Matcher rule for VectorMaskTrueCount when the mask is held in a vector
// register (vReg src). Guarded by the predicate UseSVE == 0. tmp is declared
// as a TEMP vector register and is overwritten by the encoding.
instruct vmask_truecount_neon(iRegINoSp dst, vReg src, vReg tmp) %{
predicate(UseSVE == 0);
match(Set dst (VectorMaskTrueCount src));
effect(TEMP tmp);
format %{ "vmask_truecount_neon $dst, $src\t# KILL $tmp" %}
ins_encode %{
// Input "src" is a vector of boolean represented as bytes with
// 0x00/0x01 as element values.
BasicType bt = Matcher::vector_element_basic_type(this, $src);
assert(bt == T_BOOLEAN, "unsupported type");
uint length_in_bytes = Matcher::vector_length_in_bytes(this, $src);
// Add all byte lanes of src into tmp. Because every lane is 0x00 or 0x01,
// the sum equals the number of true lanes. A 16-byte vector uses the T16B
// arrangement; any other length uses T8B.
__ addv($tmp$$FloatRegister, length_in_bytes == 16 ? __ T16B : __ T8B,
$src$$FloatRegister);
// Move byte lane 0 of tmp (the sum) into the general-purpose result register.
__ umov($dst$$Register, $tmp$$FloatRegister, __ B, 0);
%}
ins_pipe(pipe_slow);
%}
// Matcher rule for VectorMaskTrueCount when the mask is held in a predicate
// register (pReg src). Guarded by the predicate UseSVE > 0.
instruct vmask_truecount_sve(iRegINoSp dst, pReg src) %{
predicate(UseSVE > 0);
match(Set dst (VectorMaskTrueCount src));
format %{ "vmask_truecount_sve $dst, $src" %}
ins_encode %{
BasicType bt = Matcher::vector_element_basic_type(this, $src);
// Count the true elements of src at the element size derived from bt and
// write the count to dst.
// NOTE(review): ptrue is presumably the all-true governing predicate -- confirm.
__ sve_cntp($dst$$Register, __ elemType_to_regVariant(bt),
ptrue, $src$$PRegister);
%}
ins_pipe(pipe_slow);
%}
```
For rules that differ in their input, one taking a mask input and the other a vector input, neither `UseSVE > 0` nor `UseSVE == 0` is needed as a predicate: they are different rules, and the matcher can tell them apart on its own.
Removing these extra predicates could help reduce lib code size.
- links to
-
Review openjdk/jdk/14112