-
Bug
-
Resolution: Unresolved
-
P2
-
17, repo-panama
-
generic
-
generic
reproduce:
run attachment after cherry-pick JDK-8265956
reason :
1. In interpreter:
static int partiallyWrapIndex(int index, int laneCount) {
return checkIndex0(index, laneCount, (byte)-1);
}
@ForceInline
static int checkIndex0(int index, int laneCount, byte mode) {
int wrapped = VectorIntrinsics.wrapToRange(index, laneCount);
if (mode == 0 || wrapped == index) { // NOTE here
return wrapped;
}
if (mode < 0) {
return wrapped - laneCount; // special mode for internal storage
}
throw checkIndexFailed(index, laneCount);
}
@ForceInline
static int wrapToRange(int index, int size) {
if ((size & (size - 1)) == 0) {
// Size is zero or a power of two, so we got this.
return index & (size - 1);
} else {
return wrapToRangeNPOT(index, size);
}
}
2. However, we have this intrinsic in
src/hotspot/share/opto/vectorIntrinsics.cpp [jdk/jdk]
386 } else {
387 ConINode* pred_node = (ConINode*)gvn().makecon(TypeInt::make(1)); // BoolTest::gt here
388 Node * lane_cnt = gvn().makecon(TypeInt::make(num_elem));
389 Node * bcast_lane_cnt = gvn().transform(VectorNode::scalar2vector(lane_cnt, num_elem, type_bt));
390 Node* mask = gvn().transform(new VectorMaskCmpNode(BoolTest::ge, bcast_lane_cnt, res, pred_node, vt));// here BoolTest::ge != 1 (which means BoolTest::gt)
3. In aarch64 neon backend, we use `BoolTest::ge` for generated code:
// cond is useless here
instruct vcmge8B(vecD dst, vecD src1, vecD src2, immI cond)
%{
predicate(n->as_Vector()->length() == 8 &&
n->as_VectorMaskCmp()->get_predicate() == BoolTest::ge &&
n->in(1)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
format %{ "cmge $dst, T8B, $src1, $src2\t# vector cmp (8B)" %}
ins_cost(INSN_COST);
ins_encode %{
__ cmge(as_FloatRegister($dst$$reg), __ T8B,
as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
%}
ins_pipe(vdop64);
%}
However, the x86 backend does use cond (= 1, i.e. BoolTest::gt), so x86 is right on jdk/jdk:
instruct vcmp(legVec dst, legVec src1, legVec src2, immI8 cond, rRegP scratch) %{
predicate(vector_length_in_bytes(n->in(1)->in(1)) >= 8 && // src1
vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1
is_integral_type(vector_element_basic_type(n->in(1)->in(1)))); // src1
match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
effect(TEMP scratch);
format %{ "vector_compare $dst,$src1,$src2,$cond\t! using $scratch as TEMP" %}
ins_encode %{
int vlen_enc = vector_length_encoding(this, $src1);
Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
Assembler::Width ww = widthForType(vector_element_basic_type(this, $src1));
__ vpcmpCCW($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, ww, vlen_enc, $scratch$$Register);
%}
ins_pipe( pipe_slow );
%}
4. In repo panama-vector, both of them are wrong, because the IR is fixed:
455 } else {
456 ConINode* pred_node = (ConINode*)gvn().makecon(TypeInt::make(BoolTest::ge));// wrong here
457 Node * lane_cnt = gvn().makecon(TypeInt::make(num_elem));
458 Node * bcast_lane_cnt = gvn().transform(VectorNode::scalar2vector(lane_cnt, num_elem, type_bt));
459 Node* mask = gvn().transform(new VectorMaskCmpNode(BoolTest::ge, bcast_lane_cnt, res, pred_node, vt));
5. In fact, the real comparison here is an unsigned one (BoolTest::ugt).
run attachment after cherry-pick JDK-8265956
reason :
1. In interpreter:
static int partiallyWrapIndex(int index, int laneCount) {
return checkIndex0(index, laneCount, (byte)-1);
}
@ForceInline
static int checkIndex0(int index, int laneCount, byte mode) {
int wrapped = VectorIntrinsics.wrapToRange(index, laneCount);
if (mode == 0 || wrapped == index) { // NOTE here
return wrapped;
}
if (mode < 0) {
return wrapped - laneCount; // special mode for internal storage
}
throw checkIndexFailed(index, laneCount);
}
@ForceInline
static int wrapToRange(int index, int size) {
if ((size & (size - 1)) == 0) {
// Size is zero or a power of two, so we got this.
return index & (size - 1);
} else {
return wrapToRangeNPOT(index, size);
}
}
2. However, we have this intrinsic in
src/hotspot/share/opto/vectorIntrinsics.cpp [jdk/jdk]
386 } else {
387 ConINode* pred_node = (ConINode*)gvn().makecon(TypeInt::make(1)); // BoolTest::gt here
388 Node * lane_cnt = gvn().makecon(TypeInt::make(num_elem));
389 Node * bcast_lane_cnt = gvn().transform(VectorNode::scalar2vector(lane_cnt, num_elem, type_bt));
390 Node* mask = gvn().transform(new VectorMaskCmpNode(BoolTest::ge, bcast_lane_cnt, res, pred_node, vt));// here BoolTest::ge != 1 (which means BoolTest::gt)
3. In aarch64 neon backend, we use `BoolTest::ge` for generated code:
// cond is useless here
instruct vcmge8B(vecD dst, vecD src1, vecD src2, immI cond)
%{
predicate(n->as_Vector()->length() == 8 &&
n->as_VectorMaskCmp()->get_predicate() == BoolTest::ge &&
n->in(1)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
format %{ "cmge $dst, T8B, $src1, $src2\t# vector cmp (8B)" %}
ins_cost(INSN_COST);
ins_encode %{
__ cmge(as_FloatRegister($dst$$reg), __ T8B,
as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
%}
ins_pipe(vdop64);
%}
However, the x86 backend does use cond (= 1, i.e. BoolTest::gt), so x86 is right on jdk/jdk:
instruct vcmp(legVec dst, legVec src1, legVec src2, immI8 cond, rRegP scratch) %{
predicate(vector_length_in_bytes(n->in(1)->in(1)) >= 8 && // src1
vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1
is_integral_type(vector_element_basic_type(n->in(1)->in(1)))); // src1
match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
effect(TEMP scratch);
format %{ "vector_compare $dst,$src1,$src2,$cond\t! using $scratch as TEMP" %}
ins_encode %{
int vlen_enc = vector_length_encoding(this, $src1);
Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
Assembler::Width ww = widthForType(vector_element_basic_type(this, $src1));
__ vpcmpCCW($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, ww, vlen_enc, $scratch$$Register);
%}
ins_pipe( pipe_slow );
%}
4. In repo panama-vector, both of them are wrong, because the IR is fixed:
455 } else {
456 ConINode* pred_node = (ConINode*)gvn().makecon(TypeInt::make(BoolTest::ge));// wrong here
457 Node * lane_cnt = gvn().makecon(TypeInt::make(num_elem));
458 Node * bcast_lane_cnt = gvn().transform(VectorNode::scalar2vector(lane_cnt, num_elem, type_bt));
459 Node* mask = gvn().transform(new VectorMaskCmpNode(BoolTest::ge, bcast_lane_cnt, res, pred_node, vt));
5. In fact, the real comparison here is an unsigned one (BoolTest::ugt).