ByteVector.reduceLanes(VectorOperators.MUL, mask) produces incorrect results on certain species and CPUs when C2 is triggered
Species:
ByteVector.SPECIES_256
ByteVector.SPECIES_512
ByteVector.SPECIES_MAX (512)
CPU:
AMD EPYC 9J14 96-Core
Intel Xeon Platinum 8358
(1) The issue only occurs for masked reductions
(2) The issue is not reproducible if C2/intrinsic is not triggered
(3) The issue is not reproducible if AVX2 is enforced via -XX:UseAVX=2
Test Case
/*
* @test
* @key randomness
*
* @library /test/lib
* @modules jdk.incubator.vector
* @run testng/othervm/timeout=300
* -ea
* -esa
* -Xbatch
* -XX:-TieredCompilation
* ByteVectorReduceLanesWithMaskTests
*/
import jdk.incubator.vector.*;
import org.testng.Assert;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import java.util.function.IntFunction;
import java.util.stream.IntStream;
/**
 * Reproducer for masked {@code ByteVector.reduceLanes(VectorOperators.MUL, mask)}.
 *
 * Each test builds a single vector's worth of input (every lane holds
 * {@link #TESTING_BYTE}) plus a fixed mask, runs the masked MUL reduction
 * through the Vector API and compares the result with the scalar reference
 * implementation {@link #MULReduceMasked}.
 *
 * NOTE(review): the code shape (non-final static species, plain index loops)
 * is kept exactly as reported; changing it may alter whether the JIT problem
 * described in the report reproduces.
 */
@Test
public class ByteVectorReduceLanesWithMaskTests {
// Base iteration count, read from the system property
// "jdk.incubator.vector.test.loop-iterations"; falls back to 200 when unset.
static final int INVOC_COUNT = Integer.getInteger("jdk.incubator.vector.test.loop-iterations", 200);
// Vector species under test; it determines the length of the input and mask arrays.
static VectorSpecies<Byte> SPECIES = ByteVector.SPECIES_512;
// Value stored in every element of the input array.
static byte TESTING_BYTE = 7;
/**
 * Builds the input data.
 *
 * @return a new array of {@code SPECIES.length()} bytes, all equal to {@code TESTING_BYTE}
 */
static byte[] get_bytes() {
byte[] a = new byte[SPECIES.length()];
for (int i = 0; i < a.length; i++) {
a[i] = TESTING_BYTE;
}
return a;
}
/**
 * Builds the lane mask.
 *
 * @return a new array of {@code SPECIES.length()} booleans that is {@code true}
 *         exactly at the indexes divisible by 5 (0, 5, 10, ...)
 */
static boolean[] get_mask() {
boolean[] a = new boolean[SPECIES.length()];
for (int i = 0; i < a.length; i++) {
a[i] = (i % 5) == 0;
}
return a;
}
/**
 * Diagnostic test: performs no assertions, only prints the species, the
 * iteration count, the input array, the mask and the number of set mask lanes
 * to standard output.
 */
@Test
static void MULReduceByteNNNVectorTestsData() {
byte[] a = get_bytes();
boolean[] mask = get_mask();
int true_cnt = 0;
VectorMask<Byte> vmask = VectorMask.fromArray(SPECIES, mask, 0);
// a.length equals SPECIES.length() (see get_bytes), so this loop body runs once
// and true_cnt ends up as the number of set lanes in vmask.
for (int i = 0; i < a.length; i += SPECIES.length()) {
true_cnt += vmask.trueCount();
}
System.out.println(" ----- MULReduceByteNNNVectorTestsData ----");
System.out.println(" SPECIES:" + SPECIES);
System.out.println(" INVOC_COUNT:" + INVOC_COUNT);
System.out.println(" a.len:" + a.length);
System.out.println(" a:" + Arrays.toString(a));
System.out.println(" mask.len:" + mask.length);
System.out.println(" true.cnt:" + true_cnt);
System.out.println(" mask:" + Arrays.toString(mask));
}
/**
 * Runs the masked MUL reduction a single time over the input and checks the
 * result against the scalar reference.
 */
@Test
static void MULReduceByteNNNVectorTestsMaskedSmokeTest() {
byte[] a = get_bytes();
boolean[] mask = get_mask();
VectorMask<Byte> vmask = VectorMask.fromArray(SPECIES, mask, 0);
byte[] r = new byte[a.length];
// One vector per step; the reduced value is stored at the vector's start index.
for (int i = 0; i < a.length; i += SPECIES.length()) {
ByteVector av = ByteVector.fromArray(SPECIES, a, i);
r[i] = av.reduceLanes(VectorOperators.MUL, vmask);
}
assertReductionArraysEqualsMasked(r, a, mask, ByteVectorReduceLanesWithMaskTests::MULReduceMasked);
}
/**
 * Same computation as the smoke test, but repeated
 * {@code INVOC_COUNT * INVOC_COUNT} times (40,000 with the default) before the
 * result of the last pass is checked against the scalar reference.
 */
@Test
static void MULReduceByteNNNVectorTestsMasked() {
byte[] a = get_bytes(); // cornerCaseValue(i));
byte[] r = new byte[a.length];
boolean[] mask = get_mask();
VectorMask<Byte> vmask = VectorMask.fromArray(SPECIES, mask, 0);
// NOTE(review): the repetition is presumably what gets this loop compiled by C2,
// which the report names as the trigger condition - confirm.
for (int ic = 0; ic < INVOC_COUNT * INVOC_COUNT; ic++) {
for (int i = 0; i < a.length; i += SPECIES.length()) {
ByteVector av = ByteVector.fromArray(SPECIES, a, i);
r[i] = av.reduceLanes(VectorOperators.MUL, vmask);
}
}
assertReductionArraysEqualsMasked(r, a, mask, ByteVectorReduceLanesWithMaskTests::MULReduceMasked);
}
/**
 * Scalar reference for a masked reduction: computes the expected value for the
 * vector-sized chunk of {@code a} that starts at {@code idx}, honouring {@code mask}.
 */
interface FReductionMaskedOp {
byte apply(byte[] a, int idx, boolean[] mask);
}
/**
 * Scalar reference for the masked MUL reduction.
 *
 * @param a    input array
 * @param idx  index of the first element of the chunk to reduce
 * @param mask per-lane mask; lane {@code i} is looked up at {@code i % SPECIES.length()}
 * @return the product of the selected elements in
 *         {@code a[idx .. idx + SPECIES.length())}, starting from 1; the
 *         compound assignment narrows each intermediate product back to {@code byte}
 */
static byte MULReduceMasked(byte[] a, int idx, boolean[] mask) {
byte res = 1;
for (int i = idx; i < (idx + SPECIES.length()); i++) {
if (mask[i % SPECIES.length()])
res *= a[i];
}
return res;
}
/**
 * Checks, for every chunk start index {@code i} (stepping by
 * {@code SPECIES.length()}), that {@code r[i]} equals the reference value
 * {@code f.apply(a, i, mask)}.
 *
 * @param r    reduction results, one per chunk, stored at the chunk's start index
 * @param a    input array the results were computed from
 * @param mask lane mask used for the reduction
 * @param f    scalar reference implementation
 */
static void assertReductionArraysEqualsMasked(byte[] r, byte[] a, boolean[] mask,
FReductionMaskedOp f) {
int i = 0;
try {
for (; i < a.length; i += SPECIES.length()) {
Assert.assertEquals(r[i], f.apply(a, i, mask));
}
} catch (AssertionError e) {
// i still holds the failing chunk index; repeat the comparison so the
// reported failure carries that index in its message.
Assert.assertEquals(r[i], f.apply(a, i, mask), "at index #" + i);
}
}
}
Species:
ByteVector.SPECIES_256
ByteVector.SPECIES_512
ByteVector.SPECIES_MAX (512)
CPU:
AMD EPYC 9J14 96-Core
Intel Xeon Platinum 8358
(1) The issue only occurs for masked reductions
(2) The issue is not reproducible if C2/intrinsic is not triggered
(3) The issue is not reproducible if AVX2 is enforced via -XX:UseAVX=2
Test Case
/*
* @test
* @key randomness
*
* @library /test/lib
* @modules jdk.incubator.vector
* @run testng/othervm/timeout=300
* -ea
* -esa
* -Xbatch
* -XX:-TieredCompilation
* ByteVectorReduceLanesWithMaskTests
*/
import jdk.incubator.vector.*;
import org.testng.Assert;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import java.util.function.IntFunction;
import java.util.stream.IntStream;
/**
 * Reproducer for masked {@code ByteVector.reduceLanes(VectorOperators.MUL, mask)}.
 *
 * Each test builds a single vector's worth of input (every lane holds
 * {@link #TESTING_BYTE}) plus a fixed mask, runs the masked MUL reduction
 * through the Vector API and compares the result with the scalar reference
 * implementation {@link #MULReduceMasked}.
 *
 * NOTE(review): the code shape (non-final static species, plain index loops)
 * is kept exactly as reported; changing it may alter whether the JIT problem
 * described in the report reproduces.
 */
@Test
public class ByteVectorReduceLanesWithMaskTests {
// Base iteration count, read from the system property
// "jdk.incubator.vector.test.loop-iterations"; falls back to 200 when unset.
static final int INVOC_COUNT = Integer.getInteger("jdk.incubator.vector.test.loop-iterations", 200);
// Vector species under test; it determines the length of the input and mask arrays.
static VectorSpecies<Byte> SPECIES = ByteVector.SPECIES_512;
// Value stored in every element of the input array.
static byte TESTING_BYTE = 7;
/**
 * Builds the input data.
 *
 * @return a new array of {@code SPECIES.length()} bytes, all equal to {@code TESTING_BYTE}
 */
static byte[] get_bytes() {
byte[] a = new byte[SPECIES.length()];
for (int i = 0; i < a.length; i++) {
a[i] = TESTING_BYTE;
}
return a;
}
/**
 * Builds the lane mask.
 *
 * @return a new array of {@code SPECIES.length()} booleans that is {@code true}
 *         exactly at the indexes divisible by 5 (0, 5, 10, ...)
 */
static boolean[] get_mask() {
boolean[] a = new boolean[SPECIES.length()];
for (int i = 0; i < a.length; i++) {
a[i] = (i % 5) == 0;
}
return a;
}
/**
 * Diagnostic test: performs no assertions, only prints the species, the
 * iteration count, the input array, the mask and the number of set mask lanes
 * to standard output.
 */
@Test
static void MULReduceByteNNNVectorTestsData() {
byte[] a = get_bytes();
boolean[] mask = get_mask();
int true_cnt = 0;
VectorMask<Byte> vmask = VectorMask.fromArray(SPECIES, mask, 0);
// a.length equals SPECIES.length() (see get_bytes), so this loop body runs once
// and true_cnt ends up as the number of set lanes in vmask.
for (int i = 0; i < a.length; i += SPECIES.length()) {
true_cnt += vmask.trueCount();
}
System.out.println(" ----- MULReduceByteNNNVectorTestsData ----");
System.out.println(" SPECIES:" + SPECIES);
System.out.println(" INVOC_COUNT:" + INVOC_COUNT);
System.out.println(" a.len:" + a.length);
System.out.println(" a:" + Arrays.toString(a));
System.out.println(" mask.len:" + mask.length);
System.out.println(" true.cnt:" + true_cnt);
System.out.println(" mask:" + Arrays.toString(mask));
}
/**
 * Runs the masked MUL reduction a single time over the input and checks the
 * result against the scalar reference.
 */
@Test
static void MULReduceByteNNNVectorTestsMaskedSmokeTest() {
byte[] a = get_bytes();
boolean[] mask = get_mask();
VectorMask<Byte> vmask = VectorMask.fromArray(SPECIES, mask, 0);
byte[] r = new byte[a.length];
// One vector per step; the reduced value is stored at the vector's start index.
for (int i = 0; i < a.length; i += SPECIES.length()) {
ByteVector av = ByteVector.fromArray(SPECIES, a, i);
r[i] = av.reduceLanes(VectorOperators.MUL, vmask);
}
assertReductionArraysEqualsMasked(r, a, mask, ByteVectorReduceLanesWithMaskTests::MULReduceMasked);
}
/**
 * Same computation as the smoke test, but repeated
 * {@code INVOC_COUNT * INVOC_COUNT} times (40,000 with the default) before the
 * result of the last pass is checked against the scalar reference.
 */
@Test
static void MULReduceByteNNNVectorTestsMasked() {
byte[] a = get_bytes(); // cornerCaseValue(i));
byte[] r = new byte[a.length];
boolean[] mask = get_mask();
VectorMask<Byte> vmask = VectorMask.fromArray(SPECIES, mask, 0);
// NOTE(review): the repetition is presumably what gets this loop compiled by C2,
// which the report names as the trigger condition - confirm.
for (int ic = 0; ic < INVOC_COUNT * INVOC_COUNT; ic++) {
for (int i = 0; i < a.length; i += SPECIES.length()) {
ByteVector av = ByteVector.fromArray(SPECIES, a, i);
r[i] = av.reduceLanes(VectorOperators.MUL, vmask);
}
}
assertReductionArraysEqualsMasked(r, a, mask, ByteVectorReduceLanesWithMaskTests::MULReduceMasked);
}
/**
 * Scalar reference for a masked reduction: computes the expected value for the
 * vector-sized chunk of {@code a} that starts at {@code idx}, honouring {@code mask}.
 */
interface FReductionMaskedOp {
byte apply(byte[] a, int idx, boolean[] mask);
}
/**
 * Scalar reference for the masked MUL reduction.
 *
 * @param a    input array
 * @param idx  index of the first element of the chunk to reduce
 * @param mask per-lane mask; lane {@code i} is looked up at {@code i % SPECIES.length()}
 * @return the product of the selected elements in
 *         {@code a[idx .. idx + SPECIES.length())}, starting from 1; the
 *         compound assignment narrows each intermediate product back to {@code byte}
 */
static byte MULReduceMasked(byte[] a, int idx, boolean[] mask) {
byte res = 1;
for (int i = idx; i < (idx + SPECIES.length()); i++) {
if (mask[i % SPECIES.length()])
res *= a[i];
}
return res;
}
/**
 * Checks, for every chunk start index {@code i} (stepping by
 * {@code SPECIES.length()}), that {@code r[i]} equals the reference value
 * {@code f.apply(a, i, mask)}.
 *
 * @param r    reduction results, one per chunk, stored at the chunk's start index
 * @param a    input array the results were computed from
 * @param mask lane mask used for the reduction
 * @param f    scalar reference implementation
 */
static void assertReductionArraysEqualsMasked(byte[] r, byte[] a, boolean[] mask,
FReductionMaskedOp f) {
int i = 0;
try {
for (; i < a.length; i += SPECIES.length()) {
Assert.assertEquals(r[i], f.apply(a, i, mask));
}
} catch (AssertionError e) {
// i still holds the failing chunk index; repeat the comparison so the
// reported failure carries that index in its message.
Assert.assertEquals(r[i], f.apply(a, i, mask), "at index #" + i);
}
}
}