[VectorAPI] ByteVector.reduceLanes(VectorOperators.MUL, mask) produces incorrect results on certain CPUs with C2

XMLWordPrintable

    • Type: Bug
    • Resolution: Unresolved
    • Priority: P4
    • None
    • Affects Version/s: 26
    • Component/s: hotspot
    • x86_64
    • generic

      ByteVector.reduceLanes(VectorOperators.MUL, mask) produces incorrect results for certain species and CPUs when C2 compilation is triggered

      Species:
      ByteVector.SPECIES_256
      ByteVector.SPECIES_512
      ByteVector.SPECIES_MAX (512)

      CPU:

      AMD EPYC 9J14 96-Core
      Intel Xeon Platinum 8358

      (1) The issue only occurs for masked reductions
      (2) The issue is not reproducible if C2/intrinsic is not triggered
      (3) The issue is not reproducible if AVX2 is enforced via -XX:UseAVX=2

      Test Case

      /*
       * @test
       * @key randomness
       *
       * @library /test/lib
       * @modules jdk.incubator.vector
       * @run testng/othervm/timeout=300
       * -ea
       * -esa
       * -Xbatch
       * -XX:-TieredCompilation
       * ByteVectorReduceLanesWithMaskTests
       */

      import jdk.incubator.vector.*;
      import org.testng.Assert;
      import org.testng.annotations.DataProvider;
      import org.testng.annotations.Test;

      import java.util.Arrays;
      import java.util.Collections;
      import java.util.List;
      import java.util.function.IntFunction;
      import java.util.stream.IntStream;

      /**
       * Reproducer for incorrect results from the masked
       * {@code ByteVector.reduceLanes(VectorOperators.MUL, mask)} reduction.
       *
       * <p>Each test multiplies the mask-selected lanes of a vector whose lanes all
       * hold {@link #TESTING_BYTE} and compares the vector result with the scalar
       * reference {@link #MULReduceMasked}.
       */
      @Test
      public class ByteVectorReduceLanesWithMaskTests {

          /**
           * Base iteration count, read from the system property
           * {@code jdk.incubator.vector.test.loop-iterations}; defaults to 200.
           */
          static final int INVOC_COUNT = Integer.getInteger("jdk.incubator.vector.test.loop-iterations", 200);

          /** Vector species under test; the report lists SPECIES_256, SPECIES_512 and SPECIES_MAX as affected. */
          static VectorSpecies<Byte> SPECIES = ByteVector.SPECIES_512;

          /** Value stored in every lane of the input array. */
          static byte TESTING_BYTE = 7;

          /**
           * Builds the input data.
           *
           * @return a new array of {@code SPECIES.length()} bytes, each equal to {@link #TESTING_BYTE}
           */
          static byte[] get_bytes() {
              byte[] a = new byte[SPECIES.length()];
              for (int i = 0; i < a.length; i++) {
                  a[i] = TESTING_BYTE;
              }
              return a;
          }

          /**
           * Builds the lane mask.
           *
           * @return a new array of {@code SPECIES.length()} booleans that is {@code true}
           *         at every index divisible by 5 and {@code false} elsewhere
           */
          static boolean[] get_mask() {
              boolean[] a = new boolean[SPECIES.length()];
              for (int i = 0; i < a.length; i++) {
                  a[i] = (i % 5) == 0;
              }
              return a;
          }

          /**
           * Prints the test configuration (species, iteration count, input array, mask and
           * number of set mask lanes) to standard output. Performs no assertions.
           */
          @Test
          static void MULReduceByteNNNVectorTestsData() {
              byte[] a = get_bytes();
              boolean[] mask = get_mask();

              int true_cnt = 0;
              VectorMask<Byte> vmask = VectorMask.fromArray(SPECIES, mask, 0);
              // Same stride as the reduction loops below: one trueCount() per vector-sized
              // chunk of a. get_bytes() returns exactly one chunk, so this runs once.
              for (int i = 0; i < a.length; i += SPECIES.length()) {
                  true_cnt += vmask.trueCount();
              }

              System.out.println(" ----- MULReduceByteNNNVectorTestsData ----");
              System.out.println(" SPECIES:" + SPECIES);
              System.out.println(" INVOC_COUNT:" + INVOC_COUNT);
              System.out.println(" a.len:" + a.length);
              System.out.println(" a:" + Arrays.toString(a));
              System.out.println(" mask.len:" + mask.length);
              System.out.println(" true.cnt:" + true_cnt);
              System.out.println(" mask:" + Arrays.toString(mask));
           }

          /**
           * Runs the masked MUL reduction a single time per vector-sized chunk and checks
           * the result against the scalar reference.
           */
          @Test
          static void MULReduceByteNNNVectorTestsMaskedSmokeTest() {
              byte[] a = get_bytes();
              boolean[] mask = get_mask();
              VectorMask<Byte> vmask = VectorMask.fromArray(SPECIES, mask, 0);
              byte[] r = new byte[a.length];

              // The reduction result for the chunk starting at i is stored in r[i];
              // the remaining elements of r stay 0 and are never inspected.
              for (int i = 0; i < a.length; i += SPECIES.length()) {
                  ByteVector av = ByteVector.fromArray(SPECIES, a, i);
                  r[i] = av.reduceLanes(VectorOperators.MUL, vmask);
              }

              assertReductionArraysEqualsMasked(r, a, mask, ByteVectorReduceLanesWithMaskTests::MULReduceMasked);
          }

          /**
           * Runs the same masked MUL reduction as the smoke test, but repeats it
           * {@code INVOC_COUNT * INVOC_COUNT} times before checking the last result
           * against the scalar reference. The bug report states the failure is only
           * observed once C2 is triggered, which is what the repetition is for.
           */
          @Test
          static void MULReduceByteNNNVectorTestsMasked() {
              byte[] a = get_bytes(); // every lane holds TESTING_BYTE
              byte[] r = new byte[a.length];
              boolean[] mask = get_mask();
              VectorMask<Byte> vmask = VectorMask.fromArray(SPECIES, mask, 0);

              // Each outer iteration recomputes and overwrites r; only the values written
              // by the final iteration are verified below.
              for (int ic = 0; ic < INVOC_COUNT * INVOC_COUNT; ic++) {
                  for (int i = 0; i < a.length; i += SPECIES.length()) {
                      ByteVector av = ByteVector.fromArray(SPECIES, a, i);
                      r[i] = av.reduceLanes(VectorOperators.MUL, vmask);
                  }
              }

              assertReductionArraysEqualsMasked(r, a, mask, ByteVectorReduceLanesWithMaskTests::MULReduceMasked);
          }

          /** Scalar reference implementation of a masked reduction over one vector-sized chunk. */
          interface FReductionMaskedOp {
              /**
               * @param a    input array
               * @param idx  index in {@code a} of the first element of the chunk
               * @param mask per-lane mask
               * @return the expected reduction result for the chunk
               */
              byte apply(byte[] a, int idx, boolean[] mask);
          }

          /**
           * Scalar reference for the masked MUL reduction: multiplies together the elements
           * of {@code a[idx .. idx + SPECIES.length())} whose mask lane is set.
           *
           * @param a    input array
           * @param idx  index of the first element of the chunk
           * @param mask lane mask, indexed by position within the chunk
           * @return the product narrowed to {@code byte}, or 1 if no lane is selected
           */
          static byte MULReduceMasked(byte[] a, int idx, boolean[] mask) {
              byte res = 1;
              for (int i = idx; i < (idx + SPECIES.length()); i++) {
                  // The modulo maps the absolute array index back to a lane index.
                  if (mask[i % SPECIES.length()])
                      // Compound assignment narrows the int product back to byte on each step.
                      res *= a[i];
              }
              return res;
          }

          /**
           * Checks, for every vector-sized chunk of {@code a}, that {@code r[i]} equals the
           * value computed by the scalar reference {@code f} for the chunk starting at {@code i}.
           *
           * @param r    results produced by the vector reduction, one per chunk at the chunk's start index
           * @param a    input array
           * @param mask lane mask passed through to {@code f}
           * @param f    scalar reference implementation
           */
          static void assertReductionArraysEqualsMasked(byte[] r, byte[] a, boolean[] mask,
                                                        FReductionMaskedOp f) {
              // i is declared outside the try so the catch block can see which chunk failed.
              int i = 0;
              try {
                  for (; i < a.length; i += SPECIES.length()) {
                      Assert.assertEquals(r[i], f.apply(a, i, mask));
                  }
              } catch (AssertionError e) {
                  // Repeat the failing comparison so the reported error carries the index.
                  Assert.assertEquals(r[i], f.apply(a, i, mask), "at index #" + i);
              }
          }
      }

            Assignee:
            Xueming Shen
            Reporter:
            Xueming Shen
            Votes:
            0 Vote for this issue
            Watchers:
            4 Start watching this issue

              Created:
              Updated: