Uploaded image for project: 'JDK'
  1. JDK
  2. JDK-8285790

AArch64: Merge C2 NEON and SVE matching rules

XMLWordPrintable

    • Icon: Enhancement Enhancement
    • Resolution: Fixed
    • Icon: P4 P4
    • 20
    • 19
    • hotspot
    • b11
    • aarch64
    • generic

      Having different rules in two files, with vReg and vecX/vecD operands, makes the code hard to maintain. We can try to use vReg for all NEON rules and merge the SVE and NEON rules. E.g.:

      In aarch64_sve.ad:
      instruct vaddI(vReg dst, vReg src1, vReg src2) %{
        predicate(UseSVE > 0);
        match(Set dst (AddVI src1 src2));
        ins_cost(SVE_COST);
        format %{ "sve_add $dst, $src1, $src2\t # vector (sve) (S)" %}
        ins_encode %{
          __ sve_add(as_FloatRegister($dst$$reg), __ S,
               as_FloatRegister($src1$$reg),
               as_FloatRegister($src2$$reg));
        %}
        ins_pipe(pipe_slow);
      %}

      In aarch64_neon.ad:

      instruct vadd2I(vecD dst, vecD src1, vecD src2)
      %{
        predicate(n->as_Vector()->length() == 2);
        match(Set dst (AddVI src1 src2));
        ins_cost(INSN_COST);
        format %{ "addv $dst,$src1,$src2\t# vector (2S)" %}
        ins_encode %{
          __ addv(as_FloatRegister($dst$$reg), __ T2S,
                  as_FloatRegister($src1$$reg),
                  as_FloatRegister($src2$$reg));
        %}
        ins_pipe(vdop64);
      %}

      instruct vadd4I(vecX dst, vecX src1, vecX src2)
      %{
        predicate(n->as_Vector()->length() == 4);
        match(Set dst (AddVI src1 src2));
        ins_cost(INSN_COST);
        format %{ "addv $dst,$src1,$src2\t# vector (4S)" %}
        ins_encode %{
          __ addv(as_FloatRegister($dst$$reg), __ T4S,
                  as_FloatRegister($src1$$reg),
                  as_FloatRegister($src2$$reg));
        %}
        ins_pipe(vdop128);
      %}

      These rules can be merged into a single rule, like:

      instruct vaddI(vReg dst, vReg src1, vReg src2) %{
        match(Set dst (AddVI src1 src2));
        ins_cost(SVE_COST);
        format %{ "vadd $dst, $src1, $src2\t # vector (I)" %}
        ins_encode %{
          uint size = Matcher::vector_length(this);
          if (size <= 4) {
            __ addv(as_FloatRegister($dst$$reg), (size == 4) ? __ T4S : __ T2S,
                    as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
          } else {
            assert(UseSVE > 0, "should be sve");
            __ sve_add(as_FloatRegister($dst$$reg), __ S,
                       as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
          }
        %}
        ins_pipe(pipe_slow);
      %}

      To make vReg match both VecA and VecD/VecX, we can use the generic vector approach:

      const uint Matcher::vector_ideal_reg(int len) {
        if (UseSVE > 0 && 16 < len && len <= 256) {
          return Op_VecA;
        }
        switch(len) {
          // For 16-bit/32-bit mask vector, reuse VecD.
          case 2:
          case 4:
          case 8: return Op_VecD;
          case 16: return Op_VecX;
        }
        ShouldNotReachHere();
        return 0;
      }

      MachOper* Matcher::pd_specialize_generic_vector_operand(MachOper* original_opnd, uint ideal_reg, bool is_temp) {
        switch (ideal_reg) {
        case Op_VecA: return new vecAOper();
        case Op_VecX: return new vecXOper();
        case Op_VecD: return new vecDOper();
        }
        ShouldNotReachHere();
        return NULL;
      }

            haosun Hao Sun
            njian Ningsheng Jian (Inactive)
            Votes:
            2 Vote for this issue
            Watchers:
            6 Start watching this issue

              Created:
              Updated:
              Resolved: