Uploaded image for project: 'JDK'
  1. JDK
  2. JDK-8262916

Merge LShiftCntV and RShiftCntV into a single node

XMLWordPrintable

    • Icon: Enhancement Enhancement
    • Resolution: Unresolved
    • Icon: P4 P4
    • repo-panama
    • repo-panama
    • hotspot
    • generic
    • generic

      ```
      // opto/vectornode.hpp
      // Vector left shift count
       class LShiftCntVNode : public VectorNode {
        public:
         LShiftCntVNode(Node* cnt, const TypeVect* vt) : VectorNode(cnt,vt) {}
         virtual int Opcode() const;
       };

       //------------------------------RShiftCntVNode---------------------------------
       // Vector right shift count
       class RShiftCntVNode : public VectorNode {
        public:
         RShiftCntVNode(Node* cnt, const TypeVect* vt) : VectorNode(cnt,vt) {}
         virtual int Opcode() const;
       };
      ```

      The vector shift count is currently defined by two separate nodes, LShiftCntVNode and RShiftCntVNode, but they actually have the same behavior.

       

      // test case
       public static void test_shiftv(int sh) {
           for (int i = 0; i < N; i+=1) {
             a0[i] = a1[i] << sh;
             b0[i] = b1[i] >> sh;
           }
         }

      // c2 generated assembly
      0x0000ffffa9105f88: dup v16.16b, w12
      0x0000ffffa9105f8c: add w18, w20, #0xf
      0x0000ffffa9105f90: cmp w20, w10
      ...
      0x0000ffffa9105ff0: dup v17.16b, w12 // duplicate of "dup v16.16b, w12"
      ...
      0x0000ffffa910606c: ldr q18, [x18, #16]
      0x0000ffffa9106070: add x12, x14, x12
      0x0000ffffa9106074: neg v19.16b, v17.16b
      0x0000ffffa9106078: sshl v18.4s, v18.4s, v19.4s // b0[i] = b1[i] >> sh;
      0x0000ffffa910607c: str q18, [x12, #16]
      0x0000ffffa9106080: ldr q18, [x13, #32]
      0x0000ffffa9106084: sshl v18.4s, v18.4s, v16.4s
      0x0000ffffa9106088: str q18, [x4, #32] // a0[i] = a1[i] << sh;

      0x0000ffffa910608c: ldr q18, [x18, #32]
      0x0000ffffa9106090: neg v19.16b, v17.16b
      0x0000ffffa9106094: sshl v18.4s, v18.4s, v19.4s
      0x0000ffffa9106098: str q18, [x12, #32] // b0[i] = b1[i] >> sh;
      0x0000ffffa910609c: ldr q18, [x13, #48]
      0x0000ffffa91060a0: sshl v18.4s, v18.4s, v16.4s
      0x0000ffffa91060a4: str q18, [x4, #48] // a0[i] = a1[i] << sh;


      By merging them into a single node, the generated code would no longer need the redundant “dup” instruction, which also frees up one vector register.

            eliu Eric Liu
            eliu Eric Liu
            Votes:
            0 Vote for this issue
            Watchers:
            3 Start watching this issue

              Created:
              Updated: