Uploaded image for project: 'JDK'
  1. JDK
  2. JDK-8333006

RISC-V: C2: Support vector-scalar and vector-immediate arithmetic instructions

XMLWordPrintable

    • Icon: Enhancement Enhancement
    • Resolution: Fixed
    • Icon: P4 P4
    • 23
    • 23
    • hotspot
    • b26
    • riscv
    • linux

      Hi, we want to support vector-scalar and vector-immediate arithmetic instructions. The implementation follows RVV v1.0 [1]. Please take a look and review it. Thanks a lot.
      We can use Byte256VectorTests.java [2] to print the Opto JIT code and verify that the expected nodes are generated.

      For example, we can use the following command to print the Opto JIT Code of a jtreg test case:

      ```
      /home/zifeihan/jtreg/bin/jtreg \
      -v:default \
      -concurrency:16 -timeout:50 \
      -javaoption:-XX:+UnlockExperimentalVMOptions \
      -javaoption:-XX:+UseRVV \
      -javaoption:-XX:+PrintOptoAssembly \
      -javaoption:-XX:LogFile=/home/zifeihan/jdk/Byte256VectorTests_PrintOptoAssembly.log \
      -jdk:/home/zifeihan/jdk/build/linux-riscv64-server-fastdebug/jdk \
      /home/zifeihan/jdk/test/jdk/jdk/incubator/vector/Byte256VectorTests.java
      ```


      We can then inspect the specified compilation log, `Byte256VectorTests_PrintOptoAssembly.log`, which contains the vector-scalar and vector-immediate arithmetic instructions implemented by this PR.

      vadd_immI Node
      ```
      16c addw R11, R10, zr #@convI2L_reg_reg
      170 add R9, R31, R11 # ptr, #@addP_reg_reg
      174 addi R9, R9, #16 # ptr, #@addP_reg_imm
      176 loadV V1, [R9] # vector (rvv)
      17e vadd_immI V1, V1, #7
      186 add R11, R15, R11 # ptr, #@addP_reg_reg
      188 addi R11, R11, #16 # ptr, #@addP_reg_imm
      18a storeV [R11], V1 # vector (rvv)
      ```

      vadd_immI_masked Node
      ```
      1e8 B31: # out( B37 B32 ) <- in( B30 ) Freq: 76.2281
      1e8 loadV V2, [R31] # vector (rvv)
      1f0 vloadmask V0, V1
      1f8 vadd_immI_masked V2, V2, #7
      200 addi R31, R10, #48 # ptr, #@addP_reg_imm
      204 bgeu R30, R7, B37 #@cmpU_branch P=0.000001 C=-1.000000
      ```

      vadd_regI Node
      ```
      0c4 B4: # out( B9 B5 ) <- in( B8 B3 ) Freq: 1
      0c4 vloadcon V1 # generate iota indices
      0cc spill [sp, #4] -> R30 # spill size = 32
      0ce vmul_regI V1, V1, R30
      0d6 spill [sp, #0] -> R29 # spill size = 32
      0d8 vadd_regI V1, V1, R29
      ```

      vadd_regI_masked Node
      ```
      244 B36: # out( B33 B37 ) <- in( B35 ) Freq: 7427.81
      244 # castII of R30, #@castII
      244 addw R31, R30, zr #@convI2L_reg_reg
      248 spill [sp, #32] -> R10 # spill size = 64
      24a add R10, R10, R31 # ptr, #@addP_reg_reg
      24c addi R10, R10, #16 # ptr, #@addP_reg_imm
      24e loadV V2, [R10] # vector (rvv)
      256 vloadmask V0, V1
      25e vadd_regI_masked V2, V2, R29
      ```

      vsub_regI Node
      ```
      112 B20: # out( B63 B21 ) <- in( B19 ) Freq: 77.0107
      112 # castII of R20, #@castII
      112 addw R11, R20, zr #@convI2L_reg_reg
      116 add R12, R10, R11 # ptr, #@addP_reg_reg
      11a addi R12, R12, #16 # ptr, #@addP_reg_imm
      11c loadV V1, [R12] # vector (rvv)
      124 vsub_regI V1, V1, R31
      12c bgeu R20, R29, B63 #@cmpU_branch P=0.000001 C=-1.000000
      ```

      vsub_regI_masked Node
      ```
      1e8 B31: # out( B37 B32 ) <- in( B30 ) Freq: 76.2281
      1e8 loadV V2, [R31] # vector (rvv)
      1f0 vloadmask V0, V1
      1f8 vsub_regI_masked V2, V2, R29
      200 addi R31, R10, #48 # ptr, #@addP_reg_imm
      204 bgeu R30, R7, B37 #@cmpU_branch P=0.000001 C=-1.000000
      ```

      vmul_regI Node
      ```
      0ca B4: # out( B9 B5 ) <- in( B8 B3 ) Freq: 1
      0ca vloadcon V1 # generate iota indices
      0d2 spill [sp, #0] -> R29 # spill size = 64
      0d4 lwu R7, [R29, #12] # loadN, compressed ptr, #@loadN ! Field: jdk/internal/vm/vector/VectorSupport$VectorPayload.payload (constant)
      0d8 decode_heap_oop R7, R7 #@decodeHeapOop
      0da addi R7, R7, #16 # ptr, #@addP_reg_imm
      0dc vmul_regI V1, V1, R30
      0e4 loadV V2, [R7] # vector (rvv)
      ```

      vmul_regI_masked Node
      ```
      198 addw R30, R19, zr #@convI2L_reg_reg
      19c spill [sp, #32] -> R31 # spill size = 64
      19e add R31, R31, R30 # ptr, #@addP_reg_reg
      1a0 addi R10, R31, #16 # ptr, #@addP_reg_imm
      1a4 loadV V2, [R10] # vector (rvv)
      1ac vloadmask V0, V1
      1b4 vmul_regI_masked V2, V2, R29
      ```



      We can test test/jdk/jdk/incubator/vector/Long256VectorTests.java in the same way. Looking at the Opto logs, we will see the analogous nodes: vadd_immL, vadd_immL_masked, vadd_regL, vadd_regL_masked, vsub_regL, vsub_regL_masked, vmul_regL, vmul_regL_masked.

      vadd_immL Node
      ```
      112 addw R11, R9, zr #@convI2L_reg_reg
      116 slli R11, R11, (#3 & 0x3f) #@lShiftL_reg_imm
      118 add R14, R29, R11 # ptr, #@addP_reg_reg
      11c addi R14, R14, #16 # ptr, #@addP_reg_imm
      11e loadV V1, [R14] # vector (rvv)
      126 vadd_immL V1, V1, #7
      ```

      vadd_immL_masked Node
      ```
      194 addw R30, R19, zr #@convI2L_reg_reg
      198 slli R30, R30, (#3 & 0x3f) #@lShiftL_reg_imm
      19a spill [sp, #32] -> R31 # spill size = 64
      19c add R31, R31, R30 # ptr, #@addP_reg_reg
      19e addi R10, R31, #16 # ptr, #@addP_reg_imm
      1a2 loadV V1, [R10] # vector (rvv)
      1aa vadd_immL_masked V1, V1, #7
      ```

      vadd_regL Node
      ```
      104 B17: # out( B20 ) <- in( B16 ) Freq: 0.99999
      104 replicateL_imm5 V4, #1
      10c vadd_regL V4, V4, R17
      114 -- // R23=Thread::current(), empty, #@tlsLoadP
      114 mv R31, #0 # int, #@loadConI
      116 j B20 #@branch
      ```

      vadd_regL_masked Node
      ```
      198 addw R30, R19, zr #@convI2L_reg_reg
      19c slli R30, R30, (#3 & 0x3f) #@lShiftL_reg_imm
      19e spill [sp, #32] -> R31 # spill size = 64
      1a0 add R31, R31, R30 # ptr, #@addP_reg_reg
      1a2 addi R10, R31, #16 # ptr, #@addP_reg_imm
      1a6 loadV V1, [R10] # vector (rvv)
      1ae vadd_regL_masked V1, V1, R11
      ```

      vsub_regL Node
      ```
      116 addw R11, R19, zr #@convI2L_reg_reg
      11a slli R11, R11, (#3 & 0x3f) #@lShiftL_reg_imm
      11c add R12, R31, R11 # ptr, #@addP_reg_reg
      120 addi R12, R12, #16 # ptr, #@addP_reg_imm
      122 loadV V1, [R12] # vector (rvv)
      12a vsub_regL V1, V1, R14
      ```

      vsub_regL_masked Node
      ```
      198 addw R30, R19, zr #@convI2L_reg_reg
      19c slli R30, R30, (#3 & 0x3f) #@lShiftL_reg_imm
      19e spill [sp, #32] -> R31 # spill size = 64
      1a0 add R31, R31, R30 # ptr, #@addP_reg_reg
      1a2 addi R10, R31, #16 # ptr, #@addP_reg_imm
      1a6 loadV V1, [R10] # vector (rvv)
      1ae vsub_regL_masked V1, V1, R11
      ```

      vmul_regL Node
      ```
      0c2 vloadcon V1 # generate iota indices
      0ca spill [sp, #0] -> R29 # spill size = 64
      0cc lwu R7, [R29, #12] # loadN, compressed ptr, #@loadN ! Field: jdk/internal/vm/vector/VectorSupport$VectorPayload.payload (constant)
      0d0 decode_heap_oop R7, R7 #@decodeHeapOop
      0d2 addi R7, R7, #16 # ptr, #@addP_reg_imm
      0d4 addw R28, R30, zr #@convI2L_reg_reg
      0d8 loadV V2, [R7] # vector (rvv)
      0e0 vmul_regL V1, V1, R28
      ```

      vmul_regL_masked Node
      ```
      19c slli R30, R30, (#3 & 0x3f) #@lShiftL_reg_imm
      19e spill [sp, #32] -> R31 # spill size = 64
      1a0 add R31, R31, R30 # ptr, #@addP_reg_reg
      1a2 addi R10, R31, #16 # ptr, #@addP_reg_imm
      1a6 loadV V1, [R10] # vector (rvv)
      1ae vmul_regL_masked V1, V1, R11
      1b6 spill [sp, #48] -> R10 # spill size = 64
      ```


            gcao Gui Cao
            gcao Gui Cao
            Votes:
            0 Vote for this issue
            Watchers:
            3 Start watching this issue

              Created:
              Updated:
              Resolved: