Uploaded image for project: 'JDK'
  1. JDK
  2. JDK-8275913

C2 does not optimize memory access within a loop

XMLWordPrintable

    • generic
    • generic

      In the following example, the code can be scalarized (last-value store optimization: the value can be calculated in a register and then stored to the array).

          // Reproducer: allocates a one-element long[] initialized to 1, adds 3 to
          // a[0] on each of 10,000,000 loop iterations, and returns the array.
          static long[] ytest() {
              long[] a = {1};
              for (int i=0; i<10_000_000; i++) {
                  // Read-modify-write of the same array element on every iteration;
                  // the index is the constant 0 and does not depend on i.
                  a[0] += 3;
              }
              return a;
          }

      [Verified Entry Point]
        # {method} {0x00007f8219c0db28} 'ytest' '()[J' in 'Test'
        # [sp+0x20] (sp of caller)
        0x00007f8278bfe5a0: mov %eax,-0x14000(%rsp)
        0x00007f8278bfe5a7: push %rbp
        0x00007f8278bfe5a8: sub $0x10,%rsp
        0x00007f8278bfe5ac: mov 0x100(%r15),%rax
        0x00007f8278bfe5b3: mov %rax,%r10
        0x00007f8278bfe5b6: add $0x18,%r10
        0x00007f8278bfe5ba: cmp 0x110(%r15),%r10
        0x00007f8278bfe5c1: jae 0x00007f8278bfe69e ;*goto
        0x00007f8278bfe5c7: mov %r10,0x100(%r15)
        0x00007f8278bfe5ce: prefetchw 0xc0(%r10)
        0x00007f8278bfe5d6: movq $0x1,(%rax)
        0x00007f8278bfe5dd: prefetchw 0x100(%r10)
        0x00007f8278bfe5e5: movl $0x40d20,0x8(%rax) ; {metadata({type array long})}
        0x00007f8278bfe5ec: prefetchw 0x140(%r10)
        0x00007f8278bfe5f4: movl $0x1,0xc(%rax)
        0x00007f8278bfe5fb: prefetchw 0x180(%r10)
        0x00007f8278bfe603: movq $0x4,0x10(%rax) ;*newarray
        0x00007f8278bfe60b: xor %r11d,%r11d
        0x00007f8278bfe60e: mov $0x1f40,%r8d
        0x00007f8278bfe614: mov $0x1,%ebx
        0x00007f8278bfe619: jmp 0x00007f8278bfe64a
        0x00007f8278bfe61b: nopl 0x0(%rax,%rax,1)
        
      loop_internal:
        0x00007f8278bfe620: mov %r9d,%ebx ;*aload_0
      loop_outer:
        0x00007f8278bfe623: addq $0x18,0x10(%rax) ;*lastore
        0x00007f8278bfe628: mov %ebx,%r9d
        0x00007f8278bfe62b: add $0x8,%r9d ;*iinc
        0x00007f8278bfe62f: cmp %r10d,%r9d
        0x00007f8278bfe632: jl 0x00007f8278bfe620 ; goto loop_internal
        
        0x00007f8278bfe634: mov 0x340(%r15),%r10 ; ImmutableOopMap {rax=Oop }
        0x00007f8278bfe63b: test %eax,(%r10) ; {poll}
        0x00007f8278bfe63e: cmp $0x989679,%r9d
        0x00007f8278bfe645: jge 0x00007f8278bfe66d
        0x00007f8278bfe647: mov %r9d,%ebx
        0x00007f8278bfe64a: mov $0x989679,%r10d
        0x00007f8278bfe650: sub %ebx,%r10d
        0x00007f8278bfe653: cmp $0x989679,%ebx
        0x00007f8278bfe659: cmovg %r11d,%r10d
        0x00007f8278bfe65d: cmp $0x1f40,%r10d
        0x00007f8278bfe664: cmova %r8d,%r10d
        0x00007f8278bfe668: add %ebx,%r10d
        0x00007f8278bfe66b: jmp 0x00007f8278bfe623 ; goto loop_outer

        0x00007f8278bfe66d: cmp $0x989680,%r9d
        0x00007f8278bfe674: jge 0x00007f8278bfe68b
        0x00007f8278bfe676: add $0x8,%ebx
        0x00007f8278bfe679: data16 xchg %ax,%ax ;*aload_0

      post_loop:
        0x00007f8278bfe67c: addq $0x3,0x10(%rax) ;*lastore
        0x00007f8278bfe681: inc %ebx ;*iinc
        0x00007f8278bfe683: cmp $0x989680,%ebx
        0x00007f8278bfe689: jl 0x00007f8278bfe67c ;*if_icmpge

        0x00007f8278bfe68b: add $0x10,%rsp
        0x00007f8278bfe68f: pop %rbp
        0x00007f8278bfe690: cmp 0x338(%r15),%rsp ; {poll_return}
        0x00007f8278bfe697: ja 0x00007f8278bfe6c6
        0x00007f8278bfe69d: retq

            roland Roland Westrelin
            bulasevich Boris Ulasevich
            Votes:
            0 Vote for this issue
            Watchers:
            5 Start watching this issue

              Created:
              Updated: