diff --git a/src/hotspot/cpu/aarch64/c2_MacroAssembler_aarch64.cpp b/src/hotspot/cpu/aarch64/c2_MacroAssembler_aarch64.cpp index 605a05a44a7..379e9506a3b 100644 --- a/src/hotspot/cpu/aarch64/c2_MacroAssembler_aarch64.cpp +++ b/src/hotspot/cpu/aarch64/c2_MacroAssembler_aarch64.cpp @@ -107,7 +107,8 @@ address C2_MacroAssembler::arrays_hashcode(Register ary, Register cnt, Register assert(is_power_of_2(unroll_factor), "can't use this value to calculate the jump target PC"); andr(tmp2, cnt, unroll_factor - 1); adr(tmp1, BR_BASE); - sub(tmp1, tmp1, tmp2, ext::sxtw, 3); + // For Cortex A53 offset has to be 4 because 2 nops are generated. + sub(tmp1, tmp1, tmp2, ext::sxtw, VM_Version::supports_a53mac() ? 4 : 3); movw(tmp2, 0x1f); br(tmp1); @@ -115,6 +116,11 @@ address C2_MacroAssembler::arrays_hashcode(Register ary, Register cnt, Register for (size_t i = 0; i < unroll_factor; ++i) { load(tmp1, Address(post(ary, type2aelembytes(eltype))), eltype); maddw(result, result, tmp2, tmp1); + // maddw generates an extra nop for Cortex A53 (see maddw definition in macroAssembler). + // Generate 2nd nop to have 4 instructions per iteration. + if (VM_Version::supports_a53mac()) { + nop(); + } } bind(BR_BASE); subsw(cnt, cnt, unroll_factor); diff --git a/src/hotspot/cpu/aarch64/stubGenerator_aarch64.cpp b/src/hotspot/cpu/aarch64/stubGenerator_aarch64.cpp index f0f145e3d76..10420f815a2 100644 --- a/src/hotspot/cpu/aarch64/stubGenerator_aarch64.cpp +++ b/src/hotspot/cpu/aarch64/stubGenerator_aarch64.cpp @@ -6712,7 +6712,8 @@ class StubGenerator: public StubCodeGenerator { __ andr(rscratch2, cnt, vf - 1); __ bind(TAIL_SHORTCUT); __ adr(rscratch1, BR_BASE); - __ sub(rscratch1, rscratch1, rscratch2, ext::uxtw, 3); + // For Cortex A53 offset has to be 4 because 2 nops are generated. + __ sub(rscratch1, rscratch1, rscratch2, ext::uxtw, VM_Version::supports_a53mac() ? 4 : 3); __ movw(rscratch2, 0x1f); __ br(rscratch1); @@ -6720,6 +6721,11 @@ class StubGenerator: public StubCodeGenerator { __ load(rscratch1, Address(__ post(ary, type2aelembytes(eltype))), eltype); __ maddw(result, result, rscratch2, rscratch1); + // maddw generates an extra nop for Cortex A53 (see maddw definition in macroAssembler). + // Generate 2nd nop to have 4 instructions per iteration. + if (VM_Version::supports_a53mac()) { + __ nop(); + } } __ bind(BR_BASE);