While working on JDK-8302850 I ended up writing assembly code with this shape:
```
void LIRGenerator::do_Clone(Intrinsic* x) {
assert(x->number_of_arguments() == 1, "wrong type");
CodeEmitInfo* info = state_for(x, x->state());
LIRItem src(x->argument_at(0), this);
ciType* src_type = src.value()->exact_type();
LIRItem array(src.value(), this);
LIR_Opr len = FrameMap::r19_opr;
__ load(new LIR_Address(array.result(), arrayOopDesc::length_offset_in_bytes(), T_INT), len, info, lir_patch_none);
LIR_Opr reg = result_register_for(x->type());
LIR_Opr tmp1 = FrameMap::r10_oop_opr;
LIR_Opr tmp2 = FrameMap::r11_oop_opr;
LIR_Opr tmp3 = FrameMap::r5_oop_opr;
LIR_Opr tmp4 = reg;
LIR_Opr klass_reg = FrameMap::r3_metadata_opr;
ciArrayKlass* array_klass = src_type->as_array_klass();
BasicType elem_type = array_klass->element_type()->basic_type();
__ metadata2reg(ciTypeArrayKlass::make(elem_type)->constant_encoding(), klass_reg);
info = state_for(x, x->state()); // TODO is this right?
CodeStub* slow_path = new NewTypeArrayStub(klass_reg, len, reg, info);
__ allocate_array(reg, len, tmp1, tmp2, tmp3, tmp4, elem_type, klass_reg, slow_path);
LIRItem src_pos(new Constant(new IntConstant(0)), this);
LIR_Opr dst = FrameMap::as_opr(j_rarg2);
__ move(reg, dst);
LIRItem dst_pos(new Constant(new IntConstant(0)), this);
LIR_Opr length = FrameMap::as_opr(j_rarg4);
// operands for arraycopy must use fixed registers, otherwise
// LinearScan will fail allocation (because arraycopy always needs a
// call)
// The java calling convention will give us enough registers
// so that on the stub side the args will be perfect already.
// On the other slow/special case side we call C and the arg
// positions are not similar enough to pick one as the best.
// Also because the java calling convention is a "shifted" version
// of the C convention we can process the java args trivially into C
// args without worry of overwriting during the xfer
src.load_item_force (FrameMap::as_oop_opr(j_rarg0));
src_pos.load_item_force (FrameMap::as_opr(j_rarg1));
// __ move(dst, FrameMap::as_opr(j_rarg2));
dst_pos.load_item_force (FrameMap::as_opr(j_rarg3));
__ move(len, length);
LIR_Opr tmp = FrameMap::as_opr(j_rarg5);
int flags = 0;
ciArrayKlass* expected_type = src_type->as_array_klass();
__ arraycopy(src.result(), src_pos.result(), FrameMap::as_opr(j_rarg2), dst_pos.result(), length, tmp, expected_type, flags, info); // does add_safepoint
LIR_Opr result = rlock_result(x);
__ move(dst, result);
}
```
Running that results in a sigsegv on x0 due to the result being in the destination registry `j_arg2`. I discussed this with Roland who pointed out that:
{quote}
I think the bug is that when you passed j_rarg2 to array_copy and use it afterwards, nothing tells you're doing something impossible. c1 knows j_rarg2 is killed by array_copy.
{quote}
The register allocator code should assert this rather than let it through and waiting for the runtime sigsegv.
As a FYI, the issue is avoided if dst is placed in a different register. Here's an example that Roland suggested:
```
LIR_Opr dst = new_register(T_OBJECT);
__ move(reg, dst);
...
__ move(dst, FrameMap::as_opr(j_rarg2));
...
__ arraycopy(...);
...
__ move(dst, result);
```
```
void LIRGenerator::do_Clone(Intrinsic* x) {
assert(x->number_of_arguments() == 1, "wrong type");
CodeEmitInfo* info = state_for(x, x->state());
LIRItem src(x->argument_at(0), this);
ciType* src_type = src.value()->exact_type();
LIRItem array(src.value(), this);
LIR_Opr len = FrameMap::r19_opr;
__ load(new LIR_Address(array.result(), arrayOopDesc::length_offset_in_bytes(), T_INT), len, info, lir_patch_none);
LIR_Opr reg = result_register_for(x->type());
LIR_Opr tmp1 = FrameMap::r10_oop_opr;
LIR_Opr tmp2 = FrameMap::r11_oop_opr;
LIR_Opr tmp3 = FrameMap::r5_oop_opr;
LIR_Opr tmp4 = reg;
LIR_Opr klass_reg = FrameMap::r3_metadata_opr;
ciArrayKlass* array_klass = src_type->as_array_klass();
BasicType elem_type = array_klass->element_type()->basic_type();
__ metadata2reg(ciTypeArrayKlass::make(elem_type)->constant_encoding(), klass_reg);
info = state_for(x, x->state()); // TODO is this right?
CodeStub* slow_path = new NewTypeArrayStub(klass_reg, len, reg, info);
__ allocate_array(reg, len, tmp1, tmp2, tmp3, tmp4, elem_type, klass_reg, slow_path);
LIRItem src_pos(new Constant(new IntConstant(0)), this);
LIR_Opr dst = FrameMap::as_opr(j_rarg2);
__ move(reg, dst);
LIRItem dst_pos(new Constant(new IntConstant(0)), this);
LIR_Opr length = FrameMap::as_opr(j_rarg4);
// operands for arraycopy must use fixed registers, otherwise
// LinearScan will fail allocation (because arraycopy always needs a
// call)
// The java calling convention will give us enough registers
// so that on the stub side the args will be perfect already.
// On the other slow/special case side we call C and the arg
// positions are not similar enough to pick one as the best.
// Also because the java calling convention is a "shifted" version
// of the C convention we can process the java args trivially into C
// args without worry of overwriting during the xfer
src.load_item_force (FrameMap::as_oop_opr(j_rarg0));
src_pos.load_item_force (FrameMap::as_opr(j_rarg1));
// __ move(dst, FrameMap::as_opr(j_rarg2));
dst_pos.load_item_force (FrameMap::as_opr(j_rarg3));
__ move(len, length);
LIR_Opr tmp = FrameMap::as_opr(j_rarg5);
int flags = 0;
ciArrayKlass* expected_type = src_type->as_array_klass();
__ arraycopy(src.result(), src_pos.result(), FrameMap::as_opr(j_rarg2), dst_pos.result(), length, tmp, expected_type, flags, info); // does add_safepoint
LIR_Opr result = rlock_result(x);
__ move(dst, result);
}
```
Running that results in a sigsegv on x0 due to the result being in the destination registry `j_arg2`. I discussed this with Roland who pointed out that:
{quote}
I think the bug is that when you passed j_rarg2 to array_copy and use it afterwards, nothing tells you're doing something impossible. c1 knows j_rarg2 is killed by array_copy.
{quote}
The register allocator code should assert this rather than let it through and waiting for the runtime sigsegv.
As a FYI, the issue is avoided if dst is placed in a different register. Here's an example that Roland suggested:
```
LIR_Opr dst = new_register(T_OBJECT);
__ move(reg, dst);
...
__ move(dst, FrameMap::as_opr(j_rarg2));
...
__ arraycopy(...);
...
__ move(dst, result);
```
- relates to
-
JDK-8302850 Implement C1 clone intrinsic that reuses arraycopy code for primitive arrays
- Resolved