diff --git a/src/hotspot/cpu/aarch64/aarch64_vector.ad b/src/hotspot/cpu/aarch64/aarch64_vector.ad
index d217fb7e949..6ee15e95b72 100644
--- a/src/hotspot/cpu/aarch64/aarch64_vector.ad
+++ b/src/hotspot/cpu/aarch64/aarch64_vector.ad
@@ -204,6 +204,11 @@ source %{
           return false;
         }
         break;
+      case Op_VectorCastHF2F:
+        if (bt != T_SHORT) {
+          return false;
+        }
+        break;
       case Op_VectorLoadShuffle:
       case Op_VectorRearrange:
         if (vlen < 4) {
diff --git a/src/hotspot/cpu/aarch64/aarch64_vector_ad.m4 b/src/hotspot/cpu/aarch64/aarch64_vector_ad.m4
index 422e98d9b68..60d82e83d39 100644
--- a/src/hotspot/cpu/aarch64/aarch64_vector_ad.m4
+++ b/src/hotspot/cpu/aarch64/aarch64_vector_ad.m4
@@ -194,6 +194,11 @@ source %{
           return false;
         }
         break;
+      case Op_VectorCastHF2F:
+        if (bt != T_SHORT) {
+          return false;
+        }
+        break;
       case Op_VectorLoadShuffle:
       case Op_VectorRearrange:
         if (vlen < 4) {
diff --git a/src/hotspot/cpu/riscv/riscv_v.ad b/src/hotspot/cpu/riscv/riscv_v.ad
index 9892d2b9c03..5ffc5505d12 100644
--- a/src/hotspot/cpu/riscv/riscv_v.ad
+++ b/src/hotspot/cpu/riscv/riscv_v.ad
@@ -89,6 +89,10 @@ source %{
         }
         break;
       case Op_VectorCastHF2F:
+        if (bt != T_SHORT) {
+          return false;
+        }
+        // fallthrough
       case Op_VectorCastF2HF:
         return UseZvfh;
       case Op_VectorLoadShuffle:
diff --git a/src/hotspot/cpu/x86/x86.ad b/src/hotspot/cpu/x86/x86.ad
index 8b2c5835544..50161b37aed 100644
--- a/src/hotspot/cpu/x86/x86.ad
+++ b/src/hotspot/cpu/x86/x86.ad
@@ -3793,6 +3793,7 @@ instruct vconvHF2F_reg_mem(vec dst, memory mem) %{
 %}
 
 instruct vconvHF2F(vec dst, vec src) %{
+  predicate(Matcher::vector_element_basic_type(n->in(1)) == T_SHORT);
   match(Set dst (VectorCastHF2F src));
   ins_cost(125);
   format %{ "vector_conv_HF2F $dst,$src" %}
@@ -3803,6 +3804,65 @@ instruct vconvHF2F(vec dst, vec src) %{
   ins_pipe( pipe_slow );
 %}
 
+instruct vconvHF2F_B(vec dst, vec src) %{
+  predicate(Matcher::vector_element_basic_type(n->in(1)) == T_BYTE);
+  match(Set dst (VectorCastHF2F src));
+  ins_cost(125);
+  format %{ "vector_conv_HF2F $dst,$src" %}
+  ins_encode %{
+    int src_sz = Matcher::vector_length(this, $src)*type2aelembytes(T_SHORT);
+    int src_vlen_enc = vector_length_encoding(src_sz);
+    int vlen_enc = vector_length_encoding(this);
+    __ vconvert_b2x(T_SHORT, $dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
+    __ vcvtph2ps($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vconvHF2F_I(vec dst, vec src) %{
+  predicate((Matcher::vector_element_basic_type(n->in(1)) == T_INT) &&
+            ((UseAVX > 2) || (Matcher::vector_length_in_bytes(n->in(1)) < 32)));
+  match(Set dst (VectorCastHF2F src));
+  ins_cost(125);
+  format %{ "vector_conv_HF2F $dst,$src" %}
+  ins_encode %{
+    int src_sz = Matcher::vector_length(this, $src)*type2aelembytes(T_INT);
+    int vlen_enc = vector_length_encoding(this);
+    // convert int to short float16
+    if (UseAVX > 2) {
+      if (!VM_Version::supports_avx512vl()) {
+        vlen_enc = Assembler::AVX_512bit;
+      }
+      __ evpmovdw($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
+    } else {
+      assert(src_sz < 32, "must be");
+      __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg);
+      __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
+    }
+    // convert short float16 to float
+    __ vcvtph2ps($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vconvHF2F_I32(vec dst, vec src, vec vtmp) %{
+  predicate((Matcher::vector_element_basic_type(n->in(1)) == T_INT) &&
+            ((UseAVX <= 2) && (Matcher::vector_length_in_bytes(n->in(1)) == 32)));
+  match(Set dst (VectorCastHF2F src));
+  format %{ "vector_conv_HF2F $dst,$src\t! using $vtmp as temp" %}
+  effect(TEMP vtmp);
+  ins_encode %{
+    int vlen_enc = vector_length_encoding(this);
+    // convert int to short float16
+    __ vpand($vtmp$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg);
+    __ vextracti128($dst$$XMMRegister, $vtmp$$XMMRegister, 0x1);
+    __ vpackusdw($dst$$XMMRegister, $vtmp$$XMMRegister, $dst$$XMMRegister, vlen_enc);
+    // convert short float16 to float
+    __ vcvtph2ps($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
 // ---------------------------------------- VectorReinterpret ------------------------------------
 instruct reinterpret_mask(kReg dst) %{
   predicate(n->bottom_type()->isa_vectmask() &&
diff --git a/src/hotspot/share/opto/vectornode.cpp b/src/hotspot/share/opto/vectornode.cpp
index dc7fd18a8d0..c03f1b9795d 100644
--- a/src/hotspot/share/opto/vectornode.cpp
+++ b/src/hotspot/share/opto/vectornode.cpp
@@ -1497,7 +1497,11 @@ int VectorCastNode::opcode(int sopc, BasicType bt, bool is_signed) {
   // Handle special case for to/from Half Float conversions
   switch (sopc) {
     case Op_ConvHF2F:
-      assert(bt == T_SHORT, "");
+      // Reject unsigned casts and element types other than int/short/byte.
+      if ((!is_signed) || ((bt != T_INT) && (bt != T_SHORT) && (bt != T_BYTE))) {
+        assert(false, "unsigned type or unknown type: %s", type2name(bt));
+        return 0;
+      }
       return Op_VectorCastHF2F;
     case Op_ConvF2HF:
       assert(bt == T_FLOAT, "");
diff --git a/src/hotspot/share/opto/vectornode.hpp b/src/hotspot/share/opto/vectornode.hpp
index 50220c9362b..7d9825b91bc 100644
--- a/src/hotspot/share/opto/vectornode.hpp
+++ b/src/hotspot/share/opto/vectornode.hpp
@@ -1826,7 +1826,8 @@ class VectorCastD2XNode : public VectorCastNode {
 class VectorCastHF2FNode : public VectorCastNode {
  public:
   VectorCastHF2FNode(Node* in, const TypeVect* vt) : VectorCastNode(in, vt) {
-    assert(in->bottom_type()->is_vect()->element_basic_type() == T_SHORT, "must be short");
+    BasicType bt = in->bottom_type()->is_vect()->element_basic_type();
+    assert((bt == T_INT) || (bt == T_SHORT) || (bt == T_BYTE), "must be int/short/byte");
   }
   virtual int Opcode() const;
 };