[X86] Avoid zero extend i16 when inserting fp16 (#126194)
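The non-strict f16-to-f32 FP_EXTEND lowering used to insert the bitcast i16
into a zeroed v8i16 vector, which forced an explicit zero extension of the
scalar half. Since CVTPH2PS only reads the low element, the i16 can instead be
any-extended to i32 and inserted into an undef v4i32. A minimal IR reproducer
for the affected pattern (a sketch for illustration, not one of the patch's
test cases):

    define float @fpext_half(half %h) {
      %f = fpext half %h to float
      ret float %f
    }

With F16C or AVX512F this used to go through vpextrw/movzwl/vmovd before
vcvtph2ps; the test diffs below show that detour disappearing.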
@@ -22044,15 +22044,20 @@ SDValue X86TargetLowering::LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const {
   }

   In = DAG.getBitcast(MVT::i16, In);
-  In = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, MVT::v8i16,
-                   getZeroVector(MVT::v8i16, Subtarget, DAG, DL), In,
-                   DAG.getVectorIdxConstant(0, DL));
   SDValue Res;
   if (IsStrict) {
+    In = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, MVT::v8i16,
+                     getZeroVector(MVT::v8i16, Subtarget, DAG, DL), In,
+                     DAG.getVectorIdxConstant(0, DL));
     Res = DAG.getNode(X86ISD::STRICT_CVTPH2PS, DL, {MVT::v4f32, MVT::Other},
                       {Chain, In});
     Chain = Res.getValue(1);
   } else {
+    In = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, In);
+    In = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, MVT::v4i32,
+                     DAG.getUNDEF(MVT::v4i32), In,
+                     DAG.getVectorIdxConstant(0, DL));
+    In = DAG.getBitcast(MVT::v8i16, In);
     Res = DAG.getNode(X86ISD::CVTPH2PS, DL, MVT::v4f32, In,
                       DAG.getTargetConstant(4, DL, MVT::i32));
   }
@@ -2164,7 +2164,7 @@ define void @test_concat_v2i1(ptr %arg, ptr %arg1, ptr %arg2) nounwind {
 ; KNL-NEXT: setb %al
 ; KNL-NEXT: andl $1, %eax
 ; KNL-NEXT: kmovw %eax, %k0
-; KNL-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[1,1,1,1,4,5,6,7]
+; KNL-NEXT: vpsrld $16, %xmm0, %xmm0
 ; KNL-NEXT: vcvtph2ps %xmm0, %xmm0
 ; KNL-NEXT: vucomiss %xmm2, %xmm0
 ; KNL-NEXT: setb %al
@@ -1443,8 +1443,7 @@ define void @half_vec_compare(ptr %x, ptr %y) {
 ; KNL: ## %bb.0: ## %entry
 ; KNL-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
 ; KNL-NEXT: ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0x07]
-; KNL-NEXT: vpshuflw $85, %xmm0, %xmm1 ## encoding: [0xc5,0xfb,0x70,0xc8,0x55]
-; KNL-NEXT: ## xmm1 = xmm0[1,1,1,1,4,5,6,7]
+; KNL-NEXT: vpsrld $16, %xmm0, %xmm1 ## encoding: [0xc5,0xf1,0x72,0xd0,0x10]
 ; KNL-NEXT: vcvtph2ps %xmm1, %xmm1 ## encoding: [0xc4,0xe2,0x79,0x13,0xc9]
 ; KNL-NEXT: xorl %eax, %eax ## encoding: [0x31,0xc0]
 ; KNL-NEXT: vxorps %xmm2, %xmm2, %xmm2 ## encoding: [0xc5,0xe8,0x57,0xd2]
@@ -1470,8 +1469,7 @@ define void @half_vec_compare(ptr %x, ptr %y) {
 ; AVX512BW: ## %bb.0: ## %entry
 ; AVX512BW-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
 ; AVX512BW-NEXT: ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0x07]
-; AVX512BW-NEXT: vpshuflw $85, %xmm0, %xmm1 ## encoding: [0xc5,0xfb,0x70,0xc8,0x55]
-; AVX512BW-NEXT: ## xmm1 = xmm0[1,1,1,1,4,5,6,7]
+; AVX512BW-NEXT: vpsrld $16, %xmm0, %xmm1 ## encoding: [0xc5,0xf1,0x72,0xd0,0x10]
 ; AVX512BW-NEXT: vcvtph2ps %xmm1, %xmm1 ## encoding: [0xc4,0xe2,0x79,0x13,0xc9]
 ; AVX512BW-NEXT: xorl %eax, %eax ## encoding: [0x31,0xc0]
 ; AVX512BW-NEXT: vxorps %xmm2, %xmm2, %xmm2 ## encoding: [0xc5,0xe8,0x57,0xd2]
@@ -1854,9 +1854,9 @@ define <4 x half> @test_fmaximum_v4f16(<4 x half> %x, <4 x half> %y) nounwind {
 ; AVX512-NEXT: cmovpl %ecx, %r8d
 ; AVX512-NEXT: movl $0, %r11d
 ; AVX512-NEXT: cmoval %ecx, %r11d
-; AVX512-NEXT: vpshuflw {{.*#+}} xmm2 = xmm1[3,3,3,3,4,5,6,7]
+; AVX512-NEXT: vpsrlq $48, %xmm1, %xmm2
 ; AVX512-NEXT: vcvtph2ps %xmm2, %xmm2
-; AVX512-NEXT: vpshuflw {{.*#+}} xmm3 = xmm0[3,3,3,3,4,5,6,7]
+; AVX512-NEXT: vpsrlq $48, %xmm0, %xmm3
 ; AVX512-NEXT: vcvtph2ps %xmm3, %xmm3
 ; AVX512-NEXT: vucomiss %xmm2, %xmm3
 ; AVX512-NEXT: movl $0, %r10d
@@ -1872,9 +1872,9 @@ define <4 x half> @test_fmaximum_v4f16(<4 x half> %x, <4 x half> %y) nounwind {
 ; AVX512-NEXT: cmovpl %ecx, %ebx
 ; AVX512-NEXT: movl $0, %r14d
 ; AVX512-NEXT: cmoval %ecx, %r14d
-; AVX512-NEXT: vpshuflw {{.*#+}} xmm2 = xmm1[1,1,1,1,4,5,6,7]
+; AVX512-NEXT: vpsrld $16, %xmm1, %xmm2
 ; AVX512-NEXT: vcvtph2ps %xmm2, %xmm2
-; AVX512-NEXT: vpshuflw {{.*#+}} xmm3 = xmm0[1,1,1,1,4,5,6,7]
+; AVX512-NEXT: vpsrld $16, %xmm0, %xmm3
 ; AVX512-NEXT: vcvtph2ps %xmm3, %xmm3
 ; AVX512-NEXT: vucomiss %xmm2, %xmm3
 ; AVX512-NEXT: movl $0, %r15d
@@ -1916,7 +1916,7 @@ define <4 x half> @test_fmaximum_v4f16(<4 x half> %x, <4 x half> %y) nounwind {
 ; AVX512-NEXT: vpinsrw $7, %edx, %xmm3, %xmm3
 ; AVX512-NEXT: vpbroadcastw {{.*#+}} xmm4 = [NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN]
 ; AVX512-NEXT: vpblendvb %xmm3, %xmm4, %xmm2, %xmm2
-; AVX512-NEXT: vpshuflw {{.*#+}} xmm3 = xmm2[1,1,1,1,4,5,6,7]
+; AVX512-NEXT: vpsrld $16, %xmm2, %xmm3
 ; AVX512-NEXT: vcvtph2ps %xmm3, %xmm3
 ; AVX512-NEXT: vpxor %xmm4, %xmm4, %xmm4
 ; AVX512-NEXT: vucomiss %xmm4, %xmm3
@@ -1930,21 +1930,21 @@ define <4 x half> @test_fmaximum_v4f16(<4 x half> %x, <4 x half> %y) nounwind {
 ; AVX512-NEXT: cmovpl %eax, %esi
 ; AVX512-NEXT: vmovd %esi, %xmm3
 ; AVX512-NEXT: vpinsrw $1, %edx, %xmm3, %xmm3
-; AVX512-NEXT: vpshufd {{.*#+}} xmm5 = xmm2[1,1,1,1]
+; AVX512-NEXT: vpshufd {{.*#+}} xmm5 = xmm2[1,1,3,3]
 ; AVX512-NEXT: vcvtph2ps %xmm5, %xmm5
 ; AVX512-NEXT: vucomiss %xmm4, %xmm5
 ; AVX512-NEXT: movl $65535, %edx # imm = 0xFFFF
 ; AVX512-NEXT: cmovnel %eax, %edx
 ; AVX512-NEXT: cmovpl %eax, %edx
 ; AVX512-NEXT: vpinsrw $2, %edx, %xmm3, %xmm3
-; AVX512-NEXT: vpshuflw {{.*#+}} xmm5 = xmm2[3,3,3,3,4,5,6,7]
+; AVX512-NEXT: vpsrlq $48, %xmm2, %xmm5
 ; AVX512-NEXT: vcvtph2ps %xmm5, %xmm5
 ; AVX512-NEXT: vucomiss %xmm4, %xmm5
 ; AVX512-NEXT: movl $65535, %edx # imm = 0xFFFF
 ; AVX512-NEXT: cmovnel %eax, %edx
 ; AVX512-NEXT: cmovpl %eax, %edx
 ; AVX512-NEXT: vpinsrw $3, %edx, %xmm3, %xmm3
-; AVX512-NEXT: vpshufd {{.*#+}} xmm5 = xmm2[2,3,2,3]
+; AVX512-NEXT: vpshufd {{.*#+}} xmm5 = xmm2[2,3,0,1]
 ; AVX512-NEXT: vcvtph2ps %xmm5, %xmm5
 ; AVX512-NEXT: vucomiss %xmm4, %xmm5
 ; AVX512-NEXT: movl $65535, %edx # imm = 0xFFFF
@@ -1938,12 +1938,12 @@ define <4 x half> @test_fmaximumnum_v4f16(<4 x half> %x, <4 x half> %y) nounwind
 ; AVX512-NEXT: vpunpcklwd {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
 ; AVX512-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
 ; AVX512-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
-; AVX512-NEXT: vpshuflw {{.*#+}} xmm0 = xmm4[3,3,3,3,4,5,6,7]
+; AVX512-NEXT: vpsrlq $48, %xmm4, %xmm0
 ; AVX512-NEXT: vcvtph2ps %xmm0, %xmm0
 ; AVX512-NEXT: vucomiss %xmm0, %xmm0
 ; AVX512-NEXT: setp %al
 ; AVX512-NEXT: kmovw %eax, %k1
-; AVX512-NEXT: vpshuflw {{.*#+}} xmm1 = xmm8[3,3,3,3,4,5,6,7]
+; AVX512-NEXT: vpsrlq $48, %xmm8, %xmm1
 ; AVX512-NEXT: vcvtph2ps %xmm1, %xmm1
 ; AVX512-NEXT: vucomiss %xmm1, %xmm1
 ; AVX512-NEXT: setp %al
@@ -1996,12 +1996,12 @@ define <4 x half> @test_fmaximumnum_v4f16(<4 x half> %x, <4 x half> %y) nounwind
 ; AVX512-NEXT: seta %al
 ; AVX512-NEXT: kmovw %eax, %k1
 ; AVX512-NEXT: vmovss %xmm1, %xmm2, %xmm2 {%k1}
-; AVX512-NEXT: vpshuflw {{.*#+}} xmm1 = xmm4[1,1,1,1,4,5,6,7]
+; AVX512-NEXT: vpsrld $16, %xmm4, %xmm1
 ; AVX512-NEXT: vcvtph2ps %xmm1, %xmm1
 ; AVX512-NEXT: vucomiss %xmm1, %xmm1
 ; AVX512-NEXT: setp %al
 ; AVX512-NEXT: kmovw %eax, %k1
-; AVX512-NEXT: vpshuflw {{.*#+}} xmm4 = xmm8[1,1,1,1,4,5,6,7]
+; AVX512-NEXT: vpsrld $16, %xmm8, %xmm4
 ; AVX512-NEXT: vcvtph2ps %xmm4, %xmm4
 ; AVX512-NEXT: vucomiss %xmm4, %xmm4
 ; AVX512-NEXT: setp %al
@@ -50,9 +50,6 @@ define half @round_f16(half %h) {
 ;
 ; AVX512F-LABEL: round_f16:
 ; AVX512F: # %bb.0: # %entry
-; AVX512F-NEXT: vpextrw $0, %xmm0, %eax
-; AVX512F-NEXT: movzwl %ax, %eax
-; AVX512F-NEXT: vmovd %eax, %xmm0
 ; AVX512F-NEXT: vcvtph2ps %xmm0, %xmm0
 ; AVX512F-NEXT: vpbroadcastd {{.*#+}} xmm1 = [4.9999997E-1,4.9999997E-1,4.9999997E-1,4.9999997E-1]
 ; AVX512F-NEXT: vpternlogd {{.*#+}} xmm1 = xmm1 | (xmm0 & mem)
@@ -698,7 +698,7 @@ define <4 x i32> @stest_f16i32(<4 x half> %x) nounwind {
 ;
 ; AVX2-LABEL: stest_f16i32:
 ; AVX2: # %bb.0: # %entry
-; AVX2-NEXT: vpshuflw {{.*#+}} xmm1 = xmm0[3,3,3,3,4,5,6,7]
+; AVX2-NEXT: vpsrlq $48, %xmm0, %xmm1
 ; AVX2-NEXT: vcvtph2ps %xmm1, %xmm1
 ; AVX2-NEXT: vcvttss2si %xmm1, %rax
 ; AVX2-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
@@ -709,7 +709,7 @@ define <4 x i32> @stest_f16i32(<4 x half> %x) nounwind {
 ; AVX2-NEXT: vcvttss2si %xmm1, %rax
 ; AVX2-NEXT: vmovq %rcx, %xmm1
 ; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
-; AVX2-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[1,1,1,1,4,5,6,7]
+; AVX2-NEXT: vpsrld $16, %xmm0, %xmm0
 ; AVX2-NEXT: vcvtph2ps %xmm0, %xmm0
 ; AVX2-NEXT: vmovq %rax, %xmm2
 ; AVX2-NEXT: vcvttss2si %xmm0, %rax
@@ -836,7 +836,7 @@ define <4 x i32> @utesth_f16i32(<4 x half> %x) nounwind {
 ;
 ; AVX2-LABEL: utesth_f16i32:
 ; AVX2: # %bb.0: # %entry
-; AVX2-NEXT: vpshuflw {{.*#+}} xmm1 = xmm0[3,3,3,3,4,5,6,7]
+; AVX2-NEXT: vpsrlq $48, %xmm0, %xmm1
 ; AVX2-NEXT: vcvtph2ps %xmm1, %xmm2
 ; AVX2-NEXT: vmovss {{.*#+}} xmm1 = [9.22337203E+18,0.0E+0,0.0E+0,0.0E+0]
 ; AVX2-NEXT: vsubss %xmm1, %xmm2, %xmm3
@@ -866,7 +866,7 @@ define <4 x i32> @utesth_f16i32(<4 x half> %x) nounwind {
 ; AVX2-NEXT: sarq $63, %rdx
 ; AVX2-NEXT: andq %rax, %rdx
 ; AVX2-NEXT: orq %rcx, %rdx
-; AVX2-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[1,1,1,1,4,5,6,7]
+; AVX2-NEXT: vpsrld $16, %xmm0, %xmm0
 ; AVX2-NEXT: vcvtph2ps %xmm0, %xmm0
 ; AVX2-NEXT: vsubss %xmm1, %xmm0, %xmm1
 ; AVX2-NEXT: vcvttss2si %xmm1, %rax
@@ -999,7 +999,7 @@ define <4 x i32> @ustest_f16i32(<4 x half> %x) nounwind {
 ;
 ; AVX2-LABEL: ustest_f16i32:
 ; AVX2: # %bb.0: # %entry
-; AVX2-NEXT: vpshuflw {{.*#+}} xmm1 = xmm0[3,3,3,3,4,5,6,7]
+; AVX2-NEXT: vpsrlq $48, %xmm0, %xmm1
 ; AVX2-NEXT: vcvtph2ps %xmm1, %xmm1
 ; AVX2-NEXT: vcvttss2si %xmm1, %rax
 ; AVX2-NEXT: vmovq %rax, %xmm1
@@ -1011,7 +1011,7 @@ define <4 x i32> @ustest_f16i32(<4 x half> %x) nounwind {
 ; AVX2-NEXT: vcvtph2ps %xmm0, %xmm2
 ; AVX2-NEXT: vcvttss2si %xmm2, %rax
 ; AVX2-NEXT: vmovq %rax, %xmm2
-; AVX2-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[1,1,1,1,4,5,6,7]
+; AVX2-NEXT: vpsrld $16, %xmm0, %xmm0
 ; AVX2-NEXT: vcvtph2ps %xmm0, %xmm0
 ; AVX2-NEXT: vcvttss2si %xmm0, %rax
 ; AVX2-NEXT: vmovq %rax, %xmm0
@@ -3310,7 +3310,7 @@ define <4 x i32> @stest_f16i32_mm(<4 x half> %x) nounwind {
 ;
 ; AVX2-LABEL: stest_f16i32_mm:
 ; AVX2: # %bb.0: # %entry
-; AVX2-NEXT: vpshuflw {{.*#+}} xmm1 = xmm0[3,3,3,3,4,5,6,7]
+; AVX2-NEXT: vpsrlq $48, %xmm0, %xmm1
 ; AVX2-NEXT: vcvtph2ps %xmm1, %xmm1
 ; AVX2-NEXT: vcvttss2si %xmm1, %rax
 ; AVX2-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
@@ -3321,7 +3321,7 @@ define <4 x i32> @stest_f16i32_mm(<4 x half> %x) nounwind {
 ; AVX2-NEXT: vcvttss2si %xmm1, %rax
 ; AVX2-NEXT: vmovq %rcx, %xmm1
 ; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
-; AVX2-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[1,1,1,1,4,5,6,7]
+; AVX2-NEXT: vpsrld $16, %xmm0, %xmm0
 ; AVX2-NEXT: vcvtph2ps %xmm0, %xmm0
 ; AVX2-NEXT: vmovq %rax, %xmm2
 ; AVX2-NEXT: vcvttss2si %xmm0, %rax
@@ -3446,7 +3446,7 @@ define <4 x i32> @utesth_f16i32_mm(<4 x half> %x) nounwind {
 ;
 ; AVX2-LABEL: utesth_f16i32_mm:
 ; AVX2: # %bb.0: # %entry
-; AVX2-NEXT: vpshuflw {{.*#+}} xmm1 = xmm0[3,3,3,3,4,5,6,7]
+; AVX2-NEXT: vpsrlq $48, %xmm0, %xmm1
 ; AVX2-NEXT: vcvtph2ps %xmm1, %xmm2
 ; AVX2-NEXT: vmovss {{.*#+}} xmm1 = [9.22337203E+18,0.0E+0,0.0E+0,0.0E+0]
 ; AVX2-NEXT: vsubss %xmm1, %xmm2, %xmm3
@@ -3476,7 +3476,7 @@ define <4 x i32> @utesth_f16i32_mm(<4 x half> %x) nounwind {
 ; AVX2-NEXT: sarq $63, %rdx
 ; AVX2-NEXT: andq %rax, %rdx
 ; AVX2-NEXT: orq %rcx, %rdx
-; AVX2-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[1,1,1,1,4,5,6,7]
+; AVX2-NEXT: vpsrld $16, %xmm0, %xmm0
 ; AVX2-NEXT: vcvtph2ps %xmm0, %xmm0
 ; AVX2-NEXT: vsubss %xmm1, %xmm0, %xmm1
 ; AVX2-NEXT: vcvttss2si %xmm1, %rax
@@ -3608,7 +3608,7 @@ define <4 x i32> @ustest_f16i32_mm(<4 x half> %x) nounwind {
 ;
 ; AVX2-LABEL: ustest_f16i32_mm:
 ; AVX2: # %bb.0: # %entry
-; AVX2-NEXT: vpshuflw {{.*#+}} xmm1 = xmm0[3,3,3,3,4,5,6,7]
+; AVX2-NEXT: vpsrlq $48, %xmm0, %xmm1
 ; AVX2-NEXT: vcvtph2ps %xmm1, %xmm1
 ; AVX2-NEXT: vcvttss2si %xmm1, %rax
 ; AVX2-NEXT: vmovq %rax, %xmm1
@@ -3620,7 +3620,7 @@ define <4 x i32> @ustest_f16i32_mm(<4 x half> %x) nounwind {
 ; AVX2-NEXT: vcvtph2ps %xmm0, %xmm2
 ; AVX2-NEXT: vcvttss2si %xmm2, %rax
 ; AVX2-NEXT: vmovq %rax, %xmm2
-; AVX2-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[1,1,1,1,4,5,6,7]
+; AVX2-NEXT: vpsrld $16, %xmm0, %xmm0
 ; AVX2-NEXT: vcvtph2ps %xmm0, %xmm0
 ; AVX2-NEXT: vcvttss2si %xmm0, %rax
 ; AVX2-NEXT: vmovq %rax, %xmm0
@@ -1593,9 +1593,9 @@ define <8 x half> @maxnum_v8f16(<8 x half> %0, <8 x half> %1) #0 {
 ; BWON-F16C-NEXT: # %bb.7:
 ; BWON-F16C-NEXT: vmovaps %xmm5, %xmm6
 ; BWON-F16C-NEXT: .LBB26_8:
-; BWON-F16C-NEXT: vpshuflw {{.*#+}} xmm5 = xmm1[3,3,3,3,4,5,6,7]
+; BWON-F16C-NEXT: vpsrlq $48, %xmm1, %xmm5
 ; BWON-F16C-NEXT: vcvtph2ps %xmm5, %xmm7
-; BWON-F16C-NEXT: vpshuflw {{.*#+}} xmm5 = xmm0[3,3,3,3,4,5,6,7]
+; BWON-F16C-NEXT: vpsrlq $48, %xmm0, %xmm5
 ; BWON-F16C-NEXT: vcvtph2ps %xmm5, %xmm5
 ; BWON-F16C-NEXT: vucomiss %xmm7, %xmm5
 ; BWON-F16C-NEXT: ja .LBB26_10
@@ -1629,9 +1629,9 @@ define <8 x half> @maxnum_v8f16(<8 x half> %0, <8 x half> %1) #0 {
 ; BWON-F16C-NEXT: vpunpckldq {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[1],xmm2[1]
 ; BWON-F16C-NEXT: vpunpcklwd {{.*#+}} xmm3 = xmm5[0],xmm4[0],xmm5[1],xmm4[1],xmm5[2],xmm4[2],xmm5[3],xmm4[3]
 ; BWON-F16C-NEXT: vcvtps2ph $4, %xmm6, %xmm4
-; BWON-F16C-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[1,1,1,1,4,5,6,7]
+; BWON-F16C-NEXT: vpsrld $16, %xmm1, %xmm1
 ; BWON-F16C-NEXT: vcvtph2ps %xmm1, %xmm1
-; BWON-F16C-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[1,1,1,1,4,5,6,7]
+; BWON-F16C-NEXT: vpsrld $16, %xmm0, %xmm0
 ; BWON-F16C-NEXT: vcvtph2ps %xmm0, %xmm0
 ; BWON-F16C-NEXT: vucomiss %xmm1, %xmm0
 ; BWON-F16C-NEXT: ja .LBB26_16
@@ -4,7 +4,7 @@
 define void @_test_func(<16 x half> %0) #0 {
 ; CHECK-LABEL: _test_func:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vpshuflw {{.*#+}} xmm1 = xmm0[3,3,3,3,4,5,6,7]
+; CHECK-NEXT: vpsrlq $48, %xmm0, %xmm1
 ; CHECK-NEXT: vcvtph2ps %xmm1, %xmm1
 ; CHECK-NEXT: xorl %eax, %eax
 ; CHECK-NEXT: vucomiss %xmm1, %xmm1
@@ -16,7 +16,7 @@ define void @_test_func(<16 x half> %0) #0 {
 ; CHECK-NEXT: vucomiss %xmm1, %xmm1
 ; CHECK-NEXT: movl $0, %esi
 ; CHECK-NEXT: cmovnpl %ecx, %esi
-; CHECK-NEXT: vpshuflw {{.*#+}} xmm1 = xmm0[1,1,1,1,4,5,6,7]
+; CHECK-NEXT: vpsrld $16, %xmm0, %xmm1
 ; CHECK-NEXT: vcvtph2ps %xmm1, %xmm1
 ; CHECK-NEXT: vucomiss %xmm1, %xmm1
 ; CHECK-NEXT: movl $0, %edi
@@ -7,7 +7,8 @@ define void @PR91005(ptr %0) minsize {
 ; CHECK-NEXT: testb %al, %al
 ; CHECK-NEXT: je .LBB0_2
 ; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: vbroadcastss {{.*#+}} xmm0 = [31744,31744,31744,31744]
+; CHECK-NEXT: movl $31744, %eax # imm = 0x7C00
+; CHECK-NEXT: vmovd %eax, %xmm0
 ; CHECK-NEXT: vpcmpeqw %xmm0, %xmm0, %xmm0
 ; CHECK-NEXT: vpinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
 ; CHECK-NEXT: vpand %xmm1, %xmm0, %xmm0
@@ -31,7 +31,7 @@ define <2 x i64> @llrint_v2i64_v2f16(<2 x half> %x) {
 ; AVX-NEXT: vcvtph2ps %xmm1, %xmm1
 ; AVX-NEXT: vcvttss2si %xmm1, %rax
 ; AVX-NEXT: vmovq %rax, %xmm1
-; AVX-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[1,1,1,1,4,5,6,7]
+; AVX-NEXT: vpsrld $16, %xmm0, %xmm0
 ; AVX-NEXT: vcvtph2ps %xmm0, %xmm0
 ; AVX-NEXT: vroundss $4, %xmm0, %xmm0, %xmm0
 ; AVX-NEXT: vcvtps2ph $4, %xmm0, %xmm0
@@ -52,7 +52,7 @@ define <2 x i64> @llrint_v2i64_v2f16(<2 x half> %x) {
 define <4 x i64> @llrint_v4i64_v4f16(<4 x half> %x) {
 ; AVX-LABEL: llrint_v4i64_v4f16:
 ; AVX: # %bb.0:
-; AVX-NEXT: vpshuflw {{.*#+}} xmm1 = xmm0[3,3,3,3,4,5,6,7]
+; AVX-NEXT: vpsrlq $48, %xmm0, %xmm1
 ; AVX-NEXT: vcvtph2ps %xmm1, %xmm1
 ; AVX-NEXT: vroundss $4, %xmm1, %xmm1, %xmm1
 ; AVX-NEXT: vcvtps2ph $4, %xmm1, %xmm1
@@ -73,7 +73,7 @@ define <4 x i64> @llrint_v4i64_v4f16(<4 x half> %x) {
 ; AVX-NEXT: vcvtph2ps %xmm2, %xmm2
 ; AVX-NEXT: vcvttss2si %xmm2, %rax
 ; AVX-NEXT: vmovq %rax, %xmm2
-; AVX-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[1,1,1,1,4,5,6,7]
+; AVX-NEXT: vpsrld $16, %xmm0, %xmm0
 ; AVX-NEXT: vcvtph2ps %xmm0, %xmm0
 ; AVX-NEXT: vroundss $4, %xmm0, %xmm0, %xmm0
 ; AVX-NEXT: vcvtps2ph $4, %xmm0, %xmm0
@@ -95,7 +95,7 @@ define <4 x i64> @llrint_v4i64_v4f16(<4 x half> %x) {
 define <8 x i64> @llrint_v8i64_v8f16(<8 x half> %x) {
 ; AVX-LABEL: llrint_v8i64_v8f16:
 ; AVX: # %bb.0:
-; AVX-NEXT: vpshuflw {{.*#+}} xmm1 = xmm0[3,3,3,3,4,5,6,7]
+; AVX-NEXT: vpsrlq $48, %xmm0, %xmm1
 ; AVX-NEXT: vcvtph2ps %xmm1, %xmm1
 ; AVX-NEXT: vroundss $4, %xmm1, %xmm1, %xmm1
 ; AVX-NEXT: vcvtps2ph $4, %xmm1, %xmm1
@@ -116,7 +116,7 @@ define <8 x i64> @llrint_v8i64_v8f16(<8 x half> %x) {
 ; AVX-NEXT: vcvtph2ps %xmm2, %xmm2
 ; AVX-NEXT: vcvttss2si %xmm2, %rax
 ; AVX-NEXT: vmovq %rax, %xmm2
-; AVX-NEXT: vpshuflw {{.*#+}} xmm3 = xmm0[1,1,1,1,4,5,6,7]
+; AVX-NEXT: vpsrld $16, %xmm0, %xmm3
 ; AVX-NEXT: vcvtph2ps %xmm3, %xmm3
 ; AVX-NEXT: vroundss $4, %xmm3, %xmm3, %xmm3
 ; AVX-NEXT: vcvtps2ph $4, %xmm3, %xmm3
@@ -171,7 +171,7 @@ define <16 x i64> @llrint_v16i64_v16f16(<16 x half> %x) {
 ; AVX-LABEL: llrint_v16i64_v16f16:
 ; AVX: # %bb.0:
 ; AVX-NEXT: vmovdqa %ymm0, %ymm2
-; AVX-NEXT: vpshuflw {{.*#+}} xmm0 = xmm2[3,3,3,3,4,5,6,7]
+; AVX-NEXT: vpsrlq $48, %xmm2, %xmm0
 ; AVX-NEXT: vcvtph2ps %xmm0, %xmm0
 ; AVX-NEXT: vroundss $4, %xmm0, %xmm0, %xmm0
 ; AVX-NEXT: vcvtps2ph $4, %xmm0, %xmm0
@@ -192,7 +192,7 @@ define <16 x i64> @llrint_v16i64_v16f16(<16 x half> %x) {
 ; AVX-NEXT: vcvtph2ps %xmm1, %xmm1
 ; AVX-NEXT: vcvttss2si %xmm1, %rax
 ; AVX-NEXT: vmovq %rax, %xmm1
-; AVX-NEXT: vpshuflw {{.*#+}} xmm3 = xmm2[1,1,1,1,4,5,6,7]
+; AVX-NEXT: vpsrld $16, %xmm2, %xmm3
 ; AVX-NEXT: vcvtph2ps %xmm3, %xmm3
 ; AVX-NEXT: vroundss $4, %xmm3, %xmm3, %xmm3
 ; AVX-NEXT: vcvtps2ph $4, %xmm3, %xmm3
@@ -233,7 +233,7 @@ define <16 x i64> @llrint_v16i64_v16f16(<16 x half> %x) {
 ; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm3 = xmm4[0],xmm3[0]
 ; AVX-NEXT: vinserti128 $1, %xmm1, %ymm3, %ymm1
 ; AVX-NEXT: vextracti128 $1, %ymm2, %xmm3
-; AVX-NEXT: vpshuflw {{.*#+}} xmm2 = xmm3[3,3,3,3,4,5,6,7]
+; AVX-NEXT: vpsrlq $48, %xmm3, %xmm2
 ; AVX-NEXT: vcvtph2ps %xmm2, %xmm2
 ; AVX-NEXT: vroundss $4, %xmm2, %xmm2, %xmm2
 ; AVX-NEXT: vcvtps2ph $4, %xmm2, %xmm2
@@ -254,7 +254,7 @@ define <16 x i64> @llrint_v16i64_v16f16(<16 x half> %x) {
 ; AVX-NEXT: vcvtph2ps %xmm4, %xmm4
 ; AVX-NEXT: vcvttss2si %xmm4, %rax
 ; AVX-NEXT: vmovq %rax, %xmm4
-; AVX-NEXT: vpshuflw {{.*#+}} xmm5 = xmm3[1,1,1,1,4,5,6,7]
+; AVX-NEXT: vpsrld $16, %xmm3, %xmm5
 ; AVX-NEXT: vcvtph2ps %xmm5, %xmm5
 ; AVX-NEXT: vroundss $4, %xmm5, %xmm5, %xmm5
 ; AVX-NEXT: vcvtps2ph $4, %xmm5, %xmm5
@@ -348,7 +348,7 @@ define <32 x i64> @llrint_v32i64_v32f16(<32 x half> %x) {
 ; AVX-NEXT: vcvtph2ps %xmm4, %xmm4
 ; AVX-NEXT: vcvttss2si %xmm4, %rcx
 ; AVX-NEXT: vmovq %rcx, %xmm4
-; AVX-NEXT: vpshuflw {{.*#+}} xmm5 = xmm1[1,1,1,1,4,5,6,7]
+; AVX-NEXT: vpsrld $16, %xmm1, %xmm5
 ; AVX-NEXT: vcvtph2ps %xmm5, %xmm5
 ; AVX-NEXT: vroundss $4, %xmm5, %xmm5, %xmm5
 ; AVX-NEXT: vcvtps2ph $4, %xmm5, %xmm5
@@ -356,7 +356,7 @@ define <32 x i64> @llrint_v32i64_v32f16(<32 x half> %x) {
 ; AVX-NEXT: vcvttss2si %xmm5, %rcx
 ; AVX-NEXT: vmovq %rcx, %xmm5
 ; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm4 = xmm4[0],xmm5[0]
-; AVX-NEXT: vpshuflw {{.*#+}} xmm5 = xmm1[3,3,3,3,4,5,6,7]
+; AVX-NEXT: vpsrlq $48, %xmm1, %xmm5
 ; AVX-NEXT: vcvtph2ps %xmm5, %xmm5
 ; AVX-NEXT: vroundss $4, %xmm5, %xmm5, %xmm5
 ; AVX-NEXT: vcvtps2ph $4, %xmm5, %xmm5
@@ -408,7 +408,7 @@ define <32 x i64> @llrint_v32i64_v32f16(<32 x half> %x) {
 ; AVX-NEXT: vcvtph2ps %xmm7, %xmm7
 ; AVX-NEXT: vcvttss2si %xmm7, %rcx
 ; AVX-NEXT: vmovq %rcx, %xmm7
-; AVX-NEXT: vpshuflw {{.*#+}} xmm9 = xmm8[1,1,1,1,4,5,6,7]
+; AVX-NEXT: vpsrld $16, %xmm8, %xmm9
 ; AVX-NEXT: vcvtph2ps %xmm9, %xmm9
 ; AVX-NEXT: vroundss $4, %xmm9, %xmm9, %xmm9
 ; AVX-NEXT: vcvtps2ph $4, %xmm9, %xmm9
@@ -416,7 +416,7 @@ define <32 x i64> @llrint_v32i64_v32f16(<32 x half> %x) {
 ; AVX-NEXT: vcvttss2si %xmm9, %rcx
 ; AVX-NEXT: vmovq %rcx, %xmm9
 ; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm7 = xmm7[0],xmm9[0]
-; AVX-NEXT: vpshuflw {{.*#+}} xmm9 = xmm8[3,3,3,3,4,5,6,7]
+; AVX-NEXT: vpsrlq $48, %xmm8, %xmm9
 ; AVX-NEXT: vcvtph2ps %xmm9, %xmm9
 ; AVX-NEXT: vroundss $4, %xmm9, %xmm9, %xmm9
 ; AVX-NEXT: vcvtps2ph $4, %xmm9, %xmm9
@@ -467,7 +467,7 @@ define <32 x i64> @llrint_v32i64_v32f16(<32 x half> %x) {
 ; AVX-NEXT: vcvtph2ps %xmm11, %xmm11
 ; AVX-NEXT: vcvttss2si %xmm11, %rcx
 ; AVX-NEXT: vmovq %rcx, %xmm11
-; AVX-NEXT: vpshuflw {{.*#+}} xmm12 = xmm0[1,1,1,1,4,5,6,7]
+; AVX-NEXT: vpsrld $16, %xmm0, %xmm12
 ; AVX-NEXT: vcvtph2ps %xmm12, %xmm12
 ; AVX-NEXT: vroundss $4, %xmm12, %xmm12, %xmm12
 ; AVX-NEXT: vcvtps2ph $4, %xmm12, %xmm12
@@ -475,7 +475,7 @@ define <32 x i64> @llrint_v32i64_v32f16(<32 x half> %x) {
 ; AVX-NEXT: vcvttss2si %xmm12, %rcx
 ; AVX-NEXT: vmovq %rcx, %xmm12
 ; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm11 = xmm11[0],xmm12[0]
-; AVX-NEXT: vpshuflw {{.*#+}} xmm12 = xmm0[3,3,3,3,4,5,6,7]
+; AVX-NEXT: vpsrlq $48, %xmm0, %xmm12
 ; AVX-NEXT: vcvtph2ps %xmm12, %xmm12
 ; AVX-NEXT: vroundss $4, %xmm12, %xmm12, %xmm12
 ; AVX-NEXT: vcvtps2ph $4, %xmm12, %xmm12
@@ -526,7 +526,7 @@ define <32 x i64> @llrint_v32i64_v32f16(<32 x half> %x) {
 ; AVX-NEXT: vcvtps2ph $4, %xmm15, %xmm15
 ; AVX-NEXT: vcvtph2ps %xmm15, %xmm15
 ; AVX-NEXT: vcvttss2si %xmm15, %rcx
-; AVX-NEXT: vpshuflw {{.*#+}} xmm15 = xmm0[1,1,1,1,4,5,6,7]
+; AVX-NEXT: vpsrld $16, %xmm0, %xmm15
 ; AVX-NEXT: vcvtph2ps %xmm15, %xmm15
 ; AVX-NEXT: vroundss $4, %xmm15, %xmm15, %xmm15
 ; AVX-NEXT: vcvtps2ph $4, %xmm15, %xmm15
@@ -535,7 +535,7 @@ define <32 x i64> @llrint_v32i64_v32f16(<32 x half> %x) {
 ; AVX-NEXT: vmovq %rcx, %xmm15
 ; AVX-NEXT: vmovq %rdx, %xmm2
 ; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm15[0],xmm2[0]
-; AVX-NEXT: vpshuflw {{.*#+}} xmm15 = xmm0[3,3,3,3,4,5,6,7]
+; AVX-NEXT: vpsrlq $48, %xmm0, %xmm15
 ; AVX-NEXT: vcvtph2ps %xmm15, %xmm15
 ; AVX-NEXT: vroundss $4, %xmm15, %xmm15, %xmm15
 ; AVX-NEXT: vcvtps2ph $4, %xmm15, %xmm15
@@ -76,7 +76,7 @@ declare <1 x iXLen> @llvm.lrint.v1iXLen.v1f16(<1 x half>)
 define <2 x iXLen> @lrint_v2f16(<2 x half> %x) {
 ; X86-AVX-I16-LABEL: lrint_v2f16:
 ; X86-AVX-I16: # %bb.0:
-; X86-AVX-I16-NEXT: vpshuflw {{.*#+}} xmm1 = xmm0[1,1,1,1,4,5,6,7]
+; X86-AVX-I16-NEXT: vpsrld $16, %xmm0, %xmm1
 ; X86-AVX-I16-NEXT: vcvtph2ps %xmm1, %xmm1
 ; X86-AVX-I16-NEXT: vroundss $4, %xmm1, %xmm1, %xmm1
 ; X86-AVX-I16-NEXT: vcvtps2ph $4, %xmm1, %xmm1
@@ -96,7 +96,7 @@ define <2 x iXLen> @lrint_v2f16(<2 x half> %x) {
 ; X86-AVX-I16-NEXT: vcvtph2ps %xmm2, %xmm2
 ; X86-AVX-I16-NEXT: vcvttss2si %xmm2, %eax
 ; X86-AVX-I16-NEXT: vpinsrw $2, %eax, %xmm1, %xmm1
-; X86-AVX-I16-NEXT: vpshuflw {{.*#+}} xmm2 = xmm0[3,3,3,3,4,5,6,7]
+; X86-AVX-I16-NEXT: vpsrlq $48, %xmm0, %xmm2
 ; X86-AVX-I16-NEXT: vcvtph2ps %xmm2, %xmm2
 ; X86-AVX-I16-NEXT: vroundss $4, %xmm2, %xmm2, %xmm2
 ; X86-AVX-I16-NEXT: vcvtps2ph $4, %xmm2, %xmm2
@@ -140,7 +140,7 @@ define <2 x iXLen> @lrint_v2f16(<2 x half> %x) {
 ;
 ; X64-AVX-I16-LABEL: lrint_v2f16:
 ; X64-AVX-I16: # %bb.0:
-; X64-AVX-I16-NEXT: vpshuflw {{.*#+}} xmm1 = xmm0[1,1,1,1,4,5,6,7]
+; X64-AVX-I16-NEXT: vpsrld $16, %xmm0, %xmm1
 ; X64-AVX-I16-NEXT: vcvtph2ps %xmm1, %xmm1
 ; X64-AVX-I16-NEXT: vroundss $4, %xmm1, %xmm1, %xmm1
 ; X64-AVX-I16-NEXT: vcvtps2ph $4, %xmm1, %xmm1
@@ -160,7 +160,7 @@ define <2 x iXLen> @lrint_v2f16(<2 x half> %x) {
 ; X64-AVX-I16-NEXT: vcvtph2ps %xmm2, %xmm2
 ; X64-AVX-I16-NEXT: vcvttss2si %xmm2, %eax
 ; X64-AVX-I16-NEXT: vpinsrw $2, %eax, %xmm1, %xmm1
-; X64-AVX-I16-NEXT: vpshuflw {{.*#+}} xmm2 = xmm0[3,3,3,3,4,5,6,7]
+; X64-AVX-I16-NEXT: vpsrlq $48, %xmm0, %xmm2
 ; X64-AVX-I16-NEXT: vcvtph2ps %xmm2, %xmm2
 ; X64-AVX-I16-NEXT: vroundss $4, %xmm2, %xmm2, %xmm2
 ; X64-AVX-I16-NEXT: vcvtps2ph $4, %xmm2, %xmm2
@@ -204,7 +204,7 @@ define <2 x iXLen> @lrint_v2f16(<2 x half> %x) {
 ;
 ; X86-AVX-I32-LABEL: lrint_v2f16:
 ; X86-AVX-I32: # %bb.0:
-; X86-AVX-I32-NEXT: vpshuflw {{.*#+}} xmm1 = xmm0[1,1,1,1,4,5,6,7]
+; X86-AVX-I32-NEXT: vpsrld $16, %xmm0, %xmm1
 ; X86-AVX-I32-NEXT: vcvtph2ps %xmm1, %xmm1
 ; X86-AVX-I32-NEXT: vroundss $4, %xmm1, %xmm1, %xmm1
 ; X86-AVX-I32-NEXT: vcvtps2ph $4, %xmm1, %xmm1
@@ -226,7 +226,7 @@ define <2 x iXLen> @lrint_v2f16(<2 x half> %x) {
 ;
 ; X64-AVX-I32-LABEL: lrint_v2f16:
 ; X64-AVX-I32: # %bb.0:
-; X64-AVX-I32-NEXT: vpshuflw {{.*#+}} xmm1 = xmm0[1,1,1,1,4,5,6,7]
+; X64-AVX-I32-NEXT: vpsrld $16, %xmm0, %xmm1
 ; X64-AVX-I32-NEXT: vcvtph2ps %xmm1, %xmm1
 ; X64-AVX-I32-NEXT: vroundss $4, %xmm1, %xmm1, %xmm1
 ; X64-AVX-I32-NEXT: vcvtps2ph $4, %xmm1, %xmm1
@@ -253,7 +253,7 @@ declare <2 x iXLen> @llvm.lrint.v2iXLen.v2f16(<2 x half>)
 define <4 x iXLen> @lrint_v4f16(<4 x half> %x) {
 ; X86-AVX-I16-LABEL: lrint_v4f16:
 ; X86-AVX-I16: # %bb.0:
-; X86-AVX-I16-NEXT: vpshuflw {{.*#+}} xmm1 = xmm0[1,1,1,1,4,5,6,7]
+; X86-AVX-I16-NEXT: vpsrld $16, %xmm0, %xmm1
 ; X86-AVX-I16-NEXT: vcvtph2ps %xmm1, %xmm1
 ; X86-AVX-I16-NEXT: vroundss $4, %xmm1, %xmm1, %xmm1
 ; X86-AVX-I16-NEXT: vcvtps2ph $4, %xmm1, %xmm1
@@ -273,7 +273,7 @@ define <4 x iXLen> @lrint_v4f16(<4 x half> %x) {
 ; X86-AVX-I16-NEXT: vcvtph2ps %xmm2, %xmm2
 ; X86-AVX-I16-NEXT: vcvttss2si %xmm2, %eax
 ; X86-AVX-I16-NEXT: vpinsrw $2, %eax, %xmm1, %xmm1
-; X86-AVX-I16-NEXT: vpshuflw {{.*#+}} xmm2 = xmm0[3,3,3,3,4,5,6,7]
+; X86-AVX-I16-NEXT: vpsrlq $48, %xmm0, %xmm2
 ; X86-AVX-I16-NEXT: vcvtph2ps %xmm2, %xmm2
 ; X86-AVX-I16-NEXT: vroundss $4, %xmm2, %xmm2, %xmm2
 ; X86-AVX-I16-NEXT: vcvtps2ph $4, %xmm2, %xmm2
@@ -317,7 +317,7 @@ define <4 x iXLen> @lrint_v4f16(<4 x half> %x) {
 ;
 ; X64-AVX-I16-LABEL: lrint_v4f16:
 ; X64-AVX-I16: # %bb.0:
-; X64-AVX-I16-NEXT: vpshuflw {{.*#+}} xmm1 = xmm0[1,1,1,1,4,5,6,7]
+; X64-AVX-I16-NEXT: vpsrld $16, %xmm0, %xmm1
 ; X64-AVX-I16-NEXT: vcvtph2ps %xmm1, %xmm1
 ; X64-AVX-I16-NEXT: vroundss $4, %xmm1, %xmm1, %xmm1
 ; X64-AVX-I16-NEXT: vcvtps2ph $4, %xmm1, %xmm1
@@ -337,7 +337,7 @@ define <4 x iXLen> @lrint_v4f16(<4 x half> %x) {
 ; X64-AVX-I16-NEXT: vcvtph2ps %xmm2, %xmm2
 ; X64-AVX-I16-NEXT: vcvttss2si %xmm2, %eax
 ; X64-AVX-I16-NEXT: vpinsrw $2, %eax, %xmm1, %xmm1
-; X64-AVX-I16-NEXT: vpshuflw {{.*#+}} xmm2 = xmm0[3,3,3,3,4,5,6,7]
+; X64-AVX-I16-NEXT: vpsrlq $48, %xmm0, %xmm2
 ; X64-AVX-I16-NEXT: vcvtph2ps %xmm2, %xmm2
 ; X64-AVX-I16-NEXT: vroundss $4, %xmm2, %xmm2, %xmm2
 ; X64-AVX-I16-NEXT: vcvtps2ph $4, %xmm2, %xmm2
@@ -381,7 +381,7 @@ define <4 x iXLen> @lrint_v4f16(<4 x half> %x) {
 ;
 ; X86-AVX-I32-LABEL: lrint_v4f16:
 ; X86-AVX-I32: # %bb.0:
-; X86-AVX-I32-NEXT: vpshuflw {{.*#+}} xmm1 = xmm0[1,1,1,1,4,5,6,7]
+; X86-AVX-I32-NEXT: vpsrld $16, %xmm0, %xmm1
 ; X86-AVX-I32-NEXT: vcvtph2ps %xmm1, %xmm1
 ; X86-AVX-I32-NEXT: vroundss $4, %xmm1, %xmm1, %xmm1
 ; X86-AVX-I32-NEXT: vcvtps2ph $4, %xmm1, %xmm1
@@ -401,7 +401,7 @@ define <4 x iXLen> @lrint_v4f16(<4 x half> %x) {
 ; X86-AVX-I32-NEXT: vcvtph2ps %xmm2, %xmm2
 ; X86-AVX-I32-NEXT: vcvttss2si %xmm2, %eax
 ; X86-AVX-I32-NEXT: vpinsrd $2, %eax, %xmm1, %xmm1
-; X86-AVX-I32-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[3,3,3,3,4,5,6,7]
+; X86-AVX-I32-NEXT: vpsrlq $48, %xmm0, %xmm0
 ; X86-AVX-I32-NEXT: vcvtph2ps %xmm0, %xmm0
 ; X86-AVX-I32-NEXT: vroundss $4, %xmm0, %xmm0, %xmm0
 ; X86-AVX-I32-NEXT: vcvtps2ph $4, %xmm0, %xmm0
@@ -417,7 +417,7 @@ define <4 x iXLen> @lrint_v4f16(<4 x half> %x) {
 ;
 ; X64-AVX-I32-LABEL: lrint_v4f16:
 ; X64-AVX-I32: # %bb.0:
-; X64-AVX-I32-NEXT: vpshuflw {{.*#+}} xmm1 = xmm0[1,1,1,1,4,5,6,7]
+; X64-AVX-I32-NEXT: vpsrld $16, %xmm0, %xmm1
 ; X64-AVX-I32-NEXT: vcvtph2ps %xmm1, %xmm1
 ; X64-AVX-I32-NEXT: vroundss $4, %xmm1, %xmm1, %xmm1
 ; X64-AVX-I32-NEXT: vcvtps2ph $4, %xmm1, %xmm1
@@ -437,7 +437,7 @@ define <4 x iXLen> @lrint_v4f16(<4 x half> %x) {
 ; X64-AVX-I32-NEXT: vcvtph2ps %xmm2, %xmm2
 ; X64-AVX-I32-NEXT: vcvttss2si %xmm2, %eax
 ; X64-AVX-I32-NEXT: vpinsrd $2, %eax, %xmm1, %xmm1
-; X64-AVX-I32-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[3,3,3,3,4,5,6,7]
+; X64-AVX-I32-NEXT: vpsrlq $48, %xmm0, %xmm0
 ; X64-AVX-I32-NEXT: vcvtph2ps %xmm0, %xmm0
 ; X64-AVX-I32-NEXT: vroundss $4, %xmm0, %xmm0, %xmm0
 ; X64-AVX-I32-NEXT: vcvtps2ph $4, %xmm0, %xmm0
@@ -458,7 +458,7 @@ declare <4 x iXLen> @llvm.lrint.v4iXLen.v4f16(<4 x half>)
 define <8 x iXLen> @lrint_v8f16(<8 x half> %x) {
 ; X86-AVX-I16-LABEL: lrint_v8f16:
 ; X86-AVX-I16: # %bb.0:
-; X86-AVX-I16-NEXT: vpshuflw {{.*#+}} xmm1 = xmm0[1,1,1,1,4,5,6,7]
+; X86-AVX-I16-NEXT: vpsrld $16, %xmm0, %xmm1
 ; X86-AVX-I16-NEXT: vcvtph2ps %xmm1, %xmm1
 ; X86-AVX-I16-NEXT: vroundss $4, %xmm1, %xmm1, %xmm1
 ; X86-AVX-I16-NEXT: vcvtps2ph $4, %xmm1, %xmm1
@@ -478,7 +478,7 @@ define <8 x iXLen> @lrint_v8f16(<8 x half> %x) {
 ; X86-AVX-I16-NEXT: vcvtph2ps %xmm2, %xmm2
 ; X86-AVX-I16-NEXT: vcvttss2si %xmm2, %eax
 ; X86-AVX-I16-NEXT: vpinsrw $2, %eax, %xmm1, %xmm1
-; X86-AVX-I16-NEXT: vpshuflw {{.*#+}} xmm2 = xmm0[3,3,3,3,4,5,6,7]
+; X86-AVX-I16-NEXT: vpsrlq $48, %xmm0, %xmm2
 ; X86-AVX-I16-NEXT: vcvtph2ps %xmm2, %xmm2
 ; X86-AVX-I16-NEXT: vroundss $4, %xmm2, %xmm2, %xmm2
 ; X86-AVX-I16-NEXT: vcvtps2ph $4, %xmm2, %xmm2
@@ -522,7 +522,7 @@ define <8 x iXLen> @lrint_v8f16(<8 x half> %x) {
 ;
 ; X64-AVX-I16-LABEL: lrint_v8f16:
 ; X64-AVX-I16: # %bb.0:
-; X64-AVX-I16-NEXT: vpshuflw {{.*#+}} xmm1 = xmm0[1,1,1,1,4,5,6,7]
+; X64-AVX-I16-NEXT: vpsrld $16, %xmm0, %xmm1
 ; X64-AVX-I16-NEXT: vcvtph2ps %xmm1, %xmm1
 ; X64-AVX-I16-NEXT: vroundss $4, %xmm1, %xmm1, %xmm1
 ; X64-AVX-I16-NEXT: vcvtps2ph $4, %xmm1, %xmm1
@@ -542,7 +542,7 @@ define <8 x iXLen> @lrint_v8f16(<8 x half> %x) {
 ; X64-AVX-I16-NEXT: vcvtph2ps %xmm2, %xmm2
 ; X64-AVX-I16-NEXT: vcvttss2si %xmm2, %eax
 ; X64-AVX-I16-NEXT: vpinsrw $2, %eax, %xmm1, %xmm1
-; X64-AVX-I16-NEXT: vpshuflw {{.*#+}} xmm2 = xmm0[3,3,3,3,4,5,6,7]
+; X64-AVX-I16-NEXT: vpsrlq $48, %xmm0, %xmm2
 ; X64-AVX-I16-NEXT: vcvtph2ps %xmm2, %xmm2
 ; X64-AVX-I16-NEXT: vroundss $4, %xmm2, %xmm2, %xmm2
 ; X64-AVX-I16-NEXT: vcvtps2ph $4, %xmm2, %xmm2
@@ -614,7 +614,7 @@ define <8 x iXLen> @lrint_v8f16(<8 x half> %x) {
 ; X86-AVX-I32-NEXT: vcvtph2ps %xmm2, %xmm2
 ; X86-AVX-I32-NEXT: vcvttss2si %xmm2, %eax
 ; X86-AVX-I32-NEXT: vpinsrd $3, %eax, %xmm1, %xmm1
-; X86-AVX-I32-NEXT: vpshuflw {{.*#+}} xmm2 = xmm0[1,1,1,1,4,5,6,7]
+; X86-AVX-I32-NEXT: vpsrld $16, %xmm0, %xmm2
 ; X86-AVX-I32-NEXT: vcvtph2ps %xmm2, %xmm2
 ; X86-AVX-I32-NEXT: vroundss $4, %xmm2, %xmm2, %xmm2
 ; X86-AVX-I32-NEXT: vcvtps2ph $4, %xmm2, %xmm2
@@ -634,7 +634,7 @@ define <8 x iXLen> @lrint_v8f16(<8 x half> %x) {
 ; X86-AVX-I32-NEXT: vcvtph2ps %xmm3, %xmm3
 ; X86-AVX-I32-NEXT: vcvttss2si %xmm3, %eax
 ; X86-AVX-I32-NEXT: vpinsrd $2, %eax, %xmm2, %xmm2
-; X86-AVX-I32-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[3,3,3,3,4,5,6,7]
+; X86-AVX-I32-NEXT: vpsrlq $48, %xmm0, %xmm0
 ; X86-AVX-I32-NEXT: vcvtph2ps %xmm0, %xmm0
 ; X86-AVX-I32-NEXT: vroundss $4, %xmm0, %xmm0, %xmm0
 ; X86-AVX-I32-NEXT: vcvtps2ph $4, %xmm0, %xmm0
@@ -679,7 +679,7 @@ define <8 x iXLen> @lrint_v8f16(<8 x half> %x) {
 ; X64-AVX-I32-NEXT: vcvtph2ps %xmm2, %xmm2
 ; X64-AVX-I32-NEXT: vcvttss2si %xmm2, %eax
 ; X64-AVX-I32-NEXT: vpinsrd $3, %eax, %xmm1, %xmm1
-; X64-AVX-I32-NEXT: vpshuflw {{.*#+}} xmm2 = xmm0[1,1,1,1,4,5,6,7]
+; X64-AVX-I32-NEXT: vpsrld $16, %xmm0, %xmm2
 ; X64-AVX-I32-NEXT: vcvtph2ps %xmm2, %xmm2
 ; X64-AVX-I32-NEXT: vroundss $4, %xmm2, %xmm2, %xmm2
 ; X64-AVX-I32-NEXT: vcvtps2ph $4, %xmm2, %xmm2
@@ -699,7 +699,7 @@ define <8 x iXLen> @lrint_v8f16(<8 x half> %x) {
 ; X64-AVX-I32-NEXT: vcvtph2ps %xmm3, %xmm3
 ; X64-AVX-I32-NEXT: vcvttss2si %xmm3, %eax
 ; X64-AVX-I32-NEXT: vpinsrd $2, %eax, %xmm2, %xmm2
-; X64-AVX-I32-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[3,3,3,3,4,5,6,7]
+; X64-AVX-I32-NEXT: vpsrlq $48, %xmm0, %xmm0
 ; X64-AVX-I32-NEXT: vcvtph2ps %xmm0, %xmm0
 ; X64-AVX-I32-NEXT: vroundss $4, %xmm0, %xmm0, %xmm0
 ; X64-AVX-I32-NEXT: vcvtps2ph $4, %xmm0, %xmm0
@@ -722,7 +722,7 @@ define <16 x iXLen> @lrint_v16f16(<16 x half> %x) {
 ; X86-AVX-I16-LABEL: lrint_v16f16:
 ; X86-AVX-I16: # %bb.0:
 ; X86-AVX-I16-NEXT: vextracti128 $1, %ymm0, %xmm1
-; X86-AVX-I16-NEXT: vpshuflw {{.*#+}} xmm2 = xmm1[1,1,1,1,4,5,6,7]
+; X86-AVX-I16-NEXT: vpsrld $16, %xmm1, %xmm2
 ; X86-AVX-I16-NEXT: vcvtph2ps %xmm2, %xmm2
 ; X86-AVX-I16-NEXT: vroundss $4, %xmm2, %xmm2, %xmm2
 ; X86-AVX-I16-NEXT: vcvtps2ph $4, %xmm2, %xmm2
@@ -742,7 +742,7 @@ define <16 x iXLen> @lrint_v16f16(<16 x half> %x) {
 ; X86-AVX-I16-NEXT: vcvtph2ps %xmm3, %xmm3
 ; X86-AVX-I16-NEXT: vcvttss2si %xmm3, %eax
 ; X86-AVX-I16-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2
-; X86-AVX-I16-NEXT: vpshuflw {{.*#+}} xmm3 = xmm1[3,3,3,3,4,5,6,7]
+; X86-AVX-I16-NEXT: vpsrlq $48, %xmm1, %xmm3
 ; X86-AVX-I16-NEXT: vcvtph2ps %xmm3, %xmm3
 ; X86-AVX-I16-NEXT: vroundss $4, %xmm3, %xmm3, %xmm3
 ; X86-AVX-I16-NEXT: vcvtps2ph $4, %xmm3, %xmm3
@@ -777,7 +777,7 @@ define <16 x iXLen> @lrint_v16f16(<16 x half> %x) {
 ; X86-AVX-I16-NEXT: vcvtph2ps %xmm1, %xmm1
 ; X86-AVX-I16-NEXT: vcvttss2si %xmm1, %eax
 ; X86-AVX-I16-NEXT: vpinsrw $7, %eax, %xmm2, %xmm1
-; X86-AVX-I16-NEXT: vpshuflw {{.*#+}} xmm2 = xmm0[1,1,1,1,4,5,6,7]
+; X86-AVX-I16-NEXT: vpsrld $16, %xmm0, %xmm2
 ; X86-AVX-I16-NEXT: vcvtph2ps %xmm2, %xmm2
 ; X86-AVX-I16-NEXT: vroundss $4, %xmm2, %xmm2, %xmm2
 ; X86-AVX-I16-NEXT: vcvtps2ph $4, %xmm2, %xmm2
@@ -797,7 +797,7 @@ define <16 x iXLen> @lrint_v16f16(<16 x half> %x) {
 ; X86-AVX-I16-NEXT: vcvtph2ps %xmm3, %xmm3
 ; X86-AVX-I16-NEXT: vcvttss2si %xmm3, %eax
 ; X86-AVX-I16-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2
-; X86-AVX-I16-NEXT: vpshuflw {{.*#+}} xmm3 = xmm0[3,3,3,3,4,5,6,7]
+; X86-AVX-I16-NEXT: vpsrlq $48, %xmm0, %xmm3
 ; X86-AVX-I16-NEXT: vcvtph2ps %xmm3, %xmm3
 ; X86-AVX-I16-NEXT: vroundss $4, %xmm3, %xmm3, %xmm3
 ; X86-AVX-I16-NEXT: vcvtps2ph $4, %xmm3, %xmm3
@@ -843,7 +843,7 @@ define <16 x iXLen> @lrint_v16f16(<16 x half> %x) {
 ; X64-AVX-I16-LABEL: lrint_v16f16:
 ; X64-AVX-I16: # %bb.0:
 ; X64-AVX-I16-NEXT: vextracti128 $1, %ymm0, %xmm1
-; X64-AVX-I16-NEXT: vpshuflw {{.*#+}} xmm2 = xmm1[1,1,1,1,4,5,6,7]
+; X64-AVX-I16-NEXT: vpsrld $16, %xmm1, %xmm2
 ; X64-AVX-I16-NEXT: vcvtph2ps %xmm2, %xmm2
 ; X64-AVX-I16-NEXT: vroundss $4, %xmm2, %xmm2, %xmm2
 ; X64-AVX-I16-NEXT: vcvtps2ph $4, %xmm2, %xmm2
@@ -863,7 +863,7 @@ define <16 x iXLen> @lrint_v16f16(<16 x half> %x) {
 ; X64-AVX-I16-NEXT: vcvtph2ps %xmm3, %xmm3
 ; X64-AVX-I16-NEXT: vcvttss2si %xmm3, %eax
 ; X64-AVX-I16-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2
-; X64-AVX-I16-NEXT: vpshuflw {{.*#+}} xmm3 = xmm1[3,3,3,3,4,5,6,7]
+; X64-AVX-I16-NEXT: vpsrlq $48, %xmm1, %xmm3
 ; X64-AVX-I16-NEXT: vcvtph2ps %xmm3, %xmm3
 ; X64-AVX-I16-NEXT: vroundss $4, %xmm3, %xmm3, %xmm3
 ; X64-AVX-I16-NEXT: vcvtps2ph $4, %xmm3, %xmm3
@@ -898,7 +898,7 @@ define <16 x iXLen> @lrint_v16f16(<16 x half> %x) {
 ; X64-AVX-I16-NEXT: vcvtph2ps %xmm1, %xmm1
 ; X64-AVX-I16-NEXT: vcvttss2si %xmm1, %eax
 ; X64-AVX-I16-NEXT: vpinsrw $7, %eax, %xmm2, %xmm1
-; X64-AVX-I16-NEXT: vpshuflw {{.*#+}} xmm2 = xmm0[1,1,1,1,4,5,6,7]
+; X64-AVX-I16-NEXT: vpsrld $16, %xmm0, %xmm2
 ; X64-AVX-I16-NEXT: vcvtph2ps %xmm2, %xmm2
 ; X64-AVX-I16-NEXT: vroundss $4, %xmm2, %xmm2, %xmm2
 ; X64-AVX-I16-NEXT: vcvtps2ph $4, %xmm2, %xmm2
@@ -918,7 +918,7 @@ define <16 x iXLen> @lrint_v16f16(<16 x half> %x) {
 ; X64-AVX-I16-NEXT: vcvtph2ps %xmm3, %xmm3
 ; X64-AVX-I16-NEXT: vcvttss2si %xmm3, %eax
 ; X64-AVX-I16-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2
-; X64-AVX-I16-NEXT: vpshuflw {{.*#+}} xmm3 = xmm0[3,3,3,3,4,5,6,7]
+; X64-AVX-I16-NEXT: vpsrlq $48, %xmm0, %xmm3
 ; X64-AVX-I16-NEXT: vcvtph2ps %xmm3, %xmm3
 ; X64-AVX-I16-NEXT: vroundss $4, %xmm3, %xmm3, %xmm3
 ; X64-AVX-I16-NEXT: vcvtps2ph $4, %xmm3, %xmm3
@@ -991,7 +991,7 @@ define <16 x iXLen> @lrint_v16f16(<16 x half> %x) {
 ; X86-AVX-I32-NEXT: vcvtph2ps %xmm2, %xmm2
 ; X86-AVX-I32-NEXT: vcvttss2si %xmm2, %eax
 ; X86-AVX-I32-NEXT: vpinsrd $3, %eax, %xmm1, %xmm1
-; X86-AVX-I32-NEXT: vpshuflw {{.*#+}} xmm2 = xmm0[1,1,1,1,4,5,6,7]
+; X86-AVX-I32-NEXT: vpsrld $16, %xmm0, %xmm2
 ; X86-AVX-I32-NEXT: vcvtph2ps %xmm2, %xmm2
 ; X86-AVX-I32-NEXT: vroundss $4, %xmm2, %xmm2, %xmm2
 ; X86-AVX-I32-NEXT: vcvtps2ph $4, %xmm2, %xmm2
@@ -1011,7 +1011,7 @@ define <16 x iXLen> @lrint_v16f16(<16 x half> %x) {
 ; X86-AVX-I32-NEXT: vcvtph2ps %xmm3, %xmm3
 ; X86-AVX-I32-NEXT: vcvttss2si %xmm3, %eax
 ; X86-AVX-I32-NEXT: vpinsrd $2, %eax, %xmm2, %xmm2
-; X86-AVX-I32-NEXT: vpshuflw {{.*#+}} xmm3 = xmm0[3,3,3,3,4,5,6,7]
+; X86-AVX-I32-NEXT: vpsrlq $48, %xmm0, %xmm3
 ; X86-AVX-I32-NEXT: vcvtph2ps %xmm3, %xmm3
 ; X86-AVX-I32-NEXT: vroundss $4, %xmm3, %xmm3, %xmm3
 ; X86-AVX-I32-NEXT: vcvtps2ph $4, %xmm3, %xmm3
@@ -1048,7 +1048,7 @@ define <16 x iXLen> @lrint_v16f16(<16 x half> %x) {
 ; X86-AVX-I32-NEXT: vcvtph2ps %xmm3, %xmm3
 ; X86-AVX-I32-NEXT: vcvttss2si %xmm3, %eax
 ; X86-AVX-I32-NEXT: vpinsrd $3, %eax, %xmm1, %xmm1
-; X86-AVX-I32-NEXT: vpshuflw {{.*#+}} xmm3 = xmm0[1,1,1,1,4,5,6,7]
+; X86-AVX-I32-NEXT: vpsrld $16, %xmm0, %xmm3
 ; X86-AVX-I32-NEXT: vcvtph2ps %xmm3, %xmm3
 ; X86-AVX-I32-NEXT: vroundss $4, %xmm3, %xmm3, %xmm3
 ; X86-AVX-I32-NEXT: vcvtps2ph $4, %xmm3, %xmm3
@@ -1068,7 +1068,7 @@ define <16 x iXLen> @lrint_v16f16(<16 x half> %x) {
 ; X86-AVX-I32-NEXT: vcvtph2ps %xmm4, %xmm4
 ; X86-AVX-I32-NEXT: vcvttss2si %xmm4, %eax
 ; X86-AVX-I32-NEXT: vpinsrd $2, %eax, %xmm3, %xmm3
-; X86-AVX-I32-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[3,3,3,3,4,5,6,7]
+; X86-AVX-I32-NEXT: vpsrlq $48, %xmm0, %xmm0
 ; X86-AVX-I32-NEXT: vcvtph2ps %xmm0, %xmm0
 ; X86-AVX-I32-NEXT: vroundss $4, %xmm0, %xmm0, %xmm0
 ; X86-AVX-I32-NEXT: vcvtps2ph $4, %xmm0, %xmm0
@@ -1114,7 +1114,7 @@ define <16 x iXLen> @lrint_v16f16(<16 x half> %x) {
 ; X64-AVX-I32-NEXT: vcvtph2ps %xmm2, %xmm2
 ; X64-AVX-I32-NEXT: vcvttss2si %xmm2, %eax
 ; X64-AVX-I32-NEXT: vpinsrd $3, %eax, %xmm1, %xmm1
-; X64-AVX-I32-NEXT: vpshuflw {{.*#+}} xmm2 = xmm0[1,1,1,1,4,5,6,7]
+; X64-AVX-I32-NEXT: vpsrld $16, %xmm0, %xmm2
 ; X64-AVX-I32-NEXT: vcvtph2ps %xmm2, %xmm2
 ; X64-AVX-I32-NEXT: vroundss $4, %xmm2, %xmm2, %xmm2
 ; X64-AVX-I32-NEXT: vcvtps2ph $4, %xmm2, %xmm2
@@ -1134,7 +1134,7 @@ define <16 x iXLen> @lrint_v16f16(<16 x half> %x) {
 ; X64-AVX-I32-NEXT: vcvtph2ps %xmm3, %xmm3
 ; X64-AVX-I32-NEXT: vcvttss2si %xmm3, %eax
 ; X64-AVX-I32-NEXT: vpinsrd $2, %eax, %xmm2, %xmm2
-; X64-AVX-I32-NEXT: vpshuflw {{.*#+}} xmm3 = xmm0[3,3,3,3,4,5,6,7]
+; X64-AVX-I32-NEXT: vpsrlq $48, %xmm0, %xmm3
 ; X64-AVX-I32-NEXT: vcvtph2ps %xmm3, %xmm3
 ; X64-AVX-I32-NEXT: vroundss $4, %xmm3, %xmm3, %xmm3
 ; X64-AVX-I32-NEXT: vcvtps2ph $4, %xmm3, %xmm3
@@ -1171,7 +1171,7 @@ define <16 x iXLen> @lrint_v16f16(<16 x half> %x) {
 ; X64-AVX-I32-NEXT: vcvtph2ps %xmm3, %xmm3
 ; X64-AVX-I32-NEXT: vcvttss2si %xmm3, %eax
 ; X64-AVX-I32-NEXT: vpinsrd $3, %eax, %xmm1, %xmm1
-; X64-AVX-I32-NEXT: vpshuflw {{.*#+}} xmm3 = xmm0[1,1,1,1,4,5,6,7]
+; X64-AVX-I32-NEXT: vpsrld $16, %xmm0, %xmm3
 ; X64-AVX-I32-NEXT: vcvtph2ps %xmm3, %xmm3
 ; X64-AVX-I32-NEXT: vroundss $4, %xmm3, %xmm3, %xmm3
 ; X64-AVX-I32-NEXT: vcvtps2ph $4, %xmm3, %xmm3
@@ -1191,7 +1191,7 @@ define <16 x iXLen> @lrint_v16f16(<16 x half> %x) {
 ; X64-AVX-I32-NEXT: vcvtph2ps %xmm4, %xmm4
 ; X64-AVX-I32-NEXT: vcvttss2si %xmm4, %eax
 ; X64-AVX-I32-NEXT: vpinsrd $2, %eax, %xmm3, %xmm3
-; X64-AVX-I32-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[3,3,3,3,4,5,6,7]
+; X64-AVX-I32-NEXT: vpsrlq $48, %xmm0, %xmm0
 ; X64-AVX-I32-NEXT: vcvtph2ps %xmm0, %xmm0
 ; X64-AVX-I32-NEXT: vroundss $4, %xmm0, %xmm0, %xmm0
 ; X64-AVX-I32-NEXT: vcvtps2ph $4, %xmm0, %xmm0
@@ -1215,7 +1215,7 @@ define <32 x iXLen> @lrint_v32f32(<32 x half> %x) {
 ; X86-AVX-I16-LABEL: lrint_v32f32:
 ; X86-AVX-I16: # %bb.0:
 ; X86-AVX-I16-NEXT: vextracti128 $1, %ymm0, %xmm2
-; X86-AVX-I16-NEXT: vpshuflw {{.*#+}} xmm3 = xmm2[1,1,1,1,4,5,6,7]
+; X86-AVX-I16-NEXT: vpsrld $16, %xmm2, %xmm3
 ; X86-AVX-I16-NEXT: vcvtph2ps %xmm3, %xmm3
 ; X86-AVX-I16-NEXT: vroundss $4, %xmm3, %xmm3, %xmm3
 ; X86-AVX-I16-NEXT: vcvtps2ph $4, %xmm3, %xmm3
@@ -1235,7 +1235,7 @@ define <32 x iXLen> @lrint_v32f32(<32 x half> %x) {
 ; X86-AVX-I16-NEXT: vcvtph2ps %xmm4, %xmm4
 ; X86-AVX-I16-NEXT: vcvttss2si %xmm4, %eax
 ; X86-AVX-I16-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3
-; X86-AVX-I16-NEXT: vpshuflw {{.*#+}} xmm4 = xmm2[3,3,3,3,4,5,6,7]
+; X86-AVX-I16-NEXT: vpsrlq $48, %xmm2, %xmm4
 ; X86-AVX-I16-NEXT: vcvtph2ps %xmm4, %xmm4
 ; X86-AVX-I16-NEXT: vroundss $4, %xmm4, %xmm4, %xmm4
 ; X86-AVX-I16-NEXT: vcvtps2ph $4, %xmm4, %xmm4
@@ -1270,7 +1270,7 @@ define <32 x iXLen> @lrint_v32f32(<32 x half> %x) {
 ; X86-AVX-I16-NEXT: vcvtph2ps %xmm2, %xmm2
 ; X86-AVX-I16-NEXT: vcvttss2si %xmm2, %eax
 ; X86-AVX-I16-NEXT: vpinsrw $7, %eax, %xmm3, %xmm2
-; X86-AVX-I16-NEXT: vpshuflw {{.*#+}} xmm3 = xmm0[1,1,1,1,4,5,6,7]
+; X86-AVX-I16-NEXT: vpsrld $16, %xmm0, %xmm3
 ; X86-AVX-I16-NEXT: vcvtph2ps %xmm3, %xmm3
 ; X86-AVX-I16-NEXT: vroundss $4, %xmm3, %xmm3, %xmm3
 ; X86-AVX-I16-NEXT: vcvtps2ph $4, %xmm3, %xmm3
@@ -1290,7 +1290,7 @@ define <32 x iXLen> @lrint_v32f32(<32 x half> %x) {
 ; X86-AVX-I16-NEXT: vcvtph2ps %xmm4, %xmm4
 ; X86-AVX-I16-NEXT: vcvttss2si %xmm4, %eax
 ; X86-AVX-I16-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3
-; X86-AVX-I16-NEXT: vpshuflw {{.*#+}} xmm4 = xmm0[3,3,3,3,4,5,6,7]
+; X86-AVX-I16-NEXT: vpsrlq $48, %xmm0, %xmm4
 ; X86-AVX-I16-NEXT: vcvtph2ps %xmm4, %xmm4
 ; X86-AVX-I16-NEXT: vroundss $4, %xmm4, %xmm4, %xmm4
 ; X86-AVX-I16-NEXT: vcvtps2ph $4, %xmm4, %xmm4
@@ -1327,7 +1327,7 @@ define <32 x iXLen> @lrint_v32f32(<32 x half> %x) {
 ; X86-AVX-I16-NEXT: vpinsrw $7, %eax, %xmm3, %xmm0
 ; X86-AVX-I16-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
 ; X86-AVX-I16-NEXT: vextracti128 $1, %ymm1, %xmm2
-; X86-AVX-I16-NEXT: vpshuflw {{.*#+}} xmm3 = xmm2[1,1,1,1,4,5,6,7]
+; X86-AVX-I16-NEXT: vpsrld $16, %xmm2, %xmm3
 ; X86-AVX-I16-NEXT: vcvtph2ps %xmm3, %xmm3
 ; X86-AVX-I16-NEXT: vroundss $4, %xmm3, %xmm3, %xmm3
 ; X86-AVX-I16-NEXT: vcvtps2ph $4, %xmm3, %xmm3
@@ -1347,7 +1347,7 @@ define <32 x iXLen> @lrint_v32f32(<32 x half> %x) {
 ; X86-AVX-I16-NEXT: vcvtph2ps %xmm4, %xmm4
 ; X86-AVX-I16-NEXT: vcvttss2si %xmm4, %eax
 ; X86-AVX-I16-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3
-; X86-AVX-I16-NEXT: vpshuflw {{.*#+}} xmm4 = xmm2[3,3,3,3,4,5,6,7]
+; X86-AVX-I16-NEXT: vpsrlq $48, %xmm2, %xmm4
 ; X86-AVX-I16-NEXT: vcvtph2ps %xmm4, %xmm4
 ; X86-AVX-I16-NEXT: vroundss $4, %xmm4, %xmm4, %xmm4
 ; X86-AVX-I16-NEXT: vcvtps2ph $4, %xmm4, %xmm4
@@ -1382,7 +1382,7 @@ define <32 x iXLen> @lrint_v32f32(<32 x half> %x) {
 ; X86-AVX-I16-NEXT: vcvtph2ps %xmm2, %xmm2
 ; X86-AVX-I16-NEXT: vcvttss2si %xmm2, %eax
 ; X86-AVX-I16-NEXT: vpinsrw $7, %eax, %xmm3, %xmm2
-; X86-AVX-I16-NEXT: vpshuflw {{.*#+}} xmm3 = xmm1[1,1,1,1,4,5,6,7]
+; X86-AVX-I16-NEXT: vpsrld $16, %xmm1, %xmm3
 ; X86-AVX-I16-NEXT: vcvtph2ps %xmm3, %xmm3
 ; X86-AVX-I16-NEXT: vroundss $4, %xmm3, %xmm3, %xmm3
 ; X86-AVX-I16-NEXT: vcvtps2ph $4, %xmm3, %xmm3
@@ -1402,7 +1402,7 @@ define <32 x iXLen> @lrint_v32f32(<32 x half> %x) {
 ; X86-AVX-I16-NEXT: vcvtph2ps %xmm4, %xmm4
 ; X86-AVX-I16-NEXT: vcvttss2si %xmm4, %eax
 ; X86-AVX-I16-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3
-; X86-AVX-I16-NEXT: vpshuflw {{.*#+}} xmm4 = xmm1[3,3,3,3,4,5,6,7]
+; X86-AVX-I16-NEXT: vpsrlq $48, %xmm1, %xmm4
 ; X86-AVX-I16-NEXT: vcvtph2ps %xmm4, %xmm4
 ; X86-AVX-I16-NEXT: vroundss $4, %xmm4, %xmm4, %xmm4
 ; X86-AVX-I16-NEXT: vcvtps2ph $4, %xmm4, %xmm4
@@ -1448,7 +1448,7 @@ define <32 x iXLen> @lrint_v32f32(<32 x half> %x) {
 ; X64-AVX-I16-LABEL: lrint_v32f32:
 ; X64-AVX-I16: # %bb.0:
 ; X64-AVX-I16-NEXT: vextracti128 $1, %ymm0, %xmm2
-; X64-AVX-I16-NEXT: vpshuflw {{.*#+}} xmm3 = xmm2[1,1,1,1,4,5,6,7]
+; X64-AVX-I16-NEXT: vpsrld $16, %xmm2, %xmm3
 ; X64-AVX-I16-NEXT: vcvtph2ps %xmm3, %xmm3
 ; X64-AVX-I16-NEXT: vroundss $4, %xmm3, %xmm3, %xmm3
 ; X64-AVX-I16-NEXT: vcvtps2ph $4, %xmm3, %xmm3
@@ -1468,7 +1468,7 @@ define <32 x iXLen> @lrint_v32f32(<32 x half> %x) {
 ; X64-AVX-I16-NEXT: vcvtph2ps %xmm4, %xmm4
 ; X64-AVX-I16-NEXT: vcvttss2si %xmm4, %eax
 ; X64-AVX-I16-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3
-; X64-AVX-I16-NEXT: vpshuflw {{.*#+}} xmm4 = xmm2[3,3,3,3,4,5,6,7]
+; X64-AVX-I16-NEXT: vpsrlq $48, %xmm2, %xmm4
 ; X64-AVX-I16-NEXT: vcvtph2ps %xmm4, %xmm4
 ; X64-AVX-I16-NEXT: vroundss $4, %xmm4, %xmm4, %xmm4
 ; X64-AVX-I16-NEXT: vcvtps2ph $4, %xmm4, %xmm4
@@ -1503,7 +1503,7 @@ define <32 x iXLen> @lrint_v32f32(<32 x half> %x) {
 ; X64-AVX-I16-NEXT: vcvtph2ps %xmm2, %xmm2
 ; X64-AVX-I16-NEXT: vcvttss2si %xmm2, %eax
 ; X64-AVX-I16-NEXT: vpinsrw $7, %eax, %xmm3, %xmm2
-; X64-AVX-I16-NEXT: vpshuflw {{.*#+}} xmm3 = xmm0[1,1,1,1,4,5,6,7]
+; X64-AVX-I16-NEXT: vpsrld $16, %xmm0, %xmm3
 ; X64-AVX-I16-NEXT: vcvtph2ps %xmm3, %xmm3
 ; X64-AVX-I16-NEXT: vroundss $4, %xmm3, %xmm3, %xmm3
 ; X64-AVX-I16-NEXT: vcvtps2ph $4, %xmm3, %xmm3
@@ -1523,7 +1523,7 @@ define <32 x iXLen> @lrint_v32f32(<32 x half> %x) {
 ; X64-AVX-I16-NEXT: vcvtph2ps %xmm4, %xmm4
 ; X64-AVX-I16-NEXT: vcvttss2si %xmm4, %eax
 ; X64-AVX-I16-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3
-; X64-AVX-I16-NEXT: vpshuflw {{.*#+}} xmm4 = xmm0[3,3,3,3,4,5,6,7]
+; X64-AVX-I16-NEXT: vpsrlq $48, %xmm0, %xmm4
 ; X64-AVX-I16-NEXT: vcvtph2ps %xmm4, %xmm4
 ; X64-AVX-I16-NEXT: vroundss $4, %xmm4, %xmm4, %xmm4
 ; X64-AVX-I16-NEXT: vcvtps2ph $4, %xmm4, %xmm4
@@ -1560,7 +1560,7 @@ define <32 x iXLen> @lrint_v32f32(<32 x half> %x) {
 ; X64-AVX-I16-NEXT: vpinsrw $7, %eax, %xmm3, %xmm0
 ; X64-AVX-I16-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
 ; X64-AVX-I16-NEXT: vextracti128 $1, %ymm1, %xmm2
-; X64-AVX-I16-NEXT: vpshuflw {{.*#+}} xmm3 = xmm2[1,1,1,1,4,5,6,7]
+; X64-AVX-I16-NEXT: vpsrld $16, %xmm2, %xmm3
 ; X64-AVX-I16-NEXT: vcvtph2ps %xmm3, %xmm3
 ; X64-AVX-I16-NEXT: vroundss $4, %xmm3, %xmm3, %xmm3
 ; X64-AVX-I16-NEXT: vcvtps2ph $4, %xmm3, %xmm3
@@ -1580,7 +1580,7 @@ define <32 x iXLen> @lrint_v32f32(<32 x half> %x) {
 ; X64-AVX-I16-NEXT: vcvtph2ps %xmm4, %xmm4
 ; X64-AVX-I16-NEXT: vcvttss2si %xmm4, %eax
 ; X64-AVX-I16-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3
-; X64-AVX-I16-NEXT: vpshuflw {{.*#+}} xmm4 = xmm2[3,3,3,3,4,5,6,7]
+; X64-AVX-I16-NEXT: vpsrlq $48, %xmm2, %xmm4
 ; X64-AVX-I16-NEXT: vcvtph2ps %xmm4, %xmm4
 ; X64-AVX-I16-NEXT: vroundss $4, %xmm4, %xmm4, %xmm4
 ; X64-AVX-I16-NEXT: vcvtps2ph $4, %xmm4, %xmm4
@@ -1615,7 +1615,7 @@ define <32 x iXLen> @lrint_v32f32(<32 x half> %x) {
 ; X64-AVX-I16-NEXT: vcvtph2ps %xmm2, %xmm2
 ; X64-AVX-I16-NEXT: vcvttss2si %xmm2, %eax
 ; X64-AVX-I16-NEXT: vpinsrw $7, %eax, %xmm3, %xmm2
-; X64-AVX-I16-NEXT: vpshuflw {{.*#+}} xmm3 = xmm1[1,1,1,1,4,5,6,7]
+; X64-AVX-I16-NEXT: vpsrld $16, %xmm1, %xmm3
 ; X64-AVX-I16-NEXT: vcvtph2ps %xmm3, %xmm3
 ; X64-AVX-I16-NEXT: vroundss $4, %xmm3, %xmm3, %xmm3
 ; X64-AVX-I16-NEXT: vcvtps2ph $4, %xmm3, %xmm3
@@ -1635,7 +1635,7 @@ define <32 x iXLen> @lrint_v32f32(<32 x half> %x) {
 ; X64-AVX-I16-NEXT: vcvtph2ps %xmm4, %xmm4
 ; X64-AVX-I16-NEXT: vcvttss2si %xmm4, %eax
 ; X64-AVX-I16-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3
-; X64-AVX-I16-NEXT: vpshuflw {{.*#+}} xmm4 = xmm1[3,3,3,3,4,5,6,7]
+; X64-AVX-I16-NEXT: vpsrlq $48, %xmm1, %xmm4
 ; X64-AVX-I16-NEXT: vcvtph2ps %xmm4, %xmm4
 ; X64-AVX-I16-NEXT: vroundss $4, %xmm4, %xmm4, %xmm4
 ; X64-AVX-I16-NEXT: vcvtps2ph $4, %xmm4, %xmm4
@@ -1709,7 +1709,7 @@ define <32 x iXLen> @lrint_v32f32(<32 x half> %x) {
 ; X86-AVX-I32-NEXT: vcvtph2ps %xmm3, %xmm3
 ; X86-AVX-I32-NEXT: vcvttss2si %xmm3, %eax
 ; X86-AVX-I32-NEXT: vpinsrd $3, %eax, %xmm0, %xmm0
-; X86-AVX-I32-NEXT: vpshuflw {{.*#+}} xmm3 = xmm2[1,1,1,1,4,5,6,7]
+; X86-AVX-I32-NEXT: vpsrld $16, %xmm2, %xmm3
 ; X86-AVX-I32-NEXT: vcvtph2ps %xmm3, %xmm3
 ; X86-AVX-I32-NEXT: vroundss $4, %xmm3, %xmm3, %xmm3
 ; X86-AVX-I32-NEXT: vcvtps2ph $4, %xmm3, %xmm3
@@ -1729,7 +1729,7 @@ define <32 x iXLen> @lrint_v32f32(<32 x half> %x) {
 ; X86-AVX-I32-NEXT: vcvtph2ps %xmm4, %xmm4
 ; X86-AVX-I32-NEXT: vcvttss2si %xmm4, %eax
 ; X86-AVX-I32-NEXT: vpinsrd $2, %eax, %xmm3, %xmm3
-; X86-AVX-I32-NEXT: vpshuflw {{.*#+}} xmm4 = xmm2[3,3,3,3,4,5,6,7]
+; X86-AVX-I32-NEXT: vpsrlq $48, %xmm2, %xmm4
 ; X86-AVX-I32-NEXT: vcvtph2ps %xmm4, %xmm4
 ; X86-AVX-I32-NEXT: vroundss $4, %xmm4, %xmm4, %xmm4
 ; X86-AVX-I32-NEXT: vcvtps2ph $4, %xmm4, %xmm4
@@ -1766,7 +1766,7 @@ define <32 x iXLen> @lrint_v32f32(<32 x half> %x) {
 ; X86-AVX-I32-NEXT: vcvtph2ps %xmm4, %xmm4
 ; X86-AVX-I32-NEXT: vcvttss2si %xmm4, %eax
 ; X86-AVX-I32-NEXT: vpinsrd $3, %eax, %xmm3, %xmm3
-; X86-AVX-I32-NEXT: vpshuflw {{.*#+}} xmm4 = xmm2[1,1,1,1,4,5,6,7]
+; X86-AVX-I32-NEXT: vpsrld $16, %xmm2, %xmm4
 ; X86-AVX-I32-NEXT: vcvtph2ps %xmm4, %xmm4
 ; X86-AVX-I32-NEXT: vroundss $4, %xmm4, %xmm4, %xmm4
 ; X86-AVX-I32-NEXT: vcvtps2ph $4, %xmm4, %xmm4
@@ -1786,7 +1786,7 @@ define <32 x iXLen> @lrint_v32f32(<32 x half> %x) {
 ; X86-AVX-I32-NEXT: vcvtph2ps %xmm5, %xmm5
 ; X86-AVX-I32-NEXT: vcvttss2si %xmm5, %eax
 ; X86-AVX-I32-NEXT: vpinsrd $2, %eax, %xmm4, %xmm4
-; X86-AVX-I32-NEXT: vpshuflw {{.*#+}} xmm2 = xmm2[3,3,3,3,4,5,6,7]
+; X86-AVX-I32-NEXT: vpsrlq $48, %xmm2, %xmm2
 ; X86-AVX-I32-NEXT: vcvtph2ps %xmm2, %xmm2
 ; X86-AVX-I32-NEXT: vroundss $4, %xmm2, %xmm2, %xmm2
 ; X86-AVX-I32-NEXT: vcvtps2ph $4, %xmm2, %xmm2
@@ -1822,7 +1822,7 @@ define <32 x iXLen> @lrint_v32f32(<32 x half> %x) {
 ; X86-AVX-I32-NEXT: vcvtph2ps %xmm3, %xmm3
 ; X86-AVX-I32-NEXT: vcvttss2si %xmm3, %eax
 ; X86-AVX-I32-NEXT: vpinsrd $3, %eax, %xmm2, %xmm2
-; X86-AVX-I32-NEXT: vpshuflw {{.*#+}} xmm3 = xmm1[1,1,1,1,4,5,6,7]
+; X86-AVX-I32-NEXT: vpsrld $16, %xmm1, %xmm3
 ; X86-AVX-I32-NEXT: vcvtph2ps %xmm3, %xmm3
 ; X86-AVX-I32-NEXT: vroundss $4, %xmm3, %xmm3, %xmm3
 ; X86-AVX-I32-NEXT: vcvtps2ph $4, %xmm3, %xmm3
@@ -1842,7 +1842,7 @@ define <32 x iXLen> @lrint_v32f32(<32 x half> %x) {
 ; X86-AVX-I32-NEXT: vcvtph2ps %xmm5, %xmm5
 ; X86-AVX-I32-NEXT: vcvttss2si %xmm5, %eax
 ; X86-AVX-I32-NEXT: vpinsrd $2, %eax, %xmm3, %xmm3
-; X86-AVX-I32-NEXT: vpshuflw {{.*#+}} xmm5 = xmm1[3,3,3,3,4,5,6,7]
+; X86-AVX-I32-NEXT: vpsrlq $48, %xmm1, %xmm5
 ; X86-AVX-I32-NEXT: vcvtph2ps %xmm5, %xmm5
 ; X86-AVX-I32-NEXT: vroundss $4, %xmm5, %xmm5, %xmm5
 ; X86-AVX-I32-NEXT: vcvtps2ph $4, %xmm5, %xmm5
@@ -1879,7 +1879,7 @@ define <32 x iXLen> @lrint_v32f32(<32 x half> %x) {
 ; X86-AVX-I32-NEXT: vcvtph2ps %xmm5, %xmm5
 ; X86-AVX-I32-NEXT: vcvttss2si %xmm5, %eax
 ; X86-AVX-I32-NEXT: vpinsrd $3, %eax, %xmm3, %xmm3
-; X86-AVX-I32-NEXT: vpshuflw {{.*#+}} xmm5 = xmm1[1,1,1,1,4,5,6,7]
+; X86-AVX-I32-NEXT: vpsrld $16, %xmm1, %xmm5
 ; X86-AVX-I32-NEXT: vcvtph2ps %xmm5, %xmm5
 ; X86-AVX-I32-NEXT: vroundss $4, %xmm5, %xmm5, %xmm5
 ; X86-AVX-I32-NEXT: vcvtps2ph $4, %xmm5, %xmm5
@@ -1899,7 +1899,7 @@ define <32 x iXLen> @lrint_v32f32(<32 x half> %x) {
 ; X86-AVX-I32-NEXT: vcvtph2ps %xmm6, %xmm6
 ; X86-AVX-I32-NEXT: vcvttss2si %xmm6, %eax
 ; X86-AVX-I32-NEXT: vpinsrd $2, %eax, %xmm5, %xmm5
-; X86-AVX-I32-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[3,3,3,3,4,5,6,7]
+; X86-AVX-I32-NEXT: vpsrlq $48, %xmm1, %xmm1
 ; X86-AVX-I32-NEXT: vcvtph2ps %xmm1, %xmm1
 ; X86-AVX-I32-NEXT: vroundss $4, %xmm1, %xmm1, %xmm1
 ; X86-AVX-I32-NEXT: vcvtps2ph $4, %xmm1, %xmm1
@@ -1949,7 +1949,7 @@ define <32 x iXLen> @lrint_v32f32(<32 x half> %x) {
 ; X64-AVX-I32-NEXT: vcvtph2ps %xmm3, %xmm3
 ; X64-AVX-I32-NEXT: vcvttss2si %xmm3, %eax
 ; X64-AVX-I32-NEXT: vpinsrd $3, %eax, %xmm0, %xmm0
-; X64-AVX-I32-NEXT: vpshuflw {{.*#+}} xmm3 = xmm2[1,1,1,1,4,5,6,7]
+; X64-AVX-I32-NEXT: vpsrld $16, %xmm2, %xmm3
 ; X64-AVX-I32-NEXT: vcvtph2ps %xmm3, %xmm3
 ; X64-AVX-I32-NEXT: vroundss $4, %xmm3, %xmm3, %xmm3
 ; X64-AVX-I32-NEXT: vcvtps2ph $4, %xmm3, %xmm3
@@ -1969,7 +1969,7 @@ define <32 x iXLen> @lrint_v32f32(<32 x half> %x) {
 ; X64-AVX-I32-NEXT: vcvtph2ps %xmm4, %xmm4
 ; X64-AVX-I32-NEXT: vcvttss2si %xmm4, %eax
 ; X64-AVX-I32-NEXT: vpinsrd $2, %eax, %xmm3, %xmm3
-; X64-AVX-I32-NEXT: vpshuflw {{.*#+}} xmm4 = xmm2[3,3,3,3,4,5,6,7]
+; X64-AVX-I32-NEXT: vpsrlq $48, %xmm2, %xmm4
 ; X64-AVX-I32-NEXT: vcvtph2ps %xmm4, %xmm4
 ; X64-AVX-I32-NEXT: vroundss $4, %xmm4, %xmm4, %xmm4
 ; X64-AVX-I32-NEXT: vcvtps2ph $4, %xmm4, %xmm4
@@ -2006,7 +2006,7 @@ define <32 x iXLen> @lrint_v32f32(<32 x half> %x) {
 ; X64-AVX-I32-NEXT: vcvtph2ps %xmm4, %xmm4
 ; X64-AVX-I32-NEXT: vcvttss2si %xmm4, %eax
 ; X64-AVX-I32-NEXT: vpinsrd $3, %eax, %xmm3, %xmm3
-; X64-AVX-I32-NEXT: vpshuflw {{.*#+}} xmm4 = xmm2[1,1,1,1,4,5,6,7]
+; X64-AVX-I32-NEXT: vpsrld $16, %xmm2, %xmm4
 ; X64-AVX-I32-NEXT: vcvtph2ps %xmm4, %xmm4
 ; X64-AVX-I32-NEXT: vroundss $4, %xmm4, %xmm4, %xmm4
 ; X64-AVX-I32-NEXT: vcvtps2ph $4, %xmm4, %xmm4
@@ -2026,7 +2026,7 @@ define <32 x iXLen> @lrint_v32f32(<32 x half> %x) {
 ; X64-AVX-I32-NEXT: vcvtph2ps %xmm5, %xmm5
 ; X64-AVX-I32-NEXT: vcvttss2si %xmm5, %eax
 ; X64-AVX-I32-NEXT: vpinsrd $2, %eax, %xmm4, %xmm4
-; X64-AVX-I32-NEXT: vpshuflw {{.*#+}} xmm2 = xmm2[3,3,3,3,4,5,6,7]
+; X64-AVX-I32-NEXT: vpsrlq $48, %xmm2, %xmm2
 ; X64-AVX-I32-NEXT: vcvtph2ps %xmm2, %xmm2
 ; X64-AVX-I32-NEXT: vroundss $4, %xmm2, %xmm2, %xmm2
 ; X64-AVX-I32-NEXT: vcvtps2ph $4, %xmm2, %xmm2
@@ -2062,7 +2062,7 @@ define <32 x iXLen> @lrint_v32f32(<32 x half> %x) {
 ; X64-AVX-I32-NEXT: vcvtph2ps %xmm3, %xmm3
 ; X64-AVX-I32-NEXT: vcvttss2si %xmm3, %eax
 ; X64-AVX-I32-NEXT: vpinsrd $3, %eax, %xmm2, %xmm2
-; X64-AVX-I32-NEXT: vpshuflw {{.*#+}} xmm3 = xmm1[1,1,1,1,4,5,6,7]
+; X64-AVX-I32-NEXT: vpsrld $16, %xmm1, %xmm3
 ; X64-AVX-I32-NEXT: vcvtph2ps %xmm3, %xmm3
 ; X64-AVX-I32-NEXT: vroundss $4, %xmm3, %xmm3, %xmm3
 ; X64-AVX-I32-NEXT: vcvtps2ph $4, %xmm3, %xmm3
@@ -2082,7 +2082,7 @@ define <32 x iXLen> @lrint_v32f32(<32 x half> %x) {
 ; X64-AVX-I32-NEXT: vcvtph2ps %xmm5, %xmm5
 ; X64-AVX-I32-NEXT: vcvttss2si %xmm5, %eax
 ; X64-AVX-I32-NEXT: vpinsrd $2, %eax, %xmm3, %xmm3
-; X64-AVX-I32-NEXT: vpshuflw {{.*#+}} xmm5 = xmm1[3,3,3,3,4,5,6,7]
+; X64-AVX-I32-NEXT: vpsrlq $48, %xmm1, %xmm5
 ; X64-AVX-I32-NEXT: vcvtph2ps %xmm5, %xmm5
 ; X64-AVX-I32-NEXT: vroundss $4, %xmm5, %xmm5, %xmm5
 ; X64-AVX-I32-NEXT: vcvtps2ph $4, %xmm5, %xmm5
@@ -2119,7 +2119,7 @@ define <32 x iXLen> @lrint_v32f32(<32 x half> %x) {
 ; X64-AVX-I32-NEXT: vcvtph2ps %xmm5, %xmm5
 ; X64-AVX-I32-NEXT: vcvttss2si %xmm5, %eax
 ; X64-AVX-I32-NEXT: vpinsrd $3, %eax, %xmm3, %xmm3
-; X64-AVX-I32-NEXT: vpshuflw {{.*#+}} xmm5 = xmm1[1,1,1,1,4,5,6,7]
+; X64-AVX-I32-NEXT: vpsrld $16, %xmm1, %xmm5
 ; X64-AVX-I32-NEXT: vcvtph2ps %xmm5, %xmm5
 ; X64-AVX-I32-NEXT: vroundss $4, %xmm5, %xmm5, %xmm5
 ; X64-AVX-I32-NEXT: vcvtps2ph $4, %xmm5, %xmm5
@@ -2139,7 +2139,7 @@ define <32 x iXLen> @lrint_v32f32(<32 x half> %x) {
 ; X64-AVX-I32-NEXT: vcvtph2ps %xmm6, %xmm6
 ; X64-AVX-I32-NEXT: vcvttss2si %xmm6, %eax
 ; X64-AVX-I32-NEXT: vpinsrd $2, %eax, %xmm5, %xmm5
-; X64-AVX-I32-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[3,3,3,3,4,5,6,7]
+; X64-AVX-I32-NEXT: vpsrlq $48, %xmm1, %xmm1
 ; X64-AVX-I32-NEXT: vcvtph2ps %xmm1, %xmm1
 ; X64-AVX-I32-NEXT: vroundss $4, %xmm1, %xmm1, %xmm1
 ; X64-AVX-I32-NEXT: vcvtps2ph $4, %xmm1, %xmm1