; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefixes=CHECK-SSE
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=CHECK-AVX,CHECK-AVX2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefixes=CHECK-AVX,CHECK-AVX512F,CHECK-NO-FASTFMA
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=skx -fp-contract=fast | FileCheck %s --check-prefixes=CHECK-AVX,CHECK-AVX512F,CHECK-FMA

declare i16 @llvm.umax.i16(i16, i16)
declare i64 @llvm.umin.i64(i64, i64)
declare <4 x float> @llvm.ldexp.v4f32.v4i32(<4 x float>, <4 x i32>)

define <4 x float> @fmul_pow2_4xfloat(<4 x i32> %i) {
; CHECK-SSE-LABEL: fmul_pow2_4xfloat:
; CHECK-SSE:       # %bb.0:
; CHECK-SSE-NEXT:    pslld $23, %xmm0
; CHECK-SSE-NEXT:    paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-SSE-NEXT:    retq
;
; CHECK-AVX2-LABEL: fmul_pow2_4xfloat:
; CHECK-AVX2:       # %bb.0:
; CHECK-AVX2-NEXT:    vpslld $23, %xmm0, %xmm0
; CHECK-AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [1091567616,1091567616,1091567616,1091567616]
; CHECK-AVX2-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
; CHECK-AVX2-NEXT:    retq
;
; CHECK-NO-FASTFMA-LABEL: fmul_pow2_4xfloat:
; CHECK-NO-FASTFMA:       # %bb.0:
; CHECK-NO-FASTFMA-NEXT:    vpslld $23, %xmm0, %xmm0
; CHECK-NO-FASTFMA-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [1091567616,1091567616,1091567616,1091567616]
; CHECK-NO-FASTFMA-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
; CHECK-NO-FASTFMA-NEXT:    retq
;
; CHECK-FMA-LABEL: fmul_pow2_4xfloat:
; CHECK-FMA:       # %bb.0:
; CHECK-FMA-NEXT:    vpslld $23, %xmm0, %xmm0
; CHECK-FMA-NEXT:    vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
; CHECK-FMA-NEXT:    retq
  %p2 = shl <4 x i32> <i32 1, i32 1, i32 1, i32 1>, %i
  %p2_f = uitofp <4 x i32> %p2 to <4 x float>
  %r = fmul <4 x float> <float 9.000000e+00, float 9.000000e+00, float 9.000000e+00, float 9.000000e+00>, %p2_f
  ret <4 x float> %r
}

define <4 x float> @fmul_pow2_ldexp_4xfloat(<4 x i32> %i) {
; CHECK-SSE-LABEL: fmul_pow2_ldexp_4xfloat:
; CHECK-SSE:       # %bb.0:
; CHECK-SSE-NEXT:    subq $56, %rsp
; CHECK-SSE-NEXT:    .cfi_def_cfa_offset 64
; CHECK-SSE-NEXT:    movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[3,3,3,3]
; CHECK-SSE-NEXT:    movd %xmm1, %edi
; CHECK-SSE-NEXT:    movss {{.*#+}} xmm0 = [9.0E+0,0.0E+0,0.0E+0,0.0E+0]
; CHECK-SSE-NEXT:    callq ldexpf@PLT
; CHECK-SSE-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-SSE-NEXT:    pshufd $238, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-SSE-NEXT:    # xmm0 = mem[2,3,2,3]
; CHECK-SSE-NEXT:    movd %xmm0, %edi
; CHECK-SSE-NEXT:    movss {{.*#+}} xmm0 = [9.0E+0,0.0E+0,0.0E+0,0.0E+0]
; CHECK-SSE-NEXT:    callq ldexpf@PLT
; CHECK-SSE-NEXT:    unpcklps (%rsp), %xmm0 # 16-byte Folded Reload
; CHECK-SSE-NEXT:    # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
; CHECK-SSE-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-SSE-NEXT:    movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
; CHECK-SSE-NEXT:    movd %xmm0, %edi
; CHECK-SSE-NEXT:    movss {{.*#+}} xmm0 = [9.0E+0,0.0E+0,0.0E+0,0.0E+0]
; CHECK-SSE-NEXT:    callq ldexpf@PLT
; CHECK-SSE-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-SSE-NEXT:    pshufd $85, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-SSE-NEXT:    # xmm0 = mem[1,1,1,1]
; CHECK-SSE-NEXT:    movd %xmm0, %edi
; CHECK-SSE-NEXT:    movss {{.*#+}} xmm0 = [9.0E+0,0.0E+0,0.0E+0,0.0E+0]
; CHECK-SSE-NEXT:    callq ldexpf@PLT
; CHECK-SSE-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
; CHECK-SSE-NEXT:    unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; CHECK-SSE-NEXT:    unpcklpd (%rsp), %xmm1 # 16-byte Folded Reload
; CHECK-SSE-NEXT:    # xmm1 = xmm1[0],mem[0]
; CHECK-SSE-NEXT:    movaps %xmm1, %xmm0
; CHECK-SSE-NEXT:    addq $56, %rsp
; CHECK-SSE-NEXT:    .cfi_def_cfa_offset 8
; CHECK-SSE-NEXT:    retq
;
; CHECK-AVX-LABEL: fmul_pow2_ldexp_4xfloat:
; CHECK-AVX:       # %bb.0:
; CHECK-AVX-NEXT:    subq $40, %rsp
; CHECK-AVX-NEXT:    .cfi_def_cfa_offset 48
; CHECK-AVX-NEXT:    vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-AVX-NEXT:    vextractps $1, %xmm0, %edi
; CHECK-AVX-NEXT:    vmovss {{.*#+}} xmm0 = [9.0E+0,0.0E+0,0.0E+0,0.0E+0]
; CHECK-AVX-NEXT:    callq ldexpf@PLT
; CHECK-AVX-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-AVX-NEXT:    vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
; CHECK-AVX-NEXT:    vmovd %xmm0, %edi
; CHECK-AVX-NEXT:    vmovss {{.*#+}} xmm0 = [9.0E+0,0.0E+0,0.0E+0,0.0E+0]
; CHECK-AVX-NEXT:    callq ldexpf@PLT
; CHECK-AVX-NEXT:    vinsertps $16, (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
; CHECK-AVX-NEXT:    # xmm0 = xmm0[0],mem[0],xmm0[2,3]
; CHECK-AVX-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-AVX-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
; CHECK-AVX-NEXT:    vextractps $2, %xmm0, %edi
; CHECK-AVX-NEXT:    vmovss {{.*#+}} xmm0 = [9.0E+0,0.0E+0,0.0E+0,0.0E+0]
; CHECK-AVX-NEXT:    callq ldexpf@PLT
; CHECK-AVX-NEXT:    vmovaps (%rsp), %xmm1 # 16-byte Reload
; CHECK-AVX-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3]
; CHECK-AVX-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-AVX-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
; CHECK-AVX-NEXT:    vextractps $3, %xmm0, %edi
; CHECK-AVX-NEXT:    vmovss {{.*#+}} xmm0 = [9.0E+0,0.0E+0,0.0E+0,0.0E+0]
; CHECK-AVX-NEXT:    callq ldexpf@PLT
; CHECK-AVX-NEXT:    vmovaps (%rsp), %xmm1 # 16-byte Reload
; CHECK-AVX-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
; CHECK-AVX-NEXT:    addq $40, %rsp
; CHECK-AVX-NEXT:    .cfi_def_cfa_offset 8
; CHECK-AVX-NEXT:    retq
  %r = call <4 x float> @llvm.ldexp.v4f32.v4i32(<4 x float> <float 9.000000e+00, float 9.000000e+00, float 9.000000e+00, float 9.000000e+00>, <4 x i32> %i)
  ret <4 x float> %r
}

define <4 x float> @fdiv_pow2_4xfloat(<4 x i32> %i) {
; CHECK-SSE-LABEL: fdiv_pow2_4xfloat:
; CHECK-SSE:       # %bb.0:
; CHECK-SSE-NEXT:    pslld $23, %xmm0
; CHECK-SSE-NEXT:    movdqa {{.*#+}} xmm1 = [1091567616,1091567616,1091567616,1091567616]
; CHECK-SSE-NEXT:    psubd %xmm0, %xmm1
; CHECK-SSE-NEXT:    movdqa %xmm1, %xmm0
; CHECK-SSE-NEXT:    retq
;
; CHECK-AVX-LABEL: fdiv_pow2_4xfloat:
; CHECK-AVX:       # %bb.0:
; CHECK-AVX-NEXT:    vpslld $23, %xmm0, %xmm0
; CHECK-AVX-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [1091567616,1091567616,1091567616,1091567616]
; CHECK-AVX-NEXT:    vpsubd %xmm0, %xmm1, %xmm0
; CHECK-AVX-NEXT:    retq
  %p2 = shl <4 x i32> <i32 1, i32 1, i32 1, i32 1>, %i
  %p2_f = uitofp <4 x i32> %p2 to <4 x float>
  %r = fdiv <4 x float> <float 9.000000e+00, float 9.000000e+00, float 9.000000e+00, float 9.000000e+00>, %p2_f
  ret <4 x float> %r
}

declare <8 x half> @llvm.ldexp.v8f16.v8i16(<8 x half>, <8 x i16>)

define <8 x half> @fmul_pow2_8xhalf(<8 x i16> %i) {
; CHECK-SSE-LABEL: fmul_pow2_8xhalf:
; CHECK-SSE:       # %bb.0:
; CHECK-SSE-NEXT:    subq $88, %rsp
; CHECK-SSE-NEXT:    .cfi_def_cfa_offset 96
; CHECK-SSE-NEXT:    movdqa %xmm0, %xmm1
; CHECK-SSE-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4,4,5,5,6,6,7,7]
; CHECK-SSE-NEXT:    pslld $23, %xmm1
; CHECK-SSE-NEXT:    movdqa {{.*#+}} xmm2 = [1065353216,1065353216,1065353216,1065353216]
; CHECK-SSE-NEXT:    paddd %xmm2, %xmm1
; CHECK-SSE-NEXT:    cvttps2dq %xmm1, %xmm1
; CHECK-SSE-NEXT:    movaps %xmm1, (%rsp) # 16-byte Spill
; CHECK-SSE-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
; CHECK-SSE-NEXT:    pslld $23, %xmm0
; CHECK-SSE-NEXT:    paddd %xmm2, %xmm0
; CHECK-SSE-NEXT:    cvttps2dq %xmm0, %xmm0
; CHECK-SSE-NEXT:    pslld $16, %xmm0
;
CHECK-SSE-NEXT: psrld $16, %xmm0 ; CHECK-SSE-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill ; CHECK-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,1,1] ; CHECK-SSE-NEXT: cvtdq2ps %xmm0, %xmm0 ; CHECK-SSE-NEXT: callq __truncsfhf2@PLT ; CHECK-SSE-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill ; CHECK-SSE-NEXT: cvtdq2ps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload ; CHECK-SSE-NEXT: callq __truncsfhf2@PLT ; CHECK-SSE-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill ; CHECK-SSE-NEXT: pshufd $238, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload ; CHECK-SSE-NEXT: # xmm0 = mem[2,3,2,3] ; CHECK-SSE-NEXT: cvtdq2ps %xmm0, %xmm0 ; CHECK-SSE-NEXT: callq __truncsfhf2@PLT ; CHECK-SSE-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill ; CHECK-SSE-NEXT: pshufd $255, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload ; CHECK-SSE-NEXT: # xmm0 = mem[3,3,3,3] ; CHECK-SSE-NEXT: cvtdq2ps %xmm0, %xmm0 ; CHECK-SSE-NEXT: callq __truncsfhf2@PLT ; CHECK-SSE-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill ; CHECK-SSE-NEXT: movdqa (%rsp), %xmm0 # 16-byte Reload ; CHECK-SSE-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 ; CHECK-SSE-NEXT: movdqa %xmm0, (%rsp) # 16-byte Spill ; CHECK-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,1,1] ; CHECK-SSE-NEXT: cvtdq2ps %xmm0, %xmm0 ; CHECK-SSE-NEXT: callq __truncsfhf2@PLT ; CHECK-SSE-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill ; CHECK-SSE-NEXT: cvtdq2ps (%rsp), %xmm0 # 16-byte Folded Reload ; CHECK-SSE-NEXT: callq __truncsfhf2@PLT ; CHECK-SSE-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill ; CHECK-SSE-NEXT: pshufd $238, (%rsp), %xmm0 # 16-byte Folded Reload ; CHECK-SSE-NEXT: # xmm0 = mem[2,3,2,3] ; CHECK-SSE-NEXT: cvtdq2ps %xmm0, %xmm0 ; CHECK-SSE-NEXT: callq __truncsfhf2@PLT ; CHECK-SSE-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill ; CHECK-SSE-NEXT: pshufd $255, (%rsp), %xmm0 # 16-byte Folded Reload ; CHECK-SSE-NEXT: # xmm0 = mem[3,3,3,3] ; CHECK-SSE-NEXT: cvtdq2ps %xmm0, %xmm0 ; CHECK-SSE-NEXT: callq __truncsfhf2@PLT ; CHECK-SSE-NEXT: callq __extendhfsf2@PLT ; CHECK-SSE-NEXT: mulss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 ; CHECK-SSE-NEXT: callq __truncsfhf2@PLT ; CHECK-SSE-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill ; CHECK-SSE-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload ; CHECK-SSE-NEXT: # xmm0 = mem[0],zero,zero,zero ; CHECK-SSE-NEXT: callq __extendhfsf2@PLT ; CHECK-SSE-NEXT: mulss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 ; CHECK-SSE-NEXT: callq __truncsfhf2@PLT ; CHECK-SSE-NEXT: punpcklwd (%rsp), %xmm0 # 16-byte Folded Reload ; CHECK-SSE-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] ; CHECK-SSE-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill ; CHECK-SSE-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload ; CHECK-SSE-NEXT: # xmm0 = mem[0],zero,zero,zero ; CHECK-SSE-NEXT: callq __extendhfsf2@PLT ; CHECK-SSE-NEXT: mulss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 ; CHECK-SSE-NEXT: callq __truncsfhf2@PLT ; CHECK-SSE-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill ; CHECK-SSE-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload ; CHECK-SSE-NEXT: # xmm0 = mem[0],zero,zero,zero ; CHECK-SSE-NEXT: callq __extendhfsf2@PLT ; CHECK-SSE-NEXT: mulss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 ; CHECK-SSE-NEXT: callq __truncsfhf2@PLT ; CHECK-SSE-NEXT: movdqa (%rsp), %xmm1 # 16-byte Reload ; CHECK-SSE-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] ; CHECK-SSE-NEXT: punpckldq 
{{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload ; CHECK-SSE-NEXT: # xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] ; CHECK-SSE-NEXT: movdqa %xmm1, (%rsp) # 16-byte Spill ; CHECK-SSE-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload ; CHECK-SSE-NEXT: # xmm0 = mem[0],zero,zero,zero ; CHECK-SSE-NEXT: callq __extendhfsf2@PLT ; CHECK-SSE-NEXT: mulss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 ; CHECK-SSE-NEXT: callq __truncsfhf2@PLT ; CHECK-SSE-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill ; CHECK-SSE-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload ; CHECK-SSE-NEXT: # xmm0 = mem[0],zero,zero,zero ; CHECK-SSE-NEXT: callq __extendhfsf2@PLT ; CHECK-SSE-NEXT: mulss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 ; CHECK-SSE-NEXT: callq __truncsfhf2@PLT ; CHECK-SSE-NEXT: punpcklwd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload ; CHECK-SSE-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] ; CHECK-SSE-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill ; CHECK-SSE-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload ; CHECK-SSE-NEXT: # xmm0 = mem[0],zero,zero,zero ; CHECK-SSE-NEXT: callq __extendhfsf2@PLT ; CHECK-SSE-NEXT: mulss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 ; CHECK-SSE-NEXT: callq __truncsfhf2@PLT ; CHECK-SSE-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill ; CHECK-SSE-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload ; CHECK-SSE-NEXT: # xmm0 = mem[0],zero,zero,zero ; CHECK-SSE-NEXT: callq __extendhfsf2@PLT ; CHECK-SSE-NEXT: mulss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 ; CHECK-SSE-NEXT: callq __truncsfhf2@PLT ; CHECK-SSE-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload ; CHECK-SSE-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] ; CHECK-SSE-NEXT: punpckldq {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload ; CHECK-SSE-NEXT: # xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] ; CHECK-SSE-NEXT: punpcklqdq (%rsp), %xmm1 # 16-byte Folded Reload ; CHECK-SSE-NEXT: # xmm1 = xmm1[0],mem[0] ; CHECK-SSE-NEXT: movdqa %xmm1, %xmm0 ; CHECK-SSE-NEXT: addq $88, %rsp ; CHECK-SSE-NEXT: .cfi_def_cfa_offset 8 ; CHECK-SSE-NEXT: retq ; ; CHECK-AVX2-LABEL: fmul_pow2_8xhalf: ; CHECK-AVX2: # %bb.0: ; CHECK-AVX2-NEXT: subq $120, %rsp ; CHECK-AVX2-NEXT: .cfi_def_cfa_offset 128 ; CHECK-AVX2-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero ; CHECK-AVX2-NEXT: vpbroadcastd {{.*#+}} ymm1 = [1,1,1,1,1,1,1,1] ; CHECK-AVX2-NEXT: vpsllvd %ymm0, %ymm1, %ymm0 ; CHECK-AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13,u,u,u,u,u,u,u,u,16,17,20,21,24,25,28,29,u,u,u,u,u,u,u,u] ; CHECK-AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3] ; CHECK-AVX2-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero ; CHECK-AVX2-NEXT: vcvtdq2ps %ymm0, %ymm0 ; CHECK-AVX2-NEXT: vmovups %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill ; CHECK-AVX2-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] ; CHECK-AVX2-NEXT: vzeroupper ; CHECK-AVX2-NEXT: callq __truncsfhf2@PLT ; CHECK-AVX2-NEXT: vmovss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill ; CHECK-AVX2-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload ; CHECK-AVX2-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 ; CHECK-AVX2-NEXT: vzeroupper ; CHECK-AVX2-NEXT: callq __truncsfhf2@PLT ; CHECK-AVX2-NEXT: vmovss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill ; CHECK-AVX2-NEXT: vpermilpd 
$1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload ; CHECK-AVX2-NEXT: # xmm0 = mem[1,0] ; CHECK-AVX2-NEXT: callq __truncsfhf2@PLT ; CHECK-AVX2-NEXT: vmovss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill ; CHECK-AVX2-NEXT: vpermilps $255, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload ; CHECK-AVX2-NEXT: # xmm0 = mem[3,3,3,3] ; CHECK-AVX2-NEXT: callq __truncsfhf2@PLT ; CHECK-AVX2-NEXT: vmovss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill ; CHECK-AVX2-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload ; CHECK-AVX2-NEXT: vextractf128 $1, %ymm0, %xmm0 ; CHECK-AVX2-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill ; CHECK-AVX2-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] ; CHECK-AVX2-NEXT: vzeroupper ; CHECK-AVX2-NEXT: callq __truncsfhf2@PLT ; CHECK-AVX2-NEXT: vmovss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill ; CHECK-AVX2-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload ; CHECK-AVX2-NEXT: callq __truncsfhf2@PLT ; CHECK-AVX2-NEXT: vmovss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill ; CHECK-AVX2-NEXT: vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload ; CHECK-AVX2-NEXT: # xmm0 = mem[1,0] ; CHECK-AVX2-NEXT: callq __truncsfhf2@PLT ; CHECK-AVX2-NEXT: vmovss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill ; CHECK-AVX2-NEXT: vpermilps $255, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload ; CHECK-AVX2-NEXT: # xmm0 = mem[3,3,3,3] ; CHECK-AVX2-NEXT: callq __truncsfhf2@PLT ; CHECK-AVX2-NEXT: callq __extendhfsf2@PLT ; CHECK-AVX2-NEXT: vmulss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 ; CHECK-AVX2-NEXT: callq __truncsfhf2@PLT ; CHECK-AVX2-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill ; CHECK-AVX2-NEXT: vmovss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload ; CHECK-AVX2-NEXT: # xmm0 = mem[0],zero,zero,zero ; CHECK-AVX2-NEXT: callq __extendhfsf2@PLT ; CHECK-AVX2-NEXT: vmulss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 ; CHECK-AVX2-NEXT: callq __truncsfhf2@PLT ; CHECK-AVX2-NEXT: vpunpcklwd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload ; CHECK-AVX2-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] ; CHECK-AVX2-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill ; CHECK-AVX2-NEXT: vmovss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload ; CHECK-AVX2-NEXT: # xmm0 = mem[0],zero,zero,zero ; CHECK-AVX2-NEXT: callq __extendhfsf2@PLT ; CHECK-AVX2-NEXT: vmulss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 ; CHECK-AVX2-NEXT: callq __truncsfhf2@PLT ; CHECK-AVX2-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill ; CHECK-AVX2-NEXT: vmovss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload ; CHECK-AVX2-NEXT: # xmm0 = mem[0],zero,zero,zero ; CHECK-AVX2-NEXT: callq __extendhfsf2@PLT ; CHECK-AVX2-NEXT: vmulss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 ; CHECK-AVX2-NEXT: callq __truncsfhf2@PLT ; CHECK-AVX2-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload ; CHECK-AVX2-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] ; CHECK-AVX2-NEXT: vpunpckldq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload ; CHECK-AVX2-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] ; CHECK-AVX2-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill ; CHECK-AVX2-NEXT: vmovss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload ; CHECK-AVX2-NEXT: # xmm0 = mem[0],zero,zero,zero ; CHECK-AVX2-NEXT: callq __extendhfsf2@PLT ; CHECK-AVX2-NEXT: vmulss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 ; CHECK-AVX2-NEXT: callq 
__truncsfhf2@PLT ; CHECK-AVX2-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill ; CHECK-AVX2-NEXT: vmovss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload ; CHECK-AVX2-NEXT: # xmm0 = mem[0],zero,zero,zero ; CHECK-AVX2-NEXT: callq __extendhfsf2@PLT ; CHECK-AVX2-NEXT: vmulss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 ; CHECK-AVX2-NEXT: callq __truncsfhf2@PLT ; CHECK-AVX2-NEXT: vpunpcklwd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload ; CHECK-AVX2-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] ; CHECK-AVX2-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill ; CHECK-AVX2-NEXT: vmovss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload ; CHECK-AVX2-NEXT: # xmm0 = mem[0],zero,zero,zero ; CHECK-AVX2-NEXT: callq __extendhfsf2@PLT ; CHECK-AVX2-NEXT: vmulss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 ; CHECK-AVX2-NEXT: callq __truncsfhf2@PLT ; CHECK-AVX2-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill ; CHECK-AVX2-NEXT: vmovss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload ; CHECK-AVX2-NEXT: # xmm0 = mem[0],zero,zero,zero ; CHECK-AVX2-NEXT: callq __extendhfsf2@PLT ; CHECK-AVX2-NEXT: vmulss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 ; CHECK-AVX2-NEXT: callq __truncsfhf2@PLT ; CHECK-AVX2-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload ; CHECK-AVX2-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] ; CHECK-AVX2-NEXT: vpunpckldq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload ; CHECK-AVX2-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] ; CHECK-AVX2-NEXT: vpunpcklqdq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload ; CHECK-AVX2-NEXT: # xmm0 = xmm0[0],mem[0] ; CHECK-AVX2-NEXT: addq $120, %rsp ; CHECK-AVX2-NEXT: .cfi_def_cfa_offset 8 ; CHECK-AVX2-NEXT: retq ; ; CHECK-NO-FASTFMA-LABEL: fmul_pow2_8xhalf: ; CHECK-NO-FASTFMA: # %bb.0: ; CHECK-NO-FASTFMA-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero ; CHECK-NO-FASTFMA-NEXT: vpbroadcastd {{.*#+}} ymm1 = [1,1,1,1,1,1,1,1] ; CHECK-NO-FASTFMA-NEXT: vpsllvd %ymm0, %ymm1, %ymm0 ; CHECK-NO-FASTFMA-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; CHECK-NO-FASTFMA-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7],ymm0[8],ymm1[9],ymm0[10],ymm1[11],ymm0[12],ymm1[13],ymm0[14],ymm1[15] ; CHECK-NO-FASTFMA-NEXT: vcvtdq2ps %ymm0, %ymm0 ; CHECK-NO-FASTFMA-NEXT: vcvtps2ph $4, %ymm0, %xmm0 ; CHECK-NO-FASTFMA-NEXT: vcvtph2ps %xmm0, %ymm0 ; CHECK-NO-FASTFMA-NEXT: vbroadcastss {{.*#+}} ymm1 = [8.192E+3,8.192E+3,8.192E+3,8.192E+3,8.192E+3,8.192E+3,8.192E+3,8.192E+3] ; CHECK-NO-FASTFMA-NEXT: vmulps %ymm1, %ymm0, %ymm0 ; CHECK-NO-FASTFMA-NEXT: vcvtps2ph $4, %ymm0, %xmm0 ; CHECK-NO-FASTFMA-NEXT: vzeroupper ; CHECK-NO-FASTFMA-NEXT: retq ; ; CHECK-FMA-LABEL: fmul_pow2_8xhalf: ; CHECK-FMA: # %bb.0: ; CHECK-FMA-NEXT: vpbroadcastw {{.*#+}} xmm1 = [1,1,1,1,1,1,1,1] ; CHECK-FMA-NEXT: vpsllvw %xmm0, %xmm1, %xmm0 ; CHECK-FMA-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero ; CHECK-FMA-NEXT: vcvtdq2ps %ymm0, %ymm0 ; CHECK-FMA-NEXT: vcvtps2ph $4, %ymm0, %xmm0 ; CHECK-FMA-NEXT: vcvtph2ps %xmm0, %ymm0 ; CHECK-FMA-NEXT: vmulps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %ymm0 ; CHECK-FMA-NEXT: vcvtps2ph $4, %ymm0, %xmm0 ; CHECK-FMA-NEXT: vzeroupper ; CHECK-FMA-NEXT: retq %p2 = shl <8 x i16> , %i %p2_f = uitofp <8 x i16> %p2 to <8 x 
half> %r = fmul <8 x half> , %p2_f ret <8 x half> %r } define <8 x half> @fmul_pow2_ldexp_8xhalf(<8 x i16> %i) { ; CHECK-SSE-LABEL: fmul_pow2_ldexp_8xhalf: ; CHECK-SSE: # %bb.0: ; CHECK-SSE-NEXT: subq $72, %rsp ; CHECK-SSE-NEXT: .cfi_def_cfa_offset 80 ; CHECK-SSE-NEXT: movdqa %xmm0, (%rsp) # 16-byte Spill ; CHECK-SSE-NEXT: pextrw $7, %xmm0, %edi ; CHECK-SSE-NEXT: movss {{.*#+}} xmm0 = [8.192E+3,0.0E+0,0.0E+0,0.0E+0] ; CHECK-SSE-NEXT: callq ldexpf@PLT ; CHECK-SSE-NEXT: callq __truncsfhf2@PLT ; CHECK-SSE-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill ; CHECK-SSE-NEXT: movdqa (%rsp), %xmm0 # 16-byte Reload ; CHECK-SSE-NEXT: pextrw $6, %xmm0, %edi ; CHECK-SSE-NEXT: movd {{.*#+}} xmm0 = [8.192E+3,0.0E+0,0.0E+0,0.0E+0] ; CHECK-SSE-NEXT: callq ldexpf@PLT ; CHECK-SSE-NEXT: callq __truncsfhf2@PLT ; CHECK-SSE-NEXT: punpcklwd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload ; CHECK-SSE-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] ; CHECK-SSE-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill ; CHECK-SSE-NEXT: movdqa (%rsp), %xmm0 # 16-byte Reload ; CHECK-SSE-NEXT: pextrw $5, %xmm0, %edi ; CHECK-SSE-NEXT: movss {{.*#+}} xmm0 = [8.192E+3,0.0E+0,0.0E+0,0.0E+0] ; CHECK-SSE-NEXT: callq ldexpf@PLT ; CHECK-SSE-NEXT: callq __truncsfhf2@PLT ; CHECK-SSE-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill ; CHECK-SSE-NEXT: movdqa (%rsp), %xmm0 # 16-byte Reload ; CHECK-SSE-NEXT: pextrw $4, %xmm0, %edi ; CHECK-SSE-NEXT: movd {{.*#+}} xmm0 = [8.192E+3,0.0E+0,0.0E+0,0.0E+0] ; CHECK-SSE-NEXT: callq ldexpf@PLT ; CHECK-SSE-NEXT: callq __truncsfhf2@PLT ; CHECK-SSE-NEXT: punpcklwd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload ; CHECK-SSE-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] ; CHECK-SSE-NEXT: punpckldq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload ; CHECK-SSE-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] ; CHECK-SSE-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill ; CHECK-SSE-NEXT: movdqa (%rsp), %xmm0 # 16-byte Reload ; CHECK-SSE-NEXT: pextrw $3, %xmm0, %edi ; CHECK-SSE-NEXT: movss {{.*#+}} xmm0 = [8.192E+3,0.0E+0,0.0E+0,0.0E+0] ; CHECK-SSE-NEXT: callq ldexpf@PLT ; CHECK-SSE-NEXT: callq __truncsfhf2@PLT ; CHECK-SSE-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill ; CHECK-SSE-NEXT: movdqa (%rsp), %xmm0 # 16-byte Reload ; CHECK-SSE-NEXT: pextrw $2, %xmm0, %edi ; CHECK-SSE-NEXT: movd {{.*#+}} xmm0 = [8.192E+3,0.0E+0,0.0E+0,0.0E+0] ; CHECK-SSE-NEXT: callq ldexpf@PLT ; CHECK-SSE-NEXT: callq __truncsfhf2@PLT ; CHECK-SSE-NEXT: punpcklwd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload ; CHECK-SSE-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] ; CHECK-SSE-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill ; CHECK-SSE-NEXT: movdqa (%rsp), %xmm0 # 16-byte Reload ; CHECK-SSE-NEXT: pextrw $1, %xmm0, %edi ; CHECK-SSE-NEXT: movss {{.*#+}} xmm0 = [8.192E+3,0.0E+0,0.0E+0,0.0E+0] ; CHECK-SSE-NEXT: callq ldexpf@PLT ; CHECK-SSE-NEXT: callq __truncsfhf2@PLT ; CHECK-SSE-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill ; CHECK-SSE-NEXT: movdqa (%rsp), %xmm0 # 16-byte Reload ; CHECK-SSE-NEXT: movd %xmm0, %eax ; CHECK-SSE-NEXT: movzwl %ax, %edi ; CHECK-SSE-NEXT: movd {{.*#+}} xmm0 = [8.192E+3,0.0E+0,0.0E+0,0.0E+0] ; CHECK-SSE-NEXT: callq ldexpf@PLT ; CHECK-SSE-NEXT: callq __truncsfhf2@PLT ; CHECK-SSE-NEXT: punpcklwd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload ; CHECK-SSE-NEXT: # xmm0 = 
xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] ; CHECK-SSE-NEXT: punpckldq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload ; CHECK-SSE-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] ; CHECK-SSE-NEXT: punpcklqdq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload ; CHECK-SSE-NEXT: # xmm0 = xmm0[0],mem[0] ; CHECK-SSE-NEXT: addq $72, %rsp ; CHECK-SSE-NEXT: .cfi_def_cfa_offset 8 ; CHECK-SSE-NEXT: retq ; ; CHECK-AVX2-LABEL: fmul_pow2_ldexp_8xhalf: ; CHECK-AVX2: # %bb.0: ; CHECK-AVX2-NEXT: subq $72, %rsp ; CHECK-AVX2-NEXT: .cfi_def_cfa_offset 80 ; CHECK-AVX2-NEXT: vmovdqa %xmm0, (%rsp) # 16-byte Spill ; CHECK-AVX2-NEXT: vpextrw $7, %xmm0, %edi ; CHECK-AVX2-NEXT: vmovss {{.*#+}} xmm0 = [8.192E+3,0.0E+0,0.0E+0,0.0E+0] ; CHECK-AVX2-NEXT: callq ldexpf@PLT ; CHECK-AVX2-NEXT: callq __truncsfhf2@PLT ; CHECK-AVX2-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill ; CHECK-AVX2-NEXT: vmovdqa (%rsp), %xmm0 # 16-byte Reload ; CHECK-AVX2-NEXT: vpextrw $6, %xmm0, %edi ; CHECK-AVX2-NEXT: vmovd {{.*#+}} xmm0 = [8.192E+3,0.0E+0,0.0E+0,0.0E+0] ; CHECK-AVX2-NEXT: callq ldexpf@PLT ; CHECK-AVX2-NEXT: callq __truncsfhf2@PLT ; CHECK-AVX2-NEXT: vpunpcklwd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload ; CHECK-AVX2-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] ; CHECK-AVX2-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill ; CHECK-AVX2-NEXT: vmovdqa (%rsp), %xmm0 # 16-byte Reload ; CHECK-AVX2-NEXT: vpextrw $5, %xmm0, %edi ; CHECK-AVX2-NEXT: vmovss {{.*#+}} xmm0 = [8.192E+3,0.0E+0,0.0E+0,0.0E+0] ; CHECK-AVX2-NEXT: callq ldexpf@PLT ; CHECK-AVX2-NEXT: callq __truncsfhf2@PLT ; CHECK-AVX2-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill ; CHECK-AVX2-NEXT: vmovdqa (%rsp), %xmm0 # 16-byte Reload ; CHECK-AVX2-NEXT: vpextrw $4, %xmm0, %edi ; CHECK-AVX2-NEXT: vmovd {{.*#+}} xmm0 = [8.192E+3,0.0E+0,0.0E+0,0.0E+0] ; CHECK-AVX2-NEXT: callq ldexpf@PLT ; CHECK-AVX2-NEXT: callq __truncsfhf2@PLT ; CHECK-AVX2-NEXT: vpunpcklwd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload ; CHECK-AVX2-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] ; CHECK-AVX2-NEXT: vpunpckldq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload ; CHECK-AVX2-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] ; CHECK-AVX2-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill ; CHECK-AVX2-NEXT: vmovdqa (%rsp), %xmm0 # 16-byte Reload ; CHECK-AVX2-NEXT: vpextrw $3, %xmm0, %edi ; CHECK-AVX2-NEXT: vmovss {{.*#+}} xmm0 = [8.192E+3,0.0E+0,0.0E+0,0.0E+0] ; CHECK-AVX2-NEXT: callq ldexpf@PLT ; CHECK-AVX2-NEXT: callq __truncsfhf2@PLT ; CHECK-AVX2-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill ; CHECK-AVX2-NEXT: vmovdqa (%rsp), %xmm0 # 16-byte Reload ; CHECK-AVX2-NEXT: vpextrw $2, %xmm0, %edi ; CHECK-AVX2-NEXT: vmovd {{.*#+}} xmm0 = [8.192E+3,0.0E+0,0.0E+0,0.0E+0] ; CHECK-AVX2-NEXT: callq ldexpf@PLT ; CHECK-AVX2-NEXT: callq __truncsfhf2@PLT ; CHECK-AVX2-NEXT: vpunpcklwd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload ; CHECK-AVX2-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] ; CHECK-AVX2-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill ; CHECK-AVX2-NEXT: vmovdqa (%rsp), %xmm0 # 16-byte Reload ; CHECK-AVX2-NEXT: vpextrw $1, %xmm0, %edi ; CHECK-AVX2-NEXT: vmovss {{.*#+}} xmm0 = [8.192E+3,0.0E+0,0.0E+0,0.0E+0] ; CHECK-AVX2-NEXT: callq ldexpf@PLT ; CHECK-AVX2-NEXT: callq __truncsfhf2@PLT ; CHECK-AVX2-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 
; CHECK-AVX2-NEXT: vmovdqa (%rsp), %xmm0 # 16-byte Reload ; CHECK-AVX2-NEXT: vmovd %xmm0, %eax ; CHECK-AVX2-NEXT: movzwl %ax, %edi ; CHECK-AVX2-NEXT: vmovd {{.*#+}} xmm0 = [8.192E+3,0.0E+0,0.0E+0,0.0E+0] ; CHECK-AVX2-NEXT: callq ldexpf@PLT ; CHECK-AVX2-NEXT: callq __truncsfhf2@PLT ; CHECK-AVX2-NEXT: vpunpcklwd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload ; CHECK-AVX2-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] ; CHECK-AVX2-NEXT: vpunpckldq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload ; CHECK-AVX2-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] ; CHECK-AVX2-NEXT: vpunpcklqdq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload ; CHECK-AVX2-NEXT: # xmm0 = xmm0[0],mem[0] ; CHECK-AVX2-NEXT: addq $72, %rsp ; CHECK-AVX2-NEXT: .cfi_def_cfa_offset 8 ; CHECK-AVX2-NEXT: retq ; ; CHECK-AVX512F-LABEL: fmul_pow2_ldexp_8xhalf: ; CHECK-AVX512F: # %bb.0: ; CHECK-AVX512F-NEXT: subq $72, %rsp ; CHECK-AVX512F-NEXT: .cfi_def_cfa_offset 80 ; CHECK-AVX512F-NEXT: vmovdqa %xmm0, (%rsp) # 16-byte Spill ; CHECK-AVX512F-NEXT: vpextrw $7, %xmm0, %edi ; CHECK-AVX512F-NEXT: vmovss {{.*#+}} xmm0 = [8.192E+3,0.0E+0,0.0E+0,0.0E+0] ; CHECK-AVX512F-NEXT: callq ldexpf@PLT ; CHECK-AVX512F-NEXT: vcvtps2ph $4, %xmm0, %xmm0 ; CHECK-AVX512F-NEXT: vmovd %xmm0, %eax ; CHECK-AVX512F-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0 ; CHECK-AVX512F-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill ; CHECK-AVX512F-NEXT: vmovdqa (%rsp), %xmm0 # 16-byte Reload ; CHECK-AVX512F-NEXT: vpextrw $6, %xmm0, %edi ; CHECK-AVX512F-NEXT: vmovss {{.*#+}} xmm0 = [8.192E+3,0.0E+0,0.0E+0,0.0E+0] ; CHECK-AVX512F-NEXT: callq ldexpf@PLT ; CHECK-AVX512F-NEXT: vcvtps2ph $4, %xmm0, %xmm0 ; CHECK-AVX512F-NEXT: vmovd %xmm0, %eax ; CHECK-AVX512F-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0 ; CHECK-AVX512F-NEXT: vpunpcklwd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload ; CHECK-AVX512F-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] ; CHECK-AVX512F-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill ; CHECK-AVX512F-NEXT: vmovdqa (%rsp), %xmm0 # 16-byte Reload ; CHECK-AVX512F-NEXT: vpextrw $5, %xmm0, %edi ; CHECK-AVX512F-NEXT: vmovss {{.*#+}} xmm0 = [8.192E+3,0.0E+0,0.0E+0,0.0E+0] ; CHECK-AVX512F-NEXT: callq ldexpf@PLT ; CHECK-AVX512F-NEXT: vcvtps2ph $4, %xmm0, %xmm0 ; CHECK-AVX512F-NEXT: vmovd %xmm0, %eax ; CHECK-AVX512F-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0 ; CHECK-AVX512F-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill ; CHECK-AVX512F-NEXT: vmovdqa (%rsp), %xmm0 # 16-byte Reload ; CHECK-AVX512F-NEXT: vpextrw $4, %xmm0, %edi ; CHECK-AVX512F-NEXT: vmovss {{.*#+}} xmm0 = [8.192E+3,0.0E+0,0.0E+0,0.0E+0] ; CHECK-AVX512F-NEXT: callq ldexpf@PLT ; CHECK-AVX512F-NEXT: vcvtps2ph $4, %xmm0, %xmm0 ; CHECK-AVX512F-NEXT: vmovd %xmm0, %eax ; CHECK-AVX512F-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0 ; CHECK-AVX512F-NEXT: vpunpcklwd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload ; CHECK-AVX512F-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] ; CHECK-AVX512F-NEXT: vpunpckldq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload ; CHECK-AVX512F-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] ; CHECK-AVX512F-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill ; CHECK-AVX512F-NEXT: vmovdqa (%rsp), %xmm0 # 16-byte Reload ; CHECK-AVX512F-NEXT: vpextrw $3, %xmm0, %edi ; CHECK-AVX512F-NEXT: vmovss {{.*#+}} xmm0 = [8.192E+3,0.0E+0,0.0E+0,0.0E+0] ; CHECK-AVX512F-NEXT: callq ldexpf@PLT ; CHECK-AVX512F-NEXT: 
vcvtps2ph $4, %xmm0, %xmm0 ; CHECK-AVX512F-NEXT: vmovd %xmm0, %eax ; CHECK-AVX512F-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0 ; CHECK-AVX512F-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill ; CHECK-AVX512F-NEXT: vmovdqa (%rsp), %xmm0 # 16-byte Reload ; CHECK-AVX512F-NEXT: vpextrw $2, %xmm0, %edi ; CHECK-AVX512F-NEXT: vmovss {{.*#+}} xmm0 = [8.192E+3,0.0E+0,0.0E+0,0.0E+0] ; CHECK-AVX512F-NEXT: callq ldexpf@PLT ; CHECK-AVX512F-NEXT: vcvtps2ph $4, %xmm0, %xmm0 ; CHECK-AVX512F-NEXT: vmovd %xmm0, %eax ; CHECK-AVX512F-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0 ; CHECK-AVX512F-NEXT: vpunpcklwd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload ; CHECK-AVX512F-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] ; CHECK-AVX512F-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill ; CHECK-AVX512F-NEXT: vmovdqa (%rsp), %xmm0 # 16-byte Reload ; CHECK-AVX512F-NEXT: vpextrw $1, %xmm0, %edi ; CHECK-AVX512F-NEXT: vmovss {{.*#+}} xmm0 = [8.192E+3,0.0E+0,0.0E+0,0.0E+0] ; CHECK-AVX512F-NEXT: callq ldexpf@PLT ; CHECK-AVX512F-NEXT: vcvtps2ph $4, %xmm0, %xmm0 ; CHECK-AVX512F-NEXT: vmovd %xmm0, %eax ; CHECK-AVX512F-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0 ; CHECK-AVX512F-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill ; CHECK-AVX512F-NEXT: vmovdqa (%rsp), %xmm0 # 16-byte Reload ; CHECK-AVX512F-NEXT: vmovd %xmm0, %eax ; CHECK-AVX512F-NEXT: movzwl %ax, %edi ; CHECK-AVX512F-NEXT: vmovss {{.*#+}} xmm0 = [8.192E+3,0.0E+0,0.0E+0,0.0E+0] ; CHECK-AVX512F-NEXT: callq ldexpf@PLT ; CHECK-AVX512F-NEXT: vcvtps2ph $4, %xmm0, %xmm0 ; CHECK-AVX512F-NEXT: vmovd %xmm0, %eax ; CHECK-AVX512F-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0 ; CHECK-AVX512F-NEXT: vpunpcklwd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload ; CHECK-AVX512F-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] ; CHECK-AVX512F-NEXT: vpunpckldq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload ; CHECK-AVX512F-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] ; CHECK-AVX512F-NEXT: vpunpcklqdq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload ; CHECK-AVX512F-NEXT: # xmm0 = xmm0[0],mem[0] ; CHECK-AVX512F-NEXT: addq $72, %rsp ; CHECK-AVX512F-NEXT: .cfi_def_cfa_offset 8 ; CHECK-AVX512F-NEXT: retq %r = call <8 x half> @llvm.ldexp.v8f16.v8i16(<8 x half> , <8 x i16> %i) ret <8 x half> %r } define <8 x half> @fdiv_pow2_8xhalf(<8 x i16> %i) { ; CHECK-SSE-LABEL: fdiv_pow2_8xhalf: ; CHECK-SSE: # %bb.0: ; CHECK-SSE-NEXT: psllw $10, %xmm0 ; CHECK-SSE-NEXT: movdqa {{.*#+}} xmm1 = [28672,28672,28672,28672,28672,28672,28672,28672] ; CHECK-SSE-NEXT: psubw %xmm0, %xmm1 ; CHECK-SSE-NEXT: movdqa %xmm1, %xmm0 ; CHECK-SSE-NEXT: retq ; ; CHECK-AVX-LABEL: fdiv_pow2_8xhalf: ; CHECK-AVX: # %bb.0: ; CHECK-AVX-NEXT: vpsllw $10, %xmm0, %xmm0 ; CHECK-AVX-NEXT: vpbroadcastw {{.*#+}} xmm1 = [28672,28672,28672,28672,28672,28672,28672,28672] ; CHECK-AVX-NEXT: vpsubw %xmm0, %xmm1, %xmm0 ; CHECK-AVX-NEXT: retq %p2 = shl <8 x i16> , %i %p2_f = uitofp <8 x i16> %p2 to <8 x half> %r = fdiv <8 x half> , %p2_f ret <8 x half> %r } define double @fmul_pow_shl_cnt(i64 %cnt) nounwind { ; CHECK-SSE-LABEL: fmul_pow_shl_cnt: ; CHECK-SSE: # %bb.0: ; CHECK-SSE-NEXT: shlq $52, %rdi ; CHECK-SSE-NEXT: movabsq $4621256167635550208, %rax # imm = 0x4022000000000000 ; CHECK-SSE-NEXT: addq %rdi, %rax ; CHECK-SSE-NEXT: movq %rax, %xmm0 ; CHECK-SSE-NEXT: retq ; ; CHECK-AVX-LABEL: fmul_pow_shl_cnt: ; CHECK-AVX: # %bb.0: ; CHECK-AVX-NEXT: shlq $52, %rdi ; CHECK-AVX-NEXT: movabsq $4621256167635550208, %rax # imm = 
0x4022000000000000 ; CHECK-AVX-NEXT: addq %rdi, %rax ; CHECK-AVX-NEXT: vmovq %rax, %xmm0 ; CHECK-AVX-NEXT: retq %shl = shl nuw i64 1, %cnt %conv = uitofp i64 %shl to double %mul = fmul double 9.000000e+00, %conv ret double %mul } define double @fmul_pow_shl_cnt2(i64 %cnt) nounwind { ; CHECK-SSE-LABEL: fmul_pow_shl_cnt2: ; CHECK-SSE: # %bb.0: ; CHECK-SSE-NEXT: incl %edi ; CHECK-SSE-NEXT: shlq $52, %rdi ; CHECK-SSE-NEXT: movabsq $-4602115869219225600, %rax # imm = 0xC022000000000000 ; CHECK-SSE-NEXT: addq %rdi, %rax ; CHECK-SSE-NEXT: movq %rax, %xmm0 ; CHECK-SSE-NEXT: retq ; ; CHECK-AVX-LABEL: fmul_pow_shl_cnt2: ; CHECK-AVX: # %bb.0: ; CHECK-AVX-NEXT: incl %edi ; CHECK-AVX-NEXT: shlq $52, %rdi ; CHECK-AVX-NEXT: movabsq $-4602115869219225600, %rax # imm = 0xC022000000000000 ; CHECK-AVX-NEXT: addq %rdi, %rax ; CHECK-AVX-NEXT: vmovq %rax, %xmm0 ; CHECK-AVX-NEXT: retq %shl = shl nuw i64 2, %cnt %conv = uitofp i64 %shl to double %mul = fmul double -9.000000e+00, %conv ret double %mul } define float @fmul_pow_select(i32 %cnt, i1 %c) nounwind { ; CHECK-SSE-LABEL: fmul_pow_select: ; CHECK-SSE: # %bb.0: ; CHECK-SSE-NEXT: # kill: def $edi killed $edi def $rdi ; CHECK-SSE-NEXT: leal 1(%rdi), %eax ; CHECK-SSE-NEXT: testb $1, %sil ; CHECK-SSE-NEXT: cmovnel %edi, %eax ; CHECK-SSE-NEXT: shll $23, %eax ; CHECK-SSE-NEXT: addl $1091567616, %eax # imm = 0x41100000 ; CHECK-SSE-NEXT: movd %eax, %xmm0 ; CHECK-SSE-NEXT: retq ; ; CHECK-AVX-LABEL: fmul_pow_select: ; CHECK-AVX: # %bb.0: ; CHECK-AVX-NEXT: # kill: def $edi killed $edi def $rdi ; CHECK-AVX-NEXT: leal 1(%rdi), %eax ; CHECK-AVX-NEXT: testb $1, %sil ; CHECK-AVX-NEXT: cmovnel %edi, %eax ; CHECK-AVX-NEXT: shll $23, %eax ; CHECK-AVX-NEXT: addl $1091567616, %eax # imm = 0x41100000 ; CHECK-AVX-NEXT: vmovd %eax, %xmm0 ; CHECK-AVX-NEXT: retq %shl2 = shl nuw i32 2, %cnt %shl1 = shl nuw i32 1, %cnt %shl = select i1 %c, i32 %shl1, i32 %shl2 %conv = uitofp i32 %shl to float %mul = fmul float 9.000000e+00, %conv ret float %mul } define float @fmul_fly_pow_mul_min_pow2(i64 %cnt) nounwind { ; CHECK-SSE-LABEL: fmul_fly_pow_mul_min_pow2: ; CHECK-SSE: # %bb.0: ; CHECK-SSE-NEXT: addl $3, %edi ; CHECK-SSE-NEXT: cmpl $13, %edi ; CHECK-SSE-NEXT: movl $13, %eax ; CHECK-SSE-NEXT: cmovbl %edi, %eax ; CHECK-SSE-NEXT: shll $23, %eax ; CHECK-SSE-NEXT: addl $1091567616, %eax # imm = 0x41100000 ; CHECK-SSE-NEXT: movd %eax, %xmm0 ; CHECK-SSE-NEXT: retq ; ; CHECK-AVX-LABEL: fmul_fly_pow_mul_min_pow2: ; CHECK-AVX: # %bb.0: ; CHECK-AVX-NEXT: addl $3, %edi ; CHECK-AVX-NEXT: cmpl $13, %edi ; CHECK-AVX-NEXT: movl $13, %eax ; CHECK-AVX-NEXT: cmovbl %edi, %eax ; CHECK-AVX-NEXT: shll $23, %eax ; CHECK-AVX-NEXT: addl $1091567616, %eax # imm = 0x41100000 ; CHECK-AVX-NEXT: vmovd %eax, %xmm0 ; CHECK-AVX-NEXT: retq %shl8 = shl nuw i64 8, %cnt %shl = call i64 @llvm.umin.i64(i64 %shl8, i64 8192) %conv = uitofp i64 %shl to float %mul = fmul float 9.000000e+00, %conv ret float %mul } define double @fmul_pow_mul_max_pow2(i16 %cnt) nounwind { ; CHECK-SSE-LABEL: fmul_pow_mul_max_pow2: ; CHECK-SSE: # %bb.0: ; CHECK-SSE-NEXT: movl %edi, %eax ; CHECK-SSE-NEXT: leaq 1(%rax), %rcx ; CHECK-SSE-NEXT: cmpq %rcx, %rax ; CHECK-SSE-NEXT: cmovaq %rax, %rcx ; CHECK-SSE-NEXT: shlq $52, %rcx ; CHECK-SSE-NEXT: movabsq $4613937818241073152, %rax # imm = 0x4008000000000000 ; CHECK-SSE-NEXT: addq %rcx, %rax ; CHECK-SSE-NEXT: movq %rax, %xmm0 ; CHECK-SSE-NEXT: retq ; ; CHECK-AVX-LABEL: fmul_pow_mul_max_pow2: ; CHECK-AVX: # %bb.0: ; CHECK-AVX-NEXT: movl %edi, %eax ; CHECK-AVX-NEXT: leaq 1(%rax), %rcx ; CHECK-AVX-NEXT: cmpq 
%rcx, %rax ; CHECK-AVX-NEXT: cmovaq %rax, %rcx ; CHECK-AVX-NEXT: shlq $52, %rcx ; CHECK-AVX-NEXT: movabsq $4613937818241073152, %rax # imm = 0x4008000000000000 ; CHECK-AVX-NEXT: addq %rcx, %rax ; CHECK-AVX-NEXT: vmovq %rax, %xmm0 ; CHECK-AVX-NEXT: retq %shl2 = shl nuw i16 2, %cnt %shl1 = shl nuw i16 1, %cnt %shl = call i16 @llvm.umax.i16(i16 %shl1, i16 %shl2) %conv = uitofp i16 %shl to double %mul = fmul double 3.000000e+00, %conv ret double %mul } define double @fmul_pow_shl_cnt_fail_maybe_non_pow2(i64 %v, i64 %cnt) nounwind { ; CHECK-SSE-LABEL: fmul_pow_shl_cnt_fail_maybe_non_pow2: ; CHECK-SSE: # %bb.0: ; CHECK-SSE-NEXT: movq %rsi, %rcx ; CHECK-SSE-NEXT: # kill: def $cl killed $cl killed $rcx ; CHECK-SSE-NEXT: shlq %cl, %rdi ; CHECK-SSE-NEXT: movq %rdi, %xmm1 ; CHECK-SSE-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] ; CHECK-SSE-NEXT: subpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 ; CHECK-SSE-NEXT: movapd %xmm1, %xmm0 ; CHECK-SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] ; CHECK-SSE-NEXT: addsd %xmm1, %xmm0 ; CHECK-SSE-NEXT: mulsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 ; CHECK-SSE-NEXT: retq ; ; CHECK-AVX2-LABEL: fmul_pow_shl_cnt_fail_maybe_non_pow2: ; CHECK-AVX2: # %bb.0: ; CHECK-AVX2-NEXT: movq %rsi, %rcx ; CHECK-AVX2-NEXT: # kill: def $cl killed $cl killed $rcx ; CHECK-AVX2-NEXT: shlq %cl, %rdi ; CHECK-AVX2-NEXT: vmovq %rdi, %xmm0 ; CHECK-AVX2-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] ; CHECK-AVX2-NEXT: vsubpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 ; CHECK-AVX2-NEXT: vshufpd {{.*#+}} xmm1 = xmm0[1,0] ; CHECK-AVX2-NEXT: vaddsd %xmm0, %xmm1, %xmm0 ; CHECK-AVX2-NEXT: vmulsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 ; CHECK-AVX2-NEXT: retq ; ; CHECK-NO-FASTFMA-LABEL: fmul_pow_shl_cnt_fail_maybe_non_pow2: ; CHECK-NO-FASTFMA: # %bb.0: ; CHECK-NO-FASTFMA-NEXT: movq %rsi, %rcx ; CHECK-NO-FASTFMA-NEXT: # kill: def $cl killed $cl killed $rcx ; CHECK-NO-FASTFMA-NEXT: shlq %cl, %rdi ; CHECK-NO-FASTFMA-NEXT: vcvtusi2sd %rdi, %xmm0, %xmm0 ; CHECK-NO-FASTFMA-NEXT: vmulsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 ; CHECK-NO-FASTFMA-NEXT: retq ; ; CHECK-FMA-LABEL: fmul_pow_shl_cnt_fail_maybe_non_pow2: ; CHECK-FMA: # %bb.0: ; CHECK-FMA-NEXT: shlxq %rsi, %rdi, %rax ; CHECK-FMA-NEXT: vcvtusi2sd %rax, %xmm0, %xmm0 ; CHECK-FMA-NEXT: vmulsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 ; CHECK-FMA-NEXT: retq %shl = shl nuw i64 %v, %cnt %conv = uitofp i64 %shl to double %mul = fmul double 9.000000e+00, %conv ret double %mul } define <2 x float> @fmul_pow_shl_cnt_vec_fail_expensive_cast(<2 x i64> %cnt) nounwind { ; CHECK-SSE-LABEL: fmul_pow_shl_cnt_vec_fail_expensive_cast: ; CHECK-SSE: # %bb.0: ; CHECK-SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3] ; CHECK-SSE-NEXT: movdqa {{.*#+}} xmm3 = [2,2] ; CHECK-SSE-NEXT: movdqa %xmm3, %xmm1 ; CHECK-SSE-NEXT: psllq %xmm2, %xmm1 ; CHECK-SSE-NEXT: psllq %xmm0, %xmm3 ; CHECK-SSE-NEXT: movq %xmm3, %rax ; CHECK-SSE-NEXT: testq %rax, %rax ; CHECK-SSE-NEXT: js .LBB12_1 ; CHECK-SSE-NEXT: # %bb.2: ; CHECK-SSE-NEXT: xorps %xmm0, %xmm0 ; CHECK-SSE-NEXT: cvtsi2ss %rax, %xmm0 ; CHECK-SSE-NEXT: jmp .LBB12_3 ; CHECK-SSE-NEXT: .LBB12_1: ; CHECK-SSE-NEXT: movq %rax, %rcx ; CHECK-SSE-NEXT: shrq %rcx ; CHECK-SSE-NEXT: andl $1, %eax ; CHECK-SSE-NEXT: orq %rcx, %rax ; CHECK-SSE-NEXT: xorps %xmm0, %xmm0 ; CHECK-SSE-NEXT: cvtsi2ss %rax, %xmm0 ; CHECK-SSE-NEXT: addss %xmm0, %xmm0 ; CHECK-SSE-NEXT: .LBB12_3: ; CHECK-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3] ; CHECK-SSE-NEXT: movq %xmm1, %rax ; CHECK-SSE-NEXT: testq %rax, %rax ; 
CHECK-SSE-NEXT: js .LBB12_4 ; CHECK-SSE-NEXT: # %bb.5: ; CHECK-SSE-NEXT: xorps %xmm1, %xmm1 ; CHECK-SSE-NEXT: cvtsi2ss %rax, %xmm1 ; CHECK-SSE-NEXT: jmp .LBB12_6 ; CHECK-SSE-NEXT: .LBB12_4: ; CHECK-SSE-NEXT: movq %rax, %rcx ; CHECK-SSE-NEXT: shrq %rcx ; CHECK-SSE-NEXT: andl $1, %eax ; CHECK-SSE-NEXT: orq %rcx, %rax ; CHECK-SSE-NEXT: xorps %xmm1, %xmm1 ; CHECK-SSE-NEXT: cvtsi2ss %rax, %xmm1 ; CHECK-SSE-NEXT: addss %xmm1, %xmm1 ; CHECK-SSE-NEXT: .LBB12_6: ; CHECK-SSE-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] ; CHECK-SSE-NEXT: mulps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 ; CHECK-SSE-NEXT: retq ; ; CHECK-AVX2-LABEL: fmul_pow_shl_cnt_vec_fail_expensive_cast: ; CHECK-AVX2: # %bb.0: ; CHECK-AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [2,2] ; CHECK-AVX2-NEXT: vpsllvq %xmm0, %xmm1, %xmm0 ; CHECK-AVX2-NEXT: vpsrlq $1, %xmm0, %xmm1 ; CHECK-AVX2-NEXT: vblendvpd %xmm0, %xmm1, %xmm0, %xmm1 ; CHECK-AVX2-NEXT: vpextrq $1, %xmm1, %rax ; CHECK-AVX2-NEXT: vcvtsi2ss %rax, %xmm2, %xmm2 ; CHECK-AVX2-NEXT: vmovq %xmm1, %rax ; CHECK-AVX2-NEXT: vcvtsi2ss %rax, %xmm3, %xmm1 ; CHECK-AVX2-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0],xmm2[0],zero,zero ; CHECK-AVX2-NEXT: vaddps %xmm1, %xmm1, %xmm2 ; CHECK-AVX2-NEXT: vpxor %xmm3, %xmm3, %xmm3 ; CHECK-AVX2-NEXT: vpcmpgtq %xmm0, %xmm3, %xmm0 ; CHECK-AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,3,2,3] ; CHECK-AVX2-NEXT: vblendvps %xmm0, %xmm2, %xmm1, %xmm0 ; CHECK-AVX2-NEXT: vbroadcastss {{.*#+}} xmm1 = [1.5E+1,1.5E+1,1.5E+1,1.5E+1] ; CHECK-AVX2-NEXT: vmulps %xmm1, %xmm0, %xmm0 ; CHECK-AVX2-NEXT: retq ; ; CHECK-NO-FASTFMA-LABEL: fmul_pow_shl_cnt_vec_fail_expensive_cast: ; CHECK-NO-FASTFMA: # %bb.0: ; CHECK-NO-FASTFMA-NEXT: vpbroadcastq {{.*#+}} xmm1 = [2,2] ; CHECK-NO-FASTFMA-NEXT: vpsllvq %xmm0, %xmm1, %xmm0 ; CHECK-NO-FASTFMA-NEXT: vpextrq $1, %xmm0, %rax ; CHECK-NO-FASTFMA-NEXT: vcvtusi2ss %rax, %xmm2, %xmm1 ; CHECK-NO-FASTFMA-NEXT: vmovq %xmm0, %rax ; CHECK-NO-FASTFMA-NEXT: vcvtusi2ss %rax, %xmm2, %xmm0 ; CHECK-NO-FASTFMA-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],zero,zero ; CHECK-NO-FASTFMA-NEXT: vbroadcastss {{.*#+}} xmm1 = [1.5E+1,1.5E+1,1.5E+1,1.5E+1] ; CHECK-NO-FASTFMA-NEXT: vmulps %xmm1, %xmm0, %xmm0 ; CHECK-NO-FASTFMA-NEXT: retq ; ; CHECK-FMA-LABEL: fmul_pow_shl_cnt_vec_fail_expensive_cast: ; CHECK-FMA: # %bb.0: ; CHECK-FMA-NEXT: vpbroadcastq {{.*#+}} xmm1 = [2,2] ; CHECK-FMA-NEXT: vpsllvq %xmm0, %xmm1, %xmm0 ; CHECK-FMA-NEXT: vcvtuqq2ps %xmm0, %xmm0 ; CHECK-FMA-NEXT: vmulps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0 ; CHECK-FMA-NEXT: retq %shl = shl nsw nuw <2 x i64> , %cnt %conv = uitofp <2 x i64> %shl to <2 x float> %mul = fmul <2 x float> , %conv ret <2 x float> %mul } define <2 x double> @fmul_pow_shl_cnt_vec(<2 x i64> %cnt) nounwind { ; CHECK-SSE-LABEL: fmul_pow_shl_cnt_vec: ; CHECK-SSE: # %bb.0: ; CHECK-SSE-NEXT: psllq $52, %xmm0 ; CHECK-SSE-NEXT: paddq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 ; CHECK-SSE-NEXT: retq ; ; CHECK-AVX2-LABEL: fmul_pow_shl_cnt_vec: ; CHECK-AVX2: # %bb.0: ; CHECK-AVX2-NEXT: vpsllq $52, %xmm0, %xmm0 ; CHECK-AVX2-NEXT: vpaddq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 ; CHECK-AVX2-NEXT: retq ; ; CHECK-NO-FASTFMA-LABEL: fmul_pow_shl_cnt_vec: ; CHECK-NO-FASTFMA: # %bb.0: ; CHECK-NO-FASTFMA-NEXT: vpsllq $52, %xmm0, %xmm0 ; CHECK-NO-FASTFMA-NEXT: vpaddq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 ; CHECK-NO-FASTFMA-NEXT: retq ; ; CHECK-FMA-LABEL: fmul_pow_shl_cnt_vec: ; CHECK-FMA: # %bb.0: ; CHECK-FMA-NEXT: vpsllq $52, %xmm0, %xmm0 ; CHECK-FMA-NEXT: vpaddq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm0 ; 
CHECK-FMA-NEXT: retq %shl = shl nsw nuw <2 x i64> , %cnt %conv = uitofp <2 x i64> %shl to <2 x double> %mul = fmul <2 x double> , %conv ret <2 x double> %mul } define <4 x float> @fmul_pow_shl_cnt_vec_preserve_fma(<4 x i32> %cnt, <4 x float> %add) nounwind { ; CHECK-SSE-LABEL: fmul_pow_shl_cnt_vec_preserve_fma: ; CHECK-SSE: # %bb.0: ; CHECK-SSE-NEXT: pslld $23, %xmm0 ; CHECK-SSE-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 ; CHECK-SSE-NEXT: addps %xmm1, %xmm0 ; CHECK-SSE-NEXT: retq ; ; CHECK-AVX2-LABEL: fmul_pow_shl_cnt_vec_preserve_fma: ; CHECK-AVX2: # %bb.0: ; CHECK-AVX2-NEXT: vpslld $23, %xmm0, %xmm0 ; CHECK-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [1092616192,1092616192,1092616192,1092616192] ; CHECK-AVX2-NEXT: vpaddd %xmm2, %xmm0, %xmm0 ; CHECK-AVX2-NEXT: vaddps %xmm1, %xmm0, %xmm0 ; CHECK-AVX2-NEXT: retq ; ; CHECK-NO-FASTFMA-LABEL: fmul_pow_shl_cnt_vec_preserve_fma: ; CHECK-NO-FASTFMA: # %bb.0: ; CHECK-NO-FASTFMA-NEXT: vpslld $23, %xmm0, %xmm0 ; CHECK-NO-FASTFMA-NEXT: vpbroadcastd {{.*#+}} xmm2 = [1092616192,1092616192,1092616192,1092616192] ; CHECK-NO-FASTFMA-NEXT: vpaddd %xmm2, %xmm0, %xmm0 ; CHECK-NO-FASTFMA-NEXT: vaddps %xmm1, %xmm0, %xmm0 ; CHECK-NO-FASTFMA-NEXT: retq ; ; CHECK-FMA-LABEL: fmul_pow_shl_cnt_vec_preserve_fma: ; CHECK-FMA: # %bb.0: ; CHECK-FMA-NEXT: vpbroadcastd {{.*#+}} xmm2 = [2,2,2,2] ; CHECK-FMA-NEXT: vpsllvd %xmm0, %xmm2, %xmm0 ; CHECK-FMA-NEXT: vcvtudq2ps %xmm0, %xmm0 ; CHECK-FMA-NEXT: vfmadd132ps {{.*#+}} xmm0 = (xmm0 * mem) + xmm1 ; CHECK-FMA-NEXT: retq %shl = shl nsw nuw <4 x i32> , %cnt %conv = uitofp <4 x i32> %shl to <4 x float> %mul = fmul <4 x float> , %conv %res = fadd <4 x float> %mul, %add ret <4 x float> %res } define <2 x double> @fmul_pow_shl_cnt_vec_non_splat_todo(<2 x i64> %cnt) nounwind { ; CHECK-SSE-LABEL: fmul_pow_shl_cnt_vec_non_splat_todo: ; CHECK-SSE: # %bb.0: ; CHECK-SSE-NEXT: psllq $52, %xmm0 ; CHECK-SSE-NEXT: paddq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 ; CHECK-SSE-NEXT: retq ; ; CHECK-AVX-LABEL: fmul_pow_shl_cnt_vec_non_splat_todo: ; CHECK-AVX: # %bb.0: ; CHECK-AVX-NEXT: vpsllq $52, %xmm0, %xmm0 ; CHECK-AVX-NEXT: vpaddq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 ; CHECK-AVX-NEXT: retq %shl = shl nsw nuw <2 x i64> , %cnt %conv = uitofp <2 x i64> %shl to <2 x double> %mul = fmul <2 x double> , %conv ret <2 x double> %mul } define <2 x double> @fmul_pow_shl_cnt_vec_non_splat2_todo(<2 x i64> %cnt) nounwind { ; CHECK-SSE-LABEL: fmul_pow_shl_cnt_vec_non_splat2_todo: ; CHECK-SSE: # %bb.0: ; CHECK-SSE-NEXT: psllq $52, %xmm0 ; CHECK-SSE-NEXT: paddq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 ; CHECK-SSE-NEXT: retq ; ; CHECK-AVX-LABEL: fmul_pow_shl_cnt_vec_non_splat2_todo: ; CHECK-AVX: # %bb.0: ; CHECK-AVX-NEXT: vpsllq $52, %xmm0, %xmm0 ; CHECK-AVX-NEXT: vpaddq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 ; CHECK-AVX-NEXT: retq %shl = shl nsw nuw <2 x i64> , %cnt %conv = uitofp <2 x i64> %shl to <2 x double> %mul = fmul <2 x double> , %conv ret <2 x double> %mul } define <2 x half> @fmul_pow_shl_cnt_vec_fail_to_large(<2 x i16> %cnt) nounwind { ; CHECK-SSE-LABEL: fmul_pow_shl_cnt_vec_fail_to_large: ; CHECK-SSE: # %bb.0: ; CHECK-SSE-NEXT: subq $40, %rsp ; CHECK-SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] ; CHECK-SSE-NEXT: pslld $23, %xmm0 ; CHECK-SSE-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 ; CHECK-SSE-NEXT: cvttps2dq %xmm0, %xmm0 ; CHECK-SSE-NEXT: pshuflw {{.*#+}} xmm1 = xmm0[0,2,2,3,4,5,6,7] ; CHECK-SSE-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 ; CHECK-SSE-NEXT: pxor %xmm0, %xmm0 ; CHECK-SSE-NEXT: punpcklwd 
{{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] ; CHECK-SSE-NEXT: movdqa %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill ; CHECK-SSE-NEXT: cvtdq2ps %xmm1, %xmm0 ; CHECK-SSE-NEXT: callq __truncsfhf2@PLT ; CHECK-SSE-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill ; CHECK-SSE-NEXT: pshufd $85, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload ; CHECK-SSE-NEXT: # xmm0 = mem[1,1,1,1] ; CHECK-SSE-NEXT: cvtdq2ps %xmm0, %xmm0 ; CHECK-SSE-NEXT: callq __truncsfhf2@PLT ; CHECK-SSE-NEXT: callq __extendhfsf2@PLT ; CHECK-SSE-NEXT: mulss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 ; CHECK-SSE-NEXT: callq __truncsfhf2@PLT ; CHECK-SSE-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill ; CHECK-SSE-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload ; CHECK-SSE-NEXT: # xmm0 = mem[0],zero,zero,zero ; CHECK-SSE-NEXT: callq __extendhfsf2@PLT ; CHECK-SSE-NEXT: mulss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 ; CHECK-SSE-NEXT: callq __truncsfhf2@PLT ; CHECK-SSE-NEXT: punpcklwd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload ; CHECK-SSE-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] ; CHECK-SSE-NEXT: addq $40, %rsp ; CHECK-SSE-NEXT: retq ; ; CHECK-AVX2-LABEL: fmul_pow_shl_cnt_vec_fail_to_large: ; CHECK-AVX2: # %bb.0: ; CHECK-AVX2-NEXT: subq $56, %rsp ; CHECK-AVX2-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero ; CHECK-AVX2-NEXT: vbroadcasti128 {{.*#+}} ymm1 = [2,2,0,0,2,2,0,0] ; CHECK-AVX2-NEXT: # ymm1 = mem[0,1,0,1] ; CHECK-AVX2-NEXT: vpsllvd %ymm0, %ymm1, %ymm0 ; CHECK-AVX2-NEXT: vmovdqu %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill ; CHECK-AVX2-NEXT: vpextrw $2, %xmm0, %eax ; CHECK-AVX2-NEXT: vcvtsi2ss %eax, %xmm2, %xmm0 ; CHECK-AVX2-NEXT: vzeroupper ; CHECK-AVX2-NEXT: callq __truncsfhf2@PLT ; CHECK-AVX2-NEXT: vmovss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill ; CHECK-AVX2-NEXT: vmovdqu {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload ; CHECK-AVX2-NEXT: vpextrw $0, %xmm0, %eax ; CHECK-AVX2-NEXT: vcvtsi2ss %eax, %xmm2, %xmm0 ; CHECK-AVX2-NEXT: vzeroupper ; CHECK-AVX2-NEXT: callq __truncsfhf2@PLT ; CHECK-AVX2-NEXT: callq __extendhfsf2@PLT ; CHECK-AVX2-NEXT: vmulss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 ; CHECK-AVX2-NEXT: callq __truncsfhf2@PLT ; CHECK-AVX2-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill ; CHECK-AVX2-NEXT: vmovss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload ; CHECK-AVX2-NEXT: # xmm0 = mem[0],zero,zero,zero ; CHECK-AVX2-NEXT: callq __extendhfsf2@PLT ; CHECK-AVX2-NEXT: vmulss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 ; CHECK-AVX2-NEXT: callq __truncsfhf2@PLT ; CHECK-AVX2-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload ; CHECK-AVX2-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] ; CHECK-AVX2-NEXT: addq $56, %rsp ; CHECK-AVX2-NEXT: retq ; ; CHECK-NO-FASTFMA-LABEL: fmul_pow_shl_cnt_vec_fail_to_large: ; CHECK-NO-FASTFMA: # %bb.0: ; CHECK-NO-FASTFMA-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero ; CHECK-NO-FASTFMA-NEXT: vbroadcasti128 {{.*#+}} ymm1 = [2,2,0,0,2,2,0,0] ; CHECK-NO-FASTFMA-NEXT: # ymm1 = mem[0,1,0,1] ; CHECK-NO-FASTFMA-NEXT: vpsllvd %ymm0, %ymm1, %ymm0 ; CHECK-NO-FASTFMA-NEXT: vpmovdw %zmm0, %ymm0 ; CHECK-NO-FASTFMA-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero ; CHECK-NO-FASTFMA-NEXT: vcvtdq2ps 
%ymm0, %ymm0 ; CHECK-NO-FASTFMA-NEXT: vcvtps2ph $4, %ymm0, %xmm0 ; CHECK-NO-FASTFMA-NEXT: vcvtph2ps %xmm0, %ymm0 ; CHECK-NO-FASTFMA-NEXT: vbroadcastss {{.*#+}} ymm1 = [1.5E+1,1.5E+1,1.5E+1,1.5E+1,1.5E+1,1.5E+1,1.5E+1,1.5E+1] ; CHECK-NO-FASTFMA-NEXT: vmulps %ymm1, %ymm0, %ymm0 ; CHECK-NO-FASTFMA-NEXT: vcvtps2ph $4, %ymm0, %xmm0 ; CHECK-NO-FASTFMA-NEXT: vzeroupper ; CHECK-NO-FASTFMA-NEXT: retq ; ; CHECK-FMA-LABEL: fmul_pow_shl_cnt_vec_fail_to_large: ; CHECK-FMA: # %bb.0: ; CHECK-FMA-NEXT: vpbroadcastw {{.*#+}} xmm1 = [2,2,2,2,2,2,2,2] ; CHECK-FMA-NEXT: vpsllvw %xmm0, %xmm1, %xmm0 ; CHECK-FMA-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero ; CHECK-FMA-NEXT: vcvtdq2ps %ymm0, %ymm0 ; CHECK-FMA-NEXT: vcvtps2ph $4, %ymm0, %xmm0 ; CHECK-FMA-NEXT: vcvtph2ps %xmm0, %ymm0 ; CHECK-FMA-NEXT: vmulps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %ymm0 ; CHECK-FMA-NEXT: vcvtps2ph $4, %ymm0, %xmm0 ; CHECK-FMA-NEXT: vzeroupper ; CHECK-FMA-NEXT: retq %shl = shl nsw nuw <2 x i16> , %cnt %conv = uitofp <2 x i16> %shl to <2 x half> %mul = fmul <2 x half> , %conv ret <2 x half> %mul } define double @fmul_pow_shl_cnt_fail_maybe_bad_exp(i64 %cnt) nounwind { ; CHECK-SSE-LABEL: fmul_pow_shl_cnt_fail_maybe_bad_exp: ; CHECK-SSE: # %bb.0: ; CHECK-SSE-NEXT: movq %rdi, %rcx ; CHECK-SSE-NEXT: movl $1, %eax ; CHECK-SSE-NEXT: # kill: def $cl killed $cl killed $rcx ; CHECK-SSE-NEXT: shlq %cl, %rax ; CHECK-SSE-NEXT: movq %rax, %xmm1 ; CHECK-SSE-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] ; CHECK-SSE-NEXT: subpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 ; CHECK-SSE-NEXT: movapd %xmm1, %xmm0 ; CHECK-SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] ; CHECK-SSE-NEXT: addsd %xmm1, %xmm0 ; CHECK-SSE-NEXT: mulsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 ; CHECK-SSE-NEXT: retq ; ; CHECK-AVX2-LABEL: fmul_pow_shl_cnt_fail_maybe_bad_exp: ; CHECK-AVX2: # %bb.0: ; CHECK-AVX2-NEXT: movq %rdi, %rcx ; CHECK-AVX2-NEXT: movl $1, %eax ; CHECK-AVX2-NEXT: # kill: def $cl killed $cl killed $rcx ; CHECK-AVX2-NEXT: shlq %cl, %rax ; CHECK-AVX2-NEXT: vmovq %rax, %xmm0 ; CHECK-AVX2-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] ; CHECK-AVX2-NEXT: vsubpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 ; CHECK-AVX2-NEXT: vshufpd {{.*#+}} xmm1 = xmm0[1,0] ; CHECK-AVX2-NEXT: vaddsd %xmm0, %xmm1, %xmm0 ; CHECK-AVX2-NEXT: vmulsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 ; CHECK-AVX2-NEXT: retq ; ; CHECK-NO-FASTFMA-LABEL: fmul_pow_shl_cnt_fail_maybe_bad_exp: ; CHECK-NO-FASTFMA: # %bb.0: ; CHECK-NO-FASTFMA-NEXT: movq %rdi, %rcx ; CHECK-NO-FASTFMA-NEXT: movl $1, %eax ; CHECK-NO-FASTFMA-NEXT: # kill: def $cl killed $cl killed $rcx ; CHECK-NO-FASTFMA-NEXT: shlq %cl, %rax ; CHECK-NO-FASTFMA-NEXT: vcvtusi2sd %rax, %xmm0, %xmm0 ; CHECK-NO-FASTFMA-NEXT: vmulsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 ; CHECK-NO-FASTFMA-NEXT: retq ; ; CHECK-FMA-LABEL: fmul_pow_shl_cnt_fail_maybe_bad_exp: ; CHECK-FMA: # %bb.0: ; CHECK-FMA-NEXT: movl $1, %eax ; CHECK-FMA-NEXT: shlxq %rdi, %rax, %rax ; CHECK-FMA-NEXT: vcvtusi2sd %rax, %xmm0, %xmm0 ; CHECK-FMA-NEXT: vmulsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 ; CHECK-FMA-NEXT: retq %shl = shl nuw i64 1, %cnt %conv = uitofp i64 %shl to double %mul = fmul double 9.745314e+288, %conv ret double %mul } define double @fmul_pow_shl_cnt_safe(i16 %cnt) nounwind { ; CHECK-SSE-LABEL: fmul_pow_shl_cnt_safe: ; CHECK-SSE: # %bb.0: ; CHECK-SSE-NEXT: # kill: def $edi killed $edi def $rdi ; CHECK-SSE-NEXT: shlq $52, %rdi ; CHECK-SSE-NEXT: movabsq $8930638061065157010, %rax # imm = 
0x7BEFFFFFFF5F3992
; CHECK-SSE-NEXT:    addq %rdi, %rax
; CHECK-SSE-NEXT:    movq %rax, %xmm0
; CHECK-SSE-NEXT:    retq
;
; CHECK-AVX-LABEL: fmul_pow_shl_cnt_safe:
; CHECK-AVX:       # %bb.0:
; CHECK-AVX-NEXT:    # kill: def $edi killed $edi def $rdi
; CHECK-AVX-NEXT:    shlq $52, %rdi
; CHECK-AVX-NEXT:    movabsq $8930638061065157010, %rax # imm = 0x7BEFFFFFFF5F3992
; CHECK-AVX-NEXT:    addq %rdi, %rax
; CHECK-AVX-NEXT:    vmovq %rax, %xmm0
; CHECK-AVX-NEXT:    retq
  %shl = shl nuw i16 1, %cnt
  %conv = uitofp i16 %shl to double
  %mul = fmul double 9.745314e+288, %conv
  ret double %mul
}

define <2 x double> @fdiv_pow_shl_cnt_vec(<2 x i64> %cnt) nounwind {
; CHECK-SSE-LABEL: fdiv_pow_shl_cnt_vec:
; CHECK-SSE:       # %bb.0:
; CHECK-SSE-NEXT:    psllq $52, %xmm0
; CHECK-SSE-NEXT:    movdqa {{.*#+}} xmm1 = [4607182418800017408,4607182418800017408]
; CHECK-SSE-NEXT:    psubq %xmm0, %xmm1
; CHECK-SSE-NEXT:    movdqa %xmm1, %xmm0
; CHECK-SSE-NEXT:    retq
;
; CHECK-AVX-LABEL: fdiv_pow_shl_cnt_vec:
; CHECK-AVX:       # %bb.0:
; CHECK-AVX-NEXT:    vpsllq $52, %xmm0, %xmm0
; CHECK-AVX-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [4607182418800017408,4607182418800017408]
; CHECK-AVX-NEXT:    vpsubq %xmm0, %xmm1, %xmm0
; CHECK-AVX-NEXT:    retq
  %shl = shl nuw <2 x i64> <i64 1, i64 1>, %cnt
  %conv = uitofp <2 x i64> %shl to <2 x double>
  %mul = fdiv <2 x double> <double 1.000000e+00, double 1.000000e+00>, %conv
  ret <2 x double> %mul
}

define <2 x float> @fdiv_pow_shl_cnt_vec_with_expensive_cast(<2 x i64> %cnt) nounwind {
; CHECK-SSE-LABEL: fdiv_pow_shl_cnt_vec_with_expensive_cast:
; CHECK-SSE:       # %bb.0:
; CHECK-SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[0,2,2,3]
; CHECK-SSE-NEXT:    pslld $23, %xmm1
; CHECK-SSE-NEXT:    movdqa {{.*#+}} xmm0 = [1065353216,1065353216,u,u]
; CHECK-SSE-NEXT:    psubd %xmm1, %xmm0
; CHECK-SSE-NEXT:    retq
;
; CHECK-AVX-LABEL: fdiv_pow_shl_cnt_vec_with_expensive_cast:
; CHECK-AVX:       # %bb.0:
; CHECK-AVX-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; CHECK-AVX-NEXT:    vpslld $23, %xmm0, %xmm0
; CHECK-AVX-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [1065353216,1065353216,1065353216,1065353216]
; CHECK-AVX-NEXT:    vpsubd %xmm0, %xmm1, %xmm0
; CHECK-AVX-NEXT:    retq
  %shl = shl nuw <2 x i64> <i64 1, i64 1>, %cnt
  %conv = uitofp <2 x i64> %shl to <2 x float>
  %mul = fdiv <2 x float> <float 1.000000e+00, float 1.000000e+00>, %conv
  ret <2 x float> %mul
}

define float @fdiv_pow_shl_cnt_fail_maybe_z(i64 %cnt) nounwind {
; CHECK-SSE-LABEL: fdiv_pow_shl_cnt_fail_maybe_z:
; CHECK-SSE:       # %bb.0:
; CHECK-SSE-NEXT:    movq %rdi, %rcx
; CHECK-SSE-NEXT:    movl $8, %eax
; CHECK-SSE-NEXT:    # kill: def $cl killed $cl killed $rcx
; CHECK-SSE-NEXT:    shlq %cl, %rax
; CHECK-SSE-NEXT:    testq %rax, %rax
; CHECK-SSE-NEXT:    js .LBB22_1
; CHECK-SSE-NEXT:  # %bb.2:
; CHECK-SSE-NEXT:    cvtsi2ss %rax, %xmm1
; CHECK-SSE-NEXT:    jmp .LBB22_3
; CHECK-SSE-NEXT:  .LBB22_1:
; CHECK-SSE-NEXT:    shrq %rax
; CHECK-SSE-NEXT:    cvtsi2ss %rax, %xmm1
; CHECK-SSE-NEXT:    addss %xmm1, %xmm1
; CHECK-SSE-NEXT:  .LBB22_3:
; CHECK-SSE-NEXT:    movss {{.*#+}} xmm0 = [-9.0E+0,0.0E+0,0.0E+0,0.0E+0]
; CHECK-SSE-NEXT:    divss %xmm1, %xmm0
; CHECK-SSE-NEXT:    retq
;
; CHECK-AVX2-LABEL: fdiv_pow_shl_cnt_fail_maybe_z:
; CHECK-AVX2:       # %bb.0:
; CHECK-AVX2-NEXT:    movq %rdi, %rcx
; CHECK-AVX2-NEXT:    movl $8, %eax
; CHECK-AVX2-NEXT:    # kill: def $cl killed $cl killed $rcx
; CHECK-AVX2-NEXT:    shlq %cl, %rax
; CHECK-AVX2-NEXT:    testq %rax, %rax
; CHECK-AVX2-NEXT:    js .LBB22_1
; CHECK-AVX2-NEXT:  # %bb.2:
; CHECK-AVX2-NEXT:    vcvtsi2ss %rax, %xmm0, %xmm0
; CHECK-AVX2-NEXT:    jmp .LBB22_3
; CHECK-AVX2-NEXT:  .LBB22_1:
; CHECK-AVX2-NEXT:    shrq %rax
; CHECK-AVX2-NEXT:    vcvtsi2ss %rax, %xmm0, %xmm0
; CHECK-AVX2-NEXT:    vaddss %xmm0, %xmm0, %xmm0
; CHECK-AVX2-NEXT:  .LBB22_3:
; CHECK-AVX2-NEXT:    vmovss {{.*#+}} xmm1 =
define float @fdiv_pow_shl_cnt_fail_maybe_z(i64 %cnt) nounwind {
; CHECK-SSE-LABEL: fdiv_pow_shl_cnt_fail_maybe_z:
; CHECK-SSE: # %bb.0:
; CHECK-SSE-NEXT: movq %rdi, %rcx
; CHECK-SSE-NEXT: movl $8, %eax
; CHECK-SSE-NEXT: # kill: def $cl killed $cl killed $rcx
; CHECK-SSE-NEXT: shlq %cl, %rax
; CHECK-SSE-NEXT: testq %rax, %rax
; CHECK-SSE-NEXT: js .LBB22_1
; CHECK-SSE-NEXT: # %bb.2:
; CHECK-SSE-NEXT: cvtsi2ss %rax, %xmm1
; CHECK-SSE-NEXT: jmp .LBB22_3
; CHECK-SSE-NEXT: .LBB22_1:
; CHECK-SSE-NEXT: shrq %rax
; CHECK-SSE-NEXT: cvtsi2ss %rax, %xmm1
; CHECK-SSE-NEXT: addss %xmm1, %xmm1
; CHECK-SSE-NEXT: .LBB22_3:
; CHECK-SSE-NEXT: movss {{.*#+}} xmm0 = [-9.0E+0,0.0E+0,0.0E+0,0.0E+0]
; CHECK-SSE-NEXT: divss %xmm1, %xmm0
; CHECK-SSE-NEXT: retq
;
; CHECK-AVX2-LABEL: fdiv_pow_shl_cnt_fail_maybe_z:
; CHECK-AVX2: # %bb.0:
; CHECK-AVX2-NEXT: movq %rdi, %rcx
; CHECK-AVX2-NEXT: movl $8, %eax
; CHECK-AVX2-NEXT: # kill: def $cl killed $cl killed $rcx
; CHECK-AVX2-NEXT: shlq %cl, %rax
; CHECK-AVX2-NEXT: testq %rax, %rax
; CHECK-AVX2-NEXT: js .LBB22_1
; CHECK-AVX2-NEXT: # %bb.2:
; CHECK-AVX2-NEXT: vcvtsi2ss %rax, %xmm0, %xmm0
; CHECK-AVX2-NEXT: jmp .LBB22_3
; CHECK-AVX2-NEXT: .LBB22_1:
; CHECK-AVX2-NEXT: shrq %rax
; CHECK-AVX2-NEXT: vcvtsi2ss %rax, %xmm0, %xmm0
; CHECK-AVX2-NEXT: vaddss %xmm0, %xmm0, %xmm0
; CHECK-AVX2-NEXT: .LBB22_3:
; CHECK-AVX2-NEXT: vmovss {{.*#+}} xmm1 = [-9.0E+0,0.0E+0,0.0E+0,0.0E+0]
; CHECK-AVX2-NEXT: vdivss %xmm0, %xmm1, %xmm0
; CHECK-AVX2-NEXT: retq
;
; CHECK-NO-FASTFMA-LABEL: fdiv_pow_shl_cnt_fail_maybe_z:
; CHECK-NO-FASTFMA: # %bb.0:
; CHECK-NO-FASTFMA-NEXT: movq %rdi, %rcx
; CHECK-NO-FASTFMA-NEXT: movl $8, %eax
; CHECK-NO-FASTFMA-NEXT: # kill: def $cl killed $cl killed $rcx
; CHECK-NO-FASTFMA-NEXT: shlq %cl, %rax
; CHECK-NO-FASTFMA-NEXT: vcvtusi2ss %rax, %xmm0, %xmm0
; CHECK-NO-FASTFMA-NEXT: vmovss {{.*#+}} xmm1 = [-9.0E+0,0.0E+0,0.0E+0,0.0E+0]
; CHECK-NO-FASTFMA-NEXT: vdivss %xmm0, %xmm1, %xmm0
; CHECK-NO-FASTFMA-NEXT: retq
;
; CHECK-FMA-LABEL: fdiv_pow_shl_cnt_fail_maybe_z:
; CHECK-FMA: # %bb.0:
; CHECK-FMA-NEXT: movl $8, %eax
; CHECK-FMA-NEXT: shlxq %rdi, %rax, %rax
; CHECK-FMA-NEXT: vcvtusi2ss %rax, %xmm0, %xmm0
; CHECK-FMA-NEXT: vmovss {{.*#+}} xmm1 = [-9.0E+0,0.0E+0,0.0E+0,0.0E+0]
; CHECK-FMA-NEXT: vdivss %xmm0, %xmm1, %xmm0
; CHECK-FMA-NEXT: retq
  %shl = shl i64 8, %cnt
  %conv = uitofp i64 %shl to float
  %mul = fdiv float -9.000000e+00, %conv
  ret float %mul
}

define float @fdiv_pow_shl_cnt_fail_neg_int(i64 %cnt) nounwind {
; CHECK-SSE-LABEL: fdiv_pow_shl_cnt_fail_neg_int:
; CHECK-SSE: # %bb.0:
; CHECK-SSE-NEXT: movq %rdi, %rcx
; CHECK-SSE-NEXT: movl $8, %eax
; CHECK-SSE-NEXT: # kill: def $cl killed $cl killed $rcx
; CHECK-SSE-NEXT: shlq %cl, %rax
; CHECK-SSE-NEXT: cvtsi2ss %rax, %xmm1
; CHECK-SSE-NEXT: movss {{.*#+}} xmm0 = [-9.0E+0,0.0E+0,0.0E+0,0.0E+0]
; CHECK-SSE-NEXT: divss %xmm1, %xmm0
; CHECK-SSE-NEXT: retq
;
; CHECK-AVX2-LABEL: fdiv_pow_shl_cnt_fail_neg_int:
; CHECK-AVX2: # %bb.0:
; CHECK-AVX2-NEXT: movq %rdi, %rcx
; CHECK-AVX2-NEXT: movl $8, %eax
; CHECK-AVX2-NEXT: # kill: def $cl killed $cl killed $rcx
; CHECK-AVX2-NEXT: shlq %cl, %rax
; CHECK-AVX2-NEXT: vcvtsi2ss %rax, %xmm0, %xmm0
; CHECK-AVX2-NEXT: vmovss {{.*#+}} xmm1 = [-9.0E+0,0.0E+0,0.0E+0,0.0E+0]
; CHECK-AVX2-NEXT: vdivss %xmm0, %xmm1, %xmm0
; CHECK-AVX2-NEXT: retq
;
; CHECK-NO-FASTFMA-LABEL: fdiv_pow_shl_cnt_fail_neg_int:
; CHECK-NO-FASTFMA: # %bb.0:
; CHECK-NO-FASTFMA-NEXT: movq %rdi, %rcx
; CHECK-NO-FASTFMA-NEXT: movl $8, %eax
; CHECK-NO-FASTFMA-NEXT: # kill: def $cl killed $cl killed $rcx
; CHECK-NO-FASTFMA-NEXT: shlq %cl, %rax
; CHECK-NO-FASTFMA-NEXT: vcvtsi2ss %rax, %xmm0, %xmm0
; CHECK-NO-FASTFMA-NEXT: vmovss {{.*#+}} xmm1 = [-9.0E+0,0.0E+0,0.0E+0,0.0E+0]
; CHECK-NO-FASTFMA-NEXT: vdivss %xmm0, %xmm1, %xmm0
; CHECK-NO-FASTFMA-NEXT: retq
;
; CHECK-FMA-LABEL: fdiv_pow_shl_cnt_fail_neg_int:
; CHECK-FMA: # %bb.0:
; CHECK-FMA-NEXT: movl $8, %eax
; CHECK-FMA-NEXT: shlxq %rdi, %rax, %rax
; CHECK-FMA-NEXT: vcvtsi2ss %rax, %xmm0, %xmm0
; CHECK-FMA-NEXT: vmovss {{.*#+}} xmm1 = [-9.0E+0,0.0E+0,0.0E+0,0.0E+0]
; CHECK-FMA-NEXT: vdivss %xmm0, %xmm1, %xmm0
; CHECK-FMA-NEXT: retq
  %shl = shl i64 8, %cnt
  %conv = sitofp i64 %shl to float
  %mul = fdiv float -9.000000e+00, %conv
  ret float %mul
}
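
; The pattern exercised throughout this file is, in rough source form (an
; illustrative sketch, not one of the checked functions; C stands for a
; floating-point constant):
;
;   %p2   = shl nuw i32 1, %cnt        ; 2^cnt, known not to wrap
;   %p2fp = uitofp i32 %p2 to float    ; exact: a power of two converts losslessly
;   %res  = fdiv float C, %p2fp        ; or: fmul float C, %p2fp
;
; When the scaled result provably stays in range, the division or
; multiplication can be lowered as integer arithmetic on the exponent field of
; C. The next test masks the count to [0, 31] but converts with sitofp; its
; current checks still expect a cvtsi2ss plus divss sequence everywhere.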
define float @fdiv_pow_shl_cnt(i64 %cnt_in) nounwind {
; CHECK-SSE-LABEL: fdiv_pow_shl_cnt:
; CHECK-SSE: # %bb.0:
; CHECK-SSE-NEXT: movq %rdi, %rcx
; CHECK-SSE-NEXT: andb $31, %cl
; CHECK-SSE-NEXT: movl $8, %eax
; CHECK-SSE-NEXT: # kill: def $cl killed $cl killed $rcx
; CHECK-SSE-NEXT: shlq %cl, %rax
; CHECK-SSE-NEXT: cvtsi2ss %rax, %xmm1
; CHECK-SSE-NEXT: movss {{.*#+}} xmm0 = [-5.0E-1,0.0E+0,0.0E+0,0.0E+0]
; CHECK-SSE-NEXT: divss %xmm1, %xmm0
; CHECK-SSE-NEXT: retq
;
; CHECK-AVX2-LABEL: fdiv_pow_shl_cnt:
; CHECK-AVX2: # %bb.0:
; CHECK-AVX2-NEXT: movq %rdi, %rcx
; CHECK-AVX2-NEXT: andb $31, %cl
; CHECK-AVX2-NEXT: movl $8, %eax
; CHECK-AVX2-NEXT: # kill: def $cl killed $cl killed $rcx
; CHECK-AVX2-NEXT: shlq %cl, %rax
; CHECK-AVX2-NEXT: vcvtsi2ss %rax, %xmm0, %xmm0
; CHECK-AVX2-NEXT: vmovss {{.*#+}} xmm1 = [-5.0E-1,0.0E+0,0.0E+0,0.0E+0]
; CHECK-AVX2-NEXT: vdivss %xmm0, %xmm1, %xmm0
; CHECK-AVX2-NEXT: retq
;
; CHECK-NO-FASTFMA-LABEL: fdiv_pow_shl_cnt:
; CHECK-NO-FASTFMA: # %bb.0:
; CHECK-NO-FASTFMA-NEXT: movq %rdi, %rcx
; CHECK-NO-FASTFMA-NEXT: andb $31, %cl
; CHECK-NO-FASTFMA-NEXT: movl $8, %eax
; CHECK-NO-FASTFMA-NEXT: # kill: def $cl killed $cl killed $rcx
; CHECK-NO-FASTFMA-NEXT: shlq %cl, %rax
; CHECK-NO-FASTFMA-NEXT: vcvtsi2ss %rax, %xmm0, %xmm0
; CHECK-NO-FASTFMA-NEXT: vmovss {{.*#+}} xmm1 = [-5.0E-1,0.0E+0,0.0E+0,0.0E+0]
; CHECK-NO-FASTFMA-NEXT: vdivss %xmm0, %xmm1, %xmm0
; CHECK-NO-FASTFMA-NEXT: retq
;
; CHECK-FMA-LABEL: fdiv_pow_shl_cnt:
; CHECK-FMA: # %bb.0:
; CHECK-FMA-NEXT: andb $31, %dil
; CHECK-FMA-NEXT: movl $8, %eax
; CHECK-FMA-NEXT: shlxq %rdi, %rax, %rax
; CHECK-FMA-NEXT: vcvtsi2ss %rax, %xmm0, %xmm0
; CHECK-FMA-NEXT: vmovss {{.*#+}} xmm1 = [-5.0E-1,0.0E+0,0.0E+0,0.0E+0]
; CHECK-FMA-NEXT: vdivss %xmm0, %xmm1, %xmm0
; CHECK-FMA-NEXT: retq
  %cnt = and i64 %cnt_in, 31
  %shl = shl i64 8, %cnt
  %conv = sitofp i64 %shl to float
  %mul = fdiv float -0.500000e+00, %conv
  ret float %mul
}
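
; The next test divides 0xH7000 (8192.0 in half precision, the 8.192E+3 in the
; checks) by 2^%cnt for an unmasked i32 count, so the quotient can fall below
; the normal half-precision range. The checks therefore keep a genuine
; conversion and division, including the __truncsfhf2/__extendhfsf2 libcalls
; on plain SSE.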
define half @fdiv_pow_shl_cnt_fail_out_of_bounds(i32 %cnt) nounwind {
; CHECK-SSE-LABEL: fdiv_pow_shl_cnt_fail_out_of_bounds:
; CHECK-SSE: # %bb.0:
; CHECK-SSE-NEXT: pushq %rax
; CHECK-SSE-NEXT: movl %edi, %ecx
; CHECK-SSE-NEXT: movl $1, %eax
; CHECK-SSE-NEXT: # kill: def $cl killed $cl killed $ecx
; CHECK-SSE-NEXT: shll %cl, %eax
; CHECK-SSE-NEXT: cvtsi2ss %rax, %xmm0
; CHECK-SSE-NEXT: callq __truncsfhf2@PLT
; CHECK-SSE-NEXT: callq __extendhfsf2@PLT
; CHECK-SSE-NEXT: movss {{.*#+}} xmm1 = [8.192E+3,0.0E+0,0.0E+0,0.0E+0]
; CHECK-SSE-NEXT: divss %xmm0, %xmm1
; CHECK-SSE-NEXT: movaps %xmm1, %xmm0
; CHECK-SSE-NEXT: callq __truncsfhf2@PLT
; CHECK-SSE-NEXT: popq %rax
; CHECK-SSE-NEXT: retq
;
; CHECK-AVX2-LABEL: fdiv_pow_shl_cnt_fail_out_of_bounds:
; CHECK-AVX2: # %bb.0:
; CHECK-AVX2-NEXT: pushq %rax
; CHECK-AVX2-NEXT: movl %edi, %ecx
; CHECK-AVX2-NEXT: movl $1, %eax
; CHECK-AVX2-NEXT: # kill: def $cl killed $cl killed $ecx
; CHECK-AVX2-NEXT: shll %cl, %eax
; CHECK-AVX2-NEXT: vcvtsi2ss %rax, %xmm0, %xmm0
; CHECK-AVX2-NEXT: callq __truncsfhf2@PLT
; CHECK-AVX2-NEXT: callq __extendhfsf2@PLT
; CHECK-AVX2-NEXT: vmovss {{.*#+}} xmm1 = [8.192E+3,0.0E+0,0.0E+0,0.0E+0]
; CHECK-AVX2-NEXT: vdivss %xmm0, %xmm1, %xmm0
; CHECK-AVX2-NEXT: callq __truncsfhf2@PLT
; CHECK-AVX2-NEXT: popq %rax
; CHECK-AVX2-NEXT: retq
;
; CHECK-NO-FASTFMA-LABEL: fdiv_pow_shl_cnt_fail_out_of_bounds:
; CHECK-NO-FASTFMA: # %bb.0:
; CHECK-NO-FASTFMA-NEXT: movl %edi, %ecx
; CHECK-NO-FASTFMA-NEXT: movl $1, %eax
; CHECK-NO-FASTFMA-NEXT: # kill: def $cl killed $cl killed $ecx
; CHECK-NO-FASTFMA-NEXT: shll %cl, %eax
; CHECK-NO-FASTFMA-NEXT: vcvtusi2ss %eax, %xmm0, %xmm0
; CHECK-NO-FASTFMA-NEXT: vcvtps2ph $4, %xmm0, %xmm0
; CHECK-NO-FASTFMA-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
; CHECK-NO-FASTFMA-NEXT: vcvtph2ps %xmm0, %xmm0
; CHECK-NO-FASTFMA-NEXT: vmovss {{.*#+}} xmm1 = [8.192E+3,0.0E+0,0.0E+0,0.0E+0]
; CHECK-NO-FASTFMA-NEXT: vdivss %xmm0, %xmm1, %xmm0
; CHECK-NO-FASTFMA-NEXT: vcvtps2ph $4, %xmm0, %xmm0
; CHECK-NO-FASTFMA-NEXT: vmovd %xmm0, %eax
; CHECK-NO-FASTFMA-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0
; CHECK-NO-FASTFMA-NEXT: retq
;
; CHECK-FMA-LABEL: fdiv_pow_shl_cnt_fail_out_of_bounds:
; CHECK-FMA: # %bb.0:
; CHECK-FMA-NEXT: movl $1, %eax
; CHECK-FMA-NEXT: shlxl %edi, %eax, %eax
; CHECK-FMA-NEXT: vcvtusi2ss %eax, %xmm0, %xmm0
; CHECK-FMA-NEXT: vcvtps2ph $4, %xmm0, %xmm0
; CHECK-FMA-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
; CHECK-FMA-NEXT: vcvtph2ps %xmm0, %xmm0
; CHECK-FMA-NEXT: vmovss {{.*#+}} xmm1 = [8.192E+3,0.0E+0,0.0E+0,0.0E+0]
; CHECK-FMA-NEXT: vdivss %xmm0, %xmm1, %xmm0
; CHECK-FMA-NEXT: vcvtps2ph $4, %xmm0, %xmm0
; CHECK-FMA-NEXT: vmovd %xmm0, %eax
; CHECK-FMA-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0
; CHECK-FMA-NEXT: retq
  %shl = shl nuw i32 1, %cnt
  %conv = uitofp i32 %shl to half
  %mul = fdiv half 0xH7000, %conv
  ret half %mul
}

define half @fdiv_pow_shl_cnt_in_bounds(i16 %cnt) nounwind {
; CHECK-SSE-LABEL: fdiv_pow_shl_cnt_in_bounds:
; CHECK-SSE: # %bb.0:
; CHECK-SSE-NEXT: shll $10, %edi
; CHECK-SSE-NEXT: movl $28672, %eax # imm = 0x7000
; CHECK-SSE-NEXT: subl %edi, %eax
; CHECK-SSE-NEXT: pinsrw $0, %eax, %xmm0
; CHECK-SSE-NEXT: retq
;
; CHECK-AVX-LABEL: fdiv_pow_shl_cnt_in_bounds:
; CHECK-AVX: # %bb.0:
; CHECK-AVX-NEXT: shll $10, %edi
; CHECK-AVX-NEXT: movl $28672, %eax # imm = 0x7000
; CHECK-AVX-NEXT: subl %edi, %eax
; CHECK-AVX-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0
; CHECK-AVX-NEXT: retq
  %shl = shl nuw i16 1, %cnt
  %conv = uitofp i16 %shl to half
  %mul = fdiv half 0xH7000, %conv
  ret half %mul
}

define half @fdiv_pow_shl_cnt_in_bounds2(i16 %cnt) nounwind {
; CHECK-SSE-LABEL: fdiv_pow_shl_cnt_in_bounds2:
; CHECK-SSE: # %bb.0:
; CHECK-SSE-NEXT: shll $10, %edi
; CHECK-SSE-NEXT: movl $18432, %eax # imm = 0x4800
; CHECK-SSE-NEXT: subl %edi, %eax
; CHECK-SSE-NEXT: pinsrw $0, %eax, %xmm0
; CHECK-SSE-NEXT: retq
;
; CHECK-AVX-LABEL: fdiv_pow_shl_cnt_in_bounds2:
; CHECK-AVX: # %bb.0:
; CHECK-AVX-NEXT: shll $10, %edi
; CHECK-AVX-NEXT: movl $18432, %eax # imm = 0x4800
; CHECK-AVX-NEXT: subl %edi, %eax
; CHECK-AVX-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0
; CHECK-AVX-NEXT: retq
  %shl = shl nuw i16 1, %cnt
  %conv = uitofp i16 %shl to half
  %mul = fdiv half 0xH4800, %conv
  ret half %mul
}
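
; fdiv_pow_shl_cnt_in_bounds and fdiv_pow_shl_cnt_in_bounds2 above fold the
; half-precision division into an integer subtract from the numerator's bit
; pattern (0x7000 and 0x4800, with the count scaled into the exponent field by
; shll $10). The next case uses 0xH4000 (2.0) as the numerator and is expected
; not to fold; its checks keep the libcall-based convert/divide sequence.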
define half @fdiv_pow_shl_cnt_fail_out_of_bound2(i16 %cnt) nounwind {
; CHECK-SSE-LABEL: fdiv_pow_shl_cnt_fail_out_of_bound2:
; CHECK-SSE: # %bb.0:
; CHECK-SSE-NEXT: pushq %rax
; CHECK-SSE-NEXT: movl %edi, %ecx
; CHECK-SSE-NEXT: movl $1, %eax
; CHECK-SSE-NEXT: # kill: def $cl killed $cl killed $ecx
; CHECK-SSE-NEXT: shll %cl, %eax
; CHECK-SSE-NEXT: movzwl %ax, %eax
; CHECK-SSE-NEXT: cvtsi2ss %eax, %xmm0
; CHECK-SSE-NEXT: callq __truncsfhf2@PLT
; CHECK-SSE-NEXT: callq __extendhfsf2@PLT
; CHECK-SSE-NEXT: movss {{.*#+}} xmm1 = [2.0E+0,0.0E+0,0.0E+0,0.0E+0]
; CHECK-SSE-NEXT: divss %xmm0, %xmm1
; CHECK-SSE-NEXT: movaps %xmm1, %xmm0
; CHECK-SSE-NEXT: callq __truncsfhf2@PLT
; CHECK-SSE-NEXT: popq %rax
; CHECK-SSE-NEXT: retq
;
; CHECK-AVX2-LABEL: fdiv_pow_shl_cnt_fail_out_of_bound2:
; CHECK-AVX2: # %bb.0:
; CHECK-AVX2-NEXT: pushq %rax
; CHECK-AVX2-NEXT: movl %edi, %ecx
; CHECK-AVX2-NEXT: movl $1, %eax
; CHECK-AVX2-NEXT: # kill: def $cl killed $cl killed $ecx
; CHECK-AVX2-NEXT: shll %cl, %eax
; CHECK-AVX2-NEXT: movzwl %ax, %eax
; CHECK-AVX2-NEXT: vcvtsi2ss %eax, %xmm0, %xmm0
; CHECK-AVX2-NEXT: callq __truncsfhf2@PLT
; CHECK-AVX2-NEXT: callq __extendhfsf2@PLT
; CHECK-AVX2-NEXT: vmovss {{.*#+}} xmm1 = [2.0E+0,0.0E+0,0.0E+0,0.0E+0]
; CHECK-AVX2-NEXT: vdivss %xmm0, %xmm1, %xmm0
; CHECK-AVX2-NEXT: callq __truncsfhf2@PLT
; CHECK-AVX2-NEXT: popq %rax
; CHECK-AVX2-NEXT: retq
;
; CHECK-NO-FASTFMA-LABEL: fdiv_pow_shl_cnt_fail_out_of_bound2:
; CHECK-NO-FASTFMA: # %bb.0:
; CHECK-NO-FASTFMA-NEXT: movl %edi, %ecx
; CHECK-NO-FASTFMA-NEXT: movl $1, %eax
; CHECK-NO-FASTFMA-NEXT: # kill: def $cl killed $cl killed $ecx
; CHECK-NO-FASTFMA-NEXT: shll %cl, %eax
; CHECK-NO-FASTFMA-NEXT: movzwl %ax, %eax
; CHECK-NO-FASTFMA-NEXT: vcvtsi2ss %eax, %xmm0, %xmm0
; CHECK-NO-FASTFMA-NEXT: vcvtps2ph $4, %xmm0, %xmm0
; CHECK-NO-FASTFMA-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
; CHECK-NO-FASTFMA-NEXT: vcvtph2ps %xmm0, %xmm0
; CHECK-NO-FASTFMA-NEXT: vmovss {{.*#+}} xmm1 = [2.0E+0,0.0E+0,0.0E+0,0.0E+0]
; CHECK-NO-FASTFMA-NEXT: vdivss %xmm0, %xmm1, %xmm0
; CHECK-NO-FASTFMA-NEXT: vcvtps2ph $4, %xmm0, %xmm0
; CHECK-NO-FASTFMA-NEXT: vmovd %xmm0, %eax
; CHECK-NO-FASTFMA-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0
; CHECK-NO-FASTFMA-NEXT: retq
;
; CHECK-FMA-LABEL: fdiv_pow_shl_cnt_fail_out_of_bound2:
; CHECK-FMA: # %bb.0:
; CHECK-FMA-NEXT: movl $1, %eax
; CHECK-FMA-NEXT: shlxl %edi, %eax, %eax
; CHECK-FMA-NEXT: movzwl %ax, %eax
; CHECK-FMA-NEXT: vcvtsi2ss %eax, %xmm0, %xmm0
; CHECK-FMA-NEXT: vcvtps2ph $4, %xmm0, %xmm0
; CHECK-FMA-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
; CHECK-FMA-NEXT: vcvtph2ps %xmm0, %xmm0
; CHECK-FMA-NEXT: vmovss {{.*#+}} xmm1 = [2.0E+0,0.0E+0,0.0E+0,0.0E+0]
; CHECK-FMA-NEXT: vdivss %xmm0, %xmm1, %xmm0
; CHECK-FMA-NEXT: vcvtps2ph $4, %xmm0, %xmm0
; CHECK-FMA-NEXT: vmovd %xmm0, %eax
; CHECK-FMA-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0
; CHECK-FMA-NEXT: retq
  %shl = shl nuw i16 1, %cnt
  %conv = uitofp i16 %shl to half
  %mul = fdiv half 0xH4000, %conv
  ret half %mul
}

define double @fdiv_pow_shl_cnt32_to_dbl_okay(i32 %cnt) nounwind {
; CHECK-SSE-LABEL: fdiv_pow_shl_cnt32_to_dbl_okay:
; CHECK-SSE: # %bb.0:
; CHECK-SSE-NEXT: # kill: def $edi killed $edi def $rdi
; CHECK-SSE-NEXT: shlq $52, %rdi
; CHECK-SSE-NEXT: movabsq $3936146074321813504, %rax # imm = 0x36A0000000000000
; CHECK-SSE-NEXT: subq %rdi, %rax
; CHECK-SSE-NEXT: movq %rax, %xmm0
; CHECK-SSE-NEXT: retq
;
; CHECK-AVX-LABEL: fdiv_pow_shl_cnt32_to_dbl_okay:
; CHECK-AVX: # %bb.0:
; CHECK-AVX-NEXT: # kill: def $edi killed $edi def $rdi
; CHECK-AVX-NEXT: shlq $52, %rdi
; CHECK-AVX-NEXT: movabsq $3936146074321813504, %rax # imm = 0x36A0000000000000
; CHECK-AVX-NEXT: subq %rdi, %rax
; CHECK-AVX-NEXT: vmovq %rax, %xmm0
; CHECK-AVX-NEXT: retq
  %shl = shl nuw i32 1, %cnt
  %conv = uitofp i32 %shl to double
  %mul = fdiv double 0x36A0000000000000, %conv
  ret double %mul
}
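
; fdiv_pow_shl_cnt32_to_dbl_okay above folds because any i32 power of two is
; exact in double and 0x36A0000000000000 / 2^31 is still a normal double, so
; the division becomes a shlq $52 plus an integer subtract. The next two tests
; use a float numerator near 2^-93: out_of_bounds2 is expected to keep the
; real conversion and division, while cnt32_okay still folds to an integer
; subtract from the float bit pattern 0x11000000.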
define float @fdiv_pow_shl_cnt32_out_of_bounds2(i32 %cnt) nounwind {
; CHECK-SSE-LABEL: fdiv_pow_shl_cnt32_out_of_bounds2:
; CHECK-SSE: # %bb.0:
; CHECK-SSE-NEXT: movl %edi, %ecx
; CHECK-SSE-NEXT: movl $1, %eax
; CHECK-SSE-NEXT: # kill: def $cl killed $cl killed $ecx
; CHECK-SSE-NEXT: shll %cl, %eax
; CHECK-SSE-NEXT: cvtsi2ss %rax, %xmm1
; CHECK-SSE-NEXT: movss {{.*#+}} xmm0 = [1.00974148E-28,0.0E+0,0.0E+0,0.0E+0]
; CHECK-SSE-NEXT: divss %xmm1, %xmm0
; CHECK-SSE-NEXT: retq
;
; CHECK-AVX2-LABEL: fdiv_pow_shl_cnt32_out_of_bounds2:
; CHECK-AVX2: # %bb.0:
; CHECK-AVX2-NEXT: movl %edi, %ecx
; CHECK-AVX2-NEXT: movl $1, %eax
; CHECK-AVX2-NEXT: # kill: def $cl killed $cl killed $ecx
; CHECK-AVX2-NEXT: shll %cl, %eax
; CHECK-AVX2-NEXT: vcvtsi2ss %rax, %xmm0, %xmm0
; CHECK-AVX2-NEXT: vmovss {{.*#+}} xmm1 = [1.00974148E-28,0.0E+0,0.0E+0,0.0E+0]
; CHECK-AVX2-NEXT: vdivss %xmm0, %xmm1, %xmm0
; CHECK-AVX2-NEXT: retq
;
; CHECK-NO-FASTFMA-LABEL: fdiv_pow_shl_cnt32_out_of_bounds2:
; CHECK-NO-FASTFMA: # %bb.0:
; CHECK-NO-FASTFMA-NEXT: movl %edi, %ecx
; CHECK-NO-FASTFMA-NEXT: movl $1, %eax
; CHECK-NO-FASTFMA-NEXT: # kill: def $cl killed $cl killed $ecx
; CHECK-NO-FASTFMA-NEXT: shll %cl, %eax
; CHECK-NO-FASTFMA-NEXT: vcvtusi2ss %eax, %xmm0, %xmm0
; CHECK-NO-FASTFMA-NEXT: vmovss {{.*#+}} xmm1 = [1.00974148E-28,0.0E+0,0.0E+0,0.0E+0]
; CHECK-NO-FASTFMA-NEXT: vdivss %xmm0, %xmm1, %xmm0
; CHECK-NO-FASTFMA-NEXT: retq
;
; CHECK-FMA-LABEL: fdiv_pow_shl_cnt32_out_of_bounds2:
; CHECK-FMA: # %bb.0:
; CHECK-FMA-NEXT: movl $1, %eax
; CHECK-FMA-NEXT: shlxl %edi, %eax, %eax
; CHECK-FMA-NEXT: vcvtusi2ss %eax, %xmm0, %xmm0
; CHECK-FMA-NEXT: vmovss {{.*#+}} xmm1 = [1.00974148E-28,0.0E+0,0.0E+0,0.0E+0]
; CHECK-FMA-NEXT: vdivss %xmm0, %xmm1, %xmm0
; CHECK-FMA-NEXT: retq
  %shl = shl nuw i32 1, %cnt
  %conv = uitofp i32 %shl to float
  %mul = fdiv float 0x3a1fffff00000000, %conv
  ret float %mul
}

define float @fdiv_pow_shl_cnt32_okay(i32 %cnt) nounwind {
; CHECK-SSE-LABEL: fdiv_pow_shl_cnt32_okay:
; CHECK-SSE: # %bb.0:
; CHECK-SSE-NEXT: shll $23, %edi
; CHECK-SSE-NEXT: movl $285212672, %eax # imm = 0x11000000
; CHECK-SSE-NEXT: subl %edi, %eax
; CHECK-SSE-NEXT: movd %eax, %xmm0
; CHECK-SSE-NEXT: retq
;
; CHECK-AVX-LABEL: fdiv_pow_shl_cnt32_okay:
; CHECK-AVX: # %bb.0:
; CHECK-AVX-NEXT: shll $23, %edi
; CHECK-AVX-NEXT: movl $285212672, %eax # imm = 0x11000000
; CHECK-AVX-NEXT: subl %edi, %eax
; CHECK-AVX-NEXT: vmovd %eax, %xmm0
; CHECK-AVX-NEXT: retq
  %shl = shl nuw i32 1, %cnt
  %conv = uitofp i32 %shl to float
  %mul = fdiv float 0x3a20000000000000, %conv
  ret float %mul
}
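
; Worked example for the final test above (a sketch of the arithmetic, not an
; additional checked case): the numerator 0x3a20000000000000 is the double
; spelling of 2^-93, whose float bit pattern is 0x11000000 (285212672 in the
; checks). For %cnt in [0, 31] the quotient 2^-93 / 2^cnt stays a normal
; float, so the whole fdiv reduces to the integer computation
; 0x11000000 - (%cnt << 23), which is exactly what the shll/subl sequence in
; fdiv_pow_shl_cnt32_okay does.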