; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-- -mattr=+avx | FileCheck %s --check-prefixes=CHECK,X86
; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx2 | FileCheck %s --check-prefixes=CHECK,X64

define <2 x i64> @freeze_insert_vector_elt(<2 x i64> %a0) {
; CHECK-LABEL: freeze_insert_vector_elt:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; CHECK-NEXT:    ret{{[l|q]}}
  %idx0 = insertelement <2 x i64> %a0, i64 0, i64 0
  %freeze0 = freeze <2 x i64> %idx0
  %idx1 = insertelement <2 x i64> %freeze0, i64 0, i64 1
  %freeze1 = freeze <2 x i64> %idx1
  ret <2 x i64> %freeze1
}

define <4 x i32> @freeze_insert_subvector(<8 x i32> %a0) nounwind {
; CHECK-LABEL: freeze_insert_subvector:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; CHECK-NEXT:    ret{{[l|q]}}
  %x = shufflevector <8 x i32> %a0, <8 x i32> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
  %y = freeze <8 x i32> %x
  %z = shufflevector <8 x i32> %y, <8 x i32> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  ret <4 x i32> %z
}

define <2 x i64> @freeze_sign_extend_vector_inreg(<16 x i8> %a0) nounwind {
; CHECK-LABEL: freeze_sign_extend_vector_inreg:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpmovsxbq %xmm0, %xmm0
; CHECK-NEXT:    ret{{[l|q]}}
  %x = sext <16 x i8> %a0 to <16 x i32>
  %y = shufflevector <16 x i32> %x, <16 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %z = freeze <4 x i32> %y
  %w = sext <4 x i32> %z to <4 x i64>
  %r = shufflevector <4 x i64> %w, <4 x i64> poison, <2 x i32> <i32 0, i32 1>
  ret <2 x i64> %r
}

define <2 x i64> @freeze_zero_extend_vector_inreg(<16 x i8> %a0) nounwind {
; CHECK-LABEL: freeze_zero_extend_vector_inreg:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
; CHECK-NEXT:    ret{{[l|q]}}
  %x = zext <16 x i8> %a0 to <16 x i32>
  %y = shufflevector <16 x i32> %x, <16 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %z = freeze <4 x i32> %y
  %w = zext <4 x i32> %z to <4 x i64>
  %r = shufflevector <4 x i64> %w, <4 x i64> poison, <2 x i32> <i32 0, i32 1>
  ret <2 x i64> %r
}

define <4 x i32> @freeze_pshufd(<4 x i32> %a0) nounwind {
; CHECK-LABEL: freeze_pshufd:
; CHECK:       # %bb.0:
; CHECK-NEXT:    ret{{[l|q]}}
  %x = shufflevector <4 x i32> %a0, <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
  %y = freeze <4 x i32> %x
  %z = shufflevector <4 x i32> %y, <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
  ret <4 x i32> %z
}

define <4 x float> @freeze_permilps(<4 x float> %a0) nounwind {
; CHECK-LABEL: freeze_permilps:
; CHECK:       # %bb.0:
; CHECK-NEXT:    ret{{[l|q]}}
  %x = shufflevector <4 x float> %a0, <4 x float> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
  %y = freeze <4 x float> %x
  %z = shufflevector <4 x float> %y, <4 x float> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
  ret <4 x float> %z
}

define void @freeze_bitcast_from_wider_elt(ptr %origin, ptr %dst) nounwind {
; X86-LABEL: freeze_bitcast_from_wider_elt:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; X86-NEXT:    vmovsd %xmm0, (%eax)
; X86-NEXT:    retl
;
; X64-LABEL: freeze_bitcast_from_wider_elt:
; X64:       # %bb.0:
; X64-NEXT:    movq (%rdi), %rax
; X64-NEXT:    movq %rax, (%rsi)
; X64-NEXT:    retq
  %i0 = load <4 x i16>, ptr %origin
  %i1 = bitcast <4 x i16> %i0 to <8 x i8>
  %i2 = freeze <8 x i8> %i1
  %i3 = bitcast <8 x i8> %i2 to i64
  store i64 %i3, ptr %dst
  ret void
}

define void @freeze_bitcast_from_wider_elt_escape(ptr %origin, ptr %escape, ptr %dst) nounwind {
; X86-LABEL: freeze_bitcast_from_wider_elt_escape:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; X86-NEXT:    vmovsd %xmm0, (%ecx)
; X86-NEXT:    vmovsd %xmm0, (%eax)
; X86-NEXT:    retl
;
; X64-LABEL: freeze_bitcast_from_wider_elt_escape:
; X64:       # %bb.0:
; X64-NEXT:    movq (%rdi), %rax
; X64-NEXT:    movq %rax, (%rsi)
; X64-NEXT:    movq %rax, (%rdx)
; X64-NEXT:    retq
  %i0 = load <4 x i16>, ptr %origin
  %i1 = bitcast <4 x i16> %i0 to <8 x i8>
  store <8 x i8> %i1, ptr %escape
  %i2 = freeze <8 x i8> %i1
  %i3 = bitcast <8 x i8> %i2 to i64
  store i64 %i3, ptr %dst
  ret void
}

define void @freeze_bitcast_to_wider_elt(ptr %origin, ptr %dst) nounwind {
; X86-LABEL: freeze_bitcast_to_wider_elt:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; X86-NEXT:    vmovsd %xmm0, (%eax)
; X86-NEXT:    retl
;
; X64-LABEL: freeze_bitcast_to_wider_elt:
; X64:       # %bb.0:
; X64-NEXT:    movq (%rdi), %rax
; X64-NEXT:    movq %rax, (%rsi)
; X64-NEXT:    retq
  %i0 = load <8 x i8>, ptr %origin
  %i1 = bitcast <8 x i8> %i0 to <4 x i16>
  %i2 = freeze <4 x i16> %i1
  %i3 = bitcast <4 x i16> %i2 to i64
  store i64 %i3, ptr %dst
  ret void
}

define void @freeze_bitcast_to_wider_elt_escape(ptr %origin, ptr %escape, ptr %dst) nounwind {
; X86-LABEL: freeze_bitcast_to_wider_elt_escape:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; X86-NEXT:    vmovsd %xmm0, (%ecx)
; X86-NEXT:    vmovsd %xmm0, (%eax)
; X86-NEXT:    retl
;
; X64-LABEL: freeze_bitcast_to_wider_elt_escape:
; X64:       # %bb.0:
; X64-NEXT:    movq (%rdi), %rax
; X64-NEXT:    movq %rax, (%rsi)
; X64-NEXT:    movq %rax, (%rdx)
; X64-NEXT:    retq
  %i0 = load <8 x i8>, ptr %origin
  %i1 = bitcast <8 x i8> %i0 to <4 x i16>
  store <4 x i16> %i1, ptr %escape
  %i2 = freeze <4 x i16> %i1
  %i3 = bitcast <4 x i16> %i2 to i64
  store i64 %i3, ptr %dst
  ret void
}

define void @freeze_extractelement(ptr %origin0, ptr %origin1, ptr %dst) nounwind {
; X86-LABEL: freeze_extractelement:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    vmovdqa (%edx), %xmm0
; X86-NEXT:    vpand (%ecx), %xmm0, %xmm0
; X86-NEXT:    vpextrb $6, %xmm0, %ecx
; X86-NEXT:    movb %cl, (%eax)
; X86-NEXT:    retl
;
; X64-LABEL: freeze_extractelement:
; X64:       # %bb.0:
; X64-NEXT:    vmovdqa (%rdi), %xmm0
; X64-NEXT:    vpand (%rsi), %xmm0, %xmm0
; X64-NEXT:    vpextrb $6, %xmm0, %eax
; X64-NEXT:    movb %al, (%rdx)
; X64-NEXT:    retq
  %i0 = load <16 x i8>, ptr %origin0
  %i1 = load <16 x i8>, ptr %origin1
  %i2 = and <16 x i8> %i0, %i1
  %i3 = freeze <16 x i8> %i2
  %i4 = extractelement <16 x i8> %i3, i64 6
  store i8 %i4, ptr %dst
  ret void
}

define void @freeze_extractelement_escape(ptr %origin0, ptr %origin1, ptr %dst, ptr %escape) nounwind {
; X86-LABEL: freeze_extractelement_escape:
; X86:       # %bb.0:
; X86-NEXT:    pushl %esi
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
; X86-NEXT:    vmovdqa (%esi), %xmm0
; X86-NEXT:    vpand (%edx), %xmm0, %xmm0
; X86-NEXT:    vmovdqa %xmm0, (%ecx)
; X86-NEXT:    vpextrb $6, %xmm0, (%eax)
; X86-NEXT:    popl %esi
; X86-NEXT:    retl
;
; X64-LABEL: freeze_extractelement_escape:
; X64:       # %bb.0:
; X64-NEXT:    vmovdqa (%rdi), %xmm0
; X64-NEXT:    vpand (%rsi), %xmm0, %xmm0
; X64-NEXT:    vmovdqa %xmm0, (%rcx)
; X64-NEXT:    vpextrb $6, %xmm0, (%rdx)
; X64-NEXT:    retq
  %i0 = load <16 x i8>, ptr %origin0
  %i1 = load <16 x i8>, ptr %origin1
  %i2 = and <16 x i8> %i0, %i1
  %i3 = freeze <16 x i8> %i2
  store <16 x i8> %i3, ptr %escape
  %i4 = extractelement <16 x i8> %i3, i64 6
  store i8 %i4, ptr %dst
  ret void
}

; It would be a miscompilation to pull freeze out of extractelement here.
define void @freeze_extractelement_extra_use(ptr %origin0, ptr %origin1, i64 %idx0, i64 %idx1, ptr %dst, ptr %escape) nounwind {
; X86-LABEL: freeze_extractelement_extra_use:
; X86:       # %bb.0:
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    pushl %edi
; X86-NEXT:    pushl %esi
; X86-NEXT:    andl $-16, %esp
; X86-NEXT:    subl $16, %esp
; X86-NEXT:    movl 24(%ebp), %eax
; X86-NEXT:    andl $15, %eax
; X86-NEXT:    movl 16(%ebp), %ecx
; X86-NEXT:    andl $15, %ecx
; X86-NEXT:    movl 32(%ebp), %edx
; X86-NEXT:    movl 12(%ebp), %esi
; X86-NEXT:    movl 8(%ebp), %edi
; X86-NEXT:    vmovaps (%edi), %xmm0
; X86-NEXT:    vandps (%esi), %xmm0, %xmm0
; X86-NEXT:    vmovaps %xmm0, (%esp)
; X86-NEXT:    movzbl (%esp,%ecx), %ecx
; X86-NEXT:    cmpb (%esp,%eax), %cl
; X86-NEXT:    sete (%edx)
; X86-NEXT:    leal -8(%ebp), %esp
; X86-NEXT:    popl %esi
; X86-NEXT:    popl %edi
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: freeze_extractelement_extra_use:
; X64:       # %bb.0:
; X64-NEXT:    andl $15, %ecx
; X64-NEXT:    andl $15, %edx
; X64-NEXT:    vmovaps (%rdi), %xmm0
; X64-NEXT:    vandps (%rsi), %xmm0, %xmm0
; X64-NEXT:    vmovaps %xmm0, -{{[0-9]+}}(%rsp)
; X64-NEXT:    movzbl -24(%rsp,%rdx), %eax
; X64-NEXT:    cmpb -24(%rsp,%rcx), %al
; X64-NEXT:    sete (%r8)
; X64-NEXT:    retq
  %i0 = load <16 x i8>, ptr %origin0
  %i1 = load <16 x i8>, ptr %origin1
  %i2 = and <16 x i8> %i0, %i1
  %i3 = freeze <16 x i8> %i2
  %i4 = extractelement <16 x i8> %i3, i64 %idx0
  %i5 = extractelement <16 x i8> %i3, i64 %idx1
  %i6 = icmp eq i8 %i4, %i5
  store i1 %i6, ptr %dst
  ret void
}

define void @freeze_buildvector_single_maybe_poison_operand(ptr %origin, ptr %dst) nounwind {
; X86-LABEL: freeze_buildvector_single_maybe_poison_operand:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    vbroadcastss {{.*#+}} xmm0 = [42,42,42,42]
; X86-NEXT:    vpinsrd $0, (%ecx), %xmm0, %xmm0
; X86-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-NEXT:    vmovdqa %xmm0, (%eax)
; X86-NEXT:    retl
;
; X64-LABEL: freeze_buildvector_single_maybe_poison_operand:
; X64:       # %bb.0:
; X64-NEXT:    vpbroadcastd {{.*#+}} xmm0 = [42,42,42,42]
; X64-NEXT:    vpinsrd $0, (%rdi), %xmm0, %xmm0
; X64-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [7,7,7,7]
; X64-NEXT:    vpand %xmm1, %xmm0, %xmm0
; X64-NEXT:    vmovdqa %xmm0, (%rsi)
; X64-NEXT:    retq
  %i0.src = load i32, ptr %origin
  %i0 = and i32 %i0.src, 15
  %i1 = insertelement <4 x i32> poison, i32 %i0, i64 0
  %i2 = insertelement <4 x i32> %i1, i32 42, i64 1
  %i3 = insertelement <4 x i32> %i2, i32 42, i64 2
  %i4 = insertelement <4 x i32> %i3, i32 42, i64 3
  %i5 = freeze <4 x i32> %i4
  %i6 = and <4 x i32> %i5, <i32 7, i32 7, i32 7, i32 7>
  store <4 x i32> %i6, ptr %dst
  ret void
}

define void @freeze_buildvector_single_repeated_maybe_poison_operand(ptr %origin, ptr %dst) nounwind {
; X86-LABEL: freeze_buildvector_single_repeated_maybe_poison_operand:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl (%ecx), %ecx
; X86-NEXT:    andl $15, %ecx
; X86-NEXT:    vbroadcastss {{.*#+}} xmm0 = [42,42,42,42]
; X86-NEXT:    vpinsrd $0, %ecx, %xmm0, %xmm0
; X86-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; X86-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-NEXT:    vmovdqa %xmm0, (%eax)
; X86-NEXT:    retl
;
; X64-LABEL: freeze_buildvector_single_repeated_maybe_poison_operand:
; X64:       # %bb.0:
; X64-NEXT:    vpbroadcastd {{.*#+}} xmm0 = [42,42,42,42]
; X64-NEXT:    vpinsrd $0, (%rdi), %xmm0, %xmm0
; X64-NEXT:    vpbroadcastq %xmm0, %xmm0
; X64-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [7,7,7,7]
; X64-NEXT:    vpand %xmm1, %xmm0, %xmm0
; X64-NEXT:    vmovdqa %xmm0, (%rsi)
; X64-NEXT:    retq
  %i0.src = load i32, ptr %origin
  %i0 = and i32 %i0.src, 15
  %i1 = insertelement <4 x i32> poison, i32 %i0, i64 0
  %i2 = insertelement <4 x i32> %i1, i32 42, i64 1
  %i3 = insertelement <4 x i32> %i2, i32 %i0, i64 2
  %i4 = insertelement <4 x i32> %i3, i32 42, i64 3
  %i5 = freeze <4 x i32> %i4
  %i6 = and <4 x i32> %i5, <i32 7, i32 7, i32 7, i32 7>
  store <4 x i32> %i6, ptr %dst
  ret void
}

define void @freeze_two_frozen_buildvectors(ptr %origin0, ptr %origin1, ptr %dst0, ptr %dst1) nounwind {
; X86-LABEL: freeze_two_frozen_buildvectors:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl (%edx), %edx
; X86-NEXT:    andl $15, %edx
; X86-NEXT:    vpinsrd $1, %edx, %xmm0, %xmm0
; X86-NEXT:    vbroadcastss {{.*#+}} xmm1 = [7,7,7,7]
; X86-NEXT:    vpand %xmm1, %xmm0, %xmm0
; X86-NEXT:    vmovdqa %xmm0, (%ecx)
; X86-NEXT:    vmovd %edx, %xmm0
; X86-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; X86-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; X86-NEXT:    vpblendw {{.*#+}} xmm0 = xmm2[0,1,2,3],xmm0[4,5],xmm2[6,7]
; X86-NEXT:    vpand %xmm1, %xmm0, %xmm0
; X86-NEXT:    vmovdqa %xmm0, (%eax)
; X86-NEXT:    retl
;
; X64-LABEL: freeze_two_frozen_buildvectors:
; X64:       # %bb.0:
; X64-NEXT:    movl (%rdi), %eax
; X64-NEXT:    andl $15, %eax
; X64-NEXT:    vpinsrd $1, %eax, %xmm0, %xmm0
; X64-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [7,7,7,7]
; X64-NEXT:    vpand %xmm1, %xmm0, %xmm0
; X64-NEXT:    vmovdqa %xmm0, (%rdx)
; X64-NEXT:    vmovd %eax, %xmm0
; X64-NEXT:    vpbroadcastd %xmm0, %xmm0
; X64-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; X64-NEXT:    vpblendd {{.*#+}} xmm0 = xmm2[0,1],xmm0[2],xmm2[3]
; X64-NEXT:    vpand %xmm1, %xmm0, %xmm0
; X64-NEXT:    vmovdqa %xmm0, (%rcx)
; X64-NEXT:    retq
  %i0.src = load i32, ptr %origin0
  %i0 = and i32 %i0.src, 15
  %i1.src = load i32, ptr %origin1
  %i1 = and i32 %i0.src, 15
  %i2 = insertelement <4 x i32> poison, i32 %i0, i64 1
  %i3 = and <4 x i32> %i2, <i32 7, i32 7, i32 7, i32 7>
  %i4 = freeze <4 x i32> %i3
  store <4 x i32> %i4, ptr %dst0
  %i5 = insertelement <4 x i32> poison, i32 %i1, i64 2
  %i6 = and <4 x i32> %i5, <i32 7, i32 7, i32 7, i32 7>
  %i7 = freeze <4 x i32> %i6
  store <4 x i32> %i7, ptr %dst1
  ret void
}

define void @freeze_two_buildvectors_only_one_frozen(ptr %origin0, ptr %origin1, ptr %dst0, ptr %dst1) nounwind {
; X86-LABEL: freeze_two_buildvectors_only_one_frozen:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl (%edx), %edx
; X86-NEXT:    andl $15, %edx
; X86-NEXT:    vpxor %xmm0, %xmm0, %xmm0
; X86-NEXT:    vmovd %edx, %xmm1
; X86-NEXT:    vpshufd {{.*#+}} xmm2 = xmm1[0,0,1,1]
; X86-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3],xmm0[4,5,6,7]
; X86-NEXT:    vbroadcastss {{.*#+}} xmm2 = [7,7,7,7]
; X86-NEXT:    vpand %xmm2, %xmm0, %xmm0
; X86-NEXT:    vmovdqa %xmm0, (%ecx)
; X86-NEXT:    vpshufd {{.*#+}} xmm0 = xmm1[0,1,0,1]
; X86-NEXT:    vpand %xmm2, %xmm0, %xmm0
; X86-NEXT:    vmovdqa %xmm0, (%eax)
; X86-NEXT:    retl
;
; X64-LABEL: freeze_two_buildvectors_only_one_frozen:
; X64:       # %bb.0:
; X64-NEXT:    movl (%rdi), %eax
; X64-NEXT:    andl $15, %eax
; X64-NEXT:    vpxor %xmm0, %xmm0, %xmm0
; X64-NEXT:    vmovd %eax, %xmm1
; X64-NEXT:    vpbroadcastd %xmm1, %xmm1
; X64-NEXT:    vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3]
; X64-NEXT:    vpbroadcastd {{.*#+}} xmm2 = [7,7,7,7]
; X64-NEXT:    vpand %xmm2, %xmm0, %xmm0
; X64-NEXT:    vmovdqa %xmm0, (%rdx)
; X64-NEXT:    vpand %xmm2, %xmm1, %xmm0
; X64-NEXT:    vmovdqa %xmm0, (%rcx)
; X64-NEXT:    retq
  %i0.src = load i32, ptr %origin0
  %i0 = and i32 %i0.src, 15
  %i1.src = load i32, ptr %origin1
  %i1 = and i32 %i0.src, 15
  %i2 = insertelement <4 x i32> poison, i32 %i0, i64 1
  %i3 = and <4 x i32> %i2, <i32 7, i32 7, i32 7, i32 7>
  %i4 = freeze <4 x i32> %i3
  store <4 x i32> %i4, ptr %dst0
  %i5 = insertelement <4 x i32> poison, i32 %i1, i64 2
  %i6 = and <4 x i32> %i5, <i32 7, i32 7, i32 7, i32 7>
  store <4 x i32> %i6, ptr %dst1
  ret void
}

define void @freeze_two_buildvectors_one_undef_elt(ptr %origin0, ptr %origin1, ptr %dst0, ptr %dst1) nounwind {
; X86-LABEL: freeze_two_buildvectors_one_undef_elt:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl (%edx), %edx
; X86-NEXT:    andl $15, %edx
; X86-NEXT:    vmovddup {{.*#+}} xmm0 = [7,0,7,0]
; X86-NEXT:    # xmm0 = mem[0,0]
; X86-NEXT:    vmovd %edx, %xmm1
; X86-NEXT:    vpand %xmm0, %xmm1, %xmm2
; X86-NEXT:    vmovdqa %xmm2, (%ecx)
; X86-NEXT:    vpslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1,2,3,4,5,6,7]
; X86-NEXT:    vpand %xmm0, %xmm1, %xmm0
; X86-NEXT:    vmovdqa %xmm0, (%eax)
; X86-NEXT:    retl
;
; X64-LABEL: freeze_two_buildvectors_one_undef_elt:
; X64:       # %bb.0:
; X64-NEXT:    movq (%rdi), %rax
; X64-NEXT:    vmovd %eax, %xmm0
; X64-NEXT:    vpbroadcastd %xmm0, %xmm0
; X64-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-NEXT:    vmovdqa %xmm0, (%rdx)
; X64-NEXT:    vmovdqa %xmm0, (%rcx)
; X64-NEXT:    retq
  %i0.src = load i64, ptr %origin0
  %i0 = and i64 %i0.src, 15
  %i1.src = load i64, ptr %origin1
  %i1 = and i64 %i0.src, 15
  %i2 = insertelement <2 x i64> poison, i64 %i0, i64 0
  %i3 = and <2 x i64> %i2, <i64 7, i64 7>
  %i4 = freeze <2 x i64> %i3
  store <2 x i64> %i4, ptr %dst0
  %i5 = insertelement <2 x i64> poison, i64 %i1, i64 1
  %i6 = and <2 x i64> %i5, <i64 7, i64 7>
  store <2 x i64> %i6, ptr %dst1
  ret void
}

define void @freeze_buildvector(ptr %origin0, ptr %origin1, ptr %origin2, ptr %origin3, ptr %dst) nounwind {
; X86-LABEL: freeze_buildvector:
; X86:       # %bb.0:
; X86-NEXT:    pushl %edi
; X86-NEXT:    pushl %esi
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
; X86-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-NEXT:    vpinsrd $1, (%esi), %xmm0, %xmm0
; X86-NEXT:    vpinsrd $2, (%edx), %xmm0, %xmm0
; X86-NEXT:    vpinsrd $3, (%ecx), %xmm0, %xmm0
; X86-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-NEXT:    vmovdqa %xmm0, (%eax)
; X86-NEXT:    popl %esi
; X86-NEXT:    popl %edi
; X86-NEXT:    retl
;
; X64-LABEL: freeze_buildvector:
; X64:       # %bb.0:
; X64-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X64-NEXT:    vpinsrd $1, (%rsi), %xmm0, %xmm0
; X64-NEXT:    vpinsrd $2, (%rdx), %xmm0, %xmm0
; X64-NEXT:    vpinsrd $3, (%rcx), %xmm0, %xmm0
; X64-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [7,7,7,7]
; X64-NEXT:    vpand %xmm1, %xmm0, %xmm0
; X64-NEXT:    vmovdqa %xmm0, (%r8)
; X64-NEXT:    retq
  %i0.src = load i32, ptr %origin0
  %i1.src = load i32, ptr %origin1
  %i2.src = load i32, ptr %origin2
  %i3.src = load i32, ptr %origin3
  %i0 = and i32 %i0.src, 15
  %i1 = and i32 %i1.src, 15
  %i2 = and i32 %i2.src, 15
  %i3 = and i32 %i3.src, 15
  %i4 = insertelement <4 x i32> poison, i32 %i0, i64 0
  %i5 = insertelement <4 x i32> %i4, i32 %i1, i64 1
  %i6 = insertelement <4 x i32> %i5, i32 %i2, i64 2
  %i7 = insertelement <4 x i32> %i6, i32 %i3, i64 3
  %i8 = freeze <4 x i32> %i7
  %i9 = and <4 x i32> %i8, <i32 7, i32 7, i32 7, i32 7>
  store <4 x i32> %i9, ptr %dst
  ret void
}

define void @freeze_buildvector_one_undef_elt(ptr %origin0, ptr %origin1, ptr %origin2, ptr %origin3, ptr %dst) nounwind {
; X86-LABEL: freeze_buildvector_one_undef_elt:
; X86:       # %bb.0:
; X86-NEXT:    pushl %esi
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
; X86-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-NEXT:    vpinsrd $1, (%edx), %xmm0, %xmm0
; X86-NEXT:    vpinsrd $2, %eax, %xmm0, %xmm0
; X86-NEXT:    vpinsrd $3, (%ecx), %xmm0, %xmm0
; X86-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-NEXT:    vmovdqa %xmm0, (%eax)
; X86-NEXT:    popl %esi
; X86-NEXT:    retl
;
; X64-LABEL: freeze_buildvector_one_undef_elt:
; X64:       # %bb.0:
; X64-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X64-NEXT:    vpinsrd $1, (%rsi), %xmm0, %xmm0
; X64-NEXT:    vpinsrd $2, %eax, %xmm0, %xmm0
; X64-NEXT:    vpinsrd $3, (%rcx), %xmm0, %xmm0
; X64-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [7,7,7,7]
; X64-NEXT:    vpand %xmm1, %xmm0, %xmm0
; X64-NEXT:    vmovdqa %xmm0, (%r8)
; X64-NEXT:    retq
  %i0.src = load i32, ptr %origin0
  %i1.src = load i32, ptr %origin1
  %i3.src = load i32, ptr %origin3
  %i0 = and i32 %i0.src, 15
  %i1 = and i32 %i1.src, 15
  %i3 = and i32 %i3.src, 15
  %i4 = insertelement <4 x i32> poison, i32 %i0, i64 0
  %i5 = insertelement <4 x i32> %i4, i32 %i1, i64 1
  %i7 = insertelement <4 x i32> %i5, i32 %i3, i64 3
  %i8 = freeze <4 x i32> %i7
  %i9 = and <4 x i32> %i8, <i32 7, i32 7, i32 7, i32 7>
  store <4 x i32> %i9, ptr %dst
  ret void
}

define void @freeze_buildvector_extrause(ptr %origin0, ptr %origin1, ptr %origin2, ptr %origin3, ptr %dst, ptr %escape) nounwind {
; X86-LABEL: freeze_buildvector_extrause:
; X86:       # %bb.0:
; X86-NEXT:    pushl %ebx
; X86-NEXT:    pushl %edi
; X86-NEXT:    pushl %esi
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ebx
; X86-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-NEXT:    vpinsrd $1, (%edi), %xmm0, %xmm0
; X86-NEXT:    vpinsrd $2, (%esi), %xmm0, %xmm0
; X86-NEXT:    vpinsrd $3, (%edx), %xmm0, %xmm0
; X86-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-NEXT:    vmovdqa %xmm0, (%ecx)
; X86-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-NEXT:    vmovdqa %xmm0, (%eax)
; X86-NEXT:    popl %esi
; X86-NEXT:    popl %edi
; X86-NEXT:    popl %ebx
; X86-NEXT:    retl
;
; X64-LABEL: freeze_buildvector_extrause:
; X64:       # %bb.0:
; X64-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X64-NEXT:    vpinsrd $1, (%rsi), %xmm0, %xmm0
; X64-NEXT:    vpinsrd $2, (%rdx), %xmm0, %xmm0
; X64-NEXT:    vpinsrd $3, (%rcx), %xmm0, %xmm0
; X64-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [15,15,15,15]
; X64-NEXT:    vpand %xmm1, %xmm0, %xmm0
; X64-NEXT:    vmovdqa %xmm0, (%r9)
; X64-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [7,7,7,7]
; X64-NEXT:    vpand %xmm1, %xmm0, %xmm0
; X64-NEXT:    vmovdqa %xmm0, (%r8)
; X64-NEXT:    retq
  %i0.src = load i32, ptr %origin0
  %i1.src = load i32, ptr %origin1
  %i2.src = load i32, ptr %origin2
  %i3.src = load i32, ptr %origin3
  %i0 = and i32 %i0.src, 15
  %i1 = and i32 %i1.src, 15
  %i2 = and i32 %i2.src, 15
  %i3 = and i32 %i3.src, 15
  %i4 = insertelement <4 x i32> poison, i32 %i0, i64 0
  %i5 = insertelement <4 x i32> %i4, i32 %i1, i64 1
  %i6 = insertelement <4 x i32> %i5, i32 %i2, i64 2
  %i7 = insertelement <4 x i32> %i6, i32 %i3, i64 3
  store <4 x i32> %i7, ptr %escape
  %i8 = freeze <4 x i32> %i7
  %i9 = and <4 x i32> %i8, <i32 7, i32 7, i32 7, i32 7>
  store <4 x i32> %i9, ptr %dst
  ret void
}

define void @pr59677(i32 %x, ptr %out) nounwind {
; X86-LABEL: pr59677:
; X86:       # %bb.0:
; X86-NEXT:    pushl %esi
; X86-NEXT:    pushl %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
; X86-NEXT:    vmovd %eax, %xmm0
; X86-NEXT:    orl $1, %eax
; X86-NEXT:    vmovd %eax, %xmm1
; X86-NEXT:    vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; X86-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
; X86-NEXT:    vpaddd %xmm0, %xmm0, %xmm0
; X86-NEXT:    vcvtdq2ps %xmm0, %xmm0
; X86-NEXT:    vmovss %xmm0, (%esp)
; X86-NEXT:    calll sinf
; X86-NEXT:    fstps (%esi)
; X86-NEXT:    addl $4, %esp
; X86-NEXT:    popl %esi
; X86-NEXT:    retl
;
; X64-LABEL: pr59677:
; X64:       # %bb.0:
; X64-NEXT:    pushq %rbx
; X64-NEXT:    movq %rsi, %rbx
; X64-NEXT:    vmovd %edi, %xmm0
; X64-NEXT:    orl $1, %edi
; X64-NEXT:    vmovd %edi, %xmm1
; X64-NEXT:    vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; X64-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
; X64-NEXT:    vpaddd %xmm0, %xmm0, %xmm0
; X64-NEXT:    vcvtdq2ps %xmm0, %xmm0
; X64-NEXT:    callq sinf@PLT
; X64-NEXT:    vmovss %xmm0, (%rbx)
; X64-NEXT:    popq %rbx
; X64-NEXT:    retq
  %i0 = or i32 %x, 1
  %i1 = insertelement <4 x i32> zeroinitializer, i32 %x, i64 0
  %i2 = insertelement <4 x i32> %i1, i32 %i0, i64 1
  %i3 = shl <4 x i32> %i2, <i32 1, i32 1, i32 1, i32 1>
  %i4 = sitofp <4 x i32> %i3 to <4 x float>
  %i5 = tail call <4 x float> @llvm.sin.v4f32(<4 x float> %i4)
  %i6 = extractelement <4 x float> %i5, i64 0
  store float %i6, ptr %out, align 4
  ret void
}
declare <4 x float> @llvm.sin.v4f32(<4 x float>)