; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-linux-gnu -mattr=+avx512f | FileCheck %s --check-prefixes=CHECK,AVX512F,X86-AVX512F
; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512f | FileCheck %s --check-prefixes=CHECK,AVX512F,X64-AVX512F
; RUN: llc < %s -mtriple=i686-unknown-linux-gnu -mattr=+avx512f,+avx512vl,+avx512bw | FileCheck %s --check-prefixes=CHECK,AVX512BW,X86-AVX512BW
; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512f,+avx512vl,+avx512bw | FileCheck %s --check-prefixes=CHECK,AVX512BW,X64-AVX512BW

; Each function below computes an add and/or a sub of its two vector arguments
; and then shuffles the results; the assertions pin the instruction sequence
; that llc emits for that shuffle on each of the four configurations above.
;
; NOTE(review): the shufflevector mask operands were reconstructed from the
; lane sources named in the blend/permute assertions (first operand = add
; result, second operand = sub result). Re-run utils/update_llc_test_checks.py
; and confirm the assertions are unchanged before relying on this file.

; --- Alternating 64-bit-wide blends of the add and sub results. ---

define <16 x i32> @shuffle_v8i64(<16 x i32> %t0, <16 x i32> %t1) {
; CHECK-LABEL: shuffle_v8i64:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vpaddd %zmm1, %zmm0, %zmm2
; CHECK-NEXT:    vpsubd %zmm1, %zmm0, %zmm0
; CHECK-NEXT:    vshufps {{.*#+}} zmm0 = zmm2[0,1],zmm0[2,3],zmm2[4,5],zmm0[6,7],zmm2[8,9],zmm0[10,11],zmm2[12,13],zmm0[14,15]
; CHECK-NEXT:    ret{{[l|q]}}
entry:
  %t2 = add nsw <16 x i32> %t0, %t1
  %t3 = sub nsw <16 x i32> %t0, %t1
  %t4 = shufflevector <16 x i32> %t2, <16 x i32> %t3, <16 x i32> <i32 0, i32 1, i32 18, i32 19, i32 4, i32 5, i32 22, i32 23, i32 8, i32 9, i32 26, i32 27, i32 12, i32 13, i32 30, i32 31>
  ret <16 x i32> %t4
}

define <8 x i32> @shuffle_v4i64(<8 x i32> %t0, <8 x i32> %t1) {
; CHECK-LABEL: shuffle_v4i64:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vpaddd %ymm1, %ymm0, %ymm2
; CHECK-NEXT:    vpsubd %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    vpblendd {{.*#+}} ymm0 = ymm2[0,1],ymm0[2,3],ymm2[4,5],ymm0[6,7]
; CHECK-NEXT:    ret{{[l|q]}}
entry:
  %t2 = add nsw <8 x i32> %t0, %t1
  %t3 = sub nsw <8 x i32> %t0, %t1
  %t4 = shufflevector <8 x i32> %t2, <8 x i32> %t3, <8 x i32> <i32 0, i32 1, i32 10, i32 11, i32 4, i32 5, i32 14, i32 15>
  ret <8 x i32> %t4
}

define <4 x i32> @shuffle_v2i64(<4 x i32> %t0, <4 x i32> %t1) {
; CHECK-LABEL: shuffle_v2i64:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vpaddd %xmm1, %xmm0, %xmm2
; CHECK-NEXT:    vpsubd %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    vpblendd {{.*#+}} xmm0 = xmm2[0,1],xmm0[2,3]
; CHECK-NEXT:    ret{{[l|q]}}
entry:
  %t2 = add nsw <4 x i32> %t0, %t1
  %t3 = sub nsw <4 x i32> %t0, %t1
  %t4 = shufflevector <4 x i32> %t2, <4 x i32> %t3, <4 x i32> <i32 0, i32 1, i32 6, i32 7>
  ret <4 x i32> %t4
}

; Two-element variant: element 0 from the add, element 1 from the sub.
define <2 x i32> @shuffle_v2i32(<2 x i32> %t0, <2 x i32> %t1) {
; CHECK-LABEL: shuffle_v2i32:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vpaddd %xmm1, %xmm0, %xmm2
; CHECK-NEXT:    vpsubd %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    vpblendd {{.*#+}} xmm0 = xmm2[0],xmm0[1],xmm2[2,3]
; CHECK-NEXT:    ret{{[l|q]}}
entry:
  %t2 = add nsw <2 x i32> %t0, %t1
  %t3 = sub nsw <2 x i32> %t0, %t1
  %t4 = shufflevector <2 x i32> %t2, <2 x i32> %t3, <2 x i32> <i32 0, i32 3>
  ret <2 x i32> %t4
}

; --- Byte add/sub where the low two bytes (one 16-bit lane) come from the ---
; --- sub result and every other byte comes from the add result.          ---

define <64 x i8> @addb_selectw_64xi8(<64 x i8> %t0, <64 x i8> %t1) {
; X86-AVX512F-LABEL: addb_selectw_64xi8:
; X86-AVX512F:       # %bb.0:
; X86-AVX512F-NEXT:    vextracti64x4 $1, %zmm1, %ymm2
; X86-AVX512F-NEXT:    vextracti64x4 $1, %zmm0, %ymm3
; X86-AVX512F-NEXT:    vpaddb %ymm2, %ymm3, %ymm2
; X86-AVX512F-NEXT:    vpaddb %ymm1, %ymm0, %ymm3
; X86-AVX512F-NEXT:    vinserti64x4 $1, %ymm2, %zmm3, %zmm2
; X86-AVX512F-NEXT:    vpsubb %ymm1, %ymm0, %ymm0
; X86-AVX512F-NEXT:    vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}, %zmm2, %zmm0
; X86-AVX512F-NEXT:    retl
;
; X64-AVX512F-LABEL: addb_selectw_64xi8:
; X64-AVX512F:       # %bb.0:
; X64-AVX512F-NEXT:    vextracti64x4 $1, %zmm1, %ymm2
; X64-AVX512F-NEXT:    vextracti64x4 $1, %zmm0, %ymm3
; X64-AVX512F-NEXT:    vpaddb %ymm2, %ymm3, %ymm2
; X64-AVX512F-NEXT:    vpaddb %ymm1, %ymm0, %ymm3
; X64-AVX512F-NEXT:    vinserti64x4 $1, %ymm2, %zmm3, %zmm2
; X64-AVX512F-NEXT:    vpsubb %ymm1, %ymm0, %ymm0
; X64-AVX512F-NEXT:    vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm2, %zmm0
; X64-AVX512F-NEXT:    retq
;
; X86-AVX512BW-LABEL: addb_selectw_64xi8:
; X86-AVX512BW:       # %bb.0:
; X86-AVX512BW-NEXT:    vpaddb %zmm1, %zmm0, %zmm2
; X86-AVX512BW-NEXT:    movl $3, %eax
; X86-AVX512BW-NEXT:    kmovd %eax, %k0
; X86-AVX512BW-NEXT:    kmovd %k0, %k1
; X86-AVX512BW-NEXT:    vpsubb %zmm1, %zmm0, %zmm2 {%k1}
; X86-AVX512BW-NEXT:    vmovdqa64 %zmm2, %zmm0
; X86-AVX512BW-NEXT:    retl
;
; X64-AVX512BW-LABEL: addb_selectw_64xi8:
; X64-AVX512BW:       # %bb.0:
; X64-AVX512BW-NEXT:    vpaddb %zmm1, %zmm0, %zmm2
; X64-AVX512BW-NEXT:    movl $3, %eax
; X64-AVX512BW-NEXT:    kmovq %rax, %k1
; X64-AVX512BW-NEXT:    vpsubb %zmm1, %zmm0, %zmm2 {%k1}
; X64-AVX512BW-NEXT:    vmovdqa64 %zmm2, %zmm0
; X64-AVX512BW-NEXT:    retq
  %t2 = add nsw <64 x i8> %t0, %t1
  %t3 = sub nsw <64 x i8> %t0, %t1
  %t4 = shufflevector <64 x i8> %t2, <64 x i8> %t3, <64 x i32> <
      i32 64, i32 65, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7,
      i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15,
      i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23,
      i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31,
      i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39,
      i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47,
      i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55,
      i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
  ret <64 x i8> %t4
}

define <32 x i8> @addb_selectw_32xi8(<32 x i8> %t0, <32 x i8> %t1) {
; CHECK-LABEL: addb_selectw_32xi8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpaddb %ymm1, %ymm0, %ymm2
; CHECK-NEXT:    vpsubb %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0],xmm2[1,2,3,4,5,6,7]
; CHECK-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm2[4,5,6,7]
; CHECK-NEXT:    ret{{[l|q]}}
  %t2 = add nsw <32 x i8> %t0, %t1
  %t3 = sub nsw <32 x i8> %t0, %t1
  %t4 = shufflevector <32 x i8> %t2, <32 x i8> %t3, <32 x i32> <
      i32 32, i32 33, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7,
      i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15,
      i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23,
      i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  ret <32 x i8> %t4
}

define <16 x i8> @addb_selectw_16xi8(<16 x i8> %t0, <16 x i8> %t1) {
; CHECK-LABEL: addb_selectw_16xi8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpaddb %xmm1, %xmm0, %xmm2
; CHECK-NEXT:    vpsubb %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0],xmm2[1,2,3,4,5,6,7]
; CHECK-NEXT:    ret{{[l|q]}}
  %t2 = add nsw <16 x i8> %t0, %t1
  %t3 = sub nsw <16 x i8> %t0, %t1
  %t4 = shufflevector <16 x i8> %t2, <16 x i8> %t3, <16 x i32> <i32 16, i32 17, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  ret <16 x i8> %t4
}

define <8 x i8> @addb_selectw_8xi8(<8 x i8> %t0, <8 x i8> %t1) {
; CHECK-LABEL: addb_selectw_8xi8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpaddb %xmm1, %xmm0, %xmm2
; CHECK-NEXT:    vpsubb %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0],xmm2[1,2,3,4,5,6,7]
; CHECK-NEXT:    ret{{[l|q]}}
  %t2 = add nsw <8 x i8> %t0, %t1
  %t3 = sub nsw <8 x i8> %t0, %t1
  %t4 = shufflevector <8 x i8> %t2, <8 x i8> %t3, <8 x i32> <i32 8, i32 9, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <8 x i8> %t4
}

; --- 16-bit add/sub where the low two elements (one 32-bit lane) come ---
; --- from the sub result and the rest from the add result.            ---

define <32 x i16> @addw_selectd_32xi16(<32 x i16> %t0, <32 x i16> %t1) {
; AVX512F-LABEL: addw_selectd_32xi16:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vextracti64x4 $1, %zmm1, %ymm2
; AVX512F-NEXT:    vextracti64x4 $1, %zmm0, %ymm3
; AVX512F-NEXT:    vpaddw %ymm2, %ymm3, %ymm2
; AVX512F-NEXT:    vpaddw %ymm1, %ymm0, %ymm3
; AVX512F-NEXT:    vinserti64x4 $1, %ymm2, %zmm3, %zmm2
; AVX512F-NEXT:    vpsubw %ymm1, %ymm0, %ymm0
; AVX512F-NEXT:    movw $1, %ax
; AVX512F-NEXT:    kmovw %eax, %k1
; AVX512F-NEXT:    vmovdqa32 %zmm0, %zmm2 {%k1}
; AVX512F-NEXT:    vmovdqa64 %zmm2, %zmm0
; AVX512F-NEXT:    ret{{[l|q]}}
;
; AVX512BW-LABEL: addw_selectd_32xi16:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpaddw %zmm1, %zmm0, %zmm2
; AVX512BW-NEXT:    movl $3, %eax
; AVX512BW-NEXT:    kmovd %eax, %k1
; AVX512BW-NEXT:    vpsubw %zmm1, %zmm0, %zmm2 {%k1}
; AVX512BW-NEXT:    vmovdqa64 %zmm2, %zmm0
; AVX512BW-NEXT:    ret{{[l|q]}}
  %t2 = add nsw <32 x i16> %t0, %t1
  %t3 = sub nsw <32 x i16> %t0, %t1
  %t4 = shufflevector <32 x i16> %t2, <32 x i16> %t3, <32 x i32> <
      i32 32, i32 33, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7,
      i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15,
      i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23,
      i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  ret <32 x i16> %t4
}

define <16 x i16> @addw_selectd_16xi16(<16 x i16> %t0, <16 x i16> %t1) {
; CHECK-LABEL: addw_selectd_16xi16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpaddw %ymm1, %ymm0, %ymm2
; CHECK-NEXT:    vpsubw %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0],ymm2[1,2,3,4,5,6,7]
; CHECK-NEXT:    ret{{[l|q]}}
  %t2 = add nsw <16 x i16> %t0, %t1
  %t3 = sub nsw <16 x i16> %t0, %t1
  %t4 = shufflevector <16 x i16> %t2, <16 x i16> %t3, <16 x i32> <i32 16, i32 17, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  ret <16 x i16> %t4
}

; --- 32-bit add/sub where the low two elements (one 64-bit lane) come ---
; --- from the sub result and the rest from the add result.            ---

define <16 x i32> @addd_selectq_16xi32(<16 x i32> %t0, <16 x i32> %t1) {
; AVX512F-LABEL: addd_selectq_16xi32:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vpaddd %zmm1, %zmm0, %zmm2
; AVX512F-NEXT:    movw $3, %ax
; AVX512F-NEXT:    kmovw %eax, %k1
; AVX512F-NEXT:    vpsubd %zmm1, %zmm0, %zmm2 {%k1}
; AVX512F-NEXT:    vmovdqa64 %zmm2, %zmm0
; AVX512F-NEXT:    ret{{[l|q]}}
;
; AVX512BW-LABEL: addd_selectq_16xi32:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpaddd %zmm1, %zmm0, %zmm2
; AVX512BW-NEXT:    movw $3, %ax
; AVX512BW-NEXT:    kmovd %eax, %k1
; AVX512BW-NEXT:    vpsubd %zmm1, %zmm0, %zmm2 {%k1}
; AVX512BW-NEXT:    vmovdqa64 %zmm2, %zmm0
; AVX512BW-NEXT:    ret{{[l|q]}}
  %t2 = add nsw <16 x i32> %t0, %t1
  %t3 = sub nsw <16 x i32> %t0, %t1
  %t4 = shufflevector <16 x i32> %t2, <16 x i32> %t3, <16 x i32> <i32 16, i32 17, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  ret <16 x i32> %t4
}

define <8 x i32> @addd_selectq_8xi32(<8 x i32> %t0, <8 x i32> %t1) {
; CHECK-LABEL: addd_selectq_8xi32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpaddd %ymm1, %ymm0, %ymm2
; CHECK-NEXT:    vpsubd %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm2[2,3,4,5,6,7]
; CHECK-NEXT:    ret{{[l|q]}}
  %t2 = add nsw <8 x i32> %t0, %t1
  %t3 = sub nsw <8 x i32> %t0, %t1
  %t4 = shufflevector <8 x i32> %t2, <8 x i32> %t3, <8 x i32> <i32 8, i32 9, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <8 x i32> %t4
}

define <4 x i32> @addd_selectq_4xi32(<4 x i32> %t0, <4 x i32> %t1) {
; CHECK-LABEL: addd_selectq_4xi32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpaddd %xmm1, %xmm0, %xmm2
; CHECK-NEXT:    vpsubd %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3]
; CHECK-NEXT:    ret{{[l|q]}}
  %t2 = add nsw <4 x i32> %t0, %t1
  %t3 = sub nsw <4 x i32> %t0, %t1
  %t4 = shufflevector <4 x i32> %t2, <4 x i32> %t3, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
  ret <4 x i32> %t4
}

; --- Single-input permutes of an add result. ---
; NOTE(review): the second shufflevector operand was lost from this file and is
; assumed to be undef (per the function names); the masks only reference the
; first operand and reproduce the 64-bit lane order in the vpermq assertions.
; Confirm against the upstream test.

define <8 x i32> @shuffle_undef_8xi32(<8 x i32> %0, <8 x i32> %1) {
; CHECK-LABEL: shuffle_undef_8xi32:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vpaddd %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
; CHECK-NEXT:    ret{{[l|q]}}
entry:
  %2 = add <8 x i32> %0, %1
  %3 = shufflevector <8 x i32> %2, <8 x i32> undef, <8 x i32> <i32 0, i32 1, i32 4, i32 5, i32 2, i32 3, i32 6, i32 7>
  ret <8 x i32> %3
}

define <16 x i16> @shuffle_undef_16xi16(<16 x i16> %0, <16 x i16> %1) {
; CHECK-LABEL: shuffle_undef_16xi16:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vpaddw %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[2,1,0,3]
; CHECK-NEXT:    ret{{[l|q]}}
entry:
  %2 = add <16 x i16> %0, %1
  %3 = shufflevector <16 x i16> %2, <16 x i16> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 14, i32 15>
  ret <16 x i16> %3
}