; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx512vpopcntdq,+avx512f,+avx512vl | FileCheck %s --check-prefixes=CHECK

; Each function below computes an operation on shuffled copies of %v0, computes
; ctpop(%v0), and then selects between the two results lane-by-lane with a
; final shufflevector. The assertions verify that the final select is folded
; into a write-masked vpopcntd (mask 0xF0F0) instead of a separate blend.
;
; NOTE(review): the constant shuffle masks in this file were reconstructed from
; the expected assembly:
;   * pair-swap mask      <- vpshufd zmm0[1,0,3,2,...]
;   * odd-lane blend mask <- vpblendmd with k1 = 0xAAAA (min/max tests)
;   * final select mask   <- vpopcntd write-mask k1 = 0xF0F0
; Re-run utils/update_llc_test_checks.py and confirm the assertions are
; unchanged before committing.

; Binary 'or' feeding the masked popcount select.
define <16 x i32> @combine_mask_with_or(<16 x i32> %v0) {
; CHECK-LABEL: combine_mask_with_or:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpshufd {{.*#+}} zmm1 = zmm0[1,0,3,2,5,4,7,6,9,8,11,10,13,12,15,14]
; CHECK-NEXT:    movw $2570, %ax # imm = 0xA0A
; CHECK-NEXT:    kmovw %eax, %k1
; CHECK-NEXT:    vpblendmd %zmm1, %zmm0, %zmm2 {%k1}
; CHECK-NEXT:    vpord %zmm2, %zmm1, %zmm1
; CHECK-NEXT:    movw $-3856, %ax # imm = 0xF0F0
; CHECK-NEXT:    kmovw %eax, %k1
; CHECK-NEXT:    vpopcntd %zmm0, %zmm1 {%k1}
; CHECK-NEXT:    vmovdqa64 %zmm1, %zmm0
; CHECK-NEXT:    retq
  %shuf0_0 = shufflevector <16 x i32> %v0, <16 x i32> poison, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14>
  %shuf0_1 = shufflevector <16 x i32> %v0, <16 x i32> %shuf0_0, <16 x i32> <i32 0, i32 17, i32 2, i32 19, i32 4, i32 21, i32 6, i32 23, i32 8, i32 25, i32 10, i32 27, i32 12, i32 29, i32 14, i32 31>
  %op0_0 = or <16 x i32> %shuf0_0, %shuf0_1
  %op1_0 = tail call <16 x i32> @llvm.ctpop.v16i32(<16 x i32> %v0)
  %r = shufflevector <16 x i32> %op0_0, <16 x i32> %op1_0, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 20, i32 21, i32 22, i32 23, i32 8, i32 9, i32 10, i32 11, i32 28, i32 29, i32 30, i32 31>
  ret <16 x i32> %r
}

; Binary 'mul' feeding the masked popcount select.
define <16 x i32> @combine_mask_with_mul(<16 x i32> %v0) {
; CHECK-LABEL: combine_mask_with_mul:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpshufd {{.*#+}} zmm1 = zmm0[1,0,3,2,5,4,7,6,9,8,11,10,13,12,15,14]
; CHECK-NEXT:    movw $2570, %ax # imm = 0xA0A
; CHECK-NEXT:    kmovw %eax, %k1
; CHECK-NEXT:    vpblendmd %zmm1, %zmm0, %zmm2 {%k1}
; CHECK-NEXT:    vpmulld %zmm2, %zmm1, %zmm1
; CHECK-NEXT:    movw $-3856, %ax # imm = 0xF0F0
; CHECK-NEXT:    kmovw %eax, %k1
; CHECK-NEXT:    vpopcntd %zmm0, %zmm1 {%k1}
; CHECK-NEXT:    vmovdqa64 %zmm1, %zmm0
; CHECK-NEXT:    retq
  %shuf0_0 = shufflevector <16 x i32> %v0, <16 x i32> poison, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14>
  %shuf0_1 = shufflevector <16 x i32> %v0, <16 x i32> %shuf0_0, <16 x i32> <i32 0, i32 17, i32 2, i32 19, i32 4, i32 21, i32 6, i32 23, i32 8, i32 25, i32 10, i32 27, i32 12, i32 29, i32 14, i32 31>
  %op0_0 = mul <16 x i32> %shuf0_0, %shuf0_1
  %op1_0 = tail call <16 x i32> @llvm.ctpop.v16i32(<16 x i32> %v0)
  %r = shufflevector <16 x i32> %op0_0, <16 x i32> %op1_0, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 20, i32 21, i32 22, i32 23, i32 8, i32 9, i32 10, i32 11, i32 28, i32 29, i32 30, i32 31>
  ret <16 x i32> %r
}

; Unary 'abs' feeding the masked popcount select. Both operands of the inner
; shuffle are %op0_0_tmp0, so %op0_0_tmp1 is dead and only one vpabsd is
; expected in the output.
; NOTE(review): the inner blend mask cannot be determined from the expected
; assembly (any lane-preserving mask gives the same result); the odd-lane blend
; used by the sibling tests is assumed here -- confirm against the upstream
; test.
define <16 x i32> @combine_mask_with_abs(<16 x i32> %v0) {
; CHECK-LABEL: combine_mask_with_abs:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpshufd {{.*#+}} zmm1 = zmm0[1,0,3,2,5,4,7,6,9,8,11,10,13,12,15,14]
; CHECK-NEXT:    vpabsd %zmm1, %zmm1
; CHECK-NEXT:    movw $-3856, %ax # imm = 0xF0F0
; CHECK-NEXT:    kmovw %eax, %k1
; CHECK-NEXT:    vpopcntd %zmm0, %zmm1 {%k1}
; CHECK-NEXT:    vmovdqa64 %zmm1, %zmm0
; CHECK-NEXT:    retq
  %shuf0_0 = shufflevector <16 x i32> %v0, <16 x i32> poison, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14>
  %shuf0_1 = shufflevector <16 x i32> %v0, <16 x i32> %shuf0_0, <16 x i32> <i32 0, i32 17, i32 2, i32 19, i32 4, i32 21, i32 6, i32 23, i32 8, i32 25, i32 10, i32 27, i32 12, i32 29, i32 14, i32 31>
  %op0_0_tmp0 = tail call <16 x i32> @llvm.abs.v16i32(<16 x i32> %shuf0_0, i1 true)
  %op0_0_tmp1 = tail call <16 x i32> @llvm.abs.v16i32(<16 x i32> %shuf0_1, i1 true)
  %op0_0 = shufflevector <16 x i32> %op0_0_tmp0, <16 x i32> %op0_0_tmp0, <16 x i32> <i32 0, i32 17, i32 2, i32 19, i32 4, i32 21, i32 6, i32 23, i32 8, i32 25, i32 10, i32 27, i32 12, i32 29, i32 14, i32 31>
  %op1_0 = tail call <16 x i32> @llvm.ctpop.v16i32(<16 x i32> %v0)
  %r = shufflevector <16 x i32> %op0_0, <16 x i32> %op1_0, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 20, i32 21, i32 22, i32 23, i32 8, i32 9, i32 10, i32 11, i32 28, i32 29, i32 30, i32 31>
  ret <16 x i32> %r
}

; Unsigned-min intrinsic feeding the masked popcount select. The blend keeps
; its full 0xAAAA mask here (it is not narrowed to 0xA0A as in the plain
; binary-operator tests).
define <16 x i32> @combine_mask_with_umin(<16 x i32> %v0) {
; CHECK-LABEL: combine_mask_with_umin:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpshufd {{.*#+}} zmm1 = zmm0[1,0,3,2,5,4,7,6,9,8,11,10,13,12,15,14]
; CHECK-NEXT:    movw $-21846, %ax # imm = 0xAAAA
; CHECK-NEXT:    kmovw %eax, %k1
; CHECK-NEXT:    vpblendmd %zmm1, %zmm0, %zmm2 {%k1}
; CHECK-NEXT:    vpminud %zmm2, %zmm1, %zmm1
; CHECK-NEXT:    movw $-3856, %ax # imm = 0xF0F0
; CHECK-NEXT:    kmovw %eax, %k1
; CHECK-NEXT:    vpopcntd %zmm0, %zmm1 {%k1}
; CHECK-NEXT:    vmovdqa64 %zmm1, %zmm0
; CHECK-NEXT:    retq
  %shuf0_0 = shufflevector <16 x i32> %v0, <16 x i32> poison, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14>
  %shuf0_1 = shufflevector <16 x i32> %v0, <16 x i32> %shuf0_0, <16 x i32> <i32 0, i32 17, i32 2, i32 19, i32 4, i32 21, i32 6, i32 23, i32 8, i32 25, i32 10, i32 27, i32 12, i32 29, i32 14, i32 31>
  %op0_0 = tail call <16 x i32> @llvm.umin.v16i32(<16 x i32> %shuf0_0, <16 x i32> %shuf0_1)
  %op1_0 = tail call <16 x i32> @llvm.ctpop.v16i32(<16 x i32> %v0)
  %r = shufflevector <16 x i32> %op0_0, <16 x i32> %op1_0, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 20, i32 21, i32 22, i32 23, i32 8, i32 9, i32 10, i32 11, i32 28, i32 29, i32 30, i32 31>
  ret <16 x i32> %r
}

; Unsigned-max intrinsic feeding the masked popcount select.
define <16 x i32> @combine_mask_with_umax(<16 x i32> %v0) {
; CHECK-LABEL: combine_mask_with_umax:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpshufd {{.*#+}} zmm1 = zmm0[1,0,3,2,5,4,7,6,9,8,11,10,13,12,15,14]
; CHECK-NEXT:    movw $-21846, %ax # imm = 0xAAAA
; CHECK-NEXT:    kmovw %eax, %k1
; CHECK-NEXT:    vpblendmd %zmm1, %zmm0, %zmm2 {%k1}
; CHECK-NEXT:    vpmaxud %zmm2, %zmm1, %zmm1
; CHECK-NEXT:    movw $-3856, %ax # imm = 0xF0F0
; CHECK-NEXT:    kmovw %eax, %k1
; CHECK-NEXT:    vpopcntd %zmm0, %zmm1 {%k1}
; CHECK-NEXT:    vmovdqa64 %zmm1, %zmm0
; CHECK-NEXT:    retq
  %shuf0_0 = shufflevector <16 x i32> %v0, <16 x i32> poison, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14>
  %shuf0_1 = shufflevector <16 x i32> %v0, <16 x i32> %shuf0_0, <16 x i32> <i32 0, i32 17, i32 2, i32 19, i32 4, i32 21, i32 6, i32 23, i32 8, i32 25, i32 10, i32 27, i32 12, i32 29, i32 14, i32 31>
  %op0_0 = tail call <16 x i32> @llvm.umax.v16i32(<16 x i32> %shuf0_0, <16 x i32> %shuf0_1)
  %op1_0 = tail call <16 x i32> @llvm.ctpop.v16i32(<16 x i32> %v0)
  %r = shufflevector <16 x i32> %op0_0, <16 x i32> %op1_0, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 20, i32 21, i32 22, i32 23, i32 8, i32 9, i32 10, i32 11, i32 28, i32 29, i32 30, i32 31>
  ret <16 x i32> %r
}

; Signed-min intrinsic feeding the masked popcount select.
define <16 x i32> @combine_mask_with_smin(<16 x i32> %v0) {
; CHECK-LABEL: combine_mask_with_smin:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpshufd {{.*#+}} zmm1 = zmm0[1,0,3,2,5,4,7,6,9,8,11,10,13,12,15,14]
; CHECK-NEXT:    movw $-21846, %ax # imm = 0xAAAA
; CHECK-NEXT:    kmovw %eax, %k1
; CHECK-NEXT:    vpblendmd %zmm1, %zmm0, %zmm2 {%k1}
; CHECK-NEXT:    vpminsd %zmm2, %zmm1, %zmm1
; CHECK-NEXT:    movw $-3856, %ax # imm = 0xF0F0
; CHECK-NEXT:    kmovw %eax, %k1
; CHECK-NEXT:    vpopcntd %zmm0, %zmm1 {%k1}
; CHECK-NEXT:    vmovdqa64 %zmm1, %zmm0
; CHECK-NEXT:    retq
  %shuf0_0 = shufflevector <16 x i32> %v0, <16 x i32> poison, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14>
  %shuf0_1 = shufflevector <16 x i32> %v0, <16 x i32> %shuf0_0, <16 x i32> <i32 0, i32 17, i32 2, i32 19, i32 4, i32 21, i32 6, i32 23, i32 8, i32 25, i32 10, i32 27, i32 12, i32 29, i32 14, i32 31>
  %op0_0 = tail call <16 x i32> @llvm.smin.v16i32(<16 x i32> %shuf0_0, <16 x i32> %shuf0_1)
  %op1_0 = tail call <16 x i32> @llvm.ctpop.v16i32(<16 x i32> %v0)
  %r = shufflevector <16 x i32> %op0_0, <16 x i32> %op1_0, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 20, i32 21, i32 22, i32 23, i32 8, i32 9, i32 10, i32 11, i32 28, i32 29, i32 30, i32 31>
  ret <16 x i32> %r
}

; Signed-max intrinsic feeding the masked popcount select.
define <16 x i32> @combine_mask_with_smax(<16 x i32> %v0) {
; CHECK-LABEL: combine_mask_with_smax:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpshufd {{.*#+}} zmm1 = zmm0[1,0,3,2,5,4,7,6,9,8,11,10,13,12,15,14]
; CHECK-NEXT:    movw $-21846, %ax # imm = 0xAAAA
; CHECK-NEXT:    kmovw %eax, %k1
; CHECK-NEXT:    vpblendmd %zmm1, %zmm0, %zmm2 {%k1}
; CHECK-NEXT:    vpmaxsd %zmm2, %zmm1, %zmm1
; CHECK-NEXT:    movw $-3856, %ax # imm = 0xF0F0
; CHECK-NEXT:    kmovw %eax, %k1
; CHECK-NEXT:    vpopcntd %zmm0, %zmm1 {%k1}
; CHECK-NEXT:    vmovdqa64 %zmm1, %zmm0
; CHECK-NEXT:    retq
  %shuf0_0 = shufflevector <16 x i32> %v0, <16 x i32> poison, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14>
  %shuf0_1 = shufflevector <16 x i32> %v0, <16 x i32> %shuf0_0, <16 x i32> <i32 0, i32 17, i32 2, i32 19, i32 4, i32 21, i32 6, i32 23, i32 8, i32 25, i32 10, i32 27, i32 12, i32 29, i32 14, i32 31>
  %op0_0 = tail call <16 x i32> @llvm.smax.v16i32(<16 x i32> %shuf0_0, <16 x i32> %shuf0_1)
  %op1_0 = tail call <16 x i32> @llvm.ctpop.v16i32(<16 x i32> %v0)
  %r = shufflevector <16 x i32> %op0_0, <16 x i32> %op1_0, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 20, i32 21, i32 22, i32 23, i32 8, i32 9, i32 10, i32 11, i32 28, i32 29, i32 30, i32 31>
  ret <16 x i32> %r
}

; Variable 'shl' feeding the masked popcount select.
define <16 x i32> @combine_mask_with_shl(<16 x i32> %v0) {
; CHECK-LABEL: combine_mask_with_shl:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpshufd {{.*#+}} zmm1 = zmm0[1,0,3,2,5,4,7,6,9,8,11,10,13,12,15,14]
; CHECK-NEXT:    movw $2570, %ax # imm = 0xA0A
; CHECK-NEXT:    kmovw %eax, %k1
; CHECK-NEXT:    vpblendmd %zmm1, %zmm0, %zmm2 {%k1}
; CHECK-NEXT:    vpsllvd %zmm2, %zmm1, %zmm1
; CHECK-NEXT:    movw $-3856, %ax # imm = 0xF0F0
; CHECK-NEXT:    kmovw %eax, %k1
; CHECK-NEXT:    vpopcntd %zmm0, %zmm1 {%k1}
; CHECK-NEXT:    vmovdqa64 %zmm1, %zmm0
; CHECK-NEXT:    retq
  %shuf0_0 = shufflevector <16 x i32> %v0, <16 x i32> poison, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14>
  %shuf0_1 = shufflevector <16 x i32> %v0, <16 x i32> %shuf0_0, <16 x i32> <i32 0, i32 17, i32 2, i32 19, i32 4, i32 21, i32 6, i32 23, i32 8, i32 25, i32 10, i32 27, i32 12, i32 29, i32 14, i32 31>
  %op0_0 = shl <16 x i32> %shuf0_0, %shuf0_1
  %op1_0 = tail call <16 x i32> @llvm.ctpop.v16i32(<16 x i32> %v0)
  %r = shufflevector <16 x i32> %op0_0, <16 x i32> %op1_0, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 20, i32 21, i32 22, i32 23, i32 8, i32 9, i32 10, i32 11, i32 28, i32 29, i32 30, i32 31>
  ret <16 x i32> %r
}

; Variable 'ashr' feeding the masked popcount select.
define <16 x i32> @combine_mask_with_ashr(<16 x i32> %v0) {
; CHECK-LABEL: combine_mask_with_ashr:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpshufd {{.*#+}} zmm1 = zmm0[1,0,3,2,5,4,7,6,9,8,11,10,13,12,15,14]
; CHECK-NEXT:    movw $2570, %ax # imm = 0xA0A
; CHECK-NEXT:    kmovw %eax, %k1
; CHECK-NEXT:    vpblendmd %zmm1, %zmm0, %zmm2 {%k1}
; CHECK-NEXT:    vpsravd %zmm2, %zmm1, %zmm1
; CHECK-NEXT:    movw $-3856, %ax # imm = 0xF0F0
; CHECK-NEXT:    kmovw %eax, %k1
; CHECK-NEXT:    vpopcntd %zmm0, %zmm1 {%k1}
; CHECK-NEXT:    vmovdqa64 %zmm1, %zmm0
; CHECK-NEXT:    retq
  %shuf0_0 = shufflevector <16 x i32> %v0, <16 x i32> poison, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14>
  %shuf0_1 = shufflevector <16 x i32> %v0, <16 x i32> %shuf0_0, <16 x i32> <i32 0, i32 17, i32 2, i32 19, i32 4, i32 21, i32 6, i32 23, i32 8, i32 25, i32 10, i32 27, i32 12, i32 29, i32 14, i32 31>
  %op0_0 = ashr <16 x i32> %shuf0_0, %shuf0_1
  %op1_0 = tail call <16 x i32> @llvm.ctpop.v16i32(<16 x i32> %v0)
  %r = shufflevector <16 x i32> %op0_0, <16 x i32> %op1_0, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 20, i32 21, i32 22, i32 23, i32 8, i32 9, i32 10, i32 11, i32 28, i32 29, i32 30, i32 31>
  ret <16 x i32> %r
}

; Variable 'lshr' feeding the masked popcount select.
define <16 x i32> @combine_mask_with_lshr(<16 x i32> %v0) {
; CHECK-LABEL: combine_mask_with_lshr:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpshufd {{.*#+}} zmm1 = zmm0[1,0,3,2,5,4,7,6,9,8,11,10,13,12,15,14]
; CHECK-NEXT:    movw $2570, %ax # imm = 0xA0A
; CHECK-NEXT:    kmovw %eax, %k1
; CHECK-NEXT:    vpblendmd %zmm1, %zmm0, %zmm2 {%k1}
; CHECK-NEXT:    vpsrlvd %zmm2, %zmm1, %zmm1
; CHECK-NEXT:    movw $-3856, %ax # imm = 0xF0F0
; CHECK-NEXT:    kmovw %eax, %k1
; CHECK-NEXT:    vpopcntd %zmm0, %zmm1 {%k1}
; CHECK-NEXT:    vmovdqa64 %zmm1, %zmm0
; CHECK-NEXT:    retq
  %shuf0_0 = shufflevector <16 x i32> %v0, <16 x i32> poison, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14>
  %shuf0_1 = shufflevector <16 x i32> %v0, <16 x i32> %shuf0_0, <16 x i32> <i32 0, i32 17, i32 2, i32 19, i32 4, i32 21, i32 6, i32 23, i32 8, i32 25, i32 10, i32 27, i32 12, i32 29, i32 14, i32 31>
  %op0_0 = lshr <16 x i32> %shuf0_0, %shuf0_1
  %op1_0 = tail call <16 x i32> @llvm.ctpop.v16i32(<16 x i32> %v0)
  %r = shufflevector <16 x i32> %op0_0, <16 x i32> %op1_0, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 20, i32 21, i32 22, i32 23, i32 8, i32 9, i32 10, i32 11, i32 28, i32 29, i32 30, i32 31>
  ret <16 x i32> %r
}

; Intrinsic declarations used by the tests above.
declare <16 x i32> @llvm.smin.v16i32(<16 x i32>, <16 x i32>)
declare <16 x i32> @llvm.smax.v16i32(<16 x i32>, <16 x i32>)
declare <16 x i32> @llvm.umin.v16i32(<16 x i32>, <16 x i32>)
declare <16 x i32> @llvm.umax.v16i32(<16 x i32>, <16 x i32>)
declare <16 x i32> @llvm.ctpop.v16i32(<16 x i32>)
declare <16 x i32> @llvm.abs.v16i32(<16 x i32>, i1)