; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=riscv64 -mattr=+v | FileCheck %s

declare <vscale x 2 x i16> @llvm.vp.merge.nxv2i16(<vscale x 2 x i1>, <vscale x 2 x i16>, <vscale x 2 x i16>, i32)
declare <vscale x 2 x i32> @llvm.vp.merge.nxv2i32(<vscale x 2 x i1>, <vscale x 2 x i32>, <vscale x 2 x i32>, i32)
declare <vscale x 2 x float> @llvm.vp.merge.nxv2f32(<vscale x 2 x i1>, <vscale x 2 x float>, <vscale x 2 x float>, i32)
declare <vscale x 2 x double> @llvm.vp.merge.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>, i32)

; Test binary operator with vp.merge and vp.add.
declare <vscale x 2 x i32> @llvm.vp.add.nxv2i32(<vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i1>, i32)
define <vscale x 2 x i32> @vpmerge_vpadd(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> %x, <vscale x 2 x i32> %y, <vscale x 2 x i1> %m, i32 zeroext %vl) {
; CHECK-LABEL: vpmerge_vpadd:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m1, tu, mu
; CHECK-NEXT:    vadd.vv v8, v9, v10, v0.t
; CHECK-NEXT:    ret
  %splat = insertelement <vscale x 2 x i1> poison, i1 -1, i32 0
  %mask = shufflevector <vscale x 2 x i1> %splat, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
  %a = call <vscale x 2 x i32> @llvm.vp.add.nxv2i32(<vscale x 2 x i32> %x, <vscale x 2 x i32> %y, <vscale x 2 x i1> %mask, i32 %vl)
  %b = call <vscale x 2 x i32> @llvm.vp.merge.nxv2i32(<vscale x 2 x i1> %m, <vscale x 2 x i32> %a, <vscale x 2 x i32> %passthru, i32 %vl)
  ret <vscale x 2 x i32> %b
}

; Test glued node of merge should not be deleted.
declare <vscale x 2 x i1> @llvm.vp.icmp.nxv2i32(<vscale x 2 x i32>, <vscale x 2 x i32>, metadata, <vscale x 2 x i1>, i32)
define <vscale x 2 x i32> @vpmerge_vpadd2(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> %x, <vscale x 2 x i32> %y, i32 zeroext %vl) {
; CHECK-LABEL: vpmerge_vpadd2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
; CHECK-NEXT:    vmseq.vv v0, v9, v10
; CHECK-NEXT:    vsetvli zero, zero, e32, m1, tu, mu
; CHECK-NEXT:    vadd.vv v8, v9, v10, v0.t
; CHECK-NEXT:    ret
  %splat = insertelement <vscale x 2 x i1> poison, i1 -1, i32 0
  %mask = shufflevector <vscale x 2 x i1> %splat, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
  %a = call <vscale x 2 x i32> @llvm.vp.add.nxv2i32(<vscale x 2 x i32> %x, <vscale x 2 x i32> %y, <vscale x 2 x i1> %mask, i32 %vl)
  %m = call <vscale x 2 x i1> @llvm.vp.icmp.nxv2i32(<vscale x 2 x i32> %x, <vscale x 2 x i32> %y, metadata !"eq", <vscale x 2 x i1> %mask, i32 %vl)
  %b = call <vscale x 2 x i32> @llvm.vp.merge.nxv2i32(<vscale x 2 x i1> %m, <vscale x 2 x i32> %a, <vscale x 2 x i32> %passthru, i32 %vl)
  ret <vscale x 2 x i32> %b
}

; Test vp.merge has all-ones mask.
define <vscale x 2 x i32> @vpmerge_vpadd3(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> %x, <vscale x 2 x i32> %y, i32 zeroext %vl) {
; CHECK-LABEL: vpmerge_vpadd3:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m1, tu, ma
; CHECK-NEXT:    vadd.vv v8, v9, v10
; CHECK-NEXT:    ret
  %splat = insertelement <vscale x 2 x i1> poison, i1 -1, i32 0
  %mask = shufflevector <vscale x 2 x i1> %splat, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
  %a = call <vscale x 2 x i32> @llvm.vp.add.nxv2i32(<vscale x 2 x i32> %x, <vscale x 2 x i32> %y, <vscale x 2 x i1> %mask, i32 %vl)
  %b = call <vscale x 2 x i32> @llvm.vp.merge.nxv2i32(<vscale x 2 x i1> %mask, <vscale x 2 x i32> %a, <vscale x 2 x i32> %passthru, i32 %vl)
  ret <vscale x 2 x i32> %b
}

; Test float binary operator with vp.merge and vp.fadd.
declare <vscale x 2 x float> @llvm.vp.fadd.nxv2f32(<vscale x 2 x float>, <vscale x 2 x float>, <vscale x 2 x i1>, i32)
define <vscale x 2 x float> @vpmerge_vpfadd(<vscale x 2 x float> %passthru, <vscale x 2 x float> %x, <vscale x 2 x float> %y, <vscale x 2 x i1> %m, i32 zeroext %vl) {
; CHECK-LABEL: vpmerge_vpfadd:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m1, tu, mu
; CHECK-NEXT:    vfadd.vv v8, v9, v10, v0.t
; CHECK-NEXT:    ret
  %splat = insertelement <vscale x 2 x i1> poison, i1 -1, i32 0
  %mask = shufflevector <vscale x 2 x i1> %splat, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
  %a = call <vscale x 2 x float> @llvm.vp.fadd.nxv2f32(<vscale x 2 x float> %x, <vscale x 2 x float> %y, <vscale x 2 x i1> %mask, i32 %vl)
  %b = call <vscale x 2 x float> @llvm.vp.merge.nxv2f32(<vscale x 2 x i1> %m, <vscale x 2 x float> %a, <vscale x 2 x float> %passthru, i32 %vl)
  ret <vscale x 2 x float> %b
}

; Test for binary operator with specific EEW by riscv.vrgatherei16.
declare <vscale x 2 x i32> @llvm.riscv.vrgatherei16.vv.nxv2i32.i64(<vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i16>, i64)
define <vscale x 2 x i32> @vpmerge_vrgatherei16(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> %x, <vscale x 2 x i16> %y, <vscale x 2 x i1> %m, i32 zeroext %vl) {
; CHECK-LABEL: vpmerge_vrgatherei16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
; CHECK-NEXT:    vrgatherei16.vv v8, v9, v10
; CHECK-NEXT:    ret
  %1 = zext i32 %vl to i64
  %2 = tail call <vscale x 2 x i32> @llvm.riscv.vrgatherei16.vv.nxv2i32.i64(<vscale x 2 x i32> undef, <vscale x 2 x i32> %x, <vscale x 2 x i16> %y, i64 %1)
  %3 = tail call <vscale x 2 x i32> @llvm.vp.merge.nxv2i32(<vscale x 2 x i1> %m, <vscale x 2 x i32> %2, <vscale x 2 x i32> %passthru, i32 %vl)
  ret <vscale x 2 x i32> %2
}

; Test conversion by fptosi.
declare <vscale x 2 x i16> @llvm.vp.fptosi.nxv2i16.nxv2f32(<vscale x 2 x float>, <vscale x 2 x i1>, i32)
define <vscale x 2 x i16> @vpmerge_vpfptosi(<vscale x 2 x i16> %passthru, <vscale x 2 x float> %x, <vscale x 2 x i1> %m, i32 zeroext %vl) {
; CHECK-LABEL: vpmerge_vpfptosi:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, tu, mu
; CHECK-NEXT:    vfncvt.rtz.x.f.w v8, v9, v0.t
; CHECK-NEXT:    ret
  %splat = insertelement <vscale x 2 x i1> poison, i1 -1, i32 0
  %mask = shufflevector <vscale x 2 x i1> %splat, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
  %a = call <vscale x 2 x i16> @llvm.vp.fptosi.nxv2i16.nxv2f32(<vscale x 2 x float> %x, <vscale x 2 x i1> %mask, i32 %vl)
  %b = call <vscale x 2 x i16> @llvm.vp.merge.nxv2i16(<vscale x 2 x i1> %m, <vscale x 2 x i16> %a, <vscale x 2 x i16> %passthru, i32 %vl)
  ret <vscale x 2 x i16> %b
}

; Test conversion by sitofp.
declare <vscale x 2 x float> @llvm.vp.sitofp.nxv2f32.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i1>, i32)
define <vscale x 2 x float> @vpmerge_vpsitofp(<vscale x 2 x float> %passthru, <vscale x 2 x i64> %x, <vscale x 2 x i1> %m, i32 zeroext %vl) {
; CHECK-LABEL: vpmerge_vpsitofp:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m1, tu, mu
; CHECK-NEXT:    vfncvt.f.x.w v8, v10, v0.t
; CHECK-NEXT:    ret
  %splat = insertelement <vscale x 2 x i1> poison, i1 -1, i32 0
  %mask = shufflevector <vscale x 2 x i1> %splat, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
  %a = call <vscale x 2 x float> @llvm.vp.sitofp.nxv2f32.nxv2i64(<vscale x 2 x i64> %x, <vscale x 2 x i1> %mask, i32 %vl)
  %b = call <vscale x 2 x float> @llvm.vp.merge.nxv2f32(<vscale x 2 x i1> %m, <vscale x 2 x float> %a, <vscale x 2 x float> %passthru, i32 %vl)
  ret <vscale x 2 x float> %b
}

; Test integer extension by vp.zext.
declare <vscale x 2 x i32> @llvm.vp.zext.nxv2i32.nxv2i8(<vscale x 2 x i8>, <vscale x 2 x i1>, i32)
define <vscale x 2 x i32> @vpmerge_vpzext(<vscale x 2 x i32> %passthru, <vscale x 2 x i8> %x, <vscale x 2 x i1> %m, i32 zeroext %vl) {
; CHECK-LABEL: vpmerge_vpzext:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m1, tu, mu
; CHECK-NEXT:    vzext.vf4 v8, v9, v0.t
; CHECK-NEXT:    ret
  %splat = insertelement <vscale x 2 x i1> poison, i1 -1, i32 0
  %mask = shufflevector <vscale x 2 x i1> %splat, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
  %a = call <vscale x 2 x i32> @llvm.vp.zext.nxv2i32.nxv2i8(<vscale x 2 x i8> %x, <vscale x 2 x i1> %mask, i32 %vl)
  %b = call <vscale x 2 x i32> @llvm.vp.merge.nxv2i32(<vscale x 2 x i1> %m, <vscale x 2 x i32> %a, <vscale x 2 x i32> %passthru, i32 %vl)
  ret <vscale x 2 x i32> %b
}

; Test integer truncation by vp.trunc.
declare <vscale x 2 x i32> @llvm.vp.trunc.nxv2i32.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i1>, i32)
define <vscale x 2 x i32> @vpmerge_vptrunc(<vscale x 2 x i32> %passthru, <vscale x 2 x i64> %x, <vscale x 2 x i1> %m, i32 zeroext %vl) {
; CHECK-LABEL: vpmerge_vptrunc:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m1, tu, mu
; CHECK-NEXT:    vnsrl.wi v8, v10, 0, v0.t
; CHECK-NEXT:    ret
  %splat = insertelement <vscale x 2 x i1> poison, i1 -1, i32 0
  %mask = shufflevector <vscale x 2 x i1> %splat, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
  %a = call <vscale x 2 x i32> @llvm.vp.trunc.nxv2i32.nxv2i64(<vscale x 2 x i64> %x, <vscale x 2 x i1> %mask, i32 %vl)
  %b = call <vscale x 2 x i32> @llvm.vp.merge.nxv2i32(<vscale x 2 x i1> %m, <vscale x 2 x i32> %a, <vscale x 2 x i32> %passthru, i32 %vl)
  ret <vscale x 2 x i32> %b
}

; Test float extension by vp.fpext.
declare <vscale x 2 x double> @llvm.vp.fpext.nxv2f64.nxv2f32(<vscale x 2 x float>, <vscale x 2 x i1>, i32)
define <vscale x 2 x double> @vpmerge_vpfpext(<vscale x 2 x double> %passthru, <vscale x 2 x float> %x, <vscale x 2 x i1> %m, i32 zeroext %vl) {
; CHECK-LABEL: vpmerge_vpfpext:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m1, tu, mu
; CHECK-NEXT:    vfwcvt.f.f.v v8, v10, v0.t
; CHECK-NEXT:    ret
  %splat = insertelement <vscale x 2 x i1> poison, i1 -1, i32 0
  %mask = shufflevector <vscale x 2 x i1> %splat, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
  %a = call <vscale x 2 x double> @llvm.vp.fpext.nxv2f64.nxv2f32(<vscale x 2 x float> %x, <vscale x 2 x i1> %mask, i32 %vl)
  %b = call <vscale x 2 x double> @llvm.vp.merge.nxv2f64(<vscale x 2 x i1> %m, <vscale x 2 x double> %a, <vscale x 2 x double> %passthru, i32 %vl)
  ret <vscale x 2 x double> %b
}

; Test float truncation by vp.fptrunc.
declare <vscale x 2 x float> @llvm.vp.fptrunc.nxv2f32.nxv2f64(<vscale x 2 x double>, <vscale x 2 x i1>, i32)
define <vscale x 2 x float> @vpmerge_vpfptrunc(<vscale x 2 x float> %passthru, <vscale x 2 x double> %x, <vscale x 2 x i1> %m, i32 zeroext %vl) {
; CHECK-LABEL: vpmerge_vpfptrunc:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m1, tu, mu
; CHECK-NEXT:    vfncvt.f.f.w v8, v10, v0.t
; CHECK-NEXT:    ret
  %splat = insertelement <vscale x 2 x i1> poison, i1 -1, i32 0
  %mask = shufflevector <vscale x 2 x i1> %splat, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
  %a = call <vscale x 2 x float> @llvm.vp.fptrunc.nxv2f32.nxv2f64(<vscale x 2 x double> %x, <vscale x 2 x i1> %mask, i32 %vl)
  %b = call <vscale x 2 x float> @llvm.vp.merge.nxv2f32(<vscale x 2 x i1> %m, <vscale x 2 x float> %a, <vscale x 2 x float> %passthru, i32 %vl)
  ret <vscale x 2 x float> %b
}

; Test load operation by vp.load.
declare @llvm.vp.load.nxv2i32.p0( *, , i32) define @vpmerge_vpload( %passthru, * %p, %m, i32 zeroext %vl) { ; CHECK-LABEL: vpmerge_vpload: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a1, e32, m1, tu, mu ; CHECK-NEXT: vle32.v v8, (a0), v0.t ; CHECK-NEXT: ret %splat = insertelement poison, i1 -1, i32 0 %mask = shufflevector %splat, poison, zeroinitializer %a = call @llvm.vp.load.nxv2i32.p0( * %p, %mask, i32 %vl) %b = call @llvm.vp.merge.nxv2i32( %m, %a, %passthru, i32 %vl) ret %b } ; Test result has chain and glued node. define @vpmerge_vpload2( %passthru, * %p, %x, %y, i32 zeroext %vl) { ; CHECK-LABEL: vpmerge_vpload2: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma ; CHECK-NEXT: vmseq.vv v0, v9, v10 ; CHECK-NEXT: vsetvli zero, zero, e32, m1, tu, mu ; CHECK-NEXT: vle32.v v8, (a0), v0.t ; CHECK-NEXT: ret %splat = insertelement poison, i1 -1, i32 0 %mask = shufflevector %splat, poison, zeroinitializer %a = call @llvm.vp.load.nxv2i32.p0( * %p, %mask, i32 %vl) %m = call @llvm.vp.icmp.nxv2i32( %x, %y, metadata !"eq", %mask, i32 %vl) %b = call @llvm.vp.merge.nxv2i32( %m, %a, %passthru, i32 %vl) ret %b } ; Test result has chain output of true operand of merge.vvm. define void @vpmerge_vpload_store( %passthru, * %p, %m, i32 zeroext %vl) { ; CHECK-LABEL: vpmerge_vpload_store: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a1, e32, m1, tu, mu ; CHECK-NEXT: vle32.v v8, (a0), v0.t ; CHECK-NEXT: vs1r.v v8, (a0) ; CHECK-NEXT: ret %splat = insertelement poison, i1 -1, i32 0 %mask = shufflevector %splat, poison, zeroinitializer %a = call @llvm.vp.load.nxv2i32.p0( * %p, %mask, i32 %vl) %b = call @llvm.vp.merge.nxv2i32( %m, %a, %passthru, i32 %vl) store %b, * %p ret void } ; FIXME: Merge vmerge.vvm and vleffN.v declare { , i64 } @llvm.riscv.vleff.nxv2i32(, *, i64) define @vpmerge_vleff( %passthru, * %p, %m, i32 zeroext %vl) { ; CHECK-LABEL: vpmerge_vleff: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma ; CHECK-NEXT: vle32ff.v v9, (a0) ; CHECK-NEXT: vsetvli zero, a1, e32, m1, tu, ma ; CHECK-NEXT: vmerge.vvm v8, v8, v9, v0 ; CHECK-NEXT: ret %1 = zext i32 %vl to i64 %a = call { , i64 } @llvm.riscv.vleff.nxv2i32( undef, * %p, i64 %1) %b = extractvalue { , i64 } %a, 0 %c = call @llvm.vp.merge.nxv2i32( %m, %b, %passthru, i32 %vl) ret %c } ; Test strided load by riscv.vlse declare @llvm.riscv.vlse.nxv2i32(, *, i64, i64) define @vpmerge_vlse( %passthru, * %p, %m, i64 %s, i32 zeroext %vl) { ; CHECK-LABEL: vpmerge_vlse: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a2, e32, m1, tu, mu ; CHECK-NEXT: vlse32.v v8, (a0), a1, v0.t ; CHECK-NEXT: ret %1 = zext i32 %vl to i64 %a = call @llvm.riscv.vlse.nxv2i32( undef, * %p, i64 %s, i64 %1) %b = call @llvm.vp.merge.nxv2i32( %m, %a, %passthru, i32 %vl) ret %b } ; Test indexed load by riscv.vluxei declare @llvm.riscv.vluxei.nxv2i32.nxv2i64(, *, , i64) define @vpmerge_vluxei( %passthru, * %p, %idx, %m, i64 %s, i32 zeroext %vl) { ; CHECK-LABEL: vpmerge_vluxei: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a2, e32, m1, tu, mu ; CHECK-NEXT: vluxei64.v v8, (a0), v10, v0.t ; CHECK-NEXT: ret %1 = zext i32 %vl to i64 %a = call @llvm.riscv.vluxei.nxv2i32.nxv2i64( undef, * %p, %idx, i64 %1) %b = call @llvm.vp.merge.nxv2i32( %m, %a, %passthru, i32 %vl) ret %b } ; Test vector index by riscv.vid declare @llvm.riscv.vid.nxv2i32(, i64) define @vpmerge_vid( %passthru, %m, i32 zeroext %vl) { ; CHECK-LABEL: vpmerge_vid: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu ; CHECK-NEXT: vid.v v8, v0.t ; CHECK-NEXT: ret %1 = zext i32 %vl to i64 %a = 
call @llvm.riscv.vid.nxv2i32( undef, i64 %1) %b = call @llvm.vp.merge.nxv2i32( %m, %a, %passthru, i32 %vl) ret %b } ; Test not combine VIOTA_M and VMERGE_VVM without true mask. declare @llvm.riscv.viota.nxv2i32(, , i64) define @vpmerge_viota( %passthru, %m, %vm, i32 zeroext %vl) { ; CHECK-LABEL: vpmerge_viota: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma ; CHECK-NEXT: viota.m v10, v9 ; CHECK-NEXT: vsetvli zero, zero, e32, m1, tu, ma ; CHECK-NEXT: vmerge.vvm v8, v8, v10, v0 ; CHECK-NEXT: ret %1 = zext i32 %vl to i64 %a = call @llvm.riscv.viota.nxv2i32( undef, %vm, i64 %1) %b = call @llvm.vp.merge.nxv2i32( %m, %a, %passthru, i32 %vl) ret %b } ; Test combine VIOTA_M and VMERGE_VVM with true mask. define @vpmerge_viota2( %passthru, %vm, i32 zeroext %vl) { ; CHECK-LABEL: vpmerge_viota2: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, ma ; CHECK-NEXT: viota.m v8, v0 ; CHECK-NEXT: ret %1 = zext i32 %vl to i64 %a = call @llvm.riscv.viota.nxv2i32( undef, %vm, i64 %1) %splat = insertelement poison, i1 -1, i32 0 %true = shufflevector %splat, poison, zeroinitializer %b = call @llvm.vp.merge.nxv2i32( %true, %a, %passthru, i32 %vl) ret %b } ; Test riscv.vfclass declare @llvm.riscv.vfclass.nxv2i32(, , i64) define @vpmerge_vflcass( %passthru, %vf, %m, i32 zeroext %vl) { ; CHECK-LABEL: vpmerge_vflcass: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu ; CHECK-NEXT: vfclass.v v8, v9, v0.t ; CHECK-NEXT: ret %1 = zext i32 %vl to i64 %a = call @llvm.riscv.vfclass.nxv2i32( undef, %vf, i64 %1) %b = call @llvm.vp.merge.nxv2i32( %m, %a, %passthru, i32 %vl) ret %b } ; Test riscv.vfsqrt declare @llvm.riscv.vfsqrt.nxv2f32(, , i64, i64) define @vpmerge_vfsqrt( %passthru, %vf, %m, i32 zeroext %vl) { ; CHECK-LABEL: vpmerge_vfsqrt: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu ; CHECK-NEXT: vfsqrt.v v8, v9, v0.t ; CHECK-NEXT: ret %1 = zext i32 %vl to i64 %a = call @llvm.riscv.vfsqrt.nxv2f32( undef, %vf, i64 7, i64 %1) %b = call @llvm.vp.merge.nxv2f32( %m, %a, %passthru, i32 %vl) ret %b } ; Test reciprocal operation by riscv.vfrec7 declare @llvm.riscv.vfrec7.nxv2f32(, , i64, i64) define @vpmerge_vfrec7( %passthru, %vf, %m, i32 zeroext %vl) { ; CHECK-LABEL: vpmerge_vfrec7: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu ; CHECK-NEXT: vfrec7.v v8, v9, v0.t ; CHECK-NEXT: ret %1 = zext i32 %vl to i64 %a = call @llvm.riscv.vfrec7.nxv2f32( undef, %vf, i64 7, i64 %1) %b = call @llvm.vp.merge.nxv2f32( %m, %a, %passthru, i32 %vl) ret %b } ; Test vector operations with VLMAX vector length. ; Test binary operator with vp.merge and add. define @vpmerge_add( %passthru, %x, %y, %m, i32 zeroext %vl) { ; CHECK-LABEL: vpmerge_add: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu ; CHECK-NEXT: vadd.vv v8, v9, v10, v0.t ; CHECK-NEXT: ret %a = add %x, %y %b = call @llvm.vp.merge.nxv2i32( %m, %a, %passthru, i32 %vl) ret %b } ; Test binary operator with vp.merge and fadd. define @vpmerge_fadd( %passthru, %x, %y, %m, i32 zeroext %vl) { ; CHECK-LABEL: vpmerge_fadd: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu ; CHECK-NEXT: vfadd.vv v8, v9, v10, v0.t ; CHECK-NEXT: ret %a = fadd %x, %y %b = call @llvm.vp.merge.nxv2f32( %m, %a, %passthru, i32 %vl) ret %b } ; This shouldn't be folded because we need to preserve exceptions with ; "fpexcept.strict" exception behaviour, and masking may hide them. 
define @vpmerge_constrained_fadd( %passthru, %x, %y, %m, i64 %vl) strictfp { ; CHECK-LABEL: vpmerge_constrained_fadd: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e32, m1, ta, ma ; CHECK-NEXT: vfadd.vv v9, v9, v10 ; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, ma ; CHECK-NEXT: vmerge.vvm v8, v8, v9, v0 ; CHECK-NEXT: ret %a = call @llvm.experimental.constrained.fadd( %x, %y, metadata !"round.dynamic", metadata !"fpexcept.strict") strictfp %b = call @llvm.riscv.vmerge.nxv2f32.nxv2f32( %passthru, %passthru, %a, %m, i64 %vl) strictfp ret %b } declare @llvm.experimental.constrained.fadd(, , metadata, metadata) declare @llvm.riscv.vmerge.nxv2f32.nxv2f32(, , , , i64) ; This shouldn't be folded because we need to preserve exceptions with ; "fpexcept.strict" exception behaviour, and masking may hide them. define @vpmerge_constrained_fadd_vlmax( %passthru, %x, %y, %m) strictfp { ; CHECK-LABEL: vpmerge_constrained_fadd_vlmax: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma ; CHECK-NEXT: vfadd.vv v9, v9, v10 ; CHECK-NEXT: vsetvli zero, zero, e32, m1, tu, ma ; CHECK-NEXT: vmerge.vvm v8, v8, v9, v0 ; CHECK-NEXT: ret %a = call @llvm.experimental.constrained.fadd( %x, %y, metadata !"round.dynamic", metadata !"fpexcept.strict") strictfp %b = call @llvm.riscv.vmerge.nxv2f32.nxv2f32( %passthru, %passthru, %a, %m, i64 -1) strictfp ret %b } ; Test conversion by fptosi. define @vpmerge_fptosi( %passthru, %x, %m, i32 zeroext %vl) { ; CHECK-LABEL: vpmerge_fptosi: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, mu ; CHECK-NEXT: vfncvt.rtz.x.f.w v8, v9, v0.t ; CHECK-NEXT: ret %a = fptosi %x to %b = call @llvm.vp.merge.nxv2i16( %m, %a, %passthru, i32 %vl) ret %b } ; Test conversion by sitofp. define @vpmerge_sitofp( %passthru, %x, %m, i32 zeroext %vl) { ; CHECK-LABEL: vpmerge_sitofp: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu ; CHECK-NEXT: vfncvt.f.x.w v8, v10, v0.t ; CHECK-NEXT: ret %a = sitofp %x to %b = call @llvm.vp.merge.nxv2f32( %m, %a, %passthru, i32 %vl) ret %b } ; Test float extension by fpext. define @vpmerge_fpext( %passthru, %x, %m, i32 zeroext %vl) { ; CHECK-LABEL: vpmerge_fpext: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu ; CHECK-NEXT: vfwcvt.f.f.v v8, v10, v0.t ; CHECK-NEXT: ret %a = fpext %x to %b = call @llvm.vp.merge.nxv2f64( %m, %a, %passthru, i32 %vl) ret %b } ; Test float truncation by fptrunc. define @vpmerge_fptrunc( %passthru, %x, %m, i32 zeroext %vl) { ; CHECK-LABEL: vpmerge_fptrunc: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu ; CHECK-NEXT: vfncvt.f.f.w v8, v10, v0.t ; CHECK-NEXT: ret %a = fptrunc %x to %b = call @llvm.vp.merge.nxv2f32( %m, %a, %passthru, i32 %vl) ret %b } ; Test integer extension by zext. define @vpmerge_zext( %passthru, %x, %m, i32 zeroext %vl) { ; CHECK-LABEL: vpmerge_zext: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu ; CHECK-NEXT: vzext.vf4 v8, v9, v0.t ; CHECK-NEXT: ret %a = zext %x to %b = call @llvm.vp.merge.nxv2i32( %m, %a, %passthru, i32 %vl) ret %b } ; Test integer truncation by trunc. 
define <vscale x 2 x i32> @vpmerge_trunc(<vscale x 2 x i32> %passthru, <vscale x 2 x i64> %x, <vscale x 2 x i1> %m, i32 zeroext %vl) {
; CHECK-LABEL: vpmerge_trunc:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m1, tu, mu
; CHECK-NEXT:    vnsrl.wi v8, v10, 0, v0.t
; CHECK-NEXT:    ret
  %a = trunc <vscale x 2 x i64> %x to <vscale x 2 x i32>
  %b = call <vscale x 2 x i32> @llvm.vp.merge.nxv2i32(<vscale x 2 x i1> %m, <vscale x 2 x i32> %a, <vscale x 2 x i32> %passthru, i32 %vl)
  ret <vscale x 2 x i32> %b
}

declare <vscale x 2 x i16> @llvm.vp.select.nxv2i16(<vscale x 2 x i1>, <vscale x 2 x i16>, <vscale x 2 x i16>, i32)
declare <vscale x 2 x i32> @llvm.vp.select.nxv2i32(<vscale x 2 x i1>, <vscale x 2 x i32>, <vscale x 2 x i32>, i32)
declare <vscale x 2 x float> @llvm.vp.select.nxv2f32(<vscale x 2 x i1>, <vscale x 2 x float>, <vscale x 2 x float>, i32)
declare <vscale x 2 x double> @llvm.vp.select.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>, i32)

; Test binary operator with vp.select and vp.add.
define <vscale x 2 x i32> @vpselect_vpadd(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> %x, <vscale x 2 x i32> %y, <vscale x 2 x i1> %m, i32 zeroext %vl) {
; CHECK-LABEL: vpselect_vpadd:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
; CHECK-NEXT:    vadd.vv v8, v9, v10, v0.t
; CHECK-NEXT:    ret
  %splat = insertelement <vscale x 2 x i1> poison, i1 -1, i32 0
  %mask = shufflevector <vscale x 2 x i1> %splat, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
  %a = call <vscale x 2 x i32> @llvm.vp.add.nxv2i32(<vscale x 2 x i32> %x, <vscale x 2 x i32> %y, <vscale x 2 x i1> %mask, i32 %vl)
  %b = call <vscale x 2 x i32> @llvm.vp.select.nxv2i32(<vscale x 2 x i1> %m, <vscale x 2 x i32> %a, <vscale x 2 x i32> %passthru, i32 %vl)
  ret <vscale x 2 x i32> %b
}

; Test glued node of select should not be deleted.
define <vscale x 2 x i32> @vpselect_vpadd2(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> %x, <vscale x 2 x i32> %y, i32 zeroext %vl) {
; CHECK-LABEL: vpselect_vpadd2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
; CHECK-NEXT:    vmseq.vv v0, v9, v10
; CHECK-NEXT:    vadd.vv v8, v9, v10, v0.t
; CHECK-NEXT:    ret
  %splat = insertelement <vscale x 2 x i1> poison, i1 -1, i32 0
  %mask = shufflevector <vscale x 2 x i1> %splat, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
  %a = call <vscale x 2 x i32> @llvm.vp.add.nxv2i32(<vscale x 2 x i32> %x, <vscale x 2 x i32> %y, <vscale x 2 x i1> %mask, i32 %vl)
  %m = call <vscale x 2 x i1> @llvm.vp.icmp.nxv2i32(<vscale x 2 x i32> %x, <vscale x 2 x i32> %y, metadata !"eq", <vscale x 2 x i1> %mask, i32 %vl)
  %b = call <vscale x 2 x i32> @llvm.vp.select.nxv2i32(<vscale x 2 x i1> %m, <vscale x 2 x i32> %a, <vscale x 2 x i32> %passthru, i32 %vl)
  ret <vscale x 2 x i32> %b
}

; Test vp.select has all-ones mask.
define <vscale x 2 x i32> @vpselect_vpadd3(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> %x, <vscale x 2 x i32> %y, i32 zeroext %vl) {
; CHECK-LABEL: vpselect_vpadd3:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
; CHECK-NEXT:    vadd.vv v8, v9, v10
; CHECK-NEXT:    ret
  %splat = insertelement <vscale x 2 x i1> poison, i1 -1, i32 0
  %mask = shufflevector <vscale x 2 x i1> %splat, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
  %a = call <vscale x 2 x i32> @llvm.vp.add.nxv2i32(<vscale x 2 x i32> %x, <vscale x 2 x i32> %y, <vscale x 2 x i1> %mask, i32 %vl)
  %b = call <vscale x 2 x i32> @llvm.vp.select.nxv2i32(<vscale x 2 x i1> %mask, <vscale x 2 x i32> %a, <vscale x 2 x i32> %passthru, i32 %vl)
  ret <vscale x 2 x i32> %b
}

; Test float binary operator with vp.select and vp.fadd.
define <vscale x 2 x float> @vpselect_vpfadd(<vscale x 2 x float> %passthru, <vscale x 2 x float> %x, <vscale x 2 x float> %y, <vscale x 2 x i1> %m, i32 zeroext %vl) {
; CHECK-LABEL: vpselect_vpfadd:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
; CHECK-NEXT:    vfadd.vv v8, v9, v10, v0.t
; CHECK-NEXT:    ret
  %splat = insertelement <vscale x 2 x i1> poison, i1 -1, i32 0
  %mask = shufflevector <vscale x 2 x i1> %splat, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
  %a = call <vscale x 2 x float> @llvm.vp.fadd.nxv2f32(<vscale x 2 x float> %x, <vscale x 2 x float> %y, <vscale x 2 x i1> %mask, i32 %vl)
  %b = call <vscale x 2 x float> @llvm.vp.select.nxv2f32(<vscale x 2 x i1> %m, <vscale x 2 x float> %a, <vscale x 2 x float> %passthru, i32 %vl)
  ret <vscale x 2 x float> %b
}

; Test for binary operator with specific EEW by riscv.vrgatherei16.
define <vscale x 2 x i32> @vpselect_vrgatherei16(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> %x, <vscale x 2 x i16> %y, <vscale x 2 x i1> %m, i32 zeroext %vl) {
; CHECK-LABEL: vpselect_vrgatherei16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
; CHECK-NEXT:    vrgatherei16.vv v8, v9, v10
; CHECK-NEXT:    ret
  %1 = zext i32 %vl to i64
  %2 = tail call <vscale x 2 x i32> @llvm.riscv.vrgatherei16.vv.nxv2i32.i64(<vscale x 2 x i32> undef, <vscale x 2 x i32> %x, <vscale x 2 x i16> %y, i64 %1)
  %3 = tail call <vscale x 2 x i32> @llvm.vp.select.nxv2i32(<vscale x 2 x i1> %m, <vscale x 2 x i32> %2, <vscale x 2 x i32> %passthru, i32 %vl)
  ret <vscale x 2 x i32> %2
}

; Test conversion by fptosi.
define <vscale x 2 x i16> @vpselect_vpfptosi(<vscale x 2 x i16> %passthru, <vscale x 2 x float> %x, <vscale x 2 x i1> %m, i32 zeroext %vl) {
; CHECK-LABEL: vpselect_vpfptosi:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
; CHECK-NEXT:    vfncvt.rtz.x.f.w v8, v9, v0.t
; CHECK-NEXT:    ret
  %splat = insertelement <vscale x 2 x i1> poison, i1 -1, i32 0
  %mask = shufflevector <vscale x 2 x i1> %splat, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
  %a = call <vscale x 2 x i16> @llvm.vp.fptosi.nxv2i16.nxv2f32(<vscale x 2 x float> %x, <vscale x 2 x i1> %mask, i32 %vl)
  %b = call <vscale x 2 x i16> @llvm.vp.select.nxv2i16(<vscale x 2 x i1> %m, <vscale x 2 x i16> %a, <vscale x 2 x i16> %passthru, i32 %vl)
  ret <vscale x 2 x i16> %b
}

; Test conversion by sitofp.
define <vscale x 2 x float> @vpselect_vpsitofp(<vscale x 2 x float> %passthru, <vscale x 2 x i64> %x, <vscale x 2 x i1> %m, i32 zeroext %vl) {
; CHECK-LABEL: vpselect_vpsitofp:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
; CHECK-NEXT:    vfncvt.f.x.w v8, v10, v0.t
; CHECK-NEXT:    ret
  %splat = insertelement <vscale x 2 x i1> poison, i1 -1, i32 0
  %mask = shufflevector <vscale x 2 x i1> %splat, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
  %a = call <vscale x 2 x float> @llvm.vp.sitofp.nxv2f32.nxv2i64(<vscale x 2 x i64> %x, <vscale x 2 x i1> %mask, i32 %vl)
  %b = call <vscale x 2 x float> @llvm.vp.select.nxv2f32(<vscale x 2 x i1> %m, <vscale x 2 x float> %a, <vscale x 2 x float> %passthru, i32 %vl)
  ret <vscale x 2 x float> %b
}

; Test integer extension by vp.zext.
define <vscale x 2 x i32> @vpselect_vpzext(<vscale x 2 x i32> %passthru, <vscale x 2 x i8> %x, <vscale x 2 x i1> %m, i32 zeroext %vl) {
; CHECK-LABEL: vpselect_vpzext:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
; CHECK-NEXT:    vzext.vf4 v8, v9, v0.t
; CHECK-NEXT:    ret
  %splat = insertelement <vscale x 2 x i1> poison, i1 -1, i32 0
  %mask = shufflevector <vscale x 2 x i1> %splat, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
  %a = call <vscale x 2 x i32> @llvm.vp.zext.nxv2i32.nxv2i8(<vscale x 2 x i8> %x, <vscale x 2 x i1> %mask, i32 %vl)
  %b = call <vscale x 2 x i32> @llvm.vp.select.nxv2i32(<vscale x 2 x i1> %m, <vscale x 2 x i32> %a, <vscale x 2 x i32> %passthru, i32 %vl)
  ret <vscale x 2 x i32> %b
}

; Test integer truncation by vp.trunc.
define <vscale x 2 x i32> @vpselect_vptrunc(<vscale x 2 x i32> %passthru, <vscale x 2 x i64> %x, <vscale x 2 x i1> %m, i32 zeroext %vl) {
; CHECK-LABEL: vpselect_vptrunc:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
; CHECK-NEXT:    vnsrl.wi v8, v10, 0, v0.t
; CHECK-NEXT:    ret
  %splat = insertelement <vscale x 2 x i1> poison, i1 -1, i32 0
  %mask = shufflevector <vscale x 2 x i1> %splat, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
  %a = call <vscale x 2 x i32> @llvm.vp.trunc.nxv2i32.nxv2i64(<vscale x 2 x i64> %x, <vscale x 2 x i1> %mask, i32 %vl)
  %b = call <vscale x 2 x i32> @llvm.vp.select.nxv2i32(<vscale x 2 x i1> %m, <vscale x 2 x i32> %a, <vscale x 2 x i32> %passthru, i32 %vl)
  ret <vscale x 2 x i32> %b
}

; Test float extension by vp.fpext.
define <vscale x 2 x double> @vpselect_vpfpext(<vscale x 2 x double> %passthru, <vscale x 2 x float> %x, <vscale x 2 x i1> %m, i32 zeroext %vl) {
; CHECK-LABEL: vpselect_vpfpext:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
; CHECK-NEXT:    vfwcvt.f.f.v v8, v10, v0.t
; CHECK-NEXT:    ret
  %splat = insertelement <vscale x 2 x i1> poison, i1 -1, i32 0
  %mask = shufflevector <vscale x 2 x i1> %splat, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
  %a = call <vscale x 2 x double> @llvm.vp.fpext.nxv2f64.nxv2f32(<vscale x 2 x float> %x, <vscale x 2 x i1> %mask, i32 %vl)
  %b = call <vscale x 2 x double> @llvm.vp.select.nxv2f64(<vscale x 2 x i1> %m, <vscale x 2 x double> %a, <vscale x 2 x double> %passthru, i32 %vl)
  ret <vscale x 2 x double> %b
}

; Test float truncation by vp.fptrunc.
define <vscale x 2 x float> @vpselect_vpfptrunc(<vscale x 2 x float> %passthru, <vscale x 2 x double> %x, <vscale x 2 x i1> %m, i32 zeroext %vl) {
; CHECK-LABEL: vpselect_vpfptrunc:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
; CHECK-NEXT:    vfncvt.f.f.w v8, v10, v0.t
; CHECK-NEXT:    ret
  %splat = insertelement <vscale x 2 x i1> poison, i1 -1, i32 0
  %mask = shufflevector <vscale x 2 x i1> %splat, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
  %a = call <vscale x 2 x float> @llvm.vp.fptrunc.nxv2f32.nxv2f64(<vscale x 2 x double> %x, <vscale x 2 x i1> %mask, i32 %vl)
  %b = call <vscale x 2 x float> @llvm.vp.select.nxv2f32(<vscale x 2 x i1> %m, <vscale x 2 x float> %a, <vscale x 2 x float> %passthru, i32 %vl)
  ret <vscale x 2 x float> %b
}

; Test load operation by vp.load.
define <vscale x 2 x i32> @vpselect_vpload(<vscale x 2 x i32> %passthru, <vscale x 2 x i32>* %p, <vscale x 2 x i1> %m, i32 zeroext %vl) {
; CHECK-LABEL: vpselect_vpload:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, mu
; CHECK-NEXT:    vle32.v v8, (a0), v0.t
; CHECK-NEXT:    ret
  %splat = insertelement <vscale x 2 x i1> poison, i1 -1, i32 0
  %mask = shufflevector <vscale x 2 x i1> %splat, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
  %a = call <vscale x 2 x i32> @llvm.vp.load.nxv2i32.p0(<vscale x 2 x i32>* %p, <vscale x 2 x i1> %mask, i32 %vl)
  %b = call <vscale x 2 x i32> @llvm.vp.select.nxv2i32(<vscale x 2 x i1> %m, <vscale x 2 x i32> %a, <vscale x 2 x i32> %passthru, i32 %vl)
  ret <vscale x 2 x i32> %b
}

; Test result has chain and glued node.
define @vpselect_vpload2( %passthru, * %p, %x, %y, i32 zeroext %vl) { ; CHECK-LABEL: vpselect_vpload2: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu ; CHECK-NEXT: vmseq.vv v0, v9, v10 ; CHECK-NEXT: vle32.v v8, (a0), v0.t ; CHECK-NEXT: ret %splat = insertelement poison, i1 -1, i32 0 %mask = shufflevector %splat, poison, zeroinitializer %a = call @llvm.vp.load.nxv2i32.p0( * %p, %mask, i32 %vl) %m = call @llvm.vp.icmp.nxv2i32( %x, %y, metadata !"eq", %mask, i32 %vl) %b = call @llvm.vp.select.nxv2i32( %m, %a, %passthru, i32 %vl) ret %b } ; Test result has chain output of true operand of select.vvm. define void @vpselect_vpload_store( %passthru, * %p, %m, i32 zeroext %vl) { ; CHECK-LABEL: vpselect_vpload_store: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu ; CHECK-NEXT: vle32.v v8, (a0), v0.t ; CHECK-NEXT: vs1r.v v8, (a0) ; CHECK-NEXT: ret %splat = insertelement poison, i1 -1, i32 0 %mask = shufflevector %splat, poison, zeroinitializer %a = call @llvm.vp.load.nxv2i32.p0( * %p, %mask, i32 %vl) %b = call @llvm.vp.select.nxv2i32( %m, %a, %passthru, i32 %vl) store %b, * %p ret void } ; FIXME: select vselect.vvm and vleffN.v define @vpselect_vleff( %passthru, * %p, %m, i32 zeroext %vl) { ; CHECK-LABEL: vpselect_vleff: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma ; CHECK-NEXT: vle32ff.v v9, (a0) ; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma ; CHECK-NEXT: vmerge.vvm v8, v8, v9, v0 ; CHECK-NEXT: ret %1 = zext i32 %vl to i64 %a = call { , i64 } @llvm.riscv.vleff.nxv2i32( undef, * %p, i64 %1) %b = extractvalue { , i64 } %a, 0 %c = call @llvm.vp.select.nxv2i32( %m, %b, %passthru, i32 %vl) ret %c } ; Test strided load by riscv.vlse define @vpselect_vlse( %passthru, * %p, %m, i64 %s, i32 zeroext %vl) { ; CHECK-LABEL: vpselect_vlse: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a2, e32, m1, ta, mu ; CHECK-NEXT: vlse32.v v8, (a0), a1, v0.t ; CHECK-NEXT: ret %1 = zext i32 %vl to i64 %a = call @llvm.riscv.vlse.nxv2i32( undef, * %p, i64 %s, i64 %1) %b = call @llvm.vp.select.nxv2i32( %m, %a, %passthru, i32 %vl) ret %b } ; Test indexed load by riscv.vluxei define @vpselect_vluxei( %passthru, * %p, %idx, %m, i64 %s, i32 zeroext %vl) { ; CHECK-LABEL: vpselect_vluxei: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a2, e32, m1, ta, mu ; CHECK-NEXT: vluxei64.v v8, (a0), v10, v0.t ; CHECK-NEXT: ret %1 = zext i32 %vl to i64 %a = call @llvm.riscv.vluxei.nxv2i32.nxv2i64( undef, * %p, %idx, i64 %1) %b = call @llvm.vp.select.nxv2i32( %m, %a, %passthru, i32 %vl) ret %b } ; Test vector index by riscv.vid define @vpselect_vid( %passthru, %m, i32 zeroext %vl) { ; CHECK-LABEL: vpselect_vid: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu ; CHECK-NEXT: vid.v v8, v0.t ; CHECK-NEXT: ret %1 = zext i32 %vl to i64 %a = call @llvm.riscv.vid.nxv2i32( undef, i64 %1) %b = call @llvm.vp.select.nxv2i32( %m, %a, %passthru, i32 %vl) ret %b } ; Test riscv.viota define @vpselect_viota( %passthru, %m, %vm, i32 zeroext %vl) { ; CHECK-LABEL: vpselect_viota: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma ; CHECK-NEXT: viota.m v10, v9 ; CHECK-NEXT: vmerge.vvm v8, v8, v10, v0 ; CHECK-NEXT: ret %1 = zext i32 %vl to i64 %a = call @llvm.riscv.viota.nxv2i32( undef, %vm, i64 %1) %b = call @llvm.vp.select.nxv2i32( %m, %a, %passthru, i32 %vl) ret %b } ; Test riscv.vfclass define @vpselect_vflcass( %passthru, %vf, %m, i32 zeroext %vl) { ; CHECK-LABEL: vpselect_vflcass: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu ; CHECK-NEXT: vfclass.v v8, 
v9, v0.t ; CHECK-NEXT: ret %1 = zext i32 %vl to i64 %a = call @llvm.riscv.vfclass.nxv2i32( undef, %vf, i64 %1) %b = call @llvm.vp.select.nxv2i32( %m, %a, %passthru, i32 %vl) ret %b } ; Test riscv.vfsqrt define @vpselect_vfsqrt( %passthru, %vf, %m, i32 zeroext %vl) { ; CHECK-LABEL: vpselect_vfsqrt: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu ; CHECK-NEXT: vfsqrt.v v8, v9, v0.t ; CHECK-NEXT: ret %1 = zext i32 %vl to i64 %a = call @llvm.riscv.vfsqrt.nxv2f32( undef, %vf, i64 7, i64 %1) %b = call @llvm.vp.select.nxv2f32( %m, %a, %passthru, i32 %vl) ret %b } ; Test reciprocal operation by riscv.vfrec7 define @vpselect_vfrec7( %passthru, %vf, %m, i32 zeroext %vl) { ; CHECK-LABEL: vpselect_vfrec7: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu ; CHECK-NEXT: vfrec7.v v8, v9, v0.t ; CHECK-NEXT: ret %1 = zext i32 %vl to i64 %a = call @llvm.riscv.vfrec7.nxv2f32( undef, %vf, i64 7, i64 %1) %b = call @llvm.vp.select.nxv2f32( %m, %a, %passthru, i32 %vl) ret %b } ; Test slides declare @llvm.riscv.vslideup.nxv2i32(, , i64, i64, i64) define @vpselect_vslideup( %passthru, %v, i64 %x, %m, i32 zeroext %vl) { ; CHECK-LABEL: vpselect_vslideup: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu ; CHECK-NEXT: vslideup.vx v8, v9, a0, v0.t ; CHECK-NEXT: ret %1 = zext i32 %vl to i64 %a = call @llvm.riscv.vslideup.nxv2i32( undef, %v, i64 %x, i64 %1, i64 0) %b = call @llvm.vp.select.nxv2i32( %m, %a, %passthru, i32 %vl) ret %b } declare @llvm.riscv.vslidedown.nxv2i32(, , i64, i64, i64) define @vpselect_vslidedown( %passthru, %v, i64 %x, %m, i32 zeroext %vl) { ; CHECK-LABEL: vpselect_vslidedown: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu ; CHECK-NEXT: vslidedown.vx v8, v9, a0, v0.t ; CHECK-NEXT: ret %1 = zext i32 %vl to i64 %a = call @llvm.riscv.vslidedown.nxv2i32( undef, %v, i64 %x, i64 %1, i64 0) %b = call @llvm.vp.select.nxv2i32( %m, %a, %passthru, i32 %vl) ret %b } declare @llvm.riscv.vslide1up.nxv2i32.i32(, , i32, i64) define @vpselect_vslide1up( %passthru, %v, i32 %x, %m, i32 zeroext %vl) { ; CHECK-LABEL: vpselect_vslide1up: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu ; CHECK-NEXT: vslide1up.vx v8, v9, a0, v0.t ; CHECK-NEXT: ret %1 = zext i32 %vl to i64 %a = call @llvm.riscv.vslide1up.nxv2i32.i32( undef, %v, i32 %x, i64 %1) %b = call @llvm.vp.select.nxv2i32( %m, %a, %passthru, i32 %vl) ret %b } declare @llvm.riscv.vslide1down.nxv2i32.i32(, , i32, i64) define @vpselect_vslide1down( %passthru, %v, i32 %x, %m, i32 zeroext %vl) { ; CHECK-LABEL: vpselect_vslide1down: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu ; CHECK-NEXT: vslide1down.vx v8, v9, a0, v0.t ; CHECK-NEXT: ret %1 = zext i32 %vl to i64 %a = call @llvm.riscv.vslide1down.nxv2i32.i32( undef, %v, i32 %x, i64 %1) %b = call @llvm.vp.select.nxv2i32( %m, %a, %passthru, i32 %vl) ret %b } ; Test vector operations with VLMAX vector length. ; Test binary operator with vp.select and add. define @vpselect_add( %passthru, %x, %y, %m, i32 zeroext %vl) { ; CHECK-LABEL: vpselect_add: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu ; CHECK-NEXT: vadd.vv v8, v9, v10, v0.t ; CHECK-NEXT: ret %a = add %x, %y %b = call @llvm.vp.select.nxv2i32( %m, %a, %passthru, i32 %vl) ret %b } ; Test binary operator with vp.select and fadd. 
define <vscale x 2 x float> @vpselect_fadd(<vscale x 2 x float> %passthru, <vscale x 2 x float> %x, <vscale x 2 x float> %y, <vscale x 2 x i1> %m, i32 zeroext %vl) {
; CHECK-LABEL: vpselect_fadd:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
; CHECK-NEXT:    vfadd.vv v8, v9, v10, v0.t
; CHECK-NEXT:    ret
  %a = fadd <vscale x 2 x float> %x, %y
  %b = call <vscale x 2 x float> @llvm.vp.select.nxv2f32(<vscale x 2 x i1> %m, <vscale x 2 x float> %a, <vscale x 2 x float> %passthru, i32 %vl)
  ret <vscale x 2 x float> %b
}

; Test conversion by fptosi.
define <vscale x 2 x i16> @vpselect_fptosi(<vscale x 2 x i16> %passthru, <vscale x 2 x float> %x, <vscale x 2 x i1> %m, i32 zeroext %vl) {
; CHECK-LABEL: vpselect_fptosi:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
; CHECK-NEXT:    vfncvt.rtz.x.f.w v8, v9, v0.t
; CHECK-NEXT:    ret
  %a = fptosi <vscale x 2 x float> %x to <vscale x 2 x i16>
  %b = call <vscale x 2 x i16> @llvm.vp.select.nxv2i16(<vscale x 2 x i1> %m, <vscale x 2 x i16> %a, <vscale x 2 x i16> %passthru, i32 %vl)
  ret <vscale x 2 x i16> %b
}

; Test conversion by sitofp.
define <vscale x 2 x float> @vpselect_sitofp(<vscale x 2 x float> %passthru, <vscale x 2 x i64> %x, <vscale x 2 x i1> %m, i32 zeroext %vl) {
; CHECK-LABEL: vpselect_sitofp:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
; CHECK-NEXT:    vfncvt.f.x.w v8, v10, v0.t
; CHECK-NEXT:    ret
  %a = sitofp <vscale x 2 x i64> %x to <vscale x 2 x float>
  %b = call <vscale x 2 x float> @llvm.vp.select.nxv2f32(<vscale x 2 x i1> %m, <vscale x 2 x float> %a, <vscale x 2 x float> %passthru, i32 %vl)
  ret <vscale x 2 x float> %b
}

; Test float extension by fpext.
define <vscale x 2 x double> @vpselect_fpext(<vscale x 2 x double> %passthru, <vscale x 2 x float> %x, <vscale x 2 x i1> %m, i32 zeroext %vl) {
; CHECK-LABEL: vpselect_fpext:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
; CHECK-NEXT:    vfwcvt.f.f.v v8, v10, v0.t
; CHECK-NEXT:    ret
  %a = fpext <vscale x 2 x float> %x to <vscale x 2 x double>
  %b = call <vscale x 2 x double> @llvm.vp.select.nxv2f64(<vscale x 2 x i1> %m, <vscale x 2 x double> %a, <vscale x 2 x double> %passthru, i32 %vl)
  ret <vscale x 2 x double> %b
}

; Test float truncation by fptrunc.
define <vscale x 2 x float> @vpselect_fptrunc(<vscale x 2 x float> %passthru, <vscale x 2 x double> %x, <vscale x 2 x i1> %m, i32 zeroext %vl) {
; CHECK-LABEL: vpselect_fptrunc:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
; CHECK-NEXT:    vfncvt.f.f.w v8, v10, v0.t
; CHECK-NEXT:    ret
  %a = fptrunc <vscale x 2 x double> %x to <vscale x 2 x float>
  %b = call <vscale x 2 x float> @llvm.vp.select.nxv2f32(<vscale x 2 x i1> %m, <vscale x 2 x float> %a, <vscale x 2 x float> %passthru, i32 %vl)
  ret <vscale x 2 x float> %b
}

; Test integer extension by zext.
define <vscale x 2 x i32> @vpselect_zext(<vscale x 2 x i32> %passthru, <vscale x 2 x i8> %x, <vscale x 2 x i1> %m, i32 zeroext %vl) {
; CHECK-LABEL: vpselect_zext:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
; CHECK-NEXT:    vzext.vf4 v8, v9, v0.t
; CHECK-NEXT:    ret
  %a = zext <vscale x 2 x i8> %x to <vscale x 2 x i32>
  %b = call <vscale x 2 x i32> @llvm.vp.select.nxv2i32(<vscale x 2 x i1> %m, <vscale x 2 x i32> %a, <vscale x 2 x i32> %passthru, i32 %vl)
  ret <vscale x 2 x i32> %b
}

; Test integer truncation by trunc.
define <vscale x 2 x i32> @vpselect_trunc(<vscale x 2 x i32> %passthru, <vscale x 2 x i64> %x, <vscale x 2 x i1> %m, i32 zeroext %vl) {
; CHECK-LABEL: vpselect_trunc:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
; CHECK-NEXT:    vnsrl.wi v8, v10, 0, v0.t
; CHECK-NEXT:    ret
  %a = trunc <vscale x 2 x i64> %x to <vscale x 2 x i32>
  %b = call <vscale x 2 x i32> @llvm.vp.select.nxv2i32(<vscale x 2 x i1> %m, <vscale x 2 x i32> %a, <vscale x 2 x i32> %passthru, i32 %vl)
  ret <vscale x 2 x i32> %b
}

; Folding this would create a loop in the DAG because the chain from the VLE is
; used by the vssubu.
define void @test_dag_loop() { ; CHECK-LABEL: test_dag_loop: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetivli zero, 1, e16, m8, ta, ma ; CHECK-NEXT: vle16.v v8, (zero) ; CHECK-NEXT: vsetvli a0, zero, e8, m4, ta, ma ; CHECK-NEXT: vmclr.m v0 ; CHECK-NEXT: vmv.v.i v16, 0 ; CHECK-NEXT: vsetivli zero, 0, e8, m4, tu, mu ; CHECK-NEXT: vmv4r.v v20, v16 ; CHECK-NEXT: vssubu.vx v20, v16, zero, v0.t ; CHECK-NEXT: vsetvli zero, zero, e8, m4, ta, ma ; CHECK-NEXT: vmseq.vv v0, v20, v16 ; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, ma ; CHECK-NEXT: vmv.v.i v16, 0 ; CHECK-NEXT: vsetivli zero, 1, e16, m8, tu, ma ; CHECK-NEXT: vmerge.vvm v16, v16, v8, v0 ; CHECK-NEXT: vsetivli zero, 0, e16, m8, ta, ma ; CHECK-NEXT: vse16.v v16, (zero) ; CHECK-NEXT: ret entry: %0 = call @llvm.riscv.vle.nxv32i16.i64( undef, * null, i64 1) %1 = tail call @llvm.riscv.vssubu.mask.nxv32i8.i8.i64( zeroinitializer, zeroinitializer, i8 0, zeroinitializer, i64 0, i64 0) %2 = tail call @llvm.riscv.vmseq.nxv32i8.nxv32i8.i64( %1, zeroinitializer, i64 0) %3 = tail call @llvm.riscv.vmerge.nxv32i16.nxv32i16.i64( zeroinitializer, zeroinitializer, %0, %2, i64 1) call void @llvm.riscv.vse.nxv32i16.i64( %3, * null, i64 0) ret void } define @test_vaaddu( %var_11, i16 zeroext %var_9, %var_5, %var_0) { ; CHECK-LABEL: test_vaaddu: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetivli zero, 3, e16, mf4, ta, mu ; CHECK-NEXT: csrwi vxrm, 0 ; CHECK-NEXT: vaaddu.vx v9, v8, a0, v0.t ; CHECK-NEXT: vmv1r.v v8, v9 ; CHECK-NEXT: ret entry: %0 = tail call @llvm.riscv.vaaddu.nxv1i16.i16.i64( poison, %var_11, i16 %var_9, i64 0, i64 3) %1 = tail call @llvm.riscv.vmerge.nxv1i16.nxv1i16.i64( poison, %var_0, %0, %var_5, i64 3) ret %1 } ; Test reductions don't have a vmerge folded into them, since the mask affects ; the result. declare @llvm.riscv.vredsum.nxv2i32.nxv2i32( , , , i64) define @vredsum( %passthru, %x, %y, %m, i64 %vl) { ; CHECK-LABEL: vredsum: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma ; CHECK-NEXT: vmv1r.v v11, v8 ; CHECK-NEXT: vredsum.vs v11, v9, v10 ; CHECK-NEXT: vsetvli zero, zero, e32, m1, tu, ma ; CHECK-NEXT: vmerge.vvm v8, v8, v11, v0 ; CHECK-NEXT: ret %a = call @llvm.riscv.vredsum.nxv2i32.nxv2i32( %passthru, %x, %y, i64 %vl) %b = call @llvm.riscv.vmerge.nxv2i32.nxv2i32( %passthru, %passthru, %a, %m, i64 %vl) ret %b } declare @llvm.riscv.vfredusum.nxv2f32.nxv2f32( , , , i64, i64) define @vfredusum( %passthru, %x, %y, %m, i64 %vl) { ; CHECK-LABEL: vfredusum: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma ; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vmv1r.v v11, v8 ; CHECK-NEXT: vfredusum.vs v11, v9, v10 ; CHECK-NEXT: vsetvli zero, zero, e32, m1, tu, ma ; CHECK-NEXT: vmerge.vvm v8, v8, v11, v0 ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: ret %a = call @llvm.riscv.vfredusum.nxv2f32.nxv2f32( %passthru, %x, %y, i64 0, i64 %vl) %b = call @llvm.riscv.vmerge.nxv2f32.nxv2f32( %passthru, %passthru, %a, %m, i64 %vl) ret %b } ; However we can fold it in if the mask is all ones. 
define @vredsum_allones_mask( %passthru, %x, %y, i64 %vl) { ; CHECK-LABEL: vredsum_allones_mask: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, ma ; CHECK-NEXT: vredsum.vs v8, v9, v10 ; CHECK-NEXT: ret %splat = insertelement poison, i1 -1, i32 0 %mask = shufflevector %splat, poison, zeroinitializer %a = call @llvm.riscv.vredsum.nxv2i32.nxv2i32( %passthru, %x, %y, i64 %vl) %b = call @llvm.riscv.vmerge.nxv2i32.nxv2i32( %passthru, %passthru, %a, %mask, i64 %vl) ret %b } define @vfredusum_allones_mask( %passthru, %x, %y, i64 %vl) { ; CHECK-LABEL: vfredusum_allones_mask: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, ma ; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vfredusum.vs v8, v9, v10 ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: ret %splat = insertelement poison, i1 -1, i32 0 %mask = shufflevector %splat, poison, zeroinitializer %a = call @llvm.riscv.vfredusum.nxv2f32.nxv2f32( %passthru, %x, %y, i64 0, i64 %vl) %b = call @llvm.riscv.vmerge.nxv2f32.nxv2f32( %passthru, %passthru, %a, %mask, i64 %vl) ret %b } declare @llvm.riscv.vle.nxv32i16.i64(, * nocapture, i64) declare @llvm.riscv.vssubu.mask.nxv32i8.i8.i64(, , i8, , i64, i64 immarg) declare @llvm.riscv.vmseq.nxv32i8.nxv32i8.i64(, , i64) declare @llvm.riscv.vmerge.nxv32i16.nxv32i16.i64(, , , , i64) declare void @llvm.riscv.vse.nxv32i16.i64(, * nocapture, i64) declare @llvm.riscv.vaaddu.nxv1i16.i16.i64(, , i16, i64 immarg, i64) declare @llvm.riscv.vmerge.nxv1i16.nxv1i16.i64(, , , , i64) ; Tests for folding vmerge into its ops when their VLs differ declare @llvm.riscv.vadd.nxv2i32.nxv2i32(, , , i64) declare @llvm.riscv.vmerge.nxv2i32.nxv2i32(, , , , i64) ; Can fold with VL=2 define @vmerge_smaller_vl_same_passthru( %passthru, %x, %y, %m) { ; CHECK-LABEL: vmerge_smaller_vl_same_passthru: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e32, m1, tu, mu ; CHECK-NEXT: vadd.vv v8, v9, v10, v0.t ; CHECK-NEXT: ret %a = call @llvm.riscv.vadd.nxv2i32.nxv2i32( %passthru, %x, %y, i64 4) %b = call @llvm.riscv.vmerge.nxv2i32.nxv2i32( %passthru, %passthru, %a, %m, i64 2) ret %b } ; Can fold with VL=2 define @vmerge_larger_vl_same_passthru( %passthru, %x, %y, %m) { ; CHECK-LABEL: vmerge_larger_vl_same_passthru: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e32, m1, tu, mu ; CHECK-NEXT: vadd.vv v8, v9, v10, v0.t ; CHECK-NEXT: ret %a = call @llvm.riscv.vadd.nxv2i32.nxv2i32( %passthru, %x, %y, i64 2) %b = call @llvm.riscv.vmerge.nxv2i32.nxv2i32( %passthru, %passthru, %a, %m, i64 3) ret %b } ; Can fold with VL=2 define @vmerge_smaller_vl_different_passthru( %pt1, %pt2, %x, %y, %m) { ; CHECK-LABEL: vmerge_smaller_vl_different_passthru: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 3, e32, m1, tu, ma ; CHECK-NEXT: vadd.vv v8, v10, v11 ; CHECK-NEXT: vsetivli zero, 2, e32, m1, tu, ma ; CHECK-NEXT: vmerge.vvm v9, v9, v8, v0 ; CHECK-NEXT: vmv1r.v v8, v9 ; CHECK-NEXT: ret %a = call @llvm.riscv.vadd.nxv2i32.nxv2i32( %pt1, %x, %y, i64 3) %b = call @llvm.riscv.vmerge.nxv2i32.nxv2i32( %pt2, %pt2, %a, %m, i64 2) ret %b } ; Can't fold this because we need to take elements from both %pt1 and %pt2 define @vmerge_larger_vl_different_passthru( %pt1, %pt2, %x, %y, %m) { ; CHECK-LABEL: vmerge_larger_vl_different_passthru: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e32, m1, tu, ma ; CHECK-NEXT: vadd.vv v8, v10, v11 ; CHECK-NEXT: vsetivli zero, 3, e32, m1, tu, ma ; CHECK-NEXT: vmerge.vvm v9, v9, v8, v0 ; CHECK-NEXT: vmv1r.v v8, v9 ; CHECK-NEXT: ret %a = call @llvm.riscv.vadd.nxv2i32.nxv2i32( %pt1, %x, %y, i64 2) %b = call 
<vscale x 2 x i32> @llvm.riscv.vmerge.nxv2i32.nxv2i32(<vscale x 2 x i32> %pt2, <vscale x 2 x i32> %pt2, <vscale x 2 x i32> %a, <vscale x 2 x i1> %m, i64 3)
  ret <vscale x 2 x i32> %b
}

; Can fold with VL=2
define <vscale x 2 x i32> @vmerge_smaller_vl_poison_passthru(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> %x, <vscale x 2 x i32> %y, <vscale x 2 x i1> %m) {
; CHECK-LABEL: vmerge_smaller_vl_poison_passthru:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e32, m1, tu, mu
; CHECK-NEXT:    vadd.vv v8, v9, v10, v0.t
; CHECK-NEXT:    ret
  %a = call <vscale x 2 x i32> @llvm.riscv.vadd.nxv2i32.nxv2i32(<vscale x 2 x i32> poison, <vscale x 2 x i32> %x, <vscale x 2 x i32> %y, i64 3)
  %b = call <vscale x 2 x i32> @llvm.riscv.vmerge.nxv2i32.nxv2i32(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> %passthru, <vscale x 2 x i32> %a, <vscale x 2 x i1> %m, i64 2)
  ret <vscale x 2 x i32> %b
}

; Can fold with VL=2
define <vscale x 2 x i32> @vmerge_larger_vl_poison_passthru(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> %x, <vscale x 2 x i32> %y, <vscale x 2 x i1> %m) {
; CHECK-LABEL: vmerge_larger_vl_poison_passthru:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e32, m1, tu, mu
; CHECK-NEXT:    vadd.vv v8, v9, v10, v0.t
; CHECK-NEXT:    ret
  %a = call <vscale x 2 x i32> @llvm.riscv.vadd.nxv2i32.nxv2i32(<vscale x 2 x i32> poison, <vscale x 2 x i32> %x, <vscale x 2 x i32> %y, i64 2)
  %b = call <vscale x 2 x i32> @llvm.riscv.vmerge.nxv2i32.nxv2i32(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> %passthru, <vscale x 2 x i32> %a, <vscale x 2 x i1> %m, i64 3)
  ret <vscale x 2 x i32> %b
}

; The vadd's new policy should be tail undisturbed since the false op of the
; vmerge moves from the body to the tail, and we need to preserve it.
define <vscale x 2 x i32> @vmerge_larger_vl_false_becomes_tail(<vscale x 2 x i32> %false, <vscale x 2 x i32> %x, <vscale x 2 x i32> %y, <vscale x 2 x i1> %m) {
; CHECK-LABEL: vmerge_larger_vl_false_becomes_tail:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e32, m1, tu, mu
; CHECK-NEXT:    vadd.vv v8, v9, v10, v0.t
; CHECK-NEXT:    ret
  %a = call <vscale x 2 x i32> @llvm.riscv.vadd.nxv2i32.nxv2i32(<vscale x 2 x i32> poison, <vscale x 2 x i32> %x, <vscale x 2 x i32> %y, i64 2)
  %b = call <vscale x 2 x i32> @llvm.riscv.vmerge.nxv2i32.nxv2i32(<vscale x 2 x i32> poison, <vscale x 2 x i32> %false, <vscale x 2 x i32> %a, <vscale x 2 x i1> %m, i64 3)
  ret <vscale x 2 x i32> %b
}