; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v,+zbb -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s

; This file exercises a vector reduction intrinsic whose scalar result feeds a
; scalar binary operation of the same kind. For each function the expected
; assembly shows either the scalar operand moved into the reduction's scalar
; source register (no separate scalar instruction afterwards), or the
; reduction followed by a separate scalar instruction.

; Integer add reduction of %v followed by a scalar add of %x. The expected
; assembly moves a0 into v10 and uses it as the scalar operand of vredsum.vs;
; no separate scalar add follows.
define i64 @reduce_add(i64 %x, <4 x i64> %v) {
; CHECK-LABEL: reduce_add:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT:    vmv.s.x v10, a0
; CHECK-NEXT:    vredsum.vs v8, v8, v10
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
entry:
  %rdx = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %v)
  %res = add i64 %rdx, %x
  ret i64 %res
}

; Same as reduce_add but the scalar operand is the constant 8. The expected
; assembly materializes it with vmv.v.i into v10 and uses v10 as the scalar
; operand of vredsum.vs; no separate scalar add follows.
define i64 @reduce_add2(<4 x i64> %v) {
; CHECK-LABEL: reduce_add2:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetivli zero, 4, e64, m1, ta, ma
; CHECK-NEXT:    vmv.v.i v10, 8
; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT:    vredsum.vs v8, v8, v10
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
entry:
  %rdx = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %v)
  %res = add i64 %rdx, 8
  ret i64 %res
}

; Integer and reduction of %v followed by a scalar and with %x. The expected
; assembly reduces v8 with itself as the scalar operand and keeps a separate
; scalar and instruction.
define i64 @reduce_and(i64 %x, <4 x i64> %v) {
; CHECK-LABEL: reduce_and:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT:    vredand.vs v8, v8, v8
; CHECK-NEXT:    vmv.x.s a1, v8
; CHECK-NEXT:    and a0, a1, a0
; CHECK-NEXT:    ret
entry:
  %rdx = call i64 @llvm.vector.reduce.and.v4i64(<4 x i64> %v)
  %res = and i64 %rdx, %x
  ret i64 %res
}

; Same as reduce_and but with the constant 8; the expected assembly keeps a
; separate andi after the reduction.
define i64 @reduce_and2(<4 x i64> %v) {
; CHECK-LABEL: reduce_and2:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT:    vredand.vs v8, v8, v8
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    andi a0, a0, 8
; CHECK-NEXT:    ret
entry:
  %rdx = call i64 @llvm.vector.reduce.and.v4i64(<4 x i64> %v)
  %res = and i64 %rdx, 8
  ret i64 %res
}

; Integer or reduction of %v followed by a scalar or with %x. The expected
; assembly reduces v8 with itself as the scalar operand and keeps a separate
; scalar or instruction.
define i64 @reduce_or(i64 %x, <4 x i64> %v) {
; CHECK-LABEL: reduce_or:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT:    vredor.vs v8, v8, v8
; CHECK-NEXT:    vmv.x.s a1, v8
; CHECK-NEXT:    or a0, a1, a0
; CHECK-NEXT:    ret
entry:
  %rdx = call i64 @llvm.vector.reduce.or.v4i64(<4 x i64> %v)
  %res = or i64 %rdx, %x
  ret i64 %res
}

; Same as reduce_or but with the constant 8; the expected assembly keeps a
; separate ori after the reduction.
define i64 @reduce_or2(<4 x i64> %v) {
; CHECK-LABEL: reduce_or2:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT:    vredor.vs v8, v8, v8
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ori a0, a0, 8
; CHECK-NEXT:    ret
entry:
  %rdx = call i64 @llvm.vector.reduce.or.v4i64(<4 x i64> %v)
  %res = or i64 %rdx, 8
  ret i64 %res
}

; Integer xor reduction of %v followed by a scalar xor with %x. The expected
; assembly moves a0 into v10 and uses it as the scalar operand of vredxor.vs;
; no separate scalar xor follows.
define i64 @reduce_xor(i64 %x, <4 x i64> %v) {
; CHECK-LABEL: reduce_xor:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT:    vmv.s.x v10, a0
; CHECK-NEXT:    vredxor.vs v8, v8, v10
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
entry:
  %rdx = call i64 @llvm.vector.reduce.xor.v4i64(<4 x i64> %v)
  %res = xor i64 %rdx, %x
  ret i64 %res
}

; Xor reduction of %v whose result is combined with the constant 8.
; NOTE(review): despite the function name, the scalar operation below is
; `and`, not `xor`. The expected assembly matches the IR as written (zero
; scalar operand for vredxor.vs, then a separate andi). Confirm whether `and`
; is intentional before changing it; the assertions would need regenerating.
define i64 @reduce_xor2(<4 x i64> %v) {
; CHECK-LABEL: reduce_xor2:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT:    vmv.s.x v10, zero
; CHECK-NEXT:    vredxor.vs v8, v8, v10
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    andi a0, a0, 8
; CHECK-NEXT:    ret
entry:
  %rdx = call i64 @llvm.vector.reduce.xor.v4i64(<4 x i64> %v)
  %res = and i64 %rdx, 8
  ret i64 %res
}

; Unsigned max reduction of %v followed by llvm.umax with %x. The expected
; assembly reduces v8 with itself as the scalar operand and keeps a separate
; maxu instruction.
define i64 @reduce_umax(i64 %x, <4 x i64> %v) {
; CHECK-LABEL: reduce_umax:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT:    vredmaxu.vs v8, v8, v8
; CHECK-NEXT:    vmv.x.s a1, v8
; CHECK-NEXT:    maxu a0, a1, a0
; CHECK-NEXT:    ret
entry:
  %rdx = call i64 @llvm.vector.reduce.umax.v4i64(<4 x i64> %v)
  %res = call i64 @llvm.umax.i64(i64 %rdx, i64 %x)
  ret i64 %res
}

; Same as reduce_umax but with the constant 8, which the expected assembly
; loads with li before a separate maxu.
define i64 @reduce_umax2(<4 x i64> %v) {
; CHECK-LABEL: reduce_umax2:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT:    vredmaxu.vs v8, v8, v8
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    li a1, 8
; CHECK-NEXT:    maxu a0, a0, a1
; CHECK-NEXT:    ret
entry:
  %rdx = call i64 @llvm.vector.reduce.umax.v4i64(<4 x i64> %v)
  %res = call i64 @llvm.umax.i64(i64 %rdx, i64 8)
  ret i64 %res
}

; Unsigned min reduction of %v followed by llvm.umin with %x. The expected
; assembly reduces v8 with itself as the scalar operand and keeps a separate
; minu instruction.
define i64 @reduce_umin(i64 %x, <4 x i64> %v) {
; CHECK-LABEL: reduce_umin:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT:    vredminu.vs v8, v8, v8
; CHECK-NEXT:    vmv.x.s a1, v8
; CHECK-NEXT:    minu a0, a1, a0
; CHECK-NEXT:    ret
entry:
  %rdx = call i64 @llvm.vector.reduce.umin.v4i64(<4 x i64> %v)
  %res = call i64 @llvm.umin.i64(i64 %rdx, i64 %x)
  ret i64 %res
}

; Same as reduce_umin but with the constant 8, which the expected assembly
; loads with li before a separate minu.
define i64 @reduce_umin2(<4 x i64> %v) {
; CHECK-LABEL: reduce_umin2:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT:    vredminu.vs v8, v8, v8
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    li a1, 8
; CHECK-NEXT:    minu a0, a0, a1
; CHECK-NEXT:    ret
entry:
  %rdx = call i64 @llvm.vector.reduce.umin.v4i64(<4 x i64> %v)
  %res = call i64 @llvm.umin.i64(i64 %rdx, i64 8)
  ret i64 %res
}

; Signed max reduction of %v followed by llvm.smax with %x. The expected
; assembly reduces v8 with itself as the scalar operand and keeps a separate
; max instruction.
define i64 @reduce_smax(i64 %x, <4 x i64> %v) {
; CHECK-LABEL: reduce_smax:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT:    vredmax.vs v8, v8, v8
; CHECK-NEXT:    vmv.x.s a1, v8
; CHECK-NEXT:    max a0, a1, a0
; CHECK-NEXT:    ret
entry:
  %rdx = call i64 @llvm.vector.reduce.smax.v4i64(<4 x i64> %v)
  %res = call i64 @llvm.smax.i64(i64 %rdx, i64 %x)
  ret i64 %res
}

; Same as reduce_smax but with the constant 8, which the expected assembly
; loads with li before a separate max.
define i64 @reduce_smax2(<4 x i64> %v) {
; CHECK-LABEL: reduce_smax2:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT:    vredmax.vs v8, v8, v8
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    li a1, 8
; CHECK-NEXT:    max a0, a0, a1
; CHECK-NEXT:    ret
entry:
  %rdx = call i64 @llvm.vector.reduce.smax.v4i64(<4 x i64> %v)
  %res = call i64 @llvm.smax.i64(i64 %rdx, i64 8)
  ret i64 %res
}

; Signed min reduction of %v followed by llvm.smin with %x. The expected
; assembly reduces v8 with itself as the scalar operand and keeps a separate
; min instruction.
define i64 @reduce_smin(i64 %x, <4 x i64> %v) {
; CHECK-LABEL: reduce_smin:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT:    vredmin.vs v8, v8, v8
; CHECK-NEXT:    vmv.x.s a1, v8
; CHECK-NEXT:    min a0, a1, a0
; CHECK-NEXT:    ret
entry:
  %rdx = call i64 @llvm.vector.reduce.smin.v4i64(<4 x i64> %v)
  %res = call i64 @llvm.smin.i64(i64 %rdx, i64 %x)
  ret i64 %res
}

; Same as reduce_smin but with the constant 8, which the expected assembly
; loads with li before a separate min.
define i64 @reduce_smin2(<4 x i64> %v) {
; CHECK-LABEL: reduce_smin2:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT:    vredmin.vs v8, v8, v8
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    li a1, 8
; CHECK-NEXT:    min a0, a0, a1
; CHECK-NEXT:    ret
entry:
  %rdx = call i64 @llvm.vector.reduce.smin.v4i64(<4 x i64> %v)
  %res = call i64 @llvm.smin.i64(i64 %rdx, i64 8)
  ret i64 %res
}

; Fast-math fadd reduction of %v with %x passed directly as the intrinsic's
; start value. The expected assembly moves fa0 into v9 and uses it as the
; scalar operand of vfredusum.vs.
define float @reduce_fadd(float %x, <4 x float> %v) {
; CHECK-LABEL: reduce_fadd:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vfmv.s.f v9, fa0
; CHECK-NEXT:    vfredusum.vs v8, v8, v9
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
entry:
  %rdx = call fast float @llvm.vector.reduce.fadd.v4f32(float %x, <4 x float> %v)
  ret float %rdx
}

; Fast-math fadd reduction with start value 0.0 followed by a fast fadd of %x.
; The expected assembly is the same sequence as for reduce_fadd: fa0 becomes
; the scalar operand of vfredusum.vs and no separate fadd.s follows.
define float @reduce_fadd2(float %x, <4 x float> %v) {
; CHECK-LABEL: reduce_fadd2:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vfmv.s.f v9, fa0
; CHECK-NEXT:    vfredusum.vs v8, v8, v9
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
entry:
  %rdx = call fast float @llvm.vector.reduce.fadd.v4f32(float 0.0, <4 x float> %v)
  %res = fadd fast float %rdx, %x
  ret float %res
}

; Fast-math fadd reduction with start value -0.0 whose result %rdx has two
; uses: the fast fadd with %x and the store to %rdxptr. The expected assembly
; keeps a separate fadd.s and stores the reduction result from v8.
define float @reduce_fadd3(float %x, <4 x float> %v, ptr %rdxptr) {
; CHECK-LABEL: reduce_fadd3:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vmv.s.x v9, zero
; CHECK-NEXT:    vfredusum.vs v8, v8, v9
; CHECK-NEXT:    vfmv.f.s fa5, v8
; CHECK-NEXT:    fadd.s fa0, fa5, fa0
; CHECK-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
; CHECK-NEXT:    vse32.v v8, (a0)
; CHECK-NEXT:    ret
entry:
  %rdx = call fast float @llvm.vector.reduce.fadd.v4f32(float -0.0, <4 x float> %v)
  %res = fadd fast float %rdx, %x
  store float %rdx, ptr %rdxptr
  ret float %res
}

; Two independent fast-math fadd reductions (of %v and %w, start value -0.0),
; each followed by a fast fadd with a scalar (%x and %y), whose sums are then
; divided. The expected assembly uses fa0 and fa1 as the scalar operands of
; the two vfredusum.vs instructions; the only scalar FP arithmetic is fdiv.s.
define float @reduce_fadd4(float %x, float %y, <4 x float> %v, <4 x float> %w) {
; CHECK-LABEL: reduce_fadd4:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vfmv.s.f v10, fa0
; CHECK-NEXT:    vfredusum.vs v8, v8, v10
; CHECK-NEXT:    vfmv.f.s fa5, v8
; CHECK-NEXT:    vfmv.s.f v8, fa1
; CHECK-NEXT:    vfredusum.vs v8, v9, v8
; CHECK-NEXT:    vfmv.f.s fa4, v8
; CHECK-NEXT:    fdiv.s fa0, fa5, fa4
; CHECK-NEXT:    ret
entry:
  %rdx = call fast float @llvm.vector.reduce.fadd.v4f32(float -0.0, <4 x float> %v)
  %rdx2 = call fast float @llvm.vector.reduce.fadd.v4f32(float -0.0, <4 x float> %w)
  %res = fadd fast float %rdx, %x
  %res2 = fadd fast float %rdx2, %y
  %div = fdiv fast float %res, %res2
  ret float %div
}

; Floating-point max reduction of %v followed by llvm.maxnum with %x. The
; expected assembly reduces v8 with itself as the scalar operand and keeps a
; separate fmax.s instruction.
define float @reduce_fmax(float %x, <4 x float> %v) {
; CHECK-LABEL: reduce_fmax:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vfredmax.vs v8, v8, v8
; CHECK-NEXT:    vfmv.f.s fa5, v8
; CHECK-NEXT:    fmax.s fa0, fa0, fa5
; CHECK-NEXT:    ret
entry:
  %rdx = call float @llvm.vector.reduce.fmax.v4f32(<4 x float> %v)
  %res = call float @llvm.maxnum.f32(float %x, float %rdx)
  ret float %res
}

; Floating-point min reduction of %v followed by llvm.minnum with %x. The
; expected assembly reduces v8 with itself as the scalar operand and keeps a
; separate fmin.s instruction.
define float @reduce_fmin(float %x, <4 x float> %v) {
; CHECK-LABEL: reduce_fmin:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vfredmin.vs v8, v8, v8
; CHECK-NEXT:    vfmv.f.s fa5, v8
; CHECK-NEXT:    fmin.s fa0, fa0, fa5
; CHECK-NEXT:    ret
entry:
  %rdx = call float @llvm.vector.reduce.fmin.v4f32(<4 x float> %v)
  %res = call float @llvm.minnum.f32(float %x, float %rdx)
  ret float %res
}

; Declarations of the reduction and scalar min/max intrinsics used above.
; Function Attrs: nofree nosync nounwind readnone willreturn
declare i64 @llvm.vector.reduce.add.v4i64(<4 x i64>)
declare i64 @llvm.vector.reduce.and.v4i64(<4 x i64>)
declare i64 @llvm.vector.reduce.or.v4i64(<4 x i64>)
declare i64 @llvm.vector.reduce.xor.v4i64(<4 x i64>)
declare i64 @llvm.vector.reduce.umax.v4i64(<4 x i64>)
declare i64 @llvm.vector.reduce.umin.v4i64(<4 x i64>)
declare i64 @llvm.vector.reduce.smax.v4i64(<4 x i64>)
declare i64 @llvm.vector.reduce.smin.v4i64(<4 x i64>)
declare float @llvm.vector.reduce.fadd.v4f32(float, <4 x float>)
declare float @llvm.vector.reduce.fmax.v4f32(<4 x float>)
declare float @llvm.vector.reduce.fmin.v4f32(<4 x float>)
declare i64 @llvm.umax.i64(i64, i64)
declare i64 @llvm.umin.i64(i64, i64)
declare i64 @llvm.smax.i64(i64, i64)
declare i64 @llvm.smin.i64(i64, i64)
declare float @llvm.maxnum.f32(float ,float)
declare float @llvm.minnum.f32(float ,float)

; Adds element 0 of a <2 x i32> argument to the zero-extended result of an
; i16 add reduction over a zeroinitializer <4 x i16>, truncates the sum to i8
; and stores it to a null pointer. In the expected assembly the extracted
; element is the scalar operand of an e16 vredsum.vs.
; NOTE(review): the function name suggests this is a reduced reproducer for a
; compiler crash; the originating report is not referenced here - confirm.
define void @crash(<2 x i32> %0) {
; CHECK-LABEL: crash:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT:    vmv.v.i v8, 0
; CHECK-NEXT:    vmv.s.x v9, a0
; CHECK-NEXT:    vredsum.vs v8, v8, v9
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    sb a0, 0(zero)
; CHECK-NEXT:    ret
entry:
  %1 = extractelement <2 x i32> %0, i64 0
  %2 = tail call i16 @llvm.vector.reduce.add.v4i16(<4 x i16> zeroinitializer)
  %3 = zext i16 %2 to i32
  %op.rdx = add i32 %1, %3
  %conv18.us = trunc i32 %op.rdx to i8
  store i8 %conv18.us, ptr null, align 1
  ret void
}

declare i16 @llvm.vector.reduce.add.v4i16(<4 x i16>)