; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv64 -mattr=+m,+f,+d,+a,+c,+v \
; RUN:   -target-abi=lp64d -verify-machineinstrs -O2 < %s | FileCheck %s

; The following tests check whether inserting VSETVLI avoids inserting
; unneeded vsetvlis across basic blocks.

declare i64 @llvm.riscv.vsetvli(i64, i64, i64)

declare <vscale x 1 x double> @llvm.riscv.vfadd.nxv1f64.nxv1f64(<vscale x 1 x double>, <vscale x 1 x double>, <vscale x 1 x double>, i64, i64)
declare <vscale x 2 x float> @llvm.riscv.vfadd.nxv2f32.nxv2f32(<vscale x 2 x float>, <vscale x 2 x float>, <vscale x 2 x float>, i64, i64)

declare <vscale x 1 x double> @llvm.riscv.vfsub.nxv1f64.nxv1f64(<vscale x 1 x double>, <vscale x 1 x double>, <vscale x 1 x double>, i64, i64)

declare <vscale x 1 x double> @llvm.riscv.vfmul.nxv1f64.nxv1f64(<vscale x 1 x double>, <vscale x 1 x double>, <vscale x 1 x double>, i64, i64)

declare <vscale x 1 x double> @llvm.riscv.vfmv.v.f.nxv1f64.f64(<vscale x 1 x double>, double, i64)
declare <vscale x 2 x float> @llvm.riscv.vfmv.v.f.nxv2f32.f32(<vscale x 2 x float>, float, i64)

declare void @llvm.riscv.vse.nxv1f64(<vscale x 1 x double>, <vscale x 1 x double>* nocapture, i64)
declare void @llvm.riscv.vse.nxv2f32(<vscale x 2 x float>, <vscale x 2 x float>* nocapture, i64)

define <vscale x 1 x double> @test1(i64 %avl, i8 zeroext %cond, <vscale x 1 x double> %a, <vscale x 1 x double> %b) nounwind {
; CHECK-LABEL: test1:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
; CHECK-NEXT:    beqz a1, .LBB0_2
; CHECK-NEXT:  # %bb.1: # %if.then
; CHECK-NEXT:    vfadd.vv v8, v8, v9
; CHECK-NEXT:    ret
; CHECK-NEXT:  .LBB0_2: # %if.else
; CHECK-NEXT:    vfsub.vv v8, v8, v9
; CHECK-NEXT:    ret
entry:
  %0 = tail call i64 @llvm.riscv.vsetvli(i64 %avl, i64 3, i64 0)
  %tobool = icmp eq i8 %cond, 0
  br i1 %tobool, label %if.else, label %if.then

if.then:                                          ; preds = %entry
  %1 = tail call <vscale x 1 x double> @llvm.riscv.vfadd.nxv1f64.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x double> %a, <vscale x 1 x double> %b, i64 7, i64 %0)
  br label %if.end

if.else:                                          ; preds = %entry
  %2 = tail call <vscale x 1 x double> @llvm.riscv.vfsub.nxv1f64.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x double> %a, <vscale x 1 x double> %b, i64 7, i64 %0)
  br label %if.end

if.end:                                           ; preds = %if.else, %if.then
  %c.0 = phi <vscale x 1 x double> [ %1, %if.then ], [ %2, %if.else ]
  ret <vscale x 1 x double> %c.0
}

@scratch = global i8 0, align 16

define <vscale x 1 x double> @test2(i64 %avl, i8 zeroext %cond, <vscale x 1 x double> %a, <vscale x 1 x double> %b) nounwind {
; CHECK-LABEL: test2:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
; CHECK-NEXT:    beqz a1, .LBB1_2
; CHECK-NEXT:  # %bb.1: # %if.then
; CHECK-NEXT:    vfadd.vv v9, v8, v9
; CHECK-NEXT:    vfmul.vv v8, v9, v8
; CHECK-NEXT:    ret
; CHECK-NEXT:  .LBB1_2: # %if.else
; CHECK-NEXT:    vfsub.vv v9, v8, v9
; CHECK-NEXT:    vfmul.vv v8, v9, v8
; CHECK-NEXT:    ret
entry:
  %0 = tail call i64 @llvm.riscv.vsetvli(i64 %avl, i64 3, i64 0)
  %tobool = icmp eq i8 %cond, 0
  br i1 %tobool, label %if.else, label %if.then

if.then:                                          ; preds = %entry
  %1 = tail call <vscale x 1 x double> @llvm.riscv.vfadd.nxv1f64.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x double> %a, <vscale x 1 x double> %b, i64 7, i64 %0)
  br label %if.end

if.else:                                          ; preds = %entry
  %2 = tail call <vscale x 1 x double> @llvm.riscv.vfsub.nxv1f64.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x double> %a, <vscale x 1 x double> %b, i64 7, i64 %0)
  br label %if.end

if.end:                                           ; preds = %if.else, %if.then
  %c.0 = phi <vscale x 1 x double> [ %1, %if.then ], [ %2, %if.else ]
  %3 = tail call <vscale x 1 x double> @llvm.riscv.vfmul.nxv1f64.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x double> %c.0, <vscale x 1 x double> %a, i64 7, i64 %0)
  ret <vscale x 1 x double> %3
}

define <vscale x 1 x double> @test3(i64 %avl, i8 zeroext %cond, <vscale x 1 x double> %a, <vscale x 1 x double> %b) nounwind {
; CHECK-LABEL: test3:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    beqz a1, .LBB2_2
; CHECK-NEXT:  # %bb.1: # %if.then
; CHECK-NEXT:    vsetvli a0, a0, e64, m1, ta, ma
; CHECK-NEXT:    vfadd.vv v9, v8, v9
; CHECK-NEXT:    vfmul.vv v8, v9, v8
; CHECK-NEXT:    # implicit-def: $x10
; CHECK-NEXT:    ret
; CHECK-NEXT:  .LBB2_2: # %if.else
; CHECK-NEXT:    vsetvli a0, a0, e64, m1, ta, ma
; CHECK-NEXT:    vfsub.vv v9, v8, v9
; CHECK-NEXT:    vfmul.vv v8, v9, v8
; CHECK-NEXT:    # implicit-def: $x10
; CHECK-NEXT:    ret
entry:
  %tobool = icmp eq i8 %cond, 0
  br i1 %tobool, label %if.else, label %if.then

if.then:                                          ; preds = %entry
  %0 = tail call i64 @llvm.riscv.vsetvli(i64 %avl, i64 3, i64 0)
  %1 = tail call <vscale x 1 x double> @llvm.riscv.vfadd.nxv1f64.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x double> %a, <vscale x 1 x double> %b, i64 7, i64 %0)
  br label %if.end

if.else:                                          ; preds = %entry
  %2 = tail call i64 @llvm.riscv.vsetvli(i64 %avl, i64 3, i64 0)
  %3 = tail call <vscale x 1 x double> @llvm.riscv.vfsub.nxv1f64.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x double> %a, <vscale x 1 x double> %b, i64 7, i64 %2)
  br label %if.end

if.end:                                           ; preds = %if.else, %if.then
  %vl.0 = phi i64 [ %0, %if.then ], [ %2, %if.else ]
  %c.0 = phi <vscale x 1 x double> [ %1, %if.then ], [ %3, %if.else ]
  %4 = tail call <vscale x 1 x double> @llvm.riscv.vfmul.nxv1f64.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x double> %c.0, <vscale x 1 x double> %a, i64 7, i64 %vl.0)
  ret <vscale x 1 x double> %4
}
define <vscale x 1 x double> @test4(i64 %avl, i8 zeroext %cond, <vscale x 1 x double> %l, <vscale x 1 x double> %r) nounwind {
; CHECK-LABEL: test4:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    beqz a1, .LBB3_2
; CHECK-NEXT:  # %bb.1: # %if.then
; CHECK-NEXT:    lui a1, %hi(.LCPI3_0)
; CHECK-NEXT:    addi a1, a1, %lo(.LCPI3_0)
; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
; CHECK-NEXT:    vlse64.v v10, (a1), zero
; CHECK-NEXT:    lui a1, %hi(.LCPI3_1)
; CHECK-NEXT:    addi a1, a1, %lo(.LCPI3_1)
; CHECK-NEXT:    vlse64.v v11, (a1), zero
; CHECK-NEXT:    vfadd.vv v10, v10, v11
; CHECK-NEXT:    lui a1, %hi(scratch)
; CHECK-NEXT:    addi a1, a1, %lo(scratch)
; CHECK-NEXT:    vse64.v v10, (a1)
; CHECK-NEXT:    j .LBB3_3
; CHECK-NEXT:  .LBB3_2: # %if.else
; CHECK-NEXT:    lui a1, 260096
; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
; CHECK-NEXT:    vmv.v.x v10, a1
; CHECK-NEXT:    lui a1, 262144
; CHECK-NEXT:    vmv.v.x v11, a1
; CHECK-NEXT:    vfadd.vv v10, v10, v11
; CHECK-NEXT:    lui a1, %hi(scratch)
; CHECK-NEXT:    addi a1, a1, %lo(scratch)
; CHECK-NEXT:    vse32.v v10, (a1)
; CHECK-NEXT:  .LBB3_3: # %if.end
; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
; CHECK-NEXT:    vfmul.vv v8, v8, v9
; CHECK-NEXT:    ret
entry:
  %tobool = icmp eq i8 %cond, 0
  br i1 %tobool, label %if.else, label %if.then

if.then:                                          ; preds = %entry
  %0 = tail call <vscale x 1 x double> @llvm.riscv.vfmv.v.f.nxv1f64.f64(<vscale x 1 x double> undef, double 1.000000e+00, i64 %avl)
  %1 = tail call <vscale x 1 x double> @llvm.riscv.vfmv.v.f.nxv1f64.f64(<vscale x 1 x double> undef, double 2.000000e+00, i64 %avl)
  %2 = tail call <vscale x 1 x double> @llvm.riscv.vfadd.nxv1f64.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x double> %0, <vscale x 1 x double> %1, i64 7, i64 %avl)
  %3 = bitcast i8* @scratch to <vscale x 1 x double>*
  tail call void @llvm.riscv.vse.nxv1f64(<vscale x 1 x double> %2, <vscale x 1 x double>* %3, i64 %avl)
  br label %if.end

if.else:                                          ; preds = %entry
  %4 = tail call <vscale x 2 x float> @llvm.riscv.vfmv.v.f.nxv2f32.f32(<vscale x 2 x float> undef, float 1.000000e+00, i64 %avl)
  %5 = tail call <vscale x 2 x float> @llvm.riscv.vfmv.v.f.nxv2f32.f32(<vscale x 2 x float> undef, float 2.000000e+00, i64 %avl)
  %6 = tail call <vscale x 2 x float> @llvm.riscv.vfadd.nxv2f32.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x float> %4, <vscale x 2 x float> %5, i64 7, i64 %avl)
  %7 = bitcast i8* @scratch to <vscale x 2 x float>*
  tail call void @llvm.riscv.vse.nxv2f32(<vscale x 2 x float> %6, <vscale x 2 x float>* %7, i64 %avl)
  br label %if.end

if.end:                                           ; preds = %if.else, %if.then
  %8 = tail call <vscale x 1 x double> @llvm.riscv.vfmul.nxv1f64.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x double> %l, <vscale x 1 x double> %r, i64 7, i64 %avl)
  ret <vscale x 1 x double> %8
}

define <vscale x 1 x double> @test5(i64 %avl, i8 zeroext %cond, <vscale x 1 x double> %a, <vscale x 1 x double> %b) nounwind {
; CHECK-LABEL: test5:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    andi a2, a1, 1
; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
; CHECK-NEXT:    bnez a2, .LBB4_3
; CHECK-NEXT:  # %bb.1: # %if.else
; CHECK-NEXT:    vfsub.vv v9, v8, v9
; CHECK-NEXT:    andi a1, a1, 2
; CHECK-NEXT:    beqz a1, .LBB4_4
; CHECK-NEXT:  .LBB4_2: # %if.then4
; CHECK-NEXT:    vfmul.vv v8, v9, v8
; CHECK-NEXT:    ret
; CHECK-NEXT:  .LBB4_3: # %if.then
; CHECK-NEXT:    vfadd.vv v9, v8, v9
; CHECK-NEXT:    andi a1, a1, 2
; CHECK-NEXT:    bnez a1, .LBB4_2
; CHECK-NEXT:  .LBB4_4: # %if.else5
; CHECK-NEXT:    vfmul.vv v8, v8, v9
; CHECK-NEXT:    ret
entry:
  %0 = tail call i64 @llvm.riscv.vsetvli(i64 %avl, i64 3, i64 0)
  %conv = zext i8 %cond to i32
  %and = and i32 %conv, 1
  %tobool = icmp eq i32 %and, 0
  br i1 %tobool, label %if.else, label %if.then

if.then:                                          ; preds = %entry
  %1 = tail call <vscale x 1 x double> @llvm.riscv.vfadd.nxv1f64.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x double> %a, <vscale x 1 x double> %b, i64 7, i64 %0)
  br label %if.end

if.else:                                          ; preds = %entry
  %2 = tail call <vscale x 1 x double> @llvm.riscv.vfsub.nxv1f64.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x double> %a, <vscale x 1 x double> %b, i64 7, i64 %0)
  br label %if.end

if.end:                                           ; preds = %if.else, %if.then
  %c.0 = phi <vscale x 1 x double> [ %1, %if.then ], [ %2, %if.else ]
  %and2 = and i32 %conv, 2
  %tobool3 = icmp eq i32 %and2, 0
  br i1 %tobool3, label %if.else5, label %if.then4

if.then4:                                         ; preds = %if.end
  %3 = tail call <vscale x 1 x double> @llvm.riscv.vfmul.nxv1f64.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x double> %c.0, <vscale x 1 x double> %a, i64 7, i64 %0)
  br label %if.end6

if.else5:                                         ; preds = %if.end
  %4 = tail call <vscale x 1 x double> @llvm.riscv.vfmul.nxv1f64.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x double> %a, <vscale x 1 x double> %c.0, i64 7, i64 %0)
  br label %if.end6

if.end6:                                          ; preds = %if.else5, %if.then4
  %c.1 = phi <vscale x 1 x double> [ %3, %if.then4 ], [ %4, %if.else5 ]
  ret <vscale x 1 x double> %c.1
}
; FIXME: The explicit vsetvli in if.then4 could be removed as it is redundant
; with the one in the entry, but we lack the ability to remove explicit
; vsetvli instructions.
define <vscale x 1 x double> @test6(i64 %avl, i8 zeroext %cond, <vscale x 1 x double> %a, <vscale x 1 x double> %b) nounwind {
; CHECK-LABEL: test6:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    andi a3, a1, 1
; CHECK-NEXT:    vsetvli a2, a0, e64, m1, ta, ma
; CHECK-NEXT:    bnez a3, .LBB5_3
; CHECK-NEXT:  # %bb.1: # %if.else
; CHECK-NEXT:    vfsub.vv v8, v8, v9
; CHECK-NEXT:    andi a1, a1, 2
; CHECK-NEXT:    beqz a1, .LBB5_4
; CHECK-NEXT:  .LBB5_2: # %if.then4
; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
; CHECK-NEXT:    lui a0, %hi(.LCPI5_0)
; CHECK-NEXT:    addi a0, a0, %lo(.LCPI5_0)
; CHECK-NEXT:    vlse64.v v9, (a0), zero
; CHECK-NEXT:    lui a0, %hi(.LCPI5_1)
; CHECK-NEXT:    addi a0, a0, %lo(.LCPI5_1)
; CHECK-NEXT:    vlse64.v v10, (a0), zero
; CHECK-NEXT:    vfadd.vv v9, v9, v10
; CHECK-NEXT:    lui a0, %hi(scratch)
; CHECK-NEXT:    addi a0, a0, %lo(scratch)
; CHECK-NEXT:    vse64.v v9, (a0)
; CHECK-NEXT:    j .LBB5_5
; CHECK-NEXT:  .LBB5_3: # %if.then
; CHECK-NEXT:    vfadd.vv v8, v8, v9
; CHECK-NEXT:    andi a1, a1, 2
; CHECK-NEXT:    bnez a1, .LBB5_2
; CHECK-NEXT:  .LBB5_4: # %if.else5
; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
; CHECK-NEXT:    lui a0, 260096
; CHECK-NEXT:    vmv.v.x v9, a0
; CHECK-NEXT:    lui a0, 262144
; CHECK-NEXT:    vmv.v.x v10, a0
; CHECK-NEXT:    vfadd.vv v9, v9, v10
; CHECK-NEXT:    lui a0, %hi(scratch)
; CHECK-NEXT:    addi a0, a0, %lo(scratch)
; CHECK-NEXT:    vse32.v v9, (a0)
; CHECK-NEXT:  .LBB5_5: # %if.end10
; CHECK-NEXT:    vsetvli zero, a2, e64, m1, ta, ma
; CHECK-NEXT:    vfmul.vv v8, v8, v8
; CHECK-NEXT:    ret
entry:
  %0 = tail call i64 @llvm.riscv.vsetvli(i64 %avl, i64 3, i64 0)
  %conv = zext i8 %cond to i32
  %and = and i32 %conv, 1
  %tobool = icmp eq i32 %and, 0
  br i1 %tobool, label %if.else, label %if.then

if.then:                                          ; preds = %entry
  %1 = tail call <vscale x 1 x double> @llvm.riscv.vfadd.nxv1f64.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x double> %a, <vscale x 1 x double> %b, i64 7, i64 %0)
  br label %if.end

if.else:                                          ; preds = %entry
  %2 = tail call <vscale x 1 x double> @llvm.riscv.vfsub.nxv1f64.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x double> %a, <vscale x 1 x double> %b, i64 7, i64 %0)
  br label %if.end

if.end:                                           ; preds = %if.else, %if.then
  %c.0 = phi <vscale x 1 x double> [ %1, %if.then ], [ %2, %if.else ]
  %and2 = and i32 %conv, 2
  %tobool3 = icmp eq i32 %and2, 0
  br i1 %tobool3, label %if.else5, label %if.then4

if.then4:                                         ; preds = %if.end
  %3 = tail call i64 @llvm.riscv.vsetvli(i64 %avl, i64 3, i64 0)
  %4 = tail call <vscale x 1 x double> @llvm.riscv.vfmv.v.f.nxv1f64.f64(<vscale x 1 x double> undef, double 1.000000e+00, i64 %3)
  %5 = tail call <vscale x 1 x double> @llvm.riscv.vfmv.v.f.nxv1f64.f64(<vscale x 1 x double> undef, double 2.000000e+00, i64 %3)
  %6 = tail call <vscale x 1 x double> @llvm.riscv.vfadd.nxv1f64.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x double> %4, <vscale x 1 x double> %5, i64 7, i64 %3)
  %7 = bitcast i8* @scratch to <vscale x 1 x double>*
  tail call void @llvm.riscv.vse.nxv1f64(<vscale x 1 x double> %6, <vscale x 1 x double>* %7, i64 %3)
  br label %if.end10

if.else5:                                         ; preds = %if.end
  %8 = tail call i64 @llvm.riscv.vsetvli(i64 %avl, i64 2, i64 0)
  %9 = tail call <vscale x 2 x float> @llvm.riscv.vfmv.v.f.nxv2f32.f32(<vscale x 2 x float> undef, float 1.000000e+00, i64 %8)
  %10 = tail call <vscale x 2 x float> @llvm.riscv.vfmv.v.f.nxv2f32.f32(<vscale x 2 x float> undef, float 2.000000e+00, i64 %8)
  %11 = tail call <vscale x 2 x float> @llvm.riscv.vfadd.nxv2f32.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x float> %9, <vscale x 2 x float> %10, i64 7, i64 %8)
  %12 = bitcast i8* @scratch to <vscale x 2 x float>*
  tail call void @llvm.riscv.vse.nxv2f32(<vscale x 2 x float> %11, <vscale x 2 x float>* %12, i64 %8)
  br label %if.end10

if.end10:                                         ; preds = %if.else5, %if.then4
  %13 = tail call <vscale x 1 x double> @llvm.riscv.vfmul.nxv1f64.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x double> %c.0, <vscale x 1 x double> %c.0, i64 7, i64 %0)
  ret <vscale x 1 x double> %13
}
declare void @foo()

; Similar to test1, but contains a call to @foo to act as barrier to analyzing
; VL/VTYPE.
define <vscale x 1 x double> @test8(i64 %avl, i8 zeroext %cond, <vscale x 1 x double> %a, <vscale x 1 x double> %b) nounwind {
; CHECK-LABEL: test8:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    addi sp, sp, -32
; CHECK-NEXT:    sd ra, 24(sp) # 8-byte Folded Spill
; CHECK-NEXT:    sd s0, 16(sp) # 8-byte Folded Spill
; CHECK-NEXT:    csrr a2, vlenb
; CHECK-NEXT:    slli a2, a2, 1
; CHECK-NEXT:    sub sp, sp, a2
; CHECK-NEXT:    vsetvli s0, a0, e64, m1, ta, ma
; CHECK-NEXT:    beqz a1, .LBB6_2
; CHECK-NEXT:  # %bb.1: # %if.then
; CHECK-NEXT:    vfadd.vv v8, v8, v9
; CHECK-NEXT:    j .LBB6_3
; CHECK-NEXT:  .LBB6_2: # %if.else
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    add a0, a0, sp
; CHECK-NEXT:    addi a0, a0, 16
; CHECK-NEXT:    vs1r.v v9, (a0) # Unknown-size Folded Spill
; CHECK-NEXT:    addi a0, sp, 16
; CHECK-NEXT:    vs1r.v v8, (a0) # Unknown-size Folded Spill
; CHECK-NEXT:    call foo
; CHECK-NEXT:    vsetvli zero, s0, e64, m1, ta, ma
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    add a0, a0, sp
; CHECK-NEXT:    addi a0, a0, 16
; CHECK-NEXT:    vl1r.v v8, (a0) # Unknown-size Folded Reload
; CHECK-NEXT:    addi a0, sp, 16
; CHECK-NEXT:    vl1r.v v9, (a0) # Unknown-size Folded Reload
; CHECK-NEXT:    vfsub.vv v8, v9, v8
; CHECK-NEXT:  .LBB6_3: # %if.then
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    slli a0, a0, 1
; CHECK-NEXT:    add sp, sp, a0
; CHECK-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
; CHECK-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
; CHECK-NEXT:    addi sp, sp, 32
; CHECK-NEXT:    ret
entry:
  %0 = tail call i64 @llvm.riscv.vsetvli(i64 %avl, i64 3, i64 0)
  %tobool = icmp eq i8 %cond, 0
  br i1 %tobool, label %if.else, label %if.then

if.then:                                          ; preds = %entry
  %1 = tail call <vscale x 1 x double> @llvm.riscv.vfadd.nxv1f64.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x double> %a, <vscale x 1 x double> %b, i64 7, i64 %0)
  br label %if.end

if.else:                                          ; preds = %entry
  call void @foo()
  %2 = tail call <vscale x 1 x double> @llvm.riscv.vfsub.nxv1f64.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x double> %a, <vscale x 1 x double> %b, i64 7, i64 %0)
  br label %if.end

if.end:                                           ; preds = %if.else, %if.then
  %c.0 = phi <vscale x 1 x double> [ %1, %if.then ], [ %2, %if.else ]
  ret <vscale x 1 x double> %c.0
}

; Similar to test2, but contains a call to @foo to act as barrier to analyzing
; VL/VTYPE.
define <vscale x 1 x double> @test9(i64 %avl, i8 zeroext %cond, <vscale x 1 x double> %a, <vscale x 1 x double> %b) nounwind {
; CHECK-LABEL: test9:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    addi sp, sp, -32
; CHECK-NEXT:    sd ra, 24(sp) # 8-byte Folded Spill
; CHECK-NEXT:    sd s0, 16(sp) # 8-byte Folded Spill
; CHECK-NEXT:    csrr a2, vlenb
; CHECK-NEXT:    slli a2, a2, 1
; CHECK-NEXT:    sub sp, sp, a2
; CHECK-NEXT:    vsetvli s0, a0, e64, m1, ta, ma
; CHECK-NEXT:    beqz a1, .LBB7_2
; CHECK-NEXT:  # %bb.1: # %if.then
; CHECK-NEXT:    vfadd.vv v9, v8, v9
; CHECK-NEXT:    addi a0, sp, 16
; CHECK-NEXT:    vs1r.v v9, (a0) # Unknown-size Folded Spill
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    add a0, a0, sp
; CHECK-NEXT:    addi a0, a0, 16
; CHECK-NEXT:    vs1r.v v8, (a0) # Unknown-size Folded Spill
; CHECK-NEXT:    call foo
; CHECK-NEXT:    addi a0, sp, 16
; CHECK-NEXT:    vl1r.v v9, (a0) # Unknown-size Folded Reload
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    add a0, a0, sp
; CHECK-NEXT:    addi a0, a0, 16
; CHECK-NEXT:    vl1r.v v8, (a0) # Unknown-size Folded Reload
; CHECK-NEXT:    j .LBB7_3
; CHECK-NEXT:  .LBB7_2: # %if.else
; CHECK-NEXT:    vfsub.vv v9, v8, v9
; CHECK-NEXT:  .LBB7_3: # %if.end
; CHECK-NEXT:    vsetvli zero, s0, e64, m1, ta, ma
; CHECK-NEXT:    vfmul.vv v8, v9, v8
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    slli a0, a0, 1
; CHECK-NEXT:    add sp, sp, a0
; CHECK-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
; CHECK-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
; CHECK-NEXT:    addi sp, sp, 32
; CHECK-NEXT:    ret
entry:
  %0 = tail call i64 @llvm.riscv.vsetvli(i64 %avl, i64 3, i64 0)
  %tobool = icmp eq i8 %cond, 0
  br i1 %tobool, label %if.else, label %if.then

if.then:                                          ; preds = %entry
  %1 = tail call <vscale x 1 x double> @llvm.riscv.vfadd.nxv1f64.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x double> %a, <vscale x 1 x double> %b, i64 7, i64 %0)
  call void @foo()
  br label %if.end

if.else:                                          ; preds = %entry
  %2 = tail call <vscale x 1 x double> @llvm.riscv.vfsub.nxv1f64.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x double> %a, <vscale x 1 x double> %b, i64 7, i64 %0)
  br label %if.end

if.end:                                           ; preds = %if.else, %if.then
  %c.0 = phi <vscale x 1 x double> [ %1, %if.then ], [ %2, %if.else ]
  %3 = tail call <vscale x 1 x double> @llvm.riscv.vfmul.nxv1f64.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x double> %c.0, <vscale x 1 x double> %a, i64 7, i64 %0)
  ret <vscale x 1 x double> %3
}

define void @saxpy_vec(i64 %n, float %a, float* nocapture readonly %x, float* nocapture %y) {
; CHECK-LABEL: saxpy_vec:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetvli a3, a0, e32, m8, ta, ma
; CHECK-NEXT:    beqz a3, .LBB8_2
; CHECK-NEXT:  .LBB8_1: # %for.body
; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    vle32.v v8, (a1)
; CHECK-NEXT:    vle32.v v16, (a2)
; CHECK-NEXT:    slli a4, a3, 2
; CHECK-NEXT:    add a1, a1, a4
; CHECK-NEXT:    vsetvli zero, zero, e32, m8, tu, ma
; CHECK-NEXT:    vfmacc.vf v16, fa0, v8
; CHECK-NEXT:    vse32.v v16, (a2)
; CHECK-NEXT:    sub a0, a0, a3
; CHECK-NEXT:    vsetvli a3, a0, e32, m8, ta, ma
; CHECK-NEXT:    add a2, a2, a4
; CHECK-NEXT:    bnez a3, .LBB8_1
; CHECK-NEXT:  .LBB8_2: # %for.end
; CHECK-NEXT:    ret
entry:
  %0 = tail call i64 @llvm.riscv.vsetvli.i64(i64 %n, i64 2, i64 3)
  %cmp.not13 = icmp eq i64 %0, 0
  br i1 %cmp.not13, label %for.end, label %for.body

for.body:                                         ; preds = %for.body, %entry
  %1 = phi i64 [ %7, %for.body ], [ %0, %entry ]
  %n.addr.016 = phi i64 [ %sub, %for.body ], [ %n, %entry ]
  %x.addr.015 = phi float* [ %add.ptr, %for.body ], [ %x, %entry ]
  %y.addr.014 = phi float* [ %add.ptr1, %for.body ], [ %y, %entry ]
  %2 = bitcast float* %x.addr.015 to <vscale x 16 x float>*
  %3 = tail call <vscale x 16 x float> @llvm.riscv.vle.nxv16f32.i64(<vscale x 16 x float> undef, <vscale x 16 x float>* %2, i64 %1)
  %add.ptr = getelementptr inbounds float, float* %x.addr.015, i64 %1
  %4 = bitcast float* %y.addr.014 to <vscale x 16 x float>*
  %5 = tail call <vscale x 16 x float> @llvm.riscv.vle.nxv16f32.i64(<vscale x 16 x float> undef, <vscale x 16 x float>* %4, i64 %1)
  %6 = tail call <vscale x 16 x float> @llvm.riscv.vfmacc.nxv16f32.f32.i64(<vscale x 16 x float> %5, float %a, <vscale x 16 x float> %3, i64 7, i64 %1, i64 0)
  tail call void @llvm.riscv.vse.nxv16f32.i64(<vscale x 16 x float> %6, <vscale x 16 x float>* %4, i64 %1)
  %add.ptr1 = getelementptr inbounds float, float* %y.addr.014, i64 %1
  %sub = sub i64 %n.addr.016, %1
  %7 = tail call i64 @llvm.riscv.vsetvli.i64(i64 %sub, i64 2, i64 3)
  %cmp.not = icmp eq i64 %7, 0
  br i1 %cmp.not, label %for.end, label %for.body

for.end:                                          ; preds = %for.body, %entry
  ret void
}

declare i64 @llvm.riscv.vsetvli.i64(i64, i64 immarg, i64 immarg)
declare <vscale x 16 x float> @llvm.riscv.vle.nxv16f32.i64(<vscale x 16 x float>, <vscale x 16 x float>* nocapture, i64)
declare <vscale x 16 x float> @llvm.riscv.vfmacc.nxv16f32.f32.i64(<vscale x 16 x float>, float, <vscale x 16 x float>, i64, i64, i64)
declare void @llvm.riscv.vse.nxv16f32.i64(<vscale x 16 x float>, <vscale x 16 x float>* nocapture, i64)

; We need a vsetvli in the last block because the predecessors have different
; VTYPEs. The AVL is the same and the SEW/LMUL ratio implies the same VLMAX so
; we don't need to read AVL and can keep VL unchanged.
define <vscale x 2 x i32> @test_vsetvli_x0_x0(<vscale x 2 x i32>* %x, <vscale x 2 x i16>* %y, <vscale x 2 x i32> %z, i64 %vl, i1 %cond) nounwind {
; CHECK-LABEL: test_vsetvli_x0_x0:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetvli zero, a2, e32, m1, ta, ma
; CHECK-NEXT:    vle32.v v9, (a0)
; CHECK-NEXT:    andi a3, a3, 1
; CHECK-NEXT:    beqz a3, .LBB9_2
; CHECK-NEXT:  # %bb.1: # %if
; CHECK-NEXT:    vle16.v v10, (a1)
; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; CHECK-NEXT:    vwcvt.x.x.v v8, v10
; CHECK-NEXT:  .LBB9_2: # %if.end
; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
; CHECK-NEXT:    vadd.vv v8, v9, v8
; CHECK-NEXT:    ret
entry:
  %a = call <vscale x 2 x i32> @llvm.riscv.vle.nxv2i32(<vscale x 2 x i32> undef, <vscale x 2 x i32>* %x, i64 %vl)
  br i1 %cond, label %if, label %if.end

if:
  %b = call <vscale x 2 x i16> @llvm.riscv.vle.nxv2i16(<vscale x 2 x i16> undef, <vscale x 2 x i16>* %y, i64 %vl)
  %c = call <vscale x 2 x i32> @llvm.riscv.vwadd.nxv2i32(<vscale x 2 x i32> undef, <vscale x 2 x i16> %b, i16 0, i64 %vl)
  br label %if.end

if.end:
  %d = phi <vscale x 2 x i32> [ %z, %entry ], [ %c, %if ]
  %e = call <vscale x 2 x i32> @llvm.riscv.vadd.nxv2i32(<vscale x 2 x i32> undef, <vscale x 2 x i32> %a, <vscale x 2 x i32> %d, i64 %vl)
  ret <vscale x 2 x i32> %e
}

declare <vscale x 2 x i32> @llvm.riscv.vle.nxv2i32(<vscale x 2 x i32>, <vscale x 2 x i32>*, i64)
declare <vscale x 2 x i16> @llvm.riscv.vle.nxv2i16(<vscale x 2 x i16>, <vscale x 2 x i16>*, i64)
declare <vscale x 2 x i32> @llvm.riscv.vwadd.nxv2i32(<vscale x 2 x i32>, <vscale x 2 x i16>, i16, i64)
declare <vscale x 2 x i32> @llvm.riscv.vadd.nxv2i32(<vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, i64)

; We can use X0, X0 vsetvli in if2 and if2.end. The merge point at if.end will
; see two different vtypes with the same SEW/LMUL ratio. At if2.end we will only
; know the SEW/LMUL ratio for the if.end predecessor and the full vtype for
; the if2 predecessor. This makes sure we can merge a SEW/LMUL predecessor with
; a predecessor we know the vtype for.
define <vscale x 2 x i32> @test_vsetvli_x0_x0_2(<vscale x 2 x i32>* %x, <vscale x 2 x i16>* %y, <vscale x 2 x i16>* %z, i64 %vl, i1 %cond, i1 %cond2, <vscale x 2 x i32> %w) nounwind {
; CHECK-LABEL: test_vsetvli_x0_x0_2:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetvli zero, a3, e32, m1, ta, ma
; CHECK-NEXT:    vle32.v v9, (a0)
; CHECK-NEXT:    andi a4, a4, 1
; CHECK-NEXT:    beqz a4, .LBB10_2
; CHECK-NEXT:  # %bb.1: # %if
; CHECK-NEXT:    vle16.v v10, (a1)
; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; CHECK-NEXT:    vwadd.wv v9, v9, v10
; CHECK-NEXT:  .LBB10_2: # %if.end
; CHECK-NEXT:    andi a5, a5, 1
; CHECK-NEXT:    beqz a5, .LBB10_4
; CHECK-NEXT:  # %bb.3: # %if2
; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; CHECK-NEXT:    vle16.v v10, (a2)
; CHECK-NEXT:    vwadd.wv v9, v9, v10
; CHECK-NEXT:  .LBB10_4: # %if2.end
; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
; CHECK-NEXT:    vadd.vv v8, v9, v8
; CHECK-NEXT:    ret
entry:
  %a = call <vscale x 2 x i32> @llvm.riscv.vle.nxv2i32(<vscale x 2 x i32> undef, <vscale x 2 x i32>* %x, i64 %vl)
  br i1 %cond, label %if, label %if.end

if:
  %b = call <vscale x 2 x i16> @llvm.riscv.vle.nxv2i16(<vscale x 2 x i16> undef, <vscale x 2 x i16>* %y, i64 %vl)
  %c = call <vscale x 2 x i32> @llvm.riscv.vwadd.w.nxv2i32.nxv2i16(<vscale x 2 x i32> undef, <vscale x 2 x i32> %a, <vscale x 2 x i16> %b, i64 %vl)
  br label %if.end

if.end:
  %d = phi <vscale x 2 x i32> [ %a, %entry ], [ %c, %if ]
  br i1 %cond2, label %if2, label %if2.end

if2:
  %e = call <vscale x 2 x i16> @llvm.riscv.vle.nxv2i16(<vscale x 2 x i16> undef, <vscale x 2 x i16>* %z, i64 %vl)
  %f = call <vscale x 2 x i32> @llvm.riscv.vwadd.w.nxv2i32.nxv2i16(<vscale x 2 x i32> undef, <vscale x 2 x i32> %d, <vscale x 2 x i16> %e, i64 %vl)
  br label %if2.end

if2.end:
  %g = phi <vscale x 2 x i32> [ %d, %if.end ], [ %f, %if2 ]
  %h = call <vscale x 2 x i32> @llvm.riscv.vadd.nxv2i32(<vscale x 2 x i32> undef, <vscale x 2 x i32> %g, <vscale x 2 x i32> %w, i64 %vl)
  ret <vscale x 2 x i32> %h
}

declare <vscale x 2 x i32> @llvm.riscv.vwadd.w.nxv2i32.nxv2i16(<vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i16>, i64)

; We should only need 1 vsetvli for this code.
define void @vlmax(i64 %N, double* %c, double* %a, double* %b) {
; CHECK-LABEL: vlmax:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetvli a6, zero, e64, m1, ta, ma
; CHECK-NEXT:    blez a0, .LBB11_3
; CHECK-NEXT:  # %bb.1: # %for.body.preheader
; CHECK-NEXT:    li a5, 0
; CHECK-NEXT:    slli a4, a6, 3
; CHECK-NEXT:  .LBB11_2: # %for.body
; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    vle64.v v8, (a2)
; CHECK-NEXT:    vle64.v v9, (a3)
; CHECK-NEXT:    vfadd.vv v8, v8, v9
; CHECK-NEXT:    vse64.v v8, (a1)
; CHECK-NEXT:    add a5, a5, a6
; CHECK-NEXT:    add a1, a1, a4
; CHECK-NEXT:    add a3, a3, a4
; CHECK-NEXT:    add a2, a2, a4
; CHECK-NEXT:    blt a5, a0, .LBB11_2
; CHECK-NEXT:  .LBB11_3: # %for.end
; CHECK-NEXT:    ret
entry:
  %0 = tail call i64 @llvm.riscv.vsetvlimax.i64(i64 3, i64 0)
  %cmp13 = icmp sgt i64 %N, 0
  br i1 %cmp13, label %for.body, label %for.end

for.body:                                         ; preds = %entry, %for.body
  %i.014 = phi i64 [ %add, %for.body ], [ 0, %entry ]
  %arrayidx = getelementptr inbounds double, double* %a, i64 %i.014
  %1 = bitcast double* %arrayidx to <vscale x 1 x double>*
  %2 = tail call <vscale x 1 x double> @llvm.riscv.vle.nxv1f64.i64(<vscale x 1 x double> undef, <vscale x 1 x double>* %1, i64 %0)
  %arrayidx1 = getelementptr inbounds double, double* %b, i64 %i.014
  %3 = bitcast double* %arrayidx1 to <vscale x 1 x double>*
  %4 = tail call <vscale x 1 x double> @llvm.riscv.vle.nxv1f64.i64(<vscale x 1 x double> undef, <vscale x 1 x double>* %3, i64 %0)
  %5 = tail call <vscale x 1 x double> @llvm.riscv.vfadd.nxv1f64.nxv1f64.i64(<vscale x 1 x double> undef, <vscale x 1 x double> %2, <vscale x 1 x double> %4, i64 7, i64 %0)
  %arrayidx2 = getelementptr inbounds double, double* %c, i64 %i.014
  %6 = bitcast double* %arrayidx2 to <vscale x 1 x double>*
  tail call void @llvm.riscv.vse.nxv1f64.i64(<vscale x 1 x double> %5, <vscale x 1 x double>* %6, i64 %0)
  %add = add nuw nsw i64 %i.014, %0
  %cmp = icmp slt i64 %add, %N
  br i1 %cmp, label %for.body, label %for.end

for.end:                                          ; preds = %for.body, %entry
  ret void
}

; A single vector store in the loop with VL controlled by VLMAX
define void @vector_init_vlmax(i64 %N, double* %c) {
; CHECK-LABEL: vector_init_vlmax:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetvli a2, zero, e64, m1, ta, ma
; CHECK-NEXT:    blez a0, .LBB12_3
; CHECK-NEXT:  # %bb.1: # %for.body.preheader
; CHECK-NEXT:    li a3, 0
; CHECK-NEXT:    slli a4, a2, 3
; CHECK-NEXT:    vmv.v.i v8, 0
; CHECK-NEXT:  .LBB12_2: # %for.body
; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    vse64.v v8, (a1)
; CHECK-NEXT:    add a3, a3, a2
; CHECK-NEXT:    add a1, a1, a4
; CHECK-NEXT:    blt a3, a0, .LBB12_2
; CHECK-NEXT:  .LBB12_3: # %for.end
; CHECK-NEXT:    ret
entry:
  %0 = tail call i64 @llvm.riscv.vsetvlimax.i64(i64 3, i64 0)
  %cmp13 = icmp sgt i64 %N, 0
  br i1 %cmp13, label %for.body, label %for.end

for.body:                                         ; preds = %entry, %for.body
  %i.014 = phi i64 [ %add, %for.body ], [ 0, %entry ]
  %arrayidx2 = getelementptr inbounds double, double* %c, i64 %i.014
  %addr = bitcast double* %arrayidx2 to <vscale x 1 x double>*
  tail call void @llvm.riscv.vse.nxv1f64.i64(<vscale x 1 x double> zeroinitializer, <vscale x 1 x double>* %addr, i64 %0)
  %add = add nuw nsw i64 %i.014, %0
  %cmp = icmp slt i64 %add, %N
  br i1 %cmp, label %for.body, label %for.end

for.end:                                          ; preds = %for.body, %entry
  ret void
}

; Same as above, but VL comes from user provided AVL value
define void @vector_init_vsetvli_N(i64 %N, double* %c) {
; CHECK-LABEL: vector_init_vsetvli_N:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetvli a2, a0, e64, m1, ta, ma
; CHECK-NEXT:    blez a0, .LBB13_3
; CHECK-NEXT:  # %bb.1: # %for.body.preheader
; CHECK-NEXT:    li a3, 0
; CHECK-NEXT:    slli a4, a2, 3
; CHECK-NEXT:    vsetvli a5, zero, e64, m1, ta, ma
; CHECK-NEXT:    vmv.v.i v8, 0
; CHECK-NEXT:  .LBB13_2: # %for.body
; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    vsetvli zero, a2, e64, m1, ta, ma
; CHECK-NEXT:    vse64.v v8, (a1)
; CHECK-NEXT:    add a3, a3, a2
; CHECK-NEXT:    add a1, a1, a4
; CHECK-NEXT:    blt a3, a0, .LBB13_2
; CHECK-NEXT:  .LBB13_3: # %for.end
; CHECK-NEXT:    ret
entry:
  %0 = tail call i64 @llvm.riscv.vsetvli(i64 %N, i64 3, i64 0)
  %cmp13 = icmp sgt i64 %N, 0
  br i1 %cmp13, label %for.body, label %for.end

for.body:                                         ; preds = %entry, %for.body
  %i.014 = phi i64 [ %add, %for.body ], [ 0, %entry ]
  %arrayidx2 = getelementptr inbounds double, double* %c, i64 %i.014
  %addr = bitcast double* %arrayidx2 to <vscale x 1 x double>*
  tail call void @llvm.riscv.vse.nxv1f64.i64(<vscale x 1 x double> zeroinitializer, <vscale x 1 x double>* %addr, i64 %0)
  %add = add nuw nsw i64 %i.014, %0
  %cmp = icmp slt i64 %add, %N
  br i1 %cmp, label %for.body, label %for.end

for.end:                                          ; preds = %for.body, %entry
  ret void
}

; Same as above, but VL is a hard coded constant (in the preheader)
define void @vector_init_vsetvli_fv(i64 %N, double* %c) {
; CHECK-LABEL: vector_init_vsetvli_fv:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    li a2, 0
; CHECK-NEXT:    vsetivli a3, 4, e64, m1, ta, ma
; CHECK-NEXT:    slli a4, a3, 3
; CHECK-NEXT:    vsetvli a5, zero, e64, m1, ta, ma
; CHECK-NEXT:    vmv.v.i v8, 0
; CHECK-NEXT:  .LBB14_1: # %for.body
; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    vsetivli zero, 4, e64, m1, ta, ma
; CHECK-NEXT:    vse64.v v8, (a1)
; CHECK-NEXT:    add a2, a2, a3
; CHECK-NEXT:    add a1, a1, a4
; CHECK-NEXT:    blt a2, a0, .LBB14_1
; CHECK-NEXT:  # %bb.2: # %for.end
; CHECK-NEXT:    ret
entry:
  %0 = tail call i64 @llvm.riscv.vsetvli(i64 4, i64 3, i64 0)
  br label %for.body

for.body:                                         ; preds = %entry, %for.body
  %i.014 = phi i64 [ %add, %for.body ], [ 0, %entry ]
  %arrayidx2 = getelementptr inbounds double, double* %c, i64 %i.014
  %addr = bitcast double* %arrayidx2 to <vscale x 1 x double>*
  tail call void @llvm.riscv.vse.nxv1f64.i64(<vscale x 1 x double> zeroinitializer, <vscale x 1 x double>* %addr, i64 %0)
  %add = add nuw nsw i64 %i.014, %0
  %cmp = icmp slt i64 %add, %N
  br i1 %cmp, label %for.body, label %for.end

for.end:                                          ; preds = %for.body
  ret void
}

; Same as above, but result of vsetvli in preheader isn't used, and
; constant is repeated in loop
define void @vector_init_vsetvli_fv2(i64 %N, double* %c) {
; CHECK-LABEL: vector_init_vsetvli_fv2:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    li a2, 0
; CHECK-NEXT:    vsetvli a3, zero, e64, m1, ta, ma
; CHECK-NEXT:    vmv.v.i v8, 0
; CHECK-NEXT:  .LBB15_1: # %for.body
; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    vsetivli zero, 4, e64, m1, ta, ma
; CHECK-NEXT:    vse64.v v8, (a1)
; CHECK-NEXT:    addi a2, a2, 4
; CHECK-NEXT:    addi a1, a1, 32
; CHECK-NEXT:    blt a2, a0, .LBB15_1
; CHECK-NEXT:  # %bb.2: # %for.end
; CHECK-NEXT:    ret
entry:
  tail call i64 @llvm.riscv.vsetvli(i64 4, i64 3, i64 0)
  br label %for.body

for.body:                                         ; preds = %entry, %for.body
  %i.014 = phi i64 [ %add, %for.body ], [ 0, %entry ]
  %arrayidx2 = getelementptr inbounds double, double* %c, i64 %i.014
  %addr = bitcast double* %arrayidx2 to <vscale x 1 x double>*
  tail call void @llvm.riscv.vse.nxv1f64.i64(<vscale x 1 x double> zeroinitializer, <vscale x 1 x double>* %addr, i64 4)
  %add = add nuw nsw i64 %i.014, 4
  %cmp = icmp slt i64 %add, %N
  br i1 %cmp, label %for.body, label %for.end

for.end:                                          ; preds = %for.body
  ret void
}

; Same as above, but AVL is only specified on the store intrinsic
; This case will require some form of hoisting or PRE
define void @vector_init_vsetvli_fv3(i64 %N, double* %c) {
; CHECK-LABEL: vector_init_vsetvli_fv3:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    li a2, 0
; CHECK-NEXT:    vsetvli a3, zero, e64, m1, ta, ma
; CHECK-NEXT:    vmv.v.i v8, 0
; CHECK-NEXT:  .LBB16_1: # %for.body
; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    vsetivli zero, 4, e64, m1, ta, ma
; CHECK-NEXT:    vse64.v v8, (a1)
; CHECK-NEXT:    addi a2, a2, 4
; CHECK-NEXT:    addi a1, a1, 32
; CHECK-NEXT:    blt a2, a0, .LBB16_1
; CHECK-NEXT:  # %bb.2: # %for.end
; CHECK-NEXT:    ret
entry:
  br label %for.body

for.body:                                         ; preds = %entry, %for.body
  %i.014 = phi i64 [ %add, %for.body ], [ 0, %entry ]
  %arrayidx2 = getelementptr inbounds double, double* %c, i64 %i.014
  %addr = bitcast double* %arrayidx2 to <vscale x 1 x double>*
  tail call void @llvm.riscv.vse.nxv1f64.i64(<vscale x 1 x double> zeroinitializer, <vscale x 1 x double>* %addr, i64 4)
  %add = add nuw nsw i64 %i.014, 4
  %cmp = icmp slt i64 %add, %N
  br i1 %cmp, label %for.body, label %for.end

for.end:                                          ; preds = %for.body
  ret void
}

; Demonstrates a case where mutation in phase3 is problematic. We mutate the
; vsetvli without considering that it changes the compatibility result of the
; vadd in the second block.
define <vscale x 4 x i32> @cross_block_mutate(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b,
; CHECK-LABEL: cross_block_mutate:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetivli a0, 6, e32, m2, tu, ma
; CHECK-NEXT:    vmv.s.x v8, a0
; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
; CHECK-NEXT:    vadd.vv v8, v8, v10, v0.t
; CHECK-NEXT:    ret
                                              <vscale x 4 x i1> %mask) {
entry:
  %vl = tail call i64 @llvm.riscv.vsetvli(i64 6, i64 1, i64 0)
  %vl.trunc = trunc i64 %vl to i32
  %a.mod = insertelement <vscale x 4 x i32> %a, i32 %vl.trunc, i32 0
  br label %fallthrough

fallthrough:
  %res = call <vscale x 4 x i32> @llvm.riscv.vadd.mask.nxv4i32.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i32> %a.mod, <vscale x 4 x i32> %b, <vscale x 4 x i1> %mask, i64 %vl, i64 0)
  ret <vscale x 4 x i32> %res
}

define <vscale x 2 x i32> @pre_lmul(<vscale x 2 x i32> %x, <vscale x 2 x i32> %y, i1 %cond) nounwind {
; CHECK-LABEL: pre_lmul:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    andi a0, a0, 1
; CHECK-NEXT:    vsetvli a1, zero, e64, m1, ta, ma
; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, ma
; CHECK-NEXT:    vadd.vv v8, v8, v9
; CHECK-NEXT:    vadd.vv v8, v8, v9
; CHECK-NEXT:    ret
entry:
  %vl = tail call i64 @llvm.riscv.vsetvlimax.i64(i64 3, i64 0)
  %a = call <vscale x 2 x i32> @llvm.riscv.vadd.nxv2i32(<vscale x 2 x i32> undef, <vscale x 2 x i32> %x, <vscale x 2 x i32> %y, i64 %vl)
  br i1 %cond, label %if, label %if.end

if:
  ; Deliberately change vtype - this could be an unknown call, but the broader
  ; code quality is distractingly bad
  tail call i64 @llvm.riscv.vsetvlimax.i64(i64 2, i64 1)
  br label %if.end

if.end:
  %b = call <vscale x 2 x i32> @llvm.riscv.vadd.nxv2i32(<vscale x 2 x i32> undef, <vscale x 2 x i32> %a, <vscale x 2 x i32> %y, i64 %vl)
  ret <vscale x 2 x i32> %b
}

define <vscale x 1 x double> @compat_store_consistency(i1 %cond, <vscale x 1 x double> %a, <vscale x 1 x double> %b, <vscale x 1 x double>* %p1, <vscale x 1 x float> %c, <vscale x 1 x float>* %p2) {
; CHECK-LABEL: compat_store_consistency:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    andi a0, a0, 1
; CHECK-NEXT:    vsetvli a3, zero, e64, m1, ta, ma
; CHECK-NEXT:    vfadd.vv v8, v8, v9
; CHECK-NEXT:    vs1r.v v8, (a1)
; CHECK-NEXT:    beqz a0, .LBB19_2
; CHECK-NEXT:  # %bb.1: # %if.then
; CHECK-NEXT:    vse32.v v10, (a2)
; CHECK-NEXT:  .LBB19_2: # %if.end
; CHECK-NEXT:    ret
entry:
  %res = fadd <vscale x 1 x double> %a, %b
  store <vscale x 1 x double> %res, <vscale x 1 x double>* %p1
  br i1 %cond, label %if.then, label %if.end

if.then:                                          ; preds = %entry
  store <vscale x 1 x float> %c, <vscale x 1 x float>* %p2
  br label %if.end

if.end:                                           ; preds = %if.else, %if.then
  ret <vscale x 1 x double> %res
}

; Next two tests (which are the same except for swapped block order), make sure that the
; demanded reasoning around vmv.s.x correctly handles a forward state with only a valid
; SEWLMULRatio. We previously had a crash bug in this case.
define <vscale x 2 x i32> @test_ratio_only_vmv_s_x(<vscale x 2 x i32>* %x, <vscale x 2 x i16>* %y, i1 %cond) nounwind {
; CHECK-LABEL: test_ratio_only_vmv_s_x:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    andi a2, a2, 1
; CHECK-NEXT:    beqz a2, .LBB20_2
; CHECK-NEXT:  # %bb.1: # %if
; CHECK-NEXT:    vsetivli zero, 2, e16, mf2, ta, ma
; CHECK-NEXT:    vle16.v v9, (a1)
; CHECK-NEXT:    vwcvt.x.x.v v8, v9
; CHECK-NEXT:    j .LBB20_3
; CHECK-NEXT:  .LBB20_2:
; CHECK-NEXT:    vsetivli zero, 2, e32, m1, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:  .LBB20_3: # %if.end
; CHECK-NEXT:    vsetvli zero, zero, e32, m1, tu, ma
; CHECK-NEXT:    vmv.s.x v8, zero
; CHECK-NEXT:    ret
entry:
  %a = call <vscale x 2 x i32> @llvm.riscv.vle.nxv2i32(<vscale x 2 x i32> undef, <vscale x 2 x i32>* %x, i64 2)
  br i1 %cond, label %if, label %if.end

if:
  %b = call <vscale x 2 x i16> @llvm.riscv.vle.nxv2i16(<vscale x 2 x i16> undef, <vscale x 2 x i16>* %y, i64 2)
  %c = call <vscale x 2 x i32> @llvm.riscv.vwadd.nxv2i32(<vscale x 2 x i32> undef, <vscale x 2 x i16> %b, i16 0, i64 2)
  br label %if.end

if.end:
  %d = phi <vscale x 2 x i32> [ %a, %entry ], [ %c, %if ]
  %e = insertelement <vscale x 2 x i32> %d, i32 0, i32 0
  ret <vscale x 2 x i32> %e
}

define <vscale x 2 x i32> @test_ratio_only_vmv_s_x2(<vscale x 2 x i32>* %x, <vscale x 2 x i16>* %y, i1 %cond) nounwind {
; CHECK-LABEL: test_ratio_only_vmv_s_x2:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    andi a2, a2, 1
; CHECK-NEXT:    beqz a2, .LBB21_2
; CHECK-NEXT:  # %bb.1: # %if
; CHECK-NEXT:    vsetivli zero, 2, e32, m1, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    j .LBB21_3
; CHECK-NEXT:  .LBB21_2:
; CHECK-NEXT:    vsetivli zero, 2, e16, mf2, ta, ma
; CHECK-NEXT:    vle16.v v9, (a1)
; CHECK-NEXT:    vwcvt.x.x.v v8, v9
; CHECK-NEXT:  .LBB21_3: # %if.end
; CHECK-NEXT:    vsetvli zero, zero, e32, m1, tu, ma
; CHECK-NEXT:    vmv.s.x v8, zero
; CHECK-NEXT:    ret
entry:
  %b = call <vscale x 2 x i16> @llvm.riscv.vle.nxv2i16(<vscale x 2 x i16> undef, <vscale x 2 x i16>* %y, i64 2)
  %c = call <vscale x 2 x i32> @llvm.riscv.vwadd.nxv2i32(<vscale x 2 x i32> undef, <vscale x 2 x i16> %b, i16 0, i64 2)
  br i1 %cond, label %if, label %if.end

if:
  %a = call <vscale x 2 x i32> @llvm.riscv.vle.nxv2i32(<vscale x 2 x i32> undef, <vscale x 2 x i32>* %x, i64 2)
  br label %if.end

if.end:
  %d = phi <vscale x 2 x i32> [ %a, %if ], [ %c, %entry ]
  %e = insertelement <vscale x 2 x i32> %d, i32 0, i32 0
  ret <vscale x 2 x i32> %e
}

; This case demonstrates a PRE case where the first instruction in the block
; doesn't require a state transition.
define void @pre_over_vle(ptr %A) {
; CHECK-LABEL: pre_over_vle:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    addi a1, a0, 800
; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
; CHECK-NEXT:  .LBB22_1: # %vector.body
; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    vle8.v v8, (a0)
; CHECK-NEXT:    vsext.vf4 v9, v8
; CHECK-NEXT:    vse32.v v9, (a0)
; CHECK-NEXT:    addi a0, a0, 8
; CHECK-NEXT:    bne a0, a1, .LBB22_1
; CHECK-NEXT:  # %bb.2: # %exit
; CHECK-NEXT:    ret
entry:
  br label %vector.body

vector.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %vector.body ]
  %addr = getelementptr inbounds <2 x i32>, ptr %A, i64 %iv
  %v = load <2 x i8>, ptr %addr
  %v2 = sext <2 x i8> %v to <2 x i32>
  store <2 x i32> %v2, ptr %addr
  %iv.next = add i64 %iv, 1
  %cmp = icmp ne i64 %iv.next, 100
  br i1 %cmp, label %vector.body, label %exit

exit:
  ret void
}

declare i64 @llvm.riscv.vsetvlimax.i64(i64, i64)
declare <vscale x 1 x double> @llvm.riscv.vle.nxv1f64.i64(<vscale x 1 x double>, <vscale x 1 x double>* nocapture, i64)
declare <vscale x 1 x double> @llvm.riscv.vfadd.nxv1f64.nxv1f64.i64(<vscale x 1 x double>, <vscale x 1 x double>, <vscale x 1 x double>, i64, i64)
declare void @llvm.riscv.vse.nxv1f64.i64(<vscale x 1 x double>, <vscale x 1 x double>* nocapture, i64)
declare <vscale x 4 x i32> @llvm.riscv.vadd.mask.nxv4i32.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i1>, i64, i64)