; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -S -mtriple=riscv64 -mattr=+v | FileCheck %s

define void @unsupported_fp_ops(<vscale x 4 x float> %vec, i32 %extraarg) {
; CHECK-LABEL: 'unsupported_fp_ops'
; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %pow = call <vscale x 4 x float> @llvm.pow.nxv4f32(<vscale x 4 x float> %vec, <vscale x 4 x float> %vec)
; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %powi = call <vscale x 4 x float> @llvm.powi.nxv4f32.i32(<vscale x 4 x float> %vec, i32 %extraarg)
; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
;
  %pow = call <vscale x 4 x float> @llvm.pow.nxv4f32(<vscale x 4 x float> %vec, <vscale x 4 x float> %vec)
  %powi = call <vscale x 4 x float> @llvm.powi.nxv4f32.i32(<vscale x 4 x float> %vec, i32 %extraarg)
  ret void
}

define void @powi(<vscale x 4 x float> %vec) {
; CHECK-LABEL: 'powi'
; CHECK-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %powi = call <vscale x 4 x float> @llvm.powi.nxv4f32.i32(<vscale x 4 x float> %vec, i32 42)
; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
;
  %powi = call <vscale x 4 x float> @llvm.powi.nxv4f32.i32(<vscale x 4 x float> %vec, i32 42)
  ret void
}

define void @fshr(<vscale x 1 x i32> %a, <vscale x 1 x i32> %b, <vscale x 1 x i32> %c) {
; CHECK-LABEL: 'fshr'
; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %1 = call <vscale x 1 x i32> @llvm.fshr.nxv1i32(<vscale x 1 x i32> %a, <vscale x 1 x i32> %b, <vscale x 1 x i32> %c)
; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
;
  call <vscale x 1 x i32> @llvm.fshr.nxv1i32(<vscale x 1 x i32> %a, <vscale x 1 x i32> %b, <vscale x 1 x i32> %c)
  ret void
}

define void @fshl(<vscale x 1 x i32> %a, <vscale x 1 x i32> %b, <vscale x 1 x i32> %c) {
; CHECK-LABEL: 'fshl'
; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %1 = call <vscale x 1 x i32> @llvm.fshl.nxv1i32(<vscale x 1 x i32> %a, <vscale x 1 x i32> %b, <vscale x 1 x i32> %c)
; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
;
  call <vscale x 1 x i32> @llvm.fshl.nxv1i32(<vscale x 1 x i32> %a, <vscale x 1 x i32> %b, <vscale x 1 x i32> %c)
  ret void
}

define void @vp_fshr() {
; CHECK-LABEL: 'vp_fshr'
; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %1 = call <2 x i8> @llvm.vp.fshr.v2i8(<2 x i8> undef, <2 x i8> undef, <2 x i8> undef, <2 x i1> undef, i32 undef)
; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %2 = call <4 x i8> @llvm.vp.fshr.v4i8(<4 x i8> undef, <4 x i8> undef, <4 x i8> undef, <4 x i1> undef, i32 undef)
; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %3 = call <8 x i8> @llvm.vp.fshr.v8i8(<8 x i8> undef, <8 x i8> undef, <8 x i8> undef, <8 x i1> undef, i32 undef)
; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %4 = call <16 x i8> @llvm.vp.fshr.v16i8(<16 x i8> undef, <16 x i8> undef, <16 x i8> undef, <16 x i1> undef, i32 undef)
; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %5 = call <vscale x 1 x i8> @llvm.vp.fshr.nxv1i8(<vscale x 1 x i8> undef, <vscale x 1 x i8> undef, <vscale x 1 x i8> undef, <vscale x 1 x i1> undef, i32 undef)
; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %6 = call <vscale x 2 x i8> @llvm.vp.fshr.nxv2i8(<vscale x 2 x i8> undef, <vscale x 2 x i8> undef, <vscale x 2 x i8> undef, <vscale x 2 x i1> undef, i32 undef)
; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %7 = call <vscale x 4 x i8> @llvm.vp.fshr.nxv4i8(<vscale x 4 x i8> undef, <vscale x 4 x i8> undef, <vscale x 4 x i8> undef, <vscale x 4 x i1> undef, i32 undef)
; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %8 = call <vscale x 8 x i8> @llvm.vp.fshr.nxv8i8(<vscale x 8 x i8> undef, <vscale x 8 x i8> undef, <vscale x 8 x i8> undef, <vscale x 8 x i1> undef, i32 undef)
; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %9 = call <vscale x 16 x i8> @llvm.vp.fshr.nxv16i8(<vscale x 16 x i8> undef, <vscale x 16 x i8> undef, <vscale x 16 x i8> undef, <vscale x 16 x i1> undef, i32 undef)
; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %10 = call <vscale x 32 x i8> @llvm.vp.fshr.nxv32i8(<vscale x 32 x i8> undef, <vscale x 32 x i8> undef, <vscale x 32 x i8> undef, <vscale x 32 x i1> undef, i32 undef)
; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %11 = call <vscale x 64 x i8> @llvm.vp.fshr.nxv64i8(<vscale x 64 x i8> undef, <vscale x 64 x i8> undef, <vscale x 64 x i8> undef, <vscale x 64 x i1> undef, i32 undef)
; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %12 = call <2 x i16> @llvm.vp.fshr.v2i16(<2 x i16> undef, <2 x i16> undef, <2 x i16> undef, <2 x i1> undef, i32 undef)
; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %13 = call <4 x i16> @llvm.vp.fshr.v4i16(<4 x i16> undef, <4 x i16> undef, <4 x i16> undef, <4 x i1> undef, i32 undef)
; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %14 = call <8 x i16> @llvm.vp.fshr.v8i16(<8 x i16> undef, <8 x i16> undef, <8 x i16> undef, <8 x i1> undef, i32 undef)
; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %15 = call <16 x i16> @llvm.vp.fshr.v16i16(<16 x i16> undef, <16 x i16> undef, <16 x i16> undef, <16 x i1> undef, i32 undef)
; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %16 = call <vscale x 1 x i16> @llvm.vp.fshr.nxv1i16(<vscale x 1 x i16> undef, <vscale x 1 x i16> undef, <vscale x 1 x i16> undef, <vscale x 1 x i1> undef, i32 undef)
; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %17 = call <vscale x 2 x i16> @llvm.vp.fshr.nxv2i16(<vscale x 2 x i16> undef, <vscale x 2 x i16> undef, <vscale x 2 x i16> undef, <vscale x 2 x i1> undef, i32 undef)
; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %18 = call <vscale x 4 x i16> @llvm.vp.fshr.nxv4i16(<vscale x 4 x i16> undef, <vscale x 4 x i16> undef, <vscale x 4 x i16> undef, <vscale x 4 x i1> undef, i32 undef)
; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %19 = call <vscale x 8 x i16> @llvm.vp.fshr.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i16> undef, <vscale x 8 x i16> undef, <vscale x 8 x i1> undef, i32 undef)
; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %20 = call <vscale x 16 x i16> @llvm.vp.fshr.nxv16i16(<vscale x 16 x i16> undef, <vscale x 16 x i16> undef, <vscale x 16 x i16> undef, <vscale x 16 x i1> undef, i32 undef)
; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %21 = call <vscale x 32 x i16> @llvm.vp.fshr.nxv32i16(<vscale x 32 x i16> undef, <vscale x 32 x i16> undef, <vscale x 32 x i16> undef, <vscale x 32 x i1> undef, i32 undef)
; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %22 = call <2 x i32> @llvm.vp.fshr.v2i32(<2 x i32> undef, <2 x i32> undef, <2 x i32> undef, <2 x i1> undef, i32 undef)
; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %23 = call <4 x i32> @llvm.vp.fshr.v4i32(<4 x i32> undef, <4 x i32> undef, <4 x i32> undef, <4 x i1> undef, i32 undef)
; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %24 = call <8 x i32> @llvm.vp.fshr.v8i32(<8 x i32> undef, <8 x i32> undef, <8 x i32> undef, <8 x i1> undef, i32 undef)
; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %25 = call <16 x i32> @llvm.vp.fshr.v16i32(<16 x i32> undef, <16 x i32> undef, <16 x i32> undef, <16 x i1> undef, i32 undef)
; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %26 = call <vscale x 1 x i32> @llvm.vp.fshr.nxv1i32(<vscale x 1 x i32> undef, <vscale x 1 x i32> undef, <vscale x 1 x i32> undef, <vscale x 1 x i1> undef, i32 undef)
; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %27 = call <vscale x 2 x i32> @llvm.vp.fshr.nxv2i32(<vscale x 2 x i32> undef, <vscale x 2 x i32> undef, <vscale x 2 x i32> undef, <vscale x 2 x i1> undef, i32 undef)
; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %28 = call <vscale x 4 x i32> @llvm.vp.fshr.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i32> undef, <vscale x 4 x i32> undef, <vscale x 4 x i1> undef, i32 undef)
; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %29 = call <vscale x 8 x i32> @llvm.vp.fshr.nxv8i32(<vscale x 8 x i32> undef, <vscale x 8 x i32> undef, <vscale x 8 x i32> undef, <vscale x 8 x i1> undef, i32 undef)
; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %30 = call <vscale x 16 x i32> @llvm.vp.fshr.nxv16i32(<vscale x 16 x i32> undef, <vscale x 16 x i32> undef, <vscale x 16 x i32> undef, <vscale x 16 x i1> undef, i32 undef)
; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %31 = call <2 x i64> @llvm.vp.fshr.v2i64(<2 x i64> undef, <2 x i64> undef, <2 x i64> undef, <2 x i1> undef, i32 undef)
; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %32 = call <4 x i64> @llvm.vp.fshr.v4i64(<4 x i64> undef, <4 x i64> undef, <4 x i64> undef, <4 x i1> undef, i32 undef)
; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %33 = call <8 x i64> @llvm.vp.fshr.v8i64(<8 x i64> undef, <8 x i64> undef, <8 x i64> undef, <8 x i1> undef, i32 undef)
; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %34 = call <16 x i64> @llvm.vp.fshr.v16i64(<16 x i64> undef, <16 x i64> undef, <16 x i64> undef, <16 x i1> undef, i32 undef)
; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %35 = call <vscale x 1 x i64> @llvm.vp.fshr.nxv1i64(<vscale x 1 x i64> undef, <vscale x 1 x i64> undef, <vscale x 1 x i64> undef, <vscale x 1 x i1> undef, i32 undef)
; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %36 = call <vscale x 2 x i64> @llvm.vp.fshr.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i64> undef, <vscale x 2 x i64> undef, <vscale x 2 x i1> undef, i32 undef)
; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %37 = call <vscale x 4 x i64> @llvm.vp.fshr.nxv4i64(<vscale x 4 x i64> undef, <vscale x 4 x i64> undef, <vscale x 4 x i64> undef, <vscale x 4 x i1> undef, i32 undef)
; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %38 = call <vscale x 8 x i64> @llvm.vp.fshr.nxv8i64(<vscale x 8 x i64> undef, <vscale x 8 x i64> undef, <vscale x 8 x i64> undef, <vscale x 8 x i1> undef, i32 undef)
; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
;
  call <2 x i8> @llvm.vp.fshr.v2i8(<2 x i8> undef, <2 x i8> undef, <2 x i8> undef, <2 x i1> undef, i32 undef)
  call <4 x i8> @llvm.vp.fshr.v4i8(<4 x i8> undef, <4 x i8> undef, <4 x i8> undef, <4 x i1> undef, i32 undef)
  call <8 x i8> @llvm.vp.fshr.v8i8(<8 x i8> undef, <8 x i8> undef, <8 x i8> undef, <8 x i1> undef, i32 undef)
  call <16 x i8> @llvm.vp.fshr.v16i8(<16 x i8> undef, <16 x i8> undef, <16 x i8> undef, <16 x i1> undef, i32 undef)
  call <vscale x 1 x i8> @llvm.vp.fshr.nxv1i8(<vscale x 1 x i8> undef, <vscale x 1 x i8> undef, <vscale x 1 x i8> undef, <vscale x 1 x i1> undef, i32 undef)
  call <vscale x 2 x i8> @llvm.vp.fshr.nxv2i8(<vscale x 2 x i8> undef, <vscale x 2 x i8> undef, <vscale x 2 x i8> undef, <vscale x 2 x i1> undef, i32 undef)
  call <vscale x 4 x i8> @llvm.vp.fshr.nxv4i8(<vscale x 4 x i8> undef, <vscale x 4 x i8> undef, <vscale x 4 x i8> undef, <vscale x 4 x i1> undef, i32 undef)
  call <vscale x 8 x i8> @llvm.vp.fshr.nxv8i8(<vscale x 8 x i8> undef, <vscale x 8 x i8> undef, <vscale x 8 x i8> undef, <vscale x 8 x i1> undef, i32 undef)
  call <vscale x 16 x i8> @llvm.vp.fshr.nxv16i8(<vscale x 16 x i8> undef, <vscale x 16 x i8> undef, <vscale x 16 x i8> undef, <vscale x 16 x i1> undef, i32 undef)
  call <vscale x 32 x i8> @llvm.vp.fshr.nxv32i8(<vscale x 32 x i8> undef, <vscale x 32 x i8> undef, <vscale x 32 x i8> undef, <vscale x 32 x i1> undef, i32 undef)
  call <vscale x 64 x i8> @llvm.vp.fshr.nxv64i8(<vscale x 64 x i8> undef, <vscale x 64 x i8> undef, <vscale x 64 x i8> undef, <vscale x 64 x i1> undef, i32 undef)
  call <2 x i16> @llvm.vp.fshr.v2i16(<2 x i16> undef, <2 x i16> undef, <2 x i16> undef, <2 x i1> undef, i32 undef)
  call <4 x i16> @llvm.vp.fshr.v4i16(<4 x i16> undef, <4 x i16> undef, <4 x i16> undef, <4 x i1> undef, i32 undef)
  call <8 x i16> @llvm.vp.fshr.v8i16(<8 x i16> undef, <8 x i16> undef, <8 x i16> undef, <8 x i1> undef, i32 undef)
  call <16 x i16> @llvm.vp.fshr.v16i16(<16 x i16> undef, <16 x i16> undef, <16 x i16> undef, <16 x i1> undef, i32 undef)
  call <vscale x 1 x i16> @llvm.vp.fshr.nxv1i16(<vscale x 1 x i16> undef, <vscale x 1 x i16> undef, <vscale x 1 x i16> undef, <vscale x 1 x i1> undef, i32 undef)
  call <vscale x 2 x i16> @llvm.vp.fshr.nxv2i16(<vscale x 2 x i16> undef, <vscale x 2 x i16> undef, <vscale x 2 x i16> undef, <vscale x 2 x i1> undef, i32 undef)
  call <vscale x 4 x i16> @llvm.vp.fshr.nxv4i16(<vscale x 4 x i16> undef, <vscale x 4 x i16> undef, <vscale x 4 x i16> undef, <vscale x 4 x i1> undef, i32 undef)
  call <vscale x 8 x i16> @llvm.vp.fshr.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i16> undef, <vscale x 8 x i16> undef, <vscale x 8 x i1> undef, i32 undef)
  call <vscale x 16 x i16> @llvm.vp.fshr.nxv16i16(<vscale x 16 x i16> undef, <vscale x 16 x i16> undef, <vscale x 16 x i16> undef, <vscale x 16 x i1> undef, i32 undef)
  call <vscale x 32 x i16> @llvm.vp.fshr.nxv32i16(<vscale x 32 x i16> undef, <vscale x 32 x i16> undef, <vscale x 32 x i16> undef, <vscale x 32 x i1> undef, i32 undef)
  call <2 x i32> @llvm.vp.fshr.v2i32(<2 x i32> undef, <2 x i32> undef, <2 x i32> undef, <2 x i1> undef, i32 undef)
  call <4 x i32> @llvm.vp.fshr.v4i32(<4 x i32> undef, <4 x i32> undef, <4 x i32> undef, <4 x i1> undef, i32 undef)
  call <8 x i32> @llvm.vp.fshr.v8i32(<8 x i32> undef, <8 x i32> undef, <8 x i32> undef, <8 x i1> undef, i32 undef)
  call <16 x i32> @llvm.vp.fshr.v16i32(<16 x i32> undef, <16 x i32> undef, <16 x i32> undef, <16 x i1> undef, i32 undef)
  call <vscale x 1 x i32> @llvm.vp.fshr.nxv1i32(<vscale x 1 x i32> undef, <vscale x 1 x i32> undef, <vscale x 1 x i32> undef, <vscale x 1 x i1> undef, i32 undef)
  call <vscale x 2 x i32> @llvm.vp.fshr.nxv2i32(<vscale x 2 x i32> undef, <vscale x 2 x i32> undef, <vscale x 2 x i32> undef, <vscale x 2 x i1> undef, i32 undef)
  call <vscale x 4 x i32> @llvm.vp.fshr.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i32> undef, <vscale x 4 x i32> undef, <vscale x 4 x i1> undef, i32 undef)
  call <vscale x 8 x i32> @llvm.vp.fshr.nxv8i32(<vscale x 8 x i32> undef, <vscale x 8 x i32> undef, <vscale x 8 x i32> undef, <vscale x 8 x i1> undef, i32 undef)
  call <vscale x 16 x i32> @llvm.vp.fshr.nxv16i32(<vscale x 16 x i32> undef, <vscale x 16 x i32> undef, <vscale x 16 x i32> undef, <vscale x 16 x i1> undef, i32 undef)
  call <2 x i64> @llvm.vp.fshr.v2i64(<2 x i64> undef, <2 x i64> undef, <2 x i64> undef, <2 x i1> undef, i32 undef)
  call <4 x i64>
@llvm.vp.fshr.v4i64(<4 x i64> undef, <4 x i64> undef, <4 x i64> undef, <4 x i1> undef, i32 undef) call <8 x i64> @llvm.vp.fshr.v8i64(<8 x i64> undef, <8 x i64> undef, <8 x i64> undef, <8 x i1> undef, i32 undef) call <16 x i64> @llvm.vp.fshr.v16i64(<16 x i64> undef, <16 x i64> undef, <16 x i64> undef, <16 x i1> undef, i32 undef) call @llvm.vp.fshr.nxv1i64( undef, undef, undef, undef, i32 undef) call @llvm.vp.fshr.nxv2i64( undef, undef, undef, undef, i32 undef) call @llvm.vp.fshr.nxv4i64( undef, undef, undef, undef, i32 undef) call @llvm.vp.fshr.nxv8i64( undef, undef, undef, undef, i32 undef) ret void } define void @vp_fshl() { ; CHECK-LABEL: 'vp_fshl' ; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %1 = call <2 x i8> @llvm.vp.fshl.v2i8(<2 x i8> undef, <2 x i8> undef, <2 x i8> undef, <2 x i1> undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %2 = call <4 x i8> @llvm.vp.fshl.v4i8(<4 x i8> undef, <4 x i8> undef, <4 x i8> undef, <4 x i1> undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %3 = call <8 x i8> @llvm.vp.fshl.v8i8(<8 x i8> undef, <8 x i8> undef, <8 x i8> undef, <8 x i1> undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %4 = call <16 x i8> @llvm.vp.fshl.v16i8(<16 x i8> undef, <16 x i8> undef, <16 x i8> undef, <16 x i1> undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %5 = call @llvm.vp.fshl.nxv1i8( undef, undef, undef, undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %6 = call @llvm.vp.fshl.nxv2i8( undef, undef, undef, undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %7 = call @llvm.vp.fshl.nxv4i8( undef, undef, undef, undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %8 = call @llvm.vp.fshl.nxv8i8( undef, undef, undef, undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %9 = call @llvm.vp.fshl.nxv16i8( undef, undef, undef, undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %10 = call @llvm.vp.fshl.nxv32i8( undef, undef, undef, undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %11 = call @llvm.vp.fshl.nxv64i8( undef, undef, undef, undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %12 = call <2 x i16> @llvm.vp.fshl.v2i16(<2 x i16> undef, <2 x i16> undef, <2 x i16> undef, <2 x i1> undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %13 = call <4 x i16> @llvm.vp.fshl.v4i16(<4 x i16> undef, <4 x i16> undef, <4 x i16> undef, <4 x i1> undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %14 = call <8 x i16> @llvm.vp.fshl.v8i16(<8 x i16> undef, <8 x i16> undef, <8 x i16> undef, <8 x i1> undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %15 = call <16 x i16> @llvm.vp.fshl.v16i16(<16 x i16> undef, <16 x i16> undef, <16 x i16> undef, <16 x i1> undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %16 = call @llvm.vp.fshl.nxv1i16( undef, undef, undef, undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %17 = call @llvm.vp.fshl.nxv2i16( undef, undef, undef, undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %18 = call 
@llvm.vp.fshl.nxv4i16( undef, undef, undef, undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %19 = call @llvm.vp.fshl.nxv8i16( undef, undef, undef, undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %20 = call @llvm.vp.fshl.nxv16i16( undef, undef, undef, undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %21 = call @llvm.vp.fshl.nxv32i16( undef, undef, undef, undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %22 = call <2 x i32> @llvm.vp.fshl.v2i32(<2 x i32> undef, <2 x i32> undef, <2 x i32> undef, <2 x i1> undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %23 = call <4 x i32> @llvm.vp.fshl.v4i32(<4 x i32> undef, <4 x i32> undef, <4 x i32> undef, <4 x i1> undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %24 = call <8 x i32> @llvm.vp.fshl.v8i32(<8 x i32> undef, <8 x i32> undef, <8 x i32> undef, <8 x i1> undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %25 = call <16 x i32> @llvm.vp.fshl.v16i32(<16 x i32> undef, <16 x i32> undef, <16 x i32> undef, <16 x i1> undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %26 = call @llvm.vp.fshl.nxv1i32( undef, undef, undef, undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %27 = call @llvm.vp.fshl.nxv2i32( undef, undef, undef, undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %28 = call @llvm.vp.fshl.nxv4i32( undef, undef, undef, undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %29 = call @llvm.vp.fshl.nxv8i32( undef, undef, undef, undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %30 = call @llvm.vp.fshl.nxv16i32( undef, undef, undef, undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %31 = call <2 x i64> @llvm.vp.fshl.v2i64(<2 x i64> undef, <2 x i64> undef, <2 x i64> undef, <2 x i1> undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %32 = call <4 x i64> @llvm.vp.fshl.v4i64(<4 x i64> undef, <4 x i64> undef, <4 x i64> undef, <4 x i1> undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %33 = call <8 x i64> @llvm.vp.fshl.v8i64(<8 x i64> undef, <8 x i64> undef, <8 x i64> undef, <8 x i1> undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %34 = call <16 x i64> @llvm.vp.fshl.v16i64(<16 x i64> undef, <16 x i64> undef, <16 x i64> undef, <16 x i1> undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %35 = call @llvm.vp.fshl.nxv1i64( undef, undef, undef, undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %36 = call @llvm.vp.fshl.nxv2i64( undef, undef, undef, undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %37 = call @llvm.vp.fshl.nxv4i64( undef, undef, undef, undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %38 = call @llvm.vp.fshl.nxv8i64( undef, undef, undef, undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; call <2 x i8> @llvm.vp.fshl.v2i8(<2 x i8> undef, <2 x i8> undef, <2 x i8> undef, <2 x i1> undef, i32 undef) call <4 x i8> 
@llvm.vp.fshl.v4i8(<4 x i8> undef, <4 x i8> undef, <4 x i8> undef, <4 x i1> undef, i32 undef) call <8 x i8> @llvm.vp.fshl.v8i8(<8 x i8> undef, <8 x i8> undef, <8 x i8> undef, <8 x i1> undef, i32 undef) call <16 x i8> @llvm.vp.fshl.v16i8(<16 x i8> undef, <16 x i8> undef, <16 x i8> undef, <16 x i1> undef, i32 undef) call @llvm.vp.fshl.nxv1i8( undef, undef, undef, undef, i32 undef) call @llvm.vp.fshl.nxv2i8( undef, undef, undef, undef, i32 undef) call @llvm.vp.fshl.nxv4i8( undef, undef, undef, undef, i32 undef) call @llvm.vp.fshl.nxv8i8( undef, undef, undef, undef, i32 undef) call @llvm.vp.fshl.nxv16i8( undef, undef, undef, undef, i32 undef) call @llvm.vp.fshl.nxv32i8( undef, undef, undef, undef, i32 undef) call @llvm.vp.fshl.nxv64i8( undef, undef, undef, undef, i32 undef) call <2 x i16> @llvm.vp.fshl.v2i16(<2 x i16> undef, <2 x i16> undef, <2 x i16> undef, <2 x i1> undef, i32 undef) call <4 x i16> @llvm.vp.fshl.v4i16(<4 x i16> undef, <4 x i16> undef, <4 x i16> undef, <4 x i1> undef, i32 undef) call <8 x i16> @llvm.vp.fshl.v8i16(<8 x i16> undef, <8 x i16> undef, <8 x i16> undef, <8 x i1> undef, i32 undef) call <16 x i16> @llvm.vp.fshl.v16i16(<16 x i16> undef, <16 x i16> undef, <16 x i16> undef, <16 x i1> undef, i32 undef) call @llvm.vp.fshl.nxv1i16( undef, undef, undef, undef, i32 undef) call @llvm.vp.fshl.nxv2i16( undef, undef, undef, undef, i32 undef) call @llvm.vp.fshl.nxv4i16( undef, undef, undef, undef, i32 undef) call @llvm.vp.fshl.nxv8i16( undef, undef, undef, undef, i32 undef) call @llvm.vp.fshl.nxv16i16( undef, undef, undef, undef, i32 undef) call @llvm.vp.fshl.nxv32i16( undef, undef, undef, undef, i32 undef) call <2 x i32> @llvm.vp.fshl.v2i32(<2 x i32> undef, <2 x i32> undef, <2 x i32> undef, <2 x i1> undef, i32 undef) call <4 x i32> @llvm.vp.fshl.v4i32(<4 x i32> undef, <4 x i32> undef, <4 x i32> undef, <4 x i1> undef, i32 undef) call <8 x i32> @llvm.vp.fshl.v8i32(<8 x i32> undef, <8 x i32> undef, <8 x i32> undef, <8 x i1> undef, i32 undef) call <16 x i32> @llvm.vp.fshl.v16i32(<16 x i32> undef, <16 x i32> undef, <16 x i32> undef, <16 x i1> undef, i32 undef) call @llvm.vp.fshl.nxv1i32( undef, undef, undef, undef, i32 undef) call @llvm.vp.fshl.nxv2i32( undef, undef, undef, undef, i32 undef) call @llvm.vp.fshl.nxv4i32( undef, undef, undef, undef, i32 undef) call @llvm.vp.fshl.nxv8i32( undef, undef, undef, undef, i32 undef) call @llvm.vp.fshl.nxv16i32( undef, undef, undef, undef, i32 undef) call <2 x i64> @llvm.vp.fshl.v2i64(<2 x i64> undef, <2 x i64> undef, <2 x i64> undef, <2 x i1> undef, i32 undef) call <4 x i64> @llvm.vp.fshl.v4i64(<4 x i64> undef, <4 x i64> undef, <4 x i64> undef, <4 x i1> undef, i32 undef) call <8 x i64> @llvm.vp.fshl.v8i64(<8 x i64> undef, <8 x i64> undef, <8 x i64> undef, <8 x i1> undef, i32 undef) call <16 x i64> @llvm.vp.fshl.v16i64(<16 x i64> undef, <16 x i64> undef, <16 x i64> undef, <16 x i1> undef, i32 undef) call @llvm.vp.fshl.nxv1i64( undef, undef, undef, undef, i32 undef) call @llvm.vp.fshl.nxv2i64( undef, undef, undef, undef, i32 undef) call @llvm.vp.fshl.nxv4i64( undef, undef, undef, undef, i32 undef) call @llvm.vp.fshl.nxv8i64( undef, undef, undef, undef, i32 undef) ret void } define void @add() { ; CHECK-LABEL: 'add' ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t0 = call <2 x i8> @llvm.vp.add.v2i8(<2 x i8> undef, <2 x i8> undef, <2 x i1> undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t1 = add <2 x i8> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for 
instruction: %t2 = call <4 x i8> @llvm.vp.add.v4i8(<4 x i8> undef, <4 x i8> undef, <4 x i1> undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t3 = add <4 x i8> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t4 = call <8 x i8> @llvm.vp.add.v8i8(<8 x i8> undef, <8 x i8> undef, <8 x i1> undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t5 = add <8 x i8> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t6 = call <16 x i8> @llvm.vp.add.v16i8(<16 x i8> undef, <16 x i8> undef, <16 x i1> undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t7 = add <16 x i8> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t8 = call <2 x i64> @llvm.vp.add.v2i64(<2 x i64> undef, <2 x i64> undef, <2 x i1> undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t9 = add <2 x i64> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %t10 = call <4 x i64> @llvm.vp.add.v4i64(<4 x i64> undef, <4 x i64> undef, <4 x i1> undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %t12 = add <4 x i64> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %t13 = call <8 x i64> @llvm.vp.add.v8i64(<8 x i64> undef, <8 x i64> undef, <8 x i1> undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %t14 = add <8 x i64> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %t15 = call <16 x i64> @llvm.vp.add.v16i64(<16 x i64> undef, <16 x i64> undef, <16 x i1> undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %t16 = add <16 x i64> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t17 = call @llvm.vp.add.nxv2i8( undef, undef, undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t18 = add undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t19 = call @llvm.vp.add.nxv4i8( undef, undef, undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t20 = add undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t21 = call @llvm.vp.add.nxv8i8( undef, undef, undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t22 = add undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %t23 = call @llvm.vp.add.nxv16i8( undef, undef, undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %t24 = add undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %t25 = call @llvm.vp.add.nxv2i64( undef, undef, undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %t26 = add undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %t27 = call @llvm.vp.add.nxv4i64( undef, undef, undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %t28 = add undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %t29 = call @llvm.vp.add.nxv8i64( undef, undef, undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %t30 = add undef, undef ; CHECK-NEXT: Cost Model: 
Found an estimated cost of 16 for instruction: %t31 = call @llvm.vp.add.nxv16i64( undef, undef, undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %t32 = add undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; %t0 = call <2 x i8> @llvm.vp.add.v2i8(<2 x i8> undef, <2 x i8> undef, <2 x i1> undef, i32 undef) %t1 = add <2 x i8> undef, undef %t2 = call <4 x i8> @llvm.vp.add.v4i8(<4 x i8> undef, <4 x i8> undef, <4 x i1> undef, i32 undef) %t3 = add <4 x i8> undef, undef %t4 = call <8 x i8> @llvm.vp.add.v8i8(<8 x i8> undef, <8 x i8> undef, <8 x i1> undef, i32 undef) %t5 = add <8 x i8> undef, undef %t6 = call <16 x i8> @llvm.vp.add.v16i8(<16 x i8> undef, <16 x i8> undef, <16 x i1> undef, i32 undef) %t7 = add <16 x i8> undef, undef %t8 = call <2 x i64> @llvm.vp.add.v2i64(<2 x i64> undef, <2 x i64> undef, <2 x i1> undef, i32 undef) %t9 = add <2 x i64> undef, undef %t10 = call <4 x i64> @llvm.vp.add.v4i64(<4 x i64> undef, <4 x i64> undef, <4 x i1> undef, i32 undef) %t12 = add <4 x i64> undef, undef %t13 = call <8 x i64> @llvm.vp.add.v8i64(<8 x i64> undef, <8 x i64> undef, <8 x i1> undef, i32 undef) %t14 = add <8 x i64> undef, undef %t15 = call <16 x i64> @llvm.vp.add.v16i64(<16 x i64> undef, <16 x i64> undef, <16 x i1> undef, i32 undef) %t16 = add <16 x i64> undef, undef %t17 = call @llvm.vp.add.nxv2i8( undef, undef, undef, i32 undef) %t18 = add undef, undef %t19 = call @llvm.vp.add.nxv4i8( undef, undef, undef, i32 undef) %t20 = add undef, undef %t21 = call @llvm.vp.add.nxv8i8( undef, undef, undef, i32 undef) %t22 = add undef, undef %t23 = call @llvm.vp.add.nxv16i8( undef, undef, undef, i32 undef) %t24 = add undef, undef %t25 = call @llvm.vp.add.nxv2i64( undef, undef, undef, i32 undef) %t26 = add undef, undef %t27 = call @llvm.vp.add.nxv4i64( undef, undef, undef, i32 undef) %t28 = add undef, undef %t29 = call @llvm.vp.add.nxv8i64( undef, undef, undef, i32 undef) %t30 = add undef, undef %t31 = call @llvm.vp.add.nxv16i64( undef, undef, undef, i32 undef) %t32 = add undef, undef ret void } define void @abs() { ; CHECK-LABEL: 'abs' ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %1 = call <2 x i8> @llvm.vp.abs.v2i8(<2 x i8> undef, i1 false, <2 x i1> undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %2 = call <2 x i8> @llvm.abs.v2i8(<2 x i8> undef, i1 false) ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %3 = call <4 x i8> @llvm.vp.abs.v4i8(<4 x i8> undef, i1 false, <4 x i1> undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %4 = call <4 x i8> @llvm.abs.v4i8(<4 x i8> undef, i1 false) ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %5 = call <8 x i8> @llvm.vp.abs.v8i8(<8 x i8> undef, i1 false, <8 x i1> undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %6 = call <8 x i8> @llvm.abs.v8i8(<8 x i8> undef, i1 false) ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %7 = call <16 x i8> @llvm.vp.abs.v16i8(<16 x i8> undef, i1 false, <16 x i1> undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %8 = call <16 x i8> @llvm.abs.v16i8(<16 x i8> undef, i1 false) ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %9 = call <2 x i64> @llvm.vp.abs.v2i64(<2 x i64> undef, i1 false, <2 x i1> undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: 
%10 = call <2 x i64> @llvm.abs.v2i64(<2 x i64> undef, i1 false) ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %11 = call <4 x i64> @llvm.vp.abs.v4i64(<4 x i64> undef, i1 false, <4 x i1> undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %12 = call <4 x i64> @llvm.abs.v4i64(<4 x i64> undef, i1 false) ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %13 = call <8 x i64> @llvm.vp.abs.v8i64(<8 x i64> undef, i1 false, <8 x i1> undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %14 = call <8 x i64> @llvm.abs.v8i64(<8 x i64> undef, i1 false) ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %15 = call <16 x i64> @llvm.vp.abs.v16i64(<16 x i64> undef, i1 false, <16 x i1> undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %16 = call <16 x i64> @llvm.abs.v16i64(<16 x i64> undef, i1 false) ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %17 = call @llvm.vp.abs.nxv2i8( undef, i1 false, undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %18 = call @llvm.abs.nxv2i8( undef, i1 false) ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %19 = call @llvm.vp.abs.nxv4i8( undef, i1 false, undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %20 = call @llvm.abs.nxv4i8( undef, i1 false) ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %21 = call @llvm.vp.abs.nxv8i8( undef, i1 false, undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %22 = call @llvm.abs.nxv8i8( undef, i1 false) ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %23 = call @llvm.vp.abs.nxv16i8( undef, i1 false, undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %24 = call @llvm.abs.nxv16i8( undef, i1 false) ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %25 = call @llvm.vp.abs.nxv2i64( undef, i1 false, undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %26 = call @llvm.abs.nxv2i64( undef, i1 false) ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %27 = call @llvm.vp.abs.nxv4i64( undef, i1 false, undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %28 = call @llvm.abs.nxv4i64( undef, i1 false) ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %29 = call @llvm.vp.abs.nxv8i64( undef, i1 false, undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %30 = call @llvm.abs.nxv8i64( undef, i1 false) ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %31 = call @llvm.vp.abs.nxv16i64( undef, i1 false, undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %32 = call @llvm.abs.nxv16i64( undef, i1 false) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; call <2 x i8> @llvm.vp.abs.v2i8(<2 x i8> undef, i1 0, <2 x i1> undef, i32 undef) call <2 x i8> @llvm.abs.v2i8(<2 x i8> undef, i1 0) call <4 x i8> @llvm.vp.abs.v4i8(<4 x i8> undef, i1 0, <4 x i1> undef, i32 undef) call <4 x i8> @llvm.abs.v4i8(<4 x i8> undef, i1 0) call <8 x i8> @llvm.vp.abs.v8i8(<8 x i8> undef, i1 0, <8 x i1> undef, i32 undef) call <8 x i8> @llvm.abs.v8i8(<8 x i8> undef, i1 0) call <16 x i8> 
@llvm.vp.abs.v16i8(<16 x i8> undef, i1 0, <16 x i1> undef, i32 undef) call <16 x i8> @llvm.abs.v16i8(<16 x i8> undef, i1 0) call <2 x i64> @llvm.vp.abs.v2i64(<2 x i64> undef, i1 0, <2 x i1> undef, i32 undef) call <2 x i64> @llvm.abs.v2i64(<2 x i64> undef, i1 0) call <4 x i64> @llvm.vp.abs.v4i64(<4 x i64> undef, i1 0, <4 x i1> undef, i32 undef) call <4 x i64> @llvm.abs.v4i64(<4 x i64> undef, i1 0) call <8 x i64> @llvm.vp.abs.v8i64(<8 x i64> undef, i1 0, <8 x i1> undef, i32 undef) call <8 x i64> @llvm.abs.v8i64(<8 x i64> undef, i1 0) call <16 x i64> @llvm.vp.abs.v16i64(<16 x i64> undef, i1 0, <16 x i1> undef, i32 undef) call <16 x i64> @llvm.abs.v16i64(<16 x i64> undef, i1 0) call @llvm.vp.abs.nxv2i8( undef, i1 0, undef, i32 undef) call @llvm.abs.nxv2i8( undef, i1 0) call @llvm.vp.abs.nxv4i8( undef, i1 0, undef, i32 undef) call @llvm.abs.nxv4i8( undef, i1 0) call @llvm.vp.abs.nxv8i8( undef, i1 0, undef, i32 undef) call @llvm.abs.nxv8i8( undef, i1 0) call @llvm.vp.abs.nxv16i8( undef, i1 0, undef, i32 undef) call @llvm.abs.nxv16i8( undef, i1 0) call @llvm.vp.abs.nxv2i64( undef, i1 0, undef, i32 undef) call @llvm.abs.nxv2i64( undef, i1 0) call @llvm.vp.abs.nxv4i64( undef, i1 0, undef, i32 undef) call @llvm.abs.nxv4i64( undef, i1 0) call @llvm.vp.abs.nxv8i64( undef, i1 0, undef, i32 undef) call @llvm.abs.nxv8i64( undef, i1 0) call @llvm.vp.abs.nxv16i64( undef, i1 0, undef, i32 undef) call @llvm.abs.nxv16i64( undef, i1 0) ret void } define void @load() { ; CHECK-LABEL: 'load' ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t0 = call <2 x i8> @llvm.vp.load.v2i8.p0(ptr undef, <2 x i1> undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t1 = load <2 x i8>, ptr undef, align 2 ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t2 = call <4 x i8> @llvm.vp.load.v4i8.p0(ptr undef, <4 x i1> undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t3 = load <4 x i8>, ptr undef, align 4 ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t4 = call <8 x i8> @llvm.vp.load.v8i8.p0(ptr undef, <8 x i1> undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t5 = load <8 x i8>, ptr undef, align 8 ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t6 = call <16 x i8> @llvm.vp.load.v16i8.p0(ptr undef, <16 x i1> undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t7 = load <16 x i8>, ptr undef, align 16 ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t8 = call <2 x i64> @llvm.vp.load.v2i64.p0(ptr undef, <2 x i1> undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t9 = load <2 x i64>, ptr undef, align 16 ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %t10 = call <4 x i64> @llvm.vp.load.v4i64.p0(ptr undef, <4 x i1> undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %t12 = load <4 x i64>, ptr undef, align 32 ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %t13 = call <8 x i64> @llvm.vp.load.v8i64.p0(ptr undef, <8 x i1> undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %t14 = load <8 x i64>, ptr undef, align 64 ; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %t15 = call <16 x i64> @llvm.vp.load.v16i64.p0(ptr undef, <16 x i1> undef, i32 undef) ; CHECK-NEXT: Cost Model: Found 
an estimated cost of 8 for instruction: %t16 = load <16 x i64>, ptr undef, align 128 ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t17 = call @llvm.vp.load.nxv2i8.p0(ptr undef, undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t18 = load , ptr undef, align 2 ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t19 = call @llvm.vp.load.nxv4i8.p0(ptr undef, undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t20 = load , ptr undef, align 4 ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t21 = call @llvm.vp.load.nxv8i8.p0(ptr undef, undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t22 = load , ptr undef, align 8 ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %t23 = call @llvm.vp.load.nxv16i8.p0(ptr undef, undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %t24 = load , ptr undef, align 16 ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %t25 = call @llvm.vp.load.nxv2i64.p0(ptr undef, undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %t26 = load , ptr undef, align 16 ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %t27 = call @llvm.vp.load.nxv4i64.p0(ptr undef, undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %t28 = load , ptr undef, align 32 ; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %t29 = call @llvm.vp.load.nxv8i64.p0(ptr undef, undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %t30 = load , ptr undef, align 64 ; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %t31 = call @llvm.vp.load.nxv16i64.p0(ptr undef, undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %t32 = load , ptr undef, align 128 ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; %t0 = call <2 x i8> @llvm.vp.load.v2i8(ptr undef, <2 x i1> undef, i32 undef) %t1 = load <2 x i8>, ptr undef %t2 = call <4 x i8> @llvm.vp.load.v4i8(ptr undef, <4 x i1> undef, i32 undef) %t3 = load <4 x i8>, ptr undef %t4 = call <8 x i8> @llvm.vp.load.v8i8(ptr undef, <8 x i1> undef, i32 undef) %t5 = load <8 x i8>, ptr undef %t6 = call <16 x i8> @llvm.vp.load.v16i8(ptr undef, <16 x i1> undef, i32 undef) %t7 = load <16 x i8>, ptr undef %t8 = call <2 x i64> @llvm.vp.load.v2i64(ptr undef, <2 x i1> undef, i32 undef) %t9 = load <2 x i64>, ptr undef %t10 = call <4 x i64> @llvm.vp.load.v4i64(ptr undef, <4 x i1> undef, i32 undef) %t12 = load <4 x i64>, ptr undef %t13 = call <8 x i64> @llvm.vp.load.v8i64(ptr undef, <8 x i1> undef, i32 undef) %t14 = load <8 x i64>, ptr undef %t15 = call <16 x i64> @llvm.vp.load.v16i64(ptr undef, <16 x i1> undef, i32 undef) %t16 = load <16 x i64>, ptr undef %t17 = call @llvm.vp.load.nxv2i8(ptr undef, undef, i32 undef) %t18 = load , ptr undef %t19 = call @llvm.vp.load.nxv4i8(ptr undef, undef, i32 undef) %t20 = load , ptr undef %t21 = call @llvm.vp.load.nxv8i8(ptr undef, undef, i32 undef) %t22 = load , ptr undef %t23 = call @llvm.vp.load.nxv16i8(ptr undef, undef, i32 undef) %t24 = load , ptr undef %t25 = call @llvm.vp.load.nxv2i64(ptr undef, undef, i32 undef) %t26 = load , ptr undef %t27 = call @llvm.vp.load.nxv4i64(ptr undef, undef, i32 undef) %t28 = load , ptr undef %t29 = call 
@llvm.vp.load.nxv8i64(ptr undef, undef, i32 undef) %t30 = load , ptr undef %t31 = call @llvm.vp.load.nxv16i64(ptr undef, undef, i32 undef) %t32 = load , ptr undef ret void } define void @store() { ; CHECK-LABEL: 'store' ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.vp.store.v2i8.p0(<2 x i8> undef, ptr undef, <2 x i1> undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i8> undef, ptr undef, align 2 ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.vp.store.v4i8.p0(<4 x i8> undef, ptr undef, <4 x i1> undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i8> undef, ptr undef, align 4 ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.vp.store.v8i8.p0(<8 x i8> undef, ptr undef, <8 x i1> undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x i8> undef, ptr undef, align 8 ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.vp.store.v16i8.p0(<16 x i8> undef, ptr undef, <16 x i1> undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <16 x i8> undef, ptr undef, align 16 ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.vp.store.v2i64.p0(<2 x i64> undef, ptr undef, <2 x i1> undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i64> undef, ptr undef, align 16 ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.vp.store.v4i64.p0(<4 x i64> undef, ptr undef, <4 x i1> undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <4 x i64> undef, ptr undef, align 32 ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.vp.store.v8i64.p0(<8 x i64> undef, ptr undef, <8 x i1> undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <8 x i64> undef, ptr undef, align 64 ; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.vp.store.v16i64.p0(<16 x i64> undef, ptr undef, <16 x i1> undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <16 x i64> undef, ptr undef, align 128 ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.vp.store.nxv2i8.p0( undef, ptr undef, undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store undef, ptr undef, align 2 ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.vp.store.nxv4i8.p0( undef, ptr undef, undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store undef, ptr undef, align 4 ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.vp.store.nxv8i8.p0( undef, ptr undef, undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store undef, ptr undef, align 8 ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.vp.store.nxv16i8.p0( undef, ptr undef, undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store undef, ptr undef, align 16 ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.vp.store.nxv2i64.p0( undef, ptr undef, undef, i32 
undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store undef, ptr undef, align 16 ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.vp.store.nxv4i64.p0( undef, ptr undef, undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store undef, ptr undef, align 32 ; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.vp.store.nxv8i64.p0( undef, ptr undef, undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store undef, ptr undef, align 64 ; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.vp.store.nxv16i64.p0( undef, ptr undef, undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: store undef, ptr undef, align 128 ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; call void @llvm.vp.store.v2i8(<2 x i8> undef, ptr undef, <2 x i1> undef, i32 undef) store <2 x i8> undef, ptr undef call void @llvm.vp.store.v4i8(<4 x i8> undef, ptr undef, <4 x i1> undef, i32 undef) store <4 x i8> undef, ptr undef call void @llvm.vp.store.v8i8(<8 x i8> undef, ptr undef, <8 x i1> undef, i32 undef) store <8 x i8> undef, ptr undef call void @llvm.vp.store.v16i8(<16 x i8> undef, ptr undef, <16 x i1> undef, i32 undef) store <16 x i8> undef, ptr undef call void @llvm.vp.store.v2i64(<2 x i64> undef, ptr undef, <2 x i1> undef, i32 undef) store <2 x i64> undef, ptr undef call void @llvm.vp.store.v4i64(<4 x i64> undef, ptr undef, <4 x i1> undef, i32 undef) store <4 x i64> undef, ptr undef call void @llvm.vp.store.v8i64(<8 x i64> undef, ptr undef, <8 x i1> undef, i32 undef) store <8 x i64> undef, ptr undef call void @llvm.vp.store.v16i64(<16 x i64> undef, ptr undef, <16 x i1> undef, i32 undef) store <16 x i64> undef, ptr undef call void @llvm.vp.store.nxv2i8( undef, ptr undef, undef, i32 undef) store undef, ptr undef call void @llvm.vp.store.nxv4i8( undef, ptr undef, undef, i32 undef) store undef, ptr undef call void @llvm.vp.store.nxv8i8( undef, ptr undef, undef, i32 undef) store undef, ptr undef call void @llvm.vp.store.nxv16i8( undef, ptr undef, undef, i32 undef) store undef, ptr undef call void @llvm.vp.store.nxv2i64( undef, ptr undef, undef, i32 undef) store undef, ptr undef call void @llvm.vp.store.nxv4i64( undef, ptr undef, undef, i32 undef) store undef, ptr undef call void @llvm.vp.store.nxv8i64( undef, ptr undef, undef, i32 undef) store undef, ptr undef call void @llvm.vp.store.nxv16i64( undef, ptr undef, undef, i32 undef) store undef, ptr undef ret void } define void @reduce_add() { ; CHECK-LABEL: 'reduce_add' ; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %1 = call i8 @llvm.vp.reduce.add.v2i8(i8 undef, <2 x i8> undef, <2 x i1> undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %2 = call i8 @llvm.vector.reduce.add.v2i8(<2 x i8> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %3 = call i8 @llvm.vp.reduce.add.v4i8(i8 undef, <4 x i8> undef, <4 x i1> undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %4 = call i8 @llvm.vector.reduce.add.v4i8(<4 x i8> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %5 = call i8 @llvm.vp.reduce.add.v8i8(i8 undef, <8 x i8> undef, <8 x i1> undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %6 = call i8 
@llvm.vector.reduce.add.v8i8(<8 x i8> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %7 = call i8 @llvm.vp.reduce.add.v16i8(i8 undef, <16 x i8> undef, <16 x i1> undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %8 = call i8 @llvm.vector.reduce.add.v16i8(<16 x i8> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %9 = call i64 @llvm.vp.reduce.add.v2i64(i64 undef, <2 x i64> undef, <2 x i1> undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %10 = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %11 = call i64 @llvm.vp.reduce.add.v4i64(i64 undef, <4 x i64> undef, <4 x i1> undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %12 = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %13 = call i64 @llvm.vp.reduce.add.v8i64(i64 undef, <8 x i64> undef, <8 x i1> undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %14 = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %15 = call i64 @llvm.vp.reduce.add.v16i64(i64 undef, <16 x i64> undef, <16 x i1> undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %16 = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %17 = call i8 @llvm.vp.reduce.add.nxv8i8(i8 undef, undef, undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %18 = call i8 @llvm.vector.reduce.add.nxv2i8( undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %19 = call i8 @llvm.vp.reduce.add.nxv4i8(i8 undef, undef, undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %20 = call i8 @llvm.vector.reduce.add.nxv4i8( undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %21 = call i8 @llvm.vp.reduce.add.nxv8i8(i8 undef, undef, undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %22 = call i8 @llvm.vector.reduce.add.nxv8i8( undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %23 = call i8 @llvm.vp.reduce.add.nxv16i8(i8 undef, undef, undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %24 = call i8 @llvm.vector.reduce.add.nxv16i8( undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %25 = call i64 @llvm.vp.reduce.add.nxv2i64(i64 undef, undef, undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %26 = call i64 @llvm.vector.reduce.add.nxv2i64( undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %27 = call i64 @llvm.vp.reduce.add.nxv4i64(i64 undef, undef, undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %28 = call i64 @llvm.vector.reduce.add.nxv4i64( undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %29 = call i64 @llvm.vp.reduce.add.nxv8i64(i64 undef, undef, undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %30 = call i64 @llvm.vector.reduce.add.nxv8i64( undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: 
%31 = call i64 @llvm.vp.reduce.add.nxv16i64(i64 undef, undef, undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %32 = call i64 @llvm.vector.reduce.add.nxv16i64( undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; call i8 @llvm.vp.reduce.add.v2i8(i8 undef, <2 x i8> undef, <2 x i1> undef, i32 undef) call i8 @llvm.vector.reduce.add.v2i8(<2 x i8> undef) call i8 @llvm.vp.reduce.add.v4i8(i8 undef, <4 x i8> undef, <4 x i1> undef, i32 undef) call i8 @llvm.vector.reduce.add.v4i8(<4 x i8> undef) call i8 @llvm.vp.reduce.add.v8i8(i8 undef, <8 x i8> undef, <8 x i1> undef, i32 undef) call i8 @llvm.vector.reduce.add.v8i8(<8 x i8> undef) call i8 @llvm.vp.reduce.add.v16i8(i8 undef, <16 x i8> undef, <16 x i1> undef, i32 undef) call i8 @llvm.vector.reduce.add.v16i8(<16 x i8> undef) call i64 @llvm.vp.reduce.add.v2i64(i64 undef, <2 x i64> undef, <2 x i1> undef, i32 undef) call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> undef) call i64 @llvm.vp.reduce.add.v4i64(i64 undef, <4 x i64> undef, <4 x i1> undef, i32 undef) call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> undef) call i64 @llvm.vp.reduce.add.v8i64(i64 undef, <8 x i64> undef, <8 x i1> undef, i32 undef) call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> undef) call i64 @llvm.vp.reduce.add.v16i64(i64 undef, <16 x i64> undef, <16 x i1> undef, i32 undef) call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> undef) call i8 @llvm.vp.reduce.add.nxv8i8(i8 undef, undef, undef, i32 undef) call i8 @llvm.vector.reduce.add.nxv2i8( undef) call i8 @llvm.vp.reduce.add.nxv4i8(i8 undef, undef, undef, i32 undef) call i8 @llvm.vector.reduce.add.nxv4i8( undef) call i8 @llvm.vp.reduce.add.nxv8i8(i8 undef, undef, undef, i32 undef) call i8 @llvm.vector.reduce.add.nxv8i8( undef) call i8 @llvm.vp.reduce.add.nxv16i8(i8 undef, undef, undef, i32 undef) call i8 @llvm.vector.reduce.add.nxv16i8( undef) call i64 @llvm.vp.reduce.add.nxv2i64(i64 undef, undef, undef, i32 undef) call i64 @llvm.vector.reduce.add.nxv2i64( undef) call i64 @llvm.vp.reduce.add.nxv4i64(i64 undef, undef, undef, i32 undef) call i64 @llvm.vector.reduce.add.nxv4i64( undef) call i64 @llvm.vp.reduce.add.nxv8i64(i64 undef, undef, undef, i32 undef) call i64 @llvm.vector.reduce.add.nxv8i64( undef) call i64 @llvm.vp.reduce.add.nxv16i64(i64 undef, undef, undef, i32 undef) call i64 @llvm.vector.reduce.add.nxv16i64( undef) ret void } define void @reduce_fadd() { ; CHECK-LABEL: 'reduce_fadd' ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %1 = call float @llvm.vp.reduce.fadd.v2f32(float undef, <2 x float> undef, <2 x i1> undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %2 = call float @llvm.vector.reduce.fadd.v2f32(float undef, <2 x float> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %3 = call float @llvm.vp.reduce.fadd.v4f32(float undef, <4 x float> undef, <4 x i1> undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %4 = call float @llvm.vector.reduce.fadd.v4f32(float undef, <4 x float> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %5 = call float @llvm.vp.reduce.fadd.v8f32(float undef, <8 x float> undef, <8 x i1> undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %6 = call float @llvm.vector.reduce.fadd.v8f32(float undef, <8 x float> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %7 = call float 
@llvm.vp.reduce.fadd.v16f32(float undef, <16 x float> undef, <16 x i1> undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %8 = call float @llvm.vector.reduce.fadd.v16f32(float undef, <16 x float> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %9 = call double @llvm.vp.reduce.fadd.v2f64(double undef, <2 x double> undef, <2 x i1> undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %10 = call double @llvm.vector.reduce.fadd.v2f64(double undef, <2 x double> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %11 = call double @llvm.vp.reduce.fadd.v4f64(double undef, <4 x double> undef, <4 x i1> undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %12 = call double @llvm.vector.reduce.fadd.v4f64(double undef, <4 x double> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %13 = call double @llvm.vp.reduce.fadd.v8f64(double undef, <8 x double> undef, <8 x i1> undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %14 = call double @llvm.vector.reduce.fadd.v8f64(double undef, <8 x double> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %15 = call double @llvm.vp.reduce.fadd.v16f64(double undef, <16 x double> undef, <16 x i1> undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %16 = call double @llvm.vector.reduce.fadd.v16f64(double undef, <16 x double> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %17 = call float @llvm.vp.reduce.fadd.nxv2f32(float undef, undef, undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %18 = call float @llvm.vector.reduce.fadd.nxv2f32(float undef, undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %19 = call float @llvm.vp.reduce.fadd.nxv4f32(float undef, undef, undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %20 = call float @llvm.vector.reduce.fadd.nxv4f32(float undef, undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %21 = call float @llvm.vp.reduce.fadd.nxv8f32(float undef, undef, undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %22 = call float @llvm.vector.reduce.fadd.nxv8f32(float undef, undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %23 = call float @llvm.vp.reduce.fadd.nxv16f32(float undef, undef, undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %24 = call float @llvm.vector.reduce.fadd.nxv16f32(float undef, undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %25 = call double @llvm.vp.reduce.fadd.nxv2f64(double undef, undef, undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %26 = call double @llvm.vector.reduce.fadd.nxv2f64(double undef, undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %27 = call double @llvm.vp.reduce.fadd.nxv4f64(double undef, undef, undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %28 = call double @llvm.vector.reduce.fadd.nxv4f64(double undef, undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %29 = call double @llvm.vp.reduce.fadd.nxv8f64(double undef, undef, undef, i32 undef) ; 
CHECK-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %30 = call double @llvm.vector.reduce.fadd.nxv8f64(double undef, undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %31 = call double @llvm.vp.reduce.fadd.nxv16f64(double undef, undef, undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %32 = call double @llvm.vector.reduce.fadd.nxv16f64(double undef, undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; call float @llvm.vp.reduce.fadd.v2f32(float undef, <2 x float> undef, <2 x i1> undef, i32 undef) call float @llvm.vector.reduce.fadd.v2f32(float undef, <2 x float> undef) call float @llvm.vp.reduce.fadd.v4f32(float undef, <4 x float> undef, <4 x i1> undef, i32 undef) call float @llvm.vector.reduce.fadd.v4f32(float undef, <4 x float> undef) call float @llvm.vp.reduce.fadd.v8f32(float undef, <8 x float> undef, <8 x i1> undef, i32 undef) call float @llvm.vector.reduce.fadd.v8f32(float undef, <8 x float> undef) call float @llvm.vp.reduce.fadd.v16f32(float undef, <16 x float> undef, <16 x i1> undef, i32 undef) call float @llvm.vector.reduce.fadd.v16f32(float undef, <16 x float> undef) call double @llvm.vp.reduce.fadd.v2f64(double undef, <2 x double> undef, <2 x i1> undef, i32 undef) call double @llvm.vector.reduce.fadd.v2f64(double undef, <2 x double> undef) call double @llvm.vp.reduce.fadd.v4f64(double undef, <4 x double> undef, <4 x i1> undef, i32 undef) call double @llvm.vector.reduce.fadd.v4f64(double undef, <4 x double> undef) call double @llvm.vp.reduce.fadd.v8f64(double undef, <8 x double> undef, <8 x i1> undef, i32 undef) call double @llvm.vector.reduce.fadd.v8f64(double undef, <8 x double> undef) call double @llvm.vp.reduce.fadd.v16f64(double undef, <16 x double> undef, <16 x i1> undef, i32 undef) call double @llvm.vector.reduce.fadd.v16f64(double undef, <16 x double> undef) call float @llvm.vp.reduce.fadd.nxv2f32(float undef, undef, undef, i32 undef) call float @llvm.vector.reduce.fadd.nxv2f32(float undef, undef) call float @llvm.vp.reduce.fadd.nxv4f32(float undef, undef, undef, i32 undef) call float @llvm.vector.reduce.fadd.nxv4f32(float undef, undef) call float @llvm.vp.reduce.fadd.nxv8f32(float undef, undef, undef, i32 undef) call float @llvm.vector.reduce.fadd.nxv8f32(float undef, undef) call float @llvm.vp.reduce.fadd.nxv16f32(float undef, undef, undef, i32 undef) call float @llvm.vector.reduce.fadd.nxv16f32(float undef, undef) call double @llvm.vp.reduce.fadd.nxv2f64(double undef, undef, undef, i32 undef) call double @llvm.vector.reduce.fadd.nxv2f64(double undef, undef) call double @llvm.vp.reduce.fadd.nxv4f64(double undef, undef, undef, i32 undef) call double @llvm.vector.reduce.fadd.nxv4f64(double undef, undef) call double @llvm.vp.reduce.fadd.nxv8f64(double undef, undef, undef, i32 undef) call double @llvm.vector.reduce.fadd.nxv8f64(double undef, undef) call double @llvm.vp.reduce.fadd.nxv16f64(double undef, undef, undef, i32 undef) call double @llvm.vector.reduce.fadd.nxv16f64(double undef, undef) ret void } declare <2 x i8> @llvm.vp.add.v2i8(<2 x i8>, <2 x i8>, <2 x i1>, i32) declare <4 x i8> @llvm.vp.add.v4i8(<4 x i8>, <4 x i8>, <4 x i1>, i32) declare <8 x i8> @llvm.vp.add.v8i8(<8 x i8>, <8 x i8>, <8 x i1>, i32) declare <16 x i8> @llvm.vp.add.v16i8(<16 x i8>, <16 x i8>, <16 x i1>, i32) declare <2 x i64> @llvm.vp.add.v2i64(<2 x i64>, <2 x i64>, <2 x i1>, i32) declare <4 x i64> @llvm.vp.add.v4i64(<4 x i64>, <4 x i64>, <4 x i1>, i32) declare <8 x i64> 
declare <16 x i64> @llvm.vp.add.v16i64(<16 x i64>, <16 x i64>, <16 x i1>, i32)
declare <vscale x 2 x i8> @llvm.vp.add.nxv2i8(<vscale x 2 x i8>, <vscale x 2 x i8>, <vscale x 2 x i1>, i32)
declare <vscale x 4 x i8> @llvm.vp.add.nxv4i8(<vscale x 4 x i8>, <vscale x 4 x i8>, <vscale x 4 x i1>, i32)
declare <vscale x 8 x i8> @llvm.vp.add.nxv8i8(<vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i1>, i32)
declare <vscale x 16 x i8> @llvm.vp.add.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i1>, i32)
declare <vscale x 2 x i64> @llvm.vp.add.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i1>, i32)
declare <vscale x 4 x i64> @llvm.vp.add.nxv4i64(<vscale x 4 x i64>, <vscale x 4 x i64>, <vscale x 4 x i1>, i32)
declare <vscale x 8 x i64> @llvm.vp.add.nxv8i64(<vscale x 8 x i64>, <vscale x 8 x i64>, <vscale x 8 x i1>, i32)
declare <vscale x 16 x i64> @llvm.vp.add.nxv16i64(<vscale x 16 x i64>, <vscale x 16 x i64>, <vscale x 16 x i1>, i32)
declare <2 x i8> @llvm.vp.abs.v2i8(<2 x i8>, i1, <2 x i1>, i32)
declare <4 x i8> @llvm.vp.abs.v4i8(<4 x i8>, i1, <4 x i1>, i32)
declare <8 x i8> @llvm.vp.abs.v8i8(<8 x i8>, i1, <8 x i1>, i32)
declare <16 x i8> @llvm.vp.abs.v16i8(<16 x i8>, i1, <16 x i1>, i32)
declare <2 x i64> @llvm.vp.abs.v2i64(<2 x i64>, i1, <2 x i1>, i32)
declare <4 x i64> @llvm.vp.abs.v4i64(<4 x i64>, i1, <4 x i1>, i32)
declare <8 x i64> @llvm.vp.abs.v8i64(<8 x i64>, i1, <8 x i1>, i32)
declare <16 x i64> @llvm.vp.abs.v16i64(<16 x i64>, i1, <16 x i1>, i32)
declare <vscale x 2 x i8> @llvm.vp.abs.nxv2i8(<vscale x 2 x i8>, i1, <vscale x 2 x i1>, i32)
declare <vscale x 4 x i8> @llvm.vp.abs.nxv4i8(<vscale x 4 x i8>, i1, <vscale x 4 x i1>, i32)
declare <vscale x 8 x i8> @llvm.vp.abs.nxv8i8(<vscale x 8 x i8>, i1, <vscale x 8 x i1>, i32)
declare <vscale x 16 x i8> @llvm.vp.abs.nxv16i8(<vscale x 16 x i8>, i1, <vscale x 16 x i1>, i32)
declare <vscale x 2 x i64> @llvm.vp.abs.nxv2i64(<vscale x 2 x i64>, i1, <vscale x 2 x i1>, i32)
declare <vscale x 4 x i64> @llvm.vp.abs.nxv4i64(<vscale x 4 x i64>, i1, <vscale x 4 x i1>, i32)
declare <vscale x 8 x i64> @llvm.vp.abs.nxv8i64(<vscale x 8 x i64>, i1, <vscale x 8 x i1>, i32)
declare <vscale x 16 x i64> @llvm.vp.abs.nxv16i64(<vscale x 16 x i64>, i1, <vscale x 16 x i1>, i32)
declare <2 x i8> @llvm.abs.v2i8(<2 x i8>, i1)
declare <4 x i8> @llvm.abs.v4i8(<4 x i8>, i1)
declare <8 x i8> @llvm.abs.v8i8(<8 x i8>, i1)
declare <16 x i8> @llvm.abs.v16i8(<16 x i8>, i1)
declare <2 x i64> @llvm.abs.v2i64(<2 x i64>, i1)
declare <4 x i64> @llvm.abs.v4i64(<4 x i64>, i1)
declare <8 x i64> @llvm.abs.v8i64(<8 x i64>, i1)
declare <16 x i64> @llvm.abs.v16i64(<16 x i64>, i1)
declare <vscale x 2 x i8> @llvm.abs.nxv2i8(<vscale x 2 x i8>, i1)
declare <vscale x 4 x i8> @llvm.abs.nxv4i8(<vscale x 4 x i8>, i1)
declare <vscale x 8 x i8> @llvm.abs.nxv8i8(<vscale x 8 x i8>, i1)
declare <vscale x 16 x i8> @llvm.abs.nxv16i8(<vscale x 16 x i8>, i1)
declare <vscale x 2 x i64> @llvm.abs.nxv2i64(<vscale x 2 x i64>, i1)
declare <vscale x 4 x i64> @llvm.abs.nxv4i64(<vscale x 4 x i64>, i1)
declare <vscale x 8 x i64> @llvm.abs.nxv8i64(<vscale x 8 x i64>, i1)
declare <vscale x 16 x i64> @llvm.abs.nxv16i64(<vscale x 16 x i64>, i1)
declare <2 x i8> @llvm.vp.load.v2i8(ptr, <2 x i1>, i32)
declare <4 x i8> @llvm.vp.load.v4i8(ptr, <4 x i1>, i32)
declare <8 x i8> @llvm.vp.load.v8i8(ptr, <8 x i1>, i32)
declare <16 x i8> @llvm.vp.load.v16i8(ptr, <16 x i1>, i32)
declare <2 x i64> @llvm.vp.load.v2i64(ptr, <2 x i1>, i32)
declare <4 x i64> @llvm.vp.load.v4i64(ptr, <4 x i1>, i32)
declare <8 x i64> @llvm.vp.load.v8i64(ptr, <8 x i1>, i32)
declare <16 x i64> @llvm.vp.load.v16i64(ptr, <16 x i1>, i32)
declare <vscale x 2 x i8> @llvm.vp.load.nxv2i8(ptr, <vscale x 2 x i1>, i32)
declare <vscale x 4 x i8> @llvm.vp.load.nxv4i8(ptr, <vscale x 4 x i1>, i32)
declare <vscale x 8 x i8> @llvm.vp.load.nxv8i8(ptr, <vscale x 8 x i1>, i32)
declare <vscale x 16 x i8> @llvm.vp.load.nxv16i8(ptr, <vscale x 16 x i1>, i32)
declare <vscale x 2 x i64> @llvm.vp.load.nxv2i64(ptr, <vscale x 2 x i1>, i32)
declare <vscale x 4 x i64> @llvm.vp.load.nxv4i64(ptr, <vscale x 4 x i1>, i32)
declare <vscale x 8 x i64> @llvm.vp.load.nxv8i64(ptr, <vscale x 8 x i1>, i32)
declare <vscale x 16 x i64> @llvm.vp.load.nxv16i64(ptr, <vscale x 16 x i1>, i32)
declare void @llvm.vp.store.v2i8(<2 x i8>, ptr, <2 x i1>, i32)
declare void @llvm.vp.store.v4i8(<4 x i8>, ptr, <4 x i1>, i32)
declare void @llvm.vp.store.v8i8(<8 x i8>, ptr, <8 x i1>, i32)
declare void @llvm.vp.store.v16i8(<16 x i8>, ptr, <16 x i1>, i32)
declare void @llvm.vp.store.v2i64(<2 x i64>, ptr, <2 x i1>, i32)
declare void @llvm.vp.store.v4i64(<4 x i64>, ptr, <4 x i1>, i32)
declare void @llvm.vp.store.v8i64(<8 x i64>, ptr, <8 x i1>, i32)
declare void @llvm.vp.store.v16i64(<16 x i64>, ptr, <16 x i1>, i32)
declare void @llvm.vp.store.nxv2i8(<vscale x 2 x i8>, ptr, <vscale x 2 x i1>, i32)
declare void @llvm.vp.store.nxv4i8(<vscale x 4 x i8>, ptr, <vscale x 4 x i1>, i32)
declare void @llvm.vp.store.nxv8i8(<vscale x 8 x i8>, ptr, <vscale x 8 x i1>, i32)
declare void @llvm.vp.store.nxv16i8(<vscale x 16 x i8>, ptr, <vscale x 16 x i1>, i32)
declare void @llvm.vp.store.nxv2i64(<vscale x 2 x i64>, ptr, <vscale x 2 x i1>, i32)
declare void @llvm.vp.store.nxv4i64(<vscale x 4 x i64>, ptr, <vscale x 4 x i1>, i32)
declare void @llvm.vp.store.nxv8i64(<vscale x 8 x i64>, ptr, <vscale x 8 x i1>, i32)
declare void @llvm.vp.store.nxv16i64(<vscale x 16 x i64>, ptr, <vscale x 16 x i1>, i32)
declare i8 @llvm.vector.reduce.add.v2i8(<2 x i8>)
declare i8 @llvm.vector.reduce.add.v4i8(<4 x i8>)
declare i8 @llvm.vector.reduce.add.v8i8(<8 x i8>)
declare i8 @llvm.vector.reduce.add.v16i8(<16 x i8>)
declare i64 @llvm.vector.reduce.add.v2i64(<2 x i64>)
declare i64 @llvm.vector.reduce.add.v4i64(<4 x i64>)
declare i64 @llvm.vector.reduce.add.v8i64(<8 x i64>)
declare i64 @llvm.vector.reduce.add.v16i64(<16 x i64>)
declare i8 @llvm.vector.reduce.add.nxv2i8(<vscale x 2 x i8>)
declare i8 @llvm.vector.reduce.add.nxv4i8(<vscale x 4 x i8>)
declare i8 @llvm.vector.reduce.add.nxv8i8(<vscale x 8 x i8>)
declare i8 @llvm.vector.reduce.add.nxv16i8(<vscale x 16 x i8>)
declare i64 @llvm.vector.reduce.add.nxv2i64(<vscale x 2 x i64>)
declare i64 @llvm.vector.reduce.add.nxv4i64(<vscale x 4 x i64>)
declare i64 @llvm.vector.reduce.add.nxv8i64(<vscale x 8 x i64>)
declare i64 @llvm.vector.reduce.add.nxv16i64(<vscale x 16 x i64>)
declare i8 @llvm.vp.reduce.add.v2i8(i8, <2 x i8>, <2 x i1>, i32)
declare i8 @llvm.vp.reduce.add.v4i8(i8, <4 x i8>, <4 x i1>, i32)
declare i8 @llvm.vp.reduce.add.v8i8(i8, <8 x i8>, <8 x i1>, i32)
declare i8 @llvm.vp.reduce.add.v16i8(i8, <16 x i8>, <16 x i1>, i32)
declare i64 @llvm.vp.reduce.add.v2i64(i64, <2 x i64>, <2 x i1>, i32)
declare i64 @llvm.vp.reduce.add.v4i64(i64, <4 x i64>, <4 x i1>, i32)
declare i64 @llvm.vp.reduce.add.v8i64(i64, <8 x i64>, <8 x i1>, i32)
declare i64 @llvm.vp.reduce.add.v16i64(i64, <16 x i64>, <16 x i1>, i32)
declare i8 @llvm.vp.reduce.add.nxv2i8(i8, <vscale x 2 x i8>, <vscale x 2 x i1>, i32)
declare i8 @llvm.vp.reduce.add.nxv4i8(i8, <vscale x 4 x i8>, <vscale x 4 x i1>, i32)
declare i8 @llvm.vp.reduce.add.nxv8i8(i8, <vscale x 8 x i8>, <vscale x 8 x i1>, i32)
declare i8 @llvm.vp.reduce.add.nxv16i8(i8, <vscale x 16 x i8>, <vscale x 16 x i1>, i32)
declare i64 @llvm.vp.reduce.add.nxv2i64(i64, <vscale x 2 x i64>, <vscale x 2 x i1>, i32)
declare i64 @llvm.vp.reduce.add.nxv4i64(i64, <vscale x 4 x i64>, <vscale x 4 x i1>, i32)
declare i64 @llvm.vp.reduce.add.nxv8i64(i64, <vscale x 8 x i64>, <vscale x 8 x i1>, i32)
declare i64 @llvm.vp.reduce.add.nxv16i64(i64, <vscale x 16 x i64>, <vscale x 16 x i1>, i32)
declare float @llvm.vector.reduce.fadd.v2f32(float, <2 x float>)
declare float @llvm.vector.reduce.fadd.v4f32(float, <4 x float>)
declare float @llvm.vector.reduce.fadd.v8f32(float, <8 x float>)
declare float @llvm.vector.reduce.fadd.v16f32(float, <16 x float>)
declare double @llvm.vector.reduce.fadd.v2f64(double, <2 x double>)
declare double @llvm.vector.reduce.fadd.v4f64(double, <4 x double>)
declare double @llvm.vector.reduce.fadd.v8f64(double, <8 x double>)
declare double @llvm.vector.reduce.fadd.v16f64(double, <16 x double>)
declare float @llvm.vector.reduce.fadd.nxv2f32(float, <vscale x 2 x float>)
declare float @llvm.vector.reduce.fadd.nxv4f32(float, <vscale x 4 x float>)
declare float @llvm.vector.reduce.fadd.nxv8f32(float, <vscale x 8 x float>)
declare float @llvm.vector.reduce.fadd.nxv16f32(float, <vscale x 16 x float>)
declare double @llvm.vector.reduce.fadd.nxv2f64(double, <vscale x 2 x double>)
declare double @llvm.vector.reduce.fadd.nxv4f64(double, <vscale x 4 x double>)
declare double @llvm.vector.reduce.fadd.nxv8f64(double, <vscale x 8 x double>)
declare double @llvm.vector.reduce.fadd.nxv16f64(double, <vscale x 16 x double>)
declare float @llvm.vp.reduce.fadd.v2f32(float, <2 x float>, <2 x i1>, i32)
declare float @llvm.vp.reduce.fadd.v4f32(float, <4 x float>, <4 x i1>, i32)
declare float @llvm.vp.reduce.fadd.v8f32(float, <8 x float>, <8 x i1>, i32)
declare float @llvm.vp.reduce.fadd.v16f32(float, <16 x float>, <16 x i1>, i32)
declare double @llvm.vp.reduce.fadd.v2f64(double, <2 x double>, <2 x i1>, i32)
declare double @llvm.vp.reduce.fadd.v4f64(double, <4 x double>, <4 x i1>, i32)
declare double @llvm.vp.reduce.fadd.v8f64(double, <8 x double>, <8 x i1>, i32)
declare double @llvm.vp.reduce.fadd.v16f64(double, <16 x double>, <16 x i1>, i32)
declare float @llvm.vp.reduce.fadd.nxv2f32(float, <vscale x 2 x float>, <vscale x 2 x i1>, i32)
declare float @llvm.vp.reduce.fadd.nxv4f32(float, <vscale x 4 x float>, <vscale x 4 x i1>, i32)
declare float @llvm.vp.reduce.fadd.nxv8f32(float, <vscale x 8 x float>, <vscale x 8 x i1>, i32)
declare float @llvm.vp.reduce.fadd.nxv16f32(float, <vscale x 16 x float>, <vscale x 16 x i1>, i32)
declare double @llvm.vp.reduce.fadd.nxv2f64(double, <vscale x 2 x double>, <vscale x 2 x i1>, i32)
declare double @llvm.vp.reduce.fadd.nxv4f64(double, <vscale x 4 x double>, <vscale x 4 x i1>, i32)
declare double @llvm.vp.reduce.fadd.nxv8f64(double, <vscale x 8 x double>, <vscale x 8 x i1>, i32)
declare double @llvm.vp.reduce.fadd.nxv16f64(double, <vscale x 16 x double>, <vscale x 16 x i1>, i32)
declare <vscale x 4 x i32> @llvm.fshr.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c)
declare <vscale x 4 x i32> @llvm.fshl.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c)
declare <vscale x 4 x float> @llvm.pow.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>)
declare <vscale x 4 x float> @llvm.powi.nxv4f32.i32(<vscale x 4 x float>, i32)
declare <vscale x 4 x float> @llvm.nearbyint.nxv4f32(<vscale x 4 x float>)
declare <2 x i8> @llvm.vp.fshr.v2i8(<2 x i8>, <2 x i8>, <2 x i8>, <2 x i1>, i32)
declare <4 x i8> @llvm.vp.fshr.v4i8(<4 x i8>, <4 x i8>, <4 x i8>, <4 x i1>, i32)
declare <8 x i8> @llvm.vp.fshr.v8i8(<8 x i8>, <8 x i8>, <8 x i8>, <8 x i1>, i32)
declare <16 x i8> @llvm.vp.fshr.v16i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i1>, i32)
declare <vscale x 1 x i8> @llvm.vp.fshr.nxv1i8(<vscale x 1 x i8>, <vscale x 1 x i8>, <vscale x 1 x i8>, <vscale x 1 x i1>, i32)
declare <vscale x 2 x i8> @llvm.vp.fshr.nxv2i8(<vscale x 2 x i8>, <vscale x 2 x i8>, <vscale x 2 x i8>, <vscale x 2 x i1>, i32)
declare <vscale x 4 x i8> @llvm.vp.fshr.nxv4i8(<vscale x 4 x i8>, <vscale x 4 x i8>, <vscale x 4 x i8>, <vscale x 4 x i1>, i32)
declare <vscale x 8 x i8> @llvm.vp.fshr.nxv8i8(<vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i1>, i32)
declare <vscale x 16 x i8> @llvm.vp.fshr.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i1>, i32)
declare <vscale x 32 x i8> @llvm.vp.fshr.nxv32i8(<vscale x 32 x i8>, <vscale x 32 x i8>, <vscale x 32 x i8>, <vscale x 32 x i1>, i32)
declare <vscale x 64 x i8> @llvm.vp.fshr.nxv64i8(<vscale x 64 x i8>, <vscale x 64 x i8>, <vscale x 64 x i8>, <vscale x 64 x i1>, i32)
declare <2 x i16> @llvm.vp.fshr.v2i16(<2 x i16>, <2 x i16>, <2 x i16>, <2 x i1>, i32)
declare <4 x i16> @llvm.vp.fshr.v4i16(<4 x i16>, <4 x i16>, <4 x i16>, <4 x i1>, i32)
declare <8 x i16> @llvm.vp.fshr.v8i16(<8 x i16>, <8 x i16>, <8 x i16>, <8 x i1>, i32)
declare <16 x i16> @llvm.vp.fshr.v16i16(<16 x i16>, <16 x i16>, <16 x i16>, <16 x i1>, i32)
declare <vscale x 1 x i16> @llvm.vp.fshr.nxv1i16(<vscale x 1 x i16>, <vscale x 1 x i16>, <vscale x 1 x i16>, <vscale x 1 x i1>, i32)
declare <vscale x 2 x i16> @llvm.vp.fshr.nxv2i16(<vscale x 2 x i16>, <vscale x 2 x i16>, <vscale x 2 x i16>, <vscale x 2 x i1>, i32)
declare <vscale x 4 x i16> @llvm.vp.fshr.nxv4i16(<vscale x 4 x i16>, <vscale x 4 x i16>, <vscale x 4 x i16>, <vscale x 4 x i1>, i32)
declare <vscale x 8 x i16> @llvm.vp.fshr.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i1>, i32)
declare <vscale x 16 x i16> @llvm.vp.fshr.nxv16i16(<vscale x 16 x i16>, <vscale x 16 x i16>, <vscale x 16 x i16>, <vscale x 16 x i1>, i32)
declare <vscale x 32 x i16> @llvm.vp.fshr.nxv32i16(<vscale x 32 x i16>, <vscale x 32 x i16>, <vscale x 32 x i16>, <vscale x 32 x i1>, i32)
declare <2 x i32> @llvm.vp.fshr.v2i32(<2 x i32>, <2 x i32>, <2 x i32>, <2 x i1>, i32)
declare <4 x i32> @llvm.vp.fshr.v4i32(<4 x i32>, <4 x i32>, <4 x i32>, <4 x i1>, i32)
declare <8 x i32> @llvm.vp.fshr.v8i32(<8 x i32>, <8 x i32>, <8 x i32>, <8 x i1>, i32)
declare <16 x i32> @llvm.vp.fshr.v16i32(<16 x i32>, <16 x i32>, <16 x i32>, <16 x i1>, i32)
declare <vscale x 1 x i32> @llvm.vp.fshr.nxv1i32(<vscale x 1 x i32>, <vscale x 1 x i32>, <vscale x 1 x i32>, <vscale x 1 x i1>, i32)
declare <vscale x 2 x i32> @llvm.vp.fshr.nxv2i32(<vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i1>, i32)
declare <vscale x 4 x i32> @llvm.vp.fshr.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i1>, i32)
declare <vscale x 8 x i32> @llvm.vp.fshr.nxv8i32(<vscale x 8 x i32>, <vscale x 8 x i32>, <vscale x 8 x i32>, <vscale x 8 x i1>, i32)
declare <vscale x 16 x i32> @llvm.vp.fshr.nxv16i32(<vscale x 16 x i32>, <vscale x 16 x i32>, <vscale x 16 x i32>, <vscale x 16 x i1>, i32)
declare <2 x i64> @llvm.vp.fshr.v2i64(<2 x i64>, <2 x i64>, <2 x i64>, <2 x i1>, i32)
declare <4 x i64> @llvm.vp.fshr.v4i64(<4 x i64>, <4 x i64>, <4 x i64>, <4 x i1>, i32)
declare <8 x i64> @llvm.vp.fshr.v8i64(<8 x i64>, <8 x i64>, <8 x i64>, <8 x i1>, i32)
declare <16 x i64> @llvm.vp.fshr.v16i64(<16 x i64>, <16 x i64>, <16 x i64>, <16 x i1>, i32)
declare <vscale x 1 x i64> @llvm.vp.fshr.nxv1i64(<vscale x 1 x i64>, <vscale x 1 x i64>, <vscale x 1 x i64>, <vscale x 1 x i1>, i32)
declare <vscale x 2 x i64> @llvm.vp.fshr.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i1>, i32)
declare <vscale x 4 x i64> @llvm.vp.fshr.nxv4i64(<vscale x 4 x i64>, <vscale x 4 x i64>, <vscale x 4 x i64>, <vscale x 4 x i1>, i32)
declare <vscale x 8 x i64> @llvm.vp.fshr.nxv8i64(<vscale x 8 x i64>, <vscale x 8 x i64>, <vscale x 8 x i64>, <vscale x 8 x i1>, i32)
declare <2 x i8> @llvm.vp.fshl.v2i8(<2 x i8>, <2 x i8>, <2 x i8>, <2 x i1>, i32)
declare <4 x i8> @llvm.vp.fshl.v4i8(<4 x i8>, <4 x i8>, <4 x i8>, <4 x i1>, i32)
declare <8 x i8> @llvm.vp.fshl.v8i8(<8 x i8>, <8 x i8>, <8 x i8>, <8 x i1>, i32)
declare <16 x i8> @llvm.vp.fshl.v16i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i1>, i32)
declare <vscale x 1 x i8> @llvm.vp.fshl.nxv1i8(<vscale x 1 x i8>, <vscale x 1 x i8>, <vscale x 1 x i8>, <vscale x 1 x i1>, i32)
declare <vscale x 2 x i8> @llvm.vp.fshl.nxv2i8(<vscale x 2 x i8>, <vscale x 2 x i8>, <vscale x 2 x i8>, <vscale x 2 x i1>, i32)
declare <vscale x 4 x i8> @llvm.vp.fshl.nxv4i8(<vscale x 4 x i8>, <vscale x 4 x i8>, <vscale x 4 x i8>, <vscale x 4 x i1>, i32)
declare <vscale x 8 x i8> @llvm.vp.fshl.nxv8i8(<vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i1>, i32)
declare <vscale x 16 x i8> @llvm.vp.fshl.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i1>, i32)
declare <vscale x 32 x i8> @llvm.vp.fshl.nxv32i8(<vscale x 32 x i8>, <vscale x 32 x i8>, <vscale x 32 x i8>, <vscale x 32 x i1>, i32)
declare <vscale x 64 x i8> @llvm.vp.fshl.nxv64i8(<vscale x 64 x i8>, <vscale x 64 x i8>, <vscale x 64 x i8>, <vscale x 64 x i1>, i32)
declare <2 x i16> @llvm.vp.fshl.v2i16(<2 x i16>, <2 x i16>, <2 x i16>, <2 x i1>, i32)
declare <4 x i16> @llvm.vp.fshl.v4i16(<4 x i16>, <4 x i16>, <4 x i16>, <4 x i1>, i32)
declare <8 x i16> @llvm.vp.fshl.v8i16(<8 x i16>, <8 x i16>, <8 x i16>, <8 x i1>, i32)
declare <16 x i16> @llvm.vp.fshl.v16i16(<16 x i16>, <16 x i16>, <16 x i16>, <16 x i1>, i32)
declare <vscale x 1 x i16> @llvm.vp.fshl.nxv1i16(<vscale x 1 x i16>, <vscale x 1 x i16>, <vscale x 1 x i16>, <vscale x 1 x i1>, i32)
declare <vscale x 2 x i16> @llvm.vp.fshl.nxv2i16(<vscale x 2 x i16>, <vscale x 2 x i16>, <vscale x 2 x i16>, <vscale x 2 x i1>, i32)
declare <vscale x 4 x i16> @llvm.vp.fshl.nxv4i16(<vscale x 4 x i16>, <vscale x 4 x i16>, <vscale x 4 x i16>, <vscale x 4 x i1>, i32)
declare <vscale x 8 x i16> @llvm.vp.fshl.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i1>, i32)
declare <vscale x 16 x i16> @llvm.vp.fshl.nxv16i16(<vscale x 16 x i16>, <vscale x 16 x i16>, <vscale x 16 x i16>, <vscale x 16 x i1>, i32)
declare <vscale x 32 x i16> @llvm.vp.fshl.nxv32i16(<vscale x 32 x i16>, <vscale x 32 x i16>, <vscale x 32 x i16>, <vscale x 32 x i1>, i32)
declare <2 x i32> @llvm.vp.fshl.v2i32(<2 x i32>, <2 x i32>, <2 x i32>, <2 x i1>, i32)
declare <4 x i32> @llvm.vp.fshl.v4i32(<4 x i32>, <4 x i32>, <4 x i32>, <4 x i1>, i32)
declare <8 x i32> @llvm.vp.fshl.v8i32(<8 x i32>, <8 x i32>, <8 x i32>, <8 x i1>, i32)
declare <16 x i32> @llvm.vp.fshl.v16i32(<16 x i32>, <16 x i32>, <16 x i32>, <16 x i1>, i32)
declare <vscale x 1 x i32> @llvm.vp.fshl.nxv1i32(<vscale x 1 x i32>, <vscale x 1 x i32>, <vscale x 1 x i32>, <vscale x 1 x i1>, i32)
declare <vscale x 2 x i32> @llvm.vp.fshl.nxv2i32(<vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i1>, i32)
declare <vscale x 4 x i32> @llvm.vp.fshl.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i1>, i32)
declare <vscale x 8 x i32> @llvm.vp.fshl.nxv8i32(<vscale x 8 x i32>, <vscale x 8 x i32>, <vscale x 8 x i32>, <vscale x 8 x i1>, i32)
declare <vscale x 16 x i32> @llvm.vp.fshl.nxv16i32(<vscale x 16 x i32>, <vscale x 16 x i32>, <vscale x 16 x i32>, <vscale x 16 x i1>, i32)
declare <2 x i64> @llvm.vp.fshl.v2i64(<2 x i64>, <2 x i64>, <2 x i64>, <2 x i1>, i32)
declare <4 x i64> @llvm.vp.fshl.v4i64(<4 x i64>, <4 x i64>, <4 x i64>, <4 x i1>, i32)
declare <8 x i64> @llvm.vp.fshl.v8i64(<8 x i64>, <8 x i64>, <8 x i64>, <8 x i1>, i32)
declare <16 x i64> @llvm.vp.fshl.v16i64(<16 x i64>, <16 x i64>, <16 x i64>, <16 x i1>, i32)
declare <vscale x 1 x i64> @llvm.vp.fshl.nxv1i64(<vscale x 1 x i64>, <vscale x 1 x i64>, <vscale x 1 x i64>, <vscale x 1 x i1>, i32)
declare <vscale x 2 x i64> @llvm.vp.fshl.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i1>, i32)
declare <vscale x 4 x i64> @llvm.vp.fshl.nxv4i64(<vscale x 4 x i64>, <vscale x 4 x i64>, <vscale x 4 x i64>, <vscale x 4 x i1>, i32)
declare <vscale x 8 x i64> @llvm.vp.fshl.nxv8i64(<vscale x 8 x i64>, <vscale x 8 x i64>, <vscale x 8 x i64>, <vscale x 8 x i1>, i32)