; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output < %s | FileCheck %s
; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mcpu=neoverse-v1 < %s | FileCheck %s --check-prefix=CHECK-VSCALE-2
; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mcpu=neoverse-n2 < %s | FileCheck %s --check-prefix=CHECK-VSCALE-1
; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mcpu=cortex-a510 < %s | FileCheck %s --check-prefix=CHECK-VSCALE-1

; Cost-model expectations for llvm.masked.gather on AArch64 with SVE enabled.
; The same IR is analysed four times: once with no -mcpu, and once each for
; neoverse-v1, neoverse-n2 and cortex-a510, so that per-CPU differences in the
; reported gather costs are pinned down by separate check prefixes.

target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
target triple="aarch64--linux-gnu"

; Scalable gathers in a function carrying vscale_range(1, 8) and no tune-cpu
; attribute (attributes #0). The nxv1i64 gather is expected to be reported as
; an invalid cost under every prefix. The <4 x i1> and <1 x i1> mask
; parameters are not used by the body.
define void @masked_gathers(<vscale x 4 x i1> %nxv4i1mask, <vscale x 8 x i1> %nxv8i1mask, <4 x i1> %v4i1mask, <1 x i1> %v1i1mask, <vscale x 1 x i1> %nxv1i1mask) #0 {
; CHECK-LABEL: 'masked_gathers'
; CHECK-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %res.nxv4i32 = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0(<vscale x 4 x ptr> undef, i32 0, <vscale x 4 x i1> %nxv4i1mask, <vscale x 4 x i32> zeroinitializer)
; CHECK-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %res.nxv8i32 = call <vscale x 8 x i32> @llvm.masked.gather.nxv8i32.nxv8p0(<vscale x 8 x ptr> undef, i32 0, <vscale x 8 x i1> %nxv8i1mask, <vscale x 8 x i32> zeroinitializer)
; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %res.nxv1i64 = call <vscale x 1 x i64> @llvm.masked.gather.nxv1i64.nxv1p0(<vscale x 1 x ptr> undef, i32 0, <vscale x 1 x i1> %nxv1i1mask, <vscale x 1 x i64> zeroinitializer)
; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; CHECK-VSCALE-2-LABEL: 'masked_gathers'
; CHECK-VSCALE-2-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %res.nxv4i32 = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0(<vscale x 4 x ptr> undef, i32 0, <vscale x 4 x i1> %nxv4i1mask, <vscale x 4 x i32> zeroinitializer)
; CHECK-VSCALE-2-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %res.nxv8i32 = call <vscale x 8 x i32> @llvm.masked.gather.nxv8i32.nxv8p0(<vscale x 8 x ptr> undef, i32 0, <vscale x 8 x i1> %nxv8i1mask, <vscale x 8 x i32> zeroinitializer)
; CHECK-VSCALE-2-NEXT:  Cost Model: Invalid cost for instruction: %res.nxv1i64 = call <vscale x 1 x i64> @llvm.masked.gather.nxv1i64.nxv1p0(<vscale x 1 x ptr> undef, i32 0, <vscale x 1 x i1> %nxv1i1mask, <vscale x 1 x i64> zeroinitializer)
; CHECK-VSCALE-2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; CHECK-VSCALE-1-LABEL: 'masked_gathers'
; CHECK-VSCALE-1-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %res.nxv4i32 = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0(<vscale x 4 x ptr> undef, i32 0, <vscale x 4 x i1> %nxv4i1mask, <vscale x 4 x i32> zeroinitializer)
; CHECK-VSCALE-1-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %res.nxv8i32 = call <vscale x 8 x i32> @llvm.masked.gather.nxv8i32.nxv8p0(<vscale x 8 x ptr> undef, i32 0, <vscale x 8 x i1> %nxv8i1mask, <vscale x 8 x i32> zeroinitializer)
; CHECK-VSCALE-1-NEXT:  Cost Model: Invalid cost for instruction: %res.nxv1i64 = call <vscale x 1 x i64> @llvm.masked.gather.nxv1i64.nxv1p0(<vscale x 1 x ptr> undef, i32 0, <vscale x 1 x i1> %nxv1i1mask, <vscale x 1 x i64> zeroinitializer)
; CHECK-VSCALE-1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
;
  %res.nxv4i32 = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32(<vscale x 4 x ptr> undef, i32 0, <vscale x 4 x i1> %nxv4i1mask, <vscale x 4 x i32> zeroinitializer)
  %res.nxv8i32 = call <vscale x 8 x i32> @llvm.masked.gather.nxv8i32(<vscale x 8 x ptr> undef, i32 0, <vscale x 8 x i1> %nxv8i1mask, <vscale x 8 x i32> zeroinitializer)
  %res.nxv1i64 = call <vscale x 1 x i64> @llvm.masked.gather.nxv1i64(<vscale x 1 x ptr> undef, i32 0, <vscale x 1 x i1> %nxv1i1mask, <vscale x 1 x i64> zeroinitializer)
  ret void
}

; Same gathers, but the function carries "tune-cpu"="generic" and
; vscale_range(1, 16) (attributes #1). All three check prefixes expect
; identical costs here, unlike @masked_gathers above.
define void @masked_gathers_tune_generic(<vscale x 4 x i1> %nxv4i1mask, <vscale x 8 x i1> %nxv8i1mask, <4 x i1> %v4i1mask, <1 x i1> %v1i1mask, <vscale x 1 x i1> %nxv1i1mask) #1 {
; CHECK-LABEL: 'masked_gathers_tune_generic'
; CHECK-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %res.nxv4i32 = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0(<vscale x 4 x ptr> undef, i32 0, <vscale x 4 x i1> %nxv4i1mask, <vscale x 4 x i32> zeroinitializer)
; CHECK-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %res.nxv8i32 = call <vscale x 8 x i32> @llvm.masked.gather.nxv8i32.nxv8p0(<vscale x 8 x ptr> undef, i32 0, <vscale x 8 x i1> %nxv8i1mask, <vscale x 8 x i32> zeroinitializer)
; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %res.nxv1i64 = call <vscale x 1 x i64> @llvm.masked.gather.nxv1i64.nxv1p0(<vscale x 1 x ptr> undef, i32 0, <vscale x 1 x i1> %nxv1i1mask, <vscale x 1 x i64> zeroinitializer)
; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; CHECK-VSCALE-2-LABEL: 'masked_gathers_tune_generic'
; CHECK-VSCALE-2-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %res.nxv4i32 = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0(<vscale x 4 x ptr> undef, i32 0, <vscale x 4 x i1> %nxv4i1mask, <vscale x 4 x i32> zeroinitializer)
; CHECK-VSCALE-2-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %res.nxv8i32 = call <vscale x 8 x i32> @llvm.masked.gather.nxv8i32.nxv8p0(<vscale x 8 x ptr> undef, i32 0, <vscale x 8 x i1> %nxv8i1mask, <vscale x 8 x i32> zeroinitializer)
; CHECK-VSCALE-2-NEXT:  Cost Model: Invalid cost for instruction: %res.nxv1i64 = call <vscale x 1 x i64> @llvm.masked.gather.nxv1i64.nxv1p0(<vscale x 1 x ptr> undef, i32 0, <vscale x 1 x i1> %nxv1i1mask, <vscale x 1 x i64> zeroinitializer)
; CHECK-VSCALE-2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; CHECK-VSCALE-1-LABEL: 'masked_gathers_tune_generic'
; CHECK-VSCALE-1-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %res.nxv4i32 = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0(<vscale x 4 x ptr> undef, i32 0, <vscale x 4 x i1> %nxv4i1mask, <vscale x 4 x i32> zeroinitializer)
; CHECK-VSCALE-1-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %res.nxv8i32 = call <vscale x 8 x i32> @llvm.masked.gather.nxv8i32.nxv8p0(<vscale x 8 x ptr> undef, i32 0, <vscale x 8 x i1> %nxv8i1mask, <vscale x 8 x i32> zeroinitializer)
; CHECK-VSCALE-1-NEXT:  Cost Model: Invalid cost for instruction: %res.nxv1i64 = call <vscale x 1 x i64> @llvm.masked.gather.nxv1i64.nxv1p0(<vscale x 1 x ptr> undef, i32 0, <vscale x 1 x i1> %nxv1i1mask, <vscale x 1 x i64> zeroinitializer)
; CHECK-VSCALE-1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
;
  %res.nxv4i32 = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32(<vscale x 4 x ptr> undef, i32 0, <vscale x 4 x i1> %nxv4i1mask, <vscale x 4 x i32> zeroinitializer)
  %res.nxv8i32 = call <vscale x 8 x i32> @llvm.masked.gather.nxv8i32(<vscale x 8 x ptr> undef, i32 0, <vscale x 8 x i1> %nxv8i1mask, <vscale x 8 x i32> zeroinitializer)
  %res.nxv1i64 = call <vscale x 1 x i64> @llvm.masked.gather.nxv1i64(<vscale x 1 x ptr> undef, i32 0, <vscale x 1 x i1> %nxv1i1mask, <vscale x 1 x i64> zeroinitializer)
  ret void
}

; Gathers of floating-point and i16 element types in a function with no
; vscale_range attribute at all (attributes #2). Every operand is undef; only
; the result type varies between calls.
define void @masked_gathers_no_vscale_range() #2 {
; CHECK-LABEL: 'masked_gathers_no_vscale_range'
; CHECK-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %res.nxv4f64 = call <vscale x 4 x double> @llvm.masked.gather.nxv4f64.nxv4p0(<vscale x 4 x ptr> undef, i32 1, <vscale x 4 x i1> undef, <vscale x 4 x double> undef)
; CHECK-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %res.nxv2f64 = call <vscale x 2 x double> @llvm.masked.gather.nxv2f64.nxv2p0(<vscale x 2 x ptr> undef, i32 1, <vscale x 2 x i1> undef, <vscale x 2 x double> undef)
; CHECK-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %res.nxv8f32 = call <vscale x 8 x float> @llvm.masked.gather.nxv8f32.nxv8p0(<vscale x 8 x ptr> undef, i32 1, <vscale x 8 x i1> undef, <vscale x 8 x float> undef)
; CHECK-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %res.nxv4f32 = call <vscale x 4 x float> @llvm.masked.gather.nxv4f32.nxv4p0(<vscale x 4 x ptr> undef, i32 1, <vscale x 4 x i1> undef, <vscale x 4 x float> undef)
; CHECK-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %res.nxv2f32 = call <vscale x 2 x float> @llvm.masked.gather.nxv2f32.nxv2p0(<vscale x 2 x ptr> undef, i32 1, <vscale x 2 x i1> undef, <vscale x 2 x float> undef)
; CHECK-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %res.nxv16i16 = call <vscale x 16 x i16> @llvm.masked.gather.nxv16i16.nxv16p0(<vscale x 16 x ptr> undef, i32 1, <vscale x 16 x i1> undef, <vscale x 16 x i16> undef)
; CHECK-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %res.nxv8i16 = call <vscale x 8 x i16> @llvm.masked.gather.nxv8i16.nxv8p0(<vscale x 8 x ptr> undef, i32 1, <vscale x 8 x i1> undef, <vscale x 8 x i16> undef)
; CHECK-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %res.nxv4i16 = call <vscale x 4 x i16> @llvm.masked.gather.nxv4i16.nxv4p0(<vscale x 4 x ptr> undef, i32 1, <vscale x 4 x i1> undef, <vscale x 4 x i16> undef)
; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; CHECK-VSCALE-2-LABEL: 'masked_gathers_no_vscale_range'
; CHECK-VSCALE-2-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %res.nxv4f64 = call <vscale x 4 x double> @llvm.masked.gather.nxv4f64.nxv4p0(<vscale x 4 x ptr> undef, i32 1, <vscale x 4 x i1> undef, <vscale x 4 x double> undef)
; CHECK-VSCALE-2-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %res.nxv2f64 = call <vscale x 2 x double> @llvm.masked.gather.nxv2f64.nxv2p0(<vscale x 2 x ptr> undef, i32 1, <vscale x 2 x i1> undef, <vscale x 2 x double> undef)
; CHECK-VSCALE-2-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %res.nxv8f32 = call <vscale x 8 x float> @llvm.masked.gather.nxv8f32.nxv8p0(<vscale x 8 x ptr> undef, i32 1, <vscale x 8 x i1> undef, <vscale x 8 x float> undef)
; CHECK-VSCALE-2-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %res.nxv4f32 = call <vscale x 4 x float> @llvm.masked.gather.nxv4f32.nxv4p0(<vscale x 4 x ptr> undef, i32 1, <vscale x 4 x i1> undef, <vscale x 4 x float> undef)
; CHECK-VSCALE-2-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %res.nxv2f32 = call <vscale x 2 x float> @llvm.masked.gather.nxv2f32.nxv2p0(<vscale x 2 x ptr> undef, i32 1, <vscale x 2 x i1> undef, <vscale x 2 x float> undef)
; CHECK-VSCALE-2-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %res.nxv16i16 = call <vscale x 16 x i16> @llvm.masked.gather.nxv16i16.nxv16p0(<vscale x 16 x ptr> undef, i32 1, <vscale x 16 x i1> undef, <vscale x 16 x i16> undef)
; CHECK-VSCALE-2-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %res.nxv8i16 = call <vscale x 8 x i16> @llvm.masked.gather.nxv8i16.nxv8p0(<vscale x 8 x ptr> undef, i32 1, <vscale x 8 x i1> undef, <vscale x 8 x i16> undef)
; CHECK-VSCALE-2-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %res.nxv4i16 = call <vscale x 4 x i16> @llvm.masked.gather.nxv4i16.nxv4p0(<vscale x 4 x ptr> undef, i32 1, <vscale x 4 x i1> undef, <vscale x 4 x i16> undef)
; CHECK-VSCALE-2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; CHECK-VSCALE-1-LABEL: 'masked_gathers_no_vscale_range'
; CHECK-VSCALE-1-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %res.nxv4f64 = call <vscale x 4 x double> @llvm.masked.gather.nxv4f64.nxv4p0(<vscale x 4 x ptr> undef, i32 1, <vscale x 4 x i1> undef, <vscale x 4 x double> undef)
; CHECK-VSCALE-1-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %res.nxv2f64 = call <vscale x 2 x double> @llvm.masked.gather.nxv2f64.nxv2p0(<vscale x 2 x ptr> undef, i32 1, <vscale x 2 x i1> undef, <vscale x 2 x double> undef)
; CHECK-VSCALE-1-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %res.nxv8f32 = call <vscale x 8 x float> @llvm.masked.gather.nxv8f32.nxv8p0(<vscale x 8 x ptr> undef, i32 1, <vscale x 8 x i1> undef, <vscale x 8 x float> undef)
; CHECK-VSCALE-1-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %res.nxv4f32 = call <vscale x 4 x float> @llvm.masked.gather.nxv4f32.nxv4p0(<vscale x 4 x ptr> undef, i32 1, <vscale x 4 x i1> undef, <vscale x 4 x float> undef)
; CHECK-VSCALE-1-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %res.nxv2f32 = call <vscale x 2 x float> @llvm.masked.gather.nxv2f32.nxv2p0(<vscale x 2 x ptr> undef, i32 1, <vscale x 2 x i1> undef, <vscale x 2 x float> undef)
; CHECK-VSCALE-1-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %res.nxv16i16 = call <vscale x 16 x i16> @llvm.masked.gather.nxv16i16.nxv16p0(<vscale x 16 x ptr> undef, i32 1, <vscale x 16 x i1> undef, <vscale x 16 x i16> undef)
; CHECK-VSCALE-1-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %res.nxv8i16 = call <vscale x 8 x i16> @llvm.masked.gather.nxv8i16.nxv8p0(<vscale x 8 x ptr> undef, i32 1, <vscale x 8 x i1> undef, <vscale x 8 x i16> undef)
; CHECK-VSCALE-1-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %res.nxv4i16 = call <vscale x 4 x i16> @llvm.masked.gather.nxv4i16.nxv4p0(<vscale x 4 x ptr> undef, i32 1, <vscale x 4 x i1> undef, <vscale x 4 x i16> undef)
; CHECK-VSCALE-1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
;
  %res.nxv4f64 = call <vscale x 4 x double> @llvm.masked.gather.nxv4f64(<vscale x 4 x ptr> undef, i32 1, <vscale x 4 x i1> undef, <vscale x 4 x double> undef)
  %res.nxv2f64 = call <vscale x 2 x double> @llvm.masked.gather.nxv2f64(<vscale x 2 x ptr> undef, i32 1, <vscale x 2 x i1> undef, <vscale x 2 x double> undef)

  %res.nxv8f32 = call <vscale x 8 x float> @llvm.masked.gather.nxv8f32(<vscale x 8 x ptr> undef, i32 1, <vscale x 8 x i1> undef, <vscale x 8 x float> undef)
  %res.nxv4f32 = call <vscale x 4 x float> @llvm.masked.gather.nxv4f32(<vscale x 4 x ptr> undef, i32 1, <vscale x 4 x i1> undef, <vscale x 4 x float> undef)
  %res.nxv2f32 = call <vscale x 2 x float> @llvm.masked.gather.nxv2f32(<vscale x 2 x ptr> undef, i32 1, <vscale x 2 x i1> undef, <vscale x 2 x float> undef)

  %res.nxv16i16 = call <vscale x 16 x i16> @llvm.masked.gather.nxv16i16(<vscale x 16 x ptr> undef, i32 1, <vscale x 16 x i1> undef, <vscale x 16 x i16> undef)
  %res.nxv8i16 = call <vscale x 8 x i16> @llvm.masked.gather.nxv8i16(<vscale x 8 x ptr> undef, i32 1, <vscale x 8 x i1> undef, <vscale x 8 x i16> undef)
  %res.nxv4i16 = call <vscale x 4 x i16> @llvm.masked.gather.nxv4i16(<vscale x 4 x ptr> undef, i32 1, <vscale x 4 x i1> undef, <vscale x 4 x i16> undef)
  ret void
}

; Fixed-width <2 x i128> gather in a function with vscale_range(2, 2)
; (attributes #3). The expected cost is 16 under every prefix.
define <2 x i128> @masked_gather_v1i128(<2 x ptr> %ld, <2 x i1> %masks, <2 x i128> %passthru) #3 {
; CHECK-LABEL: 'masked_gather_v1i128'
; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %res = call <2 x i128> @llvm.masked.gather.v2i128.v2p0(<2 x ptr> %ld, i32 0, <2 x i1> %masks, <2 x i128> %passthru)
; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i128> %res
;
; CHECK-VSCALE-2-LABEL: 'masked_gather_v1i128'
; CHECK-VSCALE-2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %res = call <2 x i128> @llvm.masked.gather.v2i128.v2p0(<2 x ptr> %ld, i32 0, <2 x i1> %masks, <2 x i128> %passthru)
; CHECK-VSCALE-2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i128> %res
;
; CHECK-VSCALE-1-LABEL: 'masked_gather_v1i128'
; CHECK-VSCALE-1-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %res = call <2 x i128> @llvm.masked.gather.v2i128.v2p0(<2 x ptr> %ld, i32 0, <2 x i1> %masks, <2 x i128> %passthru)
; CHECK-VSCALE-1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i128> %res
;
  %res = call <2 x i128> @llvm.masked.gather.v2i128.v2p0(<2 x ptr> %ld, i32 0, <2 x i1> %masks, <2 x i128> %passthru)
  ret <2 x i128> %res
}

attributes #0 = { "target-features"="+sve" vscale_range(1, 8) }
attributes #1 = { "target-features"="+sve" vscale_range(1, 16) "tune-cpu"="generic" }
attributes #2 = { "target-features"="+sve" }
attributes #3 = { "target-features"="+sve" vscale_range(2, 2) }

declare <vscale x 4 x i32> @llvm.masked.gather.nxv4i32(<vscale x 4 x ptr>, i32, <vscale x 4 x i1>, <vscale x 4 x i32>)
declare <vscale x 8 x i32> @llvm.masked.gather.nxv8i32(<vscale x 8 x ptr>, i32, <vscale x 8 x i1>, <vscale x 8 x i32>)
declare <vscale x 1 x i64> @llvm.masked.gather.nxv1i64(<vscale x 1 x ptr>, i32, <vscale x 1 x i1>, <vscale x 1 x i64>)
declare <vscale x 4 x double> @llvm.masked.gather.nxv4f64(<vscale x 4 x ptr>, i32, <vscale x 4 x i1>, <vscale x 4 x double>)
declare <vscale x 2 x double> @llvm.masked.gather.nxv2f64(<vscale x 2 x ptr>, i32, <vscale x 2 x i1>, <vscale x 2 x double>)
declare <vscale x 8 x float> @llvm.masked.gather.nxv8f32(<vscale x 8 x ptr>, i32, <vscale x 8 x i1>, <vscale x 8 x float>)
declare <vscale x 4 x float> @llvm.masked.gather.nxv4f32(<vscale x 4 x ptr>, i32, <vscale x 4 x i1>, <vscale x 4 x float>)
declare <vscale x 2 x float> @llvm.masked.gather.nxv2f32(<vscale x 2 x ptr>, i32, <vscale x 2 x i1>, <vscale x 2 x float>)
declare <vscale x 16 x i16> @llvm.masked.gather.nxv16i16(<vscale x 16 x ptr>, i32, <vscale x 16 x i1>, <vscale x 16 x i16>)
declare <vscale x 8 x i16> @llvm.masked.gather.nxv8i16(<vscale x 8 x ptr>, i32, <vscale x 8 x i1>, <vscale x 8 x i16>)
declare <vscale x 4 x i16> @llvm.masked.gather.nxv4i16(<vscale x 4 x ptr>, i32, <vscale x 4 x i1>, <vscale x 4 x i16>)
declare <2 x i128> @llvm.masked.gather.v2i128.v2p0(<2 x ptr>, i32, <2 x i1>, <2 x i128>)