; RUN: opt -S -passes=instcombine < %s | FileCheck %s

target triple = "aarch64-unknown-linux-gnu"

; Idempotent fmuls -- should compile to just a ret.
define <vscale x 8 x half> @idempotent_fmul_f16(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a) #0 {
; CHECK-LABEL: @idempotent_fmul_f16(
; CHECK-NEXT:    ret <vscale x 8 x half> [[A:%.*]]
;
  %1 = call <vscale x 8 x half> @llvm.aarch64.sve.dup.x.nxv8f16(half 1.0)
  %2 = call <vscale x 8 x half> @llvm.aarch64.sve.fmul.nxv8f16(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a, <vscale x 8 x half> %1)
  ret <vscale x 8 x half> %2
}

define <vscale x 4 x float> @idempotent_fmul_f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a) #0 {
; CHECK-LABEL: @idempotent_fmul_f32(
; CHECK-NEXT:    ret <vscale x 4 x float> [[A:%.*]]
;
  %1 = call <vscale x 4 x float> @llvm.aarch64.sve.dup.x.nxv4f32(float 1.0)
  %2 = call <vscale x 4 x float> @llvm.aarch64.sve.fmul.nxv4f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> %1)
  ret <vscale x 4 x float> %2
}

define <vscale x 2 x double> @idempotent_fmul_f64(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a) #0 {
; CHECK-LABEL: @idempotent_fmul_f64(
; CHECK-NEXT:    ret <vscale x 2 x double> [[A:%.*]]
;
  %1 = call <vscale x 2 x double> @llvm.aarch64.sve.dup.x.nxv2f64(double 1.0)
  %2 = call <vscale x 2 x double> @llvm.aarch64.sve.fmul.nxv2f64(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> %1)
  ret <vscale x 2 x double> %2
}

define <vscale x 2 x double> @idempotent_fmul_different_argument_order(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a) #0 {
; CHECK-LABEL: @idempotent_fmul_different_argument_order(
; CHECK-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x double> @llvm.aarch64.sve.fmul.nxv2f64(<vscale x 2 x i1> [[PG:%.*]], <vscale x 2 x double> shufflevector (<vscale x 2 x double> insertelement (<vscale x 2 x double> poison, double 1.000000e+00, i64 0), <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer), <vscale x 2 x double> [[A:%.*]])
; CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
;
  %1 = call <vscale x 2 x double> @llvm.aarch64.sve.dup.x.nxv2f64(double 1.0)
  ; Different argument order to the above tests.
  %2 = call <vscale x 2 x double> @llvm.aarch64.sve.fmul.nxv2f64(<vscale x 2 x i1> %pg, <vscale x 2 x double> %1, <vscale x 2 x double> %a)
  ret <vscale x 2 x double> %2
}

define <vscale x 8 x half> @idempotent_fmul_with_predicated_dup(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a) #0 {
; CHECK-LABEL: @idempotent_fmul_with_predicated_dup(
; CHECK-NEXT:    ret <vscale x 8 x half> [[A:%.*]]
;
  %1 = call <vscale x 8 x half> @llvm.aarch64.sve.dup.nxv8f16(<vscale x 8 x half> undef, <vscale x 8 x i1> %pg, half 1.0)
  %2 = call <vscale x 8 x half> @llvm.aarch64.sve.fmul.nxv8f16(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a, <vscale x 8 x half> %1)
  ret <vscale x 8 x half> %2
}

define <vscale x 8 x half> @idempotent_fmul_two_dups(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a) #0 {
  ; Edge case -- make sure that the case where we're fmultiplying two dups
  ; together is sane.
; CHECK-LABEL: @idempotent_fmul_two_dups(
; CHECK-NEXT:    ret <vscale x 8 x half> shufflevector (<vscale x 8 x half> insertelement (<vscale x 8 x half> poison, half 0xH3C00, i64 0), <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer)
;
  %1 = call <vscale x 8 x half> @llvm.aarch64.sve.dup.x.nxv8f16(half 1.0)
  %2 = call <vscale x 8 x half> @llvm.aarch64.sve.dup.x.nxv8f16(half 1.0)
  %3 = call <vscale x 8 x half> @llvm.aarch64.sve.fmul.nxv8f16(<vscale x 8 x i1> %pg, <vscale x 8 x half> %1, <vscale x 8 x half> %2)
  ret <vscale x 8 x half> %3
}
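; Note on the CHECK lines in this file: the autogenerated checks print splats
; in their canonical constant form, shufflevector (insertelement (poison,
; <scalar>, i64 0), poison, zeroinitializer), and print half-precision
; immediates in hex -- 0xH3C00 is half 1.0 and 0xH4000 is half 2.0.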
; Non-idempotent fmuls -- we don't expect these to be optimised out.
define <vscale x 8 x half> @non_idempotent_fmul_f16(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a) #0 {
; CHECK-LABEL: @non_idempotent_fmul_f16(
; CHECK-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x half> @llvm.aarch64.sve.fmul.nxv8f16(<vscale x 8 x i1> [[PG:%.*]], <vscale x 8 x half> [[A:%.*]], <vscale x 8 x half> shufflevector (<vscale x 8 x half> insertelement (<vscale x 8 x half> poison, half 0xH4000, i64 0), <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer))
; CHECK-NEXT:    ret <vscale x 8 x half> [[TMP1]]
;
  %1 = call <vscale x 8 x half> @llvm.aarch64.sve.dup.x.nxv8f16(half 2.0)
  %2 = call <vscale x 8 x half> @llvm.aarch64.sve.fmul.nxv8f16(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a, <vscale x 8 x half> %1)
  ret <vscale x 8 x half> %2
}

define <vscale x 4 x float> @non_idempotent_fmul_f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a) #0 {
; CHECK-LABEL: @non_idempotent_fmul_f32(
; CHECK-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x float> @llvm.aarch64.sve.fmul.nxv4f32(<vscale x 4 x i1> [[PG:%.*]], <vscale x 4 x float> [[A:%.*]], <vscale x 4 x float> shufflevector (<vscale x 4 x float> insertelement (<vscale x 4 x float> poison, float 2.000000e+00, i64 0), <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer))
; CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
;
  %1 = call <vscale x 4 x float> @llvm.aarch64.sve.dup.x.nxv4f32(float 2.0)
  %2 = call <vscale x 4 x float> @llvm.aarch64.sve.fmul.nxv4f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> %1)
  ret <vscale x 4 x float> %2
}

define <vscale x 2 x double> @non_idempotent_fmul_f64(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a) #0 {
; CHECK-LABEL: @non_idempotent_fmul_f64(
; CHECK-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x double> @llvm.aarch64.sve.fmul.nxv2f64(<vscale x 2 x i1> [[PG:%.*]], <vscale x 2 x double> [[A:%.*]], <vscale x 2 x double> shufflevector (<vscale x 2 x double> insertelement (<vscale x 2 x double> poison, double 2.000000e+00, i64 0), <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer))
; CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
;
  %1 = call <vscale x 2 x double> @llvm.aarch64.sve.dup.x.nxv2f64(double 2.0)
  %2 = call <vscale x 2 x double> @llvm.aarch64.sve.fmul.nxv2f64(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> %1)
  ret <vscale x 2 x double> %2
}

define <vscale x 2 x double> @non_idempotent_fmul_with_predicated_dup(<vscale x 2 x i1> %pg1, <vscale x 2 x i1> %pg2, <vscale x 2 x double> %a) #0 {
  ; Different predicates
; CHECK-LABEL: @non_idempotent_fmul_with_predicated_dup(
; CHECK-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x double> @llvm.aarch64.sve.dup.nxv2f64(<vscale x 2 x double> undef, <vscale x 2 x i1> [[PG1:%.*]], double 1.000000e+00)
; CHECK-NEXT:    [[TMP2:%.*]] = call <vscale x 2 x double> @llvm.aarch64.sve.fmul.nxv2f64(<vscale x 2 x i1> [[PG2:%.*]], <vscale x 2 x double> [[A:%.*]], <vscale x 2 x double> [[TMP1]])
; CHECK-NEXT:    ret <vscale x 2 x double> [[TMP2]]
;
  %1 = call <vscale x 2 x double> @llvm.aarch64.sve.dup.nxv2f64(<vscale x 2 x double> undef, <vscale x 2 x i1> %pg1, double 1.0)
  %2 = call <vscale x 2 x double> @llvm.aarch64.sve.fmul.nxv2f64(<vscale x 2 x i1> %pg2, <vscale x 2 x double> %a, <vscale x 2 x double> %1)
  ret <vscale x 2 x double> %2
}

declare <vscale x 8 x half> @llvm.aarch64.sve.dup.x.nxv8f16(half)
declare <vscale x 4 x float> @llvm.aarch64.sve.dup.x.nxv4f32(float)
declare <vscale x 2 x double> @llvm.aarch64.sve.dup.x.nxv2f64(double)

declare <vscale x 2 x double> @llvm.aarch64.sve.dup.nxv2f64(<vscale x 2 x double>, <vscale x 2 x i1>, double)
declare <vscale x 8 x half> @llvm.aarch64.sve.dup.nxv8f16(<vscale x 8 x half>, <vscale x 8 x i1>, half)

declare <vscale x 8 x half> @llvm.aarch64.sve.fmul.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>)
declare <vscale x 4 x float> @llvm.aarch64.sve.fmul.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
declare <vscale x 2 x double> @llvm.aarch64.sve.fmul.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>)

attributes #0 = { "target-features"="+sve" }
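; For comparison, a minimal sketch (commented out, not exercised by the RUN
; line; the function name is illustrative) of the same multiplicative-identity
; fold on a plain unpredicated IR fmul. InstSimplify rewrites X * 1.0 to X, so
; instcombine reduces this body to a single ret as well:
;
;   define <vscale x 8 x half> @generic_fmul_by_one(<vscale x 8 x half> %a) {
;     %ins = insertelement <vscale x 8 x half> poison, half 1.0, i64 0
;     %one = shufflevector <vscale x 8 x half> %ins, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
;     %r = fmul <vscale x 8 x half> %a, %one
;     ret <vscale x 8 x half> %r
;   }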