; RUN: opt -S -passes=instcombine < %s | FileCheck %s

target triple = "aarch64-unknown-linux-gnu"

declare <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32)
declare <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32)
declare <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32)

; SVE intrinsics fmul, fmul_u, fadd, fadd_u, fsub and fsub_u should be
; replaced with regular fmul, fadd and fsub.
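; The "i32 31" operand of the ptrue calls below is the SV_ALL pattern, i.e.
; an all-true predicate, which is what makes the predicated intrinsic calls
; equivalent to the plain IR instructions. The _u intrinsic variants are the
; forms whose inactive lanes are undefined.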

declare <vscale x 8 x half> @llvm.aarch64.sve.fmul.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>)

define <vscale x 8 x half> @replace_fmul_intrinsic_half(<vscale x 8 x half> %a, <vscale x 8 x half> %b) #0 {
; CHECK-LABEL: @replace_fmul_intrinsic_half
; CHECK-NEXT: %1 = fmul fast <vscale x 8 x half> %a, %b
; CHECK-NEXT: ret <vscale x 8 x half> %1
  %1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
  %2 = tail call fast <vscale x 8 x half> @llvm.aarch64.sve.fmul.nxv8f16(<vscale x 8 x i1> %1, <vscale x 8 x half> %a, <vscale x 8 x half> %b)
  ret <vscale x 8 x half> %2
}

declare <vscale x 4 x float> @llvm.aarch64.sve.fmul.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)

define <vscale x 4 x float> @replace_fmul_intrinsic_float(<vscale x 4 x float> %a, <vscale x 4 x float> %b) #0 {
; CHECK-LABEL: @replace_fmul_intrinsic_float
; CHECK-NEXT: %1 = fmul fast <vscale x 4 x float> %a, %b
; CHECK-NEXT: ret <vscale x 4 x float> %1
  %1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
  %2 = tail call fast <vscale x 4 x float> @llvm.aarch64.sve.fmul.nxv4f32(<vscale x 4 x i1> %1, <vscale x 4 x float> %a, <vscale x 4 x float> %b)
  ret <vscale x 4 x float> %2
}

declare <vscale x 2 x double> @llvm.aarch64.sve.fmul.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>)

define <vscale x 2 x double> @replace_fmul_intrinsic_double(<vscale x 2 x double> %a, <vscale x 2 x double> %b) #0 {
; CHECK-LABEL: @replace_fmul_intrinsic_double
; CHECK-NEXT: %1 = fmul fast <vscale x 2 x double> %a, %b
; CHECK-NEXT: ret <vscale x 2 x double> %1
  %1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
  %2 = tail call fast <vscale x 2 x double> @llvm.aarch64.sve.fmul.nxv2f64(<vscale x 2 x i1> %1, <vscale x 2 x double> %a, <vscale x 2 x double> %b)
  ret <vscale x 2 x double> %2
}

declare <vscale x 8 x half> @llvm.aarch64.sve.fmul.u.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>)

define <vscale x 8 x half> @replace_fmul_u_intrinsic_half(<vscale x 8 x half> %a, <vscale x 8 x half> %b) #0 {
; CHECK-LABEL: @replace_fmul_u_intrinsic_half
; CHECK-NEXT: %1 = fmul fast <vscale x 8 x half> %a, %b
; CHECK-NEXT: ret <vscale x 8 x half> %1
  %1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
  %2 = tail call fast <vscale x 8 x half> @llvm.aarch64.sve.fmul.u.nxv8f16(<vscale x 8 x i1> %1, <vscale x 8 x half> %a, <vscale x 8 x half> %b)
  ret <vscale x 8 x half> %2
}

declare <vscale x 4 x float> @llvm.aarch64.sve.fmul.u.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)

define <vscale x 4 x float> @replace_fmul_u_intrinsic_float(<vscale x 4 x float> %a, <vscale x 4 x float> %b) #0 {
; CHECK-LABEL: @replace_fmul_u_intrinsic_float
; CHECK-NEXT: %1 = fmul fast <vscale x 4 x float> %a, %b
; CHECK-NEXT: ret <vscale x 4 x float> %1
  %1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
  %2 = tail call fast <vscale x 4 x float> @llvm.aarch64.sve.fmul.u.nxv4f32(<vscale x 4 x i1> %1, <vscale x 4 x float> %a, <vscale x 4 x float> %b)
  ret <vscale x 4 x float> %2
}

declare <vscale x 2 x double> @llvm.aarch64.sve.fmul.u.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>)

define <vscale x 2 x double> @replace_fmul_u_intrinsic_double(<vscale x 2 x double> %a, <vscale x 2 x double> %b) #0 {
; CHECK-LABEL: @replace_fmul_u_intrinsic_double
; CHECK-NEXT: %1 = fmul fast <vscale x 2 x double> %a, %b
; CHECK-NEXT: ret <vscale x 2 x double> %1
  %1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
  %2 = tail call fast <vscale x 2 x double> @llvm.aarch64.sve.fmul.u.nxv2f64(<vscale x 2 x i1> %1, <vscale x 2 x double> %a, <vscale x 2 x double> %b)
  ret <vscale x 2 x double> %2
}

declare <vscale x 8 x half> @llvm.aarch64.sve.fadd.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>)

define <vscale x 8 x half> @replace_fadd_intrinsic_half(<vscale x 8 x half> %a, <vscale x 8 x half> %b) #0 {
; CHECK-LABEL: @replace_fadd_intrinsic_half
; CHECK-NEXT: %1 = fadd fast <vscale x 8 x half> %a, %b
; CHECK-NEXT: ret <vscale x 8 x half> %1
  %1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
  %2 = tail call fast <vscale x 8 x half> @llvm.aarch64.sve.fadd.nxv8f16(<vscale x 8 x i1> %1, <vscale x 8 x half> %a, <vscale x 8 x half> %b)
  ret <vscale x 8 x half> %2
}

declare <vscale x 4 x float> @llvm.aarch64.sve.fadd.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)

define <vscale x 4 x float> @replace_fadd_intrinsic_float(<vscale x 4 x float> %a, <vscale x 4 x float> %b) #0 {
; CHECK-LABEL: @replace_fadd_intrinsic_float
; CHECK-NEXT: %1 = fadd fast <vscale x 4 x float> %a, %b
; CHECK-NEXT: ret <vscale x 4 x float> %1
  %1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
  %2 = tail call fast <vscale x 4 x float> @llvm.aarch64.sve.fadd.nxv4f32(<vscale x 4 x i1> %1, <vscale x 4 x float> %a, <vscale x 4 x float> %b)
  ret <vscale x 4 x float> %2
}

declare <vscale x 2 x double> @llvm.aarch64.sve.fadd.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>)

define <vscale x 2 x double> @replace_fadd_intrinsic_double(<vscale x 2 x double> %a, <vscale x 2 x double> %b) #0 {
; CHECK-LABEL: @replace_fadd_intrinsic_double
; CHECK-NEXT: %1 = fadd fast <vscale x 2 x double> %a, %b
; CHECK-NEXT: ret <vscale x 2 x double> %1
  %1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
  %2 = tail call fast <vscale x 2 x double> @llvm.aarch64.sve.fadd.nxv2f64(<vscale x 2 x i1> %1, <vscale x 2 x double> %a, <vscale x 2 x double> %b)
  ret <vscale x 2 x double> %2
}

declare <vscale x 8 x half> @llvm.aarch64.sve.fadd.u.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>)

define <vscale x 8 x half> @replace_fadd_u_intrinsic_half(<vscale x 8 x half> %a, <vscale x 8 x half> %b) #0 {
; CHECK-LABEL: @replace_fadd_u_intrinsic_half
; CHECK-NEXT: %1 = fadd fast <vscale x 8 x half> %a, %b
; CHECK-NEXT: ret <vscale x 8 x half> %1
  %1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
  %2 = tail call fast <vscale x 8 x half> @llvm.aarch64.sve.fadd.u.nxv8f16(<vscale x 8 x i1> %1, <vscale x 8 x half> %a, <vscale x 8 x half> %b)
  ret <vscale x 8 x half> %2
}

declare <vscale x 4 x float> @llvm.aarch64.sve.fadd.u.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)

define <vscale x 4 x float> @replace_fadd_u_intrinsic_float(<vscale x 4 x float> %a, <vscale x 4 x float> %b) #0 {
; CHECK-LABEL: @replace_fadd_u_intrinsic_float
; CHECK-NEXT: %1 = fadd fast <vscale x 4 x float> %a, %b
; CHECK-NEXT: ret <vscale x 4 x float> %1
  %1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
  %2 = tail call fast <vscale x 4 x float> @llvm.aarch64.sve.fadd.u.nxv4f32(<vscale x 4 x i1> %1, <vscale x 4 x float> %a, <vscale x 4 x float> %b)
  ret <vscale x 4 x float> %2
}

declare <vscale x 2 x double> @llvm.aarch64.sve.fadd.u.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>)

define <vscale x 2 x double> @replace_fadd_u_intrinsic_double(<vscale x 2 x double> %a, <vscale x 2 x double> %b) #0 {
; CHECK-LABEL: @replace_fadd_u_intrinsic_double
; CHECK-NEXT: %1 = fadd fast <vscale x 2 x double> %a, %b
; CHECK-NEXT: ret <vscale x 2 x double> %1
  %1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
  %2 = tail call fast <vscale x 2 x double> @llvm.aarch64.sve.fadd.u.nxv2f64(<vscale x 2 x i1> %1, <vscale x 2 x double> %a, <vscale x 2 x double> %b)
  ret <vscale x 2 x double> %2
}

declare <vscale x 8 x half> @llvm.aarch64.sve.fsub.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>)

define <vscale x 8 x half> @replace_fsub_intrinsic_half(<vscale x 8 x half> %a, <vscale x 8 x half> %b) #0 {
; CHECK-LABEL: @replace_fsub_intrinsic_half
; CHECK-NEXT: %1 = fsub fast <vscale x 8 x half> %a, %b
; CHECK-NEXT: ret <vscale x 8 x half> %1
  %1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
  %2 = tail call fast <vscale x 8 x half> @llvm.aarch64.sve.fsub.nxv8f16(<vscale x 8 x i1> %1, <vscale x 8 x half> %a, <vscale x 8 x half> %b)
  ret <vscale x 8 x half> %2
}

declare <vscale x 4 x float> @llvm.aarch64.sve.fsub.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)

define <vscale x 4 x float> @replace_fsub_intrinsic_float(<vscale x 4 x float> %a, <vscale x 4 x float> %b) #0 {
; CHECK-LABEL: @replace_fsub_intrinsic_float
; CHECK-NEXT: %1 = fsub fast <vscale x 4 x float> %a, %b
; CHECK-NEXT: ret <vscale x 4 x float> %1
  %1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
  %2 = tail call fast <vscale x 4 x float> @llvm.aarch64.sve.fsub.nxv4f32(<vscale x 4 x i1> %1, <vscale x 4 x float> %a, <vscale x 4 x float> %b)
  ret <vscale x 4 x float> %2
}

declare <vscale x 2 x double> @llvm.aarch64.sve.fsub.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>)

define <vscale x 2 x double> @replace_fsub_intrinsic_double(<vscale x 2 x double> %a, <vscale x 2 x double> %b) #0 {
; CHECK-LABEL: @replace_fsub_intrinsic_double
; CHECK-NEXT: %1 = fsub fast <vscale x 2 x double> %a, %b
; CHECK-NEXT: ret <vscale x 2 x double> %1
  %1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
  %2 = tail call fast <vscale x 2 x double> @llvm.aarch64.sve.fsub.nxv2f64(<vscale x 2 x i1> %1, <vscale x 2 x double> %a, <vscale x 2 x double> %b)
  ret <vscale x 2 x double> %2
}
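
; Negative test: the "i32 5" ptrue operand is the SV_VL5 pattern, which only
; activates the first five lanes, so the predicate is not known to be
; all-true and the calls must be left alone.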

define <vscale x 2 x double> @no_replace_on_non_ptrue_all(<vscale x 2 x double> %a, <vscale x 2 x double> %b) #0 {
; CHECK-LABEL: @no_replace_on_non_ptrue_all
; CHECK-NEXT: %1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 5)
; CHECK-NEXT: %2 = tail call fast <vscale x 2 x double> @llvm.aarch64.sve.fsub.nxv2f64(<vscale x 2 x i1> %1, <vscale x 2 x double> %a, <vscale x 2 x double> %b)
; CHECK-NEXT: ret <vscale x 2 x double> %2
  %1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 5)
  %2 = tail call fast <vscale x 2 x double> @llvm.aarch64.sve.fsub.nxv2f64(<vscale x 2 x i1> %1, <vscale x 2 x double> %a, <vscale x 2 x double> %b)
  ret <vscale x 2 x double> %2
}

define <vscale x 2 x double> @replace_fsub_intrinsic_no_fast_flag(<vscale x 2 x double> %a, <vscale x 2 x double> %b) #0 {
; CHECK-LABEL: @replace_fsub_intrinsic_no_fast_flag
; CHECK-NEXT: %1 = fsub <vscale x 2 x double> %a, %b
; CHECK-NEXT: ret <vscale x 2 x double> %1
  %1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
  %2 = tail call <vscale x 2 x double> @llvm.aarch64.sve.fsub.nxv2f64(<vscale x 2 x i1> %1, <vscale x 2 x double> %a, <vscale x 2 x double> %b)
  ret <vscale x 2 x double> %2
}

declare <vscale x 8 x half> @llvm.aarch64.sve.fsub.u.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>)

define <vscale x 8 x half> @replace_fsub_u_intrinsic_half(<vscale x 8 x half> %a, <vscale x 8 x half> %b) #0 {
; CHECK-LABEL: @replace_fsub_u_intrinsic_half
; CHECK-NEXT: %1 = fsub fast <vscale x 8 x half> %a, %b
; CHECK-NEXT: ret <vscale x 8 x half> %1
  %1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
  %2 = tail call fast <vscale x 8 x half> @llvm.aarch64.sve.fsub.u.nxv8f16(<vscale x 8 x i1> %1, <vscale x 8 x half> %a, <vscale x 8 x half> %b)
  ret <vscale x 8 x half> %2
}

declare <vscale x 4 x float> @llvm.aarch64.sve.fsub.u.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)

define <vscale x 4 x float> @replace_fsub_u_intrinsic_float(<vscale x 4 x float> %a, <vscale x 4 x float> %b) #0 {
; CHECK-LABEL: @replace_fsub_u_intrinsic_float
; CHECK-NEXT: %1 = fsub fast <vscale x 4 x float> %a, %b
; CHECK-NEXT: ret <vscale x 4 x float> %1
  %1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
  %2 = tail call fast <vscale x 4 x float> @llvm.aarch64.sve.fsub.u.nxv4f32(<vscale x 4 x i1> %1, <vscale x 4 x float> %a, <vscale x 4 x float> %b)
  ret <vscale x 4 x float> %2
}

declare <vscale x 2 x double> @llvm.aarch64.sve.fsub.u.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>)

define <vscale x 2 x double> @replace_fsub_u_intrinsic_no_fast_flag(<vscale x 2 x double> %a, <vscale x 2 x double> %b) #0 {
; CHECK-LABEL: @replace_fsub_u_intrinsic_no_fast_flag
; CHECK-NEXT: %1 = fsub <vscale x 2 x double> %a, %b
; CHECK-NEXT: ret <vscale x 2 x double> %1
  %1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
  %2 = tail call <vscale x 2 x double> @llvm.aarch64.sve.fsub.u.nxv2f64(<vscale x 2 x i1> %1, <vscale x 2 x double> %a, <vscale x 2 x double> %b)
  ret <vscale x 2 x double> %2
}

define <vscale x 2 x double> @no_replace_on_non_ptrue_all_u(<vscale x 2 x double> %a, <vscale x 2 x double> %b) #0 {
; CHECK-LABEL: @no_replace_on_non_ptrue_all_u
; CHECK-NEXT: %1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 5)
; CHECK-NEXT: %2 = tail call fast <vscale x 2 x double> @llvm.aarch64.sve.fsub.u.nxv2f64(<vscale x 2 x i1> %1, <vscale x 2 x double> %a, <vscale x 2 x double> %b)
; CHECK-NEXT: ret <vscale x 2 x double> %2
  %1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 5)
  %2 = tail call fast <vscale x 2 x double> @llvm.aarch64.sve.fsub.u.nxv2f64(<vscale x 2 x i1> %1, <vscale x 2 x double> %a, <vscale x 2 x double> %b)
  ret <vscale x 2 x double> %2
}

attributes #0 = { "target-features"="+sve" }