; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zfh,+zvfh,+f,+d -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH,LMULMAX2 ; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zfh,+zvfh,+f,+d -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH,LMULMAX2 ; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zfh,+zvfh,+f,+d -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH,LMULMAX1,LMULMAX1-RV32 ; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zfh,+zvfh,+f,+d -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH,LMULMAX1,LMULMAX1-RV64 ; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zfhmin,+zvfh,+f,+d -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH,LMULMAX2 ; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zfhmin,+zvfh,+f,+d -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH,LMULMAX2 ; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zfhmin,+zvfh,+f,+d -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH,LMULMAX1,LMULMAX1-RV32 ; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zfhmin,+zvfh,+f,+d -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH,LMULMAX1,LMULMAX1-RV64 ; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zfh,+zvfhmin,+f,+d,+zvl256b -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN,ZVFHMINLMULMAX2,ZVFHMINLMULMAX2-RV32 ; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zfh,+zvfhmin,+f,+d,+zvl256b -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN,ZVFHMINLMULMAX2,ZVFHMINLMULMAX2-RV64 ; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zfh,+zvfhmin,+f,+zvl256b -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN,ZVFHMINLMULMAX1-RV32 ; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zfh,+zvfhmin,+f,+zvl256b -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN,ZVFHMINLMULMAX1-RV64 ; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zfhmin,+zvfhmin,+f,+zvl256b -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN,ZVFHMINLMULMAX2,ZVFHMINLMULMAX2-RV32 ; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zfhmin,+zvfhmin,+f,+zvl256b -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN,ZVFHMINLMULMAX2,ZVFHMINLMULMAX2-RV64 ; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zfhmin,+zvfhmin,+f,+zvl256b -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN,ZVFHMINLMULMAX1-RV32 ; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zfhmin,+zvfhmin,+f,+zvl256b -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN,ZVFHMINLMULMAX1-RV64 define void @fadd_v8f16(ptr %x, ptr %y) { ; 
ZVFH-LABEL: fadd_v8f16: ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; ZVFH-NEXT: vle16.v v8, (a0) ; ZVFH-NEXT: vle16.v v9, (a1) ; ZVFH-NEXT: vfadd.vv v8, v8, v9 ; ZVFH-NEXT: vse16.v v8, (a0) ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: fadd_v8f16: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: vsetivli zero, 8, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vle16.v v8, (a1) ; ZVFHMIN-NEXT: vle16.v v9, (a0) ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; ZVFHMIN-NEXT: vfadd.vv v8, v8, v10 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v9, v8 ; ZVFHMIN-NEXT: vse16.v v9, (a0) ; ZVFHMIN-NEXT: ret %a = load <8 x half>, ptr %x %b = load <8 x half>, ptr %y %c = fadd <8 x half> %a, %b store <8 x half> %c, ptr %x ret void } define void @fadd_v6f16(ptr %x, ptr %y) { ; ZVFH-LABEL: fadd_v6f16: ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma ; ZVFH-NEXT: vle16.v v8, (a0) ; ZVFH-NEXT: vle16.v v9, (a1) ; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; ZVFH-NEXT: vfadd.vv v8, v8, v9 ; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma ; ZVFH-NEXT: vse16.v v8, (a0) ; ZVFH-NEXT: ret ; ; ZVFHMINLMULMAX2-RV32-LABEL: fadd_v6f16: ; ZVFHMINLMULMAX2-RV32: # %bb.0: ; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma ; ZVFHMINLMULMAX2-RV32-NEXT: vle16.v v8, (a1) ; ZVFHMINLMULMAX2-RV32-NEXT: vle16.v v9, (a0) ; ZVFHMINLMULMAX2-RV32-NEXT: vfwcvt.f.f.v v10, v8 ; ZVFHMINLMULMAX2-RV32-NEXT: vfwcvt.f.f.v v8, v9 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; ZVFHMINLMULMAX2-RV32-NEXT: vfadd.vv v8, v8, v10 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; ZVFHMINLMULMAX2-RV32-NEXT: vfncvt.f.f.w v9, v8 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma ; ZVFHMINLMULMAX2-RV32-NEXT: vslidedown.vi v8, v9, 2 ; ZVFHMINLMULMAX2-RV32-NEXT: addi a1, a0, 8 ; ZVFHMINLMULMAX2-RV32-NEXT: vse32.v v8, (a1) ; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma ; ZVFHMINLMULMAX2-RV32-NEXT: vse16.v v9, (a0) ; ZVFHMINLMULMAX2-RV32-NEXT: ret ; ; ZVFHMINLMULMAX2-RV64-LABEL: fadd_v6f16: ; ZVFHMINLMULMAX2-RV64: # %bb.0: ; ZVFHMINLMULMAX2-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma ; ZVFHMINLMULMAX2-RV64-NEXT: vle16.v v8, (a1) ; ZVFHMINLMULMAX2-RV64-NEXT: vle16.v v9, (a0) ; ZVFHMINLMULMAX2-RV64-NEXT: vfwcvt.f.f.v v10, v8 ; ZVFHMINLMULMAX2-RV64-NEXT: vfwcvt.f.f.v v8, v9 ; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; ZVFHMINLMULMAX2-RV64-NEXT: vfadd.vv v8, v8, v10 ; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; ZVFHMINLMULMAX2-RV64-NEXT: vfncvt.f.f.w v9, v8 ; ZVFHMINLMULMAX2-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma ; ZVFHMINLMULMAX2-RV64-NEXT: vse64.v v9, (a0) ; ZVFHMINLMULMAX2-RV64-NEXT: vslidedown.vi v8, v9, 2 ; ZVFHMINLMULMAX2-RV64-NEXT: addi a0, a0, 8 ; ZVFHMINLMULMAX2-RV64-NEXT: vse32.v v8, (a0) ; ZVFHMINLMULMAX2-RV64-NEXT: ret ; ; ZVFHMINLMULMAX1-RV32-LABEL: fadd_v6f16: ; ZVFHMINLMULMAX1-RV32: # %bb.0: ; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma ; ZVFHMINLMULMAX1-RV32-NEXT: vle16.v v8, (a1) ; ZVFHMINLMULMAX1-RV32-NEXT: vle16.v v9, (a0) ; ZVFHMINLMULMAX1-RV32-NEXT: vfwcvt.f.f.v v10, v8 ; ZVFHMINLMULMAX1-RV32-NEXT: vfwcvt.f.f.v v8, v9 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; ZVFHMINLMULMAX1-RV32-NEXT: vfadd.vv v8, v8, v10 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; ZVFHMINLMULMAX1-RV32-NEXT: vfncvt.f.f.w v9, v8 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli 
zero, 1, e32, mf2, ta, ma ; ZVFHMINLMULMAX1-RV32-NEXT: vslidedown.vi v8, v9, 2 ; ZVFHMINLMULMAX1-RV32-NEXT: addi a1, a0, 8 ; ZVFHMINLMULMAX1-RV32-NEXT: vse32.v v8, (a1) ; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma ; ZVFHMINLMULMAX1-RV32-NEXT: vse16.v v9, (a0) ; ZVFHMINLMULMAX1-RV32-NEXT: ret ; ; ZVFHMINLMULMAX1-RV64-LABEL: fadd_v6f16: ; ZVFHMINLMULMAX1-RV64: # %bb.0: ; ZVFHMINLMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma ; ZVFHMINLMULMAX1-RV64-NEXT: vle16.v v8, (a1) ; ZVFHMINLMULMAX1-RV64-NEXT: vle16.v v9, (a0) ; ZVFHMINLMULMAX1-RV64-NEXT: vfwcvt.f.f.v v10, v8 ; ZVFHMINLMULMAX1-RV64-NEXT: vfwcvt.f.f.v v8, v9 ; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; ZVFHMINLMULMAX1-RV64-NEXT: vfadd.vv v8, v8, v10 ; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; ZVFHMINLMULMAX1-RV64-NEXT: vfncvt.f.f.w v9, v8 ; ZVFHMINLMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma ; ZVFHMINLMULMAX1-RV64-NEXT: vse64.v v9, (a0) ; ZVFHMINLMULMAX1-RV64-NEXT: vslidedown.vi v8, v9, 2 ; ZVFHMINLMULMAX1-RV64-NEXT: addi a0, a0, 8 ; ZVFHMINLMULMAX1-RV64-NEXT: vse32.v v8, (a0) ; ZVFHMINLMULMAX1-RV64-NEXT: ret %a = load <6 x half>, ptr %x %b = load <6 x half>, ptr %y %c = fadd <6 x half> %a, %b store <6 x half> %c, ptr %x ret void } define void @fadd_v4f32(ptr %x, ptr %y) { ; ZVFH-LABEL: fadd_v4f32: ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; ZVFH-NEXT: vle32.v v8, (a0) ; ZVFH-NEXT: vle32.v v9, (a1) ; ZVFH-NEXT: vfadd.vv v8, v8, v9 ; ZVFH-NEXT: vse32.v v8, (a0) ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: fadd_v4f32: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: vsetivli zero, 4, e32, mf2, ta, ma ; ZVFHMIN-NEXT: vle32.v v8, (a0) ; ZVFHMIN-NEXT: vle32.v v9, (a1) ; ZVFHMIN-NEXT: vfadd.vv v8, v8, v9 ; ZVFHMIN-NEXT: vse32.v v8, (a0) ; ZVFHMIN-NEXT: ret %a = load <4 x float>, ptr %x %b = load <4 x float>, ptr %y %c = fadd <4 x float> %a, %b store <4 x float> %c, ptr %x ret void } define void @fadd_v2f64(ptr %x, ptr %y) { ; CHECK-LABEL: fadd_v2f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; CHECK-NEXT: vle64.v v8, (a0) ; CHECK-NEXT: vle64.v v9, (a1) ; CHECK-NEXT: vfadd.vv v8, v8, v9 ; CHECK-NEXT: vse64.v v8, (a0) ; CHECK-NEXT: ret %a = load <2 x double>, ptr %x %b = load <2 x double>, ptr %y %c = fadd <2 x double> %a, %b store <2 x double> %c, ptr %x ret void } define void @fsub_v8f16(ptr %x, ptr %y) { ; ZVFH-LABEL: fsub_v8f16: ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; ZVFH-NEXT: vle16.v v8, (a0) ; ZVFH-NEXT: vle16.v v9, (a1) ; ZVFH-NEXT: vfsub.vv v8, v8, v9 ; ZVFH-NEXT: vse16.v v8, (a0) ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: fsub_v8f16: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: vsetivli zero, 8, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vle16.v v8, (a1) ; ZVFHMIN-NEXT: vle16.v v9, (a0) ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; ZVFHMIN-NEXT: vfsub.vv v8, v8, v10 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v9, v8 ; ZVFHMIN-NEXT: vse16.v v9, (a0) ; ZVFHMIN-NEXT: ret %a = load <8 x half>, ptr %x %b = load <8 x half>, ptr %y %c = fsub <8 x half> %a, %b store <8 x half> %c, ptr %x ret void } define void @fsub_v6f16(ptr %x, ptr %y) { ; ZVFH-LABEL: fsub_v6f16: ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma ; ZVFH-NEXT: vle16.v v8, (a0) ; ZVFH-NEXT: vle16.v v9, (a1) ; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; ZVFH-NEXT: vfsub.vv v8, v8, v9 ; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma ; 
ZVFH-NEXT: vse16.v v8, (a0) ; ZVFH-NEXT: ret ; ; ZVFHMINLMULMAX2-RV32-LABEL: fsub_v6f16: ; ZVFHMINLMULMAX2-RV32: # %bb.0: ; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma ; ZVFHMINLMULMAX2-RV32-NEXT: vle16.v v8, (a1) ; ZVFHMINLMULMAX2-RV32-NEXT: vle16.v v9, (a0) ; ZVFHMINLMULMAX2-RV32-NEXT: vfwcvt.f.f.v v10, v8 ; ZVFHMINLMULMAX2-RV32-NEXT: vfwcvt.f.f.v v8, v9 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; ZVFHMINLMULMAX2-RV32-NEXT: vfsub.vv v8, v8, v10 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; ZVFHMINLMULMAX2-RV32-NEXT: vfncvt.f.f.w v9, v8 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma ; ZVFHMINLMULMAX2-RV32-NEXT: vslidedown.vi v8, v9, 2 ; ZVFHMINLMULMAX2-RV32-NEXT: addi a1, a0, 8 ; ZVFHMINLMULMAX2-RV32-NEXT: vse32.v v8, (a1) ; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma ; ZVFHMINLMULMAX2-RV32-NEXT: vse16.v v9, (a0) ; ZVFHMINLMULMAX2-RV32-NEXT: ret ; ; ZVFHMINLMULMAX2-RV64-LABEL: fsub_v6f16: ; ZVFHMINLMULMAX2-RV64: # %bb.0: ; ZVFHMINLMULMAX2-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma ; ZVFHMINLMULMAX2-RV64-NEXT: vle16.v v8, (a1) ; ZVFHMINLMULMAX2-RV64-NEXT: vle16.v v9, (a0) ; ZVFHMINLMULMAX2-RV64-NEXT: vfwcvt.f.f.v v10, v8 ; ZVFHMINLMULMAX2-RV64-NEXT: vfwcvt.f.f.v v8, v9 ; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; ZVFHMINLMULMAX2-RV64-NEXT: vfsub.vv v8, v8, v10 ; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; ZVFHMINLMULMAX2-RV64-NEXT: vfncvt.f.f.w v9, v8 ; ZVFHMINLMULMAX2-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma ; ZVFHMINLMULMAX2-RV64-NEXT: vse64.v v9, (a0) ; ZVFHMINLMULMAX2-RV64-NEXT: vslidedown.vi v8, v9, 2 ; ZVFHMINLMULMAX2-RV64-NEXT: addi a0, a0, 8 ; ZVFHMINLMULMAX2-RV64-NEXT: vse32.v v8, (a0) ; ZVFHMINLMULMAX2-RV64-NEXT: ret ; ; ZVFHMINLMULMAX1-RV32-LABEL: fsub_v6f16: ; ZVFHMINLMULMAX1-RV32: # %bb.0: ; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma ; ZVFHMINLMULMAX1-RV32-NEXT: vle16.v v8, (a1) ; ZVFHMINLMULMAX1-RV32-NEXT: vle16.v v9, (a0) ; ZVFHMINLMULMAX1-RV32-NEXT: vfwcvt.f.f.v v10, v8 ; ZVFHMINLMULMAX1-RV32-NEXT: vfwcvt.f.f.v v8, v9 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; ZVFHMINLMULMAX1-RV32-NEXT: vfsub.vv v8, v8, v10 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; ZVFHMINLMULMAX1-RV32-NEXT: vfncvt.f.f.w v9, v8 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma ; ZVFHMINLMULMAX1-RV32-NEXT: vslidedown.vi v8, v9, 2 ; ZVFHMINLMULMAX1-RV32-NEXT: addi a1, a0, 8 ; ZVFHMINLMULMAX1-RV32-NEXT: vse32.v v8, (a1) ; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma ; ZVFHMINLMULMAX1-RV32-NEXT: vse16.v v9, (a0) ; ZVFHMINLMULMAX1-RV32-NEXT: ret ; ; ZVFHMINLMULMAX1-RV64-LABEL: fsub_v6f16: ; ZVFHMINLMULMAX1-RV64: # %bb.0: ; ZVFHMINLMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma ; ZVFHMINLMULMAX1-RV64-NEXT: vle16.v v8, (a1) ; ZVFHMINLMULMAX1-RV64-NEXT: vle16.v v9, (a0) ; ZVFHMINLMULMAX1-RV64-NEXT: vfwcvt.f.f.v v10, v8 ; ZVFHMINLMULMAX1-RV64-NEXT: vfwcvt.f.f.v v8, v9 ; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; ZVFHMINLMULMAX1-RV64-NEXT: vfsub.vv v8, v8, v10 ; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; ZVFHMINLMULMAX1-RV64-NEXT: vfncvt.f.f.w v9, v8 ; ZVFHMINLMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma ; ZVFHMINLMULMAX1-RV64-NEXT: vse64.v v9, (a0) ; ZVFHMINLMULMAX1-RV64-NEXT: vslidedown.vi v8, v9, 2 ; ZVFHMINLMULMAX1-RV64-NEXT: addi a0, a0, 8 ; ZVFHMINLMULMAX1-RV64-NEXT: vse32.v v8, (a0) ; 
ZVFHMINLMULMAX1-RV64-NEXT: ret %a = load <6 x half>, ptr %x %b = load <6 x half>, ptr %y %c = fsub <6 x half> %a, %b store <6 x half> %c, ptr %x ret void } define void @fsub_v4f32(ptr %x, ptr %y) { ; ZVFH-LABEL: fsub_v4f32: ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; ZVFH-NEXT: vle32.v v8, (a0) ; ZVFH-NEXT: vle32.v v9, (a1) ; ZVFH-NEXT: vfsub.vv v8, v8, v9 ; ZVFH-NEXT: vse32.v v8, (a0) ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: fsub_v4f32: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: vsetivli zero, 4, e32, mf2, ta, ma ; ZVFHMIN-NEXT: vle32.v v8, (a0) ; ZVFHMIN-NEXT: vle32.v v9, (a1) ; ZVFHMIN-NEXT: vfsub.vv v8, v8, v9 ; ZVFHMIN-NEXT: vse32.v v8, (a0) ; ZVFHMIN-NEXT: ret %a = load <4 x float>, ptr %x %b = load <4 x float>, ptr %y %c = fsub <4 x float> %a, %b store <4 x float> %c, ptr %x ret void } define void @fsub_v2f64(ptr %x, ptr %y) { ; CHECK-LABEL: fsub_v2f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; CHECK-NEXT: vle64.v v8, (a0) ; CHECK-NEXT: vle64.v v9, (a1) ; CHECK-NEXT: vfsub.vv v8, v8, v9 ; CHECK-NEXT: vse64.v v8, (a0) ; CHECK-NEXT: ret %a = load <2 x double>, ptr %x %b = load <2 x double>, ptr %y %c = fsub <2 x double> %a, %b store <2 x double> %c, ptr %x ret void } define void @fmul_v8f16(ptr %x, ptr %y) { ; ZVFH-LABEL: fmul_v8f16: ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; ZVFH-NEXT: vle16.v v8, (a0) ; ZVFH-NEXT: vle16.v v9, (a1) ; ZVFH-NEXT: vfmul.vv v8, v8, v9 ; ZVFH-NEXT: vse16.v v8, (a0) ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: fmul_v8f16: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: vsetivli zero, 8, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vle16.v v8, (a1) ; ZVFHMIN-NEXT: vle16.v v9, (a0) ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; ZVFHMIN-NEXT: vfmul.vv v8, v8, v10 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v9, v8 ; ZVFHMIN-NEXT: vse16.v v9, (a0) ; ZVFHMIN-NEXT: ret %a = load <8 x half>, ptr %x %b = load <8 x half>, ptr %y %c = fmul <8 x half> %a, %b store <8 x half> %c, ptr %x ret void } define void @fmul_v6f16(ptr %x, ptr %y) { ; ZVFH-LABEL: fmul_v6f16: ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma ; ZVFH-NEXT: vle16.v v8, (a0) ; ZVFH-NEXT: vle16.v v9, (a1) ; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; ZVFH-NEXT: vfmul.vv v8, v8, v9 ; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma ; ZVFH-NEXT: vse16.v v8, (a0) ; ZVFH-NEXT: ret ; ; ZVFHMINLMULMAX2-RV32-LABEL: fmul_v6f16: ; ZVFHMINLMULMAX2-RV32: # %bb.0: ; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma ; ZVFHMINLMULMAX2-RV32-NEXT: vle16.v v8, (a1) ; ZVFHMINLMULMAX2-RV32-NEXT: vle16.v v9, (a0) ; ZVFHMINLMULMAX2-RV32-NEXT: vfwcvt.f.f.v v10, v8 ; ZVFHMINLMULMAX2-RV32-NEXT: vfwcvt.f.f.v v8, v9 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; ZVFHMINLMULMAX2-RV32-NEXT: vfmul.vv v8, v8, v10 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; ZVFHMINLMULMAX2-RV32-NEXT: vfncvt.f.f.w v9, v8 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma ; ZVFHMINLMULMAX2-RV32-NEXT: vslidedown.vi v8, v9, 2 ; ZVFHMINLMULMAX2-RV32-NEXT: addi a1, a0, 8 ; ZVFHMINLMULMAX2-RV32-NEXT: vse32.v v8, (a1) ; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma ; ZVFHMINLMULMAX2-RV32-NEXT: vse16.v v9, (a0) ; ZVFHMINLMULMAX2-RV32-NEXT: ret ; ; ZVFHMINLMULMAX2-RV64-LABEL: fmul_v6f16: ; ZVFHMINLMULMAX2-RV64: # %bb.0: ; ZVFHMINLMULMAX2-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma ; ZVFHMINLMULMAX2-RV64-NEXT: vle16.v 
v8, (a1) ; ZVFHMINLMULMAX2-RV64-NEXT: vle16.v v9, (a0) ; ZVFHMINLMULMAX2-RV64-NEXT: vfwcvt.f.f.v v10, v8 ; ZVFHMINLMULMAX2-RV64-NEXT: vfwcvt.f.f.v v8, v9 ; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; ZVFHMINLMULMAX2-RV64-NEXT: vfmul.vv v8, v8, v10 ; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; ZVFHMINLMULMAX2-RV64-NEXT: vfncvt.f.f.w v9, v8 ; ZVFHMINLMULMAX2-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma ; ZVFHMINLMULMAX2-RV64-NEXT: vse64.v v9, (a0) ; ZVFHMINLMULMAX2-RV64-NEXT: vslidedown.vi v8, v9, 2 ; ZVFHMINLMULMAX2-RV64-NEXT: addi a0, a0, 8 ; ZVFHMINLMULMAX2-RV64-NEXT: vse32.v v8, (a0) ; ZVFHMINLMULMAX2-RV64-NEXT: ret ; ; ZVFHMINLMULMAX1-RV32-LABEL: fmul_v6f16: ; ZVFHMINLMULMAX1-RV32: # %bb.0: ; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma ; ZVFHMINLMULMAX1-RV32-NEXT: vle16.v v8, (a1) ; ZVFHMINLMULMAX1-RV32-NEXT: vle16.v v9, (a0) ; ZVFHMINLMULMAX1-RV32-NEXT: vfwcvt.f.f.v v10, v8 ; ZVFHMINLMULMAX1-RV32-NEXT: vfwcvt.f.f.v v8, v9 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; ZVFHMINLMULMAX1-RV32-NEXT: vfmul.vv v8, v8, v10 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; ZVFHMINLMULMAX1-RV32-NEXT: vfncvt.f.f.w v9, v8 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma ; ZVFHMINLMULMAX1-RV32-NEXT: vslidedown.vi v8, v9, 2 ; ZVFHMINLMULMAX1-RV32-NEXT: addi a1, a0, 8 ; ZVFHMINLMULMAX1-RV32-NEXT: vse32.v v8, (a1) ; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma ; ZVFHMINLMULMAX1-RV32-NEXT: vse16.v v9, (a0) ; ZVFHMINLMULMAX1-RV32-NEXT: ret ; ; ZVFHMINLMULMAX1-RV64-LABEL: fmul_v6f16: ; ZVFHMINLMULMAX1-RV64: # %bb.0: ; ZVFHMINLMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma ; ZVFHMINLMULMAX1-RV64-NEXT: vle16.v v8, (a1) ; ZVFHMINLMULMAX1-RV64-NEXT: vle16.v v9, (a0) ; ZVFHMINLMULMAX1-RV64-NEXT: vfwcvt.f.f.v v10, v8 ; ZVFHMINLMULMAX1-RV64-NEXT: vfwcvt.f.f.v v8, v9 ; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; ZVFHMINLMULMAX1-RV64-NEXT: vfmul.vv v8, v8, v10 ; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; ZVFHMINLMULMAX1-RV64-NEXT: vfncvt.f.f.w v9, v8 ; ZVFHMINLMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma ; ZVFHMINLMULMAX1-RV64-NEXT: vse64.v v9, (a0) ; ZVFHMINLMULMAX1-RV64-NEXT: vslidedown.vi v8, v9, 2 ; ZVFHMINLMULMAX1-RV64-NEXT: addi a0, a0, 8 ; ZVFHMINLMULMAX1-RV64-NEXT: vse32.v v8, (a0) ; ZVFHMINLMULMAX1-RV64-NEXT: ret %a = load <6 x half>, ptr %x %b = load <6 x half>, ptr %y %c = fmul <6 x half> %a, %b store <6 x half> %c, ptr %x ret void } define void @fmul_v4f32(ptr %x, ptr %y) { ; ZVFH-LABEL: fmul_v4f32: ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; ZVFH-NEXT: vle32.v v8, (a0) ; ZVFH-NEXT: vle32.v v9, (a1) ; ZVFH-NEXT: vfmul.vv v8, v8, v9 ; ZVFH-NEXT: vse32.v v8, (a0) ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: fmul_v4f32: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: vsetivli zero, 4, e32, mf2, ta, ma ; ZVFHMIN-NEXT: vle32.v v8, (a0) ; ZVFHMIN-NEXT: vle32.v v9, (a1) ; ZVFHMIN-NEXT: vfmul.vv v8, v8, v9 ; ZVFHMIN-NEXT: vse32.v v8, (a0) ; ZVFHMIN-NEXT: ret %a = load <4 x float>, ptr %x %b = load <4 x float>, ptr %y %c = fmul <4 x float> %a, %b store <4 x float> %c, ptr %x ret void } define void @fmul_v2f64(ptr %x, ptr %y) { ; CHECK-LABEL: fmul_v2f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; CHECK-NEXT: vle64.v v8, (a0) ; CHECK-NEXT: vle64.v v9, (a1) ; CHECK-NEXT: vfmul.vv v8, v8, v9 ; CHECK-NEXT: vse64.v v8, (a0) ; CHECK-NEXT: ret %a = load <2 x double>, ptr %x %b = load <2 x double>, ptr %y 
%c = fmul <2 x double> %a, %b store <2 x double> %c, ptr %x ret void } define void @fdiv_v8f16(ptr %x, ptr %y) { ; ZVFH-LABEL: fdiv_v8f16: ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; ZVFH-NEXT: vle16.v v8, (a0) ; ZVFH-NEXT: vle16.v v9, (a1) ; ZVFH-NEXT: vfdiv.vv v8, v8, v9 ; ZVFH-NEXT: vse16.v v8, (a0) ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: fdiv_v8f16: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: vsetivli zero, 8, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vle16.v v8, (a1) ; ZVFHMIN-NEXT: vle16.v v9, (a0) ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; ZVFHMIN-NEXT: vfdiv.vv v8, v8, v10 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v9, v8 ; ZVFHMIN-NEXT: vse16.v v9, (a0) ; ZVFHMIN-NEXT: ret %a = load <8 x half>, ptr %x %b = load <8 x half>, ptr %y %c = fdiv <8 x half> %a, %b store <8 x half> %c, ptr %x ret void } define void @fdiv_v6f16(ptr %x, ptr %y) { ; ZVFH-LABEL: fdiv_v6f16: ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma ; ZVFH-NEXT: vle16.v v8, (a0) ; ZVFH-NEXT: vle16.v v9, (a1) ; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; ZVFH-NEXT: vfdiv.vv v8, v8, v9 ; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma ; ZVFH-NEXT: vse16.v v8, (a0) ; ZVFH-NEXT: ret ; ; ZVFHMINLMULMAX2-RV32-LABEL: fdiv_v6f16: ; ZVFHMINLMULMAX2-RV32: # %bb.0: ; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma ; ZVFHMINLMULMAX2-RV32-NEXT: vle16.v v8, (a1) ; ZVFHMINLMULMAX2-RV32-NEXT: vle16.v v9, (a0) ; ZVFHMINLMULMAX2-RV32-NEXT: vfwcvt.f.f.v v10, v8 ; ZVFHMINLMULMAX2-RV32-NEXT: vfwcvt.f.f.v v8, v9 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; ZVFHMINLMULMAX2-RV32-NEXT: vfdiv.vv v8, v8, v10 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; ZVFHMINLMULMAX2-RV32-NEXT: vfncvt.f.f.w v9, v8 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma ; ZVFHMINLMULMAX2-RV32-NEXT: vslidedown.vi v8, v9, 2 ; ZVFHMINLMULMAX2-RV32-NEXT: addi a1, a0, 8 ; ZVFHMINLMULMAX2-RV32-NEXT: vse32.v v8, (a1) ; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma ; ZVFHMINLMULMAX2-RV32-NEXT: vse16.v v9, (a0) ; ZVFHMINLMULMAX2-RV32-NEXT: ret ; ; ZVFHMINLMULMAX2-RV64-LABEL: fdiv_v6f16: ; ZVFHMINLMULMAX2-RV64: # %bb.0: ; ZVFHMINLMULMAX2-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma ; ZVFHMINLMULMAX2-RV64-NEXT: vle16.v v8, (a1) ; ZVFHMINLMULMAX2-RV64-NEXT: vle16.v v9, (a0) ; ZVFHMINLMULMAX2-RV64-NEXT: vfwcvt.f.f.v v10, v8 ; ZVFHMINLMULMAX2-RV64-NEXT: vfwcvt.f.f.v v8, v9 ; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; ZVFHMINLMULMAX2-RV64-NEXT: vfdiv.vv v8, v8, v10 ; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; ZVFHMINLMULMAX2-RV64-NEXT: vfncvt.f.f.w v9, v8 ; ZVFHMINLMULMAX2-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma ; ZVFHMINLMULMAX2-RV64-NEXT: vse64.v v9, (a0) ; ZVFHMINLMULMAX2-RV64-NEXT: vslidedown.vi v8, v9, 2 ; ZVFHMINLMULMAX2-RV64-NEXT: addi a0, a0, 8 ; ZVFHMINLMULMAX2-RV64-NEXT: vse32.v v8, (a0) ; ZVFHMINLMULMAX2-RV64-NEXT: ret ; ; ZVFHMINLMULMAX1-RV32-LABEL: fdiv_v6f16: ; ZVFHMINLMULMAX1-RV32: # %bb.0: ; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma ; ZVFHMINLMULMAX1-RV32-NEXT: vle16.v v8, (a1) ; ZVFHMINLMULMAX1-RV32-NEXT: vle16.v v9, (a0) ; ZVFHMINLMULMAX1-RV32-NEXT: vfwcvt.f.f.v v10, v8 ; ZVFHMINLMULMAX1-RV32-NEXT: vfwcvt.f.f.v v8, v9 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; ZVFHMINLMULMAX1-RV32-NEXT: vfdiv.vv v8, v8, v10 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli 
zero, zero, e16, mf2, ta, ma ; ZVFHMINLMULMAX1-RV32-NEXT: vfncvt.f.f.w v9, v8 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma ; ZVFHMINLMULMAX1-RV32-NEXT: vslidedown.vi v8, v9, 2 ; ZVFHMINLMULMAX1-RV32-NEXT: addi a1, a0, 8 ; ZVFHMINLMULMAX1-RV32-NEXT: vse32.v v8, (a1) ; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma ; ZVFHMINLMULMAX1-RV32-NEXT: vse16.v v9, (a0) ; ZVFHMINLMULMAX1-RV32-NEXT: ret ; ; ZVFHMINLMULMAX1-RV64-LABEL: fdiv_v6f16: ; ZVFHMINLMULMAX1-RV64: # %bb.0: ; ZVFHMINLMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma ; ZVFHMINLMULMAX1-RV64-NEXT: vle16.v v8, (a1) ; ZVFHMINLMULMAX1-RV64-NEXT: vle16.v v9, (a0) ; ZVFHMINLMULMAX1-RV64-NEXT: vfwcvt.f.f.v v10, v8 ; ZVFHMINLMULMAX1-RV64-NEXT: vfwcvt.f.f.v v8, v9 ; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; ZVFHMINLMULMAX1-RV64-NEXT: vfdiv.vv v8, v8, v10 ; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; ZVFHMINLMULMAX1-RV64-NEXT: vfncvt.f.f.w v9, v8 ; ZVFHMINLMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma ; ZVFHMINLMULMAX1-RV64-NEXT: vse64.v v9, (a0) ; ZVFHMINLMULMAX1-RV64-NEXT: vslidedown.vi v8, v9, 2 ; ZVFHMINLMULMAX1-RV64-NEXT: addi a0, a0, 8 ; ZVFHMINLMULMAX1-RV64-NEXT: vse32.v v8, (a0) ; ZVFHMINLMULMAX1-RV64-NEXT: ret %a = load <6 x half>, ptr %x %b = load <6 x half>, ptr %y %c = fdiv <6 x half> %a, %b store <6 x half> %c, ptr %x ret void } define void @fdiv_v4f32(ptr %x, ptr %y) { ; ZVFH-LABEL: fdiv_v4f32: ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; ZVFH-NEXT: vle32.v v8, (a0) ; ZVFH-NEXT: vle32.v v9, (a1) ; ZVFH-NEXT: vfdiv.vv v8, v8, v9 ; ZVFH-NEXT: vse32.v v8, (a0) ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: fdiv_v4f32: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: vsetivli zero, 4, e32, mf2, ta, ma ; ZVFHMIN-NEXT: vle32.v v8, (a0) ; ZVFHMIN-NEXT: vle32.v v9, (a1) ; ZVFHMIN-NEXT: vfdiv.vv v8, v8, v9 ; ZVFHMIN-NEXT: vse32.v v8, (a0) ; ZVFHMIN-NEXT: ret %a = load <4 x float>, ptr %x %b = load <4 x float>, ptr %y %c = fdiv <4 x float> %a, %b store <4 x float> %c, ptr %x ret void } define void @fdiv_v2f64(ptr %x, ptr %y) { ; CHECK-LABEL: fdiv_v2f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; CHECK-NEXT: vle64.v v8, (a0) ; CHECK-NEXT: vle64.v v9, (a1) ; CHECK-NEXT: vfdiv.vv v8, v8, v9 ; CHECK-NEXT: vse64.v v8, (a0) ; CHECK-NEXT: ret %a = load <2 x double>, ptr %x %b = load <2 x double>, ptr %y %c = fdiv <2 x double> %a, %b store <2 x double> %c, ptr %x ret void } define void @fneg_v8f16(ptr %x) { ; ZVFH-LABEL: fneg_v8f16: ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; ZVFH-NEXT: vle16.v v8, (a0) ; ZVFH-NEXT: vfneg.v v8, v8 ; ZVFH-NEXT: vse16.v v8, (a0) ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: fneg_v8f16: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: vsetivli zero, 8, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vle16.v v8, (a0) ; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; ZVFHMIN-NEXT: vfneg.v v8, v9 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v9, v8 ; ZVFHMIN-NEXT: vse16.v v9, (a0) ; ZVFHMIN-NEXT: ret %a = load <8 x half>, ptr %x %b = fneg <8 x half> %a store <8 x half> %b, ptr %x ret void } define void @fneg_v6f16(ptr %x) { ; ZVFH-LABEL: fneg_v6f16: ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma ; ZVFH-NEXT: vle16.v v8, (a0) ; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; ZVFH-NEXT: vfneg.v v8, v8 ; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma ; ZVFH-NEXT: vse16.v v8, (a0) ; ZVFH-NEXT: ret ; ; ZVFHMINLMULMAX2-RV32-LABEL: 
fneg_v6f16: ; ZVFHMINLMULMAX2-RV32: # %bb.0: ; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma ; ZVFHMINLMULMAX2-RV32-NEXT: vle16.v v8, (a0) ; ZVFHMINLMULMAX2-RV32-NEXT: vfwcvt.f.f.v v9, v8 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; ZVFHMINLMULMAX2-RV32-NEXT: vfneg.v v8, v9 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; ZVFHMINLMULMAX2-RV32-NEXT: vfncvt.f.f.w v9, v8 ; ZVFHMINLMULMAX2-RV32-NEXT: addi a1, a0, 8 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma ; ZVFHMINLMULMAX2-RV32-NEXT: vslidedown.vi v8, v9, 2 ; ZVFHMINLMULMAX2-RV32-NEXT: vse32.v v8, (a1) ; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma ; ZVFHMINLMULMAX2-RV32-NEXT: vse16.v v9, (a0) ; ZVFHMINLMULMAX2-RV32-NEXT: ret ; ; ZVFHMINLMULMAX2-RV64-LABEL: fneg_v6f16: ; ZVFHMINLMULMAX2-RV64: # %bb.0: ; ZVFHMINLMULMAX2-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma ; ZVFHMINLMULMAX2-RV64-NEXT: vle16.v v8, (a0) ; ZVFHMINLMULMAX2-RV64-NEXT: vfwcvt.f.f.v v9, v8 ; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; ZVFHMINLMULMAX2-RV64-NEXT: vfneg.v v8, v9 ; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; ZVFHMINLMULMAX2-RV64-NEXT: vfncvt.f.f.w v9, v8 ; ZVFHMINLMULMAX2-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma ; ZVFHMINLMULMAX2-RV64-NEXT: vse64.v v9, (a0) ; ZVFHMINLMULMAX2-RV64-NEXT: addi a0, a0, 8 ; ZVFHMINLMULMAX2-RV64-NEXT: vslidedown.vi v8, v9, 2 ; ZVFHMINLMULMAX2-RV64-NEXT: vse32.v v8, (a0) ; ZVFHMINLMULMAX2-RV64-NEXT: ret ; ; ZVFHMINLMULMAX1-RV32-LABEL: fneg_v6f16: ; ZVFHMINLMULMAX1-RV32: # %bb.0: ; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma ; ZVFHMINLMULMAX1-RV32-NEXT: vle16.v v8, (a0) ; ZVFHMINLMULMAX1-RV32-NEXT: vfwcvt.f.f.v v9, v8 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; ZVFHMINLMULMAX1-RV32-NEXT: vfneg.v v8, v9 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; ZVFHMINLMULMAX1-RV32-NEXT: vfncvt.f.f.w v9, v8 ; ZVFHMINLMULMAX1-RV32-NEXT: addi a1, a0, 8 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma ; ZVFHMINLMULMAX1-RV32-NEXT: vslidedown.vi v8, v9, 2 ; ZVFHMINLMULMAX1-RV32-NEXT: vse32.v v8, (a1) ; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma ; ZVFHMINLMULMAX1-RV32-NEXT: vse16.v v9, (a0) ; ZVFHMINLMULMAX1-RV32-NEXT: ret ; ; ZVFHMINLMULMAX1-RV64-LABEL: fneg_v6f16: ; ZVFHMINLMULMAX1-RV64: # %bb.0: ; ZVFHMINLMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma ; ZVFHMINLMULMAX1-RV64-NEXT: vle16.v v8, (a0) ; ZVFHMINLMULMAX1-RV64-NEXT: vfwcvt.f.f.v v9, v8 ; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; ZVFHMINLMULMAX1-RV64-NEXT: vfneg.v v8, v9 ; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; ZVFHMINLMULMAX1-RV64-NEXT: vfncvt.f.f.w v9, v8 ; ZVFHMINLMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma ; ZVFHMINLMULMAX1-RV64-NEXT: vse64.v v9, (a0) ; ZVFHMINLMULMAX1-RV64-NEXT: addi a0, a0, 8 ; ZVFHMINLMULMAX1-RV64-NEXT: vslidedown.vi v8, v9, 2 ; ZVFHMINLMULMAX1-RV64-NEXT: vse32.v v8, (a0) ; ZVFHMINLMULMAX1-RV64-NEXT: ret %a = load <6 x half>, ptr %x %b = fneg <6 x half> %a store <6 x half> %b, ptr %x ret void } define void @fneg_v4f32(ptr %x) { ; ZVFH-LABEL: fneg_v4f32: ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; ZVFH-NEXT: vle32.v v8, (a0) ; ZVFH-NEXT: vfneg.v v8, v8 ; ZVFH-NEXT: vse32.v v8, (a0) ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: fneg_v4f32: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: vsetivli zero, 4, e32, mf2, ta, ma ; ZVFHMIN-NEXT: vle32.v v8, (a0) ; ZVFHMIN-NEXT: 
vfneg.v v8, v8 ; ZVFHMIN-NEXT: vse32.v v8, (a0) ; ZVFHMIN-NEXT: ret %a = load <4 x float>, ptr %x %b = fneg <4 x float> %a store <4 x float> %b, ptr %x ret void } define void @fneg_v2f64(ptr %x) { ; CHECK-LABEL: fneg_v2f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; CHECK-NEXT: vle64.v v8, (a0) ; CHECK-NEXT: vfneg.v v8, v8 ; CHECK-NEXT: vse64.v v8, (a0) ; CHECK-NEXT: ret %a = load <2 x double>, ptr %x %b = fneg <2 x double> %a store <2 x double> %b, ptr %x ret void } define void @fabs_v8f16(ptr %x) { ; ZVFH-LABEL: fabs_v8f16: ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; ZVFH-NEXT: vle16.v v8, (a0) ; ZVFH-NEXT: vfabs.v v8, v8 ; ZVFH-NEXT: vse16.v v8, (a0) ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: fabs_v8f16: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: vsetivli zero, 8, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vle16.v v8, (a0) ; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; ZVFHMIN-NEXT: vfabs.v v8, v9 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v9, v8 ; ZVFHMIN-NEXT: vse16.v v9, (a0) ; ZVFHMIN-NEXT: ret %a = load <8 x half>, ptr %x %b = call <8 x half> @llvm.fabs.v8f16(<8 x half> %a) store <8 x half> %b, ptr %x ret void } declare <8 x half> @llvm.fabs.v8f16(<8 x half>) define void @fabs_v6f16(ptr %x) { ; ZVFH-LABEL: fabs_v6f16: ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma ; ZVFH-NEXT: vle16.v v8, (a0) ; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; ZVFH-NEXT: vfabs.v v8, v8 ; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma ; ZVFH-NEXT: vse16.v v8, (a0) ; ZVFH-NEXT: ret ; ; ZVFHMINLMULMAX2-RV32-LABEL: fabs_v6f16: ; ZVFHMINLMULMAX2-RV32: # %bb.0: ; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma ; ZVFHMINLMULMAX2-RV32-NEXT: vle16.v v8, (a0) ; ZVFHMINLMULMAX2-RV32-NEXT: vfwcvt.f.f.v v9, v8 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; ZVFHMINLMULMAX2-RV32-NEXT: vfabs.v v8, v9 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; ZVFHMINLMULMAX2-RV32-NEXT: vfncvt.f.f.w v9, v8 ; ZVFHMINLMULMAX2-RV32-NEXT: addi a1, a0, 8 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma ; ZVFHMINLMULMAX2-RV32-NEXT: vslidedown.vi v8, v9, 2 ; ZVFHMINLMULMAX2-RV32-NEXT: vse32.v v8, (a1) ; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma ; ZVFHMINLMULMAX2-RV32-NEXT: vse16.v v9, (a0) ; ZVFHMINLMULMAX2-RV32-NEXT: ret ; ; ZVFHMINLMULMAX2-RV64-LABEL: fabs_v6f16: ; ZVFHMINLMULMAX2-RV64: # %bb.0: ; ZVFHMINLMULMAX2-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma ; ZVFHMINLMULMAX2-RV64-NEXT: vle16.v v8, (a0) ; ZVFHMINLMULMAX2-RV64-NEXT: vfwcvt.f.f.v v9, v8 ; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; ZVFHMINLMULMAX2-RV64-NEXT: vfabs.v v8, v9 ; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; ZVFHMINLMULMAX2-RV64-NEXT: vfncvt.f.f.w v9, v8 ; ZVFHMINLMULMAX2-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma ; ZVFHMINLMULMAX2-RV64-NEXT: vse64.v v9, (a0) ; ZVFHMINLMULMAX2-RV64-NEXT: addi a0, a0, 8 ; ZVFHMINLMULMAX2-RV64-NEXT: vslidedown.vi v8, v9, 2 ; ZVFHMINLMULMAX2-RV64-NEXT: vse32.v v8, (a0) ; ZVFHMINLMULMAX2-RV64-NEXT: ret ; ; ZVFHMINLMULMAX1-RV32-LABEL: fabs_v6f16: ; ZVFHMINLMULMAX1-RV32: # %bb.0: ; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma ; ZVFHMINLMULMAX1-RV32-NEXT: vle16.v v8, (a0) ; ZVFHMINLMULMAX1-RV32-NEXT: vfwcvt.f.f.v v9, v8 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; ZVFHMINLMULMAX1-RV32-NEXT: vfabs.v v8, v9 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli 
zero, zero, e16, mf2, ta, ma ; ZVFHMINLMULMAX1-RV32-NEXT: vfncvt.f.f.w v9, v8 ; ZVFHMINLMULMAX1-RV32-NEXT: addi a1, a0, 8 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma ; ZVFHMINLMULMAX1-RV32-NEXT: vslidedown.vi v8, v9, 2 ; ZVFHMINLMULMAX1-RV32-NEXT: vse32.v v8, (a1) ; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma ; ZVFHMINLMULMAX1-RV32-NEXT: vse16.v v9, (a0) ; ZVFHMINLMULMAX1-RV32-NEXT: ret ; ; ZVFHMINLMULMAX1-RV64-LABEL: fabs_v6f16: ; ZVFHMINLMULMAX1-RV64: # %bb.0: ; ZVFHMINLMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma ; ZVFHMINLMULMAX1-RV64-NEXT: vle16.v v8, (a0) ; ZVFHMINLMULMAX1-RV64-NEXT: vfwcvt.f.f.v v9, v8 ; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; ZVFHMINLMULMAX1-RV64-NEXT: vfabs.v v8, v9 ; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; ZVFHMINLMULMAX1-RV64-NEXT: vfncvt.f.f.w v9, v8 ; ZVFHMINLMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma ; ZVFHMINLMULMAX1-RV64-NEXT: vse64.v v9, (a0) ; ZVFHMINLMULMAX1-RV64-NEXT: addi a0, a0, 8 ; ZVFHMINLMULMAX1-RV64-NEXT: vslidedown.vi v8, v9, 2 ; ZVFHMINLMULMAX1-RV64-NEXT: vse32.v v8, (a0) ; ZVFHMINLMULMAX1-RV64-NEXT: ret %a = load <6 x half>, ptr %x %b = call <6 x half> @llvm.fabs.v6f16(<6 x half> %a) store <6 x half> %b, ptr %x ret void } declare <6 x half> @llvm.fabs.v6f16(<6 x half>) define void @fabs_v4f32(ptr %x) { ; ZVFH-LABEL: fabs_v4f32: ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; ZVFH-NEXT: vle32.v v8, (a0) ; ZVFH-NEXT: vfabs.v v8, v8 ; ZVFH-NEXT: vse32.v v8, (a0) ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: fabs_v4f32: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: vsetivli zero, 4, e32, mf2, ta, ma ; ZVFHMIN-NEXT: vle32.v v8, (a0) ; ZVFHMIN-NEXT: vfabs.v v8, v8 ; ZVFHMIN-NEXT: vse32.v v8, (a0) ; ZVFHMIN-NEXT: ret %a = load <4 x float>, ptr %x %b = call <4 x float> @llvm.fabs.v4f32(<4 x float> %a) store <4 x float> %b, ptr %x ret void } declare <4 x float> @llvm.fabs.v4f32(<4 x float>) define void @fabs_v2f64(ptr %x) { ; CHECK-LABEL: fabs_v2f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; CHECK-NEXT: vle64.v v8, (a0) ; CHECK-NEXT: vfabs.v v8, v8 ; CHECK-NEXT: vse64.v v8, (a0) ; CHECK-NEXT: ret %a = load <2 x double>, ptr %x %b = call <2 x double> @llvm.fabs.v2f64(<2 x double> %a) store <2 x double> %b, ptr %x ret void } declare <2 x double> @llvm.fabs.v2f64(<2 x double>) define void @copysign_v8f16(ptr %x, ptr %y) { ; ZVFH-LABEL: copysign_v8f16: ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; ZVFH-NEXT: vle16.v v8, (a0) ; ZVFH-NEXT: vle16.v v9, (a1) ; ZVFH-NEXT: vfsgnj.vv v8, v8, v9 ; ZVFH-NEXT: vse16.v v8, (a0) ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: copysign_v8f16: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: vsetivli zero, 8, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vle16.v v8, (a1) ; ZVFHMIN-NEXT: vle16.v v9, (a0) ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; ZVFHMIN-NEXT: vfsgnj.vv v8, v8, v10 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v9, v8 ; ZVFHMIN-NEXT: vse16.v v9, (a0) ; ZVFHMIN-NEXT: ret %a = load <8 x half>, ptr %x %b = load <8 x half>, ptr %y %c = call <8 x half> @llvm.copysign.v8f16(<8 x half> %a, <8 x half> %b) store <8 x half> %c, ptr %x ret void } declare <8 x half> @llvm.copysign.v8f16(<8 x half>, <8 x half>) define void @copysign_v6f16(ptr %x, ptr %y) { ; ZVFH-LABEL: copysign_v6f16: ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma ; ZVFH-NEXT: vle16.v v8, (a0) ; 
ZVFH-NEXT: vle16.v v9, (a1) ; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; ZVFH-NEXT: vfsgnj.vv v8, v8, v9 ; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma ; ZVFH-NEXT: vse16.v v8, (a0) ; ZVFH-NEXT: ret ; ; ZVFHMINLMULMAX2-RV32-LABEL: copysign_v6f16: ; ZVFHMINLMULMAX2-RV32: # %bb.0: ; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma ; ZVFHMINLMULMAX2-RV32-NEXT: vle16.v v8, (a1) ; ZVFHMINLMULMAX2-RV32-NEXT: vle16.v v9, (a0) ; ZVFHMINLMULMAX2-RV32-NEXT: vfwcvt.f.f.v v10, v8 ; ZVFHMINLMULMAX2-RV32-NEXT: vfwcvt.f.f.v v8, v9 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; ZVFHMINLMULMAX2-RV32-NEXT: vfsgnj.vv v8, v8, v10 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; ZVFHMINLMULMAX2-RV32-NEXT: vfncvt.f.f.w v9, v8 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma ; ZVFHMINLMULMAX2-RV32-NEXT: vslidedown.vi v8, v9, 2 ; ZVFHMINLMULMAX2-RV32-NEXT: addi a1, a0, 8 ; ZVFHMINLMULMAX2-RV32-NEXT: vse32.v v8, (a1) ; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma ; ZVFHMINLMULMAX2-RV32-NEXT: vse16.v v9, (a0) ; ZVFHMINLMULMAX2-RV32-NEXT: ret ; ; ZVFHMINLMULMAX2-RV64-LABEL: copysign_v6f16: ; ZVFHMINLMULMAX2-RV64: # %bb.0: ; ZVFHMINLMULMAX2-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma ; ZVFHMINLMULMAX2-RV64-NEXT: vle16.v v8, (a1) ; ZVFHMINLMULMAX2-RV64-NEXT: vle16.v v9, (a0) ; ZVFHMINLMULMAX2-RV64-NEXT: vfwcvt.f.f.v v10, v8 ; ZVFHMINLMULMAX2-RV64-NEXT: vfwcvt.f.f.v v8, v9 ; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; ZVFHMINLMULMAX2-RV64-NEXT: vfsgnj.vv v8, v8, v10 ; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; ZVFHMINLMULMAX2-RV64-NEXT: vfncvt.f.f.w v9, v8 ; ZVFHMINLMULMAX2-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma ; ZVFHMINLMULMAX2-RV64-NEXT: vse64.v v9, (a0) ; ZVFHMINLMULMAX2-RV64-NEXT: vslidedown.vi v8, v9, 2 ; ZVFHMINLMULMAX2-RV64-NEXT: addi a0, a0, 8 ; ZVFHMINLMULMAX2-RV64-NEXT: vse32.v v8, (a0) ; ZVFHMINLMULMAX2-RV64-NEXT: ret ; ; ZVFHMINLMULMAX1-RV32-LABEL: copysign_v6f16: ; ZVFHMINLMULMAX1-RV32: # %bb.0: ; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma ; ZVFHMINLMULMAX1-RV32-NEXT: vle16.v v8, (a1) ; ZVFHMINLMULMAX1-RV32-NEXT: vle16.v v9, (a0) ; ZVFHMINLMULMAX1-RV32-NEXT: vfwcvt.f.f.v v10, v8 ; ZVFHMINLMULMAX1-RV32-NEXT: vfwcvt.f.f.v v8, v9 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; ZVFHMINLMULMAX1-RV32-NEXT: vfsgnj.vv v8, v8, v10 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; ZVFHMINLMULMAX1-RV32-NEXT: vfncvt.f.f.w v9, v8 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma ; ZVFHMINLMULMAX1-RV32-NEXT: vslidedown.vi v8, v9, 2 ; ZVFHMINLMULMAX1-RV32-NEXT: addi a1, a0, 8 ; ZVFHMINLMULMAX1-RV32-NEXT: vse32.v v8, (a1) ; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma ; ZVFHMINLMULMAX1-RV32-NEXT: vse16.v v9, (a0) ; ZVFHMINLMULMAX1-RV32-NEXT: ret ; ; ZVFHMINLMULMAX1-RV64-LABEL: copysign_v6f16: ; ZVFHMINLMULMAX1-RV64: # %bb.0: ; ZVFHMINLMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma ; ZVFHMINLMULMAX1-RV64-NEXT: vle16.v v8, (a1) ; ZVFHMINLMULMAX1-RV64-NEXT: vle16.v v9, (a0) ; ZVFHMINLMULMAX1-RV64-NEXT: vfwcvt.f.f.v v10, v8 ; ZVFHMINLMULMAX1-RV64-NEXT: vfwcvt.f.f.v v8, v9 ; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; ZVFHMINLMULMAX1-RV64-NEXT: vfsgnj.vv v8, v8, v10 ; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; ZVFHMINLMULMAX1-RV64-NEXT: vfncvt.f.f.w v9, v8 ; ZVFHMINLMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma ; ZVFHMINLMULMAX1-RV64-NEXT: 
vse64.v v9, (a0) ; ZVFHMINLMULMAX1-RV64-NEXT: vslidedown.vi v8, v9, 2 ; ZVFHMINLMULMAX1-RV64-NEXT: addi a0, a0, 8 ; ZVFHMINLMULMAX1-RV64-NEXT: vse32.v v8, (a0) ; ZVFHMINLMULMAX1-RV64-NEXT: ret %a = load <6 x half>, ptr %x %b = load <6 x half>, ptr %y %c = call <6 x half> @llvm.copysign.v6f16(<6 x half> %a, <6 x half> %b) store <6 x half> %c, ptr %x ret void } declare <6 x half> @llvm.copysign.v6f16(<6 x half>, <6 x half>) define void @copysign_v4f32(ptr %x, ptr %y) { ; ZVFH-LABEL: copysign_v4f32: ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; ZVFH-NEXT: vle32.v v8, (a0) ; ZVFH-NEXT: vle32.v v9, (a1) ; ZVFH-NEXT: vfsgnj.vv v8, v8, v9 ; ZVFH-NEXT: vse32.v v8, (a0) ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: copysign_v4f32: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: vsetivli zero, 4, e32, mf2, ta, ma ; ZVFHMIN-NEXT: vle32.v v8, (a0) ; ZVFHMIN-NEXT: vle32.v v9, (a1) ; ZVFHMIN-NEXT: vfsgnj.vv v8, v8, v9 ; ZVFHMIN-NEXT: vse32.v v8, (a0) ; ZVFHMIN-NEXT: ret %a = load <4 x float>, ptr %x %b = load <4 x float>, ptr %y %c = call <4 x float> @llvm.copysign.v4f32(<4 x float> %a, <4 x float> %b) store <4 x float> %c, ptr %x ret void } declare <4 x float> @llvm.copysign.v4f32(<4 x float>, <4 x float>) define void @copysign_v2f64(ptr %x, ptr %y) { ; CHECK-LABEL: copysign_v2f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; CHECK-NEXT: vle64.v v8, (a0) ; CHECK-NEXT: vle64.v v9, (a1) ; CHECK-NEXT: vfsgnj.vv v8, v8, v9 ; CHECK-NEXT: vse64.v v8, (a0) ; CHECK-NEXT: ret %a = load <2 x double>, ptr %x %b = load <2 x double>, ptr %y %c = call <2 x double> @llvm.copysign.v2f64(<2 x double> %a, <2 x double> %b) store <2 x double> %c, ptr %x ret void } declare <2 x double> @llvm.copysign.v2f64(<2 x double>, <2 x double>) define void @copysign_vf_v8f16(ptr %x, half %y) { ; ZVFH-LABEL: copysign_vf_v8f16: ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; ZVFH-NEXT: vle16.v v8, (a0) ; ZVFH-NEXT: vfsgnj.vf v8, v8, fa0 ; ZVFH-NEXT: vse16.v v8, (a0) ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: copysign_vf_v8f16: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: vsetivli zero, 8, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vle16.v v8, (a0) ; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0 ; ZVFHMIN-NEXT: vsetvli a1, zero, e32, m1, ta, ma ; ZVFHMIN-NEXT: vfmv.v.f v9, fa5 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v9 ; ZVFHMIN-NEXT: vsetivli zero, 8, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; ZVFHMIN-NEXT: vfsgnj.vv v8, v9, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v9, v8 ; ZVFHMIN-NEXT: vse16.v v9, (a0) ; ZVFHMIN-NEXT: ret %a = load <8 x half>, ptr %x %b = insertelement <8 x half> poison, half %y, i32 0 %c = shufflevector <8 x half> %b, <8 x half> poison, <8 x i32> zeroinitializer %d = call <8 x half> @llvm.copysign.v8f16(<8 x half> %a, <8 x half> %c) store <8 x half> %d, ptr %x ret void } define void @copysign_vf_v6f16(ptr %x, half %y) { ; ZVFH-LABEL: copysign_vf_v6f16: ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma ; ZVFH-NEXT: vle16.v v8, (a0) ; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; ZVFH-NEXT: vfsgnj.vf v8, v8, fa0 ; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma ; ZVFH-NEXT: vse16.v v8, (a0) ; ZVFH-NEXT: ret ; ; ZVFHMINLMULMAX2-RV32-LABEL: copysign_vf_v6f16: ; ZVFHMINLMULMAX2-RV32: # %bb.0: ; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma ; ZVFHMINLMULMAX2-RV32-NEXT: vle16.v v8, (a0) ; 
ZVFHMINLMULMAX2-RV32-NEXT: fcvt.s.h fa5, fa0 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli a1, zero, e32, m1, ta, ma ; ZVFHMINLMULMAX2-RV32-NEXT: vfmv.v.f v9, fa5 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; ZVFHMINLMULMAX2-RV32-NEXT: vfncvt.f.f.w v10, v9 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma ; ZVFHMINLMULMAX2-RV32-NEXT: vfwcvt.f.f.v v9, v8 ; ZVFHMINLMULMAX2-RV32-NEXT: vfwcvt.f.f.v v8, v10 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; ZVFHMINLMULMAX2-RV32-NEXT: vfsgnj.vv v8, v9, v8 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; ZVFHMINLMULMAX2-RV32-NEXT: vfncvt.f.f.w v9, v8 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma ; ZVFHMINLMULMAX2-RV32-NEXT: vslidedown.vi v8, v9, 2 ; ZVFHMINLMULMAX2-RV32-NEXT: addi a1, a0, 8 ; ZVFHMINLMULMAX2-RV32-NEXT: vse32.v v8, (a1) ; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma ; ZVFHMINLMULMAX2-RV32-NEXT: vse16.v v9, (a0) ; ZVFHMINLMULMAX2-RV32-NEXT: ret ; ; ZVFHMINLMULMAX2-RV64-LABEL: copysign_vf_v6f16: ; ZVFHMINLMULMAX2-RV64: # %bb.0: ; ZVFHMINLMULMAX2-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma ; ZVFHMINLMULMAX2-RV64-NEXT: vle16.v v8, (a0) ; ZVFHMINLMULMAX2-RV64-NEXT: fcvt.s.h fa5, fa0 ; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli a1, zero, e32, m1, ta, ma ; ZVFHMINLMULMAX2-RV64-NEXT: vfmv.v.f v9, fa5 ; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; ZVFHMINLMULMAX2-RV64-NEXT: vfncvt.f.f.w v10, v9 ; ZVFHMINLMULMAX2-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma ; ZVFHMINLMULMAX2-RV64-NEXT: vfwcvt.f.f.v v9, v8 ; ZVFHMINLMULMAX2-RV64-NEXT: vfwcvt.f.f.v v8, v10 ; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; ZVFHMINLMULMAX2-RV64-NEXT: vfsgnj.vv v8, v9, v8 ; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; ZVFHMINLMULMAX2-RV64-NEXT: vfncvt.f.f.w v9, v8 ; ZVFHMINLMULMAX2-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma ; ZVFHMINLMULMAX2-RV64-NEXT: vse64.v v9, (a0) ; ZVFHMINLMULMAX2-RV64-NEXT: vslidedown.vi v8, v9, 2 ; ZVFHMINLMULMAX2-RV64-NEXT: addi a0, a0, 8 ; ZVFHMINLMULMAX2-RV64-NEXT: vse32.v v8, (a0) ; ZVFHMINLMULMAX2-RV64-NEXT: ret ; ; ZVFHMINLMULMAX1-RV32-LABEL: copysign_vf_v6f16: ; ZVFHMINLMULMAX1-RV32: # %bb.0: ; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma ; ZVFHMINLMULMAX1-RV32-NEXT: vle16.v v8, (a0) ; ZVFHMINLMULMAX1-RV32-NEXT: fcvt.s.h fa5, fa0 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli a1, zero, e32, m1, ta, ma ; ZVFHMINLMULMAX1-RV32-NEXT: vfmv.v.f v9, fa5 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; ZVFHMINLMULMAX1-RV32-NEXT: vfncvt.f.f.w v10, v9 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma ; ZVFHMINLMULMAX1-RV32-NEXT: vfwcvt.f.f.v v9, v8 ; ZVFHMINLMULMAX1-RV32-NEXT: vfwcvt.f.f.v v8, v10 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; ZVFHMINLMULMAX1-RV32-NEXT: vfsgnj.vv v8, v9, v8 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; ZVFHMINLMULMAX1-RV32-NEXT: vfncvt.f.f.w v9, v8 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma ; ZVFHMINLMULMAX1-RV32-NEXT: vslidedown.vi v8, v9, 2 ; ZVFHMINLMULMAX1-RV32-NEXT: addi a1, a0, 8 ; ZVFHMINLMULMAX1-RV32-NEXT: vse32.v v8, (a1) ; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma ; ZVFHMINLMULMAX1-RV32-NEXT: vse16.v v9, (a0) ; ZVFHMINLMULMAX1-RV32-NEXT: ret ; ; ZVFHMINLMULMAX1-RV64-LABEL: copysign_vf_v6f16: ; ZVFHMINLMULMAX1-RV64: # %bb.0: ; ZVFHMINLMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma ; ZVFHMINLMULMAX1-RV64-NEXT: vle16.v 
v8, (a0) ; ZVFHMINLMULMAX1-RV64-NEXT: fcvt.s.h fa5, fa0 ; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli a1, zero, e32, m1, ta, ma ; ZVFHMINLMULMAX1-RV64-NEXT: vfmv.v.f v9, fa5 ; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; ZVFHMINLMULMAX1-RV64-NEXT: vfncvt.f.f.w v10, v9 ; ZVFHMINLMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma ; ZVFHMINLMULMAX1-RV64-NEXT: vfwcvt.f.f.v v9, v8 ; ZVFHMINLMULMAX1-RV64-NEXT: vfwcvt.f.f.v v8, v10 ; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; ZVFHMINLMULMAX1-RV64-NEXT: vfsgnj.vv v8, v9, v8 ; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; ZVFHMINLMULMAX1-RV64-NEXT: vfncvt.f.f.w v9, v8 ; ZVFHMINLMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma ; ZVFHMINLMULMAX1-RV64-NEXT: vse64.v v9, (a0) ; ZVFHMINLMULMAX1-RV64-NEXT: vslidedown.vi v8, v9, 2 ; ZVFHMINLMULMAX1-RV64-NEXT: addi a0, a0, 8 ; ZVFHMINLMULMAX1-RV64-NEXT: vse32.v v8, (a0) ; ZVFHMINLMULMAX1-RV64-NEXT: ret %a = load <6 x half>, ptr %x %b = insertelement <6 x half> poison, half %y, i32 0 %c = shufflevector <6 x half> %b, <6 x half> poison, <6 x i32> zeroinitializer %d = call <6 x half> @llvm.copysign.v6f16(<6 x half> %a, <6 x half> %c) store <6 x half> %d, ptr %x ret void } define void @copysign_vf_v4f32(ptr %x, float %y) { ; ZVFH-LABEL: copysign_vf_v4f32: ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; ZVFH-NEXT: vle32.v v8, (a0) ; ZVFH-NEXT: vfsgnj.vf v8, v8, fa0 ; ZVFH-NEXT: vse32.v v8, (a0) ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: copysign_vf_v4f32: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: vsetivli zero, 4, e32, mf2, ta, ma ; ZVFHMIN-NEXT: vle32.v v8, (a0) ; ZVFHMIN-NEXT: vfsgnj.vf v8, v8, fa0 ; ZVFHMIN-NEXT: vse32.v v8, (a0) ; ZVFHMIN-NEXT: ret %a = load <4 x float>, ptr %x %b = insertelement <4 x float> poison, float %y, i32 0 %c = shufflevector <4 x float> %b, <4 x float> poison, <4 x i32> zeroinitializer %d = call <4 x float> @llvm.copysign.v4f32(<4 x float> %a, <4 x float> %c) store <4 x float> %d, ptr %x ret void } define void @copysign_vf_v2f64(ptr %x, double %y) { ; CHECK-LABEL: copysign_vf_v2f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; CHECK-NEXT: vle64.v v8, (a0) ; CHECK-NEXT: vfsgnj.vf v8, v8, fa0 ; CHECK-NEXT: vse64.v v8, (a0) ; CHECK-NEXT: ret %a = load <2 x double>, ptr %x %b = insertelement <2 x double> poison, double %y, i32 0 %c = shufflevector <2 x double> %b, <2 x double> poison, <2 x i32> zeroinitializer %d = call <2 x double> @llvm.copysign.v2f64(<2 x double> %a, <2 x double> %c) store <2 x double> %d, ptr %x ret void } define void @copysign_neg_v8f16(ptr %x, ptr %y) { ; ZVFH-LABEL: copysign_neg_v8f16: ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; ZVFH-NEXT: vle16.v v8, (a0) ; ZVFH-NEXT: vle16.v v9, (a1) ; ZVFH-NEXT: vfsgnjn.vv v8, v8, v9 ; ZVFH-NEXT: vse16.v v8, (a0) ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: copysign_neg_v8f16: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: vsetivli zero, 8, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vle16.v v8, (a1) ; ZVFHMIN-NEXT: vle16.v v9, (a0) ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; ZVFHMIN-NEXT: vfneg.v v8, v10 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v10 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; ZVFHMIN-NEXT: vfsgnj.vv v8, v8, v9 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v9, v8 ; ZVFHMIN-NEXT: vse16.v v9, (a0) ; ZVFHMIN-NEXT: ret %a = load 
<8 x half>, ptr %x %b = load <8 x half>, ptr %y %c = fneg <8 x half> %b %d = call <8 x half> @llvm.copysign.v8f16(<8 x half> %a, <8 x half> %c) store <8 x half> %d, ptr %x ret void } define void @copysign_neg_v6f16(ptr %x, ptr %y) { ; ZVFH-LABEL: copysign_neg_v6f16: ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma ; ZVFH-NEXT: vle16.v v8, (a0) ; ZVFH-NEXT: vle16.v v9, (a1) ; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; ZVFH-NEXT: vfsgnjn.vv v8, v8, v9 ; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma ; ZVFH-NEXT: vse16.v v8, (a0) ; ZVFH-NEXT: ret ; ; ZVFHMINLMULMAX2-RV32-LABEL: copysign_neg_v6f16: ; ZVFHMINLMULMAX2-RV32: # %bb.0: ; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma ; ZVFHMINLMULMAX2-RV32-NEXT: vle16.v v8, (a1) ; ZVFHMINLMULMAX2-RV32-NEXT: vle16.v v9, (a0) ; ZVFHMINLMULMAX2-RV32-NEXT: vfwcvt.f.f.v v10, v8 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; ZVFHMINLMULMAX2-RV32-NEXT: vfneg.v v8, v10 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; ZVFHMINLMULMAX2-RV32-NEXT: vfncvt.f.f.w v10, v8 ; ZVFHMINLMULMAX2-RV32-NEXT: vfwcvt.f.f.v v8, v9 ; ZVFHMINLMULMAX2-RV32-NEXT: vfwcvt.f.f.v v9, v10 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; ZVFHMINLMULMAX2-RV32-NEXT: vfsgnj.vv v8, v8, v9 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; ZVFHMINLMULMAX2-RV32-NEXT: vfncvt.f.f.w v9, v8 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma ; ZVFHMINLMULMAX2-RV32-NEXT: vslidedown.vi v8, v9, 2 ; ZVFHMINLMULMAX2-RV32-NEXT: addi a1, a0, 8 ; ZVFHMINLMULMAX2-RV32-NEXT: vse32.v v8, (a1) ; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma ; ZVFHMINLMULMAX2-RV32-NEXT: vse16.v v9, (a0) ; ZVFHMINLMULMAX2-RV32-NEXT: ret ; ; ZVFHMINLMULMAX2-RV64-LABEL: copysign_neg_v6f16: ; ZVFHMINLMULMAX2-RV64: # %bb.0: ; ZVFHMINLMULMAX2-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma ; ZVFHMINLMULMAX2-RV64-NEXT: vle16.v v8, (a1) ; ZVFHMINLMULMAX2-RV64-NEXT: vle16.v v9, (a0) ; ZVFHMINLMULMAX2-RV64-NEXT: vfwcvt.f.f.v v10, v8 ; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; ZVFHMINLMULMAX2-RV64-NEXT: vfneg.v v8, v10 ; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; ZVFHMINLMULMAX2-RV64-NEXT: vfncvt.f.f.w v10, v8 ; ZVFHMINLMULMAX2-RV64-NEXT: vfwcvt.f.f.v v8, v9 ; ZVFHMINLMULMAX2-RV64-NEXT: vfwcvt.f.f.v v9, v10 ; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; ZVFHMINLMULMAX2-RV64-NEXT: vfsgnj.vv v8, v8, v9 ; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; ZVFHMINLMULMAX2-RV64-NEXT: vfncvt.f.f.w v9, v8 ; ZVFHMINLMULMAX2-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma ; ZVFHMINLMULMAX2-RV64-NEXT: vse64.v v9, (a0) ; ZVFHMINLMULMAX2-RV64-NEXT: vslidedown.vi v8, v9, 2 ; ZVFHMINLMULMAX2-RV64-NEXT: addi a0, a0, 8 ; ZVFHMINLMULMAX2-RV64-NEXT: vse32.v v8, (a0) ; ZVFHMINLMULMAX2-RV64-NEXT: ret ; ; ZVFHMINLMULMAX1-RV32-LABEL: copysign_neg_v6f16: ; ZVFHMINLMULMAX1-RV32: # %bb.0: ; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma ; ZVFHMINLMULMAX1-RV32-NEXT: vle16.v v8, (a1) ; ZVFHMINLMULMAX1-RV32-NEXT: vle16.v v9, (a0) ; ZVFHMINLMULMAX1-RV32-NEXT: vfwcvt.f.f.v v10, v8 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; ZVFHMINLMULMAX1-RV32-NEXT: vfneg.v v8, v10 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; ZVFHMINLMULMAX1-RV32-NEXT: vfncvt.f.f.w v10, v8 ; ZVFHMINLMULMAX1-RV32-NEXT: vfwcvt.f.f.v v8, v9 ; ZVFHMINLMULMAX1-RV32-NEXT: vfwcvt.f.f.v v9, v10 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, 
zero, e32, m1, ta, ma ; ZVFHMINLMULMAX1-RV32-NEXT: vfsgnj.vv v8, v8, v9 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; ZVFHMINLMULMAX1-RV32-NEXT: vfncvt.f.f.w v9, v8 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma ; ZVFHMINLMULMAX1-RV32-NEXT: vslidedown.vi v8, v9, 2 ; ZVFHMINLMULMAX1-RV32-NEXT: addi a1, a0, 8 ; ZVFHMINLMULMAX1-RV32-NEXT: vse32.v v8, (a1) ; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma ; ZVFHMINLMULMAX1-RV32-NEXT: vse16.v v9, (a0) ; ZVFHMINLMULMAX1-RV32-NEXT: ret ; ; ZVFHMINLMULMAX1-RV64-LABEL: copysign_neg_v6f16: ; ZVFHMINLMULMAX1-RV64: # %bb.0: ; ZVFHMINLMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma ; ZVFHMINLMULMAX1-RV64-NEXT: vle16.v v8, (a1) ; ZVFHMINLMULMAX1-RV64-NEXT: vle16.v v9, (a0) ; ZVFHMINLMULMAX1-RV64-NEXT: vfwcvt.f.f.v v10, v8 ; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; ZVFHMINLMULMAX1-RV64-NEXT: vfneg.v v8, v10 ; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; ZVFHMINLMULMAX1-RV64-NEXT: vfncvt.f.f.w v10, v8 ; ZVFHMINLMULMAX1-RV64-NEXT: vfwcvt.f.f.v v8, v9 ; ZVFHMINLMULMAX1-RV64-NEXT: vfwcvt.f.f.v v9, v10 ; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; ZVFHMINLMULMAX1-RV64-NEXT: vfsgnj.vv v8, v8, v9 ; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; ZVFHMINLMULMAX1-RV64-NEXT: vfncvt.f.f.w v9, v8 ; ZVFHMINLMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma ; ZVFHMINLMULMAX1-RV64-NEXT: vse64.v v9, (a0) ; ZVFHMINLMULMAX1-RV64-NEXT: vslidedown.vi v8, v9, 2 ; ZVFHMINLMULMAX1-RV64-NEXT: addi a0, a0, 8 ; ZVFHMINLMULMAX1-RV64-NEXT: vse32.v v8, (a0) ; ZVFHMINLMULMAX1-RV64-NEXT: ret %a = load <6 x half>, ptr %x %b = load <6 x half>, ptr %y %c = fneg <6 x half> %b %d = call <6 x half> @llvm.copysign.v6f16(<6 x half> %a, <6 x half> %c) store <6 x half> %d, ptr %x ret void } define void @copysign_neg_v4f32(ptr %x, ptr %y) { ; ZVFH-LABEL: copysign_neg_v4f32: ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; ZVFH-NEXT: vle32.v v8, (a0) ; ZVFH-NEXT: vle32.v v9, (a1) ; ZVFH-NEXT: vfsgnjn.vv v8, v8, v9 ; ZVFH-NEXT: vse32.v v8, (a0) ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: copysign_neg_v4f32: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: vsetivli zero, 4, e32, mf2, ta, ma ; ZVFHMIN-NEXT: vle32.v v8, (a0) ; ZVFHMIN-NEXT: vle32.v v9, (a1) ; ZVFHMIN-NEXT: vfsgnjn.vv v8, v8, v9 ; ZVFHMIN-NEXT: vse32.v v8, (a0) ; ZVFHMIN-NEXT: ret %a = load <4 x float>, ptr %x %b = load <4 x float>, ptr %y %c = fneg <4 x float> %b %d = call <4 x float> @llvm.copysign.v4f32(<4 x float> %a, <4 x float> %c) store <4 x float> %d, ptr %x ret void } define void @copysign_neg_v2f64(ptr %x, ptr %y) { ; CHECK-LABEL: copysign_neg_v2f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; CHECK-NEXT: vle64.v v8, (a0) ; CHECK-NEXT: vle64.v v9, (a1) ; CHECK-NEXT: vfsgnjn.vv v8, v8, v9 ; CHECK-NEXT: vse64.v v8, (a0) ; CHECK-NEXT: ret %a = load <2 x double>, ptr %x %b = load <2 x double>, ptr %y %c = fneg <2 x double> %b %d = call <2 x double> @llvm.copysign.v2f64(<2 x double> %a, <2 x double> %c) store <2 x double> %d, ptr %x ret void } define void @copysign_neg_trunc_v4f16_v4f32(ptr %x, ptr %y) { ; ZVFH-LABEL: copysign_neg_trunc_v4f16_v4f32: ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetivli zero, 4, e16, mf2, ta, ma ; ZVFH-NEXT: vle32.v v8, (a1) ; ZVFH-NEXT: vle16.v v9, (a0) ; ZVFH-NEXT: vfncvt.f.f.w v10, v8 ; ZVFH-NEXT: vfsgnjn.vv v8, v9, v10 ; ZVFH-NEXT: vse16.v v8, (a0) ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: copysign_neg_trunc_v4f16_v4f32: ; ZVFHMIN: # %bb.0: ; 
ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf4, ta, ma ; ZVFHMIN-NEXT: vle16.v v8, (a0) ; ZVFHMIN-NEXT: vle32.v v9, (a1) ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; ZVFHMIN-NEXT: vfneg.v v8, v9 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v9, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; ZVFHMIN-NEXT: vfsgnj.vv v8, v10, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v9, v8 ; ZVFHMIN-NEXT: vse16.v v9, (a0) ; ZVFHMIN-NEXT: ret %a = load <4 x half>, ptr %x %b = load <4 x float>, ptr %y %c = fneg <4 x float> %b %d = fptrunc <4 x float> %c to <4 x half> %e = call <4 x half> @llvm.copysign.v4f16(<4 x half> %a, <4 x half> %d) store <4 x half> %e, ptr %x ret void } declare <4 x half> @llvm.copysign.v4f16(<4 x half>, <4 x half>) define void @copysign_neg_trunc_v3f16_v3f32(ptr %x, ptr %y) { ; ZVFH-LABEL: copysign_neg_trunc_v3f16_v3f32: ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetivli zero, 3, e16, mf2, ta, ma ; ZVFH-NEXT: vle32.v v8, (a1) ; ZVFH-NEXT: vle16.v v9, (a0) ; ZVFH-NEXT: vsetivli zero, 4, e16, mf2, ta, ma ; ZVFH-NEXT: vfncvt.f.f.w v10, v8 ; ZVFH-NEXT: vfsgnjn.vv v8, v9, v10 ; ZVFH-NEXT: vsetivli zero, 3, e16, mf2, ta, ma ; ZVFH-NEXT: vse16.v v8, (a0) ; ZVFH-NEXT: ret ; ; ZVFHMINLMULMAX2-RV32-LABEL: copysign_neg_trunc_v3f16_v3f32: ; ZVFHMINLMULMAX2-RV32: # %bb.0: ; ZVFHMINLMULMAX2-RV32-NEXT: addi sp, sp, -16 ; ZVFHMINLMULMAX2-RV32-NEXT: .cfi_def_cfa_offset 16 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma ; ZVFHMINLMULMAX2-RV32-NEXT: vle16.v v8, (a0) ; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 3, e32, mf2, ta, ma ; ZVFHMINLMULMAX2-RV32-NEXT: vle32.v v9, (a1) ; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma ; ZVFHMINLMULMAX2-RV32-NEXT: vfwcvt.f.f.v v10, v8 ; ZVFHMINLMULMAX2-RV32-NEXT: vfncvt.f.f.w v8, v9 ; ZVFHMINLMULMAX2-RV32-NEXT: vfwcvt.f.f.v v9, v8 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; ZVFHMINLMULMAX2-RV32-NEXT: vfneg.v v8, v9 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e16, mf4, ta, ma ; ZVFHMINLMULMAX2-RV32-NEXT: vfncvt.f.f.w v9, v8 ; ZVFHMINLMULMAX2-RV32-NEXT: vfwcvt.f.f.v v8, v9 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; ZVFHMINLMULMAX2-RV32-NEXT: vfsgnj.vv v8, v10, v8 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e16, mf4, ta, ma ; ZVFHMINLMULMAX2-RV32-NEXT: vfncvt.f.f.w v9, v8 ; ZVFHMINLMULMAX2-RV32-NEXT: addi a1, sp, 8 ; ZVFHMINLMULMAX2-RV32-NEXT: vse16.v v9, (a1) ; ZVFHMINLMULMAX2-RV32-NEXT: flh fa5, 12(sp) ; ZVFHMINLMULMAX2-RV32-NEXT: fsh fa5, 4(a0) ; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma ; ZVFHMINLMULMAX2-RV32-NEXT: vse32.v v9, (a0) ; ZVFHMINLMULMAX2-RV32-NEXT: addi sp, sp, 16 ; ZVFHMINLMULMAX2-RV32-NEXT: ret ; ; ZVFHMINLMULMAX2-RV64-LABEL: copysign_neg_trunc_v3f16_v3f32: ; ZVFHMINLMULMAX2-RV64: # %bb.0: ; ZVFHMINLMULMAX2-RV64-NEXT: addi sp, sp, -16 ; ZVFHMINLMULMAX2-RV64-NEXT: .cfi_def_cfa_offset 16 ; ZVFHMINLMULMAX2-RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; ZVFHMINLMULMAX2-RV64-NEXT: vle64.v v8, (a0) ; ZVFHMINLMULMAX2-RV64-NEXT: mv a2, sp ; ZVFHMINLMULMAX2-RV64-NEXT: vse64.v v8, (a2) ; ZVFHMINLMULMAX2-RV64-NEXT: vsetivli zero, 4, e16, mf4, ta, ma ; ZVFHMINLMULMAX2-RV64-NEXT: vle16.v v8, (a2) ; ZVFHMINLMULMAX2-RV64-NEXT: vsetivli zero, 3, e32, mf2, ta, ma ; ZVFHMINLMULMAX2-RV64-NEXT: vle32.v v9, (a1) ; ZVFHMINLMULMAX2-RV64-NEXT: 
vsetivli zero, 4, e16, mf4, ta, ma ; ZVFHMINLMULMAX2-RV64-NEXT: vfwcvt.f.f.v v10, v8 ; ZVFHMINLMULMAX2-RV64-NEXT: vfncvt.f.f.w v8, v9 ; ZVFHMINLMULMAX2-RV64-NEXT: vfwcvt.f.f.v v9, v8 ; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; ZVFHMINLMULMAX2-RV64-NEXT: vfneg.v v8, v9 ; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e16, mf4, ta, ma ; ZVFHMINLMULMAX2-RV64-NEXT: vfncvt.f.f.w v9, v8 ; ZVFHMINLMULMAX2-RV64-NEXT: vfwcvt.f.f.v v8, v9 ; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; ZVFHMINLMULMAX2-RV64-NEXT: vfsgnj.vv v8, v10, v8 ; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e16, mf4, ta, ma ; ZVFHMINLMULMAX2-RV64-NEXT: vfncvt.f.f.w v9, v8 ; ZVFHMINLMULMAX2-RV64-NEXT: addi a1, sp, 8 ; ZVFHMINLMULMAX2-RV64-NEXT: vse16.v v9, (a1) ; ZVFHMINLMULMAX2-RV64-NEXT: flh fa5, 12(sp) ; ZVFHMINLMULMAX2-RV64-NEXT: fsh fa5, 4(a0) ; ZVFHMINLMULMAX2-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma ; ZVFHMINLMULMAX2-RV64-NEXT: vse32.v v9, (a0) ; ZVFHMINLMULMAX2-RV64-NEXT: addi sp, sp, 16 ; ZVFHMINLMULMAX2-RV64-NEXT: ret ; ; ZVFHMINLMULMAX1-RV32-LABEL: copysign_neg_trunc_v3f16_v3f32: ; ZVFHMINLMULMAX1-RV32: # %bb.0: ; ZVFHMINLMULMAX1-RV32-NEXT: addi sp, sp, -16 ; ZVFHMINLMULMAX1-RV32-NEXT: .cfi_def_cfa_offset 16 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma ; ZVFHMINLMULMAX1-RV32-NEXT: vle16.v v8, (a0) ; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 3, e32, mf2, ta, ma ; ZVFHMINLMULMAX1-RV32-NEXT: vle32.v v9, (a1) ; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma ; ZVFHMINLMULMAX1-RV32-NEXT: vfwcvt.f.f.v v10, v8 ; ZVFHMINLMULMAX1-RV32-NEXT: vfncvt.f.f.w v8, v9 ; ZVFHMINLMULMAX1-RV32-NEXT: vfwcvt.f.f.v v9, v8 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; ZVFHMINLMULMAX1-RV32-NEXT: vfneg.v v8, v9 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e16, mf4, ta, ma ; ZVFHMINLMULMAX1-RV32-NEXT: vfncvt.f.f.w v9, v8 ; ZVFHMINLMULMAX1-RV32-NEXT: vfwcvt.f.f.v v8, v9 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; ZVFHMINLMULMAX1-RV32-NEXT: vfsgnj.vv v8, v10, v8 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e16, mf4, ta, ma ; ZVFHMINLMULMAX1-RV32-NEXT: vfncvt.f.f.w v9, v8 ; ZVFHMINLMULMAX1-RV32-NEXT: addi a1, sp, 8 ; ZVFHMINLMULMAX1-RV32-NEXT: vse16.v v9, (a1) ; ZVFHMINLMULMAX1-RV32-NEXT: flh fa5, 12(sp) ; ZVFHMINLMULMAX1-RV32-NEXT: fsh fa5, 4(a0) ; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma ; ZVFHMINLMULMAX1-RV32-NEXT: vse32.v v9, (a0) ; ZVFHMINLMULMAX1-RV32-NEXT: addi sp, sp, 16 ; ZVFHMINLMULMAX1-RV32-NEXT: ret ; ; ZVFHMINLMULMAX1-RV64-LABEL: copysign_neg_trunc_v3f16_v3f32: ; ZVFHMINLMULMAX1-RV64: # %bb.0: ; ZVFHMINLMULMAX1-RV64-NEXT: addi sp, sp, -16 ; ZVFHMINLMULMAX1-RV64-NEXT: .cfi_def_cfa_offset 16 ; ZVFHMINLMULMAX1-RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; ZVFHMINLMULMAX1-RV64-NEXT: vle64.v v8, (a0) ; ZVFHMINLMULMAX1-RV64-NEXT: mv a2, sp ; ZVFHMINLMULMAX1-RV64-NEXT: vse64.v v8, (a2) ; ZVFHMINLMULMAX1-RV64-NEXT: vsetivli zero, 4, e16, mf4, ta, ma ; ZVFHMINLMULMAX1-RV64-NEXT: vle16.v v8, (a2) ; ZVFHMINLMULMAX1-RV64-NEXT: vsetivli zero, 3, e32, mf2, ta, ma ; ZVFHMINLMULMAX1-RV64-NEXT: vle32.v v9, (a1) ; ZVFHMINLMULMAX1-RV64-NEXT: vsetivli zero, 4, e16, mf4, ta, ma ; ZVFHMINLMULMAX1-RV64-NEXT: vfwcvt.f.f.v v10, v8 ; ZVFHMINLMULMAX1-RV64-NEXT: vfncvt.f.f.w v8, v9 ; ZVFHMINLMULMAX1-RV64-NEXT: vfwcvt.f.f.v v9, v8 ; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; ZVFHMINLMULMAX1-RV64-NEXT: vfneg.v v8, v9 ; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e16, mf4, ta, ma ; 
ZVFHMINLMULMAX1-RV64-NEXT: vfncvt.f.f.w v9, v8 ; ZVFHMINLMULMAX1-RV64-NEXT: vfwcvt.f.f.v v8, v9 ; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; ZVFHMINLMULMAX1-RV64-NEXT: vfsgnj.vv v8, v10, v8 ; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e16, mf4, ta, ma ; ZVFHMINLMULMAX1-RV64-NEXT: vfncvt.f.f.w v9, v8 ; ZVFHMINLMULMAX1-RV64-NEXT: addi a1, sp, 8 ; ZVFHMINLMULMAX1-RV64-NEXT: vse16.v v9, (a1) ; ZVFHMINLMULMAX1-RV64-NEXT: flh fa5, 12(sp) ; ZVFHMINLMULMAX1-RV64-NEXT: fsh fa5, 4(a0) ; ZVFHMINLMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma ; ZVFHMINLMULMAX1-RV64-NEXT: vse32.v v9, (a0) ; ZVFHMINLMULMAX1-RV64-NEXT: addi sp, sp, 16 ; ZVFHMINLMULMAX1-RV64-NEXT: ret %a = load <3 x half>, ptr %x %b = load <3 x float>, ptr %y %c = fneg <3 x float> %b %d = fptrunc <3 x float> %c to <3 x half> %e = call <3 x half> @llvm.copysign.v3f16(<3 x half> %a, <3 x half> %d) store <3 x half> %e, ptr %x ret void } declare <3 x half> @llvm.copysign.v3f16(<3 x half>, <3 x half>) define void @copysign_neg_ext_v2f64_v2f32(ptr %x, ptr %y) { ; CHECK-LABEL: copysign_neg_ext_v2f64_v2f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; CHECK-NEXT: vle32.v v8, (a1) ; CHECK-NEXT: vle64.v v9, (a0) ; CHECK-NEXT: vfwcvt.f.f.v v10, v8 ; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma ; CHECK-NEXT: vfsgnjn.vv v8, v9, v10 ; CHECK-NEXT: vse64.v v8, (a0) ; CHECK-NEXT: ret %a = load <2 x double>, ptr %x %b = load <2 x float>, ptr %y %c = fneg <2 x float> %b %d = fpext <2 x float> %c to <2 x double> %e = call <2 x double> @llvm.copysign.v2f64(<2 x double> %a, <2 x double> %d) store <2 x double> %e, ptr %x ret void } define void @sqrt_v8f16(ptr %x) { ; ZVFH-LABEL: sqrt_v8f16: ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; ZVFH-NEXT: vle16.v v8, (a0) ; ZVFH-NEXT: vfsqrt.v v8, v8 ; ZVFH-NEXT: vse16.v v8, (a0) ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: sqrt_v8f16: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: vsetivli zero, 8, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vle16.v v8, (a0) ; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; ZVFHMIN-NEXT: vfsqrt.v v8, v9 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v9, v8 ; ZVFHMIN-NEXT: vse16.v v9, (a0) ; ZVFHMIN-NEXT: ret %a = load <8 x half>, ptr %x %b = call <8 x half> @llvm.sqrt.v8f16(<8 x half> %a) store <8 x half> %b, ptr %x ret void } declare <8 x half> @llvm.sqrt.v8f16(<8 x half>) define void @sqrt_v6f16(ptr %x) { ; ZVFH-LABEL: sqrt_v6f16: ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma ; ZVFH-NEXT: vle16.v v8, (a0) ; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; ZVFH-NEXT: vfsqrt.v v8, v8 ; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma ; ZVFH-NEXT: vse16.v v8, (a0) ; ZVFH-NEXT: ret ; ; ZVFHMINLMULMAX2-RV32-LABEL: sqrt_v6f16: ; ZVFHMINLMULMAX2-RV32: # %bb.0: ; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma ; ZVFHMINLMULMAX2-RV32-NEXT: vle16.v v8, (a0) ; ZVFHMINLMULMAX2-RV32-NEXT: vfwcvt.f.f.v v9, v8 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; ZVFHMINLMULMAX2-RV32-NEXT: vfsqrt.v v8, v9 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; ZVFHMINLMULMAX2-RV32-NEXT: vfncvt.f.f.w v9, v8 ; ZVFHMINLMULMAX2-RV32-NEXT: addi a1, a0, 8 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma ; ZVFHMINLMULMAX2-RV32-NEXT: vslidedown.vi v8, v9, 2 ; ZVFHMINLMULMAX2-RV32-NEXT: vse32.v v8, (a1) ; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma ; ZVFHMINLMULMAX2-RV32-NEXT: vse16.v v9, (a0) ; 
ZVFHMINLMULMAX2-RV32-NEXT: ret ; ; ZVFHMINLMULMAX2-RV64-LABEL: sqrt_v6f16: ; ZVFHMINLMULMAX2-RV64: # %bb.0: ; ZVFHMINLMULMAX2-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma ; ZVFHMINLMULMAX2-RV64-NEXT: vle16.v v8, (a0) ; ZVFHMINLMULMAX2-RV64-NEXT: vfwcvt.f.f.v v9, v8 ; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; ZVFHMINLMULMAX2-RV64-NEXT: vfsqrt.v v8, v9 ; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; ZVFHMINLMULMAX2-RV64-NEXT: vfncvt.f.f.w v9, v8 ; ZVFHMINLMULMAX2-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma ; ZVFHMINLMULMAX2-RV64-NEXT: vse64.v v9, (a0) ; ZVFHMINLMULMAX2-RV64-NEXT: addi a0, a0, 8 ; ZVFHMINLMULMAX2-RV64-NEXT: vslidedown.vi v8, v9, 2 ; ZVFHMINLMULMAX2-RV64-NEXT: vse32.v v8, (a0) ; ZVFHMINLMULMAX2-RV64-NEXT: ret ; ; ZVFHMINLMULMAX1-RV32-LABEL: sqrt_v6f16: ; ZVFHMINLMULMAX1-RV32: # %bb.0: ; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma ; ZVFHMINLMULMAX1-RV32-NEXT: vle16.v v8, (a0) ; ZVFHMINLMULMAX1-RV32-NEXT: vfwcvt.f.f.v v9, v8 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; ZVFHMINLMULMAX1-RV32-NEXT: vfsqrt.v v8, v9 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; ZVFHMINLMULMAX1-RV32-NEXT: vfncvt.f.f.w v9, v8 ; ZVFHMINLMULMAX1-RV32-NEXT: addi a1, a0, 8 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma ; ZVFHMINLMULMAX1-RV32-NEXT: vslidedown.vi v8, v9, 2 ; ZVFHMINLMULMAX1-RV32-NEXT: vse32.v v8, (a1) ; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma ; ZVFHMINLMULMAX1-RV32-NEXT: vse16.v v9, (a0) ; ZVFHMINLMULMAX1-RV32-NEXT: ret ; ; ZVFHMINLMULMAX1-RV64-LABEL: sqrt_v6f16: ; ZVFHMINLMULMAX1-RV64: # %bb.0: ; ZVFHMINLMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma ; ZVFHMINLMULMAX1-RV64-NEXT: vle16.v v8, (a0) ; ZVFHMINLMULMAX1-RV64-NEXT: vfwcvt.f.f.v v9, v8 ; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; ZVFHMINLMULMAX1-RV64-NEXT: vfsqrt.v v8, v9 ; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; ZVFHMINLMULMAX1-RV64-NEXT: vfncvt.f.f.w v9, v8 ; ZVFHMINLMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma ; ZVFHMINLMULMAX1-RV64-NEXT: vse64.v v9, (a0) ; ZVFHMINLMULMAX1-RV64-NEXT: addi a0, a0, 8 ; ZVFHMINLMULMAX1-RV64-NEXT: vslidedown.vi v8, v9, 2 ; ZVFHMINLMULMAX1-RV64-NEXT: vse32.v v8, (a0) ; ZVFHMINLMULMAX1-RV64-NEXT: ret %a = load <6 x half>, ptr %x %b = call <6 x half> @llvm.sqrt.v6f16(<6 x half> %a) store <6 x half> %b, ptr %x ret void } declare <6 x half> @llvm.sqrt.v6f16(<6 x half>) define void @sqrt_v4f32(ptr %x) { ; ZVFH-LABEL: sqrt_v4f32: ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; ZVFH-NEXT: vle32.v v8, (a0) ; ZVFH-NEXT: vfsqrt.v v8, v8 ; ZVFH-NEXT: vse32.v v8, (a0) ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: sqrt_v4f32: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: vsetivli zero, 4, e32, mf2, ta, ma ; ZVFHMIN-NEXT: vle32.v v8, (a0) ; ZVFHMIN-NEXT: vfsqrt.v v8, v8 ; ZVFHMIN-NEXT: vse32.v v8, (a0) ; ZVFHMIN-NEXT: ret %a = load <4 x float>, ptr %x %b = call <4 x float> @llvm.sqrt.v4f32(<4 x float> %a) store <4 x float> %b, ptr %x ret void } declare <4 x float> @llvm.sqrt.v4f32(<4 x float>) define void @sqrt_v2f64(ptr %x) { ; CHECK-LABEL: sqrt_v2f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; CHECK-NEXT: vle64.v v8, (a0) ; CHECK-NEXT: vfsqrt.v v8, v8 ; CHECK-NEXT: vse64.v v8, (a0) ; CHECK-NEXT: ret %a = load <2 x double>, ptr %x %b = call <2 x double> @llvm.sqrt.v2f64(<2 x double> %a) store <2 x double> %b, ptr %x ret void } declare <2 x double> @llvm.sqrt.v2f64(<2 x double>) define 
void @fma_v8f16(ptr %x, ptr %y, ptr %z) { ; ZVFH-LABEL: fma_v8f16: ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; ZVFH-NEXT: vle16.v v8, (a0) ; ZVFH-NEXT: vle16.v v9, (a1) ; ZVFH-NEXT: vle16.v v10, (a2) ; ZVFH-NEXT: vfmacc.vv v10, v8, v9 ; ZVFH-NEXT: vse16.v v10, (a0) ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: fma_v8f16: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: vsetivli zero, 8, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vle16.v v8, (a2) ; ZVFHMIN-NEXT: vle16.v v9, (a0) ; ZVFHMIN-NEXT: vle16.v v10, (a1) ; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v10 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; ZVFHMIN-NEXT: vfmadd.vv v9, v8, v11 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 ; ZVFHMIN-NEXT: vse16.v v8, (a0) ; ZVFHMIN-NEXT: ret %a = load <8 x half>, ptr %x %b = load <8 x half>, ptr %y %c = load <8 x half>, ptr %z %d = call <8 x half> @llvm.fma.v8f16(<8 x half> %a, <8 x half> %b, <8 x half> %c) store <8 x half> %d, ptr %x ret void } declare <8 x half> @llvm.fma.v8f16(<8 x half>, <8 x half>, <8 x half>) define void @fma_v6f16(ptr %x, ptr %y, ptr %z) { ; ZVFH-LABEL: fma_v6f16: ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma ; ZVFH-NEXT: vle16.v v8, (a0) ; ZVFH-NEXT: vle16.v v9, (a1) ; ZVFH-NEXT: vle16.v v10, (a2) ; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; ZVFH-NEXT: vfmacc.vv v10, v8, v9 ; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma ; ZVFH-NEXT: vse16.v v10, (a0) ; ZVFH-NEXT: ret ; ; ZVFHMINLMULMAX2-RV32-LABEL: fma_v6f16: ; ZVFHMINLMULMAX2-RV32: # %bb.0: ; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma ; ZVFHMINLMULMAX2-RV32-NEXT: vle16.v v8, (a2) ; ZVFHMINLMULMAX2-RV32-NEXT: vle16.v v9, (a0) ; ZVFHMINLMULMAX2-RV32-NEXT: vle16.v v10, (a1) ; ZVFHMINLMULMAX2-RV32-NEXT: vfwcvt.f.f.v v11, v8 ; ZVFHMINLMULMAX2-RV32-NEXT: vfwcvt.f.f.v v8, v9 ; ZVFHMINLMULMAX2-RV32-NEXT: vfwcvt.f.f.v v9, v10 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; ZVFHMINLMULMAX2-RV32-NEXT: vfmadd.vv v9, v8, v11 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; ZVFHMINLMULMAX2-RV32-NEXT: vfncvt.f.f.w v8, v9 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma ; ZVFHMINLMULMAX2-RV32-NEXT: vslidedown.vi v9, v8, 2 ; ZVFHMINLMULMAX2-RV32-NEXT: addi a1, a0, 8 ; ZVFHMINLMULMAX2-RV32-NEXT: vse32.v v9, (a1) ; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma ; ZVFHMINLMULMAX2-RV32-NEXT: vse16.v v8, (a0) ; ZVFHMINLMULMAX2-RV32-NEXT: ret ; ; ZVFHMINLMULMAX2-RV64-LABEL: fma_v6f16: ; ZVFHMINLMULMAX2-RV64: # %bb.0: ; ZVFHMINLMULMAX2-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma ; ZVFHMINLMULMAX2-RV64-NEXT: vle16.v v8, (a2) ; ZVFHMINLMULMAX2-RV64-NEXT: vle16.v v9, (a0) ; ZVFHMINLMULMAX2-RV64-NEXT: vle16.v v10, (a1) ; ZVFHMINLMULMAX2-RV64-NEXT: vfwcvt.f.f.v v11, v8 ; ZVFHMINLMULMAX2-RV64-NEXT: vfwcvt.f.f.v v8, v9 ; ZVFHMINLMULMAX2-RV64-NEXT: vfwcvt.f.f.v v9, v10 ; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; ZVFHMINLMULMAX2-RV64-NEXT: vfmadd.vv v9, v8, v11 ; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; ZVFHMINLMULMAX2-RV64-NEXT: vfncvt.f.f.w v8, v9 ; ZVFHMINLMULMAX2-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma ; ZVFHMINLMULMAX2-RV64-NEXT: vse64.v v8, (a0) ; ZVFHMINLMULMAX2-RV64-NEXT: vslidedown.vi v8, v8, 2 ; ZVFHMINLMULMAX2-RV64-NEXT: addi a0, a0, 8 ; ZVFHMINLMULMAX2-RV64-NEXT: vse32.v v8, (a0) ; ZVFHMINLMULMAX2-RV64-NEXT: ret ; ; ZVFHMINLMULMAX1-RV32-LABEL: fma_v6f16: ; ZVFHMINLMULMAX1-RV32: 
# %bb.0: ; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma ; ZVFHMINLMULMAX1-RV32-NEXT: vle16.v v8, (a2) ; ZVFHMINLMULMAX1-RV32-NEXT: vle16.v v9, (a0) ; ZVFHMINLMULMAX1-RV32-NEXT: vle16.v v10, (a1) ; ZVFHMINLMULMAX1-RV32-NEXT: vfwcvt.f.f.v v11, v8 ; ZVFHMINLMULMAX1-RV32-NEXT: vfwcvt.f.f.v v8, v9 ; ZVFHMINLMULMAX1-RV32-NEXT: vfwcvt.f.f.v v9, v10 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; ZVFHMINLMULMAX1-RV32-NEXT: vfmadd.vv v9, v8, v11 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; ZVFHMINLMULMAX1-RV32-NEXT: vfncvt.f.f.w v8, v9 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma ; ZVFHMINLMULMAX1-RV32-NEXT: vslidedown.vi v9, v8, 2 ; ZVFHMINLMULMAX1-RV32-NEXT: addi a1, a0, 8 ; ZVFHMINLMULMAX1-RV32-NEXT: vse32.v v9, (a1) ; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma ; ZVFHMINLMULMAX1-RV32-NEXT: vse16.v v8, (a0) ; ZVFHMINLMULMAX1-RV32-NEXT: ret ; ; ZVFHMINLMULMAX1-RV64-LABEL: fma_v6f16: ; ZVFHMINLMULMAX1-RV64: # %bb.0: ; ZVFHMINLMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma ; ZVFHMINLMULMAX1-RV64-NEXT: vle16.v v8, (a2) ; ZVFHMINLMULMAX1-RV64-NEXT: vle16.v v9, (a0) ; ZVFHMINLMULMAX1-RV64-NEXT: vle16.v v10, (a1) ; ZVFHMINLMULMAX1-RV64-NEXT: vfwcvt.f.f.v v11, v8 ; ZVFHMINLMULMAX1-RV64-NEXT: vfwcvt.f.f.v v8, v9 ; ZVFHMINLMULMAX1-RV64-NEXT: vfwcvt.f.f.v v9, v10 ; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; ZVFHMINLMULMAX1-RV64-NEXT: vfmadd.vv v9, v8, v11 ; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; ZVFHMINLMULMAX1-RV64-NEXT: vfncvt.f.f.w v8, v9 ; ZVFHMINLMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma ; ZVFHMINLMULMAX1-RV64-NEXT: vse64.v v8, (a0) ; ZVFHMINLMULMAX1-RV64-NEXT: vslidedown.vi v8, v8, 2 ; ZVFHMINLMULMAX1-RV64-NEXT: addi a0, a0, 8 ; ZVFHMINLMULMAX1-RV64-NEXT: vse32.v v8, (a0) ; ZVFHMINLMULMAX1-RV64-NEXT: ret %a = load <6 x half>, ptr %x %b = load <6 x half>, ptr %y %c = load <6 x half>, ptr %z %d = call <6 x half> @llvm.fma.v6f16(<6 x half> %a, <6 x half> %b, <6 x half> %c) store <6 x half> %d, ptr %x ret void } declare <6 x half> @llvm.fma.v6f16(<6 x half>, <6 x half>, <6 x half>) define void @fma_v4f32(ptr %x, ptr %y, ptr %z) { ; ZVFH-LABEL: fma_v4f32: ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; ZVFH-NEXT: vle32.v v8, (a0) ; ZVFH-NEXT: vle32.v v9, (a1) ; ZVFH-NEXT: vle32.v v10, (a2) ; ZVFH-NEXT: vfmacc.vv v10, v8, v9 ; ZVFH-NEXT: vse32.v v10, (a0) ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: fma_v4f32: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: vsetivli zero, 4, e32, mf2, ta, ma ; ZVFHMIN-NEXT: vle32.v v8, (a0) ; ZVFHMIN-NEXT: vle32.v v9, (a1) ; ZVFHMIN-NEXT: vle32.v v10, (a2) ; ZVFHMIN-NEXT: vfmacc.vv v10, v8, v9 ; ZVFHMIN-NEXT: vse32.v v10, (a0) ; ZVFHMIN-NEXT: ret %a = load <4 x float>, ptr %x %b = load <4 x float>, ptr %y %c = load <4 x float>, ptr %z %d = call <4 x float> @llvm.fma.v4f32(<4 x float> %a, <4 x float> %b, <4 x float> %c) store <4 x float> %d, ptr %x ret void } declare <4 x float> @llvm.fma.v4f32(<4 x float>, <4 x float>, <4 x float>) define void @fma_v2f64(ptr %x, ptr %y, ptr %z) { ; CHECK-LABEL: fma_v2f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; CHECK-NEXT: vle64.v v8, (a0) ; CHECK-NEXT: vle64.v v9, (a1) ; CHECK-NEXT: vle64.v v10, (a2) ; CHECK-NEXT: vfmacc.vv v10, v8, v9 ; CHECK-NEXT: vse64.v v10, (a0) ; CHECK-NEXT: ret %a = load <2 x double>, ptr %x %b = load <2 x double>, ptr %y %c = load <2 x double>, ptr %z %d = call <2 x double> @llvm.fma.v2f64(<2 x double> %a, <2 x double> %b, <2 x 
double> %c) store <2 x double> %d, ptr %x ret void } declare <2 x double> @llvm.fma.v2f64(<2 x double>, <2 x double>, <2 x double>) define void @fmsub_v8f16(ptr %x, ptr %y, ptr %z) { ; ZVFH-LABEL: fmsub_v8f16: ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; ZVFH-NEXT: vle16.v v8, (a0) ; ZVFH-NEXT: vle16.v v9, (a1) ; ZVFH-NEXT: vle16.v v10, (a2) ; ZVFH-NEXT: vfmsac.vv v10, v8, v9 ; ZVFH-NEXT: vse16.v v10, (a0) ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: fmsub_v8f16: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: vsetivli zero, 8, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vle16.v v8, (a2) ; ZVFHMIN-NEXT: vle16.v v9, (a0) ; ZVFHMIN-NEXT: vle16.v v10, (a1) ; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; ZVFHMIN-NEXT: vfneg.v v8, v11 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v11, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v10 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v11 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; ZVFHMIN-NEXT: vfmacc.vv v10, v8, v9 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10 ; ZVFHMIN-NEXT: vse16.v v8, (a0) ; ZVFHMIN-NEXT: ret %a = load <8 x half>, ptr %x %b = load <8 x half>, ptr %y %c = load <8 x half>, ptr %z %neg = fneg <8 x half> %c %d = call <8 x half> @llvm.fma.v8f16(<8 x half> %a, <8 x half> %b, <8 x half> %neg) store <8 x half> %d, ptr %x ret void } define void @fmsub_v6f16(ptr %x, ptr %y, ptr %z) { ; ZVFH-LABEL: fmsub_v6f16: ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma ; ZVFH-NEXT: vle16.v v8, (a0) ; ZVFH-NEXT: vle16.v v9, (a1) ; ZVFH-NEXT: vle16.v v10, (a2) ; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; ZVFH-NEXT: vfmsac.vv v10, v8, v9 ; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma ; ZVFH-NEXT: vse16.v v10, (a0) ; ZVFH-NEXT: ret ; ; ZVFHMINLMULMAX2-RV32-LABEL: fmsub_v6f16: ; ZVFHMINLMULMAX2-RV32: # %bb.0: ; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma ; ZVFHMINLMULMAX2-RV32-NEXT: vle16.v v8, (a2) ; ZVFHMINLMULMAX2-RV32-NEXT: vle16.v v9, (a0) ; ZVFHMINLMULMAX2-RV32-NEXT: vle16.v v10, (a1) ; ZVFHMINLMULMAX2-RV32-NEXT: vfwcvt.f.f.v v11, v8 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; ZVFHMINLMULMAX2-RV32-NEXT: vfneg.v v8, v11 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; ZVFHMINLMULMAX2-RV32-NEXT: vfncvt.f.f.w v11, v8 ; ZVFHMINLMULMAX2-RV32-NEXT: vfwcvt.f.f.v v8, v9 ; ZVFHMINLMULMAX2-RV32-NEXT: vfwcvt.f.f.v v9, v10 ; ZVFHMINLMULMAX2-RV32-NEXT: vfwcvt.f.f.v v10, v11 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; ZVFHMINLMULMAX2-RV32-NEXT: vfmacc.vv v10, v8, v9 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; ZVFHMINLMULMAX2-RV32-NEXT: vfncvt.f.f.w v8, v10 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma ; ZVFHMINLMULMAX2-RV32-NEXT: vslidedown.vi v9, v8, 2 ; ZVFHMINLMULMAX2-RV32-NEXT: addi a1, a0, 8 ; ZVFHMINLMULMAX2-RV32-NEXT: vse32.v v9, (a1) ; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma ; ZVFHMINLMULMAX2-RV32-NEXT: vse16.v v8, (a0) ; ZVFHMINLMULMAX2-RV32-NEXT: ret ; ; ZVFHMINLMULMAX2-RV64-LABEL: fmsub_v6f16: ; ZVFHMINLMULMAX2-RV64: # %bb.0: ; ZVFHMINLMULMAX2-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma ; ZVFHMINLMULMAX2-RV64-NEXT: vle16.v v8, (a2) ; ZVFHMINLMULMAX2-RV64-NEXT: vle16.v v9, (a0) ; ZVFHMINLMULMAX2-RV64-NEXT: vle16.v v10, (a1) ; ZVFHMINLMULMAX2-RV64-NEXT: vfwcvt.f.f.v v11, v8 ; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; 
ZVFHMINLMULMAX2-RV64-NEXT: vfneg.v v8, v11 ; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; ZVFHMINLMULMAX2-RV64-NEXT: vfncvt.f.f.w v11, v8 ; ZVFHMINLMULMAX2-RV64-NEXT: vfwcvt.f.f.v v8, v9 ; ZVFHMINLMULMAX2-RV64-NEXT: vfwcvt.f.f.v v9, v10 ; ZVFHMINLMULMAX2-RV64-NEXT: vfwcvt.f.f.v v10, v11 ; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; ZVFHMINLMULMAX2-RV64-NEXT: vfmacc.vv v10, v8, v9 ; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; ZVFHMINLMULMAX2-RV64-NEXT: vfncvt.f.f.w v8, v10 ; ZVFHMINLMULMAX2-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma ; ZVFHMINLMULMAX2-RV64-NEXT: vse64.v v8, (a0) ; ZVFHMINLMULMAX2-RV64-NEXT: vslidedown.vi v8, v8, 2 ; ZVFHMINLMULMAX2-RV64-NEXT: addi a0, a0, 8 ; ZVFHMINLMULMAX2-RV64-NEXT: vse32.v v8, (a0) ; ZVFHMINLMULMAX2-RV64-NEXT: ret ; ; ZVFHMINLMULMAX1-RV32-LABEL: fmsub_v6f16: ; ZVFHMINLMULMAX1-RV32: # %bb.0: ; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma ; ZVFHMINLMULMAX1-RV32-NEXT: vle16.v v8, (a2) ; ZVFHMINLMULMAX1-RV32-NEXT: vle16.v v9, (a0) ; ZVFHMINLMULMAX1-RV32-NEXT: vle16.v v10, (a1) ; ZVFHMINLMULMAX1-RV32-NEXT: vfwcvt.f.f.v v11, v8 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; ZVFHMINLMULMAX1-RV32-NEXT: vfneg.v v8, v11 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; ZVFHMINLMULMAX1-RV32-NEXT: vfncvt.f.f.w v11, v8 ; ZVFHMINLMULMAX1-RV32-NEXT: vfwcvt.f.f.v v8, v9 ; ZVFHMINLMULMAX1-RV32-NEXT: vfwcvt.f.f.v v9, v10 ; ZVFHMINLMULMAX1-RV32-NEXT: vfwcvt.f.f.v v10, v11 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; ZVFHMINLMULMAX1-RV32-NEXT: vfmacc.vv v10, v8, v9 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; ZVFHMINLMULMAX1-RV32-NEXT: vfncvt.f.f.w v8, v10 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma ; ZVFHMINLMULMAX1-RV32-NEXT: vslidedown.vi v9, v8, 2 ; ZVFHMINLMULMAX1-RV32-NEXT: addi a1, a0, 8 ; ZVFHMINLMULMAX1-RV32-NEXT: vse32.v v9, (a1) ; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma ; ZVFHMINLMULMAX1-RV32-NEXT: vse16.v v8, (a0) ; ZVFHMINLMULMAX1-RV32-NEXT: ret ; ; ZVFHMINLMULMAX1-RV64-LABEL: fmsub_v6f16: ; ZVFHMINLMULMAX1-RV64: # %bb.0: ; ZVFHMINLMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma ; ZVFHMINLMULMAX1-RV64-NEXT: vle16.v v8, (a2) ; ZVFHMINLMULMAX1-RV64-NEXT: vle16.v v9, (a0) ; ZVFHMINLMULMAX1-RV64-NEXT: vle16.v v10, (a1) ; ZVFHMINLMULMAX1-RV64-NEXT: vfwcvt.f.f.v v11, v8 ; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; ZVFHMINLMULMAX1-RV64-NEXT: vfneg.v v8, v11 ; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; ZVFHMINLMULMAX1-RV64-NEXT: vfncvt.f.f.w v11, v8 ; ZVFHMINLMULMAX1-RV64-NEXT: vfwcvt.f.f.v v8, v9 ; ZVFHMINLMULMAX1-RV64-NEXT: vfwcvt.f.f.v v9, v10 ; ZVFHMINLMULMAX1-RV64-NEXT: vfwcvt.f.f.v v10, v11 ; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; ZVFHMINLMULMAX1-RV64-NEXT: vfmacc.vv v10, v8, v9 ; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; ZVFHMINLMULMAX1-RV64-NEXT: vfncvt.f.f.w v8, v10 ; ZVFHMINLMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma ; ZVFHMINLMULMAX1-RV64-NEXT: vse64.v v8, (a0) ; ZVFHMINLMULMAX1-RV64-NEXT: vslidedown.vi v8, v8, 2 ; ZVFHMINLMULMAX1-RV64-NEXT: addi a0, a0, 8 ; ZVFHMINLMULMAX1-RV64-NEXT: vse32.v v8, (a0) ; ZVFHMINLMULMAX1-RV64-NEXT: ret %a = load <6 x half>, ptr %x %b = load <6 x half>, ptr %y %c = load <6 x half>, ptr %z %neg = fneg <6 x half> %c %d = call <6 x half> @llvm.fma.v6f16(<6 x half> %a, <6 x half> %b, <6 x half> %neg) store <6 x half> 
%d, ptr %x ret void } define void @fnmsub_v4f32(ptr %x, ptr %y, ptr %z) { ; ZVFH-LABEL: fnmsub_v4f32: ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; ZVFH-NEXT: vle32.v v8, (a0) ; ZVFH-NEXT: vle32.v v9, (a1) ; ZVFH-NEXT: vle32.v v10, (a2) ; ZVFH-NEXT: vfnmsac.vv v10, v8, v9 ; ZVFH-NEXT: vse32.v v10, (a0) ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: fnmsub_v4f32: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: vsetivli zero, 4, e32, mf2, ta, ma ; ZVFHMIN-NEXT: vle32.v v8, (a0) ; ZVFHMIN-NEXT: vle32.v v9, (a1) ; ZVFHMIN-NEXT: vle32.v v10, (a2) ; ZVFHMIN-NEXT: vfnmsac.vv v10, v8, v9 ; ZVFHMIN-NEXT: vse32.v v10, (a0) ; ZVFHMIN-NEXT: ret %a = load <4 x float>, ptr %x %b = load <4 x float>, ptr %y %c = load <4 x float>, ptr %z %neg = fneg <4 x float> %a %d = call <4 x float> @llvm.fma.v4f32(<4 x float> %neg, <4 x float> %b, <4 x float> %c) store <4 x float> %d, ptr %x ret void } define void @fnmadd_v2f64(ptr %x, ptr %y, ptr %z) { ; CHECK-LABEL: fnmadd_v2f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; CHECK-NEXT: vle64.v v8, (a0) ; CHECK-NEXT: vle64.v v9, (a1) ; CHECK-NEXT: vle64.v v10, (a2) ; CHECK-NEXT: vfnmacc.vv v10, v8, v9 ; CHECK-NEXT: vse64.v v10, (a0) ; CHECK-NEXT: ret %a = load <2 x double>, ptr %x %b = load <2 x double>, ptr %y %c = load <2 x double>, ptr %z %neg = fneg <2 x double> %b %neg2 = fneg <2 x double> %c %d = call <2 x double> @llvm.fma.v2f64(<2 x double> %a, <2 x double> %neg, <2 x double> %neg2) store <2 x double> %d, ptr %x ret void } define void @fadd_v16f16(ptr %x, ptr %y) { ; LMULMAX2-LABEL: fadd_v16f16: ; LMULMAX2: # %bb.0: ; LMULMAX2-NEXT: vsetivli zero, 16, e16, m2, ta, ma ; LMULMAX2-NEXT: vle16.v v8, (a0) ; LMULMAX2-NEXT: vle16.v v10, (a1) ; LMULMAX2-NEXT: vfadd.vv v8, v8, v10 ; LMULMAX2-NEXT: vse16.v v8, (a0) ; LMULMAX2-NEXT: ret ; ; LMULMAX1-RV32-LABEL: fadd_v16f16: ; LMULMAX1-RV32: # %bb.0: ; LMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; LMULMAX1-RV32-NEXT: vle16.v v8, (a0) ; LMULMAX1-RV32-NEXT: addi a2, a0, 16 ; LMULMAX1-RV32-NEXT: vle16.v v9, (a2) ; LMULMAX1-RV32-NEXT: addi a3, a1, 16 ; LMULMAX1-RV32-NEXT: vle16.v v10, (a3) ; LMULMAX1-RV32-NEXT: vle16.v v11, (a1) ; LMULMAX1-RV32-NEXT: vfadd.vv v9, v9, v10 ; LMULMAX1-RV32-NEXT: vfadd.vv v8, v8, v11 ; LMULMAX1-RV32-NEXT: vse16.v v8, (a0) ; LMULMAX1-RV32-NEXT: vse16.v v9, (a2) ; LMULMAX1-RV32-NEXT: ret ; ; LMULMAX1-RV64-LABEL: fadd_v16f16: ; LMULMAX1-RV64: # %bb.0: ; LMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; LMULMAX1-RV64-NEXT: vle16.v v8, (a0) ; LMULMAX1-RV64-NEXT: addi a2, a1, 16 ; LMULMAX1-RV64-NEXT: vle16.v v9, (a2) ; LMULMAX1-RV64-NEXT: addi a2, a0, 16 ; LMULMAX1-RV64-NEXT: vle16.v v10, (a2) ; LMULMAX1-RV64-NEXT: vle16.v v11, (a1) ; LMULMAX1-RV64-NEXT: vfadd.vv v9, v10, v9 ; LMULMAX1-RV64-NEXT: vfadd.vv v8, v8, v11 ; LMULMAX1-RV64-NEXT: vse16.v v8, (a0) ; LMULMAX1-RV64-NEXT: vse16.v v9, (a2) ; LMULMAX1-RV64-NEXT: ret ; ; ZVFHMINLMULMAX2-LABEL: fadd_v16f16: ; ZVFHMINLMULMAX2: # %bb.0: ; ZVFHMINLMULMAX2-NEXT: vsetivli zero, 16, e16, m1, ta, ma ; ZVFHMINLMULMAX2-NEXT: vle16.v v8, (a1) ; ZVFHMINLMULMAX2-NEXT: vle16.v v9, (a0) ; ZVFHMINLMULMAX2-NEXT: vfwcvt.f.f.v v10, v8 ; ZVFHMINLMULMAX2-NEXT: vfwcvt.f.f.v v12, v9 ; ZVFHMINLMULMAX2-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; ZVFHMINLMULMAX2-NEXT: vfadd.vv v8, v12, v10 ; ZVFHMINLMULMAX2-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; ZVFHMINLMULMAX2-NEXT: vfncvt.f.f.w v10, v8 ; ZVFHMINLMULMAX2-NEXT: vse16.v v10, (a0) ; ZVFHMINLMULMAX2-NEXT: ret %a = load <16 x half>, ptr %x %b = load <16 x half>, ptr %y %c = fadd <16 x 
half> %a, %b store <16 x half> %c, ptr %x ret void } define void @fadd_v8f32(ptr %x, ptr %y) { ; LMULMAX2-LABEL: fadd_v8f32: ; LMULMAX2: # %bb.0: ; LMULMAX2-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; LMULMAX2-NEXT: vle32.v v8, (a0) ; LMULMAX2-NEXT: vle32.v v10, (a1) ; LMULMAX2-NEXT: vfadd.vv v8, v8, v10 ; LMULMAX2-NEXT: vse32.v v8, (a0) ; LMULMAX2-NEXT: ret ; ; LMULMAX1-RV32-LABEL: fadd_v8f32: ; LMULMAX1-RV32: # %bb.0: ; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; LMULMAX1-RV32-NEXT: vle32.v v8, (a0) ; LMULMAX1-RV32-NEXT: addi a2, a0, 16 ; LMULMAX1-RV32-NEXT: vle32.v v9, (a2) ; LMULMAX1-RV32-NEXT: addi a3, a1, 16 ; LMULMAX1-RV32-NEXT: vle32.v v10, (a3) ; LMULMAX1-RV32-NEXT: vle32.v v11, (a1) ; LMULMAX1-RV32-NEXT: vfadd.vv v9, v9, v10 ; LMULMAX1-RV32-NEXT: vfadd.vv v8, v8, v11 ; LMULMAX1-RV32-NEXT: vse32.v v8, (a0) ; LMULMAX1-RV32-NEXT: vse32.v v9, (a2) ; LMULMAX1-RV32-NEXT: ret ; ; LMULMAX1-RV64-LABEL: fadd_v8f32: ; LMULMAX1-RV64: # %bb.0: ; LMULMAX1-RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; LMULMAX1-RV64-NEXT: vle32.v v8, (a0) ; LMULMAX1-RV64-NEXT: addi a2, a1, 16 ; LMULMAX1-RV64-NEXT: vle32.v v9, (a2) ; LMULMAX1-RV64-NEXT: addi a2, a0, 16 ; LMULMAX1-RV64-NEXT: vle32.v v10, (a2) ; LMULMAX1-RV64-NEXT: vle32.v v11, (a1) ; LMULMAX1-RV64-NEXT: vfadd.vv v9, v10, v9 ; LMULMAX1-RV64-NEXT: vfadd.vv v8, v8, v11 ; LMULMAX1-RV64-NEXT: vse32.v v8, (a0) ; LMULMAX1-RV64-NEXT: vse32.v v9, (a2) ; LMULMAX1-RV64-NEXT: ret ; ; ZVFHMIN-LABEL: fadd_v8f32: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: vsetivli zero, 8, e32, m1, ta, ma ; ZVFHMIN-NEXT: vle32.v v8, (a0) ; ZVFHMIN-NEXT: vle32.v v9, (a1) ; ZVFHMIN-NEXT: vfadd.vv v8, v8, v9 ; ZVFHMIN-NEXT: vse32.v v8, (a0) ; ZVFHMIN-NEXT: ret %a = load <8 x float>, ptr %x %b = load <8 x float>, ptr %y %c = fadd <8 x float> %a, %b store <8 x float> %c, ptr %x ret void } define void @fadd_v4f64(ptr %x, ptr %y) { ; LMULMAX2-LABEL: fadd_v4f64: ; LMULMAX2: # %bb.0: ; LMULMAX2-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; LMULMAX2-NEXT: vle64.v v8, (a0) ; LMULMAX2-NEXT: vle64.v v10, (a1) ; LMULMAX2-NEXT: vfadd.vv v8, v8, v10 ; LMULMAX2-NEXT: vse64.v v8, (a0) ; LMULMAX2-NEXT: ret ; ; LMULMAX1-RV32-LABEL: fadd_v4f64: ; LMULMAX1-RV32: # %bb.0: ; LMULMAX1-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; LMULMAX1-RV32-NEXT: vle64.v v8, (a0) ; LMULMAX1-RV32-NEXT: addi a2, a0, 16 ; LMULMAX1-RV32-NEXT: vle64.v v9, (a2) ; LMULMAX1-RV32-NEXT: addi a3, a1, 16 ; LMULMAX1-RV32-NEXT: vle64.v v10, (a3) ; LMULMAX1-RV32-NEXT: vle64.v v11, (a1) ; LMULMAX1-RV32-NEXT: vfadd.vv v9, v9, v10 ; LMULMAX1-RV32-NEXT: vfadd.vv v8, v8, v11 ; LMULMAX1-RV32-NEXT: vse64.v v8, (a0) ; LMULMAX1-RV32-NEXT: vse64.v v9, (a2) ; LMULMAX1-RV32-NEXT: ret ; ; LMULMAX1-RV64-LABEL: fadd_v4f64: ; LMULMAX1-RV64: # %bb.0: ; LMULMAX1-RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; LMULMAX1-RV64-NEXT: vle64.v v8, (a0) ; LMULMAX1-RV64-NEXT: addi a2, a1, 16 ; LMULMAX1-RV64-NEXT: vle64.v v9, (a2) ; LMULMAX1-RV64-NEXT: addi a2, a0, 16 ; LMULMAX1-RV64-NEXT: vle64.v v10, (a2) ; LMULMAX1-RV64-NEXT: vle64.v v11, (a1) ; LMULMAX1-RV64-NEXT: vfadd.vv v9, v10, v9 ; LMULMAX1-RV64-NEXT: vfadd.vv v8, v8, v11 ; LMULMAX1-RV64-NEXT: vse64.v v8, (a0) ; LMULMAX1-RV64-NEXT: vse64.v v9, (a2) ; LMULMAX1-RV64-NEXT: ret ; ; ZVFHMIN-LABEL: fadd_v4f64: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: vsetivli zero, 4, e64, m1, ta, ma ; ZVFHMIN-NEXT: vle64.v v8, (a0) ; ZVFHMIN-NEXT: vle64.v v9, (a1) ; ZVFHMIN-NEXT: vfadd.vv v8, v8, v9 ; ZVFHMIN-NEXT: vse64.v v8, (a0) ; ZVFHMIN-NEXT: ret %a = load <4 x double>, ptr %x %b = load <4 x double>, ptr %y %c = fadd <4 x 
double> %a, %b store <4 x double> %c, ptr %x ret void } define void @fsub_v16f16(ptr %x, ptr %y) { ; LMULMAX2-LABEL: fsub_v16f16: ; LMULMAX2: # %bb.0: ; LMULMAX2-NEXT: vsetivli zero, 16, e16, m2, ta, ma ; LMULMAX2-NEXT: vle16.v v8, (a0) ; LMULMAX2-NEXT: vle16.v v10, (a1) ; LMULMAX2-NEXT: vfsub.vv v8, v8, v10 ; LMULMAX2-NEXT: vse16.v v8, (a0) ; LMULMAX2-NEXT: ret ; ; LMULMAX1-RV32-LABEL: fsub_v16f16: ; LMULMAX1-RV32: # %bb.0: ; LMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; LMULMAX1-RV32-NEXT: vle16.v v8, (a0) ; LMULMAX1-RV32-NEXT: addi a2, a0, 16 ; LMULMAX1-RV32-NEXT: vle16.v v9, (a2) ; LMULMAX1-RV32-NEXT: addi a3, a1, 16 ; LMULMAX1-RV32-NEXT: vle16.v v10, (a3) ; LMULMAX1-RV32-NEXT: vle16.v v11, (a1) ; LMULMAX1-RV32-NEXT: vfsub.vv v9, v9, v10 ; LMULMAX1-RV32-NEXT: vfsub.vv v8, v8, v11 ; LMULMAX1-RV32-NEXT: vse16.v v8, (a0) ; LMULMAX1-RV32-NEXT: vse16.v v9, (a2) ; LMULMAX1-RV32-NEXT: ret ; ; LMULMAX1-RV64-LABEL: fsub_v16f16: ; LMULMAX1-RV64: # %bb.0: ; LMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; LMULMAX1-RV64-NEXT: vle16.v v8, (a0) ; LMULMAX1-RV64-NEXT: addi a2, a1, 16 ; LMULMAX1-RV64-NEXT: vle16.v v9, (a2) ; LMULMAX1-RV64-NEXT: addi a2, a0, 16 ; LMULMAX1-RV64-NEXT: vle16.v v10, (a2) ; LMULMAX1-RV64-NEXT: vle16.v v11, (a1) ; LMULMAX1-RV64-NEXT: vfsub.vv v9, v10, v9 ; LMULMAX1-RV64-NEXT: vfsub.vv v8, v8, v11 ; LMULMAX1-RV64-NEXT: vse16.v v8, (a0) ; LMULMAX1-RV64-NEXT: vse16.v v9, (a2) ; LMULMAX1-RV64-NEXT: ret ; ; ZVFHMINLMULMAX2-LABEL: fsub_v16f16: ; ZVFHMINLMULMAX2: # %bb.0: ; ZVFHMINLMULMAX2-NEXT: vsetivli zero, 16, e16, m1, ta, ma ; ZVFHMINLMULMAX2-NEXT: vle16.v v8, (a1) ; ZVFHMINLMULMAX2-NEXT: vle16.v v9, (a0) ; ZVFHMINLMULMAX2-NEXT: vfwcvt.f.f.v v10, v8 ; ZVFHMINLMULMAX2-NEXT: vfwcvt.f.f.v v12, v9 ; ZVFHMINLMULMAX2-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; ZVFHMINLMULMAX2-NEXT: vfsub.vv v8, v12, v10 ; ZVFHMINLMULMAX2-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; ZVFHMINLMULMAX2-NEXT: vfncvt.f.f.w v10, v8 ; ZVFHMINLMULMAX2-NEXT: vse16.v v10, (a0) ; ZVFHMINLMULMAX2-NEXT: ret %a = load <16 x half>, ptr %x %b = load <16 x half>, ptr %y %c = fsub <16 x half> %a, %b store <16 x half> %c, ptr %x ret void } define void @fsub_v8f32(ptr %x, ptr %y) { ; LMULMAX2-LABEL: fsub_v8f32: ; LMULMAX2: # %bb.0: ; LMULMAX2-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; LMULMAX2-NEXT: vle32.v v8, (a0) ; LMULMAX2-NEXT: vle32.v v10, (a1) ; LMULMAX2-NEXT: vfsub.vv v8, v8, v10 ; LMULMAX2-NEXT: vse32.v v8, (a0) ; LMULMAX2-NEXT: ret ; ; LMULMAX1-RV32-LABEL: fsub_v8f32: ; LMULMAX1-RV32: # %bb.0: ; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; LMULMAX1-RV32-NEXT: vle32.v v8, (a0) ; LMULMAX1-RV32-NEXT: addi a2, a0, 16 ; LMULMAX1-RV32-NEXT: vle32.v v9, (a2) ; LMULMAX1-RV32-NEXT: addi a3, a1, 16 ; LMULMAX1-RV32-NEXT: vle32.v v10, (a3) ; LMULMAX1-RV32-NEXT: vle32.v v11, (a1) ; LMULMAX1-RV32-NEXT: vfsub.vv v9, v9, v10 ; LMULMAX1-RV32-NEXT: vfsub.vv v8, v8, v11 ; LMULMAX1-RV32-NEXT: vse32.v v8, (a0) ; LMULMAX1-RV32-NEXT: vse32.v v9, (a2) ; LMULMAX1-RV32-NEXT: ret ; ; LMULMAX1-RV64-LABEL: fsub_v8f32: ; LMULMAX1-RV64: # %bb.0: ; LMULMAX1-RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; LMULMAX1-RV64-NEXT: vle32.v v8, (a0) ; LMULMAX1-RV64-NEXT: addi a2, a1, 16 ; LMULMAX1-RV64-NEXT: vle32.v v9, (a2) ; LMULMAX1-RV64-NEXT: addi a2, a0, 16 ; LMULMAX1-RV64-NEXT: vle32.v v10, (a2) ; LMULMAX1-RV64-NEXT: vle32.v v11, (a1) ; LMULMAX1-RV64-NEXT: vfsub.vv v9, v10, v9 ; LMULMAX1-RV64-NEXT: vfsub.vv v8, v8, v11 ; LMULMAX1-RV64-NEXT: vse32.v v8, (a0) ; LMULMAX1-RV64-NEXT: vse32.v v9, (a2) ; LMULMAX1-RV64-NEXT: ret 
; ; ZVFHMIN-LABEL: fsub_v8f32: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: vsetivli zero, 8, e32, m1, ta, ma ; ZVFHMIN-NEXT: vle32.v v8, (a0) ; ZVFHMIN-NEXT: vle32.v v9, (a1) ; ZVFHMIN-NEXT: vfsub.vv v8, v8, v9 ; ZVFHMIN-NEXT: vse32.v v8, (a0) ; ZVFHMIN-NEXT: ret %a = load <8 x float>, ptr %x %b = load <8 x float>, ptr %y %c = fsub <8 x float> %a, %b store <8 x float> %c, ptr %x ret void } define void @fsub_v4f64(ptr %x, ptr %y) { ; LMULMAX2-LABEL: fsub_v4f64: ; LMULMAX2: # %bb.0: ; LMULMAX2-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; LMULMAX2-NEXT: vle64.v v8, (a0) ; LMULMAX2-NEXT: vle64.v v10, (a1) ; LMULMAX2-NEXT: vfsub.vv v8, v8, v10 ; LMULMAX2-NEXT: vse64.v v8, (a0) ; LMULMAX2-NEXT: ret ; ; LMULMAX1-RV32-LABEL: fsub_v4f64: ; LMULMAX1-RV32: # %bb.0: ; LMULMAX1-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; LMULMAX1-RV32-NEXT: vle64.v v8, (a0) ; LMULMAX1-RV32-NEXT: addi a2, a0, 16 ; LMULMAX1-RV32-NEXT: vle64.v v9, (a2) ; LMULMAX1-RV32-NEXT: addi a3, a1, 16 ; LMULMAX1-RV32-NEXT: vle64.v v10, (a3) ; LMULMAX1-RV32-NEXT: vle64.v v11, (a1) ; LMULMAX1-RV32-NEXT: vfsub.vv v9, v9, v10 ; LMULMAX1-RV32-NEXT: vfsub.vv v8, v8, v11 ; LMULMAX1-RV32-NEXT: vse64.v v8, (a0) ; LMULMAX1-RV32-NEXT: vse64.v v9, (a2) ; LMULMAX1-RV32-NEXT: ret ; ; LMULMAX1-RV64-LABEL: fsub_v4f64: ; LMULMAX1-RV64: # %bb.0: ; LMULMAX1-RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; LMULMAX1-RV64-NEXT: vle64.v v8, (a0) ; LMULMAX1-RV64-NEXT: addi a2, a1, 16 ; LMULMAX1-RV64-NEXT: vle64.v v9, (a2) ; LMULMAX1-RV64-NEXT: addi a2, a0, 16 ; LMULMAX1-RV64-NEXT: vle64.v v10, (a2) ; LMULMAX1-RV64-NEXT: vle64.v v11, (a1) ; LMULMAX1-RV64-NEXT: vfsub.vv v9, v10, v9 ; LMULMAX1-RV64-NEXT: vfsub.vv v8, v8, v11 ; LMULMAX1-RV64-NEXT: vse64.v v8, (a0) ; LMULMAX1-RV64-NEXT: vse64.v v9, (a2) ; LMULMAX1-RV64-NEXT: ret ; ; ZVFHMIN-LABEL: fsub_v4f64: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: vsetivli zero, 4, e64, m1, ta, ma ; ZVFHMIN-NEXT: vle64.v v8, (a0) ; ZVFHMIN-NEXT: vle64.v v9, (a1) ; ZVFHMIN-NEXT: vfsub.vv v8, v8, v9 ; ZVFHMIN-NEXT: vse64.v v8, (a0) ; ZVFHMIN-NEXT: ret %a = load <4 x double>, ptr %x %b = load <4 x double>, ptr %y %c = fsub <4 x double> %a, %b store <4 x double> %c, ptr %x ret void } define void @fmul_v16f16(ptr %x, ptr %y) { ; LMULMAX2-LABEL: fmul_v16f16: ; LMULMAX2: # %bb.0: ; LMULMAX2-NEXT: vsetivli zero, 16, e16, m2, ta, ma ; LMULMAX2-NEXT: vle16.v v8, (a0) ; LMULMAX2-NEXT: vle16.v v10, (a1) ; LMULMAX2-NEXT: vfmul.vv v8, v8, v10 ; LMULMAX2-NEXT: vse16.v v8, (a0) ; LMULMAX2-NEXT: ret ; ; LMULMAX1-RV32-LABEL: fmul_v16f16: ; LMULMAX1-RV32: # %bb.0: ; LMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; LMULMAX1-RV32-NEXT: vle16.v v8, (a0) ; LMULMAX1-RV32-NEXT: addi a2, a0, 16 ; LMULMAX1-RV32-NEXT: vle16.v v9, (a2) ; LMULMAX1-RV32-NEXT: addi a3, a1, 16 ; LMULMAX1-RV32-NEXT: vle16.v v10, (a3) ; LMULMAX1-RV32-NEXT: vle16.v v11, (a1) ; LMULMAX1-RV32-NEXT: vfmul.vv v9, v9, v10 ; LMULMAX1-RV32-NEXT: vfmul.vv v8, v8, v11 ; LMULMAX1-RV32-NEXT: vse16.v v8, (a0) ; LMULMAX1-RV32-NEXT: vse16.v v9, (a2) ; LMULMAX1-RV32-NEXT: ret ; ; LMULMAX1-RV64-LABEL: fmul_v16f16: ; LMULMAX1-RV64: # %bb.0: ; LMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; LMULMAX1-RV64-NEXT: vle16.v v8, (a0) ; LMULMAX1-RV64-NEXT: addi a2, a1, 16 ; LMULMAX1-RV64-NEXT: vle16.v v9, (a2) ; LMULMAX1-RV64-NEXT: addi a2, a0, 16 ; LMULMAX1-RV64-NEXT: vle16.v v10, (a2) ; LMULMAX1-RV64-NEXT: vle16.v v11, (a1) ; LMULMAX1-RV64-NEXT: vfmul.vv v9, v10, v9 ; LMULMAX1-RV64-NEXT: vfmul.vv v8, v8, v11 ; LMULMAX1-RV64-NEXT: vse16.v v8, (a0) ; LMULMAX1-RV64-NEXT: vse16.v v9, (a2) ; 
LMULMAX1-RV64-NEXT: ret ; ; ZVFHMINLMULMAX2-LABEL: fmul_v16f16: ; ZVFHMINLMULMAX2: # %bb.0: ; ZVFHMINLMULMAX2-NEXT: vsetivli zero, 16, e16, m1, ta, ma ; ZVFHMINLMULMAX2-NEXT: vle16.v v8, (a1) ; ZVFHMINLMULMAX2-NEXT: vle16.v v9, (a0) ; ZVFHMINLMULMAX2-NEXT: vfwcvt.f.f.v v10, v8 ; ZVFHMINLMULMAX2-NEXT: vfwcvt.f.f.v v12, v9 ; ZVFHMINLMULMAX2-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; ZVFHMINLMULMAX2-NEXT: vfmul.vv v8, v12, v10 ; ZVFHMINLMULMAX2-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; ZVFHMINLMULMAX2-NEXT: vfncvt.f.f.w v10, v8 ; ZVFHMINLMULMAX2-NEXT: vse16.v v10, (a0) ; ZVFHMINLMULMAX2-NEXT: ret %a = load <16 x half>, ptr %x %b = load <16 x half>, ptr %y %c = fmul <16 x half> %a, %b store <16 x half> %c, ptr %x ret void } define void @fmul_v8f32(ptr %x, ptr %y) { ; LMULMAX2-LABEL: fmul_v8f32: ; LMULMAX2: # %bb.0: ; LMULMAX2-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; LMULMAX2-NEXT: vle32.v v8, (a0) ; LMULMAX2-NEXT: vle32.v v10, (a1) ; LMULMAX2-NEXT: vfmul.vv v8, v8, v10 ; LMULMAX2-NEXT: vse32.v v8, (a0) ; LMULMAX2-NEXT: ret ; ; LMULMAX1-RV32-LABEL: fmul_v8f32: ; LMULMAX1-RV32: # %bb.0: ; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; LMULMAX1-RV32-NEXT: vle32.v v8, (a0) ; LMULMAX1-RV32-NEXT: addi a2, a0, 16 ; LMULMAX1-RV32-NEXT: vle32.v v9, (a2) ; LMULMAX1-RV32-NEXT: addi a3, a1, 16 ; LMULMAX1-RV32-NEXT: vle32.v v10, (a3) ; LMULMAX1-RV32-NEXT: vle32.v v11, (a1) ; LMULMAX1-RV32-NEXT: vfmul.vv v9, v9, v10 ; LMULMAX1-RV32-NEXT: vfmul.vv v8, v8, v11 ; LMULMAX1-RV32-NEXT: vse32.v v8, (a0) ; LMULMAX1-RV32-NEXT: vse32.v v9, (a2) ; LMULMAX1-RV32-NEXT: ret ; ; LMULMAX1-RV64-LABEL: fmul_v8f32: ; LMULMAX1-RV64: # %bb.0: ; LMULMAX1-RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; LMULMAX1-RV64-NEXT: vle32.v v8, (a0) ; LMULMAX1-RV64-NEXT: addi a2, a1, 16 ; LMULMAX1-RV64-NEXT: vle32.v v9, (a2) ; LMULMAX1-RV64-NEXT: addi a2, a0, 16 ; LMULMAX1-RV64-NEXT: vle32.v v10, (a2) ; LMULMAX1-RV64-NEXT: vle32.v v11, (a1) ; LMULMAX1-RV64-NEXT: vfmul.vv v9, v10, v9 ; LMULMAX1-RV64-NEXT: vfmul.vv v8, v8, v11 ; LMULMAX1-RV64-NEXT: vse32.v v8, (a0) ; LMULMAX1-RV64-NEXT: vse32.v v9, (a2) ; LMULMAX1-RV64-NEXT: ret ; ; ZVFHMIN-LABEL: fmul_v8f32: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: vsetivli zero, 8, e32, m1, ta, ma ; ZVFHMIN-NEXT: vle32.v v8, (a0) ; ZVFHMIN-NEXT: vle32.v v9, (a1) ; ZVFHMIN-NEXT: vfmul.vv v8, v8, v9 ; ZVFHMIN-NEXT: vse32.v v8, (a0) ; ZVFHMIN-NEXT: ret %a = load <8 x float>, ptr %x %b = load <8 x float>, ptr %y %c = fmul <8 x float> %a, %b store <8 x float> %c, ptr %x ret void } define void @fmul_v4f64(ptr %x, ptr %y) { ; LMULMAX2-LABEL: fmul_v4f64: ; LMULMAX2: # %bb.0: ; LMULMAX2-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; LMULMAX2-NEXT: vle64.v v8, (a0) ; LMULMAX2-NEXT: vle64.v v10, (a1) ; LMULMAX2-NEXT: vfmul.vv v8, v8, v10 ; LMULMAX2-NEXT: vse64.v v8, (a0) ; LMULMAX2-NEXT: ret ; ; LMULMAX1-RV32-LABEL: fmul_v4f64: ; LMULMAX1-RV32: # %bb.0: ; LMULMAX1-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; LMULMAX1-RV32-NEXT: vle64.v v8, (a0) ; LMULMAX1-RV32-NEXT: addi a2, a0, 16 ; LMULMAX1-RV32-NEXT: vle64.v v9, (a2) ; LMULMAX1-RV32-NEXT: addi a3, a1, 16 ; LMULMAX1-RV32-NEXT: vle64.v v10, (a3) ; LMULMAX1-RV32-NEXT: vle64.v v11, (a1) ; LMULMAX1-RV32-NEXT: vfmul.vv v9, v9, v10 ; LMULMAX1-RV32-NEXT: vfmul.vv v8, v8, v11 ; LMULMAX1-RV32-NEXT: vse64.v v8, (a0) ; LMULMAX1-RV32-NEXT: vse64.v v9, (a2) ; LMULMAX1-RV32-NEXT: ret ; ; LMULMAX1-RV64-LABEL: fmul_v4f64: ; LMULMAX1-RV64: # %bb.0: ; LMULMAX1-RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; LMULMAX1-RV64-NEXT: vle64.v v8, (a0) ; LMULMAX1-RV64-NEXT: addi a2, a1, 16 
; LMULMAX1-RV64-NEXT: vle64.v v9, (a2) ; LMULMAX1-RV64-NEXT: addi a2, a0, 16 ; LMULMAX1-RV64-NEXT: vle64.v v10, (a2) ; LMULMAX1-RV64-NEXT: vle64.v v11, (a1) ; LMULMAX1-RV64-NEXT: vfmul.vv v9, v10, v9 ; LMULMAX1-RV64-NEXT: vfmul.vv v8, v8, v11 ; LMULMAX1-RV64-NEXT: vse64.v v8, (a0) ; LMULMAX1-RV64-NEXT: vse64.v v9, (a2) ; LMULMAX1-RV64-NEXT: ret ; ; ZVFHMIN-LABEL: fmul_v4f64: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: vsetivli zero, 4, e64, m1, ta, ma ; ZVFHMIN-NEXT: vle64.v v8, (a0) ; ZVFHMIN-NEXT: vle64.v v9, (a1) ; ZVFHMIN-NEXT: vfmul.vv v8, v8, v9 ; ZVFHMIN-NEXT: vse64.v v8, (a0) ; ZVFHMIN-NEXT: ret %a = load <4 x double>, ptr %x %b = load <4 x double>, ptr %y %c = fmul <4 x double> %a, %b store <4 x double> %c, ptr %x ret void } define void @fdiv_v16f16(ptr %x, ptr %y) { ; LMULMAX2-LABEL: fdiv_v16f16: ; LMULMAX2: # %bb.0: ; LMULMAX2-NEXT: vsetivli zero, 16, e16, m2, ta, ma ; LMULMAX2-NEXT: vle16.v v8, (a0) ; LMULMAX2-NEXT: vle16.v v10, (a1) ; LMULMAX2-NEXT: vfdiv.vv v8, v8, v10 ; LMULMAX2-NEXT: vse16.v v8, (a0) ; LMULMAX2-NEXT: ret ; ; LMULMAX1-RV32-LABEL: fdiv_v16f16: ; LMULMAX1-RV32: # %bb.0: ; LMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; LMULMAX1-RV32-NEXT: vle16.v v8, (a0) ; LMULMAX1-RV32-NEXT: addi a2, a0, 16 ; LMULMAX1-RV32-NEXT: vle16.v v9, (a2) ; LMULMAX1-RV32-NEXT: addi a3, a1, 16 ; LMULMAX1-RV32-NEXT: vle16.v v10, (a3) ; LMULMAX1-RV32-NEXT: vle16.v v11, (a1) ; LMULMAX1-RV32-NEXT: vfdiv.vv v9, v9, v10 ; LMULMAX1-RV32-NEXT: vfdiv.vv v8, v8, v11 ; LMULMAX1-RV32-NEXT: vse16.v v8, (a0) ; LMULMAX1-RV32-NEXT: vse16.v v9, (a2) ; LMULMAX1-RV32-NEXT: ret ; ; LMULMAX1-RV64-LABEL: fdiv_v16f16: ; LMULMAX1-RV64: # %bb.0: ; LMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; LMULMAX1-RV64-NEXT: vle16.v v8, (a0) ; LMULMAX1-RV64-NEXT: addi a2, a1, 16 ; LMULMAX1-RV64-NEXT: vle16.v v9, (a2) ; LMULMAX1-RV64-NEXT: addi a2, a0, 16 ; LMULMAX1-RV64-NEXT: vle16.v v10, (a2) ; LMULMAX1-RV64-NEXT: vle16.v v11, (a1) ; LMULMAX1-RV64-NEXT: vfdiv.vv v9, v10, v9 ; LMULMAX1-RV64-NEXT: vfdiv.vv v8, v8, v11 ; LMULMAX1-RV64-NEXT: vse16.v v8, (a0) ; LMULMAX1-RV64-NEXT: vse16.v v9, (a2) ; LMULMAX1-RV64-NEXT: ret ; ; ZVFHMINLMULMAX2-LABEL: fdiv_v16f16: ; ZVFHMINLMULMAX2: # %bb.0: ; ZVFHMINLMULMAX2-NEXT: vsetivli zero, 16, e16, m1, ta, ma ; ZVFHMINLMULMAX2-NEXT: vle16.v v8, (a1) ; ZVFHMINLMULMAX2-NEXT: vle16.v v9, (a0) ; ZVFHMINLMULMAX2-NEXT: vfwcvt.f.f.v v10, v8 ; ZVFHMINLMULMAX2-NEXT: vfwcvt.f.f.v v12, v9 ; ZVFHMINLMULMAX2-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; ZVFHMINLMULMAX2-NEXT: vfdiv.vv v8, v12, v10 ; ZVFHMINLMULMAX2-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; ZVFHMINLMULMAX2-NEXT: vfncvt.f.f.w v10, v8 ; ZVFHMINLMULMAX2-NEXT: vse16.v v10, (a0) ; ZVFHMINLMULMAX2-NEXT: ret %a = load <16 x half>, ptr %x %b = load <16 x half>, ptr %y %c = fdiv <16 x half> %a, %b store <16 x half> %c, ptr %x ret void } define void @fdiv_v8f32(ptr %x, ptr %y) { ; LMULMAX2-LABEL: fdiv_v8f32: ; LMULMAX2: # %bb.0: ; LMULMAX2-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; LMULMAX2-NEXT: vle32.v v8, (a0) ; LMULMAX2-NEXT: vle32.v v10, (a1) ; LMULMAX2-NEXT: vfdiv.vv v8, v8, v10 ; LMULMAX2-NEXT: vse32.v v8, (a0) ; LMULMAX2-NEXT: ret ; ; LMULMAX1-RV32-LABEL: fdiv_v8f32: ; LMULMAX1-RV32: # %bb.0: ; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; LMULMAX1-RV32-NEXT: vle32.v v8, (a0) ; LMULMAX1-RV32-NEXT: addi a2, a0, 16 ; LMULMAX1-RV32-NEXT: vle32.v v9, (a2) ; LMULMAX1-RV32-NEXT: addi a3, a1, 16 ; LMULMAX1-RV32-NEXT: vle32.v v10, (a3) ; LMULMAX1-RV32-NEXT: vle32.v v11, (a1) ; LMULMAX1-RV32-NEXT: vfdiv.vv v9, v9, v10 ; 
LMULMAX1-RV32-NEXT: vfdiv.vv v8, v8, v11 ; LMULMAX1-RV32-NEXT: vse32.v v8, (a0) ; LMULMAX1-RV32-NEXT: vse32.v v9, (a2) ; LMULMAX1-RV32-NEXT: ret ; ; LMULMAX1-RV64-LABEL: fdiv_v8f32: ; LMULMAX1-RV64: # %bb.0: ; LMULMAX1-RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; LMULMAX1-RV64-NEXT: vle32.v v8, (a0) ; LMULMAX1-RV64-NEXT: addi a2, a1, 16 ; LMULMAX1-RV64-NEXT: vle32.v v9, (a2) ; LMULMAX1-RV64-NEXT: addi a2, a0, 16 ; LMULMAX1-RV64-NEXT: vle32.v v10, (a2) ; LMULMAX1-RV64-NEXT: vle32.v v11, (a1) ; LMULMAX1-RV64-NEXT: vfdiv.vv v9, v10, v9 ; LMULMAX1-RV64-NEXT: vfdiv.vv v8, v8, v11 ; LMULMAX1-RV64-NEXT: vse32.v v8, (a0) ; LMULMAX1-RV64-NEXT: vse32.v v9, (a2) ; LMULMAX1-RV64-NEXT: ret ; ; ZVFHMIN-LABEL: fdiv_v8f32: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: vsetivli zero, 8, e32, m1, ta, ma ; ZVFHMIN-NEXT: vle32.v v8, (a0) ; ZVFHMIN-NEXT: vle32.v v9, (a1) ; ZVFHMIN-NEXT: vfdiv.vv v8, v8, v9 ; ZVFHMIN-NEXT: vse32.v v8, (a0) ; ZVFHMIN-NEXT: ret %a = load <8 x float>, ptr %x %b = load <8 x float>, ptr %y %c = fdiv <8 x float> %a, %b store <8 x float> %c, ptr %x ret void } define void @fdiv_v4f64(ptr %x, ptr %y) { ; LMULMAX2-LABEL: fdiv_v4f64: ; LMULMAX2: # %bb.0: ; LMULMAX2-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; LMULMAX2-NEXT: vle64.v v8, (a0) ; LMULMAX2-NEXT: vle64.v v10, (a1) ; LMULMAX2-NEXT: vfdiv.vv v8, v8, v10 ; LMULMAX2-NEXT: vse64.v v8, (a0) ; LMULMAX2-NEXT: ret ; ; LMULMAX1-RV32-LABEL: fdiv_v4f64: ; LMULMAX1-RV32: # %bb.0: ; LMULMAX1-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; LMULMAX1-RV32-NEXT: vle64.v v8, (a0) ; LMULMAX1-RV32-NEXT: addi a2, a0, 16 ; LMULMAX1-RV32-NEXT: vle64.v v9, (a2) ; LMULMAX1-RV32-NEXT: addi a3, a1, 16 ; LMULMAX1-RV32-NEXT: vle64.v v10, (a3) ; LMULMAX1-RV32-NEXT: vle64.v v11, (a1) ; LMULMAX1-RV32-NEXT: vfdiv.vv v9, v9, v10 ; LMULMAX1-RV32-NEXT: vfdiv.vv v8, v8, v11 ; LMULMAX1-RV32-NEXT: vse64.v v8, (a0) ; LMULMAX1-RV32-NEXT: vse64.v v9, (a2) ; LMULMAX1-RV32-NEXT: ret ; ; LMULMAX1-RV64-LABEL: fdiv_v4f64: ; LMULMAX1-RV64: # %bb.0: ; LMULMAX1-RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; LMULMAX1-RV64-NEXT: vle64.v v8, (a0) ; LMULMAX1-RV64-NEXT: addi a2, a1, 16 ; LMULMAX1-RV64-NEXT: vle64.v v9, (a2) ; LMULMAX1-RV64-NEXT: addi a2, a0, 16 ; LMULMAX1-RV64-NEXT: vle64.v v10, (a2) ; LMULMAX1-RV64-NEXT: vle64.v v11, (a1) ; LMULMAX1-RV64-NEXT: vfdiv.vv v9, v10, v9 ; LMULMAX1-RV64-NEXT: vfdiv.vv v8, v8, v11 ; LMULMAX1-RV64-NEXT: vse64.v v8, (a0) ; LMULMAX1-RV64-NEXT: vse64.v v9, (a2) ; LMULMAX1-RV64-NEXT: ret ; ; ZVFHMIN-LABEL: fdiv_v4f64: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: vsetivli zero, 4, e64, m1, ta, ma ; ZVFHMIN-NEXT: vle64.v v8, (a0) ; ZVFHMIN-NEXT: vle64.v v9, (a1) ; ZVFHMIN-NEXT: vfdiv.vv v8, v8, v9 ; ZVFHMIN-NEXT: vse64.v v8, (a0) ; ZVFHMIN-NEXT: ret %a = load <4 x double>, ptr %x %b = load <4 x double>, ptr %y %c = fdiv <4 x double> %a, %b store <4 x double> %c, ptr %x ret void } define void @fneg_v16f16(ptr %x) { ; LMULMAX2-LABEL: fneg_v16f16: ; LMULMAX2: # %bb.0: ; LMULMAX2-NEXT: vsetivli zero, 16, e16, m2, ta, ma ; LMULMAX2-NEXT: vle16.v v8, (a0) ; LMULMAX2-NEXT: vfneg.v v8, v8 ; LMULMAX2-NEXT: vse16.v v8, (a0) ; LMULMAX2-NEXT: ret ; ; LMULMAX1-LABEL: fneg_v16f16: ; LMULMAX1: # %bb.0: ; LMULMAX1-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; LMULMAX1-NEXT: addi a1, a0, 16 ; LMULMAX1-NEXT: vle16.v v8, (a1) ; LMULMAX1-NEXT: vle16.v v9, (a0) ; LMULMAX1-NEXT: vfneg.v v8, v8 ; LMULMAX1-NEXT: vfneg.v v9, v9 ; LMULMAX1-NEXT: vse16.v v9, (a0) ; LMULMAX1-NEXT: vse16.v v8, (a1) ; LMULMAX1-NEXT: ret ; ; ZVFHMINLMULMAX2-LABEL: fneg_v16f16: ; ZVFHMINLMULMAX2: # %bb.0: ; 
ZVFHMINLMULMAX2-NEXT: vsetivli zero, 16, e16, m1, ta, ma ; ZVFHMINLMULMAX2-NEXT: vle16.v v8, (a0) ; ZVFHMINLMULMAX2-NEXT: vfwcvt.f.f.v v10, v8 ; ZVFHMINLMULMAX2-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; ZVFHMINLMULMAX2-NEXT: vfneg.v v8, v10 ; ZVFHMINLMULMAX2-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; ZVFHMINLMULMAX2-NEXT: vfncvt.f.f.w v10, v8 ; ZVFHMINLMULMAX2-NEXT: vse16.v v10, (a0) ; ZVFHMINLMULMAX2-NEXT: ret %a = load <16 x half>, ptr %x %b = fneg <16 x half> %a store <16 x half> %b, ptr %x ret void } define void @fneg_v8f32(ptr %x) { ; LMULMAX2-LABEL: fneg_v8f32: ; LMULMAX2: # %bb.0: ; LMULMAX2-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; LMULMAX2-NEXT: vle32.v v8, (a0) ; LMULMAX2-NEXT: vfneg.v v8, v8 ; LMULMAX2-NEXT: vse32.v v8, (a0) ; LMULMAX2-NEXT: ret ; ; LMULMAX1-LABEL: fneg_v8f32: ; LMULMAX1: # %bb.0: ; LMULMAX1-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; LMULMAX1-NEXT: addi a1, a0, 16 ; LMULMAX1-NEXT: vle32.v v8, (a1) ; LMULMAX1-NEXT: vle32.v v9, (a0) ; LMULMAX1-NEXT: vfneg.v v8, v8 ; LMULMAX1-NEXT: vfneg.v v9, v9 ; LMULMAX1-NEXT: vse32.v v9, (a0) ; LMULMAX1-NEXT: vse32.v v8, (a1) ; LMULMAX1-NEXT: ret ; ; ZVFHMIN-LABEL: fneg_v8f32: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: vsetivli zero, 8, e32, m1, ta, ma ; ZVFHMIN-NEXT: vle32.v v8, (a0) ; ZVFHMIN-NEXT: vfneg.v v8, v8 ; ZVFHMIN-NEXT: vse32.v v8, (a0) ; ZVFHMIN-NEXT: ret %a = load <8 x float>, ptr %x %b = fneg <8 x float> %a store <8 x float> %b, ptr %x ret void } define void @fneg_v4f64(ptr %x) { ; LMULMAX2-LABEL: fneg_v4f64: ; LMULMAX2: # %bb.0: ; LMULMAX2-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; LMULMAX2-NEXT: vle64.v v8, (a0) ; LMULMAX2-NEXT: vfneg.v v8, v8 ; LMULMAX2-NEXT: vse64.v v8, (a0) ; LMULMAX2-NEXT: ret ; ; LMULMAX1-LABEL: fneg_v4f64: ; LMULMAX1: # %bb.0: ; LMULMAX1-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; LMULMAX1-NEXT: addi a1, a0, 16 ; LMULMAX1-NEXT: vle64.v v8, (a1) ; LMULMAX1-NEXT: vle64.v v9, (a0) ; LMULMAX1-NEXT: vfneg.v v8, v8 ; LMULMAX1-NEXT: vfneg.v v9, v9 ; LMULMAX1-NEXT: vse64.v v9, (a0) ; LMULMAX1-NEXT: vse64.v v8, (a1) ; LMULMAX1-NEXT: ret ; ; ZVFHMIN-LABEL: fneg_v4f64: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: vsetivli zero, 4, e64, m1, ta, ma ; ZVFHMIN-NEXT: vle64.v v8, (a0) ; ZVFHMIN-NEXT: vfneg.v v8, v8 ; ZVFHMIN-NEXT: vse64.v v8, (a0) ; ZVFHMIN-NEXT: ret %a = load <4 x double>, ptr %x %b = fneg <4 x double> %a store <4 x double> %b, ptr %x ret void } define void @fma_v16f16(ptr %x, ptr %y, ptr %z) { ; LMULMAX2-LABEL: fma_v16f16: ; LMULMAX2: # %bb.0: ; LMULMAX2-NEXT: vsetivli zero, 16, e16, m2, ta, ma ; LMULMAX2-NEXT: vle16.v v8, (a0) ; LMULMAX2-NEXT: vle16.v v10, (a1) ; LMULMAX2-NEXT: vle16.v v12, (a2) ; LMULMAX2-NEXT: vfmacc.vv v12, v8, v10 ; LMULMAX2-NEXT: vse16.v v12, (a0) ; LMULMAX2-NEXT: ret ; ; LMULMAX1-LABEL: fma_v16f16: ; LMULMAX1: # %bb.0: ; LMULMAX1-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; LMULMAX1-NEXT: vle16.v v8, (a0) ; LMULMAX1-NEXT: addi a3, a0, 16 ; LMULMAX1-NEXT: vle16.v v9, (a3) ; LMULMAX1-NEXT: vle16.v v10, (a1) ; LMULMAX1-NEXT: addi a1, a1, 16 ; LMULMAX1-NEXT: vle16.v v11, (a1) ; LMULMAX1-NEXT: addi a1, a2, 16 ; LMULMAX1-NEXT: vle16.v v12, (a1) ; LMULMAX1-NEXT: vle16.v v13, (a2) ; LMULMAX1-NEXT: vfmacc.vv v12, v9, v11 ; LMULMAX1-NEXT: vfmacc.vv v13, v8, v10 ; LMULMAX1-NEXT: vse16.v v13, (a0) ; LMULMAX1-NEXT: vse16.v v12, (a3) ; LMULMAX1-NEXT: ret ; ; ZVFHMINLMULMAX2-LABEL: fma_v16f16: ; ZVFHMINLMULMAX2: # %bb.0: ; ZVFHMINLMULMAX2-NEXT: vsetivli zero, 16, e16, m1, ta, ma ; ZVFHMINLMULMAX2-NEXT: vle16.v v8, (a2) ; ZVFHMINLMULMAX2-NEXT: vle16.v v9, (a0) ; ZVFHMINLMULMAX2-NEXT: vle16.v v10, 
(a1) ; ZVFHMINLMULMAX2-NEXT: vfwcvt.f.f.v v12, v8 ; ZVFHMINLMULMAX2-NEXT: vfwcvt.f.f.v v14, v9 ; ZVFHMINLMULMAX2-NEXT: vfwcvt.f.f.v v8, v10 ; ZVFHMINLMULMAX2-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; ZVFHMINLMULMAX2-NEXT: vfmadd.vv v8, v14, v12 ; ZVFHMINLMULMAX2-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; ZVFHMINLMULMAX2-NEXT: vfncvt.f.f.w v10, v8 ; ZVFHMINLMULMAX2-NEXT: vse16.v v10, (a0) ; ZVFHMINLMULMAX2-NEXT: ret %a = load <16 x half>, ptr %x %b = load <16 x half>, ptr %y %c = load <16 x half>, ptr %z %d = call <16 x half> @llvm.fma.v16f16(<16 x half> %a, <16 x half> %b, <16 x half> %c) store <16 x half> %d, ptr %x ret void } declare <16 x half> @llvm.fma.v16f16(<16 x half>, <16 x half>, <16 x half>) define void @fma_v8f32(ptr %x, ptr %y, ptr %z) { ; LMULMAX2-LABEL: fma_v8f32: ; LMULMAX2: # %bb.0: ; LMULMAX2-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; LMULMAX2-NEXT: vle32.v v8, (a0) ; LMULMAX2-NEXT: vle32.v v10, (a1) ; LMULMAX2-NEXT: vle32.v v12, (a2) ; LMULMAX2-NEXT: vfmacc.vv v12, v8, v10 ; LMULMAX2-NEXT: vse32.v v12, (a0) ; LMULMAX2-NEXT: ret ; ; LMULMAX1-LABEL: fma_v8f32: ; LMULMAX1: # %bb.0: ; LMULMAX1-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; LMULMAX1-NEXT: vle32.v v8, (a0) ; LMULMAX1-NEXT: addi a3, a0, 16 ; LMULMAX1-NEXT: vle32.v v9, (a3) ; LMULMAX1-NEXT: vle32.v v10, (a1) ; LMULMAX1-NEXT: addi a1, a1, 16 ; LMULMAX1-NEXT: vle32.v v11, (a1) ; LMULMAX1-NEXT: addi a1, a2, 16 ; LMULMAX1-NEXT: vle32.v v12, (a1) ; LMULMAX1-NEXT: vle32.v v13, (a2) ; LMULMAX1-NEXT: vfmacc.vv v12, v9, v11 ; LMULMAX1-NEXT: vfmacc.vv v13, v8, v10 ; LMULMAX1-NEXT: vse32.v v13, (a0) ; LMULMAX1-NEXT: vse32.v v12, (a3) ; LMULMAX1-NEXT: ret ; ; ZVFHMIN-LABEL: fma_v8f32: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: vsetivli zero, 8, e32, m1, ta, ma ; ZVFHMIN-NEXT: vle32.v v8, (a0) ; ZVFHMIN-NEXT: vle32.v v9, (a1) ; ZVFHMIN-NEXT: vle32.v v10, (a2) ; ZVFHMIN-NEXT: vfmacc.vv v10, v8, v9 ; ZVFHMIN-NEXT: vse32.v v10, (a0) ; ZVFHMIN-NEXT: ret %a = load <8 x float>, ptr %x %b = load <8 x float>, ptr %y %c = load <8 x float>, ptr %z %d = call <8 x float> @llvm.fma.v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %c) store <8 x float> %d, ptr %x ret void } declare <8 x float> @llvm.fma.v8f32(<8 x float>, <8 x float>, <8 x float>) define void @fma_v4f64(ptr %x, ptr %y, ptr %z) { ; LMULMAX2-LABEL: fma_v4f64: ; LMULMAX2: # %bb.0: ; LMULMAX2-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; LMULMAX2-NEXT: vle64.v v8, (a0) ; LMULMAX2-NEXT: vle64.v v10, (a1) ; LMULMAX2-NEXT: vle64.v v12, (a2) ; LMULMAX2-NEXT: vfmacc.vv v12, v8, v10 ; LMULMAX2-NEXT: vse64.v v12, (a0) ; LMULMAX2-NEXT: ret ; ; LMULMAX1-LABEL: fma_v4f64: ; LMULMAX1: # %bb.0: ; LMULMAX1-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; LMULMAX1-NEXT: vle64.v v8, (a0) ; LMULMAX1-NEXT: addi a3, a0, 16 ; LMULMAX1-NEXT: vle64.v v9, (a3) ; LMULMAX1-NEXT: vle64.v v10, (a1) ; LMULMAX1-NEXT: addi a1, a1, 16 ; LMULMAX1-NEXT: vle64.v v11, (a1) ; LMULMAX1-NEXT: addi a1, a2, 16 ; LMULMAX1-NEXT: vle64.v v12, (a1) ; LMULMAX1-NEXT: vle64.v v13, (a2) ; LMULMAX1-NEXT: vfmacc.vv v12, v9, v11 ; LMULMAX1-NEXT: vfmacc.vv v13, v8, v10 ; LMULMAX1-NEXT: vse64.v v13, (a0) ; LMULMAX1-NEXT: vse64.v v12, (a3) ; LMULMAX1-NEXT: ret ; ; ZVFHMIN-LABEL: fma_v4f64: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: vsetivli zero, 4, e64, m1, ta, ma ; ZVFHMIN-NEXT: vle64.v v8, (a0) ; ZVFHMIN-NEXT: vle64.v v9, (a1) ; ZVFHMIN-NEXT: vle64.v v10, (a2) ; ZVFHMIN-NEXT: vfmacc.vv v10, v8, v9 ; ZVFHMIN-NEXT: vse64.v v10, (a0) ; ZVFHMIN-NEXT: ret %a = load <4 x double>, ptr %x %b = load <4 x double>, ptr %y %c = load <4 x double>, ptr %z %d = call 
<4 x double> @llvm.fma.v4f64(<4 x double> %a, <4 x double> %b, <4 x double> %c) store <4 x double> %d, ptr %x ret void } declare <4 x double> @llvm.fma.v4f64(<4 x double>, <4 x double>, <4 x double>) define void @fadd_vf_v8f16(ptr %x, half %y) { ; ZVFH-LABEL: fadd_vf_v8f16: ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; ZVFH-NEXT: vle16.v v8, (a0) ; ZVFH-NEXT: vfadd.vf v8, v8, fa0 ; ZVFH-NEXT: vse16.v v8, (a0) ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: fadd_vf_v8f16: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: vsetivli zero, 8, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vle16.v v8, (a0) ; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0 ; ZVFHMIN-NEXT: vsetvli a1, zero, e32, m1, ta, ma ; ZVFHMIN-NEXT: vfmv.v.f v9, fa5 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v9 ; ZVFHMIN-NEXT: vsetivli zero, 8, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; ZVFHMIN-NEXT: vfadd.vv v8, v9, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v9, v8 ; ZVFHMIN-NEXT: vse16.v v9, (a0) ; ZVFHMIN-NEXT: ret %a = load <8 x half>, ptr %x %b = insertelement <8 x half> poison, half %y, i32 0 %c = shufflevector <8 x half> %b, <8 x half> poison, <8 x i32> zeroinitializer %d = fadd <8 x half> %a, %c store <8 x half> %d, ptr %x ret void } define void @fadd_vf_v6f16(ptr %x, half %y) { ; ZVFH-LABEL: fadd_vf_v6f16: ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma ; ZVFH-NEXT: vle16.v v8, (a0) ; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; ZVFH-NEXT: vfadd.vf v8, v8, fa0 ; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma ; ZVFH-NEXT: vse16.v v8, (a0) ; ZVFH-NEXT: ret ; ; ZVFHMINLMULMAX2-RV32-LABEL: fadd_vf_v6f16: ; ZVFHMINLMULMAX2-RV32: # %bb.0: ; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma ; ZVFHMINLMULMAX2-RV32-NEXT: vle16.v v8, (a0) ; ZVFHMINLMULMAX2-RV32-NEXT: fcvt.s.h fa5, fa0 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli a1, zero, e32, m1, ta, ma ; ZVFHMINLMULMAX2-RV32-NEXT: vfmv.v.f v9, fa5 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; ZVFHMINLMULMAX2-RV32-NEXT: vfncvt.f.f.w v10, v9 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma ; ZVFHMINLMULMAX2-RV32-NEXT: vfwcvt.f.f.v v9, v8 ; ZVFHMINLMULMAX2-RV32-NEXT: vfwcvt.f.f.v v8, v10 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; ZVFHMINLMULMAX2-RV32-NEXT: vfadd.vv v8, v9, v8 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; ZVFHMINLMULMAX2-RV32-NEXT: vfncvt.f.f.w v9, v8 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma ; ZVFHMINLMULMAX2-RV32-NEXT: vslidedown.vi v8, v9, 2 ; ZVFHMINLMULMAX2-RV32-NEXT: addi a1, a0, 8 ; ZVFHMINLMULMAX2-RV32-NEXT: vse32.v v8, (a1) ; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma ; ZVFHMINLMULMAX2-RV32-NEXT: vse16.v v9, (a0) ; ZVFHMINLMULMAX2-RV32-NEXT: ret ; ; ZVFHMINLMULMAX2-RV64-LABEL: fadd_vf_v6f16: ; ZVFHMINLMULMAX2-RV64: # %bb.0: ; ZVFHMINLMULMAX2-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma ; ZVFHMINLMULMAX2-RV64-NEXT: vle16.v v8, (a0) ; ZVFHMINLMULMAX2-RV64-NEXT: fcvt.s.h fa5, fa0 ; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli a1, zero, e32, m1, ta, ma ; ZVFHMINLMULMAX2-RV64-NEXT: vfmv.v.f v9, fa5 ; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; ZVFHMINLMULMAX2-RV64-NEXT: vfncvt.f.f.w v10, v9 ; ZVFHMINLMULMAX2-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma ; ZVFHMINLMULMAX2-RV64-NEXT: vfwcvt.f.f.v v9, v8 ; ZVFHMINLMULMAX2-RV64-NEXT: vfwcvt.f.f.v v8, v10 ; 
ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; ZVFHMINLMULMAX2-RV64-NEXT: vfadd.vv v8, v9, v8 ; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; ZVFHMINLMULMAX2-RV64-NEXT: vfncvt.f.f.w v9, v8 ; ZVFHMINLMULMAX2-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma ; ZVFHMINLMULMAX2-RV64-NEXT: vse64.v v9, (a0) ; ZVFHMINLMULMAX2-RV64-NEXT: vslidedown.vi v8, v9, 2 ; ZVFHMINLMULMAX2-RV64-NEXT: addi a0, a0, 8 ; ZVFHMINLMULMAX2-RV64-NEXT: vse32.v v8, (a0) ; ZVFHMINLMULMAX2-RV64-NEXT: ret ; ; ZVFHMINLMULMAX1-RV32-LABEL: fadd_vf_v6f16: ; ZVFHMINLMULMAX1-RV32: # %bb.0: ; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma ; ZVFHMINLMULMAX1-RV32-NEXT: vle16.v v8, (a0) ; ZVFHMINLMULMAX1-RV32-NEXT: fcvt.s.h fa5, fa0 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli a1, zero, e32, m1, ta, ma ; ZVFHMINLMULMAX1-RV32-NEXT: vfmv.v.f v9, fa5 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; ZVFHMINLMULMAX1-RV32-NEXT: vfncvt.f.f.w v10, v9 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma ; ZVFHMINLMULMAX1-RV32-NEXT: vfwcvt.f.f.v v9, v8 ; ZVFHMINLMULMAX1-RV32-NEXT: vfwcvt.f.f.v v8, v10 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; ZVFHMINLMULMAX1-RV32-NEXT: vfadd.vv v8, v9, v8 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; ZVFHMINLMULMAX1-RV32-NEXT: vfncvt.f.f.w v9, v8 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma ; ZVFHMINLMULMAX1-RV32-NEXT: vslidedown.vi v8, v9, 2 ; ZVFHMINLMULMAX1-RV32-NEXT: addi a1, a0, 8 ; ZVFHMINLMULMAX1-RV32-NEXT: vse32.v v8, (a1) ; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma ; ZVFHMINLMULMAX1-RV32-NEXT: vse16.v v9, (a0) ; ZVFHMINLMULMAX1-RV32-NEXT: ret ; ; ZVFHMINLMULMAX1-RV64-LABEL: fadd_vf_v6f16: ; ZVFHMINLMULMAX1-RV64: # %bb.0: ; ZVFHMINLMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma ; ZVFHMINLMULMAX1-RV64-NEXT: vle16.v v8, (a0) ; ZVFHMINLMULMAX1-RV64-NEXT: fcvt.s.h fa5, fa0 ; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli a1, zero, e32, m1, ta, ma ; ZVFHMINLMULMAX1-RV64-NEXT: vfmv.v.f v9, fa5 ; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; ZVFHMINLMULMAX1-RV64-NEXT: vfncvt.f.f.w v10, v9 ; ZVFHMINLMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma ; ZVFHMINLMULMAX1-RV64-NEXT: vfwcvt.f.f.v v9, v8 ; ZVFHMINLMULMAX1-RV64-NEXT: vfwcvt.f.f.v v8, v10 ; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; ZVFHMINLMULMAX1-RV64-NEXT: vfadd.vv v8, v9, v8 ; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; ZVFHMINLMULMAX1-RV64-NEXT: vfncvt.f.f.w v9, v8 ; ZVFHMINLMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma ; ZVFHMINLMULMAX1-RV64-NEXT: vse64.v v9, (a0) ; ZVFHMINLMULMAX1-RV64-NEXT: vslidedown.vi v8, v9, 2 ; ZVFHMINLMULMAX1-RV64-NEXT: addi a0, a0, 8 ; ZVFHMINLMULMAX1-RV64-NEXT: vse32.v v8, (a0) ; ZVFHMINLMULMAX1-RV64-NEXT: ret %a = load <6 x half>, ptr %x %b = insertelement <6 x half> poison, half %y, i32 0 %c = shufflevector <6 x half> %b, <6 x half> poison, <6 x i32> zeroinitializer %d = fadd <6 x half> %a, %c store <6 x half> %d, ptr %x ret void } define void @fadd_vf_v4f32(ptr %x, float %y) { ; ZVFH-LABEL: fadd_vf_v4f32: ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; ZVFH-NEXT: vle32.v v8, (a0) ; ZVFH-NEXT: vfadd.vf v8, v8, fa0 ; ZVFH-NEXT: vse32.v v8, (a0) ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: fadd_vf_v4f32: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: vsetivli zero, 4, e32, mf2, ta, ma ; ZVFHMIN-NEXT: vle32.v v8, (a0) ; ZVFHMIN-NEXT: vfadd.vf v8, v8, fa0 ; ZVFHMIN-NEXT: vse32.v v8, (a0) ; 
ZVFHMIN-NEXT: ret %a = load <4 x float>, ptr %x %b = insertelement <4 x float> poison, float %y, i32 0 %c = shufflevector <4 x float> %b, <4 x float> poison, <4 x i32> zeroinitializer %d = fadd <4 x float> %a, %c store <4 x float> %d, ptr %x ret void } define void @fadd_vf_v2f64(ptr %x, double %y) { ; CHECK-LABEL: fadd_vf_v2f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; CHECK-NEXT: vle64.v v8, (a0) ; CHECK-NEXT: vfadd.vf v8, v8, fa0 ; CHECK-NEXT: vse64.v v8, (a0) ; CHECK-NEXT: ret %a = load <2 x double>, ptr %x %b = insertelement <2 x double> poison, double %y, i32 0 %c = shufflevector <2 x double> %b, <2 x double> poison, <2 x i32> zeroinitializer %d = fadd <2 x double> %a, %c store <2 x double> %d, ptr %x ret void } define void @fadd_fv_v8f16(ptr %x, half %y) { ; ZVFH-LABEL: fadd_fv_v8f16: ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; ZVFH-NEXT: vle16.v v8, (a0) ; ZVFH-NEXT: vfadd.vf v8, v8, fa0 ; ZVFH-NEXT: vse16.v v8, (a0) ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: fadd_fv_v8f16: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: vsetivli zero, 8, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vle16.v v8, (a0) ; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0 ; ZVFHMIN-NEXT: vsetvli a1, zero, e32, m1, ta, ma ; ZVFHMIN-NEXT: vfmv.v.f v9, fa5 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v9 ; ZVFHMIN-NEXT: vsetivli zero, 8, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; ZVFHMIN-NEXT: vfadd.vv v8, v8, v9 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v9, v8 ; ZVFHMIN-NEXT: vse16.v v9, (a0) ; ZVFHMIN-NEXT: ret %a = load <8 x half>, ptr %x %b = insertelement <8 x half> poison, half %y, i32 0 %c = shufflevector <8 x half> %b, <8 x half> poison, <8 x i32> zeroinitializer %d = fadd <8 x half> %c, %a store <8 x half> %d, ptr %x ret void } define void @fadd_fv_v6f16(ptr %x, half %y) { ; ZVFH-LABEL: fadd_fv_v6f16: ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma ; ZVFH-NEXT: vle16.v v8, (a0) ; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; ZVFH-NEXT: vfadd.vf v8, v8, fa0 ; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma ; ZVFH-NEXT: vse16.v v8, (a0) ; ZVFH-NEXT: ret ; ; ZVFHMINLMULMAX2-RV32-LABEL: fadd_fv_v6f16: ; ZVFHMINLMULMAX2-RV32: # %bb.0: ; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma ; ZVFHMINLMULMAX2-RV32-NEXT: vle16.v v8, (a0) ; ZVFHMINLMULMAX2-RV32-NEXT: fcvt.s.h fa5, fa0 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli a1, zero, e32, m1, ta, ma ; ZVFHMINLMULMAX2-RV32-NEXT: vfmv.v.f v9, fa5 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; ZVFHMINLMULMAX2-RV32-NEXT: vfncvt.f.f.w v10, v9 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma ; ZVFHMINLMULMAX2-RV32-NEXT: vfwcvt.f.f.v v9, v8 ; ZVFHMINLMULMAX2-RV32-NEXT: vfwcvt.f.f.v v8, v10 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; ZVFHMINLMULMAX2-RV32-NEXT: vfadd.vv v8, v8, v9 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; ZVFHMINLMULMAX2-RV32-NEXT: vfncvt.f.f.w v9, v8 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma ; ZVFHMINLMULMAX2-RV32-NEXT: vslidedown.vi v8, v9, 2 ; ZVFHMINLMULMAX2-RV32-NEXT: addi a1, a0, 8 ; ZVFHMINLMULMAX2-RV32-NEXT: vse32.v v8, (a1) ; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma ; ZVFHMINLMULMAX2-RV32-NEXT: vse16.v v9, (a0) ; ZVFHMINLMULMAX2-RV32-NEXT: ret ; ; ZVFHMINLMULMAX2-RV64-LABEL: fadd_fv_v6f16: ; ZVFHMINLMULMAX2-RV64: # 
%bb.0: ; ZVFHMINLMULMAX2-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma ; ZVFHMINLMULMAX2-RV64-NEXT: vle16.v v8, (a0) ; ZVFHMINLMULMAX2-RV64-NEXT: fcvt.s.h fa5, fa0 ; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli a1, zero, e32, m1, ta, ma ; ZVFHMINLMULMAX2-RV64-NEXT: vfmv.v.f v9, fa5 ; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; ZVFHMINLMULMAX2-RV64-NEXT: vfncvt.f.f.w v10, v9 ; ZVFHMINLMULMAX2-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma ; ZVFHMINLMULMAX2-RV64-NEXT: vfwcvt.f.f.v v9, v8 ; ZVFHMINLMULMAX2-RV64-NEXT: vfwcvt.f.f.v v8, v10 ; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; ZVFHMINLMULMAX2-RV64-NEXT: vfadd.vv v8, v8, v9 ; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; ZVFHMINLMULMAX2-RV64-NEXT: vfncvt.f.f.w v9, v8 ; ZVFHMINLMULMAX2-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma ; ZVFHMINLMULMAX2-RV64-NEXT: vse64.v v9, (a0) ; ZVFHMINLMULMAX2-RV64-NEXT: vslidedown.vi v8, v9, 2 ; ZVFHMINLMULMAX2-RV64-NEXT: addi a0, a0, 8 ; ZVFHMINLMULMAX2-RV64-NEXT: vse32.v v8, (a0) ; ZVFHMINLMULMAX2-RV64-NEXT: ret ; ; ZVFHMINLMULMAX1-RV32-LABEL: fadd_fv_v6f16: ; ZVFHMINLMULMAX1-RV32: # %bb.0: ; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma ; ZVFHMINLMULMAX1-RV32-NEXT: vle16.v v8, (a0) ; ZVFHMINLMULMAX1-RV32-NEXT: fcvt.s.h fa5, fa0 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli a1, zero, e32, m1, ta, ma ; ZVFHMINLMULMAX1-RV32-NEXT: vfmv.v.f v9, fa5 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; ZVFHMINLMULMAX1-RV32-NEXT: vfncvt.f.f.w v10, v9 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma ; ZVFHMINLMULMAX1-RV32-NEXT: vfwcvt.f.f.v v9, v8 ; ZVFHMINLMULMAX1-RV32-NEXT: vfwcvt.f.f.v v8, v10 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; ZVFHMINLMULMAX1-RV32-NEXT: vfadd.vv v8, v8, v9 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; ZVFHMINLMULMAX1-RV32-NEXT: vfncvt.f.f.w v9, v8 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma ; ZVFHMINLMULMAX1-RV32-NEXT: vslidedown.vi v8, v9, 2 ; ZVFHMINLMULMAX1-RV32-NEXT: addi a1, a0, 8 ; ZVFHMINLMULMAX1-RV32-NEXT: vse32.v v8, (a1) ; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma ; ZVFHMINLMULMAX1-RV32-NEXT: vse16.v v9, (a0) ; ZVFHMINLMULMAX1-RV32-NEXT: ret ; ; ZVFHMINLMULMAX1-RV64-LABEL: fadd_fv_v6f16: ; ZVFHMINLMULMAX1-RV64: # %bb.0: ; ZVFHMINLMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma ; ZVFHMINLMULMAX1-RV64-NEXT: vle16.v v8, (a0) ; ZVFHMINLMULMAX1-RV64-NEXT: fcvt.s.h fa5, fa0 ; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli a1, zero, e32, m1, ta, ma ; ZVFHMINLMULMAX1-RV64-NEXT: vfmv.v.f v9, fa5 ; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; ZVFHMINLMULMAX1-RV64-NEXT: vfncvt.f.f.w v10, v9 ; ZVFHMINLMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma ; ZVFHMINLMULMAX1-RV64-NEXT: vfwcvt.f.f.v v9, v8 ; ZVFHMINLMULMAX1-RV64-NEXT: vfwcvt.f.f.v v8, v10 ; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; ZVFHMINLMULMAX1-RV64-NEXT: vfadd.vv v8, v8, v9 ; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; ZVFHMINLMULMAX1-RV64-NEXT: vfncvt.f.f.w v9, v8 ; ZVFHMINLMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma ; ZVFHMINLMULMAX1-RV64-NEXT: vse64.v v9, (a0) ; ZVFHMINLMULMAX1-RV64-NEXT: vslidedown.vi v8, v9, 2 ; ZVFHMINLMULMAX1-RV64-NEXT: addi a0, a0, 8 ; ZVFHMINLMULMAX1-RV64-NEXT: vse32.v v8, (a0) ; ZVFHMINLMULMAX1-RV64-NEXT: ret %a = load <6 x half>, ptr %x %b = insertelement <6 x half> poison, half %y, i32 0 %c = shufflevector <6 x half> %b, <6 x half> poison, <6 x 
i32> zeroinitializer %d = fadd <6 x half> %c, %a store <6 x half> %d, ptr %x ret void } define void @fadd_fv_v4f32(ptr %x, float %y) { ; ZVFH-LABEL: fadd_fv_v4f32: ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; ZVFH-NEXT: vle32.v v8, (a0) ; ZVFH-NEXT: vfadd.vf v8, v8, fa0 ; ZVFH-NEXT: vse32.v v8, (a0) ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: fadd_fv_v4f32: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: vsetivli zero, 4, e32, mf2, ta, ma ; ZVFHMIN-NEXT: vle32.v v8, (a0) ; ZVFHMIN-NEXT: vfadd.vf v8, v8, fa0 ; ZVFHMIN-NEXT: vse32.v v8, (a0) ; ZVFHMIN-NEXT: ret %a = load <4 x float>, ptr %x %b = insertelement <4 x float> poison, float %y, i32 0 %c = shufflevector <4 x float> %b, <4 x float> poison, <4 x i32> zeroinitializer %d = fadd <4 x float> %c, %a store <4 x float> %d, ptr %x ret void } define void @fadd_fv_v2f64(ptr %x, double %y) { ; CHECK-LABEL: fadd_fv_v2f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; CHECK-NEXT: vle64.v v8, (a0) ; CHECK-NEXT: vfadd.vf v8, v8, fa0 ; CHECK-NEXT: vse64.v v8, (a0) ; CHECK-NEXT: ret %a = load <2 x double>, ptr %x %b = insertelement <2 x double> poison, double %y, i32 0 %c = shufflevector <2 x double> %b, <2 x double> poison, <2 x i32> zeroinitializer %d = fadd <2 x double> %c, %a store <2 x double> %d, ptr %x ret void } define void @fsub_vf_v8f16(ptr %x, half %y) { ; ZVFH-LABEL: fsub_vf_v8f16: ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; ZVFH-NEXT: vle16.v v8, (a0) ; ZVFH-NEXT: vfsub.vf v8, v8, fa0 ; ZVFH-NEXT: vse16.v v8, (a0) ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: fsub_vf_v8f16: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: vsetivli zero, 8, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vle16.v v8, (a0) ; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0 ; ZVFHMIN-NEXT: vsetvli a1, zero, e32, m1, ta, ma ; ZVFHMIN-NEXT: vfmv.v.f v9, fa5 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v9 ; ZVFHMIN-NEXT: vsetivli zero, 8, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; ZVFHMIN-NEXT: vfsub.vv v8, v9, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v9, v8 ; ZVFHMIN-NEXT: vse16.v v9, (a0) ; ZVFHMIN-NEXT: ret %a = load <8 x half>, ptr %x %b = insertelement <8 x half> poison, half %y, i32 0 %c = shufflevector <8 x half> %b, <8 x half> poison, <8 x i32> zeroinitializer %d = fsub <8 x half> %a, %c store <8 x half> %d, ptr %x ret void } define void @fsub_vf_v6f16(ptr %x, half %y) { ; ZVFH-LABEL: fsub_vf_v6f16: ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma ; ZVFH-NEXT: vle16.v v8, (a0) ; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; ZVFH-NEXT: vfsub.vf v8, v8, fa0 ; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma ; ZVFH-NEXT: vse16.v v8, (a0) ; ZVFH-NEXT: ret ; ; ZVFHMINLMULMAX2-RV32-LABEL: fsub_vf_v6f16: ; ZVFHMINLMULMAX2-RV32: # %bb.0: ; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma ; ZVFHMINLMULMAX2-RV32-NEXT: vle16.v v8, (a0) ; ZVFHMINLMULMAX2-RV32-NEXT: fcvt.s.h fa5, fa0 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli a1, zero, e32, m1, ta, ma ; ZVFHMINLMULMAX2-RV32-NEXT: vfmv.v.f v9, fa5 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; ZVFHMINLMULMAX2-RV32-NEXT: vfncvt.f.f.w v10, v9 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma ; ZVFHMINLMULMAX2-RV32-NEXT: vfwcvt.f.f.v v9, v8 ; ZVFHMINLMULMAX2-RV32-NEXT: vfwcvt.f.f.v v8, v10 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; ZVFHMINLMULMAX2-RV32-NEXT: vfsub.vv 
v8, v9, v8 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; ZVFHMINLMULMAX2-RV32-NEXT: vfncvt.f.f.w v9, v8 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma ; ZVFHMINLMULMAX2-RV32-NEXT: vslidedown.vi v8, v9, 2 ; ZVFHMINLMULMAX2-RV32-NEXT: addi a1, a0, 8 ; ZVFHMINLMULMAX2-RV32-NEXT: vse32.v v8, (a1) ; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma ; ZVFHMINLMULMAX2-RV32-NEXT: vse16.v v9, (a0) ; ZVFHMINLMULMAX2-RV32-NEXT: ret ; ; ZVFHMINLMULMAX2-RV64-LABEL: fsub_vf_v6f16: ; ZVFHMINLMULMAX2-RV64: # %bb.0: ; ZVFHMINLMULMAX2-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma ; ZVFHMINLMULMAX2-RV64-NEXT: vle16.v v8, (a0) ; ZVFHMINLMULMAX2-RV64-NEXT: fcvt.s.h fa5, fa0 ; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli a1, zero, e32, m1, ta, ma ; ZVFHMINLMULMAX2-RV64-NEXT: vfmv.v.f v9, fa5 ; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; ZVFHMINLMULMAX2-RV64-NEXT: vfncvt.f.f.w v10, v9 ; ZVFHMINLMULMAX2-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma ; ZVFHMINLMULMAX2-RV64-NEXT: vfwcvt.f.f.v v9, v8 ; ZVFHMINLMULMAX2-RV64-NEXT: vfwcvt.f.f.v v8, v10 ; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; ZVFHMINLMULMAX2-RV64-NEXT: vfsub.vv v8, v9, v8 ; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; ZVFHMINLMULMAX2-RV64-NEXT: vfncvt.f.f.w v9, v8 ; ZVFHMINLMULMAX2-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma ; ZVFHMINLMULMAX2-RV64-NEXT: vse64.v v9, (a0) ; ZVFHMINLMULMAX2-RV64-NEXT: vslidedown.vi v8, v9, 2 ; ZVFHMINLMULMAX2-RV64-NEXT: addi a0, a0, 8 ; ZVFHMINLMULMAX2-RV64-NEXT: vse32.v v8, (a0) ; ZVFHMINLMULMAX2-RV64-NEXT: ret ; ; ZVFHMINLMULMAX1-RV32-LABEL: fsub_vf_v6f16: ; ZVFHMINLMULMAX1-RV32: # %bb.0: ; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma ; ZVFHMINLMULMAX1-RV32-NEXT: vle16.v v8, (a0) ; ZVFHMINLMULMAX1-RV32-NEXT: fcvt.s.h fa5, fa0 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli a1, zero, e32, m1, ta, ma ; ZVFHMINLMULMAX1-RV32-NEXT: vfmv.v.f v9, fa5 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; ZVFHMINLMULMAX1-RV32-NEXT: vfncvt.f.f.w v10, v9 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma ; ZVFHMINLMULMAX1-RV32-NEXT: vfwcvt.f.f.v v9, v8 ; ZVFHMINLMULMAX1-RV32-NEXT: vfwcvt.f.f.v v8, v10 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; ZVFHMINLMULMAX1-RV32-NEXT: vfsub.vv v8, v9, v8 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; ZVFHMINLMULMAX1-RV32-NEXT: vfncvt.f.f.w v9, v8 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma ; ZVFHMINLMULMAX1-RV32-NEXT: vslidedown.vi v8, v9, 2 ; ZVFHMINLMULMAX1-RV32-NEXT: addi a1, a0, 8 ; ZVFHMINLMULMAX1-RV32-NEXT: vse32.v v8, (a1) ; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma ; ZVFHMINLMULMAX1-RV32-NEXT: vse16.v v9, (a0) ; ZVFHMINLMULMAX1-RV32-NEXT: ret ; ; ZVFHMINLMULMAX1-RV64-LABEL: fsub_vf_v6f16: ; ZVFHMINLMULMAX1-RV64: # %bb.0: ; ZVFHMINLMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma ; ZVFHMINLMULMAX1-RV64-NEXT: vle16.v v8, (a0) ; ZVFHMINLMULMAX1-RV64-NEXT: fcvt.s.h fa5, fa0 ; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli a1, zero, e32, m1, ta, ma ; ZVFHMINLMULMAX1-RV64-NEXT: vfmv.v.f v9, fa5 ; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; ZVFHMINLMULMAX1-RV64-NEXT: vfncvt.f.f.w v10, v9 ; ZVFHMINLMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma ; ZVFHMINLMULMAX1-RV64-NEXT: vfwcvt.f.f.v v9, v8 ; ZVFHMINLMULMAX1-RV64-NEXT: vfwcvt.f.f.v v8, v10 ; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; ZVFHMINLMULMAX1-RV64-NEXT: vfsub.vv v8, 
v9, v8 ; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; ZVFHMINLMULMAX1-RV64-NEXT: vfncvt.f.f.w v9, v8 ; ZVFHMINLMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma ; ZVFHMINLMULMAX1-RV64-NEXT: vse64.v v9, (a0) ; ZVFHMINLMULMAX1-RV64-NEXT: vslidedown.vi v8, v9, 2 ; ZVFHMINLMULMAX1-RV64-NEXT: addi a0, a0, 8 ; ZVFHMINLMULMAX1-RV64-NEXT: vse32.v v8, (a0) ; ZVFHMINLMULMAX1-RV64-NEXT: ret %a = load <6 x half>, ptr %x %b = insertelement <6 x half> poison, half %y, i32 0 %c = shufflevector <6 x half> %b, <6 x half> poison, <6 x i32> zeroinitializer %d = fsub <6 x half> %a, %c store <6 x half> %d, ptr %x ret void } define void @fsub_vf_v4f32(ptr %x, float %y) { ; ZVFH-LABEL: fsub_vf_v4f32: ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; ZVFH-NEXT: vle32.v v8, (a0) ; ZVFH-NEXT: vfsub.vf v8, v8, fa0 ; ZVFH-NEXT: vse32.v v8, (a0) ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: fsub_vf_v4f32: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: vsetivli zero, 4, e32, mf2, ta, ma ; ZVFHMIN-NEXT: vle32.v v8, (a0) ; ZVFHMIN-NEXT: vfsub.vf v8, v8, fa0 ; ZVFHMIN-NEXT: vse32.v v8, (a0) ; ZVFHMIN-NEXT: ret %a = load <4 x float>, ptr %x %b = insertelement <4 x float> poison, float %y, i32 0 %c = shufflevector <4 x float> %b, <4 x float> poison, <4 x i32> zeroinitializer %d = fsub <4 x float> %a, %c store <4 x float> %d, ptr %x ret void } define void @fsub_vf_v2f64(ptr %x, double %y) { ; CHECK-LABEL: fsub_vf_v2f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; CHECK-NEXT: vle64.v v8, (a0) ; CHECK-NEXT: vfsub.vf v8, v8, fa0 ; CHECK-NEXT: vse64.v v8, (a0) ; CHECK-NEXT: ret %a = load <2 x double>, ptr %x %b = insertelement <2 x double> poison, double %y, i32 0 %c = shufflevector <2 x double> %b, <2 x double> poison, <2 x i32> zeroinitializer %d = fsub <2 x double> %a, %c store <2 x double> %d, ptr %x ret void } define void @fsub_fv_v8f16(ptr %x, half %y) { ; ZVFH-LABEL: fsub_fv_v8f16: ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; ZVFH-NEXT: vle16.v v8, (a0) ; ZVFH-NEXT: vfrsub.vf v8, v8, fa0 ; ZVFH-NEXT: vse16.v v8, (a0) ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: fsub_fv_v8f16: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: vsetivli zero, 8, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vle16.v v8, (a0) ; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0 ; ZVFHMIN-NEXT: vsetvli a1, zero, e32, m1, ta, ma ; ZVFHMIN-NEXT: vfmv.v.f v9, fa5 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v9 ; ZVFHMIN-NEXT: vsetivli zero, 8, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; ZVFHMIN-NEXT: vfsub.vv v8, v8, v9 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v9, v8 ; ZVFHMIN-NEXT: vse16.v v9, (a0) ; ZVFHMIN-NEXT: ret %a = load <8 x half>, ptr %x %b = insertelement <8 x half> poison, half %y, i32 0 %c = shufflevector <8 x half> %b, <8 x half> poison, <8 x i32> zeroinitializer %d = fsub <8 x half> %c, %a store <8 x half> %d, ptr %x ret void } define void @fsub_fv_v6f16(ptr %x, half %y) { ; ZVFH-LABEL: fsub_fv_v6f16: ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma ; ZVFH-NEXT: vle16.v v8, (a0) ; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; ZVFH-NEXT: vfrsub.vf v8, v8, fa0 ; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma ; ZVFH-NEXT: vse16.v v8, (a0) ; ZVFH-NEXT: ret ; ; ZVFHMINLMULMAX2-RV32-LABEL: fsub_fv_v6f16: ; ZVFHMINLMULMAX2-RV32: # %bb.0: ; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma ; ZVFHMINLMULMAX2-RV32-NEXT: vle16.v 
v8, (a0) ; ZVFHMINLMULMAX2-RV32-NEXT: fcvt.s.h fa5, fa0 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli a1, zero, e32, m1, ta, ma ; ZVFHMINLMULMAX2-RV32-NEXT: vfmv.v.f v9, fa5 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; ZVFHMINLMULMAX2-RV32-NEXT: vfncvt.f.f.w v10, v9 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma ; ZVFHMINLMULMAX2-RV32-NEXT: vfwcvt.f.f.v v9, v8 ; ZVFHMINLMULMAX2-RV32-NEXT: vfwcvt.f.f.v v8, v10 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; ZVFHMINLMULMAX2-RV32-NEXT: vfsub.vv v8, v8, v9 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; ZVFHMINLMULMAX2-RV32-NEXT: vfncvt.f.f.w v9, v8 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma ; ZVFHMINLMULMAX2-RV32-NEXT: vslidedown.vi v8, v9, 2 ; ZVFHMINLMULMAX2-RV32-NEXT: addi a1, a0, 8 ; ZVFHMINLMULMAX2-RV32-NEXT: vse32.v v8, (a1) ; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma ; ZVFHMINLMULMAX2-RV32-NEXT: vse16.v v9, (a0) ; ZVFHMINLMULMAX2-RV32-NEXT: ret ; ; ZVFHMINLMULMAX2-RV64-LABEL: fsub_fv_v6f16: ; ZVFHMINLMULMAX2-RV64: # %bb.0: ; ZVFHMINLMULMAX2-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma ; ZVFHMINLMULMAX2-RV64-NEXT: vle16.v v8, (a0) ; ZVFHMINLMULMAX2-RV64-NEXT: fcvt.s.h fa5, fa0 ; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli a1, zero, e32, m1, ta, ma ; ZVFHMINLMULMAX2-RV64-NEXT: vfmv.v.f v9, fa5 ; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; ZVFHMINLMULMAX2-RV64-NEXT: vfncvt.f.f.w v10, v9 ; ZVFHMINLMULMAX2-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma ; ZVFHMINLMULMAX2-RV64-NEXT: vfwcvt.f.f.v v9, v8 ; ZVFHMINLMULMAX2-RV64-NEXT: vfwcvt.f.f.v v8, v10 ; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; ZVFHMINLMULMAX2-RV64-NEXT: vfsub.vv v8, v8, v9 ; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; ZVFHMINLMULMAX2-RV64-NEXT: vfncvt.f.f.w v9, v8 ; ZVFHMINLMULMAX2-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma ; ZVFHMINLMULMAX2-RV64-NEXT: vse64.v v9, (a0) ; ZVFHMINLMULMAX2-RV64-NEXT: vslidedown.vi v8, v9, 2 ; ZVFHMINLMULMAX2-RV64-NEXT: addi a0, a0, 8 ; ZVFHMINLMULMAX2-RV64-NEXT: vse32.v v8, (a0) ; ZVFHMINLMULMAX2-RV64-NEXT: ret ; ; ZVFHMINLMULMAX1-RV32-LABEL: fsub_fv_v6f16: ; ZVFHMINLMULMAX1-RV32: # %bb.0: ; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma ; ZVFHMINLMULMAX1-RV32-NEXT: vle16.v v8, (a0) ; ZVFHMINLMULMAX1-RV32-NEXT: fcvt.s.h fa5, fa0 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli a1, zero, e32, m1, ta, ma ; ZVFHMINLMULMAX1-RV32-NEXT: vfmv.v.f v9, fa5 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; ZVFHMINLMULMAX1-RV32-NEXT: vfncvt.f.f.w v10, v9 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma ; ZVFHMINLMULMAX1-RV32-NEXT: vfwcvt.f.f.v v9, v8 ; ZVFHMINLMULMAX1-RV32-NEXT: vfwcvt.f.f.v v8, v10 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; ZVFHMINLMULMAX1-RV32-NEXT: vfsub.vv v8, v8, v9 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; ZVFHMINLMULMAX1-RV32-NEXT: vfncvt.f.f.w v9, v8 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma ; ZVFHMINLMULMAX1-RV32-NEXT: vslidedown.vi v8, v9, 2 ; ZVFHMINLMULMAX1-RV32-NEXT: addi a1, a0, 8 ; ZVFHMINLMULMAX1-RV32-NEXT: vse32.v v8, (a1) ; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma ; ZVFHMINLMULMAX1-RV32-NEXT: vse16.v v9, (a0) ; ZVFHMINLMULMAX1-RV32-NEXT: ret ; ; ZVFHMINLMULMAX1-RV64-LABEL: fsub_fv_v6f16: ; ZVFHMINLMULMAX1-RV64: # %bb.0: ; ZVFHMINLMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma ; ZVFHMINLMULMAX1-RV64-NEXT: vle16.v v8, 
(a0) ; ZVFHMINLMULMAX1-RV64-NEXT: fcvt.s.h fa5, fa0 ; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli a1, zero, e32, m1, ta, ma ; ZVFHMINLMULMAX1-RV64-NEXT: vfmv.v.f v9, fa5 ; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; ZVFHMINLMULMAX1-RV64-NEXT: vfncvt.f.f.w v10, v9 ; ZVFHMINLMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma ; ZVFHMINLMULMAX1-RV64-NEXT: vfwcvt.f.f.v v9, v8 ; ZVFHMINLMULMAX1-RV64-NEXT: vfwcvt.f.f.v v8, v10 ; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; ZVFHMINLMULMAX1-RV64-NEXT: vfsub.vv v8, v8, v9 ; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; ZVFHMINLMULMAX1-RV64-NEXT: vfncvt.f.f.w v9, v8 ; ZVFHMINLMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma ; ZVFHMINLMULMAX1-RV64-NEXT: vse64.v v9, (a0) ; ZVFHMINLMULMAX1-RV64-NEXT: vslidedown.vi v8, v9, 2 ; ZVFHMINLMULMAX1-RV64-NEXT: addi a0, a0, 8 ; ZVFHMINLMULMAX1-RV64-NEXT: vse32.v v8, (a0) ; ZVFHMINLMULMAX1-RV64-NEXT: ret %a = load <6 x half>, ptr %x %b = insertelement <6 x half> poison, half %y, i32 0 %c = shufflevector <6 x half> %b, <6 x half> poison, <6 x i32> zeroinitializer %d = fsub <6 x half> %c, %a store <6 x half> %d, ptr %x ret void } define void @fsub_fv_v4f32(ptr %x, float %y) { ; ZVFH-LABEL: fsub_fv_v4f32: ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; ZVFH-NEXT: vle32.v v8, (a0) ; ZVFH-NEXT: vfrsub.vf v8, v8, fa0 ; ZVFH-NEXT: vse32.v v8, (a0) ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: fsub_fv_v4f32: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: vsetivli zero, 4, e32, mf2, ta, ma ; ZVFHMIN-NEXT: vle32.v v8, (a0) ; ZVFHMIN-NEXT: vfrsub.vf v8, v8, fa0 ; ZVFHMIN-NEXT: vse32.v v8, (a0) ; ZVFHMIN-NEXT: ret %a = load <4 x float>, ptr %x %b = insertelement <4 x float> poison, float %y, i32 0 %c = shufflevector <4 x float> %b, <4 x float> poison, <4 x i32> zeroinitializer %d = fsub <4 x float> %c, %a store <4 x float> %d, ptr %x ret void } define void @fsub_fv_v2f64(ptr %x, double %y) { ; CHECK-LABEL: fsub_fv_v2f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; CHECK-NEXT: vle64.v v8, (a0) ; CHECK-NEXT: vfrsub.vf v8, v8, fa0 ; CHECK-NEXT: vse64.v v8, (a0) ; CHECK-NEXT: ret %a = load <2 x double>, ptr %x %b = insertelement <2 x double> poison, double %y, i32 0 %c = shufflevector <2 x double> %b, <2 x double> poison, <2 x i32> zeroinitializer %d = fsub <2 x double> %c, %a store <2 x double> %d, ptr %x ret void } define void @fmul_vf_v8f16(ptr %x, half %y) { ; ZVFH-LABEL: fmul_vf_v8f16: ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; ZVFH-NEXT: vle16.v v8, (a0) ; ZVFH-NEXT: vfmul.vf v8, v8, fa0 ; ZVFH-NEXT: vse16.v v8, (a0) ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: fmul_vf_v8f16: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: vsetivli zero, 8, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vle16.v v8, (a0) ; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0 ; ZVFHMIN-NEXT: vsetvli a1, zero, e32, m1, ta, ma ; ZVFHMIN-NEXT: vfmv.v.f v9, fa5 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v9 ; ZVFHMIN-NEXT: vsetivli zero, 8, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; ZVFHMIN-NEXT: vfmul.vv v8, v9, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v9, v8 ; ZVFHMIN-NEXT: vse16.v v9, (a0) ; ZVFHMIN-NEXT: ret %a = load <8 x half>, ptr %x %b = insertelement <8 x half> poison, half %y, i32 0 %c = shufflevector <8 x half> %b, <8 x half> poison, <8 x i32> zeroinitializer %d = fmul <8 x half> %a, %c store <8 x 
half> %d, ptr %x ret void } define void @fmul_vf_v6f16(ptr %x, half %y) { ; ZVFH-LABEL: fmul_vf_v6f16: ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma ; ZVFH-NEXT: vle16.v v8, (a0) ; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; ZVFH-NEXT: vfmul.vf v8, v8, fa0 ; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma ; ZVFH-NEXT: vse16.v v8, (a0) ; ZVFH-NEXT: ret ; ; ZVFHMINLMULMAX2-RV32-LABEL: fmul_vf_v6f16: ; ZVFHMINLMULMAX2-RV32: # %bb.0: ; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma ; ZVFHMINLMULMAX2-RV32-NEXT: vle16.v v8, (a0) ; ZVFHMINLMULMAX2-RV32-NEXT: fcvt.s.h fa5, fa0 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli a1, zero, e32, m1, ta, ma ; ZVFHMINLMULMAX2-RV32-NEXT: vfmv.v.f v9, fa5 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; ZVFHMINLMULMAX2-RV32-NEXT: vfncvt.f.f.w v10, v9 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma ; ZVFHMINLMULMAX2-RV32-NEXT: vfwcvt.f.f.v v9, v8 ; ZVFHMINLMULMAX2-RV32-NEXT: vfwcvt.f.f.v v8, v10 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; ZVFHMINLMULMAX2-RV32-NEXT: vfmul.vv v8, v9, v8 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; ZVFHMINLMULMAX2-RV32-NEXT: vfncvt.f.f.w v9, v8 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma ; ZVFHMINLMULMAX2-RV32-NEXT: vslidedown.vi v8, v9, 2 ; ZVFHMINLMULMAX2-RV32-NEXT: addi a1, a0, 8 ; ZVFHMINLMULMAX2-RV32-NEXT: vse32.v v8, (a1) ; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma ; ZVFHMINLMULMAX2-RV32-NEXT: vse16.v v9, (a0) ; ZVFHMINLMULMAX2-RV32-NEXT: ret ; ; ZVFHMINLMULMAX2-RV64-LABEL: fmul_vf_v6f16: ; ZVFHMINLMULMAX2-RV64: # %bb.0: ; ZVFHMINLMULMAX2-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma ; ZVFHMINLMULMAX2-RV64-NEXT: vle16.v v8, (a0) ; ZVFHMINLMULMAX2-RV64-NEXT: fcvt.s.h fa5, fa0 ; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli a1, zero, e32, m1, ta, ma ; ZVFHMINLMULMAX2-RV64-NEXT: vfmv.v.f v9, fa5 ; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; ZVFHMINLMULMAX2-RV64-NEXT: vfncvt.f.f.w v10, v9 ; ZVFHMINLMULMAX2-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma ; ZVFHMINLMULMAX2-RV64-NEXT: vfwcvt.f.f.v v9, v8 ; ZVFHMINLMULMAX2-RV64-NEXT: vfwcvt.f.f.v v8, v10 ; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; ZVFHMINLMULMAX2-RV64-NEXT: vfmul.vv v8, v9, v8 ; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; ZVFHMINLMULMAX2-RV64-NEXT: vfncvt.f.f.w v9, v8 ; ZVFHMINLMULMAX2-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma ; ZVFHMINLMULMAX2-RV64-NEXT: vse64.v v9, (a0) ; ZVFHMINLMULMAX2-RV64-NEXT: vslidedown.vi v8, v9, 2 ; ZVFHMINLMULMAX2-RV64-NEXT: addi a0, a0, 8 ; ZVFHMINLMULMAX2-RV64-NEXT: vse32.v v8, (a0) ; ZVFHMINLMULMAX2-RV64-NEXT: ret ; ; ZVFHMINLMULMAX1-RV32-LABEL: fmul_vf_v6f16: ; ZVFHMINLMULMAX1-RV32: # %bb.0: ; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma ; ZVFHMINLMULMAX1-RV32-NEXT: vle16.v v8, (a0) ; ZVFHMINLMULMAX1-RV32-NEXT: fcvt.s.h fa5, fa0 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli a1, zero, e32, m1, ta, ma ; ZVFHMINLMULMAX1-RV32-NEXT: vfmv.v.f v9, fa5 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; ZVFHMINLMULMAX1-RV32-NEXT: vfncvt.f.f.w v10, v9 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma ; ZVFHMINLMULMAX1-RV32-NEXT: vfwcvt.f.f.v v9, v8 ; ZVFHMINLMULMAX1-RV32-NEXT: vfwcvt.f.f.v v8, v10 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; ZVFHMINLMULMAX1-RV32-NEXT: vfmul.vv v8, v9, v8 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; ZVFHMINLMULMAX1-RV32-NEXT: 
vfncvt.f.f.w v9, v8 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma ; ZVFHMINLMULMAX1-RV32-NEXT: vslidedown.vi v8, v9, 2 ; ZVFHMINLMULMAX1-RV32-NEXT: addi a1, a0, 8 ; ZVFHMINLMULMAX1-RV32-NEXT: vse32.v v8, (a1) ; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma ; ZVFHMINLMULMAX1-RV32-NEXT: vse16.v v9, (a0) ; ZVFHMINLMULMAX1-RV32-NEXT: ret ; ; ZVFHMINLMULMAX1-RV64-LABEL: fmul_vf_v6f16: ; ZVFHMINLMULMAX1-RV64: # %bb.0: ; ZVFHMINLMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma ; ZVFHMINLMULMAX1-RV64-NEXT: vle16.v v8, (a0) ; ZVFHMINLMULMAX1-RV64-NEXT: fcvt.s.h fa5, fa0 ; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli a1, zero, e32, m1, ta, ma ; ZVFHMINLMULMAX1-RV64-NEXT: vfmv.v.f v9, fa5 ; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; ZVFHMINLMULMAX1-RV64-NEXT: vfncvt.f.f.w v10, v9 ; ZVFHMINLMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma ; ZVFHMINLMULMAX1-RV64-NEXT: vfwcvt.f.f.v v9, v8 ; ZVFHMINLMULMAX1-RV64-NEXT: vfwcvt.f.f.v v8, v10 ; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; ZVFHMINLMULMAX1-RV64-NEXT: vfmul.vv v8, v9, v8 ; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; ZVFHMINLMULMAX1-RV64-NEXT: vfncvt.f.f.w v9, v8 ; ZVFHMINLMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma ; ZVFHMINLMULMAX1-RV64-NEXT: vse64.v v9, (a0) ; ZVFHMINLMULMAX1-RV64-NEXT: vslidedown.vi v8, v9, 2 ; ZVFHMINLMULMAX1-RV64-NEXT: addi a0, a0, 8 ; ZVFHMINLMULMAX1-RV64-NEXT: vse32.v v8, (a0) ; ZVFHMINLMULMAX1-RV64-NEXT: ret %a = load <6 x half>, ptr %x %b = insertelement <6 x half> poison, half %y, i32 0 %c = shufflevector <6 x half> %b, <6 x half> poison, <6 x i32> zeroinitializer %d = fmul <6 x half> %a, %c store <6 x half> %d, ptr %x ret void } define void @fmul_vf_v4f32(ptr %x, float %y) { ; ZVFH-LABEL: fmul_vf_v4f32: ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; ZVFH-NEXT: vle32.v v8, (a0) ; ZVFH-NEXT: vfmul.vf v8, v8, fa0 ; ZVFH-NEXT: vse32.v v8, (a0) ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: fmul_vf_v4f32: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: vsetivli zero, 4, e32, mf2, ta, ma ; ZVFHMIN-NEXT: vle32.v v8, (a0) ; ZVFHMIN-NEXT: vfmul.vf v8, v8, fa0 ; ZVFHMIN-NEXT: vse32.v v8, (a0) ; ZVFHMIN-NEXT: ret %a = load <4 x float>, ptr %x %b = insertelement <4 x float> poison, float %y, i32 0 %c = shufflevector <4 x float> %b, <4 x float> poison, <4 x i32> zeroinitializer %d = fmul <4 x float> %a, %c store <4 x float> %d, ptr %x ret void } define void @fmul_vf_v2f64(ptr %x, double %y) { ; CHECK-LABEL: fmul_vf_v2f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; CHECK-NEXT: vle64.v v8, (a0) ; CHECK-NEXT: vfmul.vf v8, v8, fa0 ; CHECK-NEXT: vse64.v v8, (a0) ; CHECK-NEXT: ret %a = load <2 x double>, ptr %x %b = insertelement <2 x double> poison, double %y, i32 0 %c = shufflevector <2 x double> %b, <2 x double> poison, <2 x i32> zeroinitializer %d = fmul <2 x double> %a, %c store <2 x double> %d, ptr %x ret void } define void @fmul_fv_v8f16(ptr %x, half %y) { ; ZVFH-LABEL: fmul_fv_v8f16: ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; ZVFH-NEXT: vle16.v v8, (a0) ; ZVFH-NEXT: vfmul.vf v8, v8, fa0 ; ZVFH-NEXT: vse16.v v8, (a0) ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: fmul_fv_v8f16: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: vsetivli zero, 8, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vle16.v v8, (a0) ; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0 ; ZVFHMIN-NEXT: vsetvli a1, zero, e32, m1, ta, ma ; ZVFHMIN-NEXT: vfmv.v.f v9, fa5 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v9 ; 
ZVFHMIN-NEXT: vsetivli zero, 8, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; ZVFHMIN-NEXT: vfmul.vv v8, v8, v9 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v9, v8 ; ZVFHMIN-NEXT: vse16.v v9, (a0) ; ZVFHMIN-NEXT: ret %a = load <8 x half>, ptr %x %b = insertelement <8 x half> poison, half %y, i32 0 %c = shufflevector <8 x half> %b, <8 x half> poison, <8 x i32> zeroinitializer %d = fmul <8 x half> %c, %a store <8 x half> %d, ptr %x ret void } define void @fmul_fv_v6f16(ptr %x, half %y) { ; ZVFH-LABEL: fmul_fv_v6f16: ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma ; ZVFH-NEXT: vle16.v v8, (a0) ; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; ZVFH-NEXT: vfmul.vf v8, v8, fa0 ; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma ; ZVFH-NEXT: vse16.v v8, (a0) ; ZVFH-NEXT: ret ; ; ZVFHMINLMULMAX2-RV32-LABEL: fmul_fv_v6f16: ; ZVFHMINLMULMAX2-RV32: # %bb.0: ; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma ; ZVFHMINLMULMAX2-RV32-NEXT: vle16.v v8, (a0) ; ZVFHMINLMULMAX2-RV32-NEXT: fcvt.s.h fa5, fa0 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli a1, zero, e32, m1, ta, ma ; ZVFHMINLMULMAX2-RV32-NEXT: vfmv.v.f v9, fa5 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; ZVFHMINLMULMAX2-RV32-NEXT: vfncvt.f.f.w v10, v9 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma ; ZVFHMINLMULMAX2-RV32-NEXT: vfwcvt.f.f.v v9, v8 ; ZVFHMINLMULMAX2-RV32-NEXT: vfwcvt.f.f.v v8, v10 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; ZVFHMINLMULMAX2-RV32-NEXT: vfmul.vv v8, v8, v9 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; ZVFHMINLMULMAX2-RV32-NEXT: vfncvt.f.f.w v9, v8 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma ; ZVFHMINLMULMAX2-RV32-NEXT: vslidedown.vi v8, v9, 2 ; ZVFHMINLMULMAX2-RV32-NEXT: addi a1, a0, 8 ; ZVFHMINLMULMAX2-RV32-NEXT: vse32.v v8, (a1) ; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma ; ZVFHMINLMULMAX2-RV32-NEXT: vse16.v v9, (a0) ; ZVFHMINLMULMAX2-RV32-NEXT: ret ; ; ZVFHMINLMULMAX2-RV64-LABEL: fmul_fv_v6f16: ; ZVFHMINLMULMAX2-RV64: # %bb.0: ; ZVFHMINLMULMAX2-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma ; ZVFHMINLMULMAX2-RV64-NEXT: vle16.v v8, (a0) ; ZVFHMINLMULMAX2-RV64-NEXT: fcvt.s.h fa5, fa0 ; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli a1, zero, e32, m1, ta, ma ; ZVFHMINLMULMAX2-RV64-NEXT: vfmv.v.f v9, fa5 ; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; ZVFHMINLMULMAX2-RV64-NEXT: vfncvt.f.f.w v10, v9 ; ZVFHMINLMULMAX2-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma ; ZVFHMINLMULMAX2-RV64-NEXT: vfwcvt.f.f.v v9, v8 ; ZVFHMINLMULMAX2-RV64-NEXT: vfwcvt.f.f.v v8, v10 ; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; ZVFHMINLMULMAX2-RV64-NEXT: vfmul.vv v8, v8, v9 ; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; ZVFHMINLMULMAX2-RV64-NEXT: vfncvt.f.f.w v9, v8 ; ZVFHMINLMULMAX2-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma ; ZVFHMINLMULMAX2-RV64-NEXT: vse64.v v9, (a0) ; ZVFHMINLMULMAX2-RV64-NEXT: vslidedown.vi v8, v9, 2 ; ZVFHMINLMULMAX2-RV64-NEXT: addi a0, a0, 8 ; ZVFHMINLMULMAX2-RV64-NEXT: vse32.v v8, (a0) ; ZVFHMINLMULMAX2-RV64-NEXT: ret ; ; ZVFHMINLMULMAX1-RV32-LABEL: fmul_fv_v6f16: ; ZVFHMINLMULMAX1-RV32: # %bb.0: ; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma ; ZVFHMINLMULMAX1-RV32-NEXT: vle16.v v8, (a0) ; ZVFHMINLMULMAX1-RV32-NEXT: fcvt.s.h fa5, fa0 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli a1, zero, 
e32, m1, ta, ma ; ZVFHMINLMULMAX1-RV32-NEXT: vfmv.v.f v9, fa5 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; ZVFHMINLMULMAX1-RV32-NEXT: vfncvt.f.f.w v10, v9 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma ; ZVFHMINLMULMAX1-RV32-NEXT: vfwcvt.f.f.v v9, v8 ; ZVFHMINLMULMAX1-RV32-NEXT: vfwcvt.f.f.v v8, v10 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; ZVFHMINLMULMAX1-RV32-NEXT: vfmul.vv v8, v8, v9 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; ZVFHMINLMULMAX1-RV32-NEXT: vfncvt.f.f.w v9, v8 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma ; ZVFHMINLMULMAX1-RV32-NEXT: vslidedown.vi v8, v9, 2 ; ZVFHMINLMULMAX1-RV32-NEXT: addi a1, a0, 8 ; ZVFHMINLMULMAX1-RV32-NEXT: vse32.v v8, (a1) ; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma ; ZVFHMINLMULMAX1-RV32-NEXT: vse16.v v9, (a0) ; ZVFHMINLMULMAX1-RV32-NEXT: ret ; ; ZVFHMINLMULMAX1-RV64-LABEL: fmul_fv_v6f16: ; ZVFHMINLMULMAX1-RV64: # %bb.0: ; ZVFHMINLMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma ; ZVFHMINLMULMAX1-RV64-NEXT: vle16.v v8, (a0) ; ZVFHMINLMULMAX1-RV64-NEXT: fcvt.s.h fa5, fa0 ; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli a1, zero, e32, m1, ta, ma ; ZVFHMINLMULMAX1-RV64-NEXT: vfmv.v.f v9, fa5 ; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; ZVFHMINLMULMAX1-RV64-NEXT: vfncvt.f.f.w v10, v9 ; ZVFHMINLMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma ; ZVFHMINLMULMAX1-RV64-NEXT: vfwcvt.f.f.v v9, v8 ; ZVFHMINLMULMAX1-RV64-NEXT: vfwcvt.f.f.v v8, v10 ; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; ZVFHMINLMULMAX1-RV64-NEXT: vfmul.vv v8, v8, v9 ; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; ZVFHMINLMULMAX1-RV64-NEXT: vfncvt.f.f.w v9, v8 ; ZVFHMINLMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma ; ZVFHMINLMULMAX1-RV64-NEXT: vse64.v v9, (a0) ; ZVFHMINLMULMAX1-RV64-NEXT: vslidedown.vi v8, v9, 2 ; ZVFHMINLMULMAX1-RV64-NEXT: addi a0, a0, 8 ; ZVFHMINLMULMAX1-RV64-NEXT: vse32.v v8, (a0) ; ZVFHMINLMULMAX1-RV64-NEXT: ret %a = load <6 x half>, ptr %x %b = insertelement <6 x half> poison, half %y, i32 0 %c = shufflevector <6 x half> %b, <6 x half> poison, <6 x i32> zeroinitializer %d = fmul <6 x half> %c, %a store <6 x half> %d, ptr %x ret void } define void @fmul_fv_v4f32(ptr %x, float %y) { ; ZVFH-LABEL: fmul_fv_v4f32: ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; ZVFH-NEXT: vle32.v v8, (a0) ; ZVFH-NEXT: vfmul.vf v8, v8, fa0 ; ZVFH-NEXT: vse32.v v8, (a0) ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: fmul_fv_v4f32: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: vsetivli zero, 4, e32, mf2, ta, ma ; ZVFHMIN-NEXT: vle32.v v8, (a0) ; ZVFHMIN-NEXT: vfmul.vf v8, v8, fa0 ; ZVFHMIN-NEXT: vse32.v v8, (a0) ; ZVFHMIN-NEXT: ret %a = load <4 x float>, ptr %x %b = insertelement <4 x float> poison, float %y, i32 0 %c = shufflevector <4 x float> %b, <4 x float> poison, <4 x i32> zeroinitializer %d = fmul <4 x float> %c, %a store <4 x float> %d, ptr %x ret void } define void @fmul_fv_v2f64(ptr %x, double %y) { ; CHECK-LABEL: fmul_fv_v2f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; CHECK-NEXT: vle64.v v8, (a0) ; CHECK-NEXT: vfmul.vf v8, v8, fa0 ; CHECK-NEXT: vse64.v v8, (a0) ; CHECK-NEXT: ret %a = load <2 x double>, ptr %x %b = insertelement <2 x double> poison, double %y, i32 0 %c = shufflevector <2 x double> %b, <2 x double> poison, <2 x i32> zeroinitializer %d = fmul <2 x double> %c, %a store <2 x double> %d, ptr %x ret void } define void @fdiv_vf_v8f16(ptr %x, half %y) { ; 
ZVFH-LABEL: fdiv_vf_v8f16: ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; ZVFH-NEXT: vle16.v v8, (a0) ; ZVFH-NEXT: vfdiv.vf v8, v8, fa0 ; ZVFH-NEXT: vse16.v v8, (a0) ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: fdiv_vf_v8f16: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: vsetivli zero, 8, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vle16.v v8, (a0) ; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0 ; ZVFHMIN-NEXT: vsetvli a1, zero, e32, m1, ta, ma ; ZVFHMIN-NEXT: vfmv.v.f v9, fa5 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v9 ; ZVFHMIN-NEXT: vsetivli zero, 8, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; ZVFHMIN-NEXT: vfdiv.vv v8, v9, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v9, v8 ; ZVFHMIN-NEXT: vse16.v v9, (a0) ; ZVFHMIN-NEXT: ret %a = load <8 x half>, ptr %x %b = insertelement <8 x half> poison, half %y, i32 0 %c = shufflevector <8 x half> %b, <8 x half> poison, <8 x i32> zeroinitializer %d = fdiv <8 x half> %a, %c store <8 x half> %d, ptr %x ret void } define void @fdiv_vf_v6f16(ptr %x, half %y) { ; ZVFH-LABEL: fdiv_vf_v6f16: ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma ; ZVFH-NEXT: vle16.v v8, (a0) ; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; ZVFH-NEXT: vfdiv.vf v8, v8, fa0 ; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma ; ZVFH-NEXT: vse16.v v8, (a0) ; ZVFH-NEXT: ret ; ; ZVFHMINLMULMAX2-RV32-LABEL: fdiv_vf_v6f16: ; ZVFHMINLMULMAX2-RV32: # %bb.0: ; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma ; ZVFHMINLMULMAX2-RV32-NEXT: vle16.v v8, (a0) ; ZVFHMINLMULMAX2-RV32-NEXT: fcvt.s.h fa5, fa0 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli a1, zero, e32, m1, ta, ma ; ZVFHMINLMULMAX2-RV32-NEXT: vfmv.v.f v9, fa5 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; ZVFHMINLMULMAX2-RV32-NEXT: vfncvt.f.f.w v10, v9 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma ; ZVFHMINLMULMAX2-RV32-NEXT: vfwcvt.f.f.v v9, v8 ; ZVFHMINLMULMAX2-RV32-NEXT: vfwcvt.f.f.v v8, v10 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; ZVFHMINLMULMAX2-RV32-NEXT: vfdiv.vv v8, v9, v8 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; ZVFHMINLMULMAX2-RV32-NEXT: vfncvt.f.f.w v9, v8 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma ; ZVFHMINLMULMAX2-RV32-NEXT: vslidedown.vi v8, v9, 2 ; ZVFHMINLMULMAX2-RV32-NEXT: addi a1, a0, 8 ; ZVFHMINLMULMAX2-RV32-NEXT: vse32.v v8, (a1) ; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma ; ZVFHMINLMULMAX2-RV32-NEXT: vse16.v v9, (a0) ; ZVFHMINLMULMAX2-RV32-NEXT: ret ; ; ZVFHMINLMULMAX2-RV64-LABEL: fdiv_vf_v6f16: ; ZVFHMINLMULMAX2-RV64: # %bb.0: ; ZVFHMINLMULMAX2-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma ; ZVFHMINLMULMAX2-RV64-NEXT: vle16.v v8, (a0) ; ZVFHMINLMULMAX2-RV64-NEXT: fcvt.s.h fa5, fa0 ; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli a1, zero, e32, m1, ta, ma ; ZVFHMINLMULMAX2-RV64-NEXT: vfmv.v.f v9, fa5 ; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; ZVFHMINLMULMAX2-RV64-NEXT: vfncvt.f.f.w v10, v9 ; ZVFHMINLMULMAX2-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma ; ZVFHMINLMULMAX2-RV64-NEXT: vfwcvt.f.f.v v9, v8 ; ZVFHMINLMULMAX2-RV64-NEXT: vfwcvt.f.f.v v8, v10 ; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; ZVFHMINLMULMAX2-RV64-NEXT: vfdiv.vv v8, v9, v8 ; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; ZVFHMINLMULMAX2-RV64-NEXT: vfncvt.f.f.w v9, v8 ; 
ZVFHMINLMULMAX2-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma ; ZVFHMINLMULMAX2-RV64-NEXT: vse64.v v9, (a0) ; ZVFHMINLMULMAX2-RV64-NEXT: vslidedown.vi v8, v9, 2 ; ZVFHMINLMULMAX2-RV64-NEXT: addi a0, a0, 8 ; ZVFHMINLMULMAX2-RV64-NEXT: vse32.v v8, (a0) ; ZVFHMINLMULMAX2-RV64-NEXT: ret ; ; ZVFHMINLMULMAX1-RV32-LABEL: fdiv_vf_v6f16: ; ZVFHMINLMULMAX1-RV32: # %bb.0: ; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma ; ZVFHMINLMULMAX1-RV32-NEXT: vle16.v v8, (a0) ; ZVFHMINLMULMAX1-RV32-NEXT: fcvt.s.h fa5, fa0 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli a1, zero, e32, m1, ta, ma ; ZVFHMINLMULMAX1-RV32-NEXT: vfmv.v.f v9, fa5 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; ZVFHMINLMULMAX1-RV32-NEXT: vfncvt.f.f.w v10, v9 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma ; ZVFHMINLMULMAX1-RV32-NEXT: vfwcvt.f.f.v v9, v8 ; ZVFHMINLMULMAX1-RV32-NEXT: vfwcvt.f.f.v v8, v10 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; ZVFHMINLMULMAX1-RV32-NEXT: vfdiv.vv v8, v9, v8 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; ZVFHMINLMULMAX1-RV32-NEXT: vfncvt.f.f.w v9, v8 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma ; ZVFHMINLMULMAX1-RV32-NEXT: vslidedown.vi v8, v9, 2 ; ZVFHMINLMULMAX1-RV32-NEXT: addi a1, a0, 8 ; ZVFHMINLMULMAX1-RV32-NEXT: vse32.v v8, (a1) ; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma ; ZVFHMINLMULMAX1-RV32-NEXT: vse16.v v9, (a0) ; ZVFHMINLMULMAX1-RV32-NEXT: ret ; ; ZVFHMINLMULMAX1-RV64-LABEL: fdiv_vf_v6f16: ; ZVFHMINLMULMAX1-RV64: # %bb.0: ; ZVFHMINLMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma ; ZVFHMINLMULMAX1-RV64-NEXT: vle16.v v8, (a0) ; ZVFHMINLMULMAX1-RV64-NEXT: fcvt.s.h fa5, fa0 ; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli a1, zero, e32, m1, ta, ma ; ZVFHMINLMULMAX1-RV64-NEXT: vfmv.v.f v9, fa5 ; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; ZVFHMINLMULMAX1-RV64-NEXT: vfncvt.f.f.w v10, v9 ; ZVFHMINLMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma ; ZVFHMINLMULMAX1-RV64-NEXT: vfwcvt.f.f.v v9, v8 ; ZVFHMINLMULMAX1-RV64-NEXT: vfwcvt.f.f.v v8, v10 ; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; ZVFHMINLMULMAX1-RV64-NEXT: vfdiv.vv v8, v9, v8 ; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; ZVFHMINLMULMAX1-RV64-NEXT: vfncvt.f.f.w v9, v8 ; ZVFHMINLMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma ; ZVFHMINLMULMAX1-RV64-NEXT: vse64.v v9, (a0) ; ZVFHMINLMULMAX1-RV64-NEXT: vslidedown.vi v8, v9, 2 ; ZVFHMINLMULMAX1-RV64-NEXT: addi a0, a0, 8 ; ZVFHMINLMULMAX1-RV64-NEXT: vse32.v v8, (a0) ; ZVFHMINLMULMAX1-RV64-NEXT: ret %a = load <6 x half>, ptr %x %b = insertelement <6 x half> poison, half %y, i32 0 %c = shufflevector <6 x half> %b, <6 x half> poison, <6 x i32> zeroinitializer %d = fdiv <6 x half> %a, %c store <6 x half> %d, ptr %x ret void } define void @fdiv_vf_v4f32(ptr %x, float %y) { ; ZVFH-LABEL: fdiv_vf_v4f32: ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; ZVFH-NEXT: vle32.v v8, (a0) ; ZVFH-NEXT: vfdiv.vf v8, v8, fa0 ; ZVFH-NEXT: vse32.v v8, (a0) ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: fdiv_vf_v4f32: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: vsetivli zero, 4, e32, mf2, ta, ma ; ZVFHMIN-NEXT: vle32.v v8, (a0) ; ZVFHMIN-NEXT: vfdiv.vf v8, v8, fa0 ; ZVFHMIN-NEXT: vse32.v v8, (a0) ; ZVFHMIN-NEXT: ret %a = load <4 x float>, ptr %x %b = insertelement <4 x float> poison, float %y, i32 0 %c = shufflevector <4 x float> %b, <4 x float> poison, <4 x i32> zeroinitializer %d = fdiv <4 x float> %a, %c store <4 x float> %d, 
ptr %x ret void } define void @fdiv_vf_v2f64(ptr %x, double %y) { ; CHECK-LABEL: fdiv_vf_v2f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; CHECK-NEXT: vle64.v v8, (a0) ; CHECK-NEXT: vfdiv.vf v8, v8, fa0 ; CHECK-NEXT: vse64.v v8, (a0) ; CHECK-NEXT: ret %a = load <2 x double>, ptr %x %b = insertelement <2 x double> poison, double %y, i32 0 %c = shufflevector <2 x double> %b, <2 x double> poison, <2 x i32> zeroinitializer %d = fdiv <2 x double> %a, %c store <2 x double> %d, ptr %x ret void } define void @fdiv_fv_v8f16(ptr %x, half %y) { ; ZVFH-LABEL: fdiv_fv_v8f16: ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; ZVFH-NEXT: vle16.v v8, (a0) ; ZVFH-NEXT: vfrdiv.vf v8, v8, fa0 ; ZVFH-NEXT: vse16.v v8, (a0) ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: fdiv_fv_v8f16: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: vsetivli zero, 8, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vle16.v v8, (a0) ; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0 ; ZVFHMIN-NEXT: vsetvli a1, zero, e32, m1, ta, ma ; ZVFHMIN-NEXT: vfmv.v.f v9, fa5 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v9 ; ZVFHMIN-NEXT: vsetivli zero, 8, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; ZVFHMIN-NEXT: vfdiv.vv v8, v8, v9 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v9, v8 ; ZVFHMIN-NEXT: vse16.v v9, (a0) ; ZVFHMIN-NEXT: ret %a = load <8 x half>, ptr %x %b = insertelement <8 x half> poison, half %y, i32 0 %c = shufflevector <8 x half> %b, <8 x half> poison, <8 x i32> zeroinitializer %d = fdiv <8 x half> %c, %a store <8 x half> %d, ptr %x ret void } define void @fdiv_fv_v6f16(ptr %x, half %y) { ; ZVFH-LABEL: fdiv_fv_v6f16: ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma ; ZVFH-NEXT: vle16.v v8, (a0) ; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; ZVFH-NEXT: vfrdiv.vf v8, v8, fa0 ; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma ; ZVFH-NEXT: vse16.v v8, (a0) ; ZVFH-NEXT: ret ; ; ZVFHMINLMULMAX2-RV32-LABEL: fdiv_fv_v6f16: ; ZVFHMINLMULMAX2-RV32: # %bb.0: ; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma ; ZVFHMINLMULMAX2-RV32-NEXT: vle16.v v8, (a0) ; ZVFHMINLMULMAX2-RV32-NEXT: fcvt.s.h fa5, fa0 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli a1, zero, e32, m1, ta, ma ; ZVFHMINLMULMAX2-RV32-NEXT: vfmv.v.f v9, fa5 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; ZVFHMINLMULMAX2-RV32-NEXT: vfncvt.f.f.w v10, v9 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma ; ZVFHMINLMULMAX2-RV32-NEXT: vfwcvt.f.f.v v9, v8 ; ZVFHMINLMULMAX2-RV32-NEXT: vfwcvt.f.f.v v8, v10 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; ZVFHMINLMULMAX2-RV32-NEXT: vfdiv.vv v8, v8, v9 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; ZVFHMINLMULMAX2-RV32-NEXT: vfncvt.f.f.w v9, v8 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma ; ZVFHMINLMULMAX2-RV32-NEXT: vslidedown.vi v8, v9, 2 ; ZVFHMINLMULMAX2-RV32-NEXT: addi a1, a0, 8 ; ZVFHMINLMULMAX2-RV32-NEXT: vse32.v v8, (a1) ; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma ; ZVFHMINLMULMAX2-RV32-NEXT: vse16.v v9, (a0) ; ZVFHMINLMULMAX2-RV32-NEXT: ret ; ; ZVFHMINLMULMAX2-RV64-LABEL: fdiv_fv_v6f16: ; ZVFHMINLMULMAX2-RV64: # %bb.0: ; ZVFHMINLMULMAX2-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma ; ZVFHMINLMULMAX2-RV64-NEXT: vle16.v v8, (a0) ; ZVFHMINLMULMAX2-RV64-NEXT: fcvt.s.h fa5, fa0 ; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli a1, zero, e32, m1, ta, ma ; 
ZVFHMINLMULMAX2-RV64-NEXT: vfmv.v.f v9, fa5 ; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; ZVFHMINLMULMAX2-RV64-NEXT: vfncvt.f.f.w v10, v9 ; ZVFHMINLMULMAX2-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma ; ZVFHMINLMULMAX2-RV64-NEXT: vfwcvt.f.f.v v9, v8 ; ZVFHMINLMULMAX2-RV64-NEXT: vfwcvt.f.f.v v8, v10 ; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; ZVFHMINLMULMAX2-RV64-NEXT: vfdiv.vv v8, v8, v9 ; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; ZVFHMINLMULMAX2-RV64-NEXT: vfncvt.f.f.w v9, v8 ; ZVFHMINLMULMAX2-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma ; ZVFHMINLMULMAX2-RV64-NEXT: vse64.v v9, (a0) ; ZVFHMINLMULMAX2-RV64-NEXT: vslidedown.vi v8, v9, 2 ; ZVFHMINLMULMAX2-RV64-NEXT: addi a0, a0, 8 ; ZVFHMINLMULMAX2-RV64-NEXT: vse32.v v8, (a0) ; ZVFHMINLMULMAX2-RV64-NEXT: ret ; ; ZVFHMINLMULMAX1-RV32-LABEL: fdiv_fv_v6f16: ; ZVFHMINLMULMAX1-RV32: # %bb.0: ; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma ; ZVFHMINLMULMAX1-RV32-NEXT: vle16.v v8, (a0) ; ZVFHMINLMULMAX1-RV32-NEXT: fcvt.s.h fa5, fa0 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli a1, zero, e32, m1, ta, ma ; ZVFHMINLMULMAX1-RV32-NEXT: vfmv.v.f v9, fa5 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; ZVFHMINLMULMAX1-RV32-NEXT: vfncvt.f.f.w v10, v9 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma ; ZVFHMINLMULMAX1-RV32-NEXT: vfwcvt.f.f.v v9, v8 ; ZVFHMINLMULMAX1-RV32-NEXT: vfwcvt.f.f.v v8, v10 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; ZVFHMINLMULMAX1-RV32-NEXT: vfdiv.vv v8, v8, v9 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; ZVFHMINLMULMAX1-RV32-NEXT: vfncvt.f.f.w v9, v8 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma ; ZVFHMINLMULMAX1-RV32-NEXT: vslidedown.vi v8, v9, 2 ; ZVFHMINLMULMAX1-RV32-NEXT: addi a1, a0, 8 ; ZVFHMINLMULMAX1-RV32-NEXT: vse32.v v8, (a1) ; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma ; ZVFHMINLMULMAX1-RV32-NEXT: vse16.v v9, (a0) ; ZVFHMINLMULMAX1-RV32-NEXT: ret ; ; ZVFHMINLMULMAX1-RV64-LABEL: fdiv_fv_v6f16: ; ZVFHMINLMULMAX1-RV64: # %bb.0: ; ZVFHMINLMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma ; ZVFHMINLMULMAX1-RV64-NEXT: vle16.v v8, (a0) ; ZVFHMINLMULMAX1-RV64-NEXT: fcvt.s.h fa5, fa0 ; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli a1, zero, e32, m1, ta, ma ; ZVFHMINLMULMAX1-RV64-NEXT: vfmv.v.f v9, fa5 ; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; ZVFHMINLMULMAX1-RV64-NEXT: vfncvt.f.f.w v10, v9 ; ZVFHMINLMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma ; ZVFHMINLMULMAX1-RV64-NEXT: vfwcvt.f.f.v v9, v8 ; ZVFHMINLMULMAX1-RV64-NEXT: vfwcvt.f.f.v v8, v10 ; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; ZVFHMINLMULMAX1-RV64-NEXT: vfdiv.vv v8, v8, v9 ; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; ZVFHMINLMULMAX1-RV64-NEXT: vfncvt.f.f.w v9, v8 ; ZVFHMINLMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma ; ZVFHMINLMULMAX1-RV64-NEXT: vse64.v v9, (a0) ; ZVFHMINLMULMAX1-RV64-NEXT: vslidedown.vi v8, v9, 2 ; ZVFHMINLMULMAX1-RV64-NEXT: addi a0, a0, 8 ; ZVFHMINLMULMAX1-RV64-NEXT: vse32.v v8, (a0) ; ZVFHMINLMULMAX1-RV64-NEXT: ret %a = load <6 x half>, ptr %x %b = insertelement <6 x half> poison, half %y, i32 0 %c = shufflevector <6 x half> %b, <6 x half> poison, <6 x i32> zeroinitializer %d = fdiv <6 x half> %c, %a store <6 x half> %d, ptr %x ret void } define void @fdiv_fv_v4f32(ptr %x, float %y) { ; ZVFH-LABEL: fdiv_fv_v4f32: ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; 
ZVFH-NEXT: vle32.v v8, (a0) ; ZVFH-NEXT: vfrdiv.vf v8, v8, fa0 ; ZVFH-NEXT: vse32.v v8, (a0) ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: fdiv_fv_v4f32: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: vsetivli zero, 4, e32, mf2, ta, ma ; ZVFHMIN-NEXT: vle32.v v8, (a0) ; ZVFHMIN-NEXT: vfrdiv.vf v8, v8, fa0 ; ZVFHMIN-NEXT: vse32.v v8, (a0) ; ZVFHMIN-NEXT: ret %a = load <4 x float>, ptr %x %b = insertelement <4 x float> poison, float %y, i32 0 %c = shufflevector <4 x float> %b, <4 x float> poison, <4 x i32> zeroinitializer %d = fdiv <4 x float> %c, %a store <4 x float> %d, ptr %x ret void } define void @fdiv_fv_v2f64(ptr %x, double %y) { ; CHECK-LABEL: fdiv_fv_v2f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; CHECK-NEXT: vle64.v v8, (a0) ; CHECK-NEXT: vfrdiv.vf v8, v8, fa0 ; CHECK-NEXT: vse64.v v8, (a0) ; CHECK-NEXT: ret %a = load <2 x double>, ptr %x %b = insertelement <2 x double> poison, double %y, i32 0 %c = shufflevector <2 x double> %b, <2 x double> poison, <2 x i32> zeroinitializer %d = fdiv <2 x double> %c, %a store <2 x double> %d, ptr %x ret void } define void @fma_vf_v8f16(ptr %x, ptr %y, half %z) { ; ZVFH-LABEL: fma_vf_v8f16: ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; ZVFH-NEXT: vle16.v v8, (a0) ; ZVFH-NEXT: vle16.v v9, (a1) ; ZVFH-NEXT: vfmacc.vf v9, fa0, v8 ; ZVFH-NEXT: vse16.v v9, (a0) ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: fma_vf_v8f16: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: vsetivli zero, 8, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vle16.v v8, (a0) ; ZVFHMIN-NEXT: vle16.v v9, (a1) ; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0 ; ZVFHMIN-NEXT: vsetvli a1, zero, e32, m1, ta, ma ; ZVFHMIN-NEXT: vfmv.v.f v10, fa5 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v11, v10 ; ZVFHMIN-NEXT: vsetivli zero, 8, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v11 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; ZVFHMIN-NEXT: vfmadd.vv v8, v9, v10 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v9, v8 ; ZVFHMIN-NEXT: vse16.v v9, (a0) ; ZVFHMIN-NEXT: ret %a = load <8 x half>, ptr %x %b = load <8 x half>, ptr %y %c = insertelement <8 x half> poison, half %z, i32 0 %d = shufflevector <8 x half> %c, <8 x half> poison, <8 x i32> zeroinitializer %e = call <8 x half> @llvm.fma.v8f16(<8 x half> %a, <8 x half> %d, <8 x half> %b) store <8 x half> %e, ptr %x ret void } define void @fma_vf_v6f16(ptr %x, ptr %y, half %z) { ; ZVFH-LABEL: fma_vf_v6f16: ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma ; ZVFH-NEXT: vle16.v v8, (a0) ; ZVFH-NEXT: vle16.v v9, (a1) ; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; ZVFH-NEXT: vfmacc.vf v9, fa0, v8 ; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma ; ZVFH-NEXT: vse16.v v9, (a0) ; ZVFH-NEXT: ret ; ; ZVFHMINLMULMAX2-RV32-LABEL: fma_vf_v6f16: ; ZVFHMINLMULMAX2-RV32: # %bb.0: ; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma ; ZVFHMINLMULMAX2-RV32-NEXT: vle16.v v8, (a0) ; ZVFHMINLMULMAX2-RV32-NEXT: vle16.v v9, (a1) ; ZVFHMINLMULMAX2-RV32-NEXT: fcvt.s.h fa5, fa0 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli a1, zero, e32, m1, ta, ma ; ZVFHMINLMULMAX2-RV32-NEXT: vfmv.v.f v10, fa5 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; ZVFHMINLMULMAX2-RV32-NEXT: vfncvt.f.f.w v11, v10 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma ; ZVFHMINLMULMAX2-RV32-NEXT: vfwcvt.f.f.v v10, v9 ; ZVFHMINLMULMAX2-RV32-NEXT: vfwcvt.f.f.v v9, v8 ; ZVFHMINLMULMAX2-RV32-NEXT: vfwcvt.f.f.v v8, v11 ; 
ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; ZVFHMINLMULMAX2-RV32-NEXT: vfmadd.vv v8, v9, v10 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; ZVFHMINLMULMAX2-RV32-NEXT: vfncvt.f.f.w v9, v8 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma ; ZVFHMINLMULMAX2-RV32-NEXT: vslidedown.vi v8, v9, 2 ; ZVFHMINLMULMAX2-RV32-NEXT: addi a1, a0, 8 ; ZVFHMINLMULMAX2-RV32-NEXT: vse32.v v8, (a1) ; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma ; ZVFHMINLMULMAX2-RV32-NEXT: vse16.v v9, (a0) ; ZVFHMINLMULMAX2-RV32-NEXT: ret ; ; ZVFHMINLMULMAX2-RV64-LABEL: fma_vf_v6f16: ; ZVFHMINLMULMAX2-RV64: # %bb.0: ; ZVFHMINLMULMAX2-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma ; ZVFHMINLMULMAX2-RV64-NEXT: vle16.v v8, (a0) ; ZVFHMINLMULMAX2-RV64-NEXT: vle16.v v9, (a1) ; ZVFHMINLMULMAX2-RV64-NEXT: fcvt.s.h fa5, fa0 ; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli a1, zero, e32, m1, ta, ma ; ZVFHMINLMULMAX2-RV64-NEXT: vfmv.v.f v10, fa5 ; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; ZVFHMINLMULMAX2-RV64-NEXT: vfncvt.f.f.w v11, v10 ; ZVFHMINLMULMAX2-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma ; ZVFHMINLMULMAX2-RV64-NEXT: vfwcvt.f.f.v v10, v9 ; ZVFHMINLMULMAX2-RV64-NEXT: vfwcvt.f.f.v v9, v8 ; ZVFHMINLMULMAX2-RV64-NEXT: vfwcvt.f.f.v v8, v11 ; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; ZVFHMINLMULMAX2-RV64-NEXT: vfmadd.vv v8, v9, v10 ; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; ZVFHMINLMULMAX2-RV64-NEXT: vfncvt.f.f.w v9, v8 ; ZVFHMINLMULMAX2-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma ; ZVFHMINLMULMAX2-RV64-NEXT: vse64.v v9, (a0) ; ZVFHMINLMULMAX2-RV64-NEXT: vslidedown.vi v8, v9, 2 ; ZVFHMINLMULMAX2-RV64-NEXT: addi a0, a0, 8 ; ZVFHMINLMULMAX2-RV64-NEXT: vse32.v v8, (a0) ; ZVFHMINLMULMAX2-RV64-NEXT: ret ; ; ZVFHMINLMULMAX1-RV32-LABEL: fma_vf_v6f16: ; ZVFHMINLMULMAX1-RV32: # %bb.0: ; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma ; ZVFHMINLMULMAX1-RV32-NEXT: vle16.v v8, (a0) ; ZVFHMINLMULMAX1-RV32-NEXT: vle16.v v9, (a1) ; ZVFHMINLMULMAX1-RV32-NEXT: fcvt.s.h fa5, fa0 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli a1, zero, e32, m1, ta, ma ; ZVFHMINLMULMAX1-RV32-NEXT: vfmv.v.f v10, fa5 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; ZVFHMINLMULMAX1-RV32-NEXT: vfncvt.f.f.w v11, v10 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma ; ZVFHMINLMULMAX1-RV32-NEXT: vfwcvt.f.f.v v10, v9 ; ZVFHMINLMULMAX1-RV32-NEXT: vfwcvt.f.f.v v9, v8 ; ZVFHMINLMULMAX1-RV32-NEXT: vfwcvt.f.f.v v8, v11 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; ZVFHMINLMULMAX1-RV32-NEXT: vfmadd.vv v8, v9, v10 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; ZVFHMINLMULMAX1-RV32-NEXT: vfncvt.f.f.w v9, v8 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma ; ZVFHMINLMULMAX1-RV32-NEXT: vslidedown.vi v8, v9, 2 ; ZVFHMINLMULMAX1-RV32-NEXT: addi a1, a0, 8 ; ZVFHMINLMULMAX1-RV32-NEXT: vse32.v v8, (a1) ; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma ; ZVFHMINLMULMAX1-RV32-NEXT: vse16.v v9, (a0) ; ZVFHMINLMULMAX1-RV32-NEXT: ret ; ; ZVFHMINLMULMAX1-RV64-LABEL: fma_vf_v6f16: ; ZVFHMINLMULMAX1-RV64: # %bb.0: ; ZVFHMINLMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma ; ZVFHMINLMULMAX1-RV64-NEXT: vle16.v v8, (a0) ; ZVFHMINLMULMAX1-RV64-NEXT: vle16.v v9, (a1) ; ZVFHMINLMULMAX1-RV64-NEXT: fcvt.s.h fa5, fa0 ; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli a1, zero, e32, m1, ta, ma ; ZVFHMINLMULMAX1-RV64-NEXT: vfmv.v.f v10, fa5 ; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli 
zero, zero, e16, mf2, ta, ma ; ZVFHMINLMULMAX1-RV64-NEXT: vfncvt.f.f.w v11, v10 ; ZVFHMINLMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma ; ZVFHMINLMULMAX1-RV64-NEXT: vfwcvt.f.f.v v10, v9 ; ZVFHMINLMULMAX1-RV64-NEXT: vfwcvt.f.f.v v9, v8 ; ZVFHMINLMULMAX1-RV64-NEXT: vfwcvt.f.f.v v8, v11 ; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; ZVFHMINLMULMAX1-RV64-NEXT: vfmadd.vv v8, v9, v10 ; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; ZVFHMINLMULMAX1-RV64-NEXT: vfncvt.f.f.w v9, v8 ; ZVFHMINLMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma ; ZVFHMINLMULMAX1-RV64-NEXT: vse64.v v9, (a0) ; ZVFHMINLMULMAX1-RV64-NEXT: vslidedown.vi v8, v9, 2 ; ZVFHMINLMULMAX1-RV64-NEXT: addi a0, a0, 8 ; ZVFHMINLMULMAX1-RV64-NEXT: vse32.v v8, (a0) ; ZVFHMINLMULMAX1-RV64-NEXT: ret %a = load <6 x half>, ptr %x %b = load <6 x half>, ptr %y %c = insertelement <6 x half> poison, half %z, i32 0 %d = shufflevector <6 x half> %c, <6 x half> poison, <6 x i32> zeroinitializer %e = call <6 x half> @llvm.fma.v6f16(<6 x half> %a, <6 x half> %d, <6 x half> %b) store <6 x half> %e, ptr %x ret void } define void @fma_vf_v4f32(ptr %x, ptr %y, float %z) { ; ZVFH-LABEL: fma_vf_v4f32: ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; ZVFH-NEXT: vle32.v v8, (a0) ; ZVFH-NEXT: vle32.v v9, (a1) ; ZVFH-NEXT: vfmacc.vf v9, fa0, v8 ; ZVFH-NEXT: vse32.v v9, (a0) ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: fma_vf_v4f32: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: vsetivli zero, 4, e32, mf2, ta, ma ; ZVFHMIN-NEXT: vle32.v v8, (a0) ; ZVFHMIN-NEXT: vle32.v v9, (a1) ; ZVFHMIN-NEXT: vfmacc.vf v9, fa0, v8 ; ZVFHMIN-NEXT: vse32.v v9, (a0) ; ZVFHMIN-NEXT: ret %a = load <4 x float>, ptr %x %b = load <4 x float>, ptr %y %c = insertelement <4 x float> poison, float %z, i32 0 %d = shufflevector <4 x float> %c, <4 x float> poison, <4 x i32> zeroinitializer %e = call <4 x float> @llvm.fma.v4f32(<4 x float> %a, <4 x float> %d, <4 x float> %b) store <4 x float> %e, ptr %x ret void } define void @fma_vf_v2f64(ptr %x, ptr %y, double %z) { ; CHECK-LABEL: fma_vf_v2f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; CHECK-NEXT: vle64.v v8, (a0) ; CHECK-NEXT: vle64.v v9, (a1) ; CHECK-NEXT: vfmacc.vf v9, fa0, v8 ; CHECK-NEXT: vse64.v v9, (a0) ; CHECK-NEXT: ret %a = load <2 x double>, ptr %x %b = load <2 x double>, ptr %y %c = insertelement <2 x double> poison, double %z, i32 0 %d = shufflevector <2 x double> %c, <2 x double> poison, <2 x i32> zeroinitializer %e = call <2 x double> @llvm.fma.v2f64(<2 x double> %a, <2 x double> %d, <2 x double> %b) store <2 x double> %e, ptr %x ret void } define void @fma_fv_v8f16(ptr %x, ptr %y, half %z) { ; ZVFH-LABEL: fma_fv_v8f16: ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; ZVFH-NEXT: vle16.v v8, (a0) ; ZVFH-NEXT: vle16.v v9, (a1) ; ZVFH-NEXT: vfmacc.vf v9, fa0, v8 ; ZVFH-NEXT: vse16.v v9, (a0) ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: fma_fv_v8f16: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: vsetivli zero, 8, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vle16.v v8, (a0) ; ZVFHMIN-NEXT: vle16.v v9, (a1) ; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0 ; ZVFHMIN-NEXT: vsetvli a1, zero, e32, m1, ta, ma ; ZVFHMIN-NEXT: vfmv.v.f v10, fa5 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v11, v10 ; ZVFHMIN-NEXT: vsetivli zero, 8, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v11 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; ZVFHMIN-NEXT: vfmadd.vv v8, v9, v10 ; ZVFHMIN-NEXT: 
vsetvli zero, zero, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v9, v8 ; ZVFHMIN-NEXT: vse16.v v9, (a0) ; ZVFHMIN-NEXT: ret %a = load <8 x half>, ptr %x %b = load <8 x half>, ptr %y %c = insertelement <8 x half> poison, half %z, i32 0 %d = shufflevector <8 x half> %c, <8 x half> poison, <8 x i32> zeroinitializer %e = call <8 x half> @llvm.fma.v8f16(<8 x half> %d, <8 x half> %a, <8 x half> %b) store <8 x half> %e, ptr %x ret void } define void @fma_fv_v6f16(ptr %x, ptr %y, half %z) { ; ZVFH-LABEL: fma_fv_v6f16: ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma ; ZVFH-NEXT: vle16.v v8, (a0) ; ZVFH-NEXT: vle16.v v9, (a1) ; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; ZVFH-NEXT: vfmacc.vf v9, fa0, v8 ; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma ; ZVFH-NEXT: vse16.v v9, (a0) ; ZVFH-NEXT: ret ; ; ZVFHMINLMULMAX2-RV32-LABEL: fma_fv_v6f16: ; ZVFHMINLMULMAX2-RV32: # %bb.0: ; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma ; ZVFHMINLMULMAX2-RV32-NEXT: vle16.v v8, (a0) ; ZVFHMINLMULMAX2-RV32-NEXT: vle16.v v9, (a1) ; ZVFHMINLMULMAX2-RV32-NEXT: fcvt.s.h fa5, fa0 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli a1, zero, e32, m1, ta, ma ; ZVFHMINLMULMAX2-RV32-NEXT: vfmv.v.f v10, fa5 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; ZVFHMINLMULMAX2-RV32-NEXT: vfncvt.f.f.w v11, v10 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma ; ZVFHMINLMULMAX2-RV32-NEXT: vfwcvt.f.f.v v10, v9 ; ZVFHMINLMULMAX2-RV32-NEXT: vfwcvt.f.f.v v9, v8 ; ZVFHMINLMULMAX2-RV32-NEXT: vfwcvt.f.f.v v8, v11 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; ZVFHMINLMULMAX2-RV32-NEXT: vfmadd.vv v8, v9, v10 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; ZVFHMINLMULMAX2-RV32-NEXT: vfncvt.f.f.w v9, v8 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma ; ZVFHMINLMULMAX2-RV32-NEXT: vslidedown.vi v8, v9, 2 ; ZVFHMINLMULMAX2-RV32-NEXT: addi a1, a0, 8 ; ZVFHMINLMULMAX2-RV32-NEXT: vse32.v v8, (a1) ; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma ; ZVFHMINLMULMAX2-RV32-NEXT: vse16.v v9, (a0) ; ZVFHMINLMULMAX2-RV32-NEXT: ret ; ; ZVFHMINLMULMAX2-RV64-LABEL: fma_fv_v6f16: ; ZVFHMINLMULMAX2-RV64: # %bb.0: ; ZVFHMINLMULMAX2-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma ; ZVFHMINLMULMAX2-RV64-NEXT: vle16.v v8, (a0) ; ZVFHMINLMULMAX2-RV64-NEXT: vle16.v v9, (a1) ; ZVFHMINLMULMAX2-RV64-NEXT: fcvt.s.h fa5, fa0 ; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli a1, zero, e32, m1, ta, ma ; ZVFHMINLMULMAX2-RV64-NEXT: vfmv.v.f v10, fa5 ; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; ZVFHMINLMULMAX2-RV64-NEXT: vfncvt.f.f.w v11, v10 ; ZVFHMINLMULMAX2-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma ; ZVFHMINLMULMAX2-RV64-NEXT: vfwcvt.f.f.v v10, v9 ; ZVFHMINLMULMAX2-RV64-NEXT: vfwcvt.f.f.v v9, v8 ; ZVFHMINLMULMAX2-RV64-NEXT: vfwcvt.f.f.v v8, v11 ; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; ZVFHMINLMULMAX2-RV64-NEXT: vfmadd.vv v8, v9, v10 ; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; ZVFHMINLMULMAX2-RV64-NEXT: vfncvt.f.f.w v9, v8 ; ZVFHMINLMULMAX2-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma ; ZVFHMINLMULMAX2-RV64-NEXT: vse64.v v9, (a0) ; ZVFHMINLMULMAX2-RV64-NEXT: vslidedown.vi v8, v9, 2 ; ZVFHMINLMULMAX2-RV64-NEXT: addi a0, a0, 8 ; ZVFHMINLMULMAX2-RV64-NEXT: vse32.v v8, (a0) ; ZVFHMINLMULMAX2-RV64-NEXT: ret ; ; ZVFHMINLMULMAX1-RV32-LABEL: fma_fv_v6f16: ; ZVFHMINLMULMAX1-RV32: # %bb.0: ; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma ; ZVFHMINLMULMAX1-RV32-NEXT: vle16.v v8, (a0) ; 
ZVFHMINLMULMAX1-RV32-NEXT: vle16.v v9, (a1) ; ZVFHMINLMULMAX1-RV32-NEXT: fcvt.s.h fa5, fa0 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli a1, zero, e32, m1, ta, ma ; ZVFHMINLMULMAX1-RV32-NEXT: vfmv.v.f v10, fa5 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; ZVFHMINLMULMAX1-RV32-NEXT: vfncvt.f.f.w v11, v10 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma ; ZVFHMINLMULMAX1-RV32-NEXT: vfwcvt.f.f.v v10, v9 ; ZVFHMINLMULMAX1-RV32-NEXT: vfwcvt.f.f.v v9, v8 ; ZVFHMINLMULMAX1-RV32-NEXT: vfwcvt.f.f.v v8, v11 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; ZVFHMINLMULMAX1-RV32-NEXT: vfmadd.vv v8, v9, v10 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; ZVFHMINLMULMAX1-RV32-NEXT: vfncvt.f.f.w v9, v8 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma ; ZVFHMINLMULMAX1-RV32-NEXT: vslidedown.vi v8, v9, 2 ; ZVFHMINLMULMAX1-RV32-NEXT: addi a1, a0, 8 ; ZVFHMINLMULMAX1-RV32-NEXT: vse32.v v8, (a1) ; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma ; ZVFHMINLMULMAX1-RV32-NEXT: vse16.v v9, (a0) ; ZVFHMINLMULMAX1-RV32-NEXT: ret ; ; ZVFHMINLMULMAX1-RV64-LABEL: fma_fv_v6f16: ; ZVFHMINLMULMAX1-RV64: # %bb.0: ; ZVFHMINLMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma ; ZVFHMINLMULMAX1-RV64-NEXT: vle16.v v8, (a0) ; ZVFHMINLMULMAX1-RV64-NEXT: vle16.v v9, (a1) ; ZVFHMINLMULMAX1-RV64-NEXT: fcvt.s.h fa5, fa0 ; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli a1, zero, e32, m1, ta, ma ; ZVFHMINLMULMAX1-RV64-NEXT: vfmv.v.f v10, fa5 ; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; ZVFHMINLMULMAX1-RV64-NEXT: vfncvt.f.f.w v11, v10 ; ZVFHMINLMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma ; ZVFHMINLMULMAX1-RV64-NEXT: vfwcvt.f.f.v v10, v9 ; ZVFHMINLMULMAX1-RV64-NEXT: vfwcvt.f.f.v v9, v8 ; ZVFHMINLMULMAX1-RV64-NEXT: vfwcvt.f.f.v v8, v11 ; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; ZVFHMINLMULMAX1-RV64-NEXT: vfmadd.vv v8, v9, v10 ; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; ZVFHMINLMULMAX1-RV64-NEXT: vfncvt.f.f.w v9, v8 ; ZVFHMINLMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma ; ZVFHMINLMULMAX1-RV64-NEXT: vse64.v v9, (a0) ; ZVFHMINLMULMAX1-RV64-NEXT: vslidedown.vi v8, v9, 2 ; ZVFHMINLMULMAX1-RV64-NEXT: addi a0, a0, 8 ; ZVFHMINLMULMAX1-RV64-NEXT: vse32.v v8, (a0) ; ZVFHMINLMULMAX1-RV64-NEXT: ret %a = load <6 x half>, ptr %x %b = load <6 x half>, ptr %y %c = insertelement <6 x half> poison, half %z, i32 0 %d = shufflevector <6 x half> %c, <6 x half> poison, <6 x i32> zeroinitializer %e = call <6 x half> @llvm.fma.v6f16(<6 x half> %d, <6 x half> %a, <6 x half> %b) store <6 x half> %e, ptr %x ret void } define void @fma_fv_v4f32(ptr %x, ptr %y, float %z) { ; ZVFH-LABEL: fma_fv_v4f32: ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; ZVFH-NEXT: vle32.v v8, (a0) ; ZVFH-NEXT: vle32.v v9, (a1) ; ZVFH-NEXT: vfmacc.vf v9, fa0, v8 ; ZVFH-NEXT: vse32.v v9, (a0) ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: fma_fv_v4f32: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: vsetivli zero, 4, e32, mf2, ta, ma ; ZVFHMIN-NEXT: vle32.v v8, (a0) ; ZVFHMIN-NEXT: vle32.v v9, (a1) ; ZVFHMIN-NEXT: vfmacc.vf v9, fa0, v8 ; ZVFHMIN-NEXT: vse32.v v9, (a0) ; ZVFHMIN-NEXT: ret %a = load <4 x float>, ptr %x %b = load <4 x float>, ptr %y %c = insertelement <4 x float> poison, float %z, i32 0 %d = shufflevector <4 x float> %c, <4 x float> poison, <4 x i32> zeroinitializer %e = call <4 x float> @llvm.fma.v4f32(<4 x float> %d, <4 x float> %a, <4 x float> %b) store <4 x float> %e, ptr %x ret void } define void 
@fma_fv_v2f64(ptr %x, ptr %y, double %z) { ; CHECK-LABEL: fma_fv_v2f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; CHECK-NEXT: vle64.v v8, (a0) ; CHECK-NEXT: vle64.v v9, (a1) ; CHECK-NEXT: vfmacc.vf v9, fa0, v8 ; CHECK-NEXT: vse64.v v9, (a0) ; CHECK-NEXT: ret %a = load <2 x double>, ptr %x %b = load <2 x double>, ptr %y %c = insertelement <2 x double> poison, double %z, i32 0 %d = shufflevector <2 x double> %c, <2 x double> poison, <2 x i32> zeroinitializer %e = call <2 x double> @llvm.fma.v2f64(<2 x double> %d, <2 x double> %a, <2 x double> %b) store <2 x double> %e, ptr %x ret void } define void @fmsub_vf_v8f16(ptr %x, ptr %y, half %z) { ; ZVFH-LABEL: fmsub_vf_v8f16: ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; ZVFH-NEXT: vle16.v v8, (a0) ; ZVFH-NEXT: vle16.v v9, (a1) ; ZVFH-NEXT: vfmsac.vf v9, fa0, v8 ; ZVFH-NEXT: vse16.v v9, (a0) ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: fmsub_vf_v8f16: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: vsetivli zero, 8, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vle16.v v8, (a0) ; ZVFHMIN-NEXT: vle16.v v9, (a1) ; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0 ; ZVFHMIN-NEXT: vsetvli a1, zero, e32, m1, ta, ma ; ZVFHMIN-NEXT: vfmv.v.f v10, fa5 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v11, v10 ; ZVFHMIN-NEXT: vsetivli zero, 8, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; ZVFHMIN-NEXT: vfneg.v v9, v10 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v9 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v11 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v10 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; ZVFHMIN-NEXT: vfmacc.vv v11, v9, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v11 ; ZVFHMIN-NEXT: vse16.v v8, (a0) ; ZVFHMIN-NEXT: ret %a = load <8 x half>, ptr %x %b = load <8 x half>, ptr %y %c = insertelement <8 x half> poison, half %z, i32 0 %d = shufflevector <8 x half> %c, <8 x half> poison, <8 x i32> zeroinitializer %neg = fneg <8 x half> %b %e = call <8 x half> @llvm.fma.v8f16(<8 x half> %a, <8 x half> %d, <8 x half> %neg) store <8 x half> %e, ptr %x ret void } define void @fmsub_vf_v6f16(ptr %x, ptr %y, half %z) { ; ZVFH-LABEL: fmsub_vf_v6f16: ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma ; ZVFH-NEXT: vle16.v v8, (a0) ; ZVFH-NEXT: vle16.v v9, (a1) ; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; ZVFH-NEXT: vfmsac.vf v9, fa0, v8 ; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma ; ZVFH-NEXT: vse16.v v9, (a0) ; ZVFH-NEXT: ret ; ; ZVFHMINLMULMAX2-RV32-LABEL: fmsub_vf_v6f16: ; ZVFHMINLMULMAX2-RV32: # %bb.0: ; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma ; ZVFHMINLMULMAX2-RV32-NEXT: vle16.v v8, (a0) ; ZVFHMINLMULMAX2-RV32-NEXT: vle16.v v9, (a1) ; ZVFHMINLMULMAX2-RV32-NEXT: fcvt.s.h fa5, fa0 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli a1, zero, e32, m1, ta, ma ; ZVFHMINLMULMAX2-RV32-NEXT: vfmv.v.f v10, fa5 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; ZVFHMINLMULMAX2-RV32-NEXT: vfncvt.f.f.w v11, v10 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma ; ZVFHMINLMULMAX2-RV32-NEXT: vfwcvt.f.f.v v10, v9 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; ZVFHMINLMULMAX2-RV32-NEXT: vfneg.v v9, v10 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; ZVFHMINLMULMAX2-RV32-NEXT: vfncvt.f.f.w v10, v9 ; ZVFHMINLMULMAX2-RV32-NEXT: vfwcvt.f.f.v v9, v8 ; ZVFHMINLMULMAX2-RV32-NEXT: 
vfwcvt.f.f.v v8, v11 ; ZVFHMINLMULMAX2-RV32-NEXT: vfwcvt.f.f.v v11, v10 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; ZVFHMINLMULMAX2-RV32-NEXT: vfmacc.vv v11, v9, v8 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; ZVFHMINLMULMAX2-RV32-NEXT: vfncvt.f.f.w v8, v11 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma ; ZVFHMINLMULMAX2-RV32-NEXT: vslidedown.vi v9, v8, 2 ; ZVFHMINLMULMAX2-RV32-NEXT: addi a1, a0, 8 ; ZVFHMINLMULMAX2-RV32-NEXT: vse32.v v9, (a1) ; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma ; ZVFHMINLMULMAX2-RV32-NEXT: vse16.v v8, (a0) ; ZVFHMINLMULMAX2-RV32-NEXT: ret ; ; ZVFHMINLMULMAX2-RV64-LABEL: fmsub_vf_v6f16: ; ZVFHMINLMULMAX2-RV64: # %bb.0: ; ZVFHMINLMULMAX2-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma ; ZVFHMINLMULMAX2-RV64-NEXT: vle16.v v8, (a0) ; ZVFHMINLMULMAX2-RV64-NEXT: vle16.v v9, (a1) ; ZVFHMINLMULMAX2-RV64-NEXT: fcvt.s.h fa5, fa0 ; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli a1, zero, e32, m1, ta, ma ; ZVFHMINLMULMAX2-RV64-NEXT: vfmv.v.f v10, fa5 ; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; ZVFHMINLMULMAX2-RV64-NEXT: vfncvt.f.f.w v11, v10 ; ZVFHMINLMULMAX2-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma ; ZVFHMINLMULMAX2-RV64-NEXT: vfwcvt.f.f.v v10, v9 ; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; ZVFHMINLMULMAX2-RV64-NEXT: vfneg.v v9, v10 ; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; ZVFHMINLMULMAX2-RV64-NEXT: vfncvt.f.f.w v10, v9 ; ZVFHMINLMULMAX2-RV64-NEXT: vfwcvt.f.f.v v9, v8 ; ZVFHMINLMULMAX2-RV64-NEXT: vfwcvt.f.f.v v8, v11 ; ZVFHMINLMULMAX2-RV64-NEXT: vfwcvt.f.f.v v11, v10 ; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; ZVFHMINLMULMAX2-RV64-NEXT: vfmacc.vv v11, v9, v8 ; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; ZVFHMINLMULMAX2-RV64-NEXT: vfncvt.f.f.w v8, v11 ; ZVFHMINLMULMAX2-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma ; ZVFHMINLMULMAX2-RV64-NEXT: vse64.v v8, (a0) ; ZVFHMINLMULMAX2-RV64-NEXT: vslidedown.vi v8, v8, 2 ; ZVFHMINLMULMAX2-RV64-NEXT: addi a0, a0, 8 ; ZVFHMINLMULMAX2-RV64-NEXT: vse32.v v8, (a0) ; ZVFHMINLMULMAX2-RV64-NEXT: ret ; ; ZVFHMINLMULMAX1-RV32-LABEL: fmsub_vf_v6f16: ; ZVFHMINLMULMAX1-RV32: # %bb.0: ; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma ; ZVFHMINLMULMAX1-RV32-NEXT: vle16.v v8, (a0) ; ZVFHMINLMULMAX1-RV32-NEXT: vle16.v v9, (a1) ; ZVFHMINLMULMAX1-RV32-NEXT: fcvt.s.h fa5, fa0 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli a1, zero, e32, m1, ta, ma ; ZVFHMINLMULMAX1-RV32-NEXT: vfmv.v.f v10, fa5 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; ZVFHMINLMULMAX1-RV32-NEXT: vfncvt.f.f.w v11, v10 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma ; ZVFHMINLMULMAX1-RV32-NEXT: vfwcvt.f.f.v v10, v9 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; ZVFHMINLMULMAX1-RV32-NEXT: vfneg.v v9, v10 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; ZVFHMINLMULMAX1-RV32-NEXT: vfncvt.f.f.w v10, v9 ; ZVFHMINLMULMAX1-RV32-NEXT: vfwcvt.f.f.v v9, v8 ; ZVFHMINLMULMAX1-RV32-NEXT: vfwcvt.f.f.v v8, v11 ; ZVFHMINLMULMAX1-RV32-NEXT: vfwcvt.f.f.v v11, v10 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; ZVFHMINLMULMAX1-RV32-NEXT: vfmacc.vv v11, v9, v8 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; ZVFHMINLMULMAX1-RV32-NEXT: vfncvt.f.f.w v8, v11 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma ; ZVFHMINLMULMAX1-RV32-NEXT: vslidedown.vi v9, v8, 2 ; ZVFHMINLMULMAX1-RV32-NEXT: 
addi a1, a0, 8 ; ZVFHMINLMULMAX1-RV32-NEXT: vse32.v v9, (a1) ; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma ; ZVFHMINLMULMAX1-RV32-NEXT: vse16.v v8, (a0) ; ZVFHMINLMULMAX1-RV32-NEXT: ret ; ; ZVFHMINLMULMAX1-RV64-LABEL: fmsub_vf_v6f16: ; ZVFHMINLMULMAX1-RV64: # %bb.0: ; ZVFHMINLMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma ; ZVFHMINLMULMAX1-RV64-NEXT: vle16.v v8, (a0) ; ZVFHMINLMULMAX1-RV64-NEXT: vle16.v v9, (a1) ; ZVFHMINLMULMAX1-RV64-NEXT: fcvt.s.h fa5, fa0 ; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli a1, zero, e32, m1, ta, ma ; ZVFHMINLMULMAX1-RV64-NEXT: vfmv.v.f v10, fa5 ; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; ZVFHMINLMULMAX1-RV64-NEXT: vfncvt.f.f.w v11, v10 ; ZVFHMINLMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma ; ZVFHMINLMULMAX1-RV64-NEXT: vfwcvt.f.f.v v10, v9 ; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; ZVFHMINLMULMAX1-RV64-NEXT: vfneg.v v9, v10 ; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; ZVFHMINLMULMAX1-RV64-NEXT: vfncvt.f.f.w v10, v9 ; ZVFHMINLMULMAX1-RV64-NEXT: vfwcvt.f.f.v v9, v8 ; ZVFHMINLMULMAX1-RV64-NEXT: vfwcvt.f.f.v v8, v11 ; ZVFHMINLMULMAX1-RV64-NEXT: vfwcvt.f.f.v v11, v10 ; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; ZVFHMINLMULMAX1-RV64-NEXT: vfmacc.vv v11, v9, v8 ; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; ZVFHMINLMULMAX1-RV64-NEXT: vfncvt.f.f.w v8, v11 ; ZVFHMINLMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma ; ZVFHMINLMULMAX1-RV64-NEXT: vse64.v v8, (a0) ; ZVFHMINLMULMAX1-RV64-NEXT: vslidedown.vi v8, v8, 2 ; ZVFHMINLMULMAX1-RV64-NEXT: addi a0, a0, 8 ; ZVFHMINLMULMAX1-RV64-NEXT: vse32.v v8, (a0) ; ZVFHMINLMULMAX1-RV64-NEXT: ret %a = load <6 x half>, ptr %x %b = load <6 x half>, ptr %y %c = insertelement <6 x half> poison, half %z, i32 0 %d = shufflevector <6 x half> %c, <6 x half> poison, <6 x i32> zeroinitializer %neg = fneg <6 x half> %b %e = call <6 x half> @llvm.fma.v6f16(<6 x half> %a, <6 x half> %d, <6 x half> %neg) store <6 x half> %e, ptr %x ret void } define void @fnmsub_vf_v4f32(ptr %x, ptr %y, float %z) { ; ZVFH-LABEL: fnmsub_vf_v4f32: ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; ZVFH-NEXT: vle32.v v8, (a0) ; ZVFH-NEXT: vle32.v v9, (a1) ; ZVFH-NEXT: vfnmsac.vf v9, fa0, v8 ; ZVFH-NEXT: vse32.v v9, (a0) ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: fnmsub_vf_v4f32: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: vsetivli zero, 4, e32, mf2, ta, ma ; ZVFHMIN-NEXT: vle32.v v8, (a0) ; ZVFHMIN-NEXT: vle32.v v9, (a1) ; ZVFHMIN-NEXT: vfnmsac.vf v9, fa0, v8 ; ZVFHMIN-NEXT: vse32.v v9, (a0) ; ZVFHMIN-NEXT: ret %a = load <4 x float>, ptr %x %b = load <4 x float>, ptr %y %c = insertelement <4 x float> poison, float %z, i32 0 %d = shufflevector <4 x float> %c, <4 x float> poison, <4 x i32> zeroinitializer %neg = fneg <4 x float> %a %e = call <4 x float> @llvm.fma.v4f32(<4 x float> %neg, <4 x float> %d, <4 x float> %b) store <4 x float> %e, ptr %x ret void } define void @fnmadd_vf_v2f64(ptr %x, ptr %y, double %z) { ; CHECK-LABEL: fnmadd_vf_v2f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; CHECK-NEXT: vle64.v v8, (a0) ; CHECK-NEXT: vle64.v v9, (a1) ; CHECK-NEXT: vfnmacc.vf v9, fa0, v8 ; CHECK-NEXT: vse64.v v9, (a0) ; CHECK-NEXT: ret %a = load <2 x double>, ptr %x %b = load <2 x double>, ptr %y %c = insertelement <2 x double> poison, double %z, i32 0 %d = shufflevector <2 x double> %c, <2 x double> poison, <2 x i32> zeroinitializer %neg = fneg <2 x double> %a %neg2 = fneg <2 x double> %b %e = call <2 x 
double> @llvm.fma.v2f64(<2 x double> %neg, <2 x double> %d, <2 x double> %neg2) store <2 x double> %e, ptr %x ret void } define void @fnmsub_fv_v4f32(ptr %x, ptr %y, float %z) { ; ZVFH-LABEL: fnmsub_fv_v4f32: ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; ZVFH-NEXT: vle32.v v8, (a0) ; ZVFH-NEXT: vle32.v v9, (a1) ; ZVFH-NEXT: vfnmsac.vf v9, fa0, v8 ; ZVFH-NEXT: vse32.v v9, (a0) ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: fnmsub_fv_v4f32: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: vsetivli zero, 4, e32, mf2, ta, ma ; ZVFHMIN-NEXT: vle32.v v8, (a0) ; ZVFHMIN-NEXT: vle32.v v9, (a1) ; ZVFHMIN-NEXT: vfnmsac.vf v9, fa0, v8 ; ZVFHMIN-NEXT: vse32.v v9, (a0) ; ZVFHMIN-NEXT: ret %a = load <4 x float>, ptr %x %b = load <4 x float>, ptr %y %c = insertelement <4 x float> poison, float %z, i32 0 %d = shufflevector <4 x float> %c, <4 x float> poison, <4 x i32> zeroinitializer %neg = fneg <4 x float> %d %e = call <4 x float> @llvm.fma.v4f32(<4 x float> %neg, <4 x float> %a, <4 x float> %b) store <4 x float> %e, ptr %x ret void } define void @fnmadd_fv_v2f64(ptr %x, ptr %y, double %z) { ; CHECK-LABEL: fnmadd_fv_v2f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; CHECK-NEXT: vle64.v v8, (a0) ; CHECK-NEXT: vle64.v v9, (a1) ; CHECK-NEXT: vfnmacc.vf v9, fa0, v8 ; CHECK-NEXT: vse64.v v9, (a0) ; CHECK-NEXT: ret %a = load <2 x double>, ptr %x %b = load <2 x double>, ptr %y %c = insertelement <2 x double> poison, double %z, i32 0 %d = shufflevector <2 x double> %c, <2 x double> poison, <2 x i32> zeroinitializer %neg = fneg <2 x double> %d %neg2 = fneg <2 x double> %b %e = call <2 x double> @llvm.fma.v2f64(<2 x double> %neg, <2 x double> %a, <2 x double> %neg2) store <2 x double> %e, ptr %x ret void } define void @trunc_v8f16(ptr %x) { ; ZVFH-LABEL: trunc_v8f16: ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; ZVFH-NEXT: vle16.v v8, (a0) ; ZVFH-NEXT: lui a1, %hi(.LCPI115_0) ; ZVFH-NEXT: flh fa5, %lo(.LCPI115_0)(a1) ; ZVFH-NEXT: vfabs.v v9, v8 ; ZVFH-NEXT: vmflt.vf v0, v9, fa5 ; ZVFH-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t ; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t ; ZVFH-NEXT: vsetvli zero, zero, e16, m1, ta, mu ; ZVFH-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; ZVFH-NEXT: vse16.v v8, (a0) ; ZVFH-NEXT: ret %a = load <8 x half>, ptr %x %b = call <8 x half> @llvm.trunc.v8f16(<8 x half> %a) store <8 x half> %b, ptr %x ret void } declare <8 x half> @llvm.trunc.v8f16(<8 x half>) define void @trunc_v6f16(ptr %x) { ; ZVFH-LABEL: trunc_v6f16: ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma ; ZVFH-NEXT: vle16.v v8, (a0) ; ZVFH-NEXT: lui a1, %hi(.LCPI116_0) ; ZVFH-NEXT: flh fa5, %lo(.LCPI116_0)(a1) ; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8 ; ZVFH-NEXT: vmflt.vf v0, v9, fa5 ; ZVFH-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t ; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t ; ZVFH-NEXT: vsetvli zero, zero, e16, m1, ta, mu ; ZVFH-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma ; ZVFH-NEXT: vse16.v v8, (a0) ; ZVFH-NEXT: ret %a = load <6 x half>, ptr %x %b = call <6 x half> @llvm.trunc.v6f16(<6 x half> %a) store <6 x half> %b, ptr %x ret void } declare <6 x half> @llvm.trunc.v6f16(<6 x half>) define void @trunc_v4f32(ptr %x) { ; ZVFH-LABEL: trunc_v4f32: ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; ZVFH-NEXT: vle32.v v8, (a0) ; ZVFH-NEXT: vfabs.v v9, v8 ; ZVFH-NEXT: lui a1, 307200 ; ZVFH-NEXT: fmv.w.x fa5, a1 ; ZVFH-NEXT: vmflt.vf v0, v9, fa5 ; ZVFH-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t ; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t ; 
ZVFH-NEXT: vsetvli zero, zero, e32, m1, ta, mu ; ZVFH-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; ZVFH-NEXT: vse32.v v8, (a0) ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: trunc_v4f32: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: vsetivli zero, 4, e32, mf2, ta, ma ; ZVFHMIN-NEXT: vle32.v v8, (a0) ; ZVFHMIN-NEXT: vfabs.v v9, v8 ; ZVFHMIN-NEXT: lui a1, 307200 ; ZVFHMIN-NEXT: fmv.w.x fa5, a1 ; ZVFHMIN-NEXT: vmflt.vf v0, v9, fa5 ; ZVFHMIN-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t ; ZVFHMIN-NEXT: vfcvt.f.x.v v9, v9, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, mu ; ZVFHMIN-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; ZVFHMIN-NEXT: vse32.v v8, (a0) ; ZVFHMIN-NEXT: ret %a = load <4 x float>, ptr %x %b = call <4 x float> @llvm.trunc.v4f32(<4 x float> %a) store <4 x float> %b, ptr %x ret void } declare <4 x float> @llvm.trunc.v4f32(<4 x float>) define void @trunc_v2f64(ptr %x) { ; CHECK-LABEL: trunc_v2f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; CHECK-NEXT: vle64.v v8, (a0) ; CHECK-NEXT: lui a1, %hi(.LCPI118_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI118_0)(a1) ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; CHECK-NEXT: vse64.v v8, (a0) ; CHECK-NEXT: ret %a = load <2 x double>, ptr %x %b = call <2 x double> @llvm.trunc.v2f64(<2 x double> %a) store <2 x double> %b, ptr %x ret void } declare <2 x double> @llvm.trunc.v2f64(<2 x double>) define void @ceil_v8f16(ptr %x) { ; ZVFH-LABEL: ceil_v8f16: ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; ZVFH-NEXT: vle16.v v8, (a0) ; ZVFH-NEXT: lui a1, %hi(.LCPI119_0) ; ZVFH-NEXT: flh fa5, %lo(.LCPI119_0)(a1) ; ZVFH-NEXT: vfabs.v v9, v8 ; ZVFH-NEXT: vmflt.vf v0, v9, fa5 ; ZVFH-NEXT: fsrmi a1, 3 ; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t ; ZVFH-NEXT: fsrm a1 ; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t ; ZVFH-NEXT: vsetvli zero, zero, e16, m1, ta, mu ; ZVFH-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; ZVFH-NEXT: vse16.v v8, (a0) ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: ceil_v8f16: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: vsetivli zero, 8, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vle16.v v8, (a0) ; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; ZVFHMIN-NEXT: vfabs.v v8, v9 ; ZVFHMIN-NEXT: lui a1, 307200 ; ZVFHMIN-NEXT: fmv.w.x fa5, a1 ; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 ; ZVFHMIN-NEXT: fsrmi a1, 3 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v9, v0.t ; ZVFHMIN-NEXT: fsrm a1 ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, mu ; ZVFHMIN-NEXT: vfsgnj.vv v9, v8, v9, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 ; ZVFHMIN-NEXT: vse16.v v8, (a0) ; ZVFHMIN-NEXT: ret %a = load <8 x half>, ptr %x %b = call <8 x half> @llvm.ceil.v8f16(<8 x half> %a) store <8 x half> %b, ptr %x ret void } declare <8 x half> @llvm.ceil.v8f16(<8 x half>) define void @ceil_v6f16(ptr %x) { ; ZVFH-LABEL: ceil_v6f16: ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma ; ZVFH-NEXT: vle16.v v8, (a0) ; ZVFH-NEXT: lui a1, %hi(.LCPI120_0) ; ZVFH-NEXT: flh fa5, %lo(.LCPI120_0)(a1) ; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8 ; ZVFH-NEXT: vmflt.vf v0, v9, fa5 ; ZVFH-NEXT: fsrmi a1, 3 ; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t ; ZVFH-NEXT: fsrm a1 ; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t ; ZVFH-NEXT: vsetvli zero, zero, e16, m1, ta, mu ; ZVFH-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; ZVFH-NEXT: vsetivli zero, 6, 
e16, m1, ta, ma ; ZVFH-NEXT: vse16.v v8, (a0) ; ZVFH-NEXT: ret ; ; ZVFHMINLMULMAX2-RV32-LABEL: ceil_v6f16: ; ZVFHMINLMULMAX2-RV32: # %bb.0: ; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma ; ZVFHMINLMULMAX2-RV32-NEXT: vle16.v v8, (a0) ; ZVFHMINLMULMAX2-RV32-NEXT: vfwcvt.f.f.v v9, v8 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; ZVFHMINLMULMAX2-RV32-NEXT: vfabs.v v8, v9 ; ZVFHMINLMULMAX2-RV32-NEXT: lui a1, 307200 ; ZVFHMINLMULMAX2-RV32-NEXT: fmv.w.x fa5, a1 ; ZVFHMINLMULMAX2-RV32-NEXT: vmflt.vf v0, v8, fa5 ; ZVFHMINLMULMAX2-RV32-NEXT: fsrmi a1, 3 ; ZVFHMINLMULMAX2-RV32-NEXT: vfcvt.x.f.v v8, v9, v0.t ; ZVFHMINLMULMAX2-RV32-NEXT: fsrm a1 ; ZVFHMINLMULMAX2-RV32-NEXT: vfcvt.f.x.v v8, v8, v0.t ; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, mu ; ZVFHMINLMULMAX2-RV32-NEXT: vfsgnj.vv v9, v8, v9, v0.t ; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; ZVFHMINLMULMAX2-RV32-NEXT: vfncvt.f.f.w v8, v9 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma ; ZVFHMINLMULMAX2-RV32-NEXT: vslidedown.vi v9, v8, 2 ; ZVFHMINLMULMAX2-RV32-NEXT: addi a1, a0, 8 ; ZVFHMINLMULMAX2-RV32-NEXT: vse32.v v9, (a1) ; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma ; ZVFHMINLMULMAX2-RV32-NEXT: vse16.v v8, (a0) ; ZVFHMINLMULMAX2-RV32-NEXT: ret ; ; ZVFHMINLMULMAX2-RV64-LABEL: ceil_v6f16: ; ZVFHMINLMULMAX2-RV64: # %bb.0: ; ZVFHMINLMULMAX2-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma ; ZVFHMINLMULMAX2-RV64-NEXT: vle16.v v8, (a0) ; ZVFHMINLMULMAX2-RV64-NEXT: vfwcvt.f.f.v v9, v8 ; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; ZVFHMINLMULMAX2-RV64-NEXT: vfabs.v v8, v9 ; ZVFHMINLMULMAX2-RV64-NEXT: lui a1, 307200 ; ZVFHMINLMULMAX2-RV64-NEXT: fmv.w.x fa5, a1 ; ZVFHMINLMULMAX2-RV64-NEXT: vmflt.vf v0, v8, fa5 ; ZVFHMINLMULMAX2-RV64-NEXT: fsrmi a1, 3 ; ZVFHMINLMULMAX2-RV64-NEXT: vfcvt.x.f.v v8, v9, v0.t ; ZVFHMINLMULMAX2-RV64-NEXT: fsrm a1 ; ZVFHMINLMULMAX2-RV64-NEXT: vfcvt.f.x.v v8, v8, v0.t ; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, mu ; ZVFHMINLMULMAX2-RV64-NEXT: vfsgnj.vv v9, v8, v9, v0.t ; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; ZVFHMINLMULMAX2-RV64-NEXT: vfncvt.f.f.w v8, v9 ; ZVFHMINLMULMAX2-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma ; ZVFHMINLMULMAX2-RV64-NEXT: vse64.v v8, (a0) ; ZVFHMINLMULMAX2-RV64-NEXT: vslidedown.vi v8, v8, 2 ; ZVFHMINLMULMAX2-RV64-NEXT: addi a0, a0, 8 ; ZVFHMINLMULMAX2-RV64-NEXT: vse32.v v8, (a0) ; ZVFHMINLMULMAX2-RV64-NEXT: ret ; ; ZVFHMINLMULMAX1-RV32-LABEL: ceil_v6f16: ; ZVFHMINLMULMAX1-RV32: # %bb.0: ; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma ; ZVFHMINLMULMAX1-RV32-NEXT: vle16.v v8, (a0) ; ZVFHMINLMULMAX1-RV32-NEXT: vfwcvt.f.f.v v9, v8 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; ZVFHMINLMULMAX1-RV32-NEXT: vfabs.v v8, v9 ; ZVFHMINLMULMAX1-RV32-NEXT: lui a1, 307200 ; ZVFHMINLMULMAX1-RV32-NEXT: fmv.w.x fa5, a1 ; ZVFHMINLMULMAX1-RV32-NEXT: vmflt.vf v0, v8, fa5 ; ZVFHMINLMULMAX1-RV32-NEXT: fsrmi a1, 3 ; ZVFHMINLMULMAX1-RV32-NEXT: vfcvt.x.f.v v8, v9, v0.t ; ZVFHMINLMULMAX1-RV32-NEXT: fsrm a1 ; ZVFHMINLMULMAX1-RV32-NEXT: vfcvt.f.x.v v8, v8, v0.t ; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, mu ; ZVFHMINLMULMAX1-RV32-NEXT: vfsgnj.vv v9, v8, v9, v0.t ; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; ZVFHMINLMULMAX1-RV32-NEXT: vfncvt.f.f.w v8, v9 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma ; ZVFHMINLMULMAX1-RV32-NEXT: vslidedown.vi v9, v8, 2 ; 
ZVFHMINLMULMAX1-RV32-NEXT: addi a1, a0, 8 ; ZVFHMINLMULMAX1-RV32-NEXT: vse32.v v9, (a1) ; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma ; ZVFHMINLMULMAX1-RV32-NEXT: vse16.v v8, (a0) ; ZVFHMINLMULMAX1-RV32-NEXT: ret ; ; ZVFHMINLMULMAX1-RV64-LABEL: ceil_v6f16: ; ZVFHMINLMULMAX1-RV64: # %bb.0: ; ZVFHMINLMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma ; ZVFHMINLMULMAX1-RV64-NEXT: vle16.v v8, (a0) ; ZVFHMINLMULMAX1-RV64-NEXT: vfwcvt.f.f.v v9, v8 ; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; ZVFHMINLMULMAX1-RV64-NEXT: vfabs.v v8, v9 ; ZVFHMINLMULMAX1-RV64-NEXT: lui a1, 307200 ; ZVFHMINLMULMAX1-RV64-NEXT: fmv.w.x fa5, a1 ; ZVFHMINLMULMAX1-RV64-NEXT: vmflt.vf v0, v8, fa5 ; ZVFHMINLMULMAX1-RV64-NEXT: fsrmi a1, 3 ; ZVFHMINLMULMAX1-RV64-NEXT: vfcvt.x.f.v v8, v9, v0.t ; ZVFHMINLMULMAX1-RV64-NEXT: fsrm a1 ; ZVFHMINLMULMAX1-RV64-NEXT: vfcvt.f.x.v v8, v8, v0.t ; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, mu ; ZVFHMINLMULMAX1-RV64-NEXT: vfsgnj.vv v9, v8, v9, v0.t ; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; ZVFHMINLMULMAX1-RV64-NEXT: vfncvt.f.f.w v8, v9 ; ZVFHMINLMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma ; ZVFHMINLMULMAX1-RV64-NEXT: vse64.v v8, (a0) ; ZVFHMINLMULMAX1-RV64-NEXT: vslidedown.vi v8, v8, 2 ; ZVFHMINLMULMAX1-RV64-NEXT: addi a0, a0, 8 ; ZVFHMINLMULMAX1-RV64-NEXT: vse32.v v8, (a0) ; ZVFHMINLMULMAX1-RV64-NEXT: ret %a = load <6 x half>, ptr %x %b = call <6 x half> @llvm.ceil.v6f16(<6 x half> %a) store <6 x half> %b, ptr %x ret void } declare <6 x half> @llvm.ceil.v6f16(<6 x half>) define void @ceil_v4f32(ptr %x) { ; ZVFH-LABEL: ceil_v4f32: ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; ZVFH-NEXT: vle32.v v8, (a0) ; ZVFH-NEXT: vfabs.v v9, v8 ; ZVFH-NEXT: lui a1, 307200 ; ZVFH-NEXT: fmv.w.x fa5, a1 ; ZVFH-NEXT: vmflt.vf v0, v9, fa5 ; ZVFH-NEXT: fsrmi a1, 3 ; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t ; ZVFH-NEXT: fsrm a1 ; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t ; ZVFH-NEXT: vsetvli zero, zero, e32, m1, ta, mu ; ZVFH-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; ZVFH-NEXT: vse32.v v8, (a0) ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: ceil_v4f32: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: vsetivli zero, 4, e32, mf2, ta, ma ; ZVFHMIN-NEXT: vle32.v v8, (a0) ; ZVFHMIN-NEXT: vfabs.v v9, v8 ; ZVFHMIN-NEXT: lui a1, 307200 ; ZVFHMIN-NEXT: fmv.w.x fa5, a1 ; ZVFHMIN-NEXT: vmflt.vf v0, v9, fa5 ; ZVFHMIN-NEXT: fsrmi a1, 3 ; ZVFHMIN-NEXT: vfcvt.x.f.v v9, v8, v0.t ; ZVFHMIN-NEXT: fsrm a1 ; ZVFHMIN-NEXT: vfcvt.f.x.v v9, v9, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, mu ; ZVFHMIN-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; ZVFHMIN-NEXT: vse32.v v8, (a0) ; ZVFHMIN-NEXT: ret %a = load <4 x float>, ptr %x %b = call <4 x float> @llvm.ceil.v4f32(<4 x float> %a) store <4 x float> %b, ptr %x ret void } declare <4 x float> @llvm.ceil.v4f32(<4 x float>) define void @ceil_v2f64(ptr %x) { ; CHECK-LABEL: ceil_v2f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; CHECK-NEXT: vle64.v v8, (a0) ; CHECK-NEXT: lui a1, %hi(.LCPI122_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI122_0)(a1) ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: fsrmi a1, 3 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a1 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; CHECK-NEXT: vse64.v v8, (a0) ; CHECK-NEXT: ret %a = load <2 x double>, ptr %x %b = call <2 x double> @llvm.ceil.v2f64(<2 x double> %a) store <2 x double> %b, ptr %x ret void } declare 
<2 x double> @llvm.ceil.v2f64(<2 x double>) define void @floor_v8f16(ptr %x) { ; ZVFH-LABEL: floor_v8f16: ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; ZVFH-NEXT: vle16.v v8, (a0) ; ZVFH-NEXT: lui a1, %hi(.LCPI123_0) ; ZVFH-NEXT: flh fa5, %lo(.LCPI123_0)(a1) ; ZVFH-NEXT: vfabs.v v9, v8 ; ZVFH-NEXT: vmflt.vf v0, v9, fa5 ; ZVFH-NEXT: fsrmi a1, 2 ; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t ; ZVFH-NEXT: fsrm a1 ; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t ; ZVFH-NEXT: vsetvli zero, zero, e16, m1, ta, mu ; ZVFH-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; ZVFH-NEXT: vse16.v v8, (a0) ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: floor_v8f16: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: vsetivli zero, 8, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vle16.v v8, (a0) ; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; ZVFHMIN-NEXT: vfabs.v v8, v9 ; ZVFHMIN-NEXT: lui a1, 307200 ; ZVFHMIN-NEXT: fmv.w.x fa5, a1 ; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 ; ZVFHMIN-NEXT: fsrmi a1, 2 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v9, v0.t ; ZVFHMIN-NEXT: fsrm a1 ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, mu ; ZVFHMIN-NEXT: vfsgnj.vv v9, v8, v9, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 ; ZVFHMIN-NEXT: vse16.v v8, (a0) ; ZVFHMIN-NEXT: ret %a = load <8 x half>, ptr %x %b = call <8 x half> @llvm.floor.v8f16(<8 x half> %a) store <8 x half> %b, ptr %x ret void } declare <8 x half> @llvm.floor.v8f16(<8 x half>) define void @floor_v6f16(ptr %x) { ; ZVFH-LABEL: floor_v6f16: ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma ; ZVFH-NEXT: vle16.v v8, (a0) ; ZVFH-NEXT: lui a1, %hi(.LCPI124_0) ; ZVFH-NEXT: flh fa5, %lo(.LCPI124_0)(a1) ; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8 ; ZVFH-NEXT: vmflt.vf v0, v9, fa5 ; ZVFH-NEXT: fsrmi a1, 2 ; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t ; ZVFH-NEXT: fsrm a1 ; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t ; ZVFH-NEXT: vsetvli zero, zero, e16, m1, ta, mu ; ZVFH-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma ; ZVFH-NEXT: vse16.v v8, (a0) ; ZVFH-NEXT: ret ; ; ZVFHMINLMULMAX2-RV32-LABEL: floor_v6f16: ; ZVFHMINLMULMAX2-RV32: # %bb.0: ; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma ; ZVFHMINLMULMAX2-RV32-NEXT: vle16.v v8, (a0) ; ZVFHMINLMULMAX2-RV32-NEXT: vfwcvt.f.f.v v9, v8 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; ZVFHMINLMULMAX2-RV32-NEXT: vfabs.v v8, v9 ; ZVFHMINLMULMAX2-RV32-NEXT: lui a1, 307200 ; ZVFHMINLMULMAX2-RV32-NEXT: fmv.w.x fa5, a1 ; ZVFHMINLMULMAX2-RV32-NEXT: vmflt.vf v0, v8, fa5 ; ZVFHMINLMULMAX2-RV32-NEXT: fsrmi a1, 2 ; ZVFHMINLMULMAX2-RV32-NEXT: vfcvt.x.f.v v8, v9, v0.t ; ZVFHMINLMULMAX2-RV32-NEXT: fsrm a1 ; ZVFHMINLMULMAX2-RV32-NEXT: vfcvt.f.x.v v8, v8, v0.t ; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, mu ; ZVFHMINLMULMAX2-RV32-NEXT: vfsgnj.vv v9, v8, v9, v0.t ; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; ZVFHMINLMULMAX2-RV32-NEXT: vfncvt.f.f.w v8, v9 ; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma ; ZVFHMINLMULMAX2-RV32-NEXT: vslidedown.vi v9, v8, 2 ; ZVFHMINLMULMAX2-RV32-NEXT: addi a1, a0, 8 ; ZVFHMINLMULMAX2-RV32-NEXT: vse32.v v9, (a1) ; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma ; ZVFHMINLMULMAX2-RV32-NEXT: vse16.v v8, (a0) ; ZVFHMINLMULMAX2-RV32-NEXT: ret ; ; ZVFHMINLMULMAX2-RV64-LABEL: floor_v6f16: ; ZVFHMINLMULMAX2-RV64: # %bb.0: ; ZVFHMINLMULMAX2-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma ; 
ZVFHMINLMULMAX2-RV64-NEXT: vle16.v v8, (a0) ; ZVFHMINLMULMAX2-RV64-NEXT: vfwcvt.f.f.v v9, v8 ; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; ZVFHMINLMULMAX2-RV64-NEXT: vfabs.v v8, v9 ; ZVFHMINLMULMAX2-RV64-NEXT: lui a1, 307200 ; ZVFHMINLMULMAX2-RV64-NEXT: fmv.w.x fa5, a1 ; ZVFHMINLMULMAX2-RV64-NEXT: vmflt.vf v0, v8, fa5 ; ZVFHMINLMULMAX2-RV64-NEXT: fsrmi a1, 2 ; ZVFHMINLMULMAX2-RV64-NEXT: vfcvt.x.f.v v8, v9, v0.t ; ZVFHMINLMULMAX2-RV64-NEXT: fsrm a1 ; ZVFHMINLMULMAX2-RV64-NEXT: vfcvt.f.x.v v8, v8, v0.t ; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, mu ; ZVFHMINLMULMAX2-RV64-NEXT: vfsgnj.vv v9, v8, v9, v0.t ; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; ZVFHMINLMULMAX2-RV64-NEXT: vfncvt.f.f.w v8, v9 ; ZVFHMINLMULMAX2-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma ; ZVFHMINLMULMAX2-RV64-NEXT: vse64.v v8, (a0) ; ZVFHMINLMULMAX2-RV64-NEXT: vslidedown.vi v8, v8, 2 ; ZVFHMINLMULMAX2-RV64-NEXT: addi a0, a0, 8 ; ZVFHMINLMULMAX2-RV64-NEXT: vse32.v v8, (a0) ; ZVFHMINLMULMAX2-RV64-NEXT: ret ; ; ZVFHMINLMULMAX1-RV32-LABEL: floor_v6f16: ; ZVFHMINLMULMAX1-RV32: # %bb.0: ; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma ; ZVFHMINLMULMAX1-RV32-NEXT: vle16.v v8, (a0) ; ZVFHMINLMULMAX1-RV32-NEXT: vfwcvt.f.f.v v9, v8 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; ZVFHMINLMULMAX1-RV32-NEXT: vfabs.v v8, v9 ; ZVFHMINLMULMAX1-RV32-NEXT: lui a1, 307200 ; ZVFHMINLMULMAX1-RV32-NEXT: fmv.w.x fa5, a1 ; ZVFHMINLMULMAX1-RV32-NEXT: vmflt.vf v0, v8, fa5 ; ZVFHMINLMULMAX1-RV32-NEXT: fsrmi a1, 2 ; ZVFHMINLMULMAX1-RV32-NEXT: vfcvt.x.f.v v8, v9, v0.t ; ZVFHMINLMULMAX1-RV32-NEXT: fsrm a1 ; ZVFHMINLMULMAX1-RV32-NEXT: vfcvt.f.x.v v8, v8, v0.t ; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, mu ; ZVFHMINLMULMAX1-RV32-NEXT: vfsgnj.vv v9, v8, v9, v0.t ; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; ZVFHMINLMULMAX1-RV32-NEXT: vfncvt.f.f.w v8, v9 ; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma ; ZVFHMINLMULMAX1-RV32-NEXT: vslidedown.vi v9, v8, 2 ; ZVFHMINLMULMAX1-RV32-NEXT: addi a1, a0, 8 ; ZVFHMINLMULMAX1-RV32-NEXT: vse32.v v9, (a1) ; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma ; ZVFHMINLMULMAX1-RV32-NEXT: vse16.v v8, (a0) ; ZVFHMINLMULMAX1-RV32-NEXT: ret ; ; ZVFHMINLMULMAX1-RV64-LABEL: floor_v6f16: ; ZVFHMINLMULMAX1-RV64: # %bb.0: ; ZVFHMINLMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma ; ZVFHMINLMULMAX1-RV64-NEXT: vle16.v v8, (a0) ; ZVFHMINLMULMAX1-RV64-NEXT: vfwcvt.f.f.v v9, v8 ; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; ZVFHMINLMULMAX1-RV64-NEXT: vfabs.v v8, v9 ; ZVFHMINLMULMAX1-RV64-NEXT: lui a1, 307200 ; ZVFHMINLMULMAX1-RV64-NEXT: fmv.w.x fa5, a1 ; ZVFHMINLMULMAX1-RV64-NEXT: vmflt.vf v0, v8, fa5 ; ZVFHMINLMULMAX1-RV64-NEXT: fsrmi a1, 2 ; ZVFHMINLMULMAX1-RV64-NEXT: vfcvt.x.f.v v8, v9, v0.t ; ZVFHMINLMULMAX1-RV64-NEXT: fsrm a1 ; ZVFHMINLMULMAX1-RV64-NEXT: vfcvt.f.x.v v8, v8, v0.t ; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, mu ; ZVFHMINLMULMAX1-RV64-NEXT: vfsgnj.vv v9, v8, v9, v0.t ; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; ZVFHMINLMULMAX1-RV64-NEXT: vfncvt.f.f.w v8, v9 ; ZVFHMINLMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma ; ZVFHMINLMULMAX1-RV64-NEXT: vse64.v v8, (a0) ; ZVFHMINLMULMAX1-RV64-NEXT: vslidedown.vi v8, v8, 2 ; ZVFHMINLMULMAX1-RV64-NEXT: addi a0, a0, 8 ; ZVFHMINLMULMAX1-RV64-NEXT: vse32.v v8, (a0) ; ZVFHMINLMULMAX1-RV64-NEXT: ret %a = load <6 x half>, ptr %x %b = call <6 x half> 
@llvm.floor.v6f16(<6 x half> %a) store <6 x half> %b, ptr %x ret void } declare <6 x half> @llvm.floor.v6f16(<6 x half>) define void @floor_v4f32(ptr %x) { ; ZVFH-LABEL: floor_v4f32: ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; ZVFH-NEXT: vle32.v v8, (a0) ; ZVFH-NEXT: vfabs.v v9, v8 ; ZVFH-NEXT: lui a1, 307200 ; ZVFH-NEXT: fmv.w.x fa5, a1 ; ZVFH-NEXT: vmflt.vf v0, v9, fa5 ; ZVFH-NEXT: fsrmi a1, 2 ; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t ; ZVFH-NEXT: fsrm a1 ; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t ; ZVFH-NEXT: vsetvli zero, zero, e32, m1, ta, mu ; ZVFH-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; ZVFH-NEXT: vse32.v v8, (a0) ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: floor_v4f32: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: vsetivli zero, 4, e32, mf2, ta, ma ; ZVFHMIN-NEXT: vle32.v v8, (a0) ; ZVFHMIN-NEXT: vfabs.v v9, v8 ; ZVFHMIN-NEXT: lui a1, 307200 ; ZVFHMIN-NEXT: fmv.w.x fa5, a1 ; ZVFHMIN-NEXT: vmflt.vf v0, v9, fa5 ; ZVFHMIN-NEXT: fsrmi a1, 2 ; ZVFHMIN-NEXT: vfcvt.x.f.v v9, v8, v0.t ; ZVFHMIN-NEXT: fsrm a1 ; ZVFHMIN-NEXT: vfcvt.f.x.v v9, v9, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, mu ; ZVFHMIN-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; ZVFHMIN-NEXT: vse32.v v8, (a0) ; ZVFHMIN-NEXT: ret %a = load <4 x float>, ptr %x %b = call <4 x float> @llvm.floor.v4f32(<4 x float> %a) store <4 x float> %b, ptr %x ret void } declare <4 x float> @llvm.floor.v4f32(<4 x float>) define void @floor_v2f64(ptr %x) { ; CHECK-LABEL: floor_v2f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; CHECK-NEXT: vle64.v v8, (a0) ; CHECK-NEXT: lui a1, %hi(.LCPI126_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI126_0)(a1) ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: fsrmi a1, 2 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a1 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; CHECK-NEXT: vse64.v v8, (a0) ; CHECK-NEXT: ret %a = load <2 x double>, ptr %x %b = call <2 x double> @llvm.floor.v2f64(<2 x double> %a) store <2 x double> %b, ptr %x ret void } declare <2 x double> @llvm.floor.v2f64(<2 x double>) define void @round_v8f16(ptr %x) { ; ZVFH-LABEL: round_v8f16: ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; ZVFH-NEXT: vle16.v v8, (a0) ; ZVFH-NEXT: lui a1, %hi(.LCPI127_0) ; ZVFH-NEXT: flh fa5, %lo(.LCPI127_0)(a1) ; ZVFH-NEXT: vfabs.v v9, v8 ; ZVFH-NEXT: vmflt.vf v0, v9, fa5 ; ZVFH-NEXT: fsrmi a1, 4 ; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t ; ZVFH-NEXT: fsrm a1 ; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t ; ZVFH-NEXT: vsetvli zero, zero, e16, m1, ta, mu ; ZVFH-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; ZVFH-NEXT: vse16.v v8, (a0) ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: round_v8f16: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: vsetivli zero, 8, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vle16.v v8, (a0) ; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; ZVFHMIN-NEXT: vfabs.v v8, v9 ; ZVFHMIN-NEXT: lui a1, 307200 ; ZVFHMIN-NEXT: fmv.w.x fa5, a1 ; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 ; ZVFHMIN-NEXT: fsrmi a1, 4 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v9, v0.t ; ZVFHMIN-NEXT: fsrm a1 ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, mu ; ZVFHMIN-NEXT: vfsgnj.vv v9, v8, v9, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 ; ZVFHMIN-NEXT: vse16.v v8, (a0) ; ZVFHMIN-NEXT: ret %a = load <8 x half>, ptr %x %b = call <8 x half> @llvm.round.v8f16(<8 x half> %a) store <8 x half> %b, ptr %x ret void } declare <8 x 
half> @llvm.round.v8f16(<8 x half>)

define void @round_v6f16(ptr %x) {
; ZVFH-LABEL: round_v6f16:
; ZVFH: # %bb.0:
; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma
; ZVFH-NEXT: vle16.v v8, (a0)
; ZVFH-NEXT: lui a1, %hi(.LCPI128_0)
; ZVFH-NEXT: flh fa5, %lo(.LCPI128_0)(a1)
; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; ZVFH-NEXT: vfabs.v v9, v8
; ZVFH-NEXT: vmflt.vf v0, v9, fa5
; ZVFH-NEXT: fsrmi a1, 4
; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t
; ZVFH-NEXT: fsrm a1
; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t
; ZVFH-NEXT: vsetvli zero, zero, e16, m1, ta, mu
; ZVFH-NEXT: vfsgnj.vv v8, v9, v8, v0.t
; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma
; ZVFH-NEXT: vse16.v v8, (a0)
; ZVFH-NEXT: ret
;
; ZVFHMINLMULMAX2-RV32-LABEL: round_v6f16:
; ZVFHMINLMULMAX2-RV32: # %bb.0:
; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
; ZVFHMINLMULMAX2-RV32-NEXT: vle16.v v8, (a0)
; ZVFHMINLMULMAX2-RV32-NEXT: vfwcvt.f.f.v v9, v8
; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; ZVFHMINLMULMAX2-RV32-NEXT: vfabs.v v8, v9
; ZVFHMINLMULMAX2-RV32-NEXT: lui a1, 307200
; ZVFHMINLMULMAX2-RV32-NEXT: fmv.w.x fa5, a1
; ZVFHMINLMULMAX2-RV32-NEXT: vmflt.vf v0, v8, fa5
; ZVFHMINLMULMAX2-RV32-NEXT: fsrmi a1, 4
; ZVFHMINLMULMAX2-RV32-NEXT: vfcvt.x.f.v v8, v9, v0.t
; ZVFHMINLMULMAX2-RV32-NEXT: fsrm a1
; ZVFHMINLMULMAX2-RV32-NEXT: vfcvt.f.x.v v8, v8, v0.t
; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, mu
; ZVFHMINLMULMAX2-RV32-NEXT: vfsgnj.vv v9, v8, v9, v0.t
; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; ZVFHMINLMULMAX2-RV32-NEXT: vfncvt.f.f.w v8, v9
; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
; ZVFHMINLMULMAX2-RV32-NEXT: vslidedown.vi v9, v8, 2
; ZVFHMINLMULMAX2-RV32-NEXT: addi a1, a0, 8
; ZVFHMINLMULMAX2-RV32-NEXT: vse32.v v9, (a1)
; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma
; ZVFHMINLMULMAX2-RV32-NEXT: vse16.v v8, (a0)
; ZVFHMINLMULMAX2-RV32-NEXT: ret
;
; ZVFHMINLMULMAX2-RV64-LABEL: round_v6f16:
; ZVFHMINLMULMAX2-RV64: # %bb.0:
; ZVFHMINLMULMAX2-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
; ZVFHMINLMULMAX2-RV64-NEXT: vle16.v v8, (a0)
; ZVFHMINLMULMAX2-RV64-NEXT: vfwcvt.f.f.v v9, v8
; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; ZVFHMINLMULMAX2-RV64-NEXT: vfabs.v v8, v9
; ZVFHMINLMULMAX2-RV64-NEXT: lui a1, 307200
; ZVFHMINLMULMAX2-RV64-NEXT: fmv.w.x fa5, a1
; ZVFHMINLMULMAX2-RV64-NEXT: vmflt.vf v0, v8, fa5
; ZVFHMINLMULMAX2-RV64-NEXT: fsrmi a1, 4
; ZVFHMINLMULMAX2-RV64-NEXT: vfcvt.x.f.v v8, v9, v0.t
; ZVFHMINLMULMAX2-RV64-NEXT: fsrm a1
; ZVFHMINLMULMAX2-RV64-NEXT: vfcvt.f.x.v v8, v8, v0.t
; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, mu
; ZVFHMINLMULMAX2-RV64-NEXT: vfsgnj.vv v9, v8, v9, v0.t
; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; ZVFHMINLMULMAX2-RV64-NEXT: vfncvt.f.f.w v8, v9
; ZVFHMINLMULMAX2-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
; ZVFHMINLMULMAX2-RV64-NEXT: vse64.v v8, (a0)
; ZVFHMINLMULMAX2-RV64-NEXT: vslidedown.vi v8, v8, 2
; ZVFHMINLMULMAX2-RV64-NEXT: addi a0, a0, 8
; ZVFHMINLMULMAX2-RV64-NEXT: vse32.v v8, (a0)
; ZVFHMINLMULMAX2-RV64-NEXT: ret
;
; ZVFHMINLMULMAX1-RV32-LABEL: round_v6f16:
; ZVFHMINLMULMAX1-RV32: # %bb.0:
; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
; ZVFHMINLMULMAX1-RV32-NEXT: vle16.v v8, (a0)
; ZVFHMINLMULMAX1-RV32-NEXT: vfwcvt.f.f.v v9, v8
; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; ZVFHMINLMULMAX1-RV32-NEXT: vfabs.v v8, v9
; ZVFHMINLMULMAX1-RV32-NEXT: lui a1, 307200
; ZVFHMINLMULMAX1-RV32-NEXT: fmv.w.x fa5, a1
; ZVFHMINLMULMAX1-RV32-NEXT: vmflt.vf v0, v8, fa5
; ZVFHMINLMULMAX1-RV32-NEXT: fsrmi a1, 4
; ZVFHMINLMULMAX1-RV32-NEXT: vfcvt.x.f.v v8, v9, v0.t
; ZVFHMINLMULMAX1-RV32-NEXT: fsrm a1
; ZVFHMINLMULMAX1-RV32-NEXT: vfcvt.f.x.v v8, v8, v0.t
; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, mu
; ZVFHMINLMULMAX1-RV32-NEXT: vfsgnj.vv v9, v8, v9, v0.t
; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; ZVFHMINLMULMAX1-RV32-NEXT: vfncvt.f.f.w v8, v9
; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
; ZVFHMINLMULMAX1-RV32-NEXT: vslidedown.vi v9, v8, 2
; ZVFHMINLMULMAX1-RV32-NEXT: addi a1, a0, 8
; ZVFHMINLMULMAX1-RV32-NEXT: vse32.v v9, (a1)
; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma
; ZVFHMINLMULMAX1-RV32-NEXT: vse16.v v8, (a0)
; ZVFHMINLMULMAX1-RV32-NEXT: ret
;
; ZVFHMINLMULMAX1-RV64-LABEL: round_v6f16:
; ZVFHMINLMULMAX1-RV64: # %bb.0:
; ZVFHMINLMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
; ZVFHMINLMULMAX1-RV64-NEXT: vle16.v v8, (a0)
; ZVFHMINLMULMAX1-RV64-NEXT: vfwcvt.f.f.v v9, v8
; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; ZVFHMINLMULMAX1-RV64-NEXT: vfabs.v v8, v9
; ZVFHMINLMULMAX1-RV64-NEXT: lui a1, 307200
; ZVFHMINLMULMAX1-RV64-NEXT: fmv.w.x fa5, a1
; ZVFHMINLMULMAX1-RV64-NEXT: vmflt.vf v0, v8, fa5
; ZVFHMINLMULMAX1-RV64-NEXT: fsrmi a1, 4
; ZVFHMINLMULMAX1-RV64-NEXT: vfcvt.x.f.v v8, v9, v0.t
; ZVFHMINLMULMAX1-RV64-NEXT: fsrm a1
; ZVFHMINLMULMAX1-RV64-NEXT: vfcvt.f.x.v v8, v8, v0.t
; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, mu
; ZVFHMINLMULMAX1-RV64-NEXT: vfsgnj.vv v9, v8, v9, v0.t
; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; ZVFHMINLMULMAX1-RV64-NEXT: vfncvt.f.f.w v8, v9
; ZVFHMINLMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
; ZVFHMINLMULMAX1-RV64-NEXT: vse64.v v8, (a0)
; ZVFHMINLMULMAX1-RV64-NEXT: vslidedown.vi v8, v8, 2
; ZVFHMINLMULMAX1-RV64-NEXT: addi a0, a0, 8
; ZVFHMINLMULMAX1-RV64-NEXT: vse32.v v8, (a0)
; ZVFHMINLMULMAX1-RV64-NEXT: ret
  %a = load <6 x half>, ptr %x
  %b = call <6 x half> @llvm.round.v6f16(<6 x half> %a)
  store <6 x half> %b, ptr %x
  ret void
}
declare <6 x half> @llvm.round.v6f16(<6 x half>)

define void @round_v4f32(ptr %x) {
; ZVFH-LABEL: round_v4f32:
; ZVFH: # %bb.0:
; ZVFH-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; ZVFH-NEXT: vle32.v v8, (a0)
; ZVFH-NEXT: vfabs.v v9, v8
; ZVFH-NEXT: lui a1, 307200
; ZVFH-NEXT: fmv.w.x fa5, a1
; ZVFH-NEXT: vmflt.vf v0, v9, fa5
; ZVFH-NEXT: fsrmi a1, 4
; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t
; ZVFH-NEXT: fsrm a1
; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t
; ZVFH-NEXT: vsetvli zero, zero, e32, m1, ta, mu
; ZVFH-NEXT: vfsgnj.vv v8, v9, v8, v0.t
; ZVFH-NEXT: vse32.v v8, (a0)
; ZVFH-NEXT: ret
;
; ZVFHMIN-LABEL: round_v4f32:
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: vsetivli zero, 4, e32, mf2, ta, ma
; ZVFHMIN-NEXT: vle32.v v8, (a0)
; ZVFHMIN-NEXT: vfabs.v v9, v8
; ZVFHMIN-NEXT: lui a1, 307200
; ZVFHMIN-NEXT: fmv.w.x fa5, a1
; ZVFHMIN-NEXT: vmflt.vf v0, v9, fa5
; ZVFHMIN-NEXT: fsrmi a1, 4
; ZVFHMIN-NEXT: vfcvt.x.f.v v9, v8, v0.t
; ZVFHMIN-NEXT: fsrm a1
; ZVFHMIN-NEXT: vfcvt.f.x.v v9, v9, v0.t
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, mu
; ZVFHMIN-NEXT: vfsgnj.vv v8, v9, v8, v0.t
; ZVFHMIN-NEXT: vse32.v v8, (a0)
; ZVFHMIN-NEXT: ret
  %a = load <4 x float>, ptr %x
  %b = call <4 x float> @llvm.round.v4f32(<4 x float> %a)
  store <4 x float> %b, ptr %x
  ret void
}
declare <4 x float> @llvm.round.v4f32(<4 x float>)

define void @round_v2f64(ptr %x) {
; CHECK-LABEL: round_v2f64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; CHECK-NEXT: vle64.v v8, (a0)
; CHECK-NEXT: lui a1, %hi(.LCPI130_0)
; CHECK-NEXT: fld fa5, %lo(.LCPI130_0)(a1)
; CHECK-NEXT: vfabs.v v9, v8
; CHECK-NEXT: vmflt.vf v0, v9, fa5
; CHECK-NEXT: fsrmi a1, 4
; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t
; CHECK-NEXT: fsrm a1
; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu
; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
; CHECK-NEXT: vse64.v v8, (a0)
; CHECK-NEXT: ret
  %a = load <2 x double>, ptr %x
  %b = call <2 x double> @llvm.round.v2f64(<2 x double> %a)
  store <2 x double> %b, ptr %x
  ret void
}
declare <2 x double> @llvm.round.v2f64(<2 x double>)

define void @rint_v8f16(ptr %x) {
; ZVFH-LABEL: rint_v8f16:
; ZVFH: # %bb.0:
; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; ZVFH-NEXT: vle16.v v8, (a0)
; ZVFH-NEXT: lui a1, %hi(.LCPI131_0)
; ZVFH-NEXT: flh fa5, %lo(.LCPI131_0)(a1)
; ZVFH-NEXT: vfabs.v v9, v8
; ZVFH-NEXT: vmflt.vf v0, v9, fa5
; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t
; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t
; ZVFH-NEXT: vsetvli zero, zero, e16, m1, ta, mu
; ZVFH-NEXT: vfsgnj.vv v8, v9, v8, v0.t
; ZVFH-NEXT: vse16.v v8, (a0)
; ZVFH-NEXT: ret
;
; ZVFHMIN-LABEL: rint_v8f16:
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
; ZVFHMIN-NEXT: vle16.v v8, (a0)
; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; ZVFHMIN-NEXT: vfabs.v v8, v9
; ZVFHMIN-NEXT: lui a1, 307200
; ZVFHMIN-NEXT: fmv.w.x fa5, a1
; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5
; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v9, v0.t
; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, mu
; ZVFHMIN-NEXT: vfsgnj.vv v9, v8, v9, v0.t
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
; ZVFHMIN-NEXT: vse16.v v8, (a0)
; ZVFHMIN-NEXT: ret
  %a = load <8 x half>, ptr %x
  %b = call <8 x half> @llvm.rint.v8f16(<8 x half> %a)
  store <8 x half> %b, ptr %x
  ret void
}
declare <8 x half> @llvm.rint.v8f16(<8 x half>)

define void @rint_v4f32(ptr %x) {
; ZVFH-LABEL: rint_v4f32:
; ZVFH: # %bb.0:
; ZVFH-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; ZVFH-NEXT: vle32.v v8, (a0)
; ZVFH-NEXT: vfabs.v v9, v8
; ZVFH-NEXT: lui a1, 307200
; ZVFH-NEXT: fmv.w.x fa5, a1
; ZVFH-NEXT: vmflt.vf v0, v9, fa5
; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t
; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t
; ZVFH-NEXT: vsetvli zero, zero, e32, m1, ta, mu
; ZVFH-NEXT: vfsgnj.vv v8, v9, v8, v0.t
; ZVFH-NEXT: vse32.v v8, (a0)
; ZVFH-NEXT: ret
;
; ZVFHMIN-LABEL: rint_v4f32:
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: vsetivli zero, 4, e32, mf2, ta, ma
; ZVFHMIN-NEXT: vle32.v v8, (a0)
; ZVFHMIN-NEXT: vfabs.v v9, v8
; ZVFHMIN-NEXT: lui a1, 307200
; ZVFHMIN-NEXT: fmv.w.x fa5, a1
; ZVFHMIN-NEXT: vmflt.vf v0, v9, fa5
; ZVFHMIN-NEXT: vfcvt.x.f.v v9, v8, v0.t
; ZVFHMIN-NEXT: vfcvt.f.x.v v9, v9, v0.t
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, mu
; ZVFHMIN-NEXT: vfsgnj.vv v8, v9, v8, v0.t
; ZVFHMIN-NEXT: vse32.v v8, (a0)
; ZVFHMIN-NEXT: ret
  %a = load <4 x float>, ptr %x
  %b = call <4 x float> @llvm.rint.v4f32(<4 x float> %a)
  store <4 x float> %b, ptr %x
  ret void
}
declare <4 x float> @llvm.rint.v4f32(<4 x float>)

define void @rint_v2f64(ptr %x) {
; CHECK-LABEL: rint_v2f64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; CHECK-NEXT: vle64.v v8, (a0)
; CHECK-NEXT: lui a1, %hi(.LCPI133_0)
; CHECK-NEXT: fld fa5, %lo(.LCPI133_0)(a1)
; CHECK-NEXT: vfabs.v v9, v8
; CHECK-NEXT: vmflt.vf v0, v9, fa5
; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t
; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu
; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
; CHECK-NEXT: vse64.v v8, (a0)
; CHECK-NEXT: ret
  %a = load <2 x double>, ptr %x
  %b = call <2 x double> @llvm.rint.v2f64(<2 x double> %a)
  store <2 x double> %b, ptr %x
  ret void
}
declare <2 x double> @llvm.rint.v2f64(<2 x double>)

define void @nearbyint_v8f16(ptr %x) {
; ZVFH-LABEL: nearbyint_v8f16:
; ZVFH: # %bb.0:
; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; ZVFH-NEXT: vle16.v v8, (a0)
; ZVFH-NEXT: lui a1, %hi(.LCPI134_0)
; ZVFH-NEXT: flh fa5, %lo(.LCPI134_0)(a1)
; ZVFH-NEXT: vfabs.v v9, v8
; ZVFH-NEXT: vmflt.vf v0, v9, fa5
; ZVFH-NEXT: frflags a1
; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t
; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t
; ZVFH-NEXT: fsflags a1
; ZVFH-NEXT: vsetvli zero, zero, e16, m1, ta, mu
; ZVFH-NEXT: vfsgnj.vv v8, v9, v8, v0.t
; ZVFH-NEXT: vse16.v v8, (a0)
; ZVFH-NEXT: ret
;
; ZVFHMIN-LABEL: nearbyint_v8f16:
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
; ZVFHMIN-NEXT: vle16.v v8, (a0)
; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; ZVFHMIN-NEXT: vfabs.v v8, v9
; ZVFHMIN-NEXT: lui a1, 307200
; ZVFHMIN-NEXT: fmv.w.x fa5, a1
; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5
; ZVFHMIN-NEXT: frflags a1
; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v9, v0.t
; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t
; ZVFHMIN-NEXT: fsflags a1
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, mu
; ZVFHMIN-NEXT: vfsgnj.vv v9, v8, v9, v0.t
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
; ZVFHMIN-NEXT: vse16.v v8, (a0)
; ZVFHMIN-NEXT: ret
  %a = load <8 x half>, ptr %x
  %b = call <8 x half> @llvm.nearbyint.v8f16(<8 x half> %a)
  store <8 x half> %b, ptr %x
  ret void
}
declare <8 x half> @llvm.nearbyint.v8f16(<8 x half>)

define void @nearbyint_v4f32(ptr %x) {
; ZVFH-LABEL: nearbyint_v4f32:
; ZVFH: # %bb.0:
; ZVFH-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; ZVFH-NEXT: vle32.v v8, (a0)
; ZVFH-NEXT: vfabs.v v9, v8
; ZVFH-NEXT: lui a1, 307200
; ZVFH-NEXT: fmv.w.x fa5, a1
; ZVFH-NEXT: vmflt.vf v0, v9, fa5
; ZVFH-NEXT: frflags a1
; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t
; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t
; ZVFH-NEXT: fsflags a1
; ZVFH-NEXT: vsetvli zero, zero, e32, m1, ta, mu
; ZVFH-NEXT: vfsgnj.vv v8, v9, v8, v0.t
; ZVFH-NEXT: vse32.v v8, (a0)
; ZVFH-NEXT: ret
;
; ZVFHMIN-LABEL: nearbyint_v4f32:
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: vsetivli zero, 4, e32, mf2, ta, ma
; ZVFHMIN-NEXT: vle32.v v8, (a0)
; ZVFHMIN-NEXT: vfabs.v v9, v8
; ZVFHMIN-NEXT: lui a1, 307200
; ZVFHMIN-NEXT: fmv.w.x fa5, a1
; ZVFHMIN-NEXT: vmflt.vf v0, v9, fa5
; ZVFHMIN-NEXT: frflags a1
; ZVFHMIN-NEXT: vfcvt.x.f.v v9, v8, v0.t
; ZVFHMIN-NEXT: vfcvt.f.x.v v9, v9, v0.t
; ZVFHMIN-NEXT: fsflags a1
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, mu
; ZVFHMIN-NEXT: vfsgnj.vv v8, v9, v8, v0.t
; ZVFHMIN-NEXT: vse32.v v8, (a0)
; ZVFHMIN-NEXT: ret
  %a = load <4 x float>, ptr %x
  %b = call <4 x float> @llvm.nearbyint.v4f32(<4 x float> %a)
  store <4 x float> %b, ptr %x
  ret void
}
declare <4 x float> @llvm.nearbyint.v4f32(<4 x float>)

define void @nearbyint_v2f64(ptr %x) {
; CHECK-LABEL: nearbyint_v2f64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; CHECK-NEXT: vle64.v v8, (a0)
; CHECK-NEXT: lui a1, %hi(.LCPI136_0)
; CHECK-NEXT: fld fa5, %lo(.LCPI136_0)(a1)
; CHECK-NEXT: vfabs.v v9, v8
; CHECK-NEXT: vmflt.vf v0, v9, fa5
; CHECK-NEXT: frflags a1
; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t
; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t
; CHECK-NEXT: fsflags a1
; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu
; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
; CHECK-NEXT: vse64.v v8, (a0)
; CHECK-NEXT: ret
  %a = load <2 x double>, ptr %x
  %b = call <2 x double> @llvm.nearbyint.v2f64(<2 x double> %a)
  store <2 x double> %b, ptr %x
  ret void
}
declare <2 x double> @llvm.nearbyint.v2f64(<2 x double>)

define void @fmuladd_v8f16(ptr %x, ptr %y, ptr %z) {
; ZVFH-LABEL: fmuladd_v8f16:
; ZVFH: # %bb.0:
; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; ZVFH-NEXT: vle16.v v8, (a0)
; ZVFH-NEXT: vle16.v v9, (a1)
; ZVFH-NEXT: vle16.v v10, (a2)
; ZVFH-NEXT: vfmacc.vv v10, v8, v9
; ZVFH-NEXT: vse16.v v10, (a0)
; ZVFH-NEXT: ret
;
; ZVFHMIN-LABEL: fmuladd_v8f16:
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
; ZVFHMIN-NEXT: vle16.v v8, (a1)
; ZVFHMIN-NEXT: vle16.v v9, (a0)
; ZVFHMIN-NEXT: vle16.v v10, (a2)
; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v8
; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; ZVFHMIN-NEXT: vfmul.vv v8, v8, v11
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v9, v8
; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9
; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v10
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; ZVFHMIN-NEXT: vfadd.vv v8, v8, v9
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v9, v8
; ZVFHMIN-NEXT: vse16.v v9, (a0)
; ZVFHMIN-NEXT: ret
  %a = load <8 x half>, ptr %x
  %b = load <8 x half>, ptr %y
  %c = load <8 x half>, ptr %z
  %d = call <8 x half> @llvm.fmuladd.v8f16(<8 x half> %a, <8 x half> %b, <8 x half> %c)
  store <8 x half> %d, ptr %x
  ret void
}
declare <8 x half> @llvm.fmuladd.v8f16(<8 x half>, <8 x half>, <8 x half>)

define void @fmuladd_v6f16(ptr %x, ptr %y, ptr %z) {
; ZVFH-LABEL: fmuladd_v6f16:
; ZVFH: # %bb.0:
; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma
; ZVFH-NEXT: vle16.v v8, (a0)
; ZVFH-NEXT: vle16.v v9, (a1)
; ZVFH-NEXT: vle16.v v10, (a2)
; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; ZVFH-NEXT: vfmacc.vv v10, v8, v9
; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma
; ZVFH-NEXT: vse16.v v10, (a0)
; ZVFH-NEXT: ret
;
; ZVFHMINLMULMAX2-RV32-LABEL: fmuladd_v6f16:
; ZVFHMINLMULMAX2-RV32: # %bb.0:
; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
; ZVFHMINLMULMAX2-RV32-NEXT: vle16.v v8, (a1)
; ZVFHMINLMULMAX2-RV32-NEXT: vle16.v v9, (a0)
; ZVFHMINLMULMAX2-RV32-NEXT: vle16.v v10, (a2)
; ZVFHMINLMULMAX2-RV32-NEXT: vfwcvt.f.f.v v11, v8
; ZVFHMINLMULMAX2-RV32-NEXT: vfwcvt.f.f.v v8, v9
; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; ZVFHMINLMULMAX2-RV32-NEXT: vfmul.vv v8, v8, v11
; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; ZVFHMINLMULMAX2-RV32-NEXT: vfncvt.f.f.w v9, v8
; ZVFHMINLMULMAX2-RV32-NEXT: vfwcvt.f.f.v v8, v9
; ZVFHMINLMULMAX2-RV32-NEXT: vfwcvt.f.f.v v9, v10
; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; ZVFHMINLMULMAX2-RV32-NEXT: vfadd.vv v8, v8, v9
; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; ZVFHMINLMULMAX2-RV32-NEXT: vfncvt.f.f.w v9, v8
; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
; ZVFHMINLMULMAX2-RV32-NEXT: vslidedown.vi v8, v9, 2
; ZVFHMINLMULMAX2-RV32-NEXT: addi a1, a0, 8
; ZVFHMINLMULMAX2-RV32-NEXT: vse32.v v8, (a1)
; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma
; ZVFHMINLMULMAX2-RV32-NEXT: vse16.v v9, (a0)
; ZVFHMINLMULMAX2-RV32-NEXT: ret
;
; ZVFHMINLMULMAX2-RV64-LABEL: fmuladd_v6f16:
; ZVFHMINLMULMAX2-RV64: # %bb.0:
; ZVFHMINLMULMAX2-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
; ZVFHMINLMULMAX2-RV64-NEXT: vle16.v v8, (a1)
; ZVFHMINLMULMAX2-RV64-NEXT: vle16.v v9, (a0)
; ZVFHMINLMULMAX2-RV64-NEXT: vle16.v v10, (a2)
; ZVFHMINLMULMAX2-RV64-NEXT: vfwcvt.f.f.v v11, v8
; ZVFHMINLMULMAX2-RV64-NEXT: vfwcvt.f.f.v v8, v9
; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; ZVFHMINLMULMAX2-RV64-NEXT: vfmul.vv v8, v8, v11
; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; ZVFHMINLMULMAX2-RV64-NEXT: vfncvt.f.f.w v9, v8
; ZVFHMINLMULMAX2-RV64-NEXT: vfwcvt.f.f.v v8, v9
; ZVFHMINLMULMAX2-RV64-NEXT: vfwcvt.f.f.v v9, v10
; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; ZVFHMINLMULMAX2-RV64-NEXT: vfadd.vv v8, v8, v9
; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; ZVFHMINLMULMAX2-RV64-NEXT: vfncvt.f.f.w v9, v8
; ZVFHMINLMULMAX2-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
; ZVFHMINLMULMAX2-RV64-NEXT: vse64.v v9, (a0)
; ZVFHMINLMULMAX2-RV64-NEXT: vslidedown.vi v8, v9, 2
; ZVFHMINLMULMAX2-RV64-NEXT: addi a0, a0, 8
; ZVFHMINLMULMAX2-RV64-NEXT: vse32.v v8, (a0)
; ZVFHMINLMULMAX2-RV64-NEXT: ret
;
; ZVFHMINLMULMAX1-RV32-LABEL: fmuladd_v6f16:
; ZVFHMINLMULMAX1-RV32: # %bb.0:
; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
; ZVFHMINLMULMAX1-RV32-NEXT: vle16.v v8, (a1)
; ZVFHMINLMULMAX1-RV32-NEXT: vle16.v v9, (a0)
; ZVFHMINLMULMAX1-RV32-NEXT: vle16.v v10, (a2)
; ZVFHMINLMULMAX1-RV32-NEXT: vfwcvt.f.f.v v11, v8
; ZVFHMINLMULMAX1-RV32-NEXT: vfwcvt.f.f.v v8, v9
; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; ZVFHMINLMULMAX1-RV32-NEXT: vfmul.vv v8, v8, v11
; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; ZVFHMINLMULMAX1-RV32-NEXT: vfncvt.f.f.w v9, v8
; ZVFHMINLMULMAX1-RV32-NEXT: vfwcvt.f.f.v v8, v9
; ZVFHMINLMULMAX1-RV32-NEXT: vfwcvt.f.f.v v9, v10
; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; ZVFHMINLMULMAX1-RV32-NEXT: vfadd.vv v8, v8, v9
; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; ZVFHMINLMULMAX1-RV32-NEXT: vfncvt.f.f.w v9, v8
; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
; ZVFHMINLMULMAX1-RV32-NEXT: vslidedown.vi v8, v9, 2
; ZVFHMINLMULMAX1-RV32-NEXT: addi a1, a0, 8
; ZVFHMINLMULMAX1-RV32-NEXT: vse32.v v8, (a1)
; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma
; ZVFHMINLMULMAX1-RV32-NEXT: vse16.v v9, (a0)
; ZVFHMINLMULMAX1-RV32-NEXT: ret
;
; ZVFHMINLMULMAX1-RV64-LABEL: fmuladd_v6f16:
; ZVFHMINLMULMAX1-RV64: # %bb.0:
; ZVFHMINLMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
; ZVFHMINLMULMAX1-RV64-NEXT: vle16.v v8, (a1)
; ZVFHMINLMULMAX1-RV64-NEXT: vle16.v v9, (a0)
; ZVFHMINLMULMAX1-RV64-NEXT: vle16.v v10, (a2)
; ZVFHMINLMULMAX1-RV64-NEXT: vfwcvt.f.f.v v11, v8
; ZVFHMINLMULMAX1-RV64-NEXT: vfwcvt.f.f.v v8, v9
; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; ZVFHMINLMULMAX1-RV64-NEXT: vfmul.vv v8, v8, v11
; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; ZVFHMINLMULMAX1-RV64-NEXT: vfncvt.f.f.w v9, v8
; ZVFHMINLMULMAX1-RV64-NEXT: vfwcvt.f.f.v v8, v9
; ZVFHMINLMULMAX1-RV64-NEXT: vfwcvt.f.f.v v9, v10
; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; ZVFHMINLMULMAX1-RV64-NEXT: vfadd.vv v8, v8, v9
; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; ZVFHMINLMULMAX1-RV64-NEXT: vfncvt.f.f.w v9, v8
; ZVFHMINLMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
; ZVFHMINLMULMAX1-RV64-NEXT: vse64.v v9, (a0)
; ZVFHMINLMULMAX1-RV64-NEXT: vslidedown.vi v8, v9, 2
; ZVFHMINLMULMAX1-RV64-NEXT: addi a0, a0, 8
; ZVFHMINLMULMAX1-RV64-NEXT: vse32.v v8, (a0)
; ZVFHMINLMULMAX1-RV64-NEXT: ret
  %a = load <6 x half>, ptr %x
  %b = load <6 x half>, ptr %y
  %c = load <6 x half>, ptr %z
  %d = call <6 x half> @llvm.fmuladd.v6f16(<6 x half> %a, <6 x half> %b, <6 x half> %c)
  store <6 x half> %d, ptr %x
  ret void
}
declare <6 x half> @llvm.fmuladd.v6f16(<6 x half>, <6 x half>, <6 x half>)

define void @fmuladd_v4f32(ptr %x, ptr %y, ptr %z) {
; ZVFH-LABEL: fmuladd_v4f32:
; ZVFH: # %bb.0:
; ZVFH-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; ZVFH-NEXT: vle32.v v8, (a0)
; ZVFH-NEXT: vle32.v v9, (a1)
; ZVFH-NEXT: vle32.v v10, (a2)
; ZVFH-NEXT: vfmacc.vv v10, v8, v9
; ZVFH-NEXT: vse32.v v10, (a0)
; ZVFH-NEXT: ret
;
; ZVFHMIN-LABEL: fmuladd_v4f32:
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: vsetivli zero, 4, e32, mf2, ta, ma
; ZVFHMIN-NEXT: vle32.v v8, (a0)
; ZVFHMIN-NEXT: vle32.v v9, (a1)
; ZVFHMIN-NEXT: vle32.v v10, (a2)
; ZVFHMIN-NEXT: vfmacc.vv v10, v8, v9
; ZVFHMIN-NEXT: vse32.v v10, (a0)
; ZVFHMIN-NEXT: ret
  %a = load <4 x float>, ptr %x
  %b = load <4 x float>, ptr %y
  %c = load <4 x float>, ptr %z
  %d = call <4 x float> @llvm.fmuladd.v4f32(<4 x float> %a, <4 x float> %b, <4 x float> %c)
  store <4 x float> %d, ptr %x
  ret void
}
declare <4 x float> @llvm.fmuladd.v4f32(<4 x float>, <4 x float>, <4 x float>)

define void @fmuladd_v2f64(ptr %x, ptr %y, ptr %z) {
; CHECK-LABEL: fmuladd_v2f64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; CHECK-NEXT: vle64.v v8, (a0)
; CHECK-NEXT: vle64.v v9, (a1)
; CHECK-NEXT: vle64.v v10, (a2)
; CHECK-NEXT: vfmacc.vv v10, v8, v9
; CHECK-NEXT: vse64.v v10, (a0)
; CHECK-NEXT: ret
  %a = load <2 x double>, ptr %x
  %b = load <2 x double>, ptr %y
  %c = load <2 x double>, ptr %z
  %d = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> %a, <2 x double> %b, <2 x double> %c)
  store <2 x double> %d, ptr %x
  ret void
}
declare <2 x double> @llvm.fmuladd.v2f64(<2 x double>, <2 x double>, <2 x double>)

define void @fmsub_fmuladd_v8f16(ptr %x, ptr %y, ptr %z) {
; ZVFH-LABEL: fmsub_fmuladd_v8f16:
; ZVFH: # %bb.0:
; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; ZVFH-NEXT: vle16.v v8, (a0)
; ZVFH-NEXT: vle16.v v9, (a1)
; ZVFH-NEXT: vle16.v v10, (a2)
; ZVFH-NEXT: vfmsac.vv v10, v8, v9
; ZVFH-NEXT: vse16.v v10, (a0)
; ZVFH-NEXT: ret
;
; ZVFHMIN-LABEL: fmsub_fmuladd_v8f16:
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
; ZVFHMIN-NEXT: vle16.v v8, (a1)
; ZVFHMIN-NEXT: vle16.v v9, (a0)
; ZVFHMIN-NEXT: vle16.v v10, (a2)
; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v8
; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; ZVFHMIN-NEXT: vfmul.vv v8, v8, v11
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v9, v8
; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9
; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v10
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; ZVFHMIN-NEXT: vfsub.vv v8, v8, v9
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v9, v8
; ZVFHMIN-NEXT: vse16.v v9, (a0)
; ZVFHMIN-NEXT: ret
  %a = load <8 x half>, ptr %x
  %b = load <8 x half>, ptr %y
  %c = load <8 x half>, ptr %z
  %neg = fneg <8 x half> %c
  %d = call <8 x half> @llvm.fmuladd.v8f16(<8 x half> %a, <8 x half> %b, <8 x half> %neg)
  store <8 x half> %d, ptr %x
  ret void
}

define void @fmsub_fmuladd_v6f16(ptr %x, ptr %y, ptr %z) {
; ZVFH-LABEL: fmsub_fmuladd_v6f16:
; ZVFH: # %bb.0:
; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma
; ZVFH-NEXT: vle16.v v8, (a0)
; ZVFH-NEXT: vle16.v v9, (a1)
; ZVFH-NEXT: vle16.v v10, (a2)
; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; ZVFH-NEXT: vfmsac.vv v10, v8, v9
; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma
; ZVFH-NEXT: vse16.v v10, (a0)
; ZVFH-NEXT: ret
;
; ZVFHMINLMULMAX2-RV32-LABEL: fmsub_fmuladd_v6f16:
; ZVFHMINLMULMAX2-RV32: # %bb.0:
; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
; ZVFHMINLMULMAX2-RV32-NEXT: vle16.v v8, (a1)
; ZVFHMINLMULMAX2-RV32-NEXT: vle16.v v9, (a0)
; ZVFHMINLMULMAX2-RV32-NEXT: vle16.v v10, (a2)
; ZVFHMINLMULMAX2-RV32-NEXT: vfwcvt.f.f.v v11, v8
; ZVFHMINLMULMAX2-RV32-NEXT: vfwcvt.f.f.v v8, v9
; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; ZVFHMINLMULMAX2-RV32-NEXT: vfmul.vv v8, v8, v11
; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; ZVFHMINLMULMAX2-RV32-NEXT: vfncvt.f.f.w v9, v8
; ZVFHMINLMULMAX2-RV32-NEXT: vfwcvt.f.f.v v8, v9
; ZVFHMINLMULMAX2-RV32-NEXT: vfwcvt.f.f.v v9, v10
; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; ZVFHMINLMULMAX2-RV32-NEXT: vfsub.vv v8, v8, v9
; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; ZVFHMINLMULMAX2-RV32-NEXT: vfncvt.f.f.w v9, v8
; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
; ZVFHMINLMULMAX2-RV32-NEXT: vslidedown.vi v8, v9, 2
; ZVFHMINLMULMAX2-RV32-NEXT: addi a1, a0, 8
; ZVFHMINLMULMAX2-RV32-NEXT: vse32.v v8, (a1)
; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma
; ZVFHMINLMULMAX2-RV32-NEXT: vse16.v v9, (a0)
; ZVFHMINLMULMAX2-RV32-NEXT: ret
;
; ZVFHMINLMULMAX2-RV64-LABEL: fmsub_fmuladd_v6f16:
; ZVFHMINLMULMAX2-RV64: # %bb.0:
; ZVFHMINLMULMAX2-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
; ZVFHMINLMULMAX2-RV64-NEXT: vle16.v v8, (a1)
; ZVFHMINLMULMAX2-RV64-NEXT: vle16.v v9, (a0)
; ZVFHMINLMULMAX2-RV64-NEXT: vle16.v v10, (a2)
; ZVFHMINLMULMAX2-RV64-NEXT: vfwcvt.f.f.v v11, v8
; ZVFHMINLMULMAX2-RV64-NEXT: vfwcvt.f.f.v v8, v9
; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; ZVFHMINLMULMAX2-RV64-NEXT: vfmul.vv v8, v8, v11
; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; ZVFHMINLMULMAX2-RV64-NEXT: vfncvt.f.f.w v9, v8
; ZVFHMINLMULMAX2-RV64-NEXT: vfwcvt.f.f.v v8, v9
; ZVFHMINLMULMAX2-RV64-NEXT: vfwcvt.f.f.v v9, v10
; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; ZVFHMINLMULMAX2-RV64-NEXT: vfsub.vv v8, v8, v9
; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; ZVFHMINLMULMAX2-RV64-NEXT: vfncvt.f.f.w v9, v8
; ZVFHMINLMULMAX2-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
; ZVFHMINLMULMAX2-RV64-NEXT: vse64.v v9, (a0)
; ZVFHMINLMULMAX2-RV64-NEXT: vslidedown.vi v8, v9, 2
; ZVFHMINLMULMAX2-RV64-NEXT: addi a0, a0, 8
; ZVFHMINLMULMAX2-RV64-NEXT: vse32.v v8, (a0)
; ZVFHMINLMULMAX2-RV64-NEXT: ret
;
; ZVFHMINLMULMAX1-RV32-LABEL: fmsub_fmuladd_v6f16:
; ZVFHMINLMULMAX1-RV32: # %bb.0:
; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
; ZVFHMINLMULMAX1-RV32-NEXT: vle16.v v8, (a1)
; ZVFHMINLMULMAX1-RV32-NEXT: vle16.v v9, (a0)
; ZVFHMINLMULMAX1-RV32-NEXT: vle16.v v10, (a2)
; ZVFHMINLMULMAX1-RV32-NEXT: vfwcvt.f.f.v v11, v8
; ZVFHMINLMULMAX1-RV32-NEXT: vfwcvt.f.f.v v8, v9
; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; ZVFHMINLMULMAX1-RV32-NEXT: vfmul.vv v8, v8, v11
; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; ZVFHMINLMULMAX1-RV32-NEXT: vfncvt.f.f.w v9, v8
; ZVFHMINLMULMAX1-RV32-NEXT: vfwcvt.f.f.v v8, v9
; ZVFHMINLMULMAX1-RV32-NEXT: vfwcvt.f.f.v v9, v10
; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; ZVFHMINLMULMAX1-RV32-NEXT: vfsub.vv v8, v8, v9
; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; ZVFHMINLMULMAX1-RV32-NEXT: vfncvt.f.f.w v9, v8
; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
; ZVFHMINLMULMAX1-RV32-NEXT: vslidedown.vi v8, v9, 2
; ZVFHMINLMULMAX1-RV32-NEXT: addi a1, a0, 8
; ZVFHMINLMULMAX1-RV32-NEXT: vse32.v v8, (a1)
; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma
; ZVFHMINLMULMAX1-RV32-NEXT: vse16.v v9, (a0)
; ZVFHMINLMULMAX1-RV32-NEXT: ret
;
; ZVFHMINLMULMAX1-RV64-LABEL: fmsub_fmuladd_v6f16:
; ZVFHMINLMULMAX1-RV64: # %bb.0:
; ZVFHMINLMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
; ZVFHMINLMULMAX1-RV64-NEXT: vle16.v v8, (a1)
; ZVFHMINLMULMAX1-RV64-NEXT: vle16.v v9, (a0)
; ZVFHMINLMULMAX1-RV64-NEXT: vle16.v v10, (a2)
; ZVFHMINLMULMAX1-RV64-NEXT: vfwcvt.f.f.v v11, v8
; ZVFHMINLMULMAX1-RV64-NEXT: vfwcvt.f.f.v v8, v9
; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; ZVFHMINLMULMAX1-RV64-NEXT: vfmul.vv v8, v8, v11
; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; ZVFHMINLMULMAX1-RV64-NEXT: vfncvt.f.f.w v9, v8
; ZVFHMINLMULMAX1-RV64-NEXT: vfwcvt.f.f.v v8, v9
; ZVFHMINLMULMAX1-RV64-NEXT: vfwcvt.f.f.v v9, v10
; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; ZVFHMINLMULMAX1-RV64-NEXT: vfsub.vv v8, v8, v9
; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; ZVFHMINLMULMAX1-RV64-NEXT: vfncvt.f.f.w v9, v8
; ZVFHMINLMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
; ZVFHMINLMULMAX1-RV64-NEXT: vse64.v v9, (a0)
; ZVFHMINLMULMAX1-RV64-NEXT: vslidedown.vi v8, v9, 2
; ZVFHMINLMULMAX1-RV64-NEXT: addi a0, a0, 8
; ZVFHMINLMULMAX1-RV64-NEXT: vse32.v v8, (a0)
; ZVFHMINLMULMAX1-RV64-NEXT: ret
  %a = load <6 x half>, ptr %x
  %b = load <6 x half>, ptr %y
  %c = load <6 x half>, ptr %z
  %neg = fneg <6 x half> %c
  %d = call <6 x half> @llvm.fmuladd.v6f16(<6 x half> %a, <6 x half> %b, <6 x half> %neg)
  store <6 x half> %d, ptr %x
  ret void
}

define void @fnmsub_fmuladd_v4f32(ptr %x, ptr %y, ptr %z) {
; ZVFH-LABEL: fnmsub_fmuladd_v4f32:
; ZVFH: # %bb.0:
; ZVFH-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; ZVFH-NEXT: vle32.v v8, (a0)
; ZVFH-NEXT: vle32.v v9, (a1)
; ZVFH-NEXT: vle32.v v10, (a2)
; ZVFH-NEXT: vfnmsac.vv v10, v8, v9
; ZVFH-NEXT: vse32.v v10, (a0)
; ZVFH-NEXT: ret
;
; ZVFHMIN-LABEL: fnmsub_fmuladd_v4f32:
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: vsetivli zero, 4, e32, mf2, ta, ma
; ZVFHMIN-NEXT: vle32.v v8, (a0)
; ZVFHMIN-NEXT: vle32.v v9, (a1)
; ZVFHMIN-NEXT: vle32.v v10, (a2)
; ZVFHMIN-NEXT: vfnmsac.vv v10, v8, v9
; ZVFHMIN-NEXT: vse32.v v10, (a0)
; ZVFHMIN-NEXT: ret
  %a = load <4 x float>, ptr %x
  %b = load <4 x float>, ptr %y
  %c = load <4 x float>, ptr %z
  %neg = fneg <4 x float> %a
  %d = call <4 x float> @llvm.fmuladd.v4f32(<4 x float> %neg, <4 x float> %b, <4 x float> %c)
  store <4 x float> %d, ptr %x
  ret void
}

define void @fnmadd_fmuladd_v2f64(ptr %x, ptr %y, ptr %z) {
; CHECK-LABEL: fnmadd_fmuladd_v2f64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; CHECK-NEXT: vle64.v v8, (a0)
; CHECK-NEXT: vle64.v v9, (a1)
; CHECK-NEXT: vle64.v v10, (a2)
; CHECK-NEXT: vfnmacc.vv v10, v8, v9
; CHECK-NEXT: vse64.v v10, (a0)
; CHECK-NEXT: ret
  %a = load <2 x double>, ptr %x
  %b = load <2 x double>, ptr %y
  %c = load <2 x double>, ptr %z
  %neg = fneg <2 x double> %b
  %neg2 = fneg <2 x double> %c
  %d = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> %a, <2 x double> %neg, <2 x double> %neg2)
  store <2 x double> %d, ptr %x
  ret void
}
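; Note on the ZVFHMIN-prefixed checks above (an observation, not an autogenerated
; assertion): Zvfhmin supplies only f16<->f32 conversion instructions, so the
; <N x half> operations in this file are lowered by widening with vfwcvt.f.f.v,
; computing at e32, and narrowing back with vfncvt.f.f.w before the store.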