; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s --mattr=+sve -o - | FileCheck %s

; Tests that the ComplexDeinterleaving pass matches deinterleave2/interleave2
; patterns on scalable f64 vectors and lowers them to SVE FCMLA sequences.
; Each <vscale x 4 x double> value holds a vector of complex doubles
; (real/imag interleaved); deinterleave2 splits it into real and imaginary
; <vscale x 2 x double> halves.

target triple = "aarch64"

; a * b + c
define <vscale x 4 x double> @mull_add(<vscale x 4 x double> %a, <vscale x 4 x double> %b, <vscale x 4 x double> %c) {
; CHECK-LABEL: mull_add:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    fcmla z4.d, p0/m, z0.d, z2.d, #0
; CHECK-NEXT:    fcmla z5.d, p0/m, z1.d, z3.d, #0
; CHECK-NEXT:    fcmla z4.d, p0/m, z0.d, z2.d, #90
; CHECK-NEXT:    fcmla z5.d, p0/m, z1.d, z3.d, #90
; CHECK-NEXT:    mov z0.d, z4.d
; CHECK-NEXT:    mov z1.d, z5.d
; CHECK-NEXT:    ret
entry:
  %strided.vec = tail call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.experimental.vector.deinterleave2.nxv4f64(<vscale x 4 x double> %a)
  %0 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } %strided.vec, 0
  %1 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } %strided.vec, 1
  %strided.vec29 = tail call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.experimental.vector.deinterleave2.nxv4f64(<vscale x 4 x double> %b)
  %2 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } %strided.vec29, 0
  %3 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } %strided.vec29, 1
  %4 = fmul fast <vscale x 2 x double> %3, %0
  %5 = fmul fast <vscale x 2 x double> %2, %1
  %6 = fadd fast <vscale x 2 x double> %4, %5
  %7 = fmul fast <vscale x 2 x double> %2, %0
  %strided.vec31 = tail call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.experimental.vector.deinterleave2.nxv4f64(<vscale x 4 x double> %c)
  %8 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } %strided.vec31, 0
  %9 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } %strided.vec31, 1
  %10 = fadd fast <vscale x 2 x double> %8, %7
  %11 = fmul fast <vscale x 2 x double> %3, %1
  %12 = fsub fast <vscale x 2 x double> %10, %11
  %13 = fadd fast <vscale x 2 x double> %6, %9
  %interleaved.vec = tail call <vscale x 4 x double> @llvm.experimental.vector.interleave2.nxv4f64(<vscale x 2 x double> %12, <vscale x 2 x double> %13)
  ret <vscale x 4 x double> %interleaved.vec
}

; a * b + c * d
define <vscale x 4 x double> @mul_add_mull(<vscale x 4 x double> %a, <vscale x 4 x double> %b, <vscale x 4 x double> %c, <vscale x 4 x double> %d) {
; CHECK-LABEL: mul_add_mull:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    mov z24.d, #0 // =0x0
; CHECK-NEXT:    mov z25.d, z24.d
; CHECK-NEXT:    fcmla z25.d, p0/m, z6.d, z4.d, #0
; CHECK-NEXT:    fcmla z24.d, p0/m, z7.d, z5.d, #0
; CHECK-NEXT:    fcmla z25.d, p0/m, z0.d, z2.d, #0
; CHECK-NEXT:    fcmla z24.d, p0/m, z1.d, z3.d, #0
; CHECK-NEXT:    fcmla z25.d, p0/m, z6.d, z4.d, #90
; CHECK-NEXT:    fcmla z24.d, p0/m, z7.d, z5.d, #90
; CHECK-NEXT:    fcmla z25.d, p0/m, z0.d, z2.d, #90
; CHECK-NEXT:    fcmla z24.d, p0/m, z1.d, z3.d, #90
; CHECK-NEXT:    mov z0.d, z25.d
; CHECK-NEXT:    mov z1.d, z24.d
; CHECK-NEXT:    ret
entry:
  %strided.vec = tail call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.experimental.vector.deinterleave2.nxv4f64(<vscale x 4 x double> %a)
  %0 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } %strided.vec, 0
  %1 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } %strided.vec, 1
  %strided.vec52 = tail call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.experimental.vector.deinterleave2.nxv4f64(<vscale x 4 x double> %b)
  %2 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } %strided.vec52, 0
  %3 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } %strided.vec52, 1
  %4 = fmul fast <vscale x 2 x double> %3, %0
  %5 = fmul fast <vscale x 2 x double> %2, %1
  %6 = fmul fast <vscale x 2 x double> %2, %0
  %7 = fmul fast <vscale x 2 x double> %3, %1
  %strided.vec54 = tail call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.experimental.vector.deinterleave2.nxv4f64(<vscale x 4 x double> %c)
  %8 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } %strided.vec54, 0
  %9 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } %strided.vec54, 1
  %strided.vec56 = tail call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.experimental.vector.deinterleave2.nxv4f64(<vscale x 4 x double> %d)
  %10 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } %strided.vec56, 0
  %11 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } %strided.vec56, 1
  %12 = fmul fast <vscale x 2 x double> %11, %8
  %13 = fmul fast <vscale x 2 x double> %10, %9
  %14 = fmul fast <vscale x 2 x double> %10, %8
  %15 = fmul fast <vscale x 2 x double> %11, %9
  %16 = fadd fast <vscale x 2 x double> %15, %7
  %17 = fadd fast <vscale x 2 x double> %14, %6
  %18 = fsub fast <vscale x 2 x double> %17, %16
  %19 = fadd fast <vscale x 2 x double> %4, %5
  %20 = fadd fast <vscale x 2 x double> %19, %13
  %21 = fadd fast <vscale x 2 x double> %20, %12
  %interleaved.vec = tail call <vscale x 4 x double> @llvm.experimental.vector.interleave2.nxv4f64(<vscale x 2 x double> %18, <vscale x 2 x double> %21)
  ret <vscale x 4 x double> %interleaved.vec
}

; a * b - c * d
define <vscale x 4 x double> @mul_sub_mull(<vscale x 4 x double> %a, <vscale x 4 x double> %b, <vscale x 4 x double> %c, <vscale x 4 x double> %d) {
; CHECK-LABEL: mul_sub_mull:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    mov z24.d, #0 // =0x0
; CHECK-NEXT:    mov z25.d, z24.d
; CHECK-NEXT:    fcmla z25.d, p0/m, z6.d, z4.d, #270
; CHECK-NEXT:    fcmla z24.d, p0/m, z7.d, z5.d, #270
; CHECK-NEXT:    fcmla z25.d, p0/m, z0.d, z2.d, #0
; CHECK-NEXT:    fcmla z24.d, p0/m, z1.d, z3.d, #0
; CHECK-NEXT:    fcmla z25.d, p0/m, z6.d, z4.d, #180
; CHECK-NEXT:    fcmla z24.d, p0/m, z7.d, z5.d, #180
; CHECK-NEXT:    fcmla z25.d, p0/m, z0.d, z2.d, #90
; CHECK-NEXT:    fcmla z24.d, p0/m, z1.d, z3.d, #90
; CHECK-NEXT:    mov z0.d, z25.d
; CHECK-NEXT:    mov z1.d, z24.d
; CHECK-NEXT:    ret
entry:
  %strided.vec = tail call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.experimental.vector.deinterleave2.nxv4f64(<vscale x 4 x double> %a)
  %0 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } %strided.vec, 0
  %1 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } %strided.vec, 1
  %strided.vec54 = tail call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.experimental.vector.deinterleave2.nxv4f64(<vscale x 4 x double> %b)
  %2 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } %strided.vec54, 0
  %3 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } %strided.vec54, 1
  %4 = fmul fast <vscale x 2 x double> %3, %0
  %5 = fmul fast <vscale x 2 x double> %2, %1
  %6 = fmul fast <vscale x 2 x double> %2, %0
  %7 = fmul fast <vscale x 2 x double> %3, %1
  %strided.vec56 = tail call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.experimental.vector.deinterleave2.nxv4f64(<vscale x 4 x double> %c)
  %8 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } %strided.vec56, 0
  %9 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } %strided.vec56, 1
  %strided.vec58 = tail call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.experimental.vector.deinterleave2.nxv4f64(<vscale x 4 x double> %d)
  %10 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } %strided.vec58, 0
  %11 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } %strided.vec58, 1
  %12 = fmul fast <vscale x 2 x double> %11, %9
  %13 = fmul fast <vscale x 2 x double> %10, %8
  %14 = fadd fast <vscale x 2 x double> %13, %7
  %15 = fadd fast <vscale x 2 x double> %12, %6
  %16 = fsub fast <vscale x 2 x double> %15, %14
  %17 = fmul fast <vscale x 2 x double> %10, %9
  %18 = fmul fast <vscale x 2 x double> %11, %8
  %19 = fadd fast <vscale x 2 x double> %18, %17
  %20 = fadd fast <vscale x 2 x double> %4, %5
  %21 = fsub fast <vscale x 2 x double> %20, %19
  %interleaved.vec = tail call <vscale x 4 x double> @llvm.experimental.vector.interleave2.nxv4f64(<vscale x 2 x double> %16, <vscale x 2 x double> %21)
  ret <vscale x 4 x double> %interleaved.vec
}

; a * b + conj(c) * d
define <vscale x 4 x double> @mul_conj_mull(<vscale x 4 x double> %a, <vscale x 4 x double> %b, <vscale x 4 x double> %c, <vscale x 4 x double> %d) {
; CHECK-LABEL: mul_conj_mull:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    mov z24.d, #0 // =0x0
; CHECK-NEXT:    mov z25.d, z24.d
; CHECK-NEXT:    fcmla z25.d, p0/m, z0.d, z2.d, #0
; CHECK-NEXT:    fcmla z24.d, p0/m, z1.d, z3.d, #0
; CHECK-NEXT:    fcmla z25.d, p0/m, z0.d, z2.d, #90
; CHECK-NEXT:    fcmla z24.d, p0/m, z1.d, z3.d, #90
; CHECK-NEXT:    fcmla z25.d, p0/m, z4.d, z6.d, #0
; CHECK-NEXT:    fcmla z24.d, p0/m, z5.d, z7.d, #0
; CHECK-NEXT:    fcmla z25.d, p0/m, z4.d, z6.d, #270
; CHECK-NEXT:    fcmla z24.d, p0/m, z5.d, z7.d, #270
; CHECK-NEXT:    mov z0.d, z25.d
; CHECK-NEXT:    mov z1.d, z24.d
; CHECK-NEXT:    ret
entry:
  %strided.vec = tail call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.experimental.vector.deinterleave2.nxv4f64(<vscale x 4 x double> %a)
  %0 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } %strided.vec, 0
  %1 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } %strided.vec, 1
  %strided.vec60 = tail call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.experimental.vector.deinterleave2.nxv4f64(<vscale x 4 x double> %b)
  %2 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } %strided.vec60, 0
  %3 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } %strided.vec60, 1
  %4 = fmul fast <vscale x 2 x double> %3, %0
  %5 = fmul fast <vscale x 2 x double> %2, %1
  %6 = fmul fast <vscale x 2 x double> %2, %0
  %strided.vec62 = tail call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.experimental.vector.deinterleave2.nxv4f64(<vscale x 4 x double> %c)
  %7 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } %strided.vec62, 0
  %8 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } %strided.vec62, 1
  %strided.vec64 = tail call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.experimental.vector.deinterleave2.nxv4f64(<vscale x 4 x double> %d)
  %9 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } %strided.vec64, 0
  %10 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } %strided.vec64, 1
  %11 = fmul fast <vscale x 2 x double> %10, %7
  %12 = fmul fast <vscale x 2 x double> %9, %7
  %13 = fmul fast <vscale x 2 x double> %10, %8
  %14 = fmul fast <vscale x 2 x double> %3, %1
  %15 = fsub fast <vscale x 2 x double> %6, %14
  %16 = fadd fast <vscale x 2 x double> %15, %12
  %17 = fadd fast <vscale x 2 x double> %16, %13
  %18 = fadd fast <vscale x 2 x double> %4, %5
  %19 = fmul fast <vscale x 2 x double> %9, %8
  %20 = fsub fast <vscale x 2 x double> %18, %19
  %21 = fadd fast <vscale x 2 x double> %20, %11
  %interleaved.vec = tail call <vscale x 4 x double> @llvm.experimental.vector.interleave2.nxv4f64(<vscale x 2 x double> %17, <vscale x 2 x double> %21)
  ret <vscale x 4 x double> %interleaved.vec
}

; a + b + 1i * c * d
define <vscale x 4 x double> @mul_add_rot_mull(<vscale x 4 x double> %a, <vscale x 4 x double> %b, <vscale x 4 x double> %c, <vscale x 4 x double> %d) {
; CHECK-LABEL: mul_add_rot_mull:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    uzp1 z24.d, z2.d, z3.d
; CHECK-NEXT:    uzp2 z25.d, z0.d, z1.d
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    uzp2 z2.d, z2.d, z3.d
; CHECK-NEXT:    uzp1 z0.d, z0.d, z1.d
; CHECK-NEXT:    uzp1 z26.d, z6.d, z7.d
; CHECK-NEXT:    fmul z1.d, z24.d, z25.d
; CHECK-NEXT:    fmul z3.d, z2.d, z25.d
; CHECK-NEXT:    uzp2 z25.d, z4.d, z5.d
; CHECK-NEXT:    uzp1 z4.d, z4.d, z5.d
; CHECK-NEXT:    uzp2 z5.d, z6.d, z7.d
; CHECK-NEXT:    fmla z1.d, p0/m, z2.d, z0.d
; CHECK-NEXT:    fmla z3.d, p0/m, z26.d, z25.d
; CHECK-NEXT:    movprfx z2, z3
; CHECK-NEXT:    fmla z2.d, p0/m, z5.d, z4.d
; CHECK-NEXT:    fnmls z2.d, p0/m, z24.d, z0.d
; CHECK-NEXT:    fmla z1.d, p0/m, z26.d, z4.d
; CHECK-NEXT:    fmls z1.d, p0/m, z5.d, z25.d
; CHECK-NEXT:    zip1 z0.d, z2.d, z1.d
; CHECK-NEXT:    zip2 z1.d, z2.d, z1.d
; CHECK-NEXT:    ret
entry:
  %strided.vec = tail call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.experimental.vector.deinterleave2.nxv4f64(<vscale x 4 x double> %a)
  %0 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } %strided.vec, 0
  %1 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } %strided.vec, 1
  %strided.vec80 = tail call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.experimental.vector.deinterleave2.nxv4f64(<vscale x 4 x double> %b)
  %2 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } %strided.vec80, 0
  %3 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } %strided.vec80, 1
  %4 = fmul fast <vscale x 2 x double> %3, %0
  %5 = fmul fast <vscale x 2 x double> %2, %1
  %6 = fmul fast <vscale x 2 x double> %2, %0
  %7 = fmul fast <vscale x 2 x double> %3, %1
  %strided.vec82 = tail call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.experimental.vector.deinterleave2.nxv4f64(<vscale x 4 x double> %c)
  %8 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } %strided.vec82, 0
  %9 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } %strided.vec82, 1
  %strided.vec84 = tail call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.experimental.vector.deinterleave2.nxv4f64(<vscale x 4 x double> %d)
  %10 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } %strided.vec84, 0
  %11 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } %strided.vec84, 1
  %12 = fmul fast <vscale x 2 x double> %10, %8
  %13 = fmul fast <vscale x 2 x double> %10, %9
  %14 = fmul fast <vscale x 2 x double> %11, %8
  %15 = fadd fast <vscale x 2 x double> %13, %7
  %16 = fadd fast <vscale x 2 x double> %15, %14
  %17 = fsub fast <vscale x 2 x double> %6, %16
  %18 = fadd fast <vscale x 2 x double> %4, %5
  %19 = fadd fast <vscale x 2 x double> %18, %12
  %20 = fmul fast <vscale x 2 x double> %11, %9
  %21 = fsub fast <vscale x 2 x double> %19, %20
  %interleaved.vec = tail call <vscale x 4 x double> @llvm.experimental.vector.interleave2.nxv4f64(<vscale x 2 x double> %17, <vscale x 2 x double> %21)
  ret <vscale x 4 x double> %interleaved.vec
}

declare { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.experimental.vector.deinterleave2.nxv4f64(<vscale x 4 x double>)
declare <vscale x 4 x double> @llvm.experimental.vector.interleave2.nxv4f64(<vscale x 2 x double>, <vscale x 2 x double>)