; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s --mattr=+complxnum,+neon -o - | FileCheck %s

target triple = "aarch64"

; Expected to transform
;   *p = (a * b);
;   return (a * b) * a;
define <4 x float> @mul_triangle(<4 x float> %a, <4 x float> %b, ptr %p) {
; CHECK-LABEL: mul_triangle:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    movi v3.2d, #0000000000000000
; CHECK-NEXT:    movi v2.2d, #0000000000000000
; CHECK-NEXT:    fcmla v3.4s, v0.4s, v1.4s, #0
; CHECK-NEXT:    fcmla v3.4s, v0.4s, v1.4s, #90
; CHECK-NEXT:    fcmla v2.4s, v3.4s, v0.4s, #0
; CHECK-NEXT:    str q3, [x0]
; CHECK-NEXT:    fcmla v2.4s, v3.4s, v0.4s, #90
; CHECK-NEXT:    mov v0.16b, v2.16b
; CHECK-NEXT:    ret
entry:
  ; Deinterleave %a and %b: even lanes first, odd lanes second.
  %strided.vec = shufflevector <4 x float> %a, <4 x float> poison, <2 x i32> <i32 0, i32 2>
  %strided.vec35 = shufflevector <4 x float> %a, <4 x float> poison, <2 x i32> <i32 1, i32 3>
  %strided.vec37 = shufflevector <4 x float> %b, <4 x float> poison, <2 x i32> <i32 0, i32 2>
  %strided.vec38 = shufflevector <4 x float> %b, <4 x float> poison, <2 x i32> <i32 1, i32 3>
  ; a * b: %2 is the difference of same-lane products, %6 the sum of cross products.
  %0 = fmul fast <2 x float> %strided.vec37, %strided.vec
  %1 = fmul fast <2 x float> %strided.vec38, %strided.vec35
  %2 = fsub fast <2 x float> %0, %1
  %3 = fmul fast <2 x float> %2, %strided.vec35
  %4 = fmul fast <2 x float> %strided.vec38, %strided.vec
  %5 = fmul fast <2 x float> %strided.vec35, %strided.vec37
  %6 = fadd fast <2 x float> %4, %5
  ; Second use of a * b: it is re-interleaved and stored as a whole vector.
  %otheruse = shufflevector <2 x float> %2, <2 x float> %6, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
  store <4 x float> %otheruse, ptr %p
  ; (a * b) * a, built from the same %2 / %6 halves.
  %7 = fmul fast <2 x float> %6, %strided.vec
  %8 = fadd fast <2 x float> %3, %7
  %9 = fmul fast <2 x float> %2, %strided.vec
  %10 = fmul fast <2 x float> %6, %strided.vec35
  %11 = fsub fast <2 x float> %9, %10
  %interleaved.vec = shufflevector <2 x float> %11, <2 x float> %8, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
  ret <4 x float> %interleaved.vec
}

; Expected to not transform. Shows that external use prevents deinterleaving.
;   *p = (a * b).real();
;   return (a * b) * a;
; NOTE(review): the value stored below is %6, the sum of the cross products,
; which the CHECK lines compute from odd(a)*even(b) + even(a)*odd(b). That
; looks like the imaginary half rather than .real() -- confirm the pseudo-code.
define <4 x float> @mul_triangle_external_use(<4 x float> %a, <4 x float> %b, ptr %p) {
; CHECK-LABEL: mul_triangle_external_use:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ext v2.16b, v0.16b, v0.16b, #8
; CHECK-NEXT:    ext v3.16b, v1.16b, v1.16b, #8
; CHECK-NEXT:    zip2 v4.2s, v0.2s, v2.2s
; CHECK-NEXT:    zip1 v5.2s, v1.2s, v3.2s
; CHECK-NEXT:    zip1 v0.2s, v0.2s, v2.2s
; CHECK-NEXT:    zip2 v1.2s, v1.2s, v3.2s
; CHECK-NEXT:    fmul v2.2s, v4.2s, v5.2s
; CHECK-NEXT:    fmul v3.2s, v1.2s, v4.2s
; CHECK-NEXT:    fmla v2.2s, v0.2s, v1.2s
; CHECK-NEXT:    fneg v1.2s, v3.2s
; CHECK-NEXT:    fmul v3.2s, v2.2s, v4.2s
; CHECK-NEXT:    str d2, [x0]
; CHECK-NEXT:    fmla v1.2s, v0.2s, v5.2s
; CHECK-NEXT:    fmul v5.2s, v2.2s, v0.2s
; CHECK-NEXT:    fneg v3.2s, v3.2s
; CHECK-NEXT:    fmla v5.2s, v4.2s, v1.2s
; CHECK-NEXT:    fmla v3.2s, v0.2s, v1.2s
; CHECK-NEXT:    zip1 v0.4s, v3.4s, v5.4s
; CHECK-NEXT:    ret
entry:
  ; Deinterleave %a and %b: even lanes first, odd lanes second.
  %strided.vec = shufflevector <4 x float> %a, <4 x float> poison, <2 x i32> <i32 0, i32 2>
  %strided.vec35 = shufflevector <4 x float> %a, <4 x float> poison, <2 x i32> <i32 1, i32 3>
  %strided.vec37 = shufflevector <4 x float> %b, <4 x float> poison, <2 x i32> <i32 0, i32 2>
  %strided.vec38 = shufflevector <4 x float> %b, <4 x float> poison, <2 x i32> <i32 1, i32 3>
  %0 = fmul fast <2 x float> %strided.vec37, %strided.vec
  %1 = fmul fast <2 x float> %strided.vec38, %strided.vec35
  %2 = fsub fast <2 x float> %0, %1
  %3 = fmul fast <2 x float> %2, %strided.vec35
  %4 = fmul fast <2 x float> %strided.vec38, %strided.vec
  %5 = fmul fast <2 x float> %strided.vec35, %strided.vec37
  %6 = fadd fast <2 x float> %4, %5
  ; External use: only one deinterleaved half of a * b is stored, with no
  ; interleaving shuffle in between.
  store <2 x float> %6, ptr %p
  %7 = fmul fast <2 x float> %6, %strided.vec
  %8 = fadd fast <2 x float> %3, %7
  %9 = fmul fast <2 x float> %2, %strided.vec
  %10 = fmul fast <2 x float> %6, %strided.vec35
  %11 = fsub fast <2 x float> %9, %10
  %interleaved.vec = shufflevector <2 x float> %11, <2 x float> %8, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
  ret <4 x float> %interleaved.vec
}

; Expected to transform partially (only d * c).
; Shows that external use of shufflevector does not prevent deinterleaving.
;   *p1 = (a * b).real();
;   *p2 = (a * b) * c;
;   return d * c;
define <4 x float> @multiple_muls_shuffle_external(<4 x float> %a, <4 x float> %b, <4 x float> %c, <4 x float> %d, ptr %p1, ptr %p2) {
; CHECK-LABEL: multiple_muls_shuffle_external:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ext v5.16b, v0.16b, v0.16b, #8
; CHECK-NEXT:    ext v6.16b, v1.16b, v1.16b, #8
; CHECK-NEXT:    ext v4.16b, v2.16b, v2.16b, #8
; CHECK-NEXT:    zip2 v7.2s, v0.2s, v5.2s
; CHECK-NEXT:    zip1 v16.2s, v1.2s, v6.2s
; CHECK-NEXT:    zip2 v1.2s, v1.2s, v6.2s
; CHECK-NEXT:    zip1 v0.2s, v0.2s, v5.2s
; CHECK-NEXT:    fmul v5.2s, v16.2s, v7.2s
; CHECK-NEXT:    fmul v6.2s, v1.2s, v7.2s
; CHECK-NEXT:    fmla v5.2s, v0.2s, v1.2s
; CHECK-NEXT:    fneg v1.2s, v6.2s
; CHECK-NEXT:    zip1 v6.2s, v2.2s, v4.2s
; CHECK-NEXT:    zip2 v4.2s, v2.2s, v4.2s
; CHECK-NEXT:    fmla v1.2s, v0.2s, v16.2s
; CHECK-NEXT:    fmul v17.2s, v6.2s, v5.2s
; CHECK-NEXT:    movi v0.2d, #0000000000000000
; CHECK-NEXT:    fmul v5.2s, v4.2s, v5.2s
; CHECK-NEXT:    fmla v17.2s, v1.2s, v4.2s
; CHECK-NEXT:    fcmla v0.4s, v2.4s, v3.4s, #0
; CHECK-NEXT:    str d1, [x0]
; CHECK-NEXT:    fneg v16.2s, v5.2s
; CHECK-NEXT:    fcmla v0.4s, v2.4s, v3.4s, #90
; CHECK-NEXT:    fmla v16.2s, v1.2s, v6.2s
; CHECK-NEXT:    st2 { v16.2s, v17.2s }, [x1]
; CHECK-NEXT:    ret
entry:
  ; a * b on deinterleaved halves (even lanes first, odd lanes second).
  %strided.vec = shufflevector <4 x float> %a, <4 x float> poison, <2 x i32> <i32 0, i32 2>
  %strided.vec88 = shufflevector <4 x float> %a, <4 x float> poison, <2 x i32> <i32 1, i32 3>
  %strided.vec90 = shufflevector <4 x float> %b, <4 x float> poison, <2 x i32> <i32 0, i32 2>
  %strided.vec91 = shufflevector <4 x float> %b, <4 x float> poison, <2 x i32> <i32 1, i32 3>
  %0 = fmul fast <2 x float> %strided.vec91, %strided.vec
  %1 = fmul fast <2 x float> %strided.vec90, %strided.vec88
  %2 = fadd fast <2 x float> %0, %1
  %3 = fmul fast <2 x float> %strided.vec90, %strided.vec
  %4 = fmul fast <2 x float> %strided.vec91, %strided.vec88
  %5 = fsub fast <2 x float> %3, %4
  ; External use of one half of a * b; this chain stays as scalar-style code
  ; in the CHECK lines above.
  store <2 x float> %5, ptr %p1
  ; (a * b) * c. The deinterleaved halves of %c are shared with d * c below.
  %strided.vec93 = shufflevector <4 x float> %c, <4 x float> poison, <2 x i32> <i32 0, i32 2>
  %strided.vec94 = shufflevector <4 x float> %c, <4 x float> poison, <2 x i32> <i32 1, i32 3>
  %6 = fmul fast <2 x float> %strided.vec94, %5
  %7 = fmul fast <2 x float> %strided.vec93, %2
  %8 = fadd fast <2 x float> %6, %7
  %9 = fmul fast <2 x float> %strided.vec93, %5
  %10 = fmul fast <2 x float> %strided.vec94, %2
  %11 = fsub fast <2 x float> %9, %10
  %interleaved.vec = shufflevector <2 x float> %11, <2 x float> %8, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
  store <4 x float> %interleaved.vec, ptr %p2
  ; d * c: the only multiplication lowered to fcmla in the CHECK lines.
  %strided.vec96 = shufflevector <4 x float> %d, <4 x float> poison, <2 x i32> <i32 0, i32 2>
  %strided.vec97 = shufflevector <4 x float> %d, <4 x float> poison, <2 x i32> <i32 1, i32 3>
  %12 = fmul fast <2 x float> %strided.vec96, %strided.vec94
  %13 = fmul fast <2 x float> %strided.vec97, %strided.vec93
  %14 = fadd fast <2 x float> %13, %12
  %15 = fmul fast <2 x float> %strided.vec96, %strided.vec93
  %16 = fmul fast <2 x float> %strided.vec97, %strided.vec94
  %17 = fsub fast <2 x float> %15, %16
  %interleaved.vec98 = shufflevector <2 x float> %17, <2 x float> %14, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
  ret <4 x float> %interleaved.vec98
}

; Same as above but data are loaded from memory instead of being passed as arguments.
; Expected to transform partially (only d * c).
; Shows that ld2 is not generated for `c` although it is used by both complex `d * c`
; and non-complex `(a * b) * c` instruction chains.
define <4 x float> @multiple_muls_shuffle_external_with_loads(ptr %ptr_a, ptr %ptr_b, ptr %ptr_c, ptr %ptr_d, ptr %p1, ptr %p2) {
; CHECK-LABEL: multiple_muls_shuffle_external_with_loads:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ld2 { v0.2s, v1.2s }, [x0]
; CHECK-NEXT:    ld2 { v2.2s, v3.2s }, [x1]
; CHECK-NEXT:    fmul v4.2s, v3.2s, v1.2s
; CHECK-NEXT:    fmul v6.2s, v2.2s, v1.2s
; CHECK-NEXT:    fneg v4.2s, v4.2s
; CHECK-NEXT:    fmla v6.2s, v0.2s, v3.2s
; CHECK-NEXT:    fmla v4.2s, v0.2s, v2.2s
; CHECK-NEXT:    str d4, [x4]
; CHECK-NEXT:    ldr q5, [x2]
; CHECK-NEXT:    ext v7.16b, v5.16b, v5.16b, #8
; CHECK-NEXT:    zip1 v0.2s, v5.2s, v7.2s
; CHECK-NEXT:    zip2 v1.2s, v5.2s, v7.2s
; CHECK-NEXT:    fmul v3.2s, v0.2s, v6.2s
; CHECK-NEXT:    fmul v6.2s, v1.2s, v6.2s
; CHECK-NEXT:    fmla v3.2s, v4.2s, v1.2s
; CHECK-NEXT:    fneg v2.2s, v6.2s
; CHECK-NEXT:    fmla v2.2s, v4.2s, v0.2s
; CHECK-NEXT:    movi v0.2d, #0000000000000000
; CHECK-NEXT:    st2 { v2.2s, v3.2s }, [x5]
; CHECK-NEXT:    ldr q1, [x3]
; CHECK-NEXT:    fcmla v0.4s, v5.4s, v1.4s, #0
; CHECK-NEXT:    fcmla v0.4s, v5.4s, v1.4s, #90
; CHECK-NEXT:    ret
entry:
  ; a * b on deinterleaved halves; both operands are only used deinterleaved,
  ; and the CHECK lines load them with ld2.
  %a = load <4 x float>, ptr %ptr_a
  %strided.vec = shufflevector <4 x float> %a, <4 x float> poison, <2 x i32> <i32 0, i32 2>
  %strided.vec88 = shufflevector <4 x float> %a, <4 x float> poison, <2 x i32> <i32 1, i32 3>
  %b = load <4 x float>, ptr %ptr_b
  %strided.vec90 = shufflevector <4 x float> %b, <4 x float> poison, <2 x i32> <i32 0, i32 2>
  %strided.vec91 = shufflevector <4 x float> %b, <4 x float> poison, <2 x i32> <i32 1, i32 3>
  %0 = fmul fast <2 x float> %strided.vec91, %strided.vec
  %1 = fmul fast <2 x float> %strided.vec90, %strided.vec88
  %2 = fadd fast <2 x float> %0, %1
  %3 = fmul fast <2 x float> %strided.vec90, %strided.vec
  %4 = fmul fast <2 x float> %strided.vec91, %strided.vec88
  %5 = fsub fast <2 x float> %3, %4
  ; External use of one half of a * b.
  store <2 x float> %5, ptr %p1
  ; (a * b) * c. %c feeds both this chain and the d * c chain below, and the
  ; CHECK lines load it with a plain ldr followed by ext/zip.
  %c = load <4 x float>, ptr %ptr_c
  %strided.vec93 = shufflevector <4 x float> %c, <4 x float> poison, <2 x i32> <i32 0, i32 2>
  %strided.vec94 = shufflevector <4 x float> %c, <4 x float> poison, <2 x i32> <i32 1, i32 3>
  %6 = fmul fast <2 x float> %strided.vec94, %5
  %7 = fmul fast <2 x float> %strided.vec93, %2
  %8 = fadd fast <2 x float> %6, %7
  %9 = fmul fast <2 x float> %strided.vec93, %5
  %10 = fmul fast <2 x float> %strided.vec94, %2
  %11 = fsub fast <2 x float> %9, %10
  %interleaved.vec = shufflevector <2 x float> %11, <2 x float> %8, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
  store <4 x float> %interleaved.vec, ptr %p2
  ; d * c: the only multiplication lowered to fcmla in the CHECK lines.
  %d = load <4 x float>, ptr %ptr_d
  %strided.vec96 = shufflevector <4 x float> %d, <4 x float> poison, <2 x i32> <i32 0, i32 2>
  %strided.vec97 = shufflevector <4 x float> %d, <4 x float> poison, <2 x i32> <i32 1, i32 3>
  %12 = fmul fast <2 x float> %strided.vec96, %strided.vec94
  %13 = fmul fast <2 x float> %strided.vec97, %strided.vec93
  %14 = fadd fast <2 x float> %13, %12
  %15 = fmul fast <2 x float> %strided.vec96, %strided.vec93
  %16 = fmul fast <2 x float> %strided.vec97, %strided.vec94
  %17 = fsub fast <2 x float> %15, %16
  %interleaved.vec98 = shufflevector <2 x float> %17, <2 x float> %14, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
  ret <4 x float> %interleaved.vec98
}

; Expected to not transform. Shows that external use prevents deinterleaving whole chain.
;   *p1 = (a * b).real();
;   *p2 = (a * b) * (d * c);
;   return d * c;
define <4 x float> @multiple_muls_mul_external(<4 x float> %a, <4 x float> %b, <4 x float> %c, <4 x float> %d, ptr %p1, ptr %p2) {
; CHECK-LABEL: multiple_muls_mul_external:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ext v4.16b, v0.16b, v0.16b, #8
; CHECK-NEXT:    ext v5.16b, v1.16b, v1.16b, #8
; CHECK-NEXT:    ext v16.16b, v2.16b, v2.16b, #8
; CHECK-NEXT:    ext v17.16b, v3.16b, v3.16b, #8
; CHECK-NEXT:    zip2 v6.2s, v0.2s, v4.2s
; CHECK-NEXT:    zip2 v7.2s, v1.2s, v5.2s
; CHECK-NEXT:    zip1 v19.2s, v2.2s, v16.2s
; CHECK-NEXT:    zip2 v2.2s, v2.2s, v16.2s
; CHECK-NEXT:    zip2 v16.2s, v3.2s, v17.2s
; CHECK-NEXT:    zip1 v0.2s, v0.2s, v4.2s
; CHECK-NEXT:    zip1 v1.2s, v1.2s, v5.2s
; CHECK-NEXT:    zip1 v3.2s, v3.2s, v17.2s
; CHECK-NEXT:    fmul v18.2s, v6.2s, v7.2s
; CHECK-NEXT:    fmul v5.2s, v19.2s, v16.2s
; CHECK-NEXT:    fmul v16.2s, v2.2s, v16.2s
; CHECK-NEXT:    fmul v7.2s, v0.2s, v7.2s
; CHECK-NEXT:    fneg v4.2s, v18.2s
; CHECK-NEXT:    fmla v5.2s, v3.2s, v2.2s
; CHECK-NEXT:    fneg v2.2s, v16.2s
; CHECK-NEXT:    fmla v7.2s, v1.2s, v6.2s
; CHECK-NEXT:    fmla v4.2s, v1.2s, v0.2s
; CHECK-NEXT:    fmla v2.2s, v3.2s, v19.2s
; CHECK-NEXT:    fmul v0.2s, v7.2s, v5.2s
; CHECK-NEXT:    fmul v17.2s, v4.2s, v5.2s
; CHECK-NEXT:    str d4, [x0]
; CHECK-NEXT:    fmla v17.2s, v2.2s, v7.2s
; CHECK-NEXT:    fneg v16.2s, v0.2s
; CHECK-NEXT:    zip1 v0.4s, v2.4s, v5.4s
; CHECK-NEXT:    fmla v16.2s, v2.2s, v4.2s
; CHECK-NEXT:    st2 { v16.2s, v17.2s }, [x1]
; CHECK-NEXT:    ret
entry:
  ; a * b on deinterleaved halves (even lanes first, odd lanes second).
  %strided.vec = shufflevector <4 x float> %a, <4 x float> poison, <2 x i32> <i32 0, i32 2>
  %strided.vec126 = shufflevector <4 x float> %a, <4 x float> poison, <2 x i32> <i32 1, i32 3>
  %strided.vec128 = shufflevector <4 x float> %b, <4 x float> poison, <2 x i32> <i32 0, i32 2>
  %strided.vec129 = shufflevector <4 x float> %b, <4 x float> poison, <2 x i32> <i32 1, i32 3>
  %0 = fmul nnan ninf contract <2 x float> %strided.vec, %strided.vec129
  %1 = fmul nnan ninf contract <2 x float> %strided.vec126, %strided.vec128
  %2 = fadd nnan ninf contract <2 x float> %1, %0
  %3 = fmul nnan ninf contract <2 x float> %strided.vec, %strided.vec128
  %4 = fmul nnan ninf contract <2 x float> %strided.vec126, %strided.vec129
  %5 = fsub nnan ninf contract <2 x float> %3, %4
  ; External use of one half of a * b.
  store <2 x float> %5, ptr %p1
  ; d * c on deinterleaved halves.
  %strided.vec131 = shufflevector <4 x float> %c, <4 x float> poison, <2 x i32> <i32 0, i32 2>
  %strided.vec132 = shufflevector <4 x float> %c, <4 x float> poison, <2 x i32> <i32 1, i32 3>
  %strided.vec134 = shufflevector <4 x float> %d, <4 x float> poison, <2 x i32> <i32 0, i32 2>
  %strided.vec135 = shufflevector <4 x float> %d, <4 x float> poison, <2 x i32> <i32 1, i32 3>
  %6 = fmul nnan ninf contract <2 x float> %strided.vec131, %strided.vec135
  %7 = fmul nnan ninf contract <2 x float> %strided.vec132, %strided.vec134
  %8 = fadd nnan ninf contract <2 x float> %7, %6
  %9 = fmul nnan ninf contract <2 x float> %strided.vec131, %strided.vec134
  %10 = fmul nnan ninf contract <2 x float> %strided.vec132, %strided.vec135
  %11 = fsub nnan ninf contract <2 x float> %9, %10
  ; (a * b) * (d * c): consumes the halves of both products directly, which
  ; ties the d * c chain to the externally used a * b chain.
  %12 = fmul nnan ninf contract <2 x float> %5, %8
  %13 = fmul nnan ninf contract <2 x float> %2, %11
  %14 = fadd nnan ninf contract <2 x float> %13, %12
  %15 = fmul nnan ninf contract <2 x float> %5, %11
  %16 = fmul nnan ninf contract <2 x float> %2, %8
  %17 = fsub nnan ninf contract <2 x float> %15, %16
  %interleaved.vec = shufflevector <2 x float> %17, <2 x float> %14, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
  store <4 x float> %interleaved.vec, ptr %p2
  %interleaved.vec136 = shufflevector <2 x float> %11, <2 x float> %8, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
  ret <4 x float> %interleaved.vec136
}

; Expected to transform. Shows that composite common subexpression is not generated twice.
;   u[i] = a[i] * b[i] - (c[i] * d[i] + g[i] * h[i]);
;   v[i] = e[i] * f[i] + (c[i] * d[i] + g[i] * h[i]);
define void @mul_add_common_mul_add_mul(<4 x double> %a, <4 x double> %b, <4 x double> %c, <4 x double> %d, <4 x double> %e, <4 x double> %f, <4 x double> %g, <4 x double> %h, ptr %p1, ptr %p2) {
; CHECK-LABEL: mul_add_common_mul_add_mul:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    movi v16.2d, #0000000000000000
; CHECK-NEXT:    movi v17.2d, #0000000000000000
; CHECK-NEXT:    ldr q19, [sp, #112]
; CHECK-NEXT:    ldp q18, q20, [sp, #80]
; CHECK-NEXT:    ldr q21, [sp, #64]
; CHECK-NEXT:    movi v22.2d, #0000000000000000
; CHECK-NEXT:    fcmla v16.2d, v18.2d, v19.2d, #0
; CHECK-NEXT:    fcmla v17.2d, v21.2d, v20.2d, #0
; CHECK-NEXT:    fcmla v22.2d, v1.2d, v3.2d, #0
; CHECK-NEXT:    fcmla v16.2d, v18.2d, v19.2d, #90
; CHECK-NEXT:    movi v18.2d, #0000000000000000
; CHECK-NEXT:    fcmla v17.2d, v21.2d, v20.2d, #90
; CHECK-NEXT:    fcmla v22.2d, v1.2d, v3.2d, #90
; CHECK-NEXT:    fcmla v16.2d, v5.2d, v7.2d, #0
; CHECK-NEXT:    fcmla v18.2d, v0.2d, v2.2d, #0
; CHECK-NEXT:    fcmla v17.2d, v4.2d, v6.2d, #0
; CHECK-NEXT:    fcmla v16.2d, v5.2d, v7.2d, #90
; CHECK-NEXT:    fcmla v18.2d, v0.2d, v2.2d, #90
; CHECK-NEXT:    fcmla v17.2d, v4.2d, v6.2d, #90
; CHECK-NEXT:    ldp q3, q0, [sp, #32]
; CHECK-NEXT:    ldp q2, q1, [sp]
; CHECK-NEXT:    fsub v4.2d, v22.2d, v16.2d
; CHECK-NEXT:    fsub v5.2d, v18.2d, v17.2d
; CHECK-NEXT:    fcmla v16.2d, v0.2d, v1.2d, #0
; CHECK-NEXT:    fcmla v17.2d, v3.2d, v2.2d, #0
; CHECK-NEXT:    stp q5, q4, [x0]
; CHECK-NEXT:    fcmla v16.2d, v0.2d, v1.2d, #90
; CHECK-NEXT:    fcmla v17.2d, v3.2d, v2.2d, #90
; CHECK-NEXT:    stp q17, q16, [x1]
; CHECK-NEXT:    ret
entry:
  ; Partial products of a * b (even lanes first, odd lanes second).
  %strided.vec = shufflevector <4 x double> %a, <4 x double> poison, <2 x i32> <i32 0, i32 2>
  %strided.vec123 = shufflevector <4 x double> %a, <4 x double> poison, <2 x i32> <i32 1, i32 3>
  %strided.vec125 = shufflevector <4 x double> %b, <4 x double> poison, <2 x i32> <i32 0, i32 2>
  %strided.vec126 = shufflevector <4 x double> %b, <4 x double> poison, <2 x i32> <i32 1, i32 3>
  %0 = fmul fast <2 x double> %strided.vec125, %strided.vec
  %1 = fmul fast <2 x double> %strided.vec126, %strided.vec
  %2 = fmul fast <2 x double> %strided.vec125, %strided.vec123
  %3 = fadd fast <2 x double> %1, %2
  ; Partial products of c * d.
  %strided.vec128 = shufflevector <4 x double> %c, <4 x double> poison, <2 x i32> <i32 0, i32 2>
  %strided.vec129 = shufflevector <4 x double> %c, <4 x double> poison, <2 x i32> <i32 1, i32 3>
  %strided.vec131 = shufflevector <4 x double> %d, <4 x double> poison, <2 x i32> <i32 0, i32 2>
  %strided.vec132 = shufflevector <4 x double> %d, <4 x double> poison, <2 x i32> <i32 1, i32 3>
  %4 = fmul fast <2 x double> %strided.vec131, %strided.vec128
  %5 = fmul fast <2 x double> %strided.vec132, %strided.vec129
  %6 = fmul fast <2 x double> %strided.vec132, %strided.vec128
  %7 = fmul fast <2 x double> %strided.vec131, %strided.vec129
  %8 = fsub fast <2 x double> %4, %5
  ; Partial products of g * h.
  %strided.vec134 = shufflevector <4 x double> %g, <4 x double> poison, <2 x i32> <i32 0, i32 2>
  %strided.vec135 = shufflevector <4 x double> %g, <4 x double> poison, <2 x i32> <i32 1, i32 3>
  %strided.vec137 = shufflevector <4 x double> %h, <4 x double> poison, <2 x i32> <i32 0, i32 2>
  %strided.vec138 = shufflevector <4 x double> %h, <4 x double> poison, <2 x i32> <i32 1, i32 3>
  %9 = fmul fast <2 x double> %strided.vec138, %strided.vec134
  %10 = fmul fast <2 x double> %strided.vec137, %strided.vec135
  %11 = fmul fast <2 x double> %strided.vec137, %strided.vec134
  %12 = fmul fast <2 x double> %strided.vec135, %strided.vec138
  %13 = fsub fast <2 x double> %11, %12
  ; Common subexpression c * d + g * h: %14 and %17 hold its two halves and
  ; are reused by both stores below.
  %14 = fadd fast <2 x double> %13, %8
  %15 = fadd fast <2 x double> %6, %7
  %16 = fadd fast <2 x double> %15, %9
  %17 = fadd fast <2 x double> %16, %10
  ; u = a * b - (c * d + g * h)
  %18 = fmul fast <2 x double> %strided.vec126, %strided.vec123
  %19 = fadd fast <2 x double> %18, %14
  %20 = fsub fast <2 x double> %0, %19
  %21 = fsub fast <2 x double> %3, %17
  %interleaved.vec = shufflevector <2 x double> %20, <2 x double> %21, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
  store <4 x double> %interleaved.vec, ptr %p1, align 8
  ; v = e * f + (c * d + g * h)
  %strided.vec140 = shufflevector <4 x double> %e, <4 x double> poison, <2 x i32> <i32 0, i32 2>
  %strided.vec141 = shufflevector <4 x double> %e, <4 x double> poison, <2 x i32> <i32 1, i32 3>
  %strided.vec143 = shufflevector <4 x double> %f, <4 x double> poison, <2 x i32> <i32 0, i32 2>
  %strided.vec144 = shufflevector <4 x double> %f, <4 x double> poison, <2 x i32> <i32 1, i32 3>
  %22 = fmul fast <2 x double> %strided.vec143, %strided.vec140
  %23 = fmul fast <2 x double> %strided.vec144, %strided.vec140
  %24 = fmul fast <2 x double> %strided.vec143, %strided.vec141
  %25 = fadd fast <2 x double> %22, %14
  %26 = fmul fast <2 x double> %strided.vec144, %strided.vec141
  %27 = fsub fast <2 x double> %25, %26
  %28 = fadd fast <2 x double> %24, %17
  %29 = fadd fast <2 x double> %28, %23
  %interleaved.vec145 = shufflevector <2 x double> %27, <2 x double> %29, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
  store <4 x double> %interleaved.vec145, ptr %p2, align 8
  ret void
}