217 lines
11 KiB
LLVM
217 lines
11 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
|
; RUN: llc < %s --mattr=+complxnum,+neon -o - | FileCheck %s
|
|
|
|
; The functions below each take <4 x double> arguments, split them into an
; even-lane half and an odd-lane half with shufflevector, combine the halves
; with fast-math fmul/fadd/fsub, and re-interleave the two results. The comment
; above each function names the complex-number expression the pattern spells
; out (even lanes acting as real parts, odd lanes as imaginary parts).
target triple = "aarch64"
|
|
|
|
; a * b + c
|
|
; Complex multiply-accumulate a * b + c written out with scalar-style vector
; ops. Even lanes (0, 2) of each <4 x double> are used as the real parts and
; odd lanes (1, 3) as the imaginary parts.
; The autogenerated assertions expect two fcmla pairs (rotation #0, then #90)
; accumulating into v4/v5, followed by moves of v4/v5 into v0/v1.
define <4 x double> @mull_add(<4 x double> %a, <4 x double> %b, <4 x double> %c) {
|
|
; CHECK-LABEL: mull_add:
|
|
; CHECK: // %bb.0: // %entry
|
|
; CHECK-NEXT: fcmla v4.2d, v0.2d, v2.2d, #0
|
|
; CHECK-NEXT: fcmla v5.2d, v1.2d, v3.2d, #0
|
|
; CHECK-NEXT: fcmla v4.2d, v0.2d, v2.2d, #90
|
|
; CHECK-NEXT: fcmla v5.2d, v1.2d, v3.2d, #90
|
|
; CHECK-NEXT: mov v0.16b, v4.16b
|
|
; CHECK-NEXT: mov v1.16b, v5.16b
|
|
; CHECK-NEXT: ret
|
|
entry:
|
|
; Deinterleave %a: %strided.vec = even lanes, %strided.vec28 = odd lanes.
%strided.vec = shufflevector <4 x double> %a, <4 x double> poison, <2 x i32> <i32 0, i32 2>
|
|
%strided.vec28 = shufflevector <4 x double> %a, <4 x double> poison, <2 x i32> <i32 1, i32 3>
|
|
; Deinterleave %b: %strided.vec30 = even lanes, %strided.vec31 = odd lanes.
%strided.vec30 = shufflevector <4 x double> %b, <4 x double> poison, <2 x i32> <i32 0, i32 2>
|
|
%strided.vec31 = shufflevector <4 x double> %b, <4 x double> poison, <2 x i32> <i32 1, i32 3>
|
|
; %2 = b.odd * a.even + b.even * a.odd  (imaginary part of a * b)
%0 = fmul fast <2 x double> %strided.vec31, %strided.vec
|
|
%1 = fmul fast <2 x double> %strided.vec30, %strided.vec28
|
|
%2 = fadd fast <2 x double> %0, %1
|
|
; %3 = b.even * a.even (first term of the real part of a * b)
%3 = fmul fast <2 x double> %strided.vec30, %strided.vec
|
|
; Deinterleave %c: %strided.vec33 = even lanes, %strided.vec34 = odd lanes.
%strided.vec33 = shufflevector <4 x double> %c, <4 x double> poison, <2 x i32> <i32 0, i32 2>
|
|
%strided.vec34 = shufflevector <4 x double> %c, <4 x double> poison, <2 x i32> <i32 1, i32 3>
|
|
; %6 = c.even + b.even * a.even - b.odd * a.odd  (real part of a * b + c)
%4 = fadd fast <2 x double> %strided.vec33, %3
|
|
%5 = fmul fast <2 x double> %strided.vec31, %strided.vec28
|
|
%6 = fsub fast <2 x double> %4, %5
|
|
; %7 = imaginary part of a * b plus c.odd
%7 = fadd fast <2 x double> %2, %strided.vec34
|
|
; Re-interleave: result lanes are <%6[0], %7[0], %6[1], %7[1]>.
%interleaved.vec = shufflevector <2 x double> %6, <2 x double> %7, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
|
|
ret <4 x double> %interleaved.vec
|
|
}
|
|
|
|
; a * b + c * d
|
|
; Sum of two complex products, a * b + c * d, written out with scalar-style
; vector ops. Even lanes (0, 2) are used as real parts, odd lanes (1, 3) as
; imaginary parts.
; The autogenerated assertions expect v16/v17 to be zeroed with movi and then
; used as accumulators for four fcmla pairs: rotation #0 for both products,
; then rotation #90 for both products.
define <4 x double> @mul_add_mull(<4 x double> %a, <4 x double> %b, <4 x double> %c, <4 x double> %d) {
|
|
; CHECK-LABEL: mul_add_mull:
|
|
; CHECK: // %bb.0: // %entry
|
|
; CHECK-NEXT: movi v16.2d, #0000000000000000
|
|
; CHECK-NEXT: movi v17.2d, #0000000000000000
|
|
; CHECK-NEXT: fcmla v17.2d, v6.2d, v4.2d, #0
|
|
; CHECK-NEXT: fcmla v16.2d, v7.2d, v5.2d, #0
|
|
; CHECK-NEXT: fcmla v17.2d, v0.2d, v2.2d, #0
|
|
; CHECK-NEXT: fcmla v16.2d, v1.2d, v3.2d, #0
|
|
; CHECK-NEXT: fcmla v17.2d, v6.2d, v4.2d, #90
|
|
; CHECK-NEXT: fcmla v16.2d, v7.2d, v5.2d, #90
|
|
; CHECK-NEXT: fcmla v17.2d, v0.2d, v2.2d, #90
|
|
; CHECK-NEXT: fcmla v16.2d, v1.2d, v3.2d, #90
|
|
; CHECK-NEXT: mov v0.16b, v17.16b
|
|
; CHECK-NEXT: mov v1.16b, v16.16b
|
|
; CHECK-NEXT: ret
|
|
entry:
|
|
; Deinterleave %a: %strided.vec = even lanes, %strided.vec51 = odd lanes.
%strided.vec = shufflevector <4 x double> %a, <4 x double> poison, <2 x i32> <i32 0, i32 2>
|
|
%strided.vec51 = shufflevector <4 x double> %a, <4 x double> poison, <2 x i32> <i32 1, i32 3>
|
|
; Deinterleave %b: %strided.vec53 = even lanes, %strided.vec54 = odd lanes.
%strided.vec53 = shufflevector <4 x double> %b, <4 x double> poison, <2 x i32> <i32 0, i32 2>
|
|
%strided.vec54 = shufflevector <4 x double> %b, <4 x double> poison, <2 x i32> <i32 1, i32 3>
|
|
; The four cross products of a and b:
;   %0 = b.odd * a.even,  %1 = b.even * a.odd,
;   %2 = b.even * a.even, %3 = b.odd * a.odd.
%0 = fmul fast <2 x double> %strided.vec54, %strided.vec
|
|
%1 = fmul fast <2 x double> %strided.vec53, %strided.vec51
|
|
%2 = fmul fast <2 x double> %strided.vec53, %strided.vec
|
|
%3 = fmul fast <2 x double> %strided.vec54, %strided.vec51
|
|
; Deinterleave %c: %strided.vec56 = even lanes, %strided.vec57 = odd lanes.
%strided.vec56 = shufflevector <4 x double> %c, <4 x double> poison, <2 x i32> <i32 0, i32 2>
|
|
%strided.vec57 = shufflevector <4 x double> %c, <4 x double> poison, <2 x i32> <i32 1, i32 3>
|
|
; Deinterleave %d: %strided.vec59 = even lanes, %strided.vec60 = odd lanes.
%strided.vec59 = shufflevector <4 x double> %d, <4 x double> poison, <2 x i32> <i32 0, i32 2>
|
|
%strided.vec60 = shufflevector <4 x double> %d, <4 x double> poison, <2 x i32> <i32 1, i32 3>
|
|
; The four cross products of c and d:
;   %4 = d.odd * c.even,  %5 = d.even * c.odd,
;   %6 = d.even * c.even, %7 = d.odd * c.odd.
%4 = fmul fast <2 x double> %strided.vec60, %strided.vec56
|
|
%5 = fmul fast <2 x double> %strided.vec59, %strided.vec57
|
|
%6 = fmul fast <2 x double> %strided.vec59, %strided.vec56
|
|
%7 = fmul fast <2 x double> %strided.vec60, %strided.vec57
|
|
; %10 = (even*even terms) - (odd*odd terms): real part of a * b + c * d.
%8 = fadd fast <2 x double> %7, %3
|
|
%9 = fadd fast <2 x double> %6, %2
|
|
%10 = fsub fast <2 x double> %9, %8
|
|
; %13 = sum of all four mixed even/odd terms: imaginary part of a * b + c * d.
%11 = fadd fast <2 x double> %0, %1
|
|
%12 = fadd fast <2 x double> %11, %5
|
|
%13 = fadd fast <2 x double> %12, %4
|
|
; Re-interleave: result lanes are <%10[0], %13[0], %10[1], %13[1]>.
%interleaved.vec = shufflevector <2 x double> %10, <2 x double> %13, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
|
|
ret <4 x double> %interleaved.vec
|
|
}
|
|
|
|
; a * b - c * d
|
|
; Difference of two complex products, a * b - c * d, written out with
; scalar-style vector ops. Even lanes (0, 2) are used as real parts, odd lanes
; (1, 3) as imaginary parts.
; The autogenerated assertions expect zeroed accumulators v16/v17 and four
; fcmla pairs: rotations #270 and #180 on the v6/v7 and v4/v5 operands, and
; rotations #0 and #90 on the v0/v1 and v2/v3 operands.
define <4 x double> @mul_sub_mull(<4 x double> %a, <4 x double> %b, <4 x double> %c, <4 x double> %d) {
|
|
; CHECK-LABEL: mul_sub_mull:
|
|
; CHECK: // %bb.0: // %entry
|
|
; CHECK-NEXT: movi v16.2d, #0000000000000000
|
|
; CHECK-NEXT: movi v17.2d, #0000000000000000
|
|
; CHECK-NEXT: fcmla v17.2d, v6.2d, v4.2d, #270
|
|
; CHECK-NEXT: fcmla v16.2d, v7.2d, v5.2d, #270
|
|
; CHECK-NEXT: fcmla v17.2d, v0.2d, v2.2d, #0
|
|
; CHECK-NEXT: fcmla v16.2d, v1.2d, v3.2d, #0
|
|
; CHECK-NEXT: fcmla v17.2d, v6.2d, v4.2d, #180
|
|
; CHECK-NEXT: fcmla v16.2d, v7.2d, v5.2d, #180
|
|
; CHECK-NEXT: fcmla v17.2d, v0.2d, v2.2d, #90
|
|
; CHECK-NEXT: fcmla v16.2d, v1.2d, v3.2d, #90
|
|
; CHECK-NEXT: mov v0.16b, v17.16b
|
|
; CHECK-NEXT: mov v1.16b, v16.16b
|
|
; CHECK-NEXT: ret
|
|
entry:
|
|
; Deinterleave %a: %strided.vec = even lanes, %strided.vec53 = odd lanes.
%strided.vec = shufflevector <4 x double> %a, <4 x double> poison, <2 x i32> <i32 0, i32 2>
|
|
%strided.vec53 = shufflevector <4 x double> %a, <4 x double> poison, <2 x i32> <i32 1, i32 3>
|
|
; Deinterleave %b: %strided.vec55 = even lanes, %strided.vec56 = odd lanes.
%strided.vec55 = shufflevector <4 x double> %b, <4 x double> poison, <2 x i32> <i32 0, i32 2>
|
|
%strided.vec56 = shufflevector <4 x double> %b, <4 x double> poison, <2 x i32> <i32 1, i32 3>
|
|
; The four cross products of a and b:
;   %0 = b.odd * a.even,  %1 = b.even * a.odd,
;   %2 = b.even * a.even, %3 = b.odd * a.odd.
%0 = fmul fast <2 x double> %strided.vec56, %strided.vec
|
|
%1 = fmul fast <2 x double> %strided.vec55, %strided.vec53
|
|
%2 = fmul fast <2 x double> %strided.vec55, %strided.vec
|
|
%3 = fmul fast <2 x double> %strided.vec56, %strided.vec53
|
|
; Deinterleave %c: %strided.vec58 = even lanes, %strided.vec59 = odd lanes.
%strided.vec58 = shufflevector <4 x double> %c, <4 x double> poison, <2 x i32> <i32 0, i32 2>
|
|
%strided.vec59 = shufflevector <4 x double> %c, <4 x double> poison, <2 x i32> <i32 1, i32 3>
|
|
; Deinterleave %d: %strided.vec61 = even lanes, %strided.vec62 = odd lanes.
%strided.vec61 = shufflevector <4 x double> %d, <4 x double> poison, <2 x i32> <i32 0, i32 2>
|
|
%strided.vec62 = shufflevector <4 x double> %d, <4 x double> poison, <2 x i32> <i32 1, i32 3>
|
|
; %4 = d.odd * c.odd, %5 = d.even * c.even.
%4 = fmul fast <2 x double> %strided.vec62, %strided.vec59
|
|
%5 = fmul fast <2 x double> %strided.vec61, %strided.vec58
|
|
; %8 = (d.odd*c.odd + b.even*a.even) - (d.even*c.even + b.odd*a.odd),
; i.e. real part of a * b minus real part of c * d.
%6 = fadd fast <2 x double> %5, %3
|
|
%7 = fadd fast <2 x double> %4, %2
|
|
%8 = fsub fast <2 x double> %7, %6
|
|
; %11 = d.odd * c.even + d.even * c.odd  (imaginary part of c * d)
%9 = fmul fast <2 x double> %strided.vec61, %strided.vec59
|
|
%10 = fmul fast <2 x double> %strided.vec62, %strided.vec58
|
|
%11 = fadd fast <2 x double> %10, %9
|
|
; %13 = imaginary part of a * b minus imaginary part of c * d.
%12 = fadd fast <2 x double> %0, %1
|
|
%13 = fsub fast <2 x double> %12, %11
|
|
; Re-interleave: result lanes are <%8[0], %13[0], %8[1], %13[1]>.
%interleaved.vec = shufflevector <2 x double> %8, <2 x double> %13, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
|
|
ret <4 x double> %interleaved.vec
|
|
}
|
|
|
|
; a * b + conj(c) * d
|
|
; Complex product plus a conjugated product, a * b + conj(c) * d, written out
; with scalar-style vector ops. Even lanes (0, 2) are used as real parts, odd
; lanes (1, 3) as imaginary parts.
; The autogenerated assertions expect zeroed accumulators v16/v17 and four
; fcmla pairs: rotations #0 and #90 on the v0/v1 and v2/v3 operands, then
; rotations #0 and #270 on the v4/v5 and v6/v7 operands.
define <4 x double> @mul_conj_mull(<4 x double> %a, <4 x double> %b, <4 x double> %c, <4 x double> %d) {
|
|
; CHECK-LABEL: mul_conj_mull:
|
|
; CHECK: // %bb.0: // %entry
|
|
; CHECK-NEXT: movi v16.2d, #0000000000000000
|
|
; CHECK-NEXT: movi v17.2d, #0000000000000000
|
|
; CHECK-NEXT: fcmla v17.2d, v0.2d, v2.2d, #0
|
|
; CHECK-NEXT: fcmla v16.2d, v1.2d, v3.2d, #0
|
|
; CHECK-NEXT: fcmla v17.2d, v0.2d, v2.2d, #90
|
|
; CHECK-NEXT: fcmla v16.2d, v1.2d, v3.2d, #90
|
|
; CHECK-NEXT: fcmla v17.2d, v4.2d, v6.2d, #0
|
|
; CHECK-NEXT: fcmla v16.2d, v5.2d, v7.2d, #0
|
|
; CHECK-NEXT: fcmla v17.2d, v4.2d, v6.2d, #270
|
|
; CHECK-NEXT: fcmla v16.2d, v5.2d, v7.2d, #270
|
|
; CHECK-NEXT: mov v0.16b, v17.16b
|
|
; CHECK-NEXT: mov v1.16b, v16.16b
|
|
; CHECK-NEXT: ret
|
|
entry:
|
|
; Deinterleave %a: %strided.vec = even lanes, %strided.vec59 = odd lanes.
%strided.vec = shufflevector <4 x double> %a, <4 x double> poison, <2 x i32> <i32 0, i32 2>
|
|
%strided.vec59 = shufflevector <4 x double> %a, <4 x double> poison, <2 x i32> <i32 1, i32 3>
|
|
; Deinterleave %b: %strided.vec61 = even lanes, %strided.vec62 = odd lanes.
%strided.vec61 = shufflevector <4 x double> %b, <4 x double> poison, <2 x i32> <i32 0, i32 2>
|
|
%strided.vec62 = shufflevector <4 x double> %b, <4 x double> poison, <2 x i32> <i32 1, i32 3>
|
|
; %0 = b.odd * a.even, %1 = b.even * a.odd, %2 = b.even * a.even.
%0 = fmul fast <2 x double> %strided.vec62, %strided.vec
|
|
%1 = fmul fast <2 x double> %strided.vec61, %strided.vec59
|
|
%2 = fmul fast <2 x double> %strided.vec61, %strided.vec
|
|
; Deinterleave %c: %strided.vec64 = even lanes, %strided.vec65 = odd lanes.
%strided.vec64 = shufflevector <4 x double> %c, <4 x double> poison, <2 x i32> <i32 0, i32 2>
|
|
%strided.vec65 = shufflevector <4 x double> %c, <4 x double> poison, <2 x i32> <i32 1, i32 3>
|
|
; Deinterleave %d: %strided.vec67 = even lanes, %strided.vec68 = odd lanes.
%strided.vec67 = shufflevector <4 x double> %d, <4 x double> poison, <2 x i32> <i32 0, i32 2>
|
|
%strided.vec68 = shufflevector <4 x double> %d, <4 x double> poison, <2 x i32> <i32 1, i32 3>
|
|
; %3 = d.odd * c.even, %4 = d.even * c.even, %5 = d.odd * c.odd.
%3 = fmul fast <2 x double> %strided.vec68, %strided.vec64
|
|
%4 = fmul fast <2 x double> %strided.vec67, %strided.vec64
|
|
%5 = fmul fast <2 x double> %strided.vec68, %strided.vec65
|
|
; %7 = b.even * a.even - b.odd * a.odd  (real part of a * b)
%6 = fmul fast <2 x double> %strided.vec62, %strided.vec59
|
|
%7 = fsub fast <2 x double> %2, %6
|
|
; %9 adds d.even * c.even + d.odd * c.odd: both c/d terms enter with a plus
; sign, which is the real part of conj(c) * d.
%8 = fadd fast <2 x double> %7, %4
|
|
%9 = fadd fast <2 x double> %8, %5
|
|
; %10 = imaginary part of a * b.
%10 = fadd fast <2 x double> %0, %1
|
|
; %13 = %10 - d.even * c.odd + d.odd * c.even: the c.odd term is subtracted,
; which is the imaginary part of conj(c) * d.
%11 = fmul fast <2 x double> %strided.vec67, %strided.vec65
|
|
%12 = fsub fast <2 x double> %10, %11
|
|
%13 = fadd fast <2 x double> %12, %3
|
|
; Re-interleave: result lanes are <%9[0], %13[0], %9[1], %13[1]>.
%interleaved.vec = shufflevector <2 x double> %9, <2 x double> %13, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
|
|
ret <4 x double> %interleaved.vec
|
|
}
|
|
|
|
; a * b + 1i * c * d
|
|
; Complex product plus a product multiplied by the imaginary unit,
; a * b + 1i * (c * d), written out with scalar-style vector ops. Even lanes
; (0, 2) are used as real parts, odd lanes (1, 3) as imaginary parts.
; Unlike the other functions in this file, the autogenerated assertions here
; contain no fcmla: the expected code deinterleaves with zip1/zip2, computes
; with fmul/fmla/fmls/fneg, and re-interleaves with zip1/zip2.
define <4 x double> @mul_add_rot_mull(<4 x double> %a, <4 x double> %b, <4 x double> %c, <4 x double> %d) {
|
|
; CHECK-LABEL: mul_add_rot_mull:
|
|
; CHECK: // %bb.0: // %entry
|
|
; CHECK-NEXT: zip2 v16.2d, v2.2d, v3.2d
|
|
; CHECK-NEXT: zip2 v17.2d, v0.2d, v1.2d
|
|
; CHECK-NEXT: zip1 v2.2d, v2.2d, v3.2d
|
|
; CHECK-NEXT: zip2 v18.2d, v4.2d, v5.2d
|
|
; CHECK-NEXT: zip1 v19.2d, v6.2d, v7.2d
|
|
; CHECK-NEXT: zip1 v0.2d, v0.2d, v1.2d
|
|
; CHECK-NEXT: zip1 v1.2d, v4.2d, v5.2d
|
|
; CHECK-NEXT: zip2 v5.2d, v6.2d, v7.2d
|
|
; CHECK-NEXT: fmul v3.2d, v16.2d, v17.2d
|
|
; CHECK-NEXT: fmul v4.2d, v2.2d, v17.2d
|
|
; CHECK-NEXT: fmla v3.2d, v18.2d, v19.2d
|
|
; CHECK-NEXT: fmla v4.2d, v0.2d, v16.2d
|
|
; CHECK-NEXT: fmla v3.2d, v1.2d, v5.2d
|
|
; CHECK-NEXT: fmla v4.2d, v1.2d, v19.2d
|
|
; CHECK-NEXT: fneg v3.2d, v3.2d
|
|
; CHECK-NEXT: fmls v4.2d, v18.2d, v5.2d
|
|
; CHECK-NEXT: fmla v3.2d, v0.2d, v2.2d
|
|
; CHECK-NEXT: zip1 v0.2d, v3.2d, v4.2d
|
|
; CHECK-NEXT: zip2 v1.2d, v3.2d, v4.2d
|
|
; CHECK-NEXT: ret
|
|
entry:
|
|
; Deinterleave %a: %strided.vec = even lanes, %strided.vec79 = odd lanes.
%strided.vec = shufflevector <4 x double> %a, <4 x double> poison, <2 x i32> <i32 0, i32 2>
|
|
%strided.vec79 = shufflevector <4 x double> %a, <4 x double> poison, <2 x i32> <i32 1, i32 3>
|
|
; Deinterleave %b: %strided.vec81 = even lanes, %strided.vec82 = odd lanes.
%strided.vec81 = shufflevector <4 x double> %b, <4 x double> poison, <2 x i32> <i32 0, i32 2>
|
|
%strided.vec82 = shufflevector <4 x double> %b, <4 x double> poison, <2 x i32> <i32 1, i32 3>
|
|
; The four cross products of a and b:
;   %0 = b.odd * a.even,  %1 = b.even * a.odd,
;   %2 = b.even * a.even, %3 = b.odd * a.odd.
%0 = fmul fast <2 x double> %strided.vec82, %strided.vec
|
|
%1 = fmul fast <2 x double> %strided.vec81, %strided.vec79
|
|
%2 = fmul fast <2 x double> %strided.vec81, %strided.vec
|
|
%3 = fmul fast <2 x double> %strided.vec82, %strided.vec79
|
|
; Deinterleave %c: %strided.vec84 = even lanes, %strided.vec85 = odd lanes.
%strided.vec84 = shufflevector <4 x double> %c, <4 x double> poison, <2 x i32> <i32 0, i32 2>
|
|
%strided.vec85 = shufflevector <4 x double> %c, <4 x double> poison, <2 x i32> <i32 1, i32 3>
|
|
; Deinterleave %d: %strided.vec87 = even lanes, %strided.vec88 = odd lanes.
%strided.vec87 = shufflevector <4 x double> %d, <4 x double> poison, <2 x i32> <i32 0, i32 2>
|
|
%strided.vec88 = shufflevector <4 x double> %d, <4 x double> poison, <2 x i32> <i32 1, i32 3>
|
|
; %4 = d.even * c.even, %5 = d.even * c.odd, %6 = d.odd * c.even.
%4 = fmul fast <2 x double> %strided.vec87, %strided.vec84
|
|
%5 = fmul fast <2 x double> %strided.vec87, %strided.vec85
|
|
%6 = fmul fast <2 x double> %strided.vec88, %strided.vec84
|
|
; %9 = b.even*a.even - (d.even*c.odd + b.odd*a.odd + d.odd*c.even),
; i.e. real part of a * b minus imaginary part of c * d.
%7 = fadd fast <2 x double> %5, %3
|
|
%8 = fadd fast <2 x double> %7, %6
|
|
%9 = fsub fast <2 x double> %2, %8
|
|
; %13 = (b.odd*a.even + b.even*a.odd) + d.even*c.even - d.odd*c.odd,
; i.e. imaginary part of a * b plus real part of c * d.
%10 = fadd fast <2 x double> %0, %1
|
|
%11 = fadd fast <2 x double> %10, %4
|
|
%12 = fmul fast <2 x double> %strided.vec88, %strided.vec85
|
|
%13 = fsub fast <2 x double> %11, %12
|
|
; Re-interleave: result lanes are <%9[0], %13[0], %9[1], %13[1]>.
%interleaved.vec = shufflevector <2 x double> %9, <2 x double> %13, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
|
|
ret <4 x double> %interleaved.vec
|
|
}
|