341 lines
14 KiB
LLVM
341 lines
14 KiB
LLVM
|
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||
|
; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple | FileCheck %s
|
||
|
|
||
|
; Loads two <8 x i8> vectors, interleaves their low halves (mask 0,8,1,9,...)
; and their high halves (mask 4,12,5,13,...), and returns the lane-wise sum.
; The two shuffles are expected to select zip1.8b and zip2.8b respectively.
define <8 x i8> @vzipi8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
; CHECK-LABEL: vzipi8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr d0, [x0]
; CHECK-NEXT:    ldr d1, [x1]
; CHECK-NEXT:    zip1.8b v2, v0, v1
; CHECK-NEXT:    zip2.8b v0, v0, v1
; CHECK-NEXT:    add.8b v0, v2, v0
; CHECK-NEXT:    ret
  %lhs = load <8 x i8>, <8 x i8>* %A
  %rhs = load <8 x i8>, <8 x i8>* %B
  ; Low-half interleave: lhs[0], rhs[0], lhs[1], rhs[1], ...
  %zip.lo = shufflevector <8 x i8> %lhs, <8 x i8> %rhs, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
  ; High-half interleave: lhs[4], rhs[4], lhs[5], rhs[5], ...
  %zip.hi = shufflevector <8 x i8> %lhs, <8 x i8> %rhs, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
  ; Both shuffle results feed the add, so both must be materialised.
  %sum = add <8 x i8> %zip.lo, %zip.hi
  ret <8 x i8> %sum
}
|
||
|
|
||
|
; 64-bit, 16-bit-lane variant: interleaves the low halves (mask 0,4,1,5) and
; the high halves (mask 2,6,3,7) of two loaded <4 x i16> vectors and adds them.
; Expected to select zip1.4h and zip2.4h.
define <4 x i16> @vzipi16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
; CHECK-LABEL: vzipi16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr d0, [x0]
; CHECK-NEXT:    ldr d1, [x1]
; CHECK-NEXT:    zip1.4h v2, v0, v1
; CHECK-NEXT:    zip2.4h v0, v0, v1
; CHECK-NEXT:    add.4h v0, v2, v0
; CHECK-NEXT:    ret
  %lhs = load <4 x i16>, <4 x i16>* %A
  %rhs = load <4 x i16>, <4 x i16>* %B
  ; Low-half interleave: lhs[0], rhs[0], lhs[1], rhs[1]
  %zip.lo = shufflevector <4 x i16> %lhs, <4 x i16> %rhs, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
  ; High-half interleave: lhs[2], rhs[2], lhs[3], rhs[3]
  %zip.hi = shufflevector <4 x i16> %lhs, <4 x i16> %rhs, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
  %sum = add <4 x i16> %zip.lo, %zip.hi
  ret <4 x i16> %sum
}
|
||
|
|
||
|
; 128-bit, 8-bit-lane variant: interleaves the low halves (mask 0,16,1,17,...)
; and the high halves (mask 8,24,9,25,...) of two loaded <16 x i8> vectors and
; adds them. Expected to select zip1.16b and zip2.16b.
define <16 x i8> @vzipQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
; CHECK-LABEL: vzipQi8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr q0, [x0]
; CHECK-NEXT:    ldr q1, [x1]
; CHECK-NEXT:    zip1.16b v2, v0, v1
; CHECK-NEXT:    zip2.16b v0, v0, v1
; CHECK-NEXT:    add.16b v0, v2, v0
; CHECK-NEXT:    ret
  %lhs = load <16 x i8>, <16 x i8>* %A
  %rhs = load <16 x i8>, <16 x i8>* %B
  ; Low-half interleave: lhs[0], rhs[0], ..., lhs[7], rhs[7]
  %zip.lo = shufflevector <16 x i8> %lhs, <16 x i8> %rhs, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
  ; High-half interleave: lhs[8], rhs[8], ..., lhs[15], rhs[15]
  %zip.hi = shufflevector <16 x i8> %lhs, <16 x i8> %rhs, <16 x i32> <i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
  %sum = add <16 x i8> %zip.lo, %zip.hi
  ret <16 x i8> %sum
}
|
||
|
|
||
|
; 128-bit, 16-bit-lane variant: interleaves the low halves (mask 0,8,1,9,...)
; and the high halves (mask 4,12,5,13,...) of two loaded <8 x i16> vectors and
; adds them. Expected to select zip1.8h and zip2.8h.
define <8 x i16> @vzipQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
; CHECK-LABEL: vzipQi16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr q0, [x0]
; CHECK-NEXT:    ldr q1, [x1]
; CHECK-NEXT:    zip1.8h v2, v0, v1
; CHECK-NEXT:    zip2.8h v0, v0, v1
; CHECK-NEXT:    add.8h v0, v2, v0
; CHECK-NEXT:    ret
  %lhs = load <8 x i16>, <8 x i16>* %A
  %rhs = load <8 x i16>, <8 x i16>* %B
  ; Low-half interleave: lhs[0], rhs[0], ..., lhs[3], rhs[3]
  %zip.lo = shufflevector <8 x i16> %lhs, <8 x i16> %rhs, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
  ; High-half interleave: lhs[4], rhs[4], ..., lhs[7], rhs[7]
  %zip.hi = shufflevector <8 x i16> %lhs, <8 x i16> %rhs, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
  %sum = add <8 x i16> %zip.lo, %zip.hi
  ret <8 x i16> %sum
}
|
||
|
|
||
|
; 128-bit, 32-bit-lane variant: interleaves the low halves (mask 0,4,1,5) and
; the high halves (mask 2,6,3,7) of two loaded <4 x i32> vectors and adds them.
; Expected to select zip1.4s and zip2.4s.
define <4 x i32> @vzipQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
; CHECK-LABEL: vzipQi32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr q0, [x0]
; CHECK-NEXT:    ldr q1, [x1]
; CHECK-NEXT:    zip1.4s v2, v0, v1
; CHECK-NEXT:    zip2.4s v0, v0, v1
; CHECK-NEXT:    add.4s v0, v2, v0
; CHECK-NEXT:    ret
  %lhs = load <4 x i32>, <4 x i32>* %A
  %rhs = load <4 x i32>, <4 x i32>* %B
  ; Low-half interleave: lhs[0], rhs[0], lhs[1], rhs[1]
  %zip.lo = shufflevector <4 x i32> %lhs, <4 x i32> %rhs, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
  ; High-half interleave: lhs[2], rhs[2], lhs[3], rhs[3]
  %zip.hi = shufflevector <4 x i32> %lhs, <4 x i32> %rhs, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
  %sum = add <4 x i32> %zip.lo, %zip.hi
  ret <4 x i32> %sum
}
|
||
|
|
||
|
; Floating-point variant: interleaves the low halves (mask 0,4,1,5) and the
; high halves (mask 2,6,3,7) of two loaded <4 x float> vectors and combines
; them with fadd. Expected to select zip1.4s, zip2.4s and fadd.4s.
define <4 x float> @vzipQf(<4 x float>* %A, <4 x float>* %B) nounwind {
; CHECK-LABEL: vzipQf:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr q0, [x0]
; CHECK-NEXT:    ldr q1, [x1]
; CHECK-NEXT:    zip1.4s v2, v0, v1
; CHECK-NEXT:    zip2.4s v0, v0, v1
; CHECK-NEXT:    fadd.4s v0, v2, v0
; CHECK-NEXT:    ret
  %lhs = load <4 x float>, <4 x float>* %A
  %rhs = load <4 x float>, <4 x float>* %B
  ; Low-half interleave: lhs[0], rhs[0], lhs[1], rhs[1]
  %zip.lo = shufflevector <4 x float> %lhs, <4 x float> %rhs, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
  ; High-half interleave: lhs[2], rhs[2], lhs[3], rhs[3]
  %zip.hi = shufflevector <4 x float> %lhs, <4 x float> %rhs, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
  %sum = fadd <4 x float> %zip.lo, %zip.hi
  ret <4 x float> %sum
}
|
||
|
|
||
|
; Undef shuffle indices should not prevent matching to ZIP1/ZIP2:
|
||
|
|
||
|
; Same shape as the plain <8 x i8> zip test, but several mask elements in both
; shuffles are undef. The expected output is still zip1.8b / zip2.8b + add.8b.
define <8 x i8> @vzipi8_undef(<8 x i8>* %A, <8 x i8>* %B) nounwind {
; CHECK-LABEL: vzipi8_undef:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr d0, [x0]
; CHECK-NEXT:    ldr d1, [x1]
; CHECK-NEXT:    zip1.8b v2, v0, v1
; CHECK-NEXT:    zip2.8b v0, v0, v1
; CHECK-NEXT:    add.8b v0, v2, v0
; CHECK-NEXT:    ret
  %lhs = load <8 x i8>, <8 x i8>* %A
  %rhs = load <8 x i8>, <8 x i8>* %B
  ; Low-half interleave with result lanes 1 and 4 left undef.
  %zip.lo = shufflevector <8 x i8> %lhs, <8 x i8> %rhs, <8 x i32> <i32 0, i32 undef, i32 1, i32 9, i32 undef, i32 10, i32 3, i32 11>
  ; High-half interleave with result lanes 5 and 6 left undef.
  %zip.hi = shufflevector <8 x i8> %lhs, <8 x i8> %rhs, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 undef, i32 undef, i32 15>
  %sum = add <8 x i8> %zip.lo, %zip.hi
  ret <8 x i8> %sum
}
|
||
|
|
||
|
; 128-bit counterpart of the undef-mask test: both <16 x i8> shuffles carry
; undef mask elements, and the expected output is still zip1.16b / zip2.16b
; followed by add.16b.
define <16 x i8> @vzipQi8_undef(<16 x i8>* %A, <16 x i8>* %B) nounwind {
; CHECK-LABEL: vzipQi8_undef:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr q0, [x0]
; CHECK-NEXT:    ldr q1, [x1]
; CHECK-NEXT:    zip1.16b v2, v0, v1
; CHECK-NEXT:    zip2.16b v0, v0, v1
; CHECK-NEXT:    add.16b v0, v2, v0
; CHECK-NEXT:    ret
  %lhs = load <16 x i8>, <16 x i8>* %A
  %rhs = load <16 x i8>, <16 x i8>* %B
  ; Low-half interleave with result lanes 3, 4 and 5 left undef.
  %zip.lo = shufflevector <16 x i8> %lhs, <16 x i8> %rhs, <16 x i32> <i32 0, i32 16, i32 1, i32 undef, i32 undef, i32 undef, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
  ; High-half interleave with result lanes 3, 11 and 14 left undef.
  %zip.hi = shufflevector <16 x i8> %lhs, <16 x i8> %rhs, <16 x i32> <i32 8, i32 24, i32 9, i32 undef, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 undef, i32 14, i32 30, i32 undef, i32 31>
  %sum = add <16 x i8> %zip.lo, %zip.hi
  ret <16 x i8> %sum
}
|
||
|
|
||
|
; Interleaves two <8 x i8> arguments into a single <16 x i8> result with one
; widening shuffle (mask 0,8,1,9,...,7,15). Expected to select one zip1.16b
; on the arguments viewed as q registers.
define <16 x i8> @combine_v16i8(<8 x i8> %0, <8 x i8> %1) {
; CHECK-LABEL: combine_v16i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT:    zip1.16b v0, v0, v1
; CHECK-NEXT:    ret
  ; Full interleave: %0[0], %1[0], %0[1], %1[1], ..., %0[7], %1[7]
  %zip = shufflevector <8 x i8> %0, <8 x i8> %1, <16 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11, i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
  ret <16 x i8> %zip
}
|
||
|
|
||
|
; Builds the same <16 x i8> interleave in three steps: a low-half zip, a
; high-half zip, then a concatenating shuffle of the two halves. The expected
; output is the same single zip1.16b as the one-shuffle form.
define <16 x i8> @combine2_v16i8(<8 x i8> %0, <8 x i8> %1) {
; CHECK-LABEL: combine2_v16i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT:    zip1.16b v0, v0, v1
; CHECK-NEXT:    ret
  ; Low-half interleave of the two arguments.
  %lo = shufflevector <8 x i8> %0, <8 x i8> %1, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
  ; High-half interleave of the two arguments.
  %hi = shufflevector <8 x i8> %0, <8 x i8> %1, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
  ; Identity mask 0..15 concatenates %lo and %hi.
  %cat = shufflevector <8 x i8> %lo, <8 x i8> %hi, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  ret <16 x i8> %cat
}
|
||
|
|
||
|
; Interleaves two <4 x i16> arguments into a single <8 x i16> result with one
; widening shuffle (mask 0,4,1,5,2,6,3,7). Expected to select one zip1.8h.
define <8 x i16> @combine_v8i16(<4 x i16> %0, <4 x i16> %1) {
; CHECK-LABEL: combine_v8i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT:    zip1.8h v0, v0, v1
; CHECK-NEXT:    ret
  ; Full interleave: %0[0], %1[0], ..., %0[3], %1[3]
  %zip = shufflevector <4 x i16> %0, <4 x i16> %1, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
  ret <8 x i16> %zip
}
|
||
|
|
||
|
; Three-shuffle form of the <8 x i16> interleave: low-half zip, high-half zip,
; then concatenation. The expected output is a single zip1.8h.
define <8 x i16> @combine2_v8i16(<4 x i16> %0, <4 x i16> %1) {
; CHECK-LABEL: combine2_v8i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT:    zip1.8h v0, v0, v1
; CHECK-NEXT:    ret
  ; Low-half interleave of the two arguments.
  %lo = shufflevector <4 x i16> %0, <4 x i16> %1, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
  ; High-half interleave of the two arguments.
  %hi = shufflevector <4 x i16> %0, <4 x i16> %1, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
  ; Identity mask 0..7 concatenates %lo and %hi.
  %cat = shufflevector <4 x i16> %lo, <4 x i16> %hi, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <8 x i16> %cat
}
|
||
|
|
||
|
; Interleaves two <2 x i32> arguments into a single <4 x i32> result with one
; widening shuffle (mask 0,2,1,3). Expected to select one zip1.4s.
define <4 x i32> @combine_v4i32(<2 x i32> %0, <2 x i32> %1) {
; CHECK-LABEL: combine_v4i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT:    zip1.4s v0, v0, v1
; CHECK-NEXT:    ret
  ; Full interleave: %0[0], %1[0], %0[1], %1[1]
  %zip = shufflevector <2 x i32> %0, <2 x i32> %1, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
  ret <4 x i32> %zip
}
|
||
|
|
||
|
; Three-shuffle form of the <4 x i32> interleave: lane 0 of each argument,
; lane 1 of each argument, then concatenation. The expected output is a single
; zip1.4s.
define <4 x i32> @combine2_v4i32(<2 x i32> %0, <2 x i32> %1) {
; CHECK-LABEL: combine2_v4i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT:    zip1.4s v0, v0, v1
; CHECK-NEXT:    ret
  ; Pair of first lanes: %0[0], %1[0]
  %lo = shufflevector <2 x i32> %0, <2 x i32> %1, <2 x i32> <i32 0, i32 2>
  ; Pair of second lanes: %0[1], %1[1]
  %hi = shufflevector <2 x i32> %0, <2 x i32> %1, <2 x i32> <i32 1, i32 3>
  ; Identity mask 0..3 concatenates %lo and %hi.
  %cat = shufflevector <2 x i32> %lo, <2 x i32> %hi, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  ret <4 x i32> %cat
}
|
||
|
|
||
|
; Single-shuffle <16 x i8> interleave of two <8 x i8> arguments where mask
; element 1 is undef. The expected output is still a single zip1.16b.
define <16 x i8> @combine_v16i8_undef(<8 x i8> %0, <8 x i8> %1) {
; CHECK-LABEL: combine_v16i8_undef:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT:    zip1.16b v0, v0, v1
; CHECK-NEXT:    ret
  ; Interleave mask 0,8,1,9,... with the second element replaced by undef.
  %zip = shufflevector <8 x i8> %0, <8 x i8> %1, <16 x i32> <i32 0, i32 undef, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11, i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
  ret <16 x i8> %zip
}
|
||
|
|
||
|
; Three-shuffle <16 x i8> interleave where the low-half shuffle has an undef
; mask element. The expected output is still a single zip1.16b.
define <16 x i8> @combine2_v16i8_undef(<8 x i8> %0, <8 x i8> %1) {
; CHECK-LABEL: combine2_v16i8_undef:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT:    zip1.16b v0, v0, v1
; CHECK-NEXT:    ret
  ; Low-half interleave with result lane 1 left undef.
  %lo = shufflevector <8 x i8> %0, <8 x i8> %1, <8 x i32> <i32 0, i32 undef, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
  ; High-half interleave, fully specified.
  %hi = shufflevector <8 x i8> %0, <8 x i8> %1, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
  ; Identity mask 0..15 concatenates %lo and %hi.
  %cat = shufflevector <8 x i8> %lo, <8 x i8> %hi, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  ret <16 x i8> %cat
}
|
||
|
|
||
|
; Single-shuffle <8 x i16> interleave of two <4 x i16> arguments where mask
; element 1 is undef. The expected output is still a single zip1.8h.
define <8 x i16> @combine_v8i16_undef(<4 x i16> %0, <4 x i16> %1) {
; CHECK-LABEL: combine_v8i16_undef:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT:    zip1.8h v0, v0, v1
; CHECK-NEXT:    ret
  ; Interleave mask 0,4,1,5,... with the second element replaced by undef.
  %zip = shufflevector <4 x i16> %0, <4 x i16> %1, <8 x i32> <i32 0, i32 undef, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
  ret <8 x i16> %zip
}
|
||
|
|
||
|
; FIXME: This could be zip1 too, once the 8,0,9,1... pattern (swapped operands) is handled.
|
||
|
; Interleave of two <8 x i8> arguments written with the shuffle operands
; swapped (%1 first, %0 second) and a mask that starts at index 8, so the
; result lanes are %0[0], %1[0], %0[1], %1[1], ...
; The expected output below loads a mask from .LCPI17_0 and uses tbl.16b.
define <16 x i8> @combine_v8i16_8first(<8 x i8> %0, <8 x i8> %1) {
; CHECK-LABEL: combine_v8i16_8first:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1_q2
; CHECK-NEXT:    adrp x8, .LCPI17_0
; CHECK-NEXT:    fmov d2, d0
; CHECK-NEXT:    ldr q3, [x8, :lo12:.LCPI17_0]
; CHECK-NEXT:    tbl.16b v0, { v1, v2 }, v3
; CHECK-NEXT:    ret
  ; Indices 8..15 refer to %0 here because %0 is the second shuffle operand.
  %zip = shufflevector <8 x i8> %1, <8 x i8> %0, <16 x i32> <i32 8, i32 0, i32 9, i32 1, i32 10, i32 2, i32 11, i32 3, i32 12, i32 4, i32 13, i32 5, i32 14, i32 6, i32 15, i32 7>
  ret <16 x i8> %zip
}
|
||
|
|
||
|
|
||
|
; FIXME: This could be zip1 too, once the 8,0,9,1... pattern (swapped operands) is handled.
|
||
|
; Swapped-operand interleave (%1 first, %0 second, mask starting at index 8)
; whose final mask element is undef.
; The expected output below loads a mask from .LCPI18_0 and uses tbl.16b.
define <16 x i8> @combine_v8i16_8firstundef(<8 x i8> %0, <8 x i8> %1) {
; CHECK-LABEL: combine_v8i16_8firstundef:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1_q2
; CHECK-NEXT:    adrp x8, .LCPI18_0
; CHECK-NEXT:    fmov d2, d0
; CHECK-NEXT:    ldr q3, [x8, :lo12:.LCPI18_0]
; CHECK-NEXT:    tbl.16b v0, { v1, v2 }, v3
; CHECK-NEXT:    ret
  ; Indices 8..15 refer to %0 here; the last result lane is left undef.
  %zip = shufflevector <8 x i8> %1, <8 x i8> %0, <16 x i32> <i32 8, i32 0, i32 9, i32 1, i32 10, i32 2, i32 11, i32 3, i32 12, i32 4, i32 13, i32 5, i32 14, i32 6, i32 15, i32 undef>
  ret <16 x i8> %zip
}
|
||
|
|
||
|
; Compares each lane of %arg against zero (0.0 < %arg, ordered), picks compare
; lanes 2 and 0, duplicates each picked lane into a pair (mask 0,0,1,1), and
; uses that <4 x i1> to select 1.0 or 0.0 per lane.
; The entry block label must stay "bb": it appears in the expected output.
define <4 x float> @shuffle_zip1(<4 x float> %arg) {
; CHECK-LABEL: shuffle_zip1:
; CHECK:       // %bb.0: // %bb
; CHECK-NEXT:    movi.2d v1, #0000000000000000
; CHECK-NEXT:    fcmgt.4s v0, v0, v1
; CHECK-NEXT:    uzp1.8h v1, v0, v0
; CHECK-NEXT:    xtn.4h v0, v0
; CHECK-NEXT:    xtn.4h v1, v1
; CHECK-NEXT:    zip2.4h v0, v0, v1
; CHECK-NEXT:    fmov.4s v1, #1.00000000
; CHECK-NEXT:    zip1.4h v0, v0, v0
; CHECK-NEXT:    sshll.4s v0, v0, #0
; CHECK-NEXT:    and.16b v0, v1, v0
; CHECK-NEXT:    ret
bb:
  %is.pos = fcmp olt <4 x float> zeroinitializer, %arg
  ; Narrow to two lanes: compare results 2 and 0, in that order.
  %picked = shufflevector <4 x i1> %is.pos, <4 x i1> zeroinitializer, <2 x i32> <i32 2, i32 0>
  ; Widen back to four lanes by repeating each picked lane twice.
  %paired = shufflevector <2 x i1> %picked, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 0, i32 1, i32 1>
  %result = select <4 x i1> %paired, <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, <4 x float> zeroinitializer
  ret <4 x float> %result
}
|
||
|
|
||
|
; Integer, unsigned-compare variant: tests 0 <u %arg per lane, picks compare
; lanes 2 and 0, duplicates each picked lane into a pair (mask 0,0,1,1), and
; uses that <4 x i1> to select 1 or 0 per lane.
; The entry block label must stay "bb": it appears in the expected output.
define <4 x i32> @shuffle_zip2(<4 x i32> %arg) {
; CHECK-LABEL: shuffle_zip2:
; CHECK:       // %bb.0: // %bb
; CHECK-NEXT:    cmtst.4s v0, v0, v0
; CHECK-NEXT:    uzp1.8h v1, v0, v0
; CHECK-NEXT:    xtn.4h v0, v0
; CHECK-NEXT:    xtn.4h v1, v1
; CHECK-NEXT:    zip2.4h v0, v0, v1
; CHECK-NEXT:    movi.4s v1, #1
; CHECK-NEXT:    zip1.4h v0, v0, v0
; CHECK-NEXT:    ushll.4s v0, v0, #0
; CHECK-NEXT:    and.16b v0, v0, v1
; CHECK-NEXT:    ret
bb:
  %is.nonzero = icmp ult <4 x i32> zeroinitializer, %arg
  ; Narrow to two lanes: compare results 2 and 0, in that order.
  %picked = shufflevector <4 x i1> %is.nonzero, <4 x i1> zeroinitializer, <2 x i32> <i32 2, i32 0>
  ; Widen back to four lanes by repeating each picked lane twice.
  %paired = shufflevector <2 x i1> %picked, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 0, i32 1, i32 1>
  %result = select <4 x i1> %paired, <4 x i32> <i32 1, i32 1, i32 1, i32 1>, <4 x i32> zeroinitializer
  ret <4 x i32> %result
}
|
||
|
|
||
|
; Integer, signed-compare variant: tests 0 <s %arg per lane, picks compare
; lanes 2 and 0, duplicates each picked lane into a pair (mask 0,0,1,1), and
; uses that <4 x i1> to select 1 or 0 per lane.
; The entry block label must stay "bb": it appears in the expected output.
define <4 x i32> @shuffle_zip3(<4 x i32> %arg) {
; CHECK-LABEL: shuffle_zip3:
; CHECK:       // %bb.0: // %bb
; CHECK-NEXT:    cmgt.4s v0, v0, #0
; CHECK-NEXT:    uzp1.8h v1, v0, v0
; CHECK-NEXT:    xtn.4h v0, v0
; CHECK-NEXT:    xtn.4h v1, v1
; CHECK-NEXT:    zip2.4h v0, v0, v1
; CHECK-NEXT:    movi.4s v1, #1
; CHECK-NEXT:    zip1.4h v0, v0, v0
; CHECK-NEXT:    sshll.4s v0, v0, #0
; CHECK-NEXT:    and.16b v0, v0, v1
; CHECK-NEXT:    ret
bb:
  %is.pos = icmp slt <4 x i32> zeroinitializer, %arg
  ; Narrow to two lanes: compare results 2 and 0, in that order.
  %picked = shufflevector <4 x i1> %is.pos, <4 x i1> zeroinitializer, <2 x i32> <i32 2, i32 0>
  ; Widen back to four lanes by repeating each picked lane twice.
  %paired = shufflevector <2 x i1> %picked, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 0, i32 1, i32 1>
  %result = select <4 x i1> %paired, <4 x i32> <i32 1, i32 1, i32 1, i32 1>, <4 x i32> zeroinitializer
  ret <4 x i32> %result
}
|