; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple | FileCheck %s

; Codegen tests for AArch64 NEON zip1/zip2 selection from shufflevector.
;
; NOTE(review): this file had been flattened and had lost every constant
; vector literal (shuffle masks and select operands). The operands below were
; reinstated from the expected assembly: masks without undef lanes are fully
; determined by the zip1/zip2 expectations; the precise undef lane positions
; in the *_undef tests are a reconstruction. After confirming them, re-run
; utils/update_llc_test_checks.py to verify that the assertions are unchanged.

; Interleave the low halves (zip1) and the high halves (zip2) of two loaded
; vectors, then add the results so that both shuffles stay live.
define <8 x i8> @vzipi8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
; CHECK-LABEL: vzipi8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr d0, [x0]
; CHECK-NEXT:    ldr d1, [x1]
; CHECK-NEXT:    zip1.8b v2, v0, v1
; CHECK-NEXT:    zip2.8b v0, v0, v1
; CHECK-NEXT:    add.8b v0, v2, v0
; CHECK-NEXT:    ret
  %tmp1 = load <8 x i8>, <8 x i8>* %A
  %tmp2 = load <8 x i8>, <8 x i8>* %B
  %tmp3 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
  %tmp4 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
  %tmp5 = add <8 x i8> %tmp3, %tmp4
  ret <8 x i8> %tmp5
}

define <4 x i16> @vzipi16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
; CHECK-LABEL: vzipi16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr d0, [x0]
; CHECK-NEXT:    ldr d1, [x1]
; CHECK-NEXT:    zip1.4h v2, v0, v1
; CHECK-NEXT:    zip2.4h v0, v0, v1
; CHECK-NEXT:    add.4h v0, v2, v0
; CHECK-NEXT:    ret
  %tmp1 = load <4 x i16>, <4 x i16>* %A
  %tmp2 = load <4 x i16>, <4 x i16>* %B
  %tmp3 = shufflevector <4 x i16> %tmp1, <4 x i16> %tmp2, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
  %tmp4 = shufflevector <4 x i16> %tmp1, <4 x i16> %tmp2, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
  %tmp5 = add <4 x i16> %tmp3, %tmp4
  ret <4 x i16> %tmp5
}

define <16 x i8> @vzipQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
; CHECK-LABEL: vzipQi8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr q0, [x0]
; CHECK-NEXT:    ldr q1, [x1]
; CHECK-NEXT:    zip1.16b v2, v0, v1
; CHECK-NEXT:    zip2.16b v0, v0, v1
; CHECK-NEXT:    add.16b v0, v2, v0
; CHECK-NEXT:    ret
  %tmp1 = load <16 x i8>, <16 x i8>* %A
  %tmp2 = load <16 x i8>, <16 x i8>* %B
  %tmp3 = shufflevector <16 x i8> %tmp1, <16 x i8> %tmp2, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
  %tmp4 = shufflevector <16 x i8> %tmp1, <16 x i8> %tmp2, <16 x i32> <i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
  %tmp5 = add <16 x i8> %tmp3, %tmp4
  ret <16 x i8> %tmp5
}

define <8 x i16> @vzipQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
; CHECK-LABEL: vzipQi16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr q0, [x0]
; CHECK-NEXT:    ldr q1, [x1]
; CHECK-NEXT:    zip1.8h v2, v0, v1
; CHECK-NEXT:    zip2.8h v0, v0, v1
; CHECK-NEXT:    add.8h v0, v2, v0
; CHECK-NEXT:    ret
  %tmp1 = load <8 x i16>, <8 x i16>* %A
  %tmp2 = load <8 x i16>, <8 x i16>* %B
  %tmp3 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
  %tmp4 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
  %tmp5 = add <8 x i16> %tmp3, %tmp4
  ret <8 x i16> %tmp5
}

define <4 x i32> @vzipQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
; CHECK-LABEL: vzipQi32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr q0, [x0]
; CHECK-NEXT:    ldr q1, [x1]
; CHECK-NEXT:    zip1.4s v2, v0, v1
; CHECK-NEXT:    zip2.4s v0, v0, v1
; CHECK-NEXT:    add.4s v0, v2, v0
; CHECK-NEXT:    ret
  %tmp1 = load <4 x i32>, <4 x i32>* %A
  %tmp2 = load <4 x i32>, <4 x i32>* %B
  %tmp3 = shufflevector <4 x i32> %tmp1, <4 x i32> %tmp2, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
  %tmp4 = shufflevector <4 x i32> %tmp1, <4 x i32> %tmp2, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
  %tmp5 = add <4 x i32> %tmp3, %tmp4
  ret <4 x i32> %tmp5
}

define <4 x float> @vzipQf(<4 x float>* %A, <4 x float>* %B) nounwind {
; CHECK-LABEL: vzipQf:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr q0, [x0]
; CHECK-NEXT:    ldr q1, [x1]
; CHECK-NEXT:    zip1.4s v2, v0, v1
; CHECK-NEXT:    zip2.4s v0, v0, v1
; CHECK-NEXT:    fadd.4s v0, v2, v0
; CHECK-NEXT:    ret
  %tmp1 = load <4 x float>, <4 x float>* %A
  %tmp2 = load <4 x float>, <4 x float>* %B
  %tmp3 = shufflevector <4 x float> %tmp1, <4 x float> %tmp2, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
  %tmp4 = shufflevector <4 x float> %tmp1, <4 x float> %tmp2, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
  %tmp5 = fadd <4 x float> %tmp3, %tmp4
  ret <4 x float> %tmp5
}

; Undef shuffle indices should not prevent matching to VZIP:

; NOTE(review): undef lane positions in the two tests below are reconstructed;
; every defined lane follows the zip1/zip2 pattern.
define <8 x i8> @vzipi8_undef(<8 x i8>* %A, <8 x i8>* %B) nounwind {
; CHECK-LABEL: vzipi8_undef:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr d0, [x0]
; CHECK-NEXT:    ldr d1, [x1]
; CHECK-NEXT:    zip1.8b v2, v0, v1
; CHECK-NEXT:    zip2.8b v0, v0, v1
; CHECK-NEXT:    add.8b v0, v2, v0
; CHECK-NEXT:    ret
  %tmp1 = load <8 x i8>, <8 x i8>* %A
  %tmp2 = load <8 x i8>, <8 x i8>* %B
  %tmp3 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 0, i32 undef, i32 1, i32 9, i32 undef, i32 10, i32 3, i32 11>
  %tmp4 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 undef, i32 undef, i32 15>
  %tmp5 = add <8 x i8> %tmp3, %tmp4
  ret <8 x i8> %tmp5
}

define <16 x i8> @vzipQi8_undef(<16 x i8>* %A, <16 x i8>* %B) nounwind {
; CHECK-LABEL: vzipQi8_undef:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr q0, [x0]
; CHECK-NEXT:    ldr q1, [x1]
; CHECK-NEXT:    zip1.16b v2, v0, v1
; CHECK-NEXT:    zip2.16b v0, v0, v1
; CHECK-NEXT:    add.16b v0, v2, v0
; CHECK-NEXT:    ret
  %tmp1 = load <16 x i8>, <16 x i8>* %A
  %tmp2 = load <16 x i8>, <16 x i8>* %B
  %tmp3 = shufflevector <16 x i8> %tmp1, <16 x i8> %tmp2, <16 x i32> <i32 0, i32 16, i32 1, i32 undef, i32 undef, i32 undef, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
  %tmp4 = shufflevector <16 x i8> %tmp1, <16 x i8> %tmp2, <16 x i32> <i32 8, i32 24, i32 9, i32 undef, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 undef, i32 14, i32 30, i32 undef, i32 31>
  %tmp5 = add <16 x i8> %tmp3, %tmp4
  ret <16 x i8> %tmp5
}

; Interleaving two 64-bit vectors into one 128-bit result is expected to become
; a single zip1 on the widened registers. The "combine2" variants build the
; same result from two half-width shuffles followed by a concatenating shuffle.
define <16 x i8> @combine_v16i8(<8 x i8> %0, <8 x i8> %1) {
; CHECK-LABEL: combine_v16i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT:    zip1.16b v0, v0, v1
; CHECK-NEXT:    ret
  %3 = shufflevector <8 x i8> %0, <8 x i8> %1, <16 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11, i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
  ret <16 x i8> %3
}

define <16 x i8> @combine2_v16i8(<8 x i8> %0, <8 x i8> %1) {
; CHECK-LABEL: combine2_v16i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT:    zip1.16b v0, v0, v1
; CHECK-NEXT:    ret
  %3 = shufflevector <8 x i8> %0, <8 x i8> %1, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
  %4 = shufflevector <8 x i8> %0, <8 x i8> %1, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
  %5 = shufflevector <8 x i8> %3, <8 x i8> %4, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  ret <16 x i8> %5
}

define <8 x i16> @combine_v8i16(<4 x i16> %0, <4 x i16> %1) {
; CHECK-LABEL: combine_v8i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT:    zip1.8h v0, v0, v1
; CHECK-NEXT:    ret
  %3 = shufflevector <4 x i16> %0, <4 x i16> %1, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
  ret <8 x i16> %3
}

define <8 x i16> @combine2_v8i16(<4 x i16> %0, <4 x i16> %1) {
; CHECK-LABEL: combine2_v8i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT:    zip1.8h v0, v0, v1
; CHECK-NEXT:    ret
  %3 = shufflevector <4 x i16> %0, <4 x i16> %1, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
  %4 = shufflevector <4 x i16> %0, <4 x i16> %1, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
  %5 = shufflevector <4 x i16> %3, <4 x i16> %4, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <8 x i16> %5
}

define <4 x i32> @combine_v4i32(<2 x i32> %0, <2 x i32> %1) {
; CHECK-LABEL: combine_v4i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT:    zip1.4s v0, v0, v1
; CHECK-NEXT:    ret
  %3 = shufflevector <2 x i32> %0, <2 x i32> %1, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
  ret <4 x i32> %3
}

define <4 x i32> @combine2_v4i32(<2 x i32> %0, <2 x i32> %1) {
; CHECK-LABEL: combine2_v4i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT:    zip1.4s v0, v0, v1
; CHECK-NEXT:    ret
  %3 = shufflevector <2 x i32> %0, <2 x i32> %1, <2 x i32> <i32 0, i32 2>
  %4 = shufflevector <2 x i32> %0, <2 x i32> %1, <2 x i32> <i32 1, i32 3>
  %5 = shufflevector <2 x i32> %3, <2 x i32> %4, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  ret <4 x i32> %5
}

; Same combines with some mask lanes left undef.
; NOTE(review): which lanes are undef in the four tests below is a
; reconstruction; every defined lane follows the zip1 pattern.
define <16 x i8> @combine_v16i8_undef(<8 x i8> %0, <8 x i8> %1) {
; CHECK-LABEL: combine_v16i8_undef:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT:    zip1.16b v0, v0, v1
; CHECK-NEXT:    ret
  %3 = shufflevector <8 x i8> %0, <8 x i8> %1, <16 x i32> <i32 0, i32 8, i32 1, i32 undef, i32 2, i32 10, i32 undef, i32 11, i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
  ret <16 x i8> %3
}

define <16 x i8> @combine2_v16i8_undef(<8 x i8> %0, <8 x i8> %1) {
; CHECK-LABEL: combine2_v16i8_undef:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT:    zip1.16b v0, v0, v1
; CHECK-NEXT:    ret
  %3 = shufflevector <8 x i8> %0, <8 x i8> %1, <8 x i32> <i32 0, i32 8, i32 1, i32 undef, i32 2, i32 10, i32 3, i32 11>
  %4 = shufflevector <8 x i8> %0, <8 x i8> %1, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 undef, i32 14, i32 7, i32 15>
  %5 = shufflevector <8 x i8> %3, <8 x i8> %4, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  ret <16 x i8> %5
}

define <8 x i16> @combine_v8i16_undef(<4 x i16> %0, <4 x i16> %1) {
; CHECK-LABEL: combine_v8i16_undef:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT:    zip1.8h v0, v0, v1
; CHECK-NEXT:    ret
  %3 = shufflevector <4 x i16> %0, <4 x i16> %1, <8 x i32> <i32 0, i32 4, i32 1, i32 undef, i32 2, i32 6, i32 undef, i32 7>
  ret <8 x i16> %3
}

; FIXME: This could be zip1 too, 8,0,9,1... pattern is handled
define <16 x i8> @combine_v8i16_8first(<8 x i8> %0, <8 x i8> %1) {
; CHECK-LABEL: combine_v8i16_8first:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1_q2
; CHECK-NEXT:    adrp x8, .LCPI17_0
; CHECK-NEXT:    fmov d2, d0
; CHECK-NEXT:    ldr q3, [x8, :lo12:.LCPI17_0]
; CHECK-NEXT:    tbl.16b v0, { v1, v2 }, v3
; CHECK-NEXT:    ret
  %3 = shufflevector <8 x i8> %1, <8 x i8> %0, <16 x i32> <i32 8, i32 0, i32 9, i32 1, i32 10, i32 2, i32 11, i32 3, i32 12, i32 4, i32 13, i32 5, i32 14, i32 6, i32 15, i32 7>
  ret <16 x i8> %3
}

; FIXME: This could be zip1 too, 8,0,9,1... pattern is handled
; NOTE(review): the position of the undef lane is a reconstruction.
define <16 x i8> @combine_v8i16_8firstundef(<8 x i8> %0, <8 x i8> %1) {
; CHECK-LABEL: combine_v8i16_8firstundef:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1_q2
; CHECK-NEXT:    adrp x8, .LCPI18_0
; CHECK-NEXT:    fmov d2, d0
; CHECK-NEXT:    ldr q3, [x8, :lo12:.LCPI18_0]
; CHECK-NEXT:    tbl.16b v0, { v1, v2 }, v3
; CHECK-NEXT:    ret
  %3 = shufflevector <8 x i8> %1, <8 x i8> %0, <16 x i32> <i32 8, i32 0, i32 9, i32 1, i32 10, i32 2, i32 11, i32 3, i32 12, i32 4, i32 13, i32 5, i32 14, i32 6, i32 15, i32 undef>
  ret <16 x i8> %3
}

; Shuffles of a compare result (<4 x i1>) narrowed to <2 x i1> and widened
; again, feeding a select against a splat of one.
; NOTE(review): the masks in the three tests below are derived from the
; expected zip2.4h/zip1.4h sequence (result lanes [c2, c2, c0, c0]) and the
; select constants from the expected splat-of-one immediate; confirm.
define <4 x float> @shuffle_zip1(<4 x float> %arg) {
; CHECK-LABEL: shuffle_zip1:
; CHECK:       // %bb.0: // %bb
; CHECK-NEXT:    movi.2d v1, #0000000000000000
; CHECK-NEXT:    fcmgt.4s v0, v0, v1
; CHECK-NEXT:    uzp1.8h v1, v0, v0
; CHECK-NEXT:    xtn.4h v0, v0
; CHECK-NEXT:    xtn.4h v1, v1
; CHECK-NEXT:    zip2.4h v0, v0, v1
; CHECK-NEXT:    fmov.4s v1, #1.00000000
; CHECK-NEXT:    zip1.4h v0, v0, v0
; CHECK-NEXT:    sshll.4s v0, v0, #0
; CHECK-NEXT:    and.16b v0, v1, v0
; CHECK-NEXT:    ret
bb:
  %inst = fcmp olt <4 x float> zeroinitializer, %arg
  %inst1 = shufflevector <4 x i1> %inst, <4 x i1> zeroinitializer, <2 x i32> <i32 2, i32 0>
  %inst2 = shufflevector <2 x i1> %inst1, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 0, i32 1, i32 1>
  %inst3 = select <4 x i1> %inst2, <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, <4 x float> zeroinitializer
  ret <4 x float> %inst3
}

define <4 x i32> @shuffle_zip2(<4 x i32> %arg) {
; CHECK-LABEL: shuffle_zip2:
; CHECK:       // %bb.0: // %bb
; CHECK-NEXT:    cmtst.4s v0, v0, v0
; CHECK-NEXT:    uzp1.8h v1, v0, v0
; CHECK-NEXT:    xtn.4h v0, v0
; CHECK-NEXT:    xtn.4h v1, v1
; CHECK-NEXT:    zip2.4h v0, v0, v1
; CHECK-NEXT:    movi.4s v1, #1
; CHECK-NEXT:    zip1.4h v0, v0, v0
; CHECK-NEXT:    ushll.4s v0, v0, #0
; CHECK-NEXT:    and.16b v0, v0, v1
; CHECK-NEXT:    ret
bb:
  %inst = icmp ult <4 x i32> zeroinitializer, %arg
  %inst1 = shufflevector <4 x i1> %inst, <4 x i1> zeroinitializer, <2 x i32> <i32 2, i32 0>
  %inst2 = shufflevector <2 x i1> %inst1, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 0, i32 1, i32 1>
  %inst3 = select <4 x i1> %inst2, <4 x i32> <i32 1, i32 1, i32 1, i32 1>, <4 x i32> zeroinitializer
  ret <4 x i32> %inst3
}

define <4 x i32> @shuffle_zip3(<4 x i32> %arg) {
; CHECK-LABEL: shuffle_zip3:
; CHECK:       // %bb.0: // %bb
; CHECK-NEXT:    cmgt.4s v0, v0, #0
; CHECK-NEXT:    uzp1.8h v1, v0, v0
; CHECK-NEXT:    xtn.4h v0, v0
; CHECK-NEXT:    xtn.4h v1, v1
; CHECK-NEXT:    zip2.4h v0, v0, v1
; CHECK-NEXT:    movi.4s v1, #1
; CHECK-NEXT:    zip1.4h v0, v0, v0
; CHECK-NEXT:    sshll.4s v0, v0, #0
; CHECK-NEXT:    and.16b v0, v0, v1
; CHECK-NEXT:    ret
bb:
  %inst = icmp slt <4 x i32> zeroinitializer, %arg
  %inst1 = shufflevector <4 x i1> %inst, <4 x i1> zeroinitializer, <2 x i32> <i32 2, i32 0>
  %inst2 = shufflevector <2 x i1> %inst1, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 0, i32 1, i32 1>
  %inst3 = select <4 x i1> %inst2, <4 x i32> <i32 1, i32 1, i32 1, i32 1>, <4 x i32> zeroinitializer
  ret <4 x i32> %inst3
}