; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=aarch64--linux-gnu | FileCheck %s --check-prefix=CHECKLE
; RUN: llc < %s -mtriple=aarch64_be--linux-gnu | FileCheck %s --check-prefix=CHECKBE

; Expected AArch64 code for a collection of shufflevector patterns. Each
; function is compiled twice: for a little-endian triple (checked with the
; CHECKLE prefix) and for a big-endian triple (checked with the CHECKBE prefix).
; The big-endian expectations generally contain additional rev64/ext
; instructions around the shuffle sequence.
;
; NOTE(review): in this copy every shufflevector has lost its constant mask
; operand, and the shl in @extract_shuffle has lost its shift-amount vector, so
; the IR does not parse as written. Restore the operands from the original test
; before running it. Lane lists quoted in the comments below were read off the
; little-endian expected instructions, assuming the first vector argument
; arrives in the lowest-numbered v registers and numbering the first operand's
; lanes before the second's. They are hints for review, not a replacement for
; the original masks (which may also have contained undef lanes).

; Full 16-lane shuffle of two <16 x i32> inputs.
; NOTE(review): the little-endian sequence assembles lanes
; <29, 26, 7, 4, 3, 6, 5, 2, 9, 8, 17, 28, 27, 16, 31, 30> (assuming %x in
; v0-v3 and %y in v4-v7) - confirm.
define <16 x i32> @test_shuf1(<16 x i32> %x, <16 x i32> %y) {
; CHECKLE-LABEL: test_shuf1:
; CHECKLE:       // %bb.0:
; CHECKLE-NEXT:    ext v3.16b, v6.16b, v1.16b, #4
; CHECKLE-NEXT:    uzp1 v5.4s, v1.4s, v0.4s
; CHECKLE-NEXT:    uzp2 v16.4s, v2.4s, v4.4s
; CHECKLE-NEXT:    dup v17.4s, v4.s[0]
; CHECKLE-NEXT:    trn2 v4.4s, v1.4s, v3.4s
; CHECKLE-NEXT:    mov v17.s[0], v6.s[3]
; CHECKLE-NEXT:    trn2 v1.4s, v5.4s, v1.4s
; CHECKLE-NEXT:    rev64 v3.4s, v7.4s
; CHECKLE-NEXT:    trn1 v2.4s, v16.4s, v2.4s
; CHECKLE-NEXT:    mov v4.s[0], v7.s[1]
; CHECKLE-NEXT:    ext v1.16b, v0.16b, v1.16b, #12
; CHECKLE-NEXT:    mov v3.d[0], v17.d[0]
; CHECKLE-NEXT:    mov v2.s[3], v7.s[0]
; CHECKLE-NEXT:    mov v0.16b, v4.16b
; CHECKLE-NEXT:    ret
;
; CHECKBE-LABEL: test_shuf1:
; CHECKBE:       // %bb.0:
; CHECKBE-NEXT:    rev64 v1.4s, v1.4s
; CHECKBE-NEXT:    rev64 v3.4s, v6.4s
; CHECKBE-NEXT:    rev64 v0.4s, v0.4s
; CHECKBE-NEXT:    rev64 v2.4s, v2.4s
; CHECKBE-NEXT:    rev64 v4.4s, v4.4s
; CHECKBE-NEXT:    rev64 v5.4s, v7.4s
; CHECKBE-NEXT:    ext v1.16b, v1.16b, v1.16b, #8
; CHECKBE-NEXT:    ext v3.16b, v3.16b, v3.16b, #8
; CHECKBE-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
; CHECKBE-NEXT:    ext v2.16b, v2.16b, v2.16b, #8
; CHECKBE-NEXT:    ext v4.16b, v4.16b, v4.16b, #8
; CHECKBE-NEXT:    ext v5.16b, v5.16b, v5.16b, #8
; CHECKBE-NEXT:    ext v6.16b, v3.16b, v1.16b, #4
; CHECKBE-NEXT:    uzp1 v16.4s, v1.4s, v0.4s
; CHECKBE-NEXT:    uzp2 v7.4s, v2.4s, v4.4s
; CHECKBE-NEXT:    dup v4.4s, v4.s[0]
; CHECKBE-NEXT:    rev64 v17.4s, v5.4s
; CHECKBE-NEXT:    trn2 v6.4s, v1.4s, v6.4s
; CHECKBE-NEXT:    mov v4.s[0], v3.s[3]
; CHECKBE-NEXT:    trn2 v1.4s, v16.4s, v1.4s
; CHECKBE-NEXT:    trn1 v2.4s, v7.4s, v2.4s
; CHECKBE-NEXT:    rev64 v3.4s, v17.4s
; CHECKBE-NEXT:    mov v6.s[0], v5.s[1]
; CHECKBE-NEXT:    rev64 v4.4s, v4.4s
; CHECKBE-NEXT:    ext v0.16b, v0.16b, v1.16b, #12
; CHECKBE-NEXT:    mov v2.s[3], v5.s[0]
; CHECKBE-NEXT:    rev64 v1.4s, v6.4s
; CHECKBE-NEXT:    mov v3.d[0], v4.d[0]
; CHECKBE-NEXT:    rev64 v4.4s, v0.4s
; CHECKBE-NEXT:    rev64 v2.4s, v2.4s
; CHECKBE-NEXT:    ext v0.16b, v1.16b, v1.16b, #8
; CHECKBE-NEXT:    ext v3.16b, v3.16b, v3.16b, #8
; CHECKBE-NEXT:    ext v1.16b, v4.16b, v4.16b, #8
; CHECKBE-NEXT:    ext v2.16b, v2.16b, v2.16b, #8
; CHECKBE-NEXT:    ret
  %s3 = shufflevector <16 x i32> %x, <16 x i32> %y, <16 x i32>
  ret <16 x i32> %s3
}

; Four lanes picked out of two <16 x i32> inputs.
; NOTE(review): the little-endian sequence yields lanes <29, 26, 7, 4> - confirm.
define <4 x i32> @test_shuf2(<16 x i32> %x, <16 x i32> %y) {
; CHECKLE-LABEL: test_shuf2:
; CHECKLE:       // %bb.0:
; CHECKLE-NEXT:    zip2 v0.4s, v7.4s, v6.4s
; CHECKLE-NEXT:    trn2 v2.4s, v7.4s, v0.4s
; CHECKLE-NEXT:    ext v0.16b, v1.16b, v1.16b, #4
; CHECKLE-NEXT:    mov v0.d[0], v2.d[0]
; CHECKLE-NEXT:    ret
;
; CHECKBE-LABEL: test_shuf2:
; CHECKBE:       // %bb.0:
; CHECKBE-NEXT:    rev64 v0.4s, v6.4s
; CHECKBE-NEXT:    rev64 v2.4s, v7.4s
; CHECKBE-NEXT:    rev64 v1.4s, v1.4s
; CHECKBE-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
; CHECKBE-NEXT:    ext v2.16b, v2.16b, v2.16b, #8
; CHECKBE-NEXT:    ext v1.16b, v1.16b, v1.16b, #8
; CHECKBE-NEXT:    zip2 v0.4s, v2.4s, v0.4s
; CHECKBE-NEXT:    ext v1.16b, v1.16b, v1.16b, #4
; CHECKBE-NEXT:    trn2 v0.4s, v2.4s, v0.4s
; CHECKBE-NEXT:    rev64 v1.4s, v1.4s
; CHECKBE-NEXT:    rev64 v0.4s, v0.4s
; CHECKBE-NEXT:    mov v1.d[0], v0.d[0]
; CHECKBE-NEXT:    ext v0.16b, v1.16b, v1.16b, #8
; CHECKBE-NEXT:    ret
  %s3 = shufflevector <16 x i32> %x, <16 x i32> %y, <4 x i32>
  ret <4 x i32> %s3
}

; Four lanes picked out of two <16 x i32> inputs; the expected code only reads
; v0 and v1.
; NOTE(review): the little-endian sequence yields lanes <3, 6, 5, 2> - confirm.
define <4 x i32> @test_shuf3(<16 x i32> %x, <16 x i32> %y) {
; CHECKLE-LABEL: test_shuf3:
; CHECKLE:       // %bb.0:
; CHECKLE-NEXT:    uzp1 v2.4s, v1.4s, v0.4s
; CHECKLE-NEXT:    trn2 v1.4s, v2.4s, v1.4s
; CHECKLE-NEXT:    ext v0.16b, v0.16b, v1.16b, #12
; CHECKLE-NEXT:    ret
;
; CHECKBE-LABEL: test_shuf3:
; CHECKBE:       // %bb.0:
; CHECKBE-NEXT:    rev64 v0.4s, v0.4s
; CHECKBE-NEXT:    rev64 v1.4s, v1.4s
; CHECKBE-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
; CHECKBE-NEXT:    ext v1.16b, v1.16b, v1.16b, #8
; CHECKBE-NEXT:    uzp1 v2.4s, v1.4s, v0.4s
; CHECKBE-NEXT:    trn2 v1.4s, v2.4s, v1.4s
; CHECKBE-NEXT:    ext v0.16b, v0.16b, v1.16b, #12
; CHECKBE-NEXT:    rev64 v0.4s, v0.4s
; CHECKBE-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
; CHECKBE-NEXT:    ret
  %s3 = shufflevector <16 x i32> %x, <16 x i32> %y, <4 x i32>
  ret <4 x i32> %s3
}

; Four lanes picked out of two <16 x i32> inputs.
; NOTE(review): the little-endian sequence yields lanes <9, 8, 17, 28> - confirm.
define <4 x i32> @test_shuf4(<16 x i32> %x, <16 x i32> %y) {
; CHECKLE-LABEL: test_shuf4:
; CHECKLE:       // %bb.0:
; CHECKLE-NEXT:    uzp2 v0.4s, v2.4s, v4.4s
; CHECKLE-NEXT:    trn1 v0.4s, v0.4s, v2.4s
; CHECKLE-NEXT:    mov v0.s[3], v7.s[0]
; CHECKLE-NEXT:    ret
;
; CHECKBE-LABEL: test_shuf4:
; CHECKBE:       // %bb.0:
; CHECKBE-NEXT:    rev64 v0.4s, v4.4s
; CHECKBE-NEXT:    rev64 v1.4s, v2.4s
; CHECKBE-NEXT:    rev64 v2.4s, v7.4s
; CHECKBE-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
; CHECKBE-NEXT:    ext v1.16b, v1.16b, v1.16b, #8
; CHECKBE-NEXT:    ext v2.16b, v2.16b, v2.16b, #8
; CHECKBE-NEXT:    uzp2 v0.4s, v1.4s, v0.4s
; CHECKBE-NEXT:    trn1 v0.4s, v0.4s, v1.4s
; CHECKBE-NEXT:    mov v0.s[3], v2.s[0]
; CHECKBE-NEXT:    rev64 v0.4s, v0.4s
; CHECKBE-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
; CHECKBE-NEXT:    ret
  %s3 = shufflevector <16 x i32> %x, <16 x i32> %y, <4 x i32>
  ret <4 x i32> %s3
}

; Four lanes picked out of two <16 x i32> inputs; the expected code only reads
; v4, v6 and v7.
; NOTE(review): the little-endian sequence yields lanes <27, 16, 31, 30> - confirm.
define <4 x i32> @test_shuf5(<16 x i32> %x, <16 x i32> %y) {
; CHECKLE-LABEL: test_shuf5:
; CHECKLE:       // %bb.0:
; CHECKLE-NEXT:    ext v1.16b, v6.16b, v4.16b, #12
; CHECKLE-NEXT:    rev64 v0.4s, v7.4s
; CHECKLE-NEXT:    mov v0.d[0], v1.d[0]
; CHECKLE-NEXT:    ret
;
; CHECKBE-LABEL: test_shuf5:
; CHECKBE:       // %bb.0:
; CHECKBE-NEXT:    rev64 v0.4s, v7.4s
; CHECKBE-NEXT:    rev64 v1.4s, v4.4s
; CHECKBE-NEXT:    rev64 v2.4s, v6.4s
; CHECKBE-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
; CHECKBE-NEXT:    ext v1.16b, v1.16b, v1.16b, #8
; CHECKBE-NEXT:    ext v2.16b, v2.16b, v2.16b, #8
; CHECKBE-NEXT:    ext v1.16b, v2.16b, v1.16b, #12
; CHECKBE-NEXT:    rev64 v0.4s, v0.4s
; CHECKBE-NEXT:    rev64 v1.4s, v1.4s
; CHECKBE-NEXT:    rev64 v0.4s, v0.4s
; CHECKBE-NEXT:    mov v0.d[0], v1.d[0]
; CHECKBE-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
; CHECKBE-NEXT:    ret
  %s3 = shufflevector <16 x i32> %x, <16 x i32> %y, <4 x i32>
  ret <4 x i32> %s3
}

; Two-input <4 x i32> shuffle.
; NOTE(review): the little-endian sequence yields lanes <1, 5, 0, 3>, which
; matches the digits in the function name - confirm.
define <4 x i32> @test1503(<4 x i32> %a, <4 x i32> %b)
; CHECKLE-LABEL: test1503:
; CHECKLE:       // %bb.0:
; CHECKLE-NEXT:    zip1 v1.4s, v0.4s, v1.4s
; CHECKLE-NEXT:    ext v1.16b, v1.16b, v0.16b, #8
; CHECKLE-NEXT:    mov v1.s[3], v0.s[3]
; CHECKLE-NEXT:    mov v0.16b, v1.16b
; CHECKLE-NEXT:    ret
;
; CHECKBE-LABEL: test1503:
; CHECKBE:       // %bb.0:
; CHECKBE-NEXT:    rev64 v1.4s, v1.4s
; CHECKBE-NEXT:    rev64 v0.4s, v0.4s
; CHECKBE-NEXT:    ext v1.16b, v1.16b, v1.16b, #8
; CHECKBE-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
; CHECKBE-NEXT:    zip1 v1.4s, v0.4s, v1.4s
; CHECKBE-NEXT:    ext v1.16b, v1.16b, v0.16b, #8
; CHECKBE-NEXT:    mov v1.s[3], v0.s[3]
; CHECKBE-NEXT:    rev64 v0.4s, v1.4s
; CHECKBE-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
; CHECKBE-NEXT:    ret
{
  %r = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32>
  ret <4 x i32> %r
}

; Two-input <4 x i32> shuffle.
; NOTE(review): the little-endian sequence yields lanes <4, 3, 6, 6>, which
; matches the digits in the function name - confirm.
define <4 x i32> @test4366(<4 x i32> %a, <4 x i32> %b)
; CHECKLE-LABEL: test4366:
; CHECKLE:       // %bb.0:
; CHECKLE-NEXT:    trn1 v1.4s, v1.4s, v1.4s
; CHECKLE-NEXT:    mov v1.s[1], v0.s[3]
; CHECKLE-NEXT:    mov v0.16b, v1.16b
; CHECKLE-NEXT:    ret
;
; CHECKBE-LABEL: test4366:
; CHECKBE:       // %bb.0:
; CHECKBE-NEXT:    rev64 v1.4s, v1.4s
; CHECKBE-NEXT:    rev64 v0.4s, v0.4s
; CHECKBE-NEXT:    ext v1.16b, v1.16b, v1.16b, #8
; CHECKBE-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
; CHECKBE-NEXT:    trn1 v1.4s, v1.4s, v1.4s
; CHECKBE-NEXT:    mov v1.s[1], v0.s[3]
; CHECKBE-NEXT:    rev64 v0.4s, v1.4s
; CHECKBE-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
; CHECKBE-NEXT:    ret
{
  %r = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32>
  ret <4 x i32> %r
}

; Two-input <4 x i32> shuffle.
; NOTE(review): the little-endian sequence yields lanes <7, 3, 6, 7>, which
; matches the digits in the function name - confirm.
define <4 x i32> @test7367(<4 x i32> %a, <4 x i32> %b)
; CHECKLE-LABEL: test7367:
; CHECKLE:       // %bb.0:
; CHECKLE-NEXT:    mov v2.16b, v1.16b
; CHECKLE-NEXT:    mov v2.d[0], v0.d[1]
; CHECKLE-NEXT:    mov v2.s[0], v1.s[3]
; CHECKLE-NEXT:    mov v0.16b, v2.16b
; CHECKLE-NEXT:    ret
;
; CHECKBE-LABEL: test7367:
; CHECKBE:       // %bb.0:
; CHECKBE-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
; CHECKBE-NEXT:    ext v2.16b, v1.16b, v1.16b, #8
; CHECKBE-NEXT:    rev64 v1.4s, v1.4s
; CHECKBE-NEXT:    mov v2.d[0], v0.d[1]
; CHECKBE-NEXT:    ext v0.16b, v1.16b, v1.16b, #8
; CHECKBE-NEXT:    rev64 v1.4s, v2.4s
; CHECKBE-NEXT:    mov v1.s[0], v0.s[3]
; CHECKBE-NEXT:    rev64 v0.4s, v1.4s
; CHECKBE-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
; CHECKBE-NEXT:    ret
{
  %r = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32>
  ret <4 x i32> %r
}

; Two-input <4 x i32> shuffle.
; NOTE(review): the little-endian sequence yields lanes <4, 0, 4, 5>, which
; matches the digits in the function name - confirm.
define <4 x i32> @test4045(<4 x i32> %a, <4 x i32> %b)
; CHECKLE-LABEL: test4045:
; CHECKLE:       // %bb.0:
; CHECKLE-NEXT:    trn1 v0.4s, v1.4s, v0.4s
; CHECKLE-NEXT:    mov v0.d[1], v1.d[0]
; CHECKLE-NEXT:    ret
;
; CHECKBE-LABEL: test4045:
; CHECKBE:       // %bb.0:
; CHECKBE-NEXT:    rev64 v0.4s, v0.4s
; CHECKBE-NEXT:    rev64 v2.4s, v1.4s
; CHECKBE-NEXT:    ext v1.16b, v1.16b, v1.16b, #8
; CHECKBE-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
; CHECKBE-NEXT:    ext v2.16b, v2.16b, v2.16b, #8
; CHECKBE-NEXT:    trn1 v0.4s, v2.4s, v0.4s
; CHECKBE-NEXT:    rev64 v0.4s, v0.4s
; CHECKBE-NEXT:    mov v0.d[1], v1.d[0]
; CHECKBE-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
; CHECKBE-NEXT:    ret
{
  %r = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32>
  ret <4 x i32> %r
}

; Two-input <4 x i32> shuffle.
; NOTE(review): the little-endian sequence yields lanes <0, 0, 6, 7>, which
; matches the digits in the function name - confirm.
define <4 x i32> @test0067(<4 x i32> %a, <4 x i32> %b)
; CHECKLE-LABEL: test0067:
; CHECKLE:       // %bb.0:
; CHECKLE-NEXT:    trn1 v0.4s, v0.4s, v0.4s
; CHECKLE-NEXT:    mov v0.d[1], v1.d[1]
; CHECKLE-NEXT:    ret
;
; CHECKBE-LABEL: test0067:
; CHECKBE:       // %bb.0:
; CHECKBE-NEXT:    rev64 v0.4s, v0.4s
; CHECKBE-NEXT:    ext v1.16b, v1.16b, v1.16b, #8
; CHECKBE-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
; CHECKBE-NEXT:    trn1 v0.4s, v0.4s, v0.4s
; CHECKBE-NEXT:    rev64 v0.4s, v0.4s
; CHECKBE-NEXT:    mov v0.d[1], v1.d[1]
; CHECKBE-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
; CHECKBE-NEXT:    ret
{
  %r = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32>
  ret <4 x i32> %r
}

; Two-input <4 x i32> shuffle expected to lower to a lane insert plus trn1.
; NOTE(review): the little-endian sequence yields lanes <0, 0, 7, 7> - confirm.
define <4 x i32> @test_shuf6(<4 x i32> %a, <4 x i32> %b)
; CHECKLE-LABEL: test_shuf6:
; CHECKLE:       // %bb.0:
; CHECKLE-NEXT:    mov v0.s[2], v1.s[3]
; CHECKLE-NEXT:    trn1 v0.4s, v0.4s, v0.4s
; CHECKLE-NEXT:    ret
;
; CHECKBE-LABEL: test_shuf6:
; CHECKBE:       // %bb.0:
; CHECKBE-NEXT:    rev64 v1.4s, v1.4s
; CHECKBE-NEXT:    rev64 v0.4s, v0.4s
; CHECKBE-NEXT:    ext v1.16b, v1.16b, v1.16b, #8
; CHECKBE-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
; CHECKBE-NEXT:    mov v0.s[2], v1.s[3]
; CHECKBE-NEXT:    trn1 v0.4s, v0.4s, v0.4s
; CHECKBE-NEXT:    rev64 v0.4s, v0.4s
; CHECKBE-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
; CHECKBE-NEXT:    ret
{
  %r = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32>
  ret <4 x i32> %r
}

; Same instruction shape as test_shuf6, on 64-bit <4 x i16> vectors (.4h/.h
; forms; the big-endian expectation needs rev64 only, no ext).
; NOTE(review): the little-endian sequence yields lanes <0, 0, 7, 7> - confirm.
define <4 x i16> @test_shuf7(<4 x i16> %a, <4 x i16> %b)
; CHECKLE-LABEL: test_shuf7:
; CHECKLE:       // %bb.0:
; CHECKLE-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECKLE-NEXT:    // kill: def $d1 killed $d1 def $q1
; CHECKLE-NEXT:    mov v0.h[2], v1.h[3]
; CHECKLE-NEXT:    trn1 v0.4h, v0.4h, v0.4h
; CHECKLE-NEXT:    ret
;
; CHECKBE-LABEL: test_shuf7:
; CHECKBE:       // %bb.0:
; CHECKBE-NEXT:    rev64 v1.4h, v1.4h
; CHECKBE-NEXT:    rev64 v0.4h, v0.4h
; CHECKBE-NEXT:    mov v0.h[2], v1.h[3]
; CHECKBE-NEXT:    trn1 v0.4h, v0.4h, v0.4h
; CHECKBE-NEXT:    rev64 v0.4h, v0.4h
; CHECKBE-NEXT:    ret
{
  %r = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32>
  ret <4 x i16> %r
}

; <8 x i8> shuffle expected to lower to tbl: both 64-bit inputs are packed into
; one q register and indexed by a vector loaded from constant-pool entry
; .LCPI12_0.
; NOTE(review): the pool contents are not part of the checks, so the mask cannot
; be recovered from this file.
define <8 x i8> @test_shuf8(<8 x i8> %a, <8 x i8> %b)
; CHECKLE-LABEL: test_shuf8:
; CHECKLE:       // %bb.0:
; CHECKLE-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECKLE-NEXT:    // kill: def $d1 killed $d1 def $q1
; CHECKLE-NEXT:    adrp x8, .LCPI12_0
; CHECKLE-NEXT:    mov v0.d[1], v1.d[0]
; CHECKLE-NEXT:    ldr d1, [x8, :lo12:.LCPI12_0]
; CHECKLE-NEXT:    tbl v0.8b, { v0.16b }, v1.8b
; CHECKLE-NEXT:    ret
;
; CHECKBE-LABEL: test_shuf8:
; CHECKBE:       // %bb.0:
; CHECKBE-NEXT:    rev64 v0.8b, v0.8b
; CHECKBE-NEXT:    rev64 v1.8b, v1.8b
; CHECKBE-NEXT:    adrp x8, .LCPI12_0
; CHECKBE-NEXT:    add x8, x8, :lo12:.LCPI12_0
; CHECKBE-NEXT:    mov v0.d[1], v1.d[0]
; CHECKBE-NEXT:    ld1 { v1.8b }, [x8]
; CHECKBE-NEXT:    tbl v0.8b, { v0.16b }, v1.8b
; CHECKBE-NEXT:    rev64 v0.8b, v0.8b
; CHECKBE-NEXT:    ret
{
  %r = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32>
  ret <8 x i8> %r
}

; <8 x i16> shuffle expected to lower to a two-register tbl indexed by a vector
; loaded from constant-pool entry .LCPI13_0.
; NOTE(review): the pool contents are not part of the checks, so the mask cannot
; be recovered from this file.
define <8 x i16> @test_shuf9(<8 x i16> %a, <8 x i16> %b)
; CHECKLE-LABEL: test_shuf9:
; CHECKLE:       // %bb.0:
; CHECKLE-NEXT:    adrp x8, .LCPI13_0
; CHECKLE-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
; CHECKLE-NEXT:    ldr q2, [x8, :lo12:.LCPI13_0]
; CHECKLE-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECKLE-NEXT:    tbl v0.16b, { v0.16b, v1.16b }, v2.16b
; CHECKLE-NEXT:    ret
;
; CHECKBE-LABEL: test_shuf9:
; CHECKBE:       // %bb.0:
; CHECKBE-NEXT:    rev64 v1.16b, v1.16b
; CHECKBE-NEXT:    rev64 v0.16b, v0.16b
; CHECKBE-NEXT:    adrp x8, .LCPI13_0
; CHECKBE-NEXT:    add x8, x8, :lo12:.LCPI13_0
; CHECKBE-NEXT:    ext v2.16b, v1.16b, v1.16b, #8
; CHECKBE-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
; CHECKBE-NEXT:    ld1 { v0.16b }, [x8]
; CHECKBE-NEXT:    tbl v0.16b, { v1.16b, v2.16b }, v0.16b
; CHECKBE-NEXT:    rev64 v0.16b, v0.16b
; CHECKBE-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
; CHECKBE-NEXT:    ret
{
  %r = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32>
  ret <8 x i16> %r
}

; <16 x i8> shuffle expected to lower to a single-register tbl indexed by a
; vector loaded from constant-pool entry .LCPI14_0. The expected code passes
; only v0 as the table and overwrites v1 with the index vector, so the mask
; presumably selects from %a only.
; NOTE(review): the pool contents are not part of the checks, so the mask cannot
; be recovered from this file.
define <16 x i8> @test_shuf10(<16 x i8> %a, <16 x i8> %b)
; CHECKLE-LABEL: test_shuf10:
; CHECKLE:       // %bb.0:
; CHECKLE-NEXT:    adrp x8, .LCPI14_0
; CHECKLE-NEXT:    ldr q1, [x8, :lo12:.LCPI14_0]
; CHECKLE-NEXT:    tbl v0.16b, { v0.16b }, v1.16b
; CHECKLE-NEXT:    ret
;
; CHECKBE-LABEL: test_shuf10:
; CHECKBE:       // %bb.0:
; CHECKBE-NEXT:    rev64 v0.16b, v0.16b
; CHECKBE-NEXT:    adrp x8, .LCPI14_0
; CHECKBE-NEXT:    add x8, x8, :lo12:.LCPI14_0
; CHECKBE-NEXT:    ld1 { v1.16b }, [x8]
; CHECKBE-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
; CHECKBE-NEXT:    tbl v0.16b, { v0.16b }, v1.16b
; CHECKBE-NEXT:    rev64 v0.16b, v0.16b
; CHECKBE-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
; CHECKBE-NEXT:    ret
{
  %r = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32>
  ret <16 x i8> %r
}

; <8 x half> shuffle expected to lower to a two-register tbl indexed by a vector
; loaded from constant-pool entry .LCPI15_0.
; NOTE(review): the pool contents are not part of the checks, so the mask cannot
; be recovered from this file.
define <8 x half> @test_shuf11(<8 x half> %a, <8 x half> %b)
; CHECKLE-LABEL: test_shuf11:
; CHECKLE:       // %bb.0:
; CHECKLE-NEXT:    adrp x8, .LCPI15_0
; CHECKLE-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
; CHECKLE-NEXT:    ldr q2, [x8, :lo12:.LCPI15_0]
; CHECKLE-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECKLE-NEXT:    tbl v0.16b, { v0.16b, v1.16b }, v2.16b
; CHECKLE-NEXT:    ret
;
; CHECKBE-LABEL: test_shuf11:
; CHECKBE:       // %bb.0:
; CHECKBE-NEXT:    rev64 v1.16b, v1.16b
; CHECKBE-NEXT:    rev64 v0.16b, v0.16b
; CHECKBE-NEXT:    adrp x8, .LCPI15_0
; CHECKBE-NEXT:    add x8, x8, :lo12:.LCPI15_0
; CHECKBE-NEXT:    ext v2.16b, v1.16b, v1.16b, #8
; CHECKBE-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
; CHECKBE-NEXT:    ld1 { v0.16b }, [x8]
; CHECKBE-NEXT:    tbl v0.16b, { v1.16b, v2.16b }, v0.16b
; CHECKBE-NEXT:    rev64 v0.16b, v0.16b
; CHECKBE-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
; CHECKBE-NEXT:    ret
{
  %r = shufflevector <8 x half> %a, <8 x half> %b, <8 x i32>
  ret <8 x half> %r
}

; <8 x half> shuffle with the same expected two-register tbl shape as
; test_shuf11; index vector comes from constant-pool entry .LCPI16_0.
; NOTE(review): the pool contents are not part of the checks, so the mask cannot
; be recovered from this file.
define <8 x half> @test_shuf12(<8 x half> %a, <8 x half> %b)
; CHECKLE-LABEL: test_shuf12:
; CHECKLE:       // %bb.0:
; CHECKLE-NEXT:    adrp x8, .LCPI16_0
; CHECKLE-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
; CHECKLE-NEXT:    ldr q2, [x8, :lo12:.LCPI16_0]
; CHECKLE-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECKLE-NEXT:    tbl v0.16b, { v0.16b, v1.16b }, v2.16b
; CHECKLE-NEXT:    ret
;
; CHECKBE-LABEL: test_shuf12:
; CHECKBE:       // %bb.0:
; CHECKBE-NEXT:    rev64 v1.16b, v1.16b
; CHECKBE-NEXT:    rev64 v0.16b, v0.16b
; CHECKBE-NEXT:    adrp x8, .LCPI16_0
; CHECKBE-NEXT:    add x8, x8, :lo12:.LCPI16_0
; CHECKBE-NEXT:    ext v2.16b, v1.16b, v1.16b, #8
; CHECKBE-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
; CHECKBE-NEXT:    ld1 { v0.16b }, [x8]
; CHECKBE-NEXT:    tbl v0.16b, { v1.16b, v2.16b }, v0.16b
; CHECKBE-NEXT:    rev64 v0.16b, v0.16b
; CHECKBE-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
; CHECKBE-NEXT:    ret
{
  %r = shufflevector <8 x half> %a, <8 x half> %b, <8 x i32>
  ret <8 x half> %r
}

; <8 x half> shuffle with the same expected two-register tbl shape as
; test_shuf11; index vector comes from constant-pool entry .LCPI17_0.
; NOTE(review): the pool contents are not part of the checks, so the mask cannot
; be recovered from this file.
define <8 x half> @test_shuf13(<8 x half> %a, <8 x half> %b)
; CHECKLE-LABEL: test_shuf13:
; CHECKLE:       // %bb.0:
; CHECKLE-NEXT:    adrp x8, .LCPI17_0
; CHECKLE-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
; CHECKLE-NEXT:    ldr q2, [x8, :lo12:.LCPI17_0]
; CHECKLE-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECKLE-NEXT:    tbl v0.16b, { v0.16b, v1.16b }, v2.16b
; CHECKLE-NEXT:    ret
;
; CHECKBE-LABEL: test_shuf13:
; CHECKBE:       // %bb.0:
; CHECKBE-NEXT:    rev64 v1.16b, v1.16b
; CHECKBE-NEXT:    rev64 v0.16b, v0.16b
; CHECKBE-NEXT:    adrp x8, .LCPI17_0
; CHECKBE-NEXT:    add x8, x8, :lo12:.LCPI17_0
; CHECKBE-NEXT:    ext v2.16b, v1.16b, v1.16b, #8
; CHECKBE-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
; CHECKBE-NEXT:    ld1 { v0.16b }, [x8]
; CHECKBE-NEXT:    tbl v0.16b, { v1.16b, v2.16b }, v0.16b
; CHECKBE-NEXT:    rev64 v0.16b, v0.16b
; CHECKBE-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
; CHECKBE-NEXT:    ret
{
  %r = shufflevector <8 x half> %a, <8 x half> %b, <8 x i32>
  ret <8 x half> %r
}

; <8 x half> shuffle with the same expected two-register tbl shape as
; test_shuf11; index vector comes from constant-pool entry .LCPI18_0.
; NOTE(review): the pool contents are not part of the checks, so the mask cannot
; be recovered from this file.
define <8 x half> @test_shuf14(<8 x half> %a, <8 x half> %b)
; CHECKLE-LABEL: test_shuf14:
; CHECKLE:       // %bb.0:
; CHECKLE-NEXT:    adrp x8, .LCPI18_0
; CHECKLE-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
; CHECKLE-NEXT:    ldr q2, [x8, :lo12:.LCPI18_0]
; CHECKLE-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECKLE-NEXT:    tbl v0.16b, { v0.16b, v1.16b }, v2.16b
; CHECKLE-NEXT:    ret
;
; CHECKBE-LABEL: test_shuf14:
; CHECKBE:       // %bb.0:
; CHECKBE-NEXT:    rev64 v1.16b, v1.16b
; CHECKBE-NEXT:    rev64 v0.16b, v0.16b
; CHECKBE-NEXT:    adrp x8, .LCPI18_0
; CHECKBE-NEXT:    add x8, x8, :lo12:.LCPI18_0
; CHECKBE-NEXT:    ext v2.16b, v1.16b, v1.16b, #8
; CHECKBE-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
; CHECKBE-NEXT:    ld1 { v0.16b }, [x8]
; CHECKBE-NEXT:    tbl v0.16b, { v1.16b, v2.16b }, v0.16b
; CHECKBE-NEXT:    rev64 v0.16b, v0.16b
; CHECKBE-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
; CHECKBE-NEXT:    ret
{
  %r = shufflevector <8 x half> %a, <8 x half> %b, <8 x i32>
  ret <8 x half> %r
}

; <8 x half> shuffle with the same expected two-register tbl shape as
; test_shuf11; index vector comes from constant-pool entry .LCPI19_0.
; NOTE(review): the pool contents are not part of the checks, so the mask cannot
; be recovered from this file.
define <8 x half> @test_shuf15(<8 x half> %a, <8 x half> %b)
; CHECKLE-LABEL: test_shuf15:
; CHECKLE:       // %bb.0:
; CHECKLE-NEXT:    adrp x8, .LCPI19_0
; CHECKLE-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
; CHECKLE-NEXT:    ldr q2, [x8, :lo12:.LCPI19_0]
; CHECKLE-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECKLE-NEXT:    tbl v0.16b, { v0.16b, v1.16b }, v2.16b
; CHECKLE-NEXT:    ret
;
; CHECKBE-LABEL: test_shuf15:
; CHECKBE:       // %bb.0:
; CHECKBE-NEXT:    rev64 v1.16b, v1.16b
; CHECKBE-NEXT:    rev64 v0.16b, v0.16b
; CHECKBE-NEXT:    adrp x8, .LCPI19_0
; CHECKBE-NEXT:    add x8, x8, :lo12:.LCPI19_0
; CHECKBE-NEXT:    ext v2.16b, v1.16b, v1.16b, #8
; CHECKBE-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
; CHECKBE-NEXT:    ld1 { v0.16b }, [x8]
; CHECKBE-NEXT:    tbl v0.16b, { v1.16b, v2.16b }, v0.16b
; CHECKBE-NEXT:    rev64 v0.16b, v0.16b
; CHECKBE-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
; CHECKBE-NEXT:    ret
{
  %r = shufflevector <8 x half> %a, <8 x half> %b, <8 x i32>
  ret <8 x half> %r
}

; Chain of two shuffles on %j, a zext to <4 x i32> and a shl; on little-endian
; the whole chain is expected to become a single ushll2 with shift #3, i.e. a
; widening shift of the upper four halfwords of v0. %k is not used by the body.
; NOTE(review): the ushll2 #3 suggests the missing shl operand is a splat of 3;
; the two shuffle masks cannot be recovered individually from the checks.
define <4 x i32> @extract_shuffle(<8 x i16> %j, <4 x i16> %k) {
; CHECKLE-LABEL: extract_shuffle:
; CHECKLE:       // %bb.0:
; CHECKLE-NEXT:    ushll2 v0.4s, v0.8h, #3
; CHECKLE-NEXT:    ret
;
; CHECKBE-LABEL: extract_shuffle:
; CHECKBE:       // %bb.0:
; CHECKBE-NEXT:    rev64 v0.8h, v0.8h
; CHECKBE-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
; CHECKBE-NEXT:    ushll2 v0.4s, v0.8h, #3
; CHECKBE-NEXT:    rev64 v0.4s, v0.4s
; CHECKBE-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
; CHECKBE-NEXT:    ret
  %a = shufflevector <8 x i16> %j, <8 x i16> poison, <8 x i32>
  %b = shufflevector <8 x i16> %a, <8 x i16> poison, <4 x i32>
  %c = zext <4 x i16> %b to <4 x i32>
  %d = shl <4 x i32> %c,
  ret <4 x i32> %d
}