; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=aarch64--linux-gnu | FileCheck %s

; NOTE(review): this section was restored from a copy in which all statements
; had been run together on one line and every shufflevector mask constant had
; been dropped. Each mask below was rebuilt from the lane moves in the expected
; output (the leading float argument takes v0, so %b arrives in v1 and %a in
; v2). Re-run llc and regenerate the assertions before relying on this file.

; i8

; Overwrite halfword lane 0 of %b with the low two bytes of %a.
define <16 x i8> @insert_v16i8_2_1(float %tmp, <16 x i8> %b, <16 x i8> %a) {
; CHECK-LABEL: insert_v16i8_2_1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov v0.16b, v1.16b
; CHECK-NEXT:    mov v0.h[0], v2.h[0]
; CHECK-NEXT:    ret
  %s2 = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 1, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  ret <16 x i8> %s2
}

; Overwrite halfword lane 1 of %b with the low two bytes of %a.
define <16 x i8> @insert_v16i8_2_2(float %tmp, <16 x i8> %b, <16 x i8> %a) {
; CHECK-LABEL: insert_v16i8_2_2:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov v0.16b, v1.16b
; CHECK-NEXT:    mov v0.h[1], v2.h[0]
; CHECK-NEXT:    ret
  %s2 = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 16, i32 17, i32 0, i32 1, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  ret <16 x i8> %s2
}

; Overwrite halfword lane 6 of %b with the low two bytes of %a.
define <16 x i8> @insert_v16i8_2_6(float %tmp, <16 x i8> %b, <16 x i8> %a) {
; CHECK-LABEL: insert_v16i8_2_6:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov v0.16b, v1.16b
; CHECK-NEXT:    mov v0.h[6], v2.h[0]
; CHECK-NEXT:    ret
  %s2 = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 0, i32 1, i32 30, i32 31>
  ret <16 x i8> %s2
}

; Overwrite word lane 0 of %b with the low four bytes of %a.
define <16 x i8> @insert_v16i8_4_1(float %tmp, <16 x i8> %b, <16 x i8> %a) {
; CHECK-LABEL: insert_v16i8_4_1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov v0.16b, v1.16b
; CHECK-NEXT:    mov v0.s[0], v2.s[0]
; CHECK-NEXT:    ret
  %s2 = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  ret <16 x i8> %s2
}

; Four bytes of %a placed at a position that is not a multiple of four bytes,
; so no single lane move applies and a table lookup is expected instead.
; NOTE(review): the expected output only shows a TBL driven by a constant-pool
; entry, so the exact indices below (byte offset 2) are an assumption - confirm.
define <16 x i8> @insert_v16i8_4_15(float %tmp, <16 x i8> %b, <16 x i8> %a) {
; CHECK-LABEL: insert_v16i8_4_15:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $q2 killed $q2 def $q2_q3
; CHECK-NEXT:    adrp x8, .LCPI4_0
; CHECK-NEXT:    mov v3.16b, v1.16b
; CHECK-NEXT:    ldr q0, [x8, :lo12:.LCPI4_0]
; CHECK-NEXT:    tbl v0.16b, { v2.16b, v3.16b }, v0.16b
; CHECK-NEXT:    ret
  %s2 = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 16, i32 17, i32 0, i32 1, i32 2, i32 3, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  ret <16 x i8> %s2
}

; Overwrite word lane 1 of %b with the low four bytes of %a.
define <16 x i8> @insert_v16i8_4_2(float %tmp, <16 x i8> %b, <16 x i8> %a) {
; CHECK-LABEL: insert_v16i8_4_2:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov v0.16b, v1.16b
; CHECK-NEXT:    mov v0.s[1], v2.s[0]
; CHECK-NEXT:    ret
  %s2 = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 0, i32 1, i32 2, i32 3, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  ret <16 x i8> %s2
}

; Overwrite word lane 2 of %b with the low four bytes of %a.
define <16 x i8> @insert_v16i8_4_3(float %tmp, <16 x i8> %b, <16 x i8> %a) {
; CHECK-LABEL: insert_v16i8_4_3:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov v0.16b, v1.16b
; CHECK-NEXT:    mov v0.s[2], v2.s[0]
; CHECK-NEXT:    ret
  %s2 = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 0, i32 1, i32 2, i32 3, i32 28, i32 29, i32 30, i32 31>
  ret <16 x i8> %s2
}

; Overwrite word lane 3 of %b with the low four bytes of %a.
define <16 x i8> @insert_v16i8_4_4(float %tmp, <16 x i8> %b, <16 x i8> %a) {
; CHECK-LABEL: insert_v16i8_4_4:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov v0.16b, v1.16b
; CHECK-NEXT:    mov v0.s[3], v2.s[0]
; CHECK-NEXT:    ret
  %s2 = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 0, i32 1, i32 2, i32 3>
  ret <16 x i8> %s2
}

; 64-bit vectors: keep the low four bytes of %a, take the high four from %b.
define <8 x i8> @insert_v8i8_4_1(float %tmp, <8 x i8> %b, <8 x i8> %a) {
; CHECK-LABEL: insert_v8i8_4_1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fmov d0, d2
; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT:    mov v0.s[1], v1.s[1]
; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT:    ret
  %s2 = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 14, i32 15>
  ret <8 x i8> %s2
}

; 64-bit vectors: keep the low four bytes of %b, put the low four bytes of %a
; in the high half.
define <8 x i8> @insert_v8i8_4_2(float %tmp, <8 x i8> %b, <8 x i8> %a) {
; CHECK-LABEL: insert_v8i8_4_2:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fmov d0, d1
; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
; CHECK-NEXT:    mov v0.s[1], v2.s[0]
; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT:    ret
  %s2 = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 0, i32 1, i32 2, i32 3>
  ret <8 x i8> %s2
}

; Low eight bytes from %a, high eight bytes from %b.
define <16 x i8> @insert_v16i8_8_1(float %tmp, <16 x i8> %b, <16 x i8> %a) {
; CHECK-LABEL: insert_v16i8_8_1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov v0.16b, v2.16b
; CHECK-NEXT:    mov v0.d[1], v1.d[1]
; CHECK-NEXT:    ret
  %s2 = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  ret <16 x i8> %s2
}

; Low eight bytes from %b, low eight bytes of %a in the high half.
define <16 x i8> @insert_v16i8_8_2(float %tmp, <16 x i8> %b, <16 x i8> %a) {
; CHECK-LABEL: insert_v16i8_8_2:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov v0.16b, v1.16b
; CHECK-NEXT:    mov v0.d[1], v2.d[0]
; CHECK-NEXT:    ret
  %s2 = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <16 x i8> %s2
}

; i16
; NOTE(review): everything from here to the end of the file was restored from a
; copy in which all statements had been run together and every shufflevector
; mask constant had been dropped. Masks were rebuilt from the lane moves in the
; expected output (the leading float argument takes v0, so a vector %b arrives
; in v1, a vector %a in v2, and a pointer %a in x0). Undefined lanes in the
; widening shuffles are written as "i32 undef"; that choice is not pinned by
; the expected output. Re-run llc and regenerate the assertions to confirm.

; Overwrite word lane 0 of %b with the low two halfwords of %a.
define <8 x i16> @insert_v8i16_2_1(float %tmp, <8 x i16> %b, <8 x i16> %a) {
; CHECK-LABEL: insert_v8i16_2_1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov v0.16b, v1.16b
; CHECK-NEXT:    mov v0.s[0], v2.s[0]
; CHECK-NEXT:    ret
  %s2 = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 1, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  ret <8 x i16> %s2
}

; Two halfwords of %a placed at a position that is not word aligned, so a table
; lookup is expected instead of a lane move.
; NOTE(review): the expected output only shows a TBL driven by a constant-pool
; entry, so the exact indices below (element offset 1) are an assumption.
define <8 x i16> @insert_v8i16_2_15(float %tmp, <8 x i16> %b, <8 x i16> %a) {
; CHECK-LABEL: insert_v8i16_2_15:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $q2 killed $q2 def $q2_q3
; CHECK-NEXT:    adrp x8, .LCPI13_0
; CHECK-NEXT:    mov v3.16b, v1.16b
; CHECK-NEXT:    ldr q0, [x8, :lo12:.LCPI13_0]
; CHECK-NEXT:    tbl v0.16b, { v2.16b, v3.16b }, v0.16b
; CHECK-NEXT:    ret
  %s2 = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 8, i32 0, i32 1, i32 11, i32 12, i32 13, i32 14, i32 15>
  ret <8 x i16> %s2
}

; Overwrite word lane 1 of %b with the low two halfwords of %a.
define <8 x i16> @insert_v8i16_2_2(float %tmp, <8 x i16> %b, <8 x i16> %a) {
; CHECK-LABEL: insert_v8i16_2_2:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov v0.16b, v1.16b
; CHECK-NEXT:    mov v0.s[1], v2.s[0]
; CHECK-NEXT:    ret
  %s2 = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 8, i32 9, i32 0, i32 1, i32 12, i32 13, i32 14, i32 15>
  ret <8 x i16> %s2
}

; Overwrite word lane 2 of %b with the low two halfwords of %a.
define <8 x i16> @insert_v8i16_2_3(float %tmp, <8 x i16> %b, <8 x i16> %a) {
; CHECK-LABEL: insert_v8i16_2_3:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov v0.16b, v1.16b
; CHECK-NEXT:    mov v0.s[2], v2.s[0]
; CHECK-NEXT:    ret
  %s2 = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 0, i32 1, i32 14, i32 15>
  ret <8 x i16> %s2
}

; Overwrite word lane 3 of %b with the low two halfwords of %a.
define <8 x i16> @insert_v8i16_2_4(float %tmp, <8 x i16> %b, <8 x i16> %a) {
; CHECK-LABEL: insert_v8i16_2_4:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov v0.16b, v1.16b
; CHECK-NEXT:    mov v0.s[3], v2.s[0]
; CHECK-NEXT:    ret
  %s2 = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 0, i32 1>
  ret <8 x i16> %s2
}

; 64-bit vectors: low two halfwords from %a, high two halfwords from %b.
define <4 x i16> @insert_v4i16_2_1(float %tmp, <4 x i16> %b, <4 x i16> %a) {
; CHECK-LABEL: insert_v4i16_2_1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fmov d0, d2
; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT:    mov v0.s[1], v1.s[1]
; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT:    ret
  %s2 = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 1, i32 6, i32 7>
  ret <4 x i16> %s2
}

; 64-bit vectors: low two halfwords from %b, low two halfwords of %a on top.
define <4 x i16> @insert_v4i16_2_2(float %tmp, <4 x i16> %b, <4 x i16> %a) {
; CHECK-LABEL: insert_v4i16_2_2:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fmov d0, d1
; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
; CHECK-NEXT:    mov v0.s[1], v2.s[0]
; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT:    ret
  %s2 = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 4, i32 5, i32 0, i32 1>
  ret <4 x i16> %s2
}

; Low four halfwords from %a, high four halfwords from %b.
define <8 x i16> @insert_v8i16_4_1(float %tmp, <8 x i16> %b, <8 x i16> %a) {
; CHECK-LABEL: insert_v8i16_4_1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov v0.16b, v2.16b
; CHECK-NEXT:    mov v0.d[1], v1.d[1]
; CHECK-NEXT:    ret
  %s2 = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 14, i32 15>
  ret <8 x i16> %s2
}

; Low four halfwords from %b, low four halfwords of %a in the high half.
define <8 x i16> @insert_v8i16_4_2(float %tmp, <8 x i16> %b, <8 x i16> %a) {
; CHECK-LABEL: insert_v8i16_4_2:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov v0.16b, v1.16b
; CHECK-NEXT:    mov v0.d[1], v2.d[0]
; CHECK-NEXT:    ret
  %s2 = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 0, i32 1, i32 2, i32 3>
  ret <8 x i16> %s2
}

; i32

; Low two words from %a, high two words from %b.
define <4 x i32> @insert_v4i32_2_1(float %tmp, <4 x i32> %b, <4 x i32> %a) {
; CHECK-LABEL: insert_v4i32_2_1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov v0.16b, v2.16b
; CHECK-NEXT:    mov v0.d[1], v1.d[1]
; CHECK-NEXT:    ret
  %s2 = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 1, i32 6, i32 7>
  ret <4 x i32> %s2
}

; Low two words from %b, low two words of %a in the high half.
define <4 x i32> @insert_v4i32_2_2(float %tmp, <4 x i32> %b, <4 x i32> %a) {
; CHECK-LABEL: insert_v4i32_2_2:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov v0.16b, v1.16b
; CHECK-NEXT:    mov v0.d[1], v2.d[0]
; CHECK-NEXT:    ret
  %s2 = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 4, i32 5, i32 0, i32 1>
  ret <4 x i32> %s2
}

; i8

; Load four bytes and insert them as word lane 0 of %b; the load is expected to
; fold into a single-lane LD1.
define <16 x i8> @load_v16i8_4_1(float %tmp, <16 x i8> %b, ptr %a) {
; CHECK-LABEL: load_v16i8_4_1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov v0.16b, v1.16b
; CHECK-NEXT:    ld1 { v0.s }[0], [x0]
; CHECK-NEXT:    ret
  %l = load <4 x i8>, ptr %a
  %s1 = shufflevector <4 x i8> %l, <4 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %s2 = shufflevector <16 x i8> %s1, <16 x i8> %b, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  ret <16 x i8> %s2
}

; Load four bytes and place them at a position that is not word aligned; a
; table lookup is expected.
; NOTE(review): the indices of the second mask (byte offset 2) are an
; assumption; the expected output does not pin them.
define <16 x i8> @load_v16i8_4_15(float %tmp, <16 x i8> %b, ptr %a) {
; CHECK-LABEL: load_v16i8_4_15:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $q1 killed $q1 def $q0_q1
; CHECK-NEXT:    adrp x8, .LCPI24_0
; CHECK-NEXT:    ldr s0, [x0]
; CHECK-NEXT:    ldr q2, [x8, :lo12:.LCPI24_0]
; CHECK-NEXT:    tbl v0.16b, { v0.16b, v1.16b }, v2.16b
; CHECK-NEXT:    ret
  %l = load <4 x i8>, ptr %a
  %s1 = shufflevector <4 x i8> %l, <4 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %s2 = shufflevector <16 x i8> %s1, <16 x i8> %b, <16 x i32> <i32 16, i32 17, i32 0, i32 1, i32 2, i32 3, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  ret <16 x i8> %s2
}

; Load four bytes and insert them as word lane 1 of %b.
define <16 x i8> @load_v16i8_4_2(float %tmp, <16 x i8> %b, ptr %a) {
; CHECK-LABEL: load_v16i8_4_2:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov v0.16b, v1.16b
; CHECK-NEXT:    ld1 { v0.s }[1], [x0]
; CHECK-NEXT:    ret
  %l = load <4 x i8>, ptr %a
  %s1 = shufflevector <4 x i8> %l, <4 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %s2 = shufflevector <16 x i8> %s1, <16 x i8> %b, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 0, i32 1, i32 2, i32 3, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  ret <16 x i8> %s2
}

; Load four bytes and insert them as word lane 2 of %b.
define <16 x i8> @load_v16i8_4_3(float %tmp, <16 x i8> %b, ptr %a) {
; CHECK-LABEL: load_v16i8_4_3:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov v0.16b, v1.16b
; CHECK-NEXT:    ld1 { v0.s }[2], [x0]
; CHECK-NEXT:    ret
  %l = load <4 x i8>, ptr %a
  %s1 = shufflevector <4 x i8> %l, <4 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %s2 = shufflevector <16 x i8> %s1, <16 x i8> %b, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 0, i32 1, i32 2, i32 3, i32 28, i32 29, i32 30, i32 31>
  ret <16 x i8> %s2
}

; Load four bytes and insert them as word lane 3 of %b.
define <16 x i8> @load_v16i8_4_4(float %tmp, <16 x i8> %b, ptr %a) {
; CHECK-LABEL: load_v16i8_4_4:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov v0.16b, v1.16b
; CHECK-NEXT:    ld1 { v0.s }[3], [x0]
; CHECK-NEXT:    ret
  %l = load <4 x i8>, ptr %a
  %s1 = shufflevector <4 x i8> %l, <4 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %s2 = shufflevector <16 x i8> %s1, <16 x i8> %b, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 0, i32 1, i32 2, i32 3>
  ret <16 x i8> %s2
}

; 64-bit result: loaded four bytes in the low half, high four bytes of %b kept.
define <8 x i8> @load_v8i8_4_1(float %tmp, <8 x i8> %b, ptr %a) {
; CHECK-LABEL: load_v8i8_4_1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr s0, [x0]
; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT:    mov v0.s[1], v1.s[1]
; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT:    ret
  %l = load <4 x i8>, ptr %a
  %s1 = shufflevector <4 x i8> %l, <4 x i8> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
  %s2 = shufflevector <8 x i8> %s1, <8 x i8> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 14, i32 15>
  ret <8 x i8> %s2
}

; 64-bit result: low four bytes of %b kept, loaded four bytes in the high half.
define <8 x i8> @load_v8i8_4_2(float %tmp, <8 x i8> %b, ptr %a) {
; CHECK-LABEL: load_v8i8_4_2:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fmov d0, d1
; CHECK-NEXT:    ldr s2, [x0]
; CHECK-NEXT:    mov v0.s[1], v2.s[0]
; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT:    ret
  %l = load <4 x i8>, ptr %a
  %s1 = shufflevector <4 x i8> %l, <4 x i8> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
  %s2 = shufflevector <8 x i8> %s1, <8 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 0, i32 1, i32 2, i32 3>
  ret <8 x i8> %s2
}

; Loaded eight bytes in the low half, high eight bytes of %b kept.
define <16 x i8> @load_v16i8_8_1(float %tmp, <16 x i8> %b, ptr %a) {
; CHECK-LABEL: load_v16i8_8_1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ext v1.16b, v1.16b, v1.16b, #8
; CHECK-NEXT:    ldr d0, [x0]
; CHECK-NEXT:    mov v0.d[1], v1.d[0]
; CHECK-NEXT:    ret
  %l = load <8 x i8>, ptr %a
  %s1 = shufflevector <8 x i8> %l, <8 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %s2 = shufflevector <16 x i8> %s1, <16 x i8> %b, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  ret <16 x i8> %s2
}

; Low eight bytes of %b kept, loaded eight bytes in the high half.
define <16 x i8> @load_v16i8_8_2(float %tmp, <16 x i8> %b, ptr %a) {
; CHECK-LABEL: load_v16i8_8_2:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov v0.16b, v1.16b
; CHECK-NEXT:    ldr d2, [x0]
; CHECK-NEXT:    mov v0.d[1], v2.d[0]
; CHECK-NEXT:    ret
  %l = load <8 x i8>, ptr %a
  %s1 = shufflevector <8 x i8> %l, <8 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %s2 = shufflevector <16 x i8> %s1, <16 x i8> %b, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <16 x i8> %s2
}

; i16

; Load two halfwords and insert them as word lane 0 of %b. The expected output
; builds the pair from two scalar halfword loads plus a narrow before the lane
; move.
define <8 x i16> @load_v8i16_2_1(float %tmp, <8 x i16> %b, ptr %a) {
; CHECK-LABEL: load_v8i16_2_1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldrh w8, [x0]
; CHECK-NEXT:    add x9, x0, #2
; CHECK-NEXT:    fmov s0, w8
; CHECK-NEXT:    ld1 { v0.h }[2], [x9]
; CHECK-NEXT:    xtn v2.4h, v0.4s
; CHECK-NEXT:    mov v0.16b, v1.16b
; CHECK-NEXT:    mov v0.s[0], v2.s[0]
; CHECK-NEXT:    ret
  %l = load <2 x i16>, ptr %a
  %s1 = shufflevector <2 x i16> %l, <2 x i16> poison, <8 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %s2 = shufflevector <8 x i16> %s1, <8 x i16> %b, <8 x i32> <i32 0, i32 1, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  ret <8 x i16> %s2
}

; Load two halfwords and place them at a position that is not word aligned; a
; table lookup is expected.
; NOTE(review): the indices of the second mask (element offset 1) are an
; assumption; the expected output does not pin them.
define <8 x i16> @load_v8i16_2_15(float %tmp, <8 x i16> %b, ptr %a) {
; CHECK-LABEL: load_v8i16_2_15:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldrh w8, [x0]
; CHECK-NEXT:    add x9, x0, #2
; CHECK-NEXT:    // kill: def $q1 killed $q1 def $q0_q1
; CHECK-NEXT:    fmov s2, w8
; CHECK-NEXT:    adrp x8, .LCPI33_0
; CHECK-NEXT:    ld1 { v2.h }[2], [x9]
; CHECK-NEXT:    xtn v0.4h, v2.4s
; CHECK-NEXT:    ldr q2, [x8, :lo12:.LCPI33_0]
; CHECK-NEXT:    tbl v0.16b, { v0.16b, v1.16b }, v2.16b
; CHECK-NEXT:    ret
  %l = load <2 x i16>, ptr %a
  %s1 = shufflevector <2 x i16> %l, <2 x i16> poison, <8 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %s2 = shufflevector <8 x i16> %s1, <8 x i16> %b, <8 x i32> <i32 8, i32 0, i32 1, i32 11, i32 12, i32 13, i32 14, i32 15>
  ret <8 x i16> %s2
}

; Load two halfwords and insert them as word lane 1 of %b.
define <8 x i16> @load_v8i16_2_2(float %tmp, <8 x i16> %b, ptr %a) {
; CHECK-LABEL: load_v8i16_2_2:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldrh w8, [x0]
; CHECK-NEXT:    add x9, x0, #2
; CHECK-NEXT:    fmov s0, w8
; CHECK-NEXT:    ld1 { v0.h }[2], [x9]
; CHECK-NEXT:    xtn v2.4h, v0.4s
; CHECK-NEXT:    mov v0.16b, v1.16b
; CHECK-NEXT:    mov v0.s[1], v2.s[0]
; CHECK-NEXT:    ret
  %l = load <2 x i16>, ptr %a
  %s1 = shufflevector <2 x i16> %l, <2 x i16> poison, <8 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %s2 = shufflevector <8 x i16> %s1, <8 x i16> %b, <8 x i32> <i32 8, i32 9, i32 0, i32 1, i32 12, i32 13, i32 14, i32 15>
  ret <8 x i16> %s2
}

; Load two halfwords and insert them as word lane 2 of %b.
define <8 x i16> @load_v8i16_2_3(float %tmp, <8 x i16> %b, ptr %a) {
; CHECK-LABEL: load_v8i16_2_3:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldrh w8, [x0]
; CHECK-NEXT:    add x9, x0, #2
; CHECK-NEXT:    fmov s0, w8
; CHECK-NEXT:    ld1 { v0.h }[2], [x9]
; CHECK-NEXT:    xtn v2.4h, v0.4s
; CHECK-NEXT:    mov v0.16b, v1.16b
; CHECK-NEXT:    mov v0.s[2], v2.s[0]
; CHECK-NEXT:    ret
  %l = load <2 x i16>, ptr %a
  %s1 = shufflevector <2 x i16> %l, <2 x i16> poison, <8 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %s2 = shufflevector <8 x i16> %s1, <8 x i16> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 0, i32 1, i32 14, i32 15>
  ret <8 x i16> %s2
}

; Load two halfwords and insert them as word lane 3 of %b.
define <8 x i16> @load_v8i16_2_4(float %tmp, <8 x i16> %b, ptr %a) {
; CHECK-LABEL: load_v8i16_2_4:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldrh w8, [x0]
; CHECK-NEXT:    add x9, x0, #2
; CHECK-NEXT:    fmov s0, w8
; CHECK-NEXT:    ld1 { v0.h }[2], [x9]
; CHECK-NEXT:    xtn v2.4h, v0.4s
; CHECK-NEXT:    mov v0.16b, v1.16b
; CHECK-NEXT:    mov v0.s[3], v2.s[0]
; CHECK-NEXT:    ret
  %l = load <2 x i16>, ptr %a
  %s1 = shufflevector <2 x i16> %l, <2 x i16> poison, <8 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %s2 = shufflevector <8 x i16> %s1, <8 x i16> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 0, i32 1>
  ret <8 x i16> %s2
}

; 64-bit result: loaded two halfwords in the low half, high half of %b kept.
define <4 x i16> @load_v4i16_2_1(float %tmp, <4 x i16> %b, ptr %a) {
; CHECK-LABEL: load_v4i16_2_1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1 { v0.h }[0], [x0]
; CHECK-NEXT:    add x8, x0, #2
; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT:    ld1 { v0.h }[2], [x8]
; CHECK-NEXT:    uzp1 v0.4h, v0.4h, v0.4h
; CHECK-NEXT:    mov v0.s[1], v1.s[1]
; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT:    ret
  %l = load <2 x i16>, ptr %a
  %s1 = shufflevector <2 x i16> %l, <2 x i16> poison, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
  %s2 = shufflevector <4 x i16> %s1, <4 x i16> %b, <4 x i32> <i32 0, i32 1, i32 6, i32 7>
  ret <4 x i16> %s2
}

; 64-bit result: low half of %b kept, loaded two halfwords in the high half.
define <4 x i16> @load_v4i16_2_2(float %tmp, <4 x i16> %b, ptr %a) {
; CHECK-LABEL: load_v4i16_2_2:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1 { v0.h }[0], [x0]
; CHECK-NEXT:    add x8, x0, #2
; CHECK-NEXT:    ld1 { v0.h }[2], [x8]
; CHECK-NEXT:    uzp1 v2.4h, v0.4h, v0.4h
; CHECK-NEXT:    fmov d0, d1
; CHECK-NEXT:    mov v0.s[1], v2.s[0]
; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT:    ret
  %l = load <2 x i16>, ptr %a
  %s1 = shufflevector <2 x i16> %l, <2 x i16> poison, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
  %s2 = shufflevector <4 x i16> %s1, <4 x i16> %b, <4 x i32> <i32 4, i32 5, i32 0, i32 1>
  ret <4 x i16> %s2
}

; Loaded four halfwords in the low half, high half of %b kept.
define <8 x i16> @load_v8i16_4_1(float %tmp, <8 x i16> %b, ptr %a) {
; CHECK-LABEL: load_v8i16_4_1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ext v1.16b, v1.16b, v1.16b, #8
; CHECK-NEXT:    ldr d0, [x0]
; CHECK-NEXT:    mov v0.d[1], v1.d[0]
; CHECK-NEXT:    ret
  %l = load <4 x i16>, ptr %a
  %s1 = shufflevector <4 x i16> %l, <4 x i16> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
  %s2 = shufflevector <8 x i16> %s1, <8 x i16> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 14, i32 15>
  ret <8 x i16> %s2
}

; Low half of %b kept, loaded four halfwords in the high half.
define <8 x i16> @load_v8i16_4_2(float %tmp, <8 x i16> %b, ptr %a) {
; CHECK-LABEL: load_v8i16_4_2:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov v0.16b, v1.16b
; CHECK-NEXT:    ldr d2, [x0]
; CHECK-NEXT:    mov v0.d[1], v2.d[0]
; CHECK-NEXT:    ret
  %l = load <4 x i16>, ptr %a
  %s1 = shufflevector <4 x i16> %l, <4 x i16> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
  %s2 = shufflevector <8 x i16> %s1, <8 x i16> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 0, i32 1, i32 2, i32 3>
  ret <8 x i16> %s2
}

; i32

; Loaded two words in the low half, high half of %b kept.
define <4 x i32> @load_v4i32_2_1(float %tmp, <4 x i32> %b, ptr %a) {
; CHECK-LABEL: load_v4i32_2_1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ext v1.16b, v1.16b, v1.16b, #8
; CHECK-NEXT:    ldr d0, [x0]
; CHECK-NEXT:    mov v0.d[1], v1.d[0]
; CHECK-NEXT:    ret
  %l = load <2 x i32>, ptr %a
  %s1 = shufflevector <2 x i32> %l, <2 x i32> poison, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
  %s2 = shufflevector <4 x i32> %s1, <4 x i32> %b, <4 x i32> <i32 0, i32 1, i32 6, i32 7>
  ret <4 x i32> %s2
}

; Low half of %b kept, loaded two words in the high half.
define <4 x i32> @load_v4i32_2_2(float %tmp, <4 x i32> %b, ptr %a) {
; CHECK-LABEL: load_v4i32_2_2:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov v0.16b, v1.16b
; CHECK-NEXT:    ldr d2, [x0]
; CHECK-NEXT:    mov v0.d[1], v2.d[0]
; CHECK-NEXT:    ret
  %l = load <2 x i32>, ptr %a
  %s1 = shufflevector <2 x i32> %l, <2 x i32> poison, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
  %s2 = shufflevector <4 x i32> %s1, <4 x i32> %b, <4 x i32> <i32 4, i32 5, i32 0, i32 1>
  ret <4 x i32> %s2
}

; More than a single vector

; Concatenate two separately loaded 4-byte vectors into one 8-byte vector.
define <8 x i8> @load2_v4i8(float %tmp, ptr %a, ptr %b) {
; CHECK-LABEL: load2_v4i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr s0, [x0]
; CHECK-NEXT:    ld1 { v0.s }[1], [x1]
; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT:    ret
  %la = load <4 x i8>, ptr %a
  %lb = load <4 x i8>, ptr %b
  %s1 = shufflevector <4 x i8> %la, <4 x i8> %lb, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <8 x i8> %s1
}

; Concatenate three loaded 4-byte vectors (a[0], b[0], a[1]); the last four
; result bytes are undefined. %d is computed but never loaded.
define <16 x i8> @load3_v4i8(float %tmp, ptr %a, ptr %b) {
; CHECK-LABEL: load3_v4i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldp s0, s1, [x0]
; CHECK-NEXT:    ld1 { v0.s }[1], [x1]
; CHECK-NEXT:    mov v0.d[1], v1.d[0]
; CHECK-NEXT:    ret
  %la = load <4 x i8>, ptr %a
  %lb = load <4 x i8>, ptr %b
  %c = getelementptr <4 x i8>, ptr %a, i64 1
  %d = getelementptr <4 x i8>, ptr %b, i64 1
  %lc = load <4 x i8>, ptr %c
  %s1 = shufflevector <4 x i8> %la, <4 x i8> %lb, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %s2 = shufflevector <4 x i8> %lc, <4 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
  %s3 = shufflevector <8 x i8> %s1, <8 x i8> %s2, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  ret <16 x i8> %s3
}

; Concatenate four loaded 4-byte vectors in the order a[0], b[0], a[1], b[1].
define <16 x i8> @load4_v4i8(float %tmp, ptr %a, ptr %b) {
; CHECK-LABEL: load4_v4i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldp s0, s1, [x0]
; CHECK-NEXT:    ld1 { v0.s }[1], [x1], #4
; CHECK-NEXT:    ld1 { v1.s }[1], [x1]
; CHECK-NEXT:    mov v0.d[1], v1.d[0]
; CHECK-NEXT:    ret
  %la = load <4 x i8>, ptr %a
  %lb = load <4 x i8>, ptr %b
  %c = getelementptr <4 x i8>, ptr %a, i64 1
  %d = getelementptr <4 x i8>, ptr %b, i64 1
  %lc = load <4 x i8>, ptr %c
  %ld = load <4 x i8>, ptr %d
  %s1 = shufflevector <4 x i8> %la, <4 x i8> %lb, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %s2 = shufflevector <4 x i8> %lc, <4 x i8> %ld, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %s3 = shufflevector <8 x i8> %s1, <8 x i8> %s2, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  ret <16 x i8> %s3
}

; The concatenated pair (a, b) is used for both halves of the result.
define <16 x i8> @load2multi1_v4i8(float %tmp, ptr %a, ptr %b) {
; CHECK-LABEL: load2multi1_v4i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr s0, [x0]
; CHECK-NEXT:    ld1 { v0.s }[1], [x1]
; CHECK-NEXT:    mov v0.d[1], v0.d[0]
; CHECK-NEXT:    ret
  %la = load <4 x i8>, ptr %a
  %lb = load <4 x i8>, ptr %b
  %s1 = shufflevector <4 x i8> %la, <4 x i8> %lb, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %s3 = shufflevector <8 x i8> %s1, <8 x i8> %s1, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  ret <16 x i8> %s3
}

; Each load feeds both operands of its own shuffle (a, a, b, b). The expected
; output widens, duplicates and narrows instead of using lane loads.
define <16 x i8> @load2multi2_v4i8(float %tmp, ptr %a, ptr %b) {
; CHECK-LABEL: load2multi2_v4i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr s0, [x0]
; CHECK-NEXT:    ldr s1, [x1]
; CHECK-NEXT:    ushll v0.8h, v0.8b, #0
; CHECK-NEXT:    ushll v1.8h, v1.8b, #0
; CHECK-NEXT:    mov v1.d[1], v1.d[0]
; CHECK-NEXT:    mov v0.d[1], v0.d[0]
; CHECK-NEXT:    uzp1 v0.16b, v0.16b, v1.16b
; CHECK-NEXT:    ret
  %la = load <4 x i8>, ptr %a
  %lb = load <4 x i8>, ptr %b
  %s1 = shufflevector <4 x i8> %la, <4 x i8> %la, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %s2 = shufflevector <4 x i8> %lb, <4 x i8> %lb, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %s3 = shufflevector <8 x i8> %s1, <8 x i8> %s2, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  ret <16 x i8> %s3
}

; Vector loads interleaved with scalar stores to overlapping addresses: the
; 4-byte load from offset 20 precedes the byte stores to offsets 20 and 21, and
; the 4-byte load from offset 28 precedes the byte store to offset 30. The
; expected output issues every load before the first store.
define void @loads_before_stores(ptr %i44) {
; CHECK-LABEL: loads_before_stores:
; CHECK:       // %bb.0: // %bb
; CHECK-NEXT:    ldr s0, [x0, #28]
; CHECK-NEXT:    add x8, x0, #20
; CHECK-NEXT:    ldrh w9, [x0, #26]
; CHECK-NEXT:    ldrh w10, [x0, #24]
; CHECK-NEXT:    ld1 { v0.s }[1], [x8]
; CHECK-NEXT:    strh w9, [x0, #20]
; CHECK-NEXT:    strh w10, [x0, #30]
; CHECK-NEXT:    stur d0, [x0, #22]
; CHECK-NEXT:    ret
bb:
  %i45 = getelementptr inbounds i8, ptr %i44, i64 20
  %i46 = getelementptr inbounds i8, ptr %i44, i64 26
  %i48 = load i8, ptr %i46, align 1
  %i49 = getelementptr inbounds i8, ptr %i44, i64 21
  %i50 = getelementptr inbounds i8, ptr %i44, i64 27
  %i52 = load i8, ptr %i50, align 1
  %i53 = getelementptr inbounds i8, ptr %i44, i64 22
  %i54 = getelementptr inbounds i8, ptr %i44, i64 28
  %i61 = getelementptr inbounds i8, ptr %i44, i64 24
  %i62 = getelementptr inbounds i8, ptr %i44, i64 30
  %i63 = load i8, ptr %i61, align 1
  %i65 = getelementptr inbounds i8, ptr %i44, i64 25
  %i66 = getelementptr inbounds i8, ptr %i44, i64 31
  %i67 = load i8, ptr %i65, align 1
  %0 = load <4 x i8>, ptr %i45, align 1
  store i8 %i48, ptr %i45, align 1
  store i8 %i52, ptr %i49, align 1
  %1 = load <4 x i8>, ptr %i54, align 1
  store i8 %i63, ptr %i62, align 1
  %2 = shufflevector <4 x i8> %1, <4 x i8> %0, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  store <8 x i8> %2, ptr %i53, align 1
  store i8 %i67, ptr %i66, align 1
  ret void
}