; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
; RUN: llc -mtriple=arm64-apple-macosx -o - %s | FileCheck %s
; RUN: llc -mtriple=aarch64_be -o - %s | FileCheck --check-prefix BE %s

; Code generation tests for loads, extends, truncates and stores of
; 3-element vectors on AArch64, for a little-endian Darwin target and a
; big-endian target.
;
; NOTE(review): this file was restored from a copy whose line breaks and
; constant vector operands had been lost. The reconstructed operands are:
;   * shufflevector masks <3, 4, 5, undef...>: the checked output only writes
;     lanes 0-2 from the loaded vector -- confirm against upstream.
;   * add operands <i16 3, i16 4, i16 5>: the checked output loads the addend
;     from a constant pool; the exact values are a guess -- confirm.
;   * lshr amounts of 16: matches the "shrn ... #16" in the checked output.

; Load <3 x i8> and widen it to <16 x i8> with a shufflevector.
define <16 x i8> @load_v3i8(ptr %src) {
; CHECK-LABEL: load_v3i8:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    sub sp, sp, #16
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    ldrh w8, [x0]
; CHECK-NEXT:    strh w8, [sp, #12]
; CHECK-NEXT:    ldr s0, [sp, #12]
; CHECK-NEXT:    ushll.8h v0, v0, #0
; CHECK-NEXT:    umov.h w8, v0[0]
; CHECK-NEXT:    umov.h w9, v0[1]
; CHECK-NEXT:    fmov s0, w8
; CHECK-NEXT:    add x8, x0, #2
; CHECK-NEXT:    mov.b v0[1], w9
; CHECK-NEXT:    ld1.b { v0 }[2], [x8]
; CHECK-NEXT:    add sp, sp, #16
; CHECK-NEXT:    ret
;
; BE-LABEL: load_v3i8:
; BE:       // %bb.0:
; BE-NEXT:    sub sp, sp, #16
; BE-NEXT:    .cfi_def_cfa_offset 16
; BE-NEXT:    ldrh w8, [x0]
; BE-NEXT:    strh w8, [sp, #12]
; BE-NEXT:    ldr s0, [sp, #12]
; BE-NEXT:    rev32 v0.8b, v0.8b
; BE-NEXT:    ushll v0.8h, v0.8b, #0
; BE-NEXT:    umov w8, v0.h[0]
; BE-NEXT:    umov w9, v0.h[1]
; BE-NEXT:    fmov s0, w8
; BE-NEXT:    add x8, x0, #2
; BE-NEXT:    mov v0.b[1], w9
; BE-NEXT:    ld1 { v0.b }[2], [x8]
; BE-NEXT:    rev64 v0.16b, v0.16b
; BE-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
; BE-NEXT:    add sp, sp, #16
; BE-NEXT:    ret
  %l = load <3 x i8>, ptr %src, align 1
  %s = shufflevector <3 x i8> poison, <3 x i8> %l, <16 x i32> <i32 3, i32 4, i32 5, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  ret <16 x i8> %s
}

; Load <3 x i8>, widen to <4 x i8> with a shufflevector, then zero-extend to
; <4 x i32>.
define <4 x i32> @load_v3i8_to_4xi32(ptr %src) {
; CHECK-LABEL: load_v3i8_to_4xi32:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    sub sp, sp, #16
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    ldrh w8, [x0]
; CHECK-NEXT:    movi.2d v1, #0x0000ff000000ff
; CHECK-NEXT:    strh w8, [sp, #12]
; CHECK-NEXT:    ldr s0, [sp, #12]
; CHECK-NEXT:    ldrsb w8, [x0, #2]
; CHECK-NEXT:    ushll.8h v0, v0, #0
; CHECK-NEXT:    mov.h v0[1], v0[1]
; CHECK-NEXT:    mov.h v0[2], w8
; CHECK-NEXT:    ushll.4s v0, v0, #0
; CHECK-NEXT:    and.16b v0, v0, v1
; CHECK-NEXT:    add sp, sp, #16
; CHECK-NEXT:    ret
;
; BE-LABEL: load_v3i8_to_4xi32:
; BE:       // %bb.0:
; BE-NEXT:    sub sp, sp, #16
; BE-NEXT:    .cfi_def_cfa_offset 16
; BE-NEXT:    ldrh w8, [x0]
; BE-NEXT:    movi v1.2d, #0x0000ff000000ff
; BE-NEXT:    strh w8, [sp, #12]
; BE-NEXT:    ldr s0, [sp, #12]
; BE-NEXT:    ldrsb w8, [x0, #2]
; BE-NEXT:    rev32 v0.8b, v0.8b
; BE-NEXT:    ushll v0.8h, v0.8b, #0
; BE-NEXT:    mov v0.h[1], v0.h[1]
; BE-NEXT:    mov v0.h[2], w8
; BE-NEXT:    ushll v0.4s, v0.4h, #0
; BE-NEXT:    and v0.16b, v0.16b, v1.16b
; BE-NEXT:    rev64 v0.4s, v0.4s
; BE-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
; BE-NEXT:    add sp, sp, #16
; BE-NEXT:    ret
  %l = load <3 x i8>, ptr %src, align 1
  %s = shufflevector <3 x i8> poison, <3 x i8> %l, <4 x i32> <i32 3, i32 4, i32 5, i32 undef>
  %e = zext <4 x i8> %s to <4 x i32>
  ret <4 x i32> %e
}

; Same as load_v3i8_to_4xi32, but the load address is %src + 1.
define <4 x i32> @load_v3i8_to_4xi32_const_offset_1(ptr %src) {
; CHECK-LABEL: load_v3i8_to_4xi32_const_offset_1:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    sub sp, sp, #16
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    ldurh w8, [x0, #1]
; CHECK-NEXT:    movi.2d v1, #0x0000ff000000ff
; CHECK-NEXT:    strh w8, [sp, #12]
; CHECK-NEXT:    ldr s0, [sp, #12]
; CHECK-NEXT:    ldrsb w8, [x0, #3]
; CHECK-NEXT:    ushll.8h v0, v0, #0
; CHECK-NEXT:    mov.h v0[1], v0[1]
; CHECK-NEXT:    mov.h v0[2], w8
; CHECK-NEXT:    ushll.4s v0, v0, #0
; CHECK-NEXT:    and.16b v0, v0, v1
; CHECK-NEXT:    add sp, sp, #16
; CHECK-NEXT:    ret
;
; BE-LABEL: load_v3i8_to_4xi32_const_offset_1:
; BE:       // %bb.0:
; BE-NEXT:    sub sp, sp, #16
; BE-NEXT:    .cfi_def_cfa_offset 16
; BE-NEXT:    ldurh w8, [x0, #1]
; BE-NEXT:    movi v1.2d, #0x0000ff000000ff
; BE-NEXT:    strh w8, [sp, #12]
; BE-NEXT:    ldr s0, [sp, #12]
; BE-NEXT:    ldrsb w8, [x0, #3]
; BE-NEXT:    rev32 v0.8b, v0.8b
; BE-NEXT:    ushll v0.8h, v0.8b, #0
; BE-NEXT:    mov v0.h[1], v0.h[1]
; BE-NEXT:    mov v0.h[2], w8
; BE-NEXT:    ushll v0.4s, v0.4h, #0
; BE-NEXT:    and v0.16b, v0.16b, v1.16b
; BE-NEXT:    rev64 v0.4s, v0.4s
; BE-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
; BE-NEXT:    add sp, sp, #16
; BE-NEXT:    ret
  %src.1 = getelementptr inbounds i8, ptr %src, i64 1
  %l = load <3 x i8>, ptr %src.1, align 1
  %s = shufflevector <3 x i8> poison, <3 x i8> %l, <4 x i32> <i32 3, i32 4, i32 5, i32 undef>
  %e = zext <4 x i8> %s to <4 x i32>
  ret <4 x i32> %e
}

; Same as load_v3i8_to_4xi32, but the load address is %src + 3.
define <4 x i32> @load_v3i8_to_4xi32_const_offset_3(ptr %src) {
; CHECK-LABEL: load_v3i8_to_4xi32_const_offset_3:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    sub sp, sp, #16
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    ldurh w8, [x0, #3]
; CHECK-NEXT:    movi.2d v1, #0x0000ff000000ff
; CHECK-NEXT:    strh w8, [sp, #12]
; CHECK-NEXT:    ldr s0, [sp, #12]
; CHECK-NEXT:    ldrsb w8, [x0, #5]
; CHECK-NEXT:    ushll.8h v0, v0, #0
; CHECK-NEXT:    mov.h v0[1], v0[1]
; CHECK-NEXT:    mov.h v0[2], w8
; CHECK-NEXT:    ushll.4s v0, v0, #0
; CHECK-NEXT:    and.16b v0, v0, v1
; CHECK-NEXT:    add sp, sp, #16
; CHECK-NEXT:    ret
;
; BE-LABEL: load_v3i8_to_4xi32_const_offset_3:
; BE:       // %bb.0:
; BE-NEXT:    sub sp, sp, #16
; BE-NEXT:    .cfi_def_cfa_offset 16
; BE-NEXT:    ldurh w8, [x0, #3]
; BE-NEXT:    movi v1.2d, #0x0000ff000000ff
; BE-NEXT:    strh w8, [sp, #12]
; BE-NEXT:    ldr s0, [sp, #12]
; BE-NEXT:    ldrsb w8, [x0, #5]
; BE-NEXT:    rev32 v0.8b, v0.8b
; BE-NEXT:    ushll v0.8h, v0.8b, #0
; BE-NEXT:    mov v0.h[1], v0.h[1]
; BE-NEXT:    mov v0.h[2], w8
; BE-NEXT:    ushll v0.4s, v0.4h, #0
; BE-NEXT:    and v0.16b, v0.16b, v1.16b
; BE-NEXT:    rev64 v0.4s, v0.4s
; BE-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
; BE-NEXT:    add sp, sp, #16
; BE-NEXT:    ret
  %src.3 = getelementptr inbounds i8, ptr %src, i64 3
  %l = load <3 x i8>, ptr %src.3, align 1
  %s = shufflevector <3 x i8> poison, <3 x i8> %l, <4 x i32> <i32 3, i32 4, i32 5, i32 undef>
  %e = zext <4 x i8> %s to <4 x i32>
  ret <4 x i32> %e
}

; Same as load_v3i8_to_4xi32, but the load is volatile.
define <4 x i32> @volatile_load_v3i8_to_4xi32(ptr %src) {
; CHECK-LABEL: volatile_load_v3i8_to_4xi32:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    sub sp, sp, #16
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    ldrh w8, [x0]
; CHECK-NEXT:    movi.2d v1, #0x0000ff000000ff
; CHECK-NEXT:    strh w8, [sp, #12]
; CHECK-NEXT:    ldr s0, [sp, #12]
; CHECK-NEXT:    ldrsb w8, [x0, #2]
; CHECK-NEXT:    ushll.8h v0, v0, #0
; CHECK-NEXT:    mov.h v0[1], v0[1]
; CHECK-NEXT:    mov.h v0[2], w8
; CHECK-NEXT:    ushll.4s v0, v0, #0
; CHECK-NEXT:    and.16b v0, v0, v1
; CHECK-NEXT:    add sp, sp, #16
; CHECK-NEXT:    ret
;
; BE-LABEL: volatile_load_v3i8_to_4xi32:
; BE:       // %bb.0:
; BE-NEXT:    sub sp, sp, #16
; BE-NEXT:    .cfi_def_cfa_offset 16
; BE-NEXT:    ldrh w8, [x0]
; BE-NEXT:    movi v1.2d, #0x0000ff000000ff
; BE-NEXT:    strh w8, [sp, #12]
; BE-NEXT:    ldr s0, [sp, #12]
; BE-NEXT:    ldrsb w8, [x0, #2]
; BE-NEXT:    rev32 v0.8b, v0.8b
; BE-NEXT:    ushll v0.8h, v0.8b, #0
; BE-NEXT:    mov v0.h[1], v0.h[1]
; BE-NEXT:    mov v0.h[2], w8
; BE-NEXT:    ushll v0.4s, v0.4h, #0
; BE-NEXT:    and v0.16b, v0.16b, v1.16b
; BE-NEXT:    rev64 v0.4s, v0.4s
; BE-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
; BE-NEXT:    add sp, sp, #16
; BE-NEXT:    ret
  %l = load volatile <3 x i8>, ptr %src, align 1
  %s = shufflevector <3 x i8> poison, <3 x i8> %l, <4 x i32> <i32 3, i32 4, i32 5, i32 undef>
  %e = zext <4 x i8> %s to <4 x i32>
  ret <4 x i32> %e
}

; Plain load of <3 x i32> with byte alignment, returned unchanged.
define <3 x i32> @load_v3i32(ptr %src) {
; CHECK-LABEL: load_v3i32:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    ldr d0, [x0]
; CHECK-NEXT:    add x8, x0, #8
; CHECK-NEXT:    ld1.s { v0 }[2], [x8]
; CHECK-NEXT:    ret
;
; BE-LABEL: load_v3i32:
; BE:       // %bb.0:
; BE-NEXT:    ldr d0, [x0]
; BE-NEXT:    add x8, x0, #8
; BE-NEXT:    rev64 v0.4s, v0.4s
; BE-NEXT:    ld1 { v0.s }[2], [x8]
; BE-NEXT:    rev64 v0.4s, v0.4s
; BE-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
; BE-NEXT:    ret
  %l = load <3 x i32>, ptr %src, align 1
  ret <3 x i32> %l
}

; Load <3 x i16>, truncate to <3 x i8> and store the result.
define void @store_trunc_from_64bits(ptr %src, ptr %dst) {
; CHECK-LABEL: store_trunc_from_64bits:
; CHECK:       ; %bb.0: ; %entry
; CHECK-NEXT:    sub sp, sp, #16
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    ldr s0, [x0]
; CHECK-NEXT:    ldrh w8, [x0, #4]
; CHECK-NEXT:    mov.h v0[2], w8
; CHECK-NEXT:    xtn.8b v0, v0
; CHECK-NEXT:    str s0, [sp, #12]
; CHECK-NEXT:    ldrh w9, [sp, #12]
; CHECK-NEXT:    strb w8, [x1, #2]
; CHECK-NEXT:    strh w9, [x1]
; CHECK-NEXT:    add sp, sp, #16
; CHECK-NEXT:    ret
;
; BE-LABEL: store_trunc_from_64bits:
; BE:       // %bb.0: // %entry
; BE-NEXT:    sub sp, sp, #16
; BE-NEXT:    .cfi_def_cfa_offset 16
; BE-NEXT:    ldr s0, [x0]
; BE-NEXT:    ldrh w8, [x0, #4]
; BE-NEXT:    rev32 v0.4h, v0.4h
; BE-NEXT:    mov v0.h[2], w8
; BE-NEXT:    xtn v0.8b, v0.8h
; BE-NEXT:    rev32 v0.16b, v0.16b
; BE-NEXT:    str s0, [sp, #12]
; BE-NEXT:    ldrh w9, [sp, #12]
; BE-NEXT:    strb w8, [x1, #2]
; BE-NEXT:    strh w9, [x1]
; BE-NEXT:    add sp, sp, #16
; BE-NEXT:    ret
entry:
  %l = load <3 x i16>, ptr %src, align 1
  %t = trunc <3 x i16> %l to <3 x i8>
  store <3 x i8> %t, ptr %dst, align 1
  ret void
}

; Load <3 x i16>, add a constant vector, truncate to <3 x i8> and store.
define void @store_trunc_add_from_64bits(ptr %src, ptr %dst) {
; CHECK-LABEL: store_trunc_add_from_64bits:
; CHECK:       ; %bb.0: ; %entry
; CHECK-NEXT:    sub sp, sp, #16
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    ldr s0, [x0]
; CHECK-NEXT:    add x9, x0, #4
; CHECK-NEXT:  Lloh0:
; CHECK-NEXT:    adrp x8, lCPI7_0@PAGE
; CHECK-NEXT:  Lloh1:
; CHECK-NEXT:    ldr d1, [x8, lCPI7_0@PAGEOFF]
; CHECK-NEXT:    ld1.h { v0 }[2], [x9]
; CHECK-NEXT:    add.4h v0, v0, v1
; CHECK-NEXT:    xtn.8b v1, v0
; CHECK-NEXT:    umov.h w8, v0[2]
; CHECK-NEXT:    str s1, [sp, #12]
; CHECK-NEXT:    ldrh w9, [sp, #12]
; CHECK-NEXT:    strb w8, [x1, #2]
; CHECK-NEXT:    strh w9, [x1]
; CHECK-NEXT:    add sp, sp, #16
; CHECK-NEXT:    ret
; CHECK-NEXT:    .loh AdrpLdr Lloh0, Lloh1
;
; BE-LABEL: store_trunc_add_from_64bits:
; BE:       // %bb.0: // %entry
; BE-NEXT:    sub sp, sp, #16
; BE-NEXT:    .cfi_def_cfa_offset 16
; BE-NEXT:    ldr s0, [x0]
; BE-NEXT:    add x8, x0, #4
; BE-NEXT:    rev32 v0.4h, v0.4h
; BE-NEXT:    ld1 { v0.h }[2], [x8]
; BE-NEXT:    adrp x8, .LCPI7_0
; BE-NEXT:    add x8, x8, :lo12:.LCPI7_0
; BE-NEXT:    ld1 { v1.4h }, [x8]
; BE-NEXT:    add v0.4h, v0.4h, v1.4h
; BE-NEXT:    xtn v1.8b, v0.8h
; BE-NEXT:    umov w8, v0.h[2]
; BE-NEXT:    rev32 v1.16b, v1.16b
; BE-NEXT:    str s1, [sp, #12]
; BE-NEXT:    ldrh w9, [sp, #12]
; BE-NEXT:    strb w8, [x1, #2]
; BE-NEXT:    strh w9, [x1]
; BE-NEXT:    add sp, sp, #16
; BE-NEXT:    ret
entry:
  %l = load <3 x i16>, ptr %src, align 1
  %a = add <3 x i16> %l, <i16 3, i16 4, i16 5>
  %t = trunc <3 x i16> %a to <3 x i8>
  store <3 x i8> %t, ptr %dst, align 1
  ret void
}

; Load <3 x i8>, zero-extend to <3 x i16> and store the result.
define void @load_ext_to_64bits(ptr %src, ptr %dst) {
; CHECK-LABEL: load_ext_to_64bits:
; CHECK:       ; %bb.0: ; %entry
; CHECK-NEXT:    sub sp, sp, #16
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    ldrh w8, [x0]
; CHECK-NEXT:    strh w8, [sp, #12]
; CHECK-NEXT:    add x8, x0, #2
; CHECK-NEXT:    ldr s0, [sp, #12]
; CHECK-NEXT:    ushll.8h v0, v0, #0
; CHECK-NEXT:    ld1.b { v0 }[4], [x8]
; CHECK-NEXT:    add x8, x1, #4
; CHECK-NEXT:    bic.4h v0, #255, lsl #8
; CHECK-NEXT:    st1.h { v0 }[2], [x8]
; CHECK-NEXT:    str s0, [x1]
; CHECK-NEXT:    add sp, sp, #16
; CHECK-NEXT:    ret
;
; BE-LABEL: load_ext_to_64bits:
; BE:       // %bb.0: // %entry
; BE-NEXT:    sub sp, sp, #16
; BE-NEXT:    .cfi_def_cfa_offset 16
; BE-NEXT:    ldrh w8, [x0]
; BE-NEXT:    strh w8, [sp, #12]
; BE-NEXT:    add x8, x0, #2
; BE-NEXT:    ldr s0, [sp, #12]
; BE-NEXT:    rev32 v0.8b, v0.8b
; BE-NEXT:    ushll v0.8h, v0.8b, #0
; BE-NEXT:    ld1 { v0.b }[4], [x8]
; BE-NEXT:    add x8, x1, #4
; BE-NEXT:    bic v0.4h, #255, lsl #8
; BE-NEXT:    rev32 v1.8h, v0.8h
; BE-NEXT:    st1 { v0.h }[2], [x8]
; BE-NEXT:    str s1, [x1]
; BE-NEXT:    add sp, sp, #16
; BE-NEXT:    ret
entry:
  %l = load <3 x i8>, ptr %src, align 1
  %e = zext <3 x i8> %l to <3 x i16>
  store <3 x i16> %e, ptr %dst, align 1
  ret void
}

; Load <3 x i8>, zero-extend to <3 x i16>, add a constant vector and store.
define void @load_ext_add_to_64bits(ptr %src, ptr %dst) {
; CHECK-LABEL: load_ext_add_to_64bits:
; CHECK:       ; %bb.0: ; %entry
; CHECK-NEXT:    sub sp, sp, #16
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    ldrh w9, [x0]
; CHECK-NEXT:  Lloh2:
; CHECK-NEXT:    adrp x8, lCPI9_0@PAGE
; CHECK-NEXT:  Lloh3:
; CHECK-NEXT:    ldr d1, [x8, lCPI9_0@PAGEOFF]
; CHECK-NEXT:    add x8, x1, #4
; CHECK-NEXT:    strh w9, [sp, #12]
; CHECK-NEXT:    add x9, x0, #2
; CHECK-NEXT:    ldr s0, [sp, #12]
; CHECK-NEXT:    ushll.8h v0, v0, #0
; CHECK-NEXT:    ld1.b { v0 }[4], [x9]
; CHECK-NEXT:    bic.4h v0, #255, lsl #8
; CHECK-NEXT:    add.4h v0, v0, v1
; CHECK-NEXT:    st1.h { v0 }[2], [x8]
; CHECK-NEXT:    str s0, [x1]
; CHECK-NEXT:    add sp, sp, #16
; CHECK-NEXT:    ret
; CHECK-NEXT:    .loh AdrpLdr Lloh2, Lloh3
;
; BE-LABEL: load_ext_add_to_64bits:
; BE:       // %bb.0: // %entry
; BE-NEXT:    sub sp, sp, #16
; BE-NEXT:    .cfi_def_cfa_offset 16
; BE-NEXT:    ldrh w8, [x0]
; BE-NEXT:    strh w8, [sp, #12]
; BE-NEXT:    add x8, x0, #2
; BE-NEXT:    ldr s0, [sp, #12]
; BE-NEXT:    rev32 v0.8b, v0.8b
; BE-NEXT:    ushll v0.8h, v0.8b, #0
; BE-NEXT:    ld1 { v0.b }[4], [x8]
; BE-NEXT:    adrp x8, .LCPI9_0
; BE-NEXT:    add x8, x8, :lo12:.LCPI9_0
; BE-NEXT:    ld1 { v1.4h }, [x8]
; BE-NEXT:    add x8, x1, #4
; BE-NEXT:    bic v0.4h, #255, lsl #8
; BE-NEXT:    add v0.4h, v0.4h, v1.4h
; BE-NEXT:    rev32 v1.8h, v0.8h
; BE-NEXT:    st1 { v0.h }[2], [x8]
; BE-NEXT:    str s1, [x1]
; BE-NEXT:    add sp, sp, #16
; BE-NEXT:    ret
entry:
  %l = load <3 x i8>, ptr %src, align 1
  %e = zext <3 x i8> %l to <3 x i16>
  %a = add <3 x i16> %e, <i16 3, i16 4, i16 5>
  store <3 x i16> %a, ptr %dst, align 1
  ret void
}

; Load <3 x i32>, shift each lane right by 16, truncate to <3 x i8> and store.
define void @shift_trunc_store(ptr %src, ptr %dst) {
; CHECK-LABEL: shift_trunc_store:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    sub sp, sp, #16
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    ldr q0, [x0]
; CHECK-NEXT:    shrn.4h v0, v0, #16
; CHECK-NEXT:    xtn.8b v1, v0
; CHECK-NEXT:    umov.h w8, v0[2]
; CHECK-NEXT:    str s1, [sp, #12]
; CHECK-NEXT:    ldrh w9, [sp, #12]
; CHECK-NEXT:    strb w8, [x1, #2]
; CHECK-NEXT:    strh w9, [x1]
; CHECK-NEXT:    add sp, sp, #16
; CHECK-NEXT:    ret
;
; BE-LABEL: shift_trunc_store:
; BE:       // %bb.0:
; BE-NEXT:    sub sp, sp, #16
; BE-NEXT:    .cfi_def_cfa_offset 16
; BE-NEXT:    ld1 { v0.4s }, [x0]
; BE-NEXT:    shrn v0.4h, v0.4s, #16
; BE-NEXT:    xtn v1.8b, v0.8h
; BE-NEXT:    umov w8, v0.h[2]
; BE-NEXT:    rev32 v1.16b, v1.16b
; BE-NEXT:    str s1, [sp, #12]
; BE-NEXT:    ldrh w9, [sp, #12]
; BE-NEXT:    strb w8, [x1, #2]
; BE-NEXT:    strh w9, [x1]
; BE-NEXT:    add sp, sp, #16
; BE-NEXT:    ret
  %l = load <3 x i32>, ptr %src
  %s = lshr <3 x i32> %l, <i32 16, i32 16, i32 16>
  %t = trunc <3 x i32> %s to <3 x i8>
  store <3 x i8> %t, ptr %dst, align 1
  ret void
}

; Same as shift_trunc_store, but the store address is %dst + 1.
define void @shift_trunc_store_const_offset_1(ptr %src, ptr %dst) {
; CHECK-LABEL: shift_trunc_store_const_offset_1:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    sub sp, sp, #16
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    ldr q0, [x0]
; CHECK-NEXT:    shrn.4h v0, v0, #16
; CHECK-NEXT:    xtn.8b v1, v0
; CHECK-NEXT:    umov.h w8, v0[2]
; CHECK-NEXT:    str s1, [sp, #12]
; CHECK-NEXT:    ldrh w9, [sp, #12]
; CHECK-NEXT:    strb w8, [x1, #3]
; CHECK-NEXT:    sturh w9, [x1, #1]
; CHECK-NEXT:    add sp, sp, #16
; CHECK-NEXT:    ret
;
; BE-LABEL: shift_trunc_store_const_offset_1:
; BE:       // %bb.0:
; BE-NEXT:    sub sp, sp, #16
; BE-NEXT:    .cfi_def_cfa_offset 16
; BE-NEXT:    ld1 { v0.4s }, [x0]
; BE-NEXT:    shrn v0.4h, v0.4s, #16
; BE-NEXT:    xtn v1.8b, v0.8h
; BE-NEXT:    umov w8, v0.h[2]
; BE-NEXT:    rev32 v1.16b, v1.16b
; BE-NEXT:    str s1, [sp, #12]
; BE-NEXT:    ldrh w9, [sp, #12]
; BE-NEXT:    strb w8, [x1, #3]
; BE-NEXT:    sturh w9, [x1, #1]
; BE-NEXT:    add sp, sp, #16
; BE-NEXT:    ret
  %l = load <3 x i32>, ptr %src
  %s = lshr <3 x i32> %l, <i32 16, i32 16, i32 16>
  %t = trunc <3 x i32> %s to <3 x i8>
  %dst.1 = getelementptr inbounds i8, ptr %dst, i64 1
  store <3 x i8> %t, ptr %dst.1, align 1
  ret void
}

; Same as shift_trunc_store, but the store address is %dst + 3.
define void @shift_trunc_store_const_offset_3(ptr %src, ptr %dst) {
; CHECK-LABEL: shift_trunc_store_const_offset_3:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    sub sp, sp, #16
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    ldr q0, [x0]
; CHECK-NEXT:    shrn.4h v0, v0, #16
; CHECK-NEXT:    xtn.8b v1, v0
; CHECK-NEXT:    umov.h w8, v0[2]
; CHECK-NEXT:    str s1, [sp, #12]
; CHECK-NEXT:    ldrh w9, [sp, #12]
; CHECK-NEXT:    strb w8, [x1, #5]
; CHECK-NEXT:    sturh w9, [x1, #3]
; CHECK-NEXT:    add sp, sp, #16
; CHECK-NEXT:    ret
;
; BE-LABEL: shift_trunc_store_const_offset_3:
; BE:       // %bb.0:
; BE-NEXT:    sub sp, sp, #16
; BE-NEXT:    .cfi_def_cfa_offset 16
; BE-NEXT:    ld1 { v0.4s }, [x0]
; BE-NEXT:    shrn v0.4h, v0.4s, #16
; BE-NEXT:    xtn v1.8b, v0.8h
; BE-NEXT:    umov w8, v0.h[2]
; BE-NEXT:    rev32 v1.16b, v1.16b
; BE-NEXT:    str s1, [sp, #12]
; BE-NEXT:    ldrh w9, [sp, #12]
; BE-NEXT:    strb w8, [x1, #5]
; BE-NEXT:    sturh w9, [x1, #3]
; BE-NEXT:    add sp, sp, #16
; BE-NEXT:    ret
  %l = load <3 x i32>, ptr %src
  %s = lshr <3 x i32> %l, <i32 16, i32 16, i32 16>
  %t = trunc <3 x i32> %s to <3 x i8>
  %dst.3 = getelementptr inbounds i8, ptr %dst, i64 3
  store <3 x i8> %t, ptr %dst.3, align 1
  ret void
}

; Same as shift_trunc_store, but the store is volatile.
define void @shift_trunc_volatile_store(ptr %src, ptr %dst) {
; CHECK-LABEL: shift_trunc_volatile_store:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    sub sp, sp, #16
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    ldr q0, [x0]
; CHECK-NEXT:    shrn.4h v0, v0, #16
; CHECK-NEXT:    xtn.8b v1, v0
; CHECK-NEXT:    umov.h w8, v0[2]
; CHECK-NEXT:    str s1, [sp, #12]
; CHECK-NEXT:    ldrh w9, [sp, #12]
; CHECK-NEXT:    strb w8, [x1, #2]
; CHECK-NEXT:    strh w9, [x1]
; CHECK-NEXT:    add sp, sp, #16
; CHECK-NEXT:    ret
;
; BE-LABEL: shift_trunc_volatile_store:
; BE:       // %bb.0:
; BE-NEXT:    sub sp, sp, #16
; BE-NEXT:    .cfi_def_cfa_offset 16
; BE-NEXT:    ld1 { v0.4s }, [x0]
; BE-NEXT:    shrn v0.4h, v0.4s, #16
; BE-NEXT:    xtn v1.8b, v0.8h
; BE-NEXT:    umov w8, v0.h[2]
; BE-NEXT:    rev32 v1.16b, v1.16b
; BE-NEXT:    str s1, [sp, #12]
; BE-NEXT:    ldrh w9, [sp, #12]
; BE-NEXT:    strb w8, [x1, #2]
; BE-NEXT:    strh w9, [x1]
; BE-NEXT:    add sp, sp, #16
; BE-NEXT:    ret
  %l = load <3 x i32>, ptr %src
  %s = lshr <3 x i32> %l, <i32 16, i32 16, i32 16>
  %t = trunc <3 x i32> %s to <3 x i8>
  store volatile <3 x i8> %t, ptr %dst, align 1
  ret void
}