; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon -fp-contract=fast -aarch64-enable-sink-fold=true | FileCheck %s --check-prefix=CHECK-SDAG
; RUN: llc < %s -global-isel -global-isel-abort=2 -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon -fp-contract=fast -aarch64-enable-sink-fold=true | FileCheck %s --check-prefix=CHECK-GISEL

; Codegen tests for extractelement with a run-time lane index (%idx) on
; AArch64 vectors.
;
; The two llc invocations above differ only in the -global-isel flags; the
; first is checked under the SDAG prefix and the second under the GISEL prefix.
; Review note: -global-isel-abort=2 presumably lets llc fall back to the
; default selector instead of failing -- confirm the GISEL expectations below
; are really produced by GlobalISel.
;
; In every expected sequence the vector is stored to a 16-byte stack slot
; (sub sp, sp, #16 ... str) and the selected element is loaded back from an
; address derived from the index. The SDAG output folds the index into the
; address with bfi/bfxil; the GISEL output masks it with `and` first.
;
; The check lines are tool-generated (see the first line of this file); do not
; edit them by hand.

; Lane %idx of <8 x i8> %x goes into lane 0 of a <4 x i8> result; lanes 1-3 are
; copied from the constant lanes 1-3 of %x.
define <4 x i8> @test_varidx_extract_v8s8(<8 x i8> %x, i32 %idx) {
; CHECK-SDAG-LABEL: test_varidx_extract_v8s8:
; CHECK-SDAG: // %bb.0:
; CHECK-SDAG-NEXT: sub sp, sp, #16
; CHECK-SDAG-NEXT: .cfi_def_cfa_offset 16
; CHECK-SDAG-NEXT: add x8, sp, #8
; CHECK-SDAG-NEXT: // kill: def $w0 killed $w0 def $x0
; CHECK-SDAG-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-SDAG-NEXT: str d0, [sp, #8]
; CHECK-SDAG-NEXT: umov w9, v0.b[1]
; CHECK-SDAG-NEXT: bfxil x8, x0, #0, #3
; CHECK-SDAG-NEXT: ld1 { v1.b }[0], [x8]
; CHECK-SDAG-NEXT: umov w8, v0.b[2]
; CHECK-SDAG-NEXT: mov v1.h[1], w9
; CHECK-SDAG-NEXT: umov w9, v0.b[3]
; CHECK-SDAG-NEXT: mov v1.h[2], w8
; CHECK-SDAG-NEXT: mov v1.h[3], w9
; CHECK-SDAG-NEXT: fmov d0, d1
; CHECK-SDAG-NEXT: add sp, sp, #16
; CHECK-SDAG-NEXT: ret
;
; CHECK-GISEL-LABEL: test_varidx_extract_v8s8:
; CHECK-GISEL: // %bb.0:
; CHECK-GISEL-NEXT: sub sp, sp, #16
; CHECK-GISEL-NEXT: .cfi_def_cfa_offset 16
; CHECK-GISEL-NEXT: mov w9, w0
; CHECK-GISEL-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-GISEL-NEXT: add x8, sp, #8
; CHECK-GISEL-NEXT: str d0, [sp, #8]
; CHECK-GISEL-NEXT: and x9, x9, #0x7
; CHECK-GISEL-NEXT: mov b2, v0.b[1]
; CHECK-GISEL-NEXT: mov b3, v0.b[2]
; CHECK-GISEL-NEXT: lsl x10, x9, #1
; CHECK-GISEL-NEXT: mov b0, v0.b[3]
; CHECK-GISEL-NEXT: sub x9, x10, x9
; CHECK-GISEL-NEXT: ldrb w8, [x8, x9]
; CHECK-GISEL-NEXT: fmov s1, w8
; CHECK-GISEL-NEXT: mov v1.h[1], v2.h[0]
; CHECK-GISEL-NEXT: mov v1.h[2], v3.h[0]
; CHECK-GISEL-NEXT: mov v1.h[3], v0.h[0]
; CHECK-GISEL-NEXT: fmov d0, d1
; CHECK-GISEL-NEXT: add sp, sp, #16
; CHECK-GISEL-NEXT: ret
  %tmp = extractelement <8 x i8> %x, i32 %idx
  %tmp2 = insertelement <4 x i8> undef, i8 %tmp, i32 0
  %tmp3 = extractelement <8 x i8> %x, i32 1
  %tmp4 = insertelement <4 x i8> %tmp2, i8 %tmp3, i32 1
  %tmp5 = extractelement <8 x i8> %x, i32 2
  %tmp6 = insertelement <4 x i8> %tmp4, i8 %tmp5, i32 2
  %tmp7 = extractelement <8 x i8> %x, i32 3
  %tmp8 = insertelement <4 x i8> %tmp6, i8 %tmp7, i32 3
  ret <4 x i8> %tmp8
}

; Lane %idx of <16 x i8> %x goes into lane 0 of a <8 x i8> result; lanes 1-7
; are copied from the constant lanes 1-7 of %x.
define <8 x i8> @test_varidx_extract_v16s8(<16 x i8> %x, i32 %idx) {
; CHECK-SDAG-LABEL: test_varidx_extract_v16s8:
; CHECK-SDAG: // %bb.0:
; CHECK-SDAG-NEXT: sub sp, sp, #16
; CHECK-SDAG-NEXT: .cfi_def_cfa_offset 16
; CHECK-SDAG-NEXT: mov x8, sp
; CHECK-SDAG-NEXT: // kill: def $w0 killed $w0 def $x0
; CHECK-SDAG-NEXT: str q0, [sp]
; CHECK-SDAG-NEXT: bfxil x8, x0, #0, #4
; CHECK-SDAG-NEXT: ldr b1, [x8]
; CHECK-SDAG-NEXT: mov v1.b[1], v0.b[1]
; CHECK-SDAG-NEXT: mov v1.b[2], v0.b[2]
; CHECK-SDAG-NEXT: mov v1.b[3], v0.b[3]
; CHECK-SDAG-NEXT: mov v1.b[4], v0.b[4]
; CHECK-SDAG-NEXT: mov v1.b[5], v0.b[5]
; CHECK-SDAG-NEXT: mov v1.b[6], v0.b[6]
; CHECK-SDAG-NEXT: mov v1.b[7], v0.b[7]
; CHECK-SDAG-NEXT: fmov d0, d1
; CHECK-SDAG-NEXT: add sp, sp, #16
; CHECK-SDAG-NEXT: ret
;
; CHECK-GISEL-LABEL: test_varidx_extract_v16s8:
; CHECK-GISEL: // %bb.0:
; CHECK-GISEL-NEXT: sub sp, sp, #16
; CHECK-GISEL-NEXT: .cfi_def_cfa_offset 16
; CHECK-GISEL-NEXT: mov w9, w0
; CHECK-GISEL-NEXT: mov b2, v0.b[1]
; CHECK-GISEL-NEXT: mov x8, sp
; CHECK-GISEL-NEXT: and x9, x9, #0xf
; CHECK-GISEL-NEXT: str q0, [sp]
; CHECK-GISEL-NEXT: mov b3, v0.b[2]
; CHECK-GISEL-NEXT: lsl x10, x9, #1
; CHECK-GISEL-NEXT: sub x9, x10, x9
; CHECK-GISEL-NEXT: ldr b1, [x8, x9]
; CHECK-GISEL-NEXT: mov v1.b[1], v2.b[0]
; CHECK-GISEL-NEXT: mov b2, v0.b[3]
; CHECK-GISEL-NEXT: mov v1.b[2], v3.b[0]
; CHECK-GISEL-NEXT: mov b3, v0.b[4]
; CHECK-GISEL-NEXT: mov v1.b[3], v2.b[0]
; CHECK-GISEL-NEXT: mov b2, v0.b[5]
; CHECK-GISEL-NEXT: mov v1.b[4], v3.b[0]
; CHECK-GISEL-NEXT: mov b3, v0.b[6]
; CHECK-GISEL-NEXT: mov b0, v0.b[7]
; CHECK-GISEL-NEXT: mov v1.b[5], v2.b[0]
; CHECK-GISEL-NEXT: mov v1.b[6], v3.b[0]
; CHECK-GISEL-NEXT: mov v1.b[7], v0.b[0]
; CHECK-GISEL-NEXT: fmov d0, d1
; CHECK-GISEL-NEXT: add sp, sp, #16
; CHECK-GISEL-NEXT: ret
  %tmp = extractelement <16 x i8> %x, i32 %idx
  %tmp2 = insertelement <8 x i8> undef, i8 %tmp, i32 0
  %tmp3 = extractelement <16 x i8> %x, i32 1
  %tmp4 = insertelement <8 x i8> %tmp2, i8 %tmp3, i32 1
  %tmp5 = extractelement <16 x i8> %x, i32 2
  %tmp6 = insertelement <8 x i8> %tmp4, i8 %tmp5, i32 2
  %tmp7 = extractelement <16 x i8> %x, i32 3
  %tmp8 = insertelement <8 x i8> %tmp6, i8 %tmp7, i32 3
  %tmp9 = extractelement <16 x i8> %x, i32 4
  %tmp10 = insertelement <8 x i8> %tmp8, i8 %tmp9, i32 4
  %tmp11 = extractelement <16 x i8> %x, i32 5
  %tmp12 = insertelement <8 x i8> %tmp10, i8 %tmp11, i32 5
  %tmp13 = extractelement <16 x i8> %x, i32 6
  %tmp14 = insertelement <8 x i8> %tmp12, i8 %tmp13, i32 6
  %tmp15 = extractelement <16 x i8> %x, i32 7
  %tmp16 = insertelement <8 x i8> %tmp14, i8 %tmp15, i32 7
  ret <8 x i8> %tmp16
}

; Returns lane %idx of <2 x i16> %x as a scalar i16.
; The two expected sequences differ in how they lay the vector out on the
; stack: SDAG stores d0 whole and indexes 4-byte slots (bfi ..., #2, #1 then
; ldr w0), while GISEL stores h0 and h1 to adjacent halfword slots and loads
; with ldrh scaled by 2.
define i16 @test_varidx_extract_v2s16(<2 x i16> %x, i32 %idx) {
; CHECK-SDAG-LABEL: test_varidx_extract_v2s16:
; CHECK-SDAG: // %bb.0:
; CHECK-SDAG-NEXT: sub sp, sp, #16
; CHECK-SDAG-NEXT: .cfi_def_cfa_offset 16
; CHECK-SDAG-NEXT: add x8, sp, #8
; CHECK-SDAG-NEXT: // kill: def $w0 killed $w0 def $x0
; CHECK-SDAG-NEXT: str d0, [sp, #8]
; CHECK-SDAG-NEXT: bfi x8, x0, #2, #1
; CHECK-SDAG-NEXT: ldr w0, [x8]
; CHECK-SDAG-NEXT: add sp, sp, #16
; CHECK-SDAG-NEXT: ret
;
; CHECK-GISEL-LABEL: test_varidx_extract_v2s16:
; CHECK-GISEL: // %bb.0:
; CHECK-GISEL-NEXT: sub sp, sp, #16
; CHECK-GISEL-NEXT: .cfi_def_cfa_offset 16
; CHECK-GISEL-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-GISEL-NEXT: mov s1, v0.s[1]
; CHECK-GISEL-NEXT: mov w9, w0
; CHECK-GISEL-NEXT: add x8, sp, #12
; CHECK-GISEL-NEXT: str h0, [sp, #12]
; CHECK-GISEL-NEXT: and x9, x9, #0x1
; CHECK-GISEL-NEXT: str h1, [sp, #14]
; CHECK-GISEL-NEXT: ldrh w0, [x8, x9, lsl #1]
; CHECK-GISEL-NEXT: add sp, sp, #16
; CHECK-GISEL-NEXT: ret
  %tmp = extractelement <2 x i16> %x, i32 %idx
  ret i16 %tmp
}

; Lane %idx of <4 x i16> %x goes into lane 0 of a <2 x i16> result; lane 1 is
; copied from the constant lane 1 of %x.
define <2 x i16> @test_varidx_extract_v4s16(<4 x i16> %x, i32 %idx) {
; CHECK-SDAG-LABEL: test_varidx_extract_v4s16:
; CHECK-SDAG: // %bb.0:
; CHECK-SDAG-NEXT: sub sp, sp, #16
; CHECK-SDAG-NEXT: .cfi_def_cfa_offset 16
; CHECK-SDAG-NEXT: add x8, sp, #8
; CHECK-SDAG-NEXT: // kill: def $w0 killed $w0 def $x0
; CHECK-SDAG-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-SDAG-NEXT: str d0, [sp, #8]
; CHECK-SDAG-NEXT: umov w9, v0.h[1]
; CHECK-SDAG-NEXT: bfi x8, x0, #1, #2
; CHECK-SDAG-NEXT: ld1 { v0.h }[0], [x8]
; CHECK-SDAG-NEXT: mov v0.s[1], w9
; CHECK-SDAG-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-SDAG-NEXT: add sp, sp, #16
; CHECK-SDAG-NEXT: ret
;
; CHECK-GISEL-LABEL: test_varidx_extract_v4s16:
; CHECK-GISEL: // %bb.0:
; CHECK-GISEL-NEXT: sub sp, sp, #16
; CHECK-GISEL-NEXT: .cfi_def_cfa_offset 16
; CHECK-GISEL-NEXT: mov w9, w0
; CHECK-GISEL-NEXT: mov w8, #2 // =0x2
; CHECK-GISEL-NEXT: add x10, sp, #8
; CHECK-GISEL-NEXT: and x9, x9, #0x3
; CHECK-GISEL-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-GISEL-NEXT: str d0, [sp, #8]
; CHECK-GISEL-NEXT: madd x8, x9, x8, x10
; CHECK-GISEL-NEXT: umov w9, v0.h[1]
; CHECK-GISEL-NEXT: fmov s1, w9
; CHECK-GISEL-NEXT: ldr h0, [x8]
; CHECK-GISEL-NEXT: mov v0.s[1], v1.s[0]
; CHECK-GISEL-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-GISEL-NEXT: add sp, sp, #16
; CHECK-GISEL-NEXT: ret
  %tmp = extractelement <4 x i16> %x, i32 %idx
  %tmp2 = insertelement <2 x i16> undef, i16 %tmp, i32 0
  %tmp3 = extractelement <4 x i16> %x, i32 1
  %tmp4 = insertelement <2 x i16> %tmp2, i16 %tmp3, i32 1
  ret <2 x i16> %tmp4
}

; Lane %idx of <8 x i16> %x goes into lane 0 of a <4 x i16> result; lanes 1-3
; are copied from the constant lanes 1-3 of %x.
define <4 x i16> @test_varidx_extract_v8s16(<8 x i16> %x, i32 %idx) {
; CHECK-SDAG-LABEL: test_varidx_extract_v8s16:
; CHECK-SDAG: // %bb.0:
; CHECK-SDAG-NEXT: sub sp, sp, #16
; CHECK-SDAG-NEXT: .cfi_def_cfa_offset 16
; CHECK-SDAG-NEXT: mov x8, sp
; CHECK-SDAG-NEXT: // kill: def $w0 killed $w0 def $x0
; CHECK-SDAG-NEXT: str q0, [sp]
; CHECK-SDAG-NEXT: bfi x8, x0, #1, #3
; CHECK-SDAG-NEXT: ldr h1, [x8]
; CHECK-SDAG-NEXT: mov v1.h[1], v0.h[1]
; CHECK-SDAG-NEXT: mov v1.h[2], v0.h[2]
; CHECK-SDAG-NEXT: mov v1.h[3], v0.h[3]
; CHECK-SDAG-NEXT: fmov d0, d1
; CHECK-SDAG-NEXT: add sp, sp, #16
; CHECK-SDAG-NEXT: ret
;
; CHECK-GISEL-LABEL: test_varidx_extract_v8s16:
; CHECK-GISEL: // %bb.0:
; CHECK-GISEL-NEXT: sub sp, sp, #16
; CHECK-GISEL-NEXT: .cfi_def_cfa_offset 16
; CHECK-GISEL-NEXT: mov w9, w0
; CHECK-GISEL-NEXT: mov h2, v0.h[1]
; CHECK-GISEL-NEXT: mov x8, sp
; CHECK-GISEL-NEXT: str q0, [sp]
; CHECK-GISEL-NEXT: and x9, x9, #0x7
; CHECK-GISEL-NEXT: mov h3, v0.h[2]
; CHECK-GISEL-NEXT: ldr h1, [x8, x9, lsl #1]
; CHECK-GISEL-NEXT: mov h0, v0.h[3]
; CHECK-GISEL-NEXT: mov v1.h[1], v2.h[0]
; CHECK-GISEL-NEXT: mov v1.h[2], v3.h[0]
; CHECK-GISEL-NEXT: mov v1.h[3], v0.h[0]
; CHECK-GISEL-NEXT: fmov d0, d1
; CHECK-GISEL-NEXT: add sp, sp, #16
; CHECK-GISEL-NEXT: ret
  %tmp = extractelement <8 x i16> %x, i32 %idx
  %tmp2 = insertelement <4 x i16> undef, i16 %tmp, i32 0
  %tmp3 = extractelement <8 x i16> %x, i32 1
  %tmp4 = insertelement <4 x i16> %tmp2, i16 %tmp3, i32 1
  %tmp5 = extractelement <8 x i16> %x, i32 2
  %tmp6 = insertelement <4 x i16> %tmp4, i16 %tmp5, i32 2
  %tmp7 = extractelement <8 x i16> %x, i32 3
  %tmp8 = insertelement <4 x i16> %tmp6, i16 %tmp7, i32 3
  ret <4 x i16> %tmp8
}

; Returns lane %idx of <2 x i32> %x as a scalar i32.
define i32 @test_varidx_extract_v2s32(<2 x i32> %x, i32 %idx) {
; CHECK-SDAG-LABEL: test_varidx_extract_v2s32:
; CHECK-SDAG: // %bb.0:
; CHECK-SDAG-NEXT: sub sp, sp, #16
; CHECK-SDAG-NEXT: .cfi_def_cfa_offset 16
; CHECK-SDAG-NEXT: add x8, sp, #8
; CHECK-SDAG-NEXT: // kill: def $w0 killed $w0 def $x0
; CHECK-SDAG-NEXT: str d0, [sp, #8]
; CHECK-SDAG-NEXT: bfi x8, x0, #2, #1
; CHECK-SDAG-NEXT: ldr w0, [x8]
; CHECK-SDAG-NEXT: add sp, sp, #16
; CHECK-SDAG-NEXT: ret
;
; CHECK-GISEL-LABEL: test_varidx_extract_v2s32:
; CHECK-GISEL: // %bb.0:
; CHECK-GISEL-NEXT: sub sp, sp, #16
; CHECK-GISEL-NEXT: .cfi_def_cfa_offset 16
; CHECK-GISEL-NEXT: mov w9, w0
; CHECK-GISEL-NEXT: add x8, sp, #8
; CHECK-GISEL-NEXT: str d0, [sp, #8]
; CHECK-GISEL-NEXT: and x9, x9, #0x1
; CHECK-GISEL-NEXT: ldr w0, [x8, x9, lsl #2]
; CHECK-GISEL-NEXT: add sp, sp, #16
; CHECK-GISEL-NEXT: ret
  %tmp = extractelement <2 x i32> %x, i32 %idx
  ret i32 %tmp
}

; Lane %idx of <4 x i32> %x goes into lane 0 of a <2 x i32> result; lane 1 is
; copied from the constant lane 1 of %x.
define <2 x i32> @test_varidx_extract_v4s32(<4 x i32> %x, i32 %idx) {
; CHECK-SDAG-LABEL: test_varidx_extract_v4s32:
; CHECK-SDAG: // %bb.0:
; CHECK-SDAG-NEXT: sub sp, sp, #16
; CHECK-SDAG-NEXT: .cfi_def_cfa_offset 16
; CHECK-SDAG-NEXT: mov x8, sp
; CHECK-SDAG-NEXT: // kill: def $w0 killed $w0 def $x0
; CHECK-SDAG-NEXT: str q0, [sp]
; CHECK-SDAG-NEXT: bfi x8, x0, #2, #2
; CHECK-SDAG-NEXT: ldr s1, [x8]
; CHECK-SDAG-NEXT: mov v1.s[1], v0.s[1]
; CHECK-SDAG-NEXT: fmov d0, d1
; CHECK-SDAG-NEXT: add sp, sp, #16
; CHECK-SDAG-NEXT: ret
;
; CHECK-GISEL-LABEL: test_varidx_extract_v4s32:
; CHECK-GISEL: // %bb.0:
; CHECK-GISEL-NEXT: sub sp, sp, #16
; CHECK-GISEL-NEXT: .cfi_def_cfa_offset 16
; CHECK-GISEL-NEXT: mov w9, w0
; CHECK-GISEL-NEXT: mov s1, v0.s[1]
; CHECK-GISEL-NEXT: mov x8, sp
; CHECK-GISEL-NEXT: str q0, [sp]
; CHECK-GISEL-NEXT: and x9, x9, #0x3
; CHECK-GISEL-NEXT: ldr s0, [x8, x9, lsl #2]
; CHECK-GISEL-NEXT: mov v0.s[1], v1.s[0]
; CHECK-GISEL-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-GISEL-NEXT: add sp, sp, #16
; CHECK-GISEL-NEXT: ret
  %tmp = extractelement <4 x i32> %x, i32 %idx
  %tmp2 = insertelement <2 x i32> undef, i32 %tmp, i32 0
  %tmp3 = extractelement <4 x i32> %x, i32 1
  %tmp4 = insertelement <2 x i32> %tmp2, i32 %tmp3, i32 1
  ret <2 x i32> %tmp4
}

; Returns lane %idx of <2 x i64> %x as a scalar i64.
define i64 @test_varidx_extract_v2s64(<2 x i64> %x, i32 %idx) {
; CHECK-SDAG-LABEL: test_varidx_extract_v2s64:
; CHECK-SDAG: // %bb.0:
; CHECK-SDAG-NEXT: sub sp, sp, #16
; CHECK-SDAG-NEXT: .cfi_def_cfa_offset 16
; CHECK-SDAG-NEXT: mov x8, sp
; CHECK-SDAG-NEXT: // kill: def $w0 killed $w0 def $x0
; CHECK-SDAG-NEXT: str q0, [sp]
; CHECK-SDAG-NEXT: bfi x8, x0, #3, #1
; CHECK-SDAG-NEXT: ldr x0, [x8]
; CHECK-SDAG-NEXT: add sp, sp, #16
; CHECK-SDAG-NEXT: ret
;
; CHECK-GISEL-LABEL: test_varidx_extract_v2s64:
; CHECK-GISEL: // %bb.0:
; CHECK-GISEL-NEXT: sub sp, sp, #16
; CHECK-GISEL-NEXT: .cfi_def_cfa_offset 16
; CHECK-GISEL-NEXT: mov w9, w0
; CHECK-GISEL-NEXT: mov x8, sp
; CHECK-GISEL-NEXT: str q0, [sp]
; CHECK-GISEL-NEXT: and x9, x9, #0x1
; CHECK-GISEL-NEXT: ldr x0, [x8, x9, lsl #3]
; CHECK-GISEL-NEXT: add sp, sp, #16
; CHECK-GISEL-NEXT: ret
  %tmp = extractelement <2 x i64> %x, i32 %idx
  ret i64 %tmp
}

; Returns lane %idx of <2 x ptr> %x as a scalar ptr. The expected instruction
; sequences are the same as for the <2 x i64> case above.
define ptr @test_varidx_extract_v2p0(<2 x ptr> %x, i32 %idx) {
; CHECK-SDAG-LABEL: test_varidx_extract_v2p0:
; CHECK-SDAG: // %bb.0:
; CHECK-SDAG-NEXT: sub sp, sp, #16
; CHECK-SDAG-NEXT: .cfi_def_cfa_offset 16
; CHECK-SDAG-NEXT: mov x8, sp
; CHECK-SDAG-NEXT: // kill: def $w0 killed $w0 def $x0
; CHECK-SDAG-NEXT: str q0, [sp]
; CHECK-SDAG-NEXT: bfi x8, x0, #3, #1
; CHECK-SDAG-NEXT: ldr x0, [x8]
; CHECK-SDAG-NEXT: add sp, sp, #16
; CHECK-SDAG-NEXT: ret
;
; CHECK-GISEL-LABEL: test_varidx_extract_v2p0:
; CHECK-GISEL: // %bb.0:
; CHECK-GISEL-NEXT: sub sp, sp, #16
; CHECK-GISEL-NEXT: .cfi_def_cfa_offset 16
; CHECK-GISEL-NEXT: mov w9, w0
; CHECK-GISEL-NEXT: mov x8, sp
; CHECK-GISEL-NEXT: str q0, [sp]
; CHECK-GISEL-NEXT: and x9, x9, #0x1
; CHECK-GISEL-NEXT: ldr x0, [x8, x9, lsl #3]
; CHECK-GISEL-NEXT: add sp, sp, #16
; CHECK-GISEL-NEXT: ret
  %tmp = extractelement <2 x ptr> %x, i32 %idx
  ret ptr %tmp
}