; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 ; RUN: llc < %s | FileCheck %s target triple = "aarch64-unknown-unknown-eabi-elf" ; This test verifies that call arguments and results are not coalesced ; with SVE vector registers by the coalescer, such that no 'mul vl' ; ldr/str pairs are generated in the streaming-mode-changing call ; sequence. ; ; Scalar arguments ; define void @dont_coalesce_arg_i8(i8 %arg, ptr %ptr) #0 { ; CHECK-LABEL: dont_coalesce_arg_i8: ; CHECK: // %bb.0: ; CHECK-NEXT: stp d15, d14, [sp, #-96]! // 16-byte Folded Spill ; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill ; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill ; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill ; CHECK-NEXT: str x29, [sp, #64] // 8-byte Folded Spill ; CHECK-NEXT: stp x30, x19, [sp, #80] // 16-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 ; CHECK-NEXT: fmov s0, w0 ; CHECK-NEXT: mov x19, x1 ; CHECK-NEXT: str z0, [sp] // 16-byte Folded Spill ; CHECK-NEXT: smstop sm ; CHECK-NEXT: bl use_i8 ; CHECK-NEXT: smstart sm ; CHECK-NEXT: ptrue p0.b ; CHECK-NEXT: ldr z0, [sp] // 16-byte Folded Reload ; CHECK-NEXT: st1b { z0.b }, p0, [x19] ; CHECK-NEXT: addvl sp, sp, #1 ; CHECK-NEXT: ldp x30, x19, [sp, #80] // 16-byte Folded Reload ; CHECK-NEXT: ldr x29, [sp, #64] // 8-byte Folded Reload ; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload ; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload ; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload ; CHECK-NEXT: ldp d15, d14, [sp], #96 // 16-byte Folded Reload ; CHECK-NEXT: ret %vec = insertelement poison, i8 %arg, i32 0 call void @use_i8(i8 %arg) store %vec, ptr %ptr ret void } define void @dont_coalesce_arg_i16(i16 %arg, ptr %ptr) #0 { ; CHECK-LABEL: dont_coalesce_arg_i16: ; CHECK: // %bb.0: ; CHECK-NEXT: stp d15, d14, [sp, #-96]! // 16-byte Folded Spill ; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill ; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill ; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill ; CHECK-NEXT: str x29, [sp, #64] // 8-byte Folded Spill ; CHECK-NEXT: stp x30, x19, [sp, #80] // 16-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 ; CHECK-NEXT: fmov s0, w0 ; CHECK-NEXT: mov x19, x1 ; CHECK-NEXT: str z0, [sp] // 16-byte Folded Spill ; CHECK-NEXT: smstop sm ; CHECK-NEXT: bl use_i16 ; CHECK-NEXT: smstart sm ; CHECK-NEXT: ptrue p0.h ; CHECK-NEXT: ldr z0, [sp] // 16-byte Folded Reload ; CHECK-NEXT: st1h { z0.h }, p0, [x19] ; CHECK-NEXT: addvl sp, sp, #1 ; CHECK-NEXT: ldp x30, x19, [sp, #80] // 16-byte Folded Reload ; CHECK-NEXT: ldr x29, [sp, #64] // 8-byte Folded Reload ; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload ; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload ; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload ; CHECK-NEXT: ldp d15, d14, [sp], #96 // 16-byte Folded Reload ; CHECK-NEXT: ret %vec = insertelement poison, i16 %arg, i32 0 call void @use_i16(i16 %arg) store %vec, ptr %ptr ret void } define void @dont_coalesce_arg_i32(i32 %arg, ptr %ptr) #0 { ; CHECK-LABEL: dont_coalesce_arg_i32: ; CHECK: // %bb.0: ; CHECK-NEXT: stp d15, d14, [sp, #-96]! // 16-byte Folded Spill ; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill ; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill ; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill ; CHECK-NEXT: str x29, [sp, #64] // 8-byte Folded Spill ; CHECK-NEXT: stp x30, x19, [sp, #80] // 16-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 ; CHECK-NEXT: fmov s0, w0 ; CHECK-NEXT: mov x19, x1 ; CHECK-NEXT: str z0, [sp] // 16-byte Folded Spill ; CHECK-NEXT: smstop sm ; CHECK-NEXT: bl use_i32 ; CHECK-NEXT: smstart sm ; CHECK-NEXT: ptrue p0.s ; CHECK-NEXT: ldr z0, [sp] // 16-byte Folded Reload ; CHECK-NEXT: st1w { z0.s }, p0, [x19] ; CHECK-NEXT: addvl sp, sp, #1 ; CHECK-NEXT: ldp x30, x19, [sp, #80] // 16-byte Folded Reload ; CHECK-NEXT: ldr x29, [sp, #64] // 8-byte Folded Reload ; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload ; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload ; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload ; CHECK-NEXT: ldp d15, d14, [sp], #96 // 16-byte Folded Reload ; CHECK-NEXT: ret %vec = insertelement poison, i32 %arg, i32 0 call void @use_i32(i32 %arg) store %vec, ptr %ptr ret void } define void @dont_coalesce_arg_i64(i64 %arg, ptr %ptr) #0 { ; CHECK-LABEL: dont_coalesce_arg_i64: ; CHECK: // %bb.0: ; CHECK-NEXT: stp d15, d14, [sp, #-96]! // 16-byte Folded Spill ; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill ; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill ; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill ; CHECK-NEXT: str x29, [sp, #64] // 8-byte Folded Spill ; CHECK-NEXT: stp x30, x19, [sp, #80] // 16-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 ; CHECK-NEXT: fmov d0, x0 ; CHECK-NEXT: mov x19, x1 ; CHECK-NEXT: str z0, [sp] // 16-byte Folded Spill ; CHECK-NEXT: smstop sm ; CHECK-NEXT: bl use_i64 ; CHECK-NEXT: smstart sm ; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: ldr z0, [sp] // 16-byte Folded Reload ; CHECK-NEXT: st1d { z0.d }, p0, [x19] ; CHECK-NEXT: addvl sp, sp, #1 ; CHECK-NEXT: ldp x30, x19, [sp, #80] // 16-byte Folded Reload ; CHECK-NEXT: ldr x29, [sp, #64] // 8-byte Folded Reload ; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload ; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload ; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload ; CHECK-NEXT: ldp d15, d14, [sp], #96 // 16-byte Folded Reload ; CHECK-NEXT: ret %vec = insertelement poison, i64 %arg, i32 0 call void @use_i64(i64 %arg) store %vec, ptr %ptr ret void } define void @dont_coalesce_arg_f16(half %arg, ptr %ptr) #0 { ; CHECK-LABEL: dont_coalesce_arg_f16: ; CHECK: // %bb.0: ; CHECK-NEXT: stp d15, d14, [sp, #-96]! // 16-byte Folded Spill ; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill ; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill ; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill ; CHECK-NEXT: str x29, [sp, #64] // 8-byte Folded Spill ; CHECK-NEXT: stp x30, x19, [sp, #80] // 16-byte Folded Spill ; CHECK-NEXT: sub sp, sp, #16 ; CHECK-NEXT: addvl sp, sp, #-1 ; CHECK-NEXT: // kill: def $h0 killed $h0 def $z0 ; CHECK-NEXT: add x8, sp, #16 ; CHECK-NEXT: mov x19, x0 ; CHECK-NEXT: str z0, [x8] // 16-byte Folded Spill ; CHECK-NEXT: // kill: def $h0 killed $h0 killed $z0 ; CHECK-NEXT: str h0, [sp, #14] // 2-byte Folded Spill ; CHECK-NEXT: smstop sm ; CHECK-NEXT: ldr h0, [sp, #14] // 2-byte Folded Reload ; CHECK-NEXT: bl use_f16 ; CHECK-NEXT: smstart sm ; CHECK-NEXT: ptrue p0.h ; CHECK-NEXT: add x8, sp, #16 ; CHECK-NEXT: ldr z0, [x8] // 16-byte Folded Reload ; CHECK-NEXT: st1h { z0.h }, p0, [x19] ; CHECK-NEXT: addvl sp, sp, #1 ; CHECK-NEXT: add sp, sp, #16 ; CHECK-NEXT: ldp x30, x19, [sp, #80] // 16-byte Folded Reload ; CHECK-NEXT: ldr x29, [sp, #64] // 8-byte Folded Reload ; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload ; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload ; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload ; CHECK-NEXT: ldp d15, d14, [sp], #96 // 16-byte Folded Reload ; CHECK-NEXT: ret %vec = insertelement poison, half %arg, i32 0 call void @use_f16(half %arg) store %vec, ptr %ptr ret void } define void @dont_coalesce_arg_f32(float %arg, ptr %ptr) #0 { ; CHECK-LABEL: dont_coalesce_arg_f32: ; CHECK: // %bb.0: ; CHECK-NEXT: stp d15, d14, [sp, #-96]! // 16-byte Folded Spill ; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill ; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill ; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill ; CHECK-NEXT: str x29, [sp, #64] // 8-byte Folded Spill ; CHECK-NEXT: stp x30, x19, [sp, #80] // 16-byte Folded Spill ; CHECK-NEXT: sub sp, sp, #16 ; CHECK-NEXT: addvl sp, sp, #-1 ; CHECK-NEXT: // kill: def $s0 killed $s0 def $z0 ; CHECK-NEXT: add x8, sp, #16 ; CHECK-NEXT: mov x19, x0 ; CHECK-NEXT: str z0, [x8] // 16-byte Folded Spill ; CHECK-NEXT: // kill: def $s0 killed $s0 killed $z0 ; CHECK-NEXT: str s0, [sp, #12] // 4-byte Folded Spill ; CHECK-NEXT: smstop sm ; CHECK-NEXT: ldr s0, [sp, #12] // 4-byte Folded Reload ; CHECK-NEXT: bl use_f32 ; CHECK-NEXT: smstart sm ; CHECK-NEXT: ptrue p0.s ; CHECK-NEXT: add x8, sp, #16 ; CHECK-NEXT: ldr z0, [x8] // 16-byte Folded Reload ; CHECK-NEXT: st1w { z0.s }, p0, [x19] ; CHECK-NEXT: addvl sp, sp, #1 ; CHECK-NEXT: add sp, sp, #16 ; CHECK-NEXT: ldp x30, x19, [sp, #80] // 16-byte Folded Reload ; CHECK-NEXT: ldr x29, [sp, #64] // 8-byte Folded Reload ; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload ; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload ; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload ; CHECK-NEXT: ldp d15, d14, [sp], #96 // 16-byte Folded Reload ; CHECK-NEXT: ret %vec = insertelement poison, float %arg, i32 0 call void @use_f32(float %arg) store %vec, ptr %ptr ret void } define void @dont_coalesce_arg_f64(double %arg, ptr %ptr) #0 { ; CHECK-LABEL: dont_coalesce_arg_f64: ; CHECK: // %bb.0: ; CHECK-NEXT: stp d15, d14, [sp, #-96]! // 16-byte Folded Spill ; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill ; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill ; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill ; CHECK-NEXT: str x29, [sp, #64] // 8-byte Folded Spill ; CHECK-NEXT: stp x30, x19, [sp, #80] // 16-byte Folded Spill ; CHECK-NEXT: sub sp, sp, #16 ; CHECK-NEXT: addvl sp, sp, #-1 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: add x8, sp, #16 ; CHECK-NEXT: mov x19, x0 ; CHECK-NEXT: str z0, [x8] // 16-byte Folded Spill ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: str d0, [sp, #8] // 8-byte Folded Spill ; CHECK-NEXT: smstop sm ; CHECK-NEXT: ldr d0, [sp, #8] // 8-byte Folded Reload ; CHECK-NEXT: bl use_f64 ; CHECK-NEXT: smstart sm ; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: add x8, sp, #16 ; CHECK-NEXT: ldr z0, [x8] // 16-byte Folded Reload ; CHECK-NEXT: st1d { z0.d }, p0, [x19] ; CHECK-NEXT: addvl sp, sp, #1 ; CHECK-NEXT: add sp, sp, #16 ; CHECK-NEXT: ldp x30, x19, [sp, #80] // 16-byte Folded Reload ; CHECK-NEXT: ldr x29, [sp, #64] // 8-byte Folded Reload ; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload ; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload ; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload ; CHECK-NEXT: ldp d15, d14, [sp], #96 // 16-byte Folded Reload ; CHECK-NEXT: ret %vec = insertelement poison, double %arg, i32 0 call void @use_f64(double %arg) store %vec, ptr %ptr ret void } ; ; Single-element vector arguments ; define void @dont_coalesce_arg_v1i8(<1 x i8> %arg, ptr %ptr) #0 { ; CHECK-LABEL: dont_coalesce_arg_v1i8: ; CHECK: // %bb.0: ; CHECK-NEXT: stp d15, d14, [sp, #-96]! // 16-byte Folded Spill ; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill ; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill ; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill ; CHECK-NEXT: str x29, [sp, #64] // 8-byte Folded Spill ; CHECK-NEXT: stp x30, x19, [sp, #80] // 16-byte Folded Spill ; CHECK-NEXT: sub sp, sp, #16 ; CHECK-NEXT: addvl sp, sp, #-1 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: add x8, sp, #16 ; CHECK-NEXT: mov x19, x0 ; CHECK-NEXT: str z0, [x8] // 16-byte Folded Spill ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: str d0, [sp, #8] // 8-byte Folded Spill ; CHECK-NEXT: smstop sm ; CHECK-NEXT: ldr d0, [sp, #8] // 8-byte Folded Reload ; CHECK-NEXT: bl use_v16i8 ; CHECK-NEXT: smstart sm ; CHECK-NEXT: ptrue p0.b ; CHECK-NEXT: add x8, sp, #16 ; CHECK-NEXT: ldr z0, [x8] // 16-byte Folded Reload ; CHECK-NEXT: st1b { z0.b }, p0, [x19] ; CHECK-NEXT: addvl sp, sp, #1 ; CHECK-NEXT: add sp, sp, #16 ; CHECK-NEXT: ldp x30, x19, [sp, #80] // 16-byte Folded Reload ; CHECK-NEXT: ldr x29, [sp, #64] // 8-byte Folded Reload ; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload ; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload ; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload ; CHECK-NEXT: ldp d15, d14, [sp], #96 // 16-byte Folded Reload ; CHECK-NEXT: ret %elt = extractelement <1 x i8> %arg, i32 0 %vec = insertelement poison, i8 %elt, i32 0 call void @use_v16i8(<1 x i8> %arg) store %vec, ptr %ptr ret void } define void @dont_coalesce_arg_v1i16(<1 x i16> %arg, ptr %ptr) #0 { ; CHECK-LABEL: dont_coalesce_arg_v1i16: ; CHECK: // %bb.0: ; CHECK-NEXT: stp d15, d14, [sp, #-96]! // 16-byte Folded Spill ; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill ; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill ; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill ; CHECK-NEXT: str x29, [sp, #64] // 8-byte Folded Spill ; CHECK-NEXT: stp x30, x19, [sp, #80] // 16-byte Folded Spill ; CHECK-NEXT: sub sp, sp, #16 ; CHECK-NEXT: addvl sp, sp, #-1 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: add x8, sp, #16 ; CHECK-NEXT: mov x19, x0 ; CHECK-NEXT: str z0, [x8] // 16-byte Folded Spill ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: str d0, [sp, #8] // 8-byte Folded Spill ; CHECK-NEXT: smstop sm ; CHECK-NEXT: ldr d0, [sp, #8] // 8-byte Folded Reload ; CHECK-NEXT: bl use_v8i16 ; CHECK-NEXT: smstart sm ; CHECK-NEXT: ptrue p0.h ; CHECK-NEXT: add x8, sp, #16 ; CHECK-NEXT: ldr z0, [x8] // 16-byte Folded Reload ; CHECK-NEXT: st1h { z0.h }, p0, [x19] ; CHECK-NEXT: addvl sp, sp, #1 ; CHECK-NEXT: add sp, sp, #16 ; CHECK-NEXT: ldp x30, x19, [sp, #80] // 16-byte Folded Reload ; CHECK-NEXT: ldr x29, [sp, #64] // 8-byte Folded Reload ; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload ; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload ; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload ; CHECK-NEXT: ldp d15, d14, [sp], #96 // 16-byte Folded Reload ; CHECK-NEXT: ret %elt = extractelement <1 x i16> %arg, i32 0 %vec = insertelement poison, i16 %elt, i32 0 call void @use_v8i16(<1 x i16> %arg) store %vec, ptr %ptr ret void } define void @dont_coalesce_arg_v1i32(<1 x i32> %arg, ptr %ptr) #0 { ; CHECK-LABEL: dont_coalesce_arg_v1i32: ; CHECK: // %bb.0: ; CHECK-NEXT: stp d15, d14, [sp, #-96]! // 16-byte Folded Spill ; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill ; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill ; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill ; CHECK-NEXT: str x29, [sp, #64] // 8-byte Folded Spill ; CHECK-NEXT: stp x30, x19, [sp, #80] // 16-byte Folded Spill ; CHECK-NEXT: sub sp, sp, #16 ; CHECK-NEXT: addvl sp, sp, #-1 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: add x8, sp, #16 ; CHECK-NEXT: mov x19, x0 ; CHECK-NEXT: str z0, [x8] // 16-byte Folded Spill ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: str d0, [sp, #8] // 8-byte Folded Spill ; CHECK-NEXT: smstop sm ; CHECK-NEXT: ldr d0, [sp, #8] // 8-byte Folded Reload ; CHECK-NEXT: bl use_v4i32 ; CHECK-NEXT: smstart sm ; CHECK-NEXT: ptrue p0.s ; CHECK-NEXT: add x8, sp, #16 ; CHECK-NEXT: ldr z0, [x8] // 16-byte Folded Reload ; CHECK-NEXT: st1w { z0.s }, p0, [x19] ; CHECK-NEXT: addvl sp, sp, #1 ; CHECK-NEXT: add sp, sp, #16 ; CHECK-NEXT: ldp x30, x19, [sp, #80] // 16-byte Folded Reload ; CHECK-NEXT: ldr x29, [sp, #64] // 8-byte Folded Reload ; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload ; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload ; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload ; CHECK-NEXT: ldp d15, d14, [sp], #96 // 16-byte Folded Reload ; CHECK-NEXT: ret %elt = extractelement <1 x i32> %arg, i32 0 %vec = insertelement poison, i32 %elt, i32 0 call void @use_v4i32(<1 x i32> %arg) store %vec, ptr %ptr ret void } define void @dont_coalesce_arg_v1i64(<1 x i64> %arg, ptr %ptr) #0 { ; CHECK-LABEL: dont_coalesce_arg_v1i64: ; CHECK: // %bb.0: ; CHECK-NEXT: stp d15, d14, [sp, #-96]! // 16-byte Folded Spill ; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill ; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill ; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill ; CHECK-NEXT: str x29, [sp, #64] // 8-byte Folded Spill ; CHECK-NEXT: stp x30, x19, [sp, #80] // 16-byte Folded Spill ; CHECK-NEXT: sub sp, sp, #16 ; CHECK-NEXT: addvl sp, sp, #-1 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: add x8, sp, #16 ; CHECK-NEXT: mov x19, x0 ; CHECK-NEXT: str z0, [x8] // 16-byte Folded Spill ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: str d0, [sp, #8] // 8-byte Folded Spill ; CHECK-NEXT: smstop sm ; CHECK-NEXT: ldr d0, [sp, #8] // 8-byte Folded Reload ; CHECK-NEXT: bl use_v2i64 ; CHECK-NEXT: smstart sm ; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: add x8, sp, #16 ; CHECK-NEXT: ldr z0, [x8] // 16-byte Folded Reload ; CHECK-NEXT: st1d { z0.d }, p0, [x19] ; CHECK-NEXT: addvl sp, sp, #1 ; CHECK-NEXT: add sp, sp, #16 ; CHECK-NEXT: ldp x30, x19, [sp, #80] // 16-byte Folded Reload ; CHECK-NEXT: ldr x29, [sp, #64] // 8-byte Folded Reload ; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload ; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload ; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload ; CHECK-NEXT: ldp d15, d14, [sp], #96 // 16-byte Folded Reload ; CHECK-NEXT: ret %elt = extractelement <1 x i64> %arg, i32 0 %vec = insertelement poison, i64 %elt, i32 0 call void @use_v2i64(<1 x i64> %arg) store %vec, ptr %ptr ret void } define void @dont_coalesce_arg_v1f16(<1 x half> %arg, ptr %ptr) #0 { ; CHECK-LABEL: dont_coalesce_arg_v1f16: ; CHECK: // %bb.0: ; CHECK-NEXT: stp d15, d14, [sp, #-96]! // 16-byte Folded Spill ; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill ; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill ; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill ; CHECK-NEXT: str x29, [sp, #64] // 8-byte Folded Spill ; CHECK-NEXT: stp x30, x19, [sp, #80] // 16-byte Folded Spill ; CHECK-NEXT: sub sp, sp, #16 ; CHECK-NEXT: addvl sp, sp, #-1 ; CHECK-NEXT: // kill: def $h0 killed $h0 def $z0 ; CHECK-NEXT: add x8, sp, #16 ; CHECK-NEXT: mov x19, x0 ; CHECK-NEXT: str z0, [x8] // 16-byte Folded Spill ; CHECK-NEXT: // kill: def $h0 killed $h0 killed $z0 ; CHECK-NEXT: str h0, [sp, #14] // 2-byte Folded Spill ; CHECK-NEXT: smstop sm ; CHECK-NEXT: ldr h0, [sp, #14] // 2-byte Folded Reload ; CHECK-NEXT: bl use_v8f16 ; CHECK-NEXT: smstart sm ; CHECK-NEXT: ptrue p0.h ; CHECK-NEXT: add x8, sp, #16 ; CHECK-NEXT: ldr z0, [x8] // 16-byte Folded Reload ; CHECK-NEXT: st1h { z0.h }, p0, [x19] ; CHECK-NEXT: addvl sp, sp, #1 ; CHECK-NEXT: add sp, sp, #16 ; CHECK-NEXT: ldp x30, x19, [sp, #80] // 16-byte Folded Reload ; CHECK-NEXT: ldr x29, [sp, #64] // 8-byte Folded Reload ; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload ; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload ; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload ; CHECK-NEXT: ldp d15, d14, [sp], #96 // 16-byte Folded Reload ; CHECK-NEXT: ret %elt = extractelement <1 x half> %arg, i32 0 %vec = insertelement poison, half %elt, i32 0 call void @use_v8f16(<1 x half> %arg) store %vec, ptr %ptr ret void } define void @dont_coalesce_arg_v1f32(<1 x float> %arg, ptr %ptr) #0 { ; CHECK-LABEL: dont_coalesce_arg_v1f32: ; CHECK: // %bb.0: ; CHECK-NEXT: stp d15, d14, [sp, #-96]! // 16-byte Folded Spill ; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill ; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill ; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill ; CHECK-NEXT: str x29, [sp, #64] // 8-byte Folded Spill ; CHECK-NEXT: stp x30, x19, [sp, #80] // 16-byte Folded Spill ; CHECK-NEXT: sub sp, sp, #16 ; CHECK-NEXT: addvl sp, sp, #-1 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: add x8, sp, #16 ; CHECK-NEXT: mov x19, x0 ; CHECK-NEXT: str z0, [x8] // 16-byte Folded Spill ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: str d0, [sp, #8] // 8-byte Folded Spill ; CHECK-NEXT: smstop sm ; CHECK-NEXT: ldr d0, [sp, #8] // 8-byte Folded Reload ; CHECK-NEXT: bl use_v4f32 ; CHECK-NEXT: smstart sm ; CHECK-NEXT: ptrue p0.s ; CHECK-NEXT: add x8, sp, #16 ; CHECK-NEXT: ldr z0, [x8] // 16-byte Folded Reload ; CHECK-NEXT: st1w { z0.s }, p0, [x19] ; CHECK-NEXT: addvl sp, sp, #1 ; CHECK-NEXT: add sp, sp, #16 ; CHECK-NEXT: ldp x30, x19, [sp, #80] // 16-byte Folded Reload ; CHECK-NEXT: ldr x29, [sp, #64] // 8-byte Folded Reload ; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload ; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload ; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload ; CHECK-NEXT: ldp d15, d14, [sp], #96 // 16-byte Folded Reload ; CHECK-NEXT: ret %elt = extractelement <1 x float> %arg, i32 0 %vec = insertelement poison, float %elt, i32 0 call void @use_v4f32(<1 x float> %arg) store %vec, ptr %ptr ret void } define void @dont_coalesce_arg_v1f64(<1 x double> %arg, ptr %ptr) #0 { ; CHECK-LABEL: dont_coalesce_arg_v1f64: ; CHECK: // %bb.0: ; CHECK-NEXT: stp d15, d14, [sp, #-96]! // 16-byte Folded Spill ; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill ; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill ; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill ; CHECK-NEXT: str x29, [sp, #64] // 8-byte Folded Spill ; CHECK-NEXT: stp x30, x19, [sp, #80] // 16-byte Folded Spill ; CHECK-NEXT: sub sp, sp, #16 ; CHECK-NEXT: addvl sp, sp, #-1 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: add x8, sp, #16 ; CHECK-NEXT: mov x19, x0 ; CHECK-NEXT: str z0, [x8] // 16-byte Folded Spill ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: str d0, [sp, #8] // 8-byte Folded Spill ; CHECK-NEXT: smstop sm ; CHECK-NEXT: ldr d0, [sp, #8] // 8-byte Folded Reload ; CHECK-NEXT: bl use_v2f64 ; CHECK-NEXT: smstart sm ; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: add x8, sp, #16 ; CHECK-NEXT: ldr z0, [x8] // 16-byte Folded Reload ; CHECK-NEXT: st1d { z0.d }, p0, [x19] ; CHECK-NEXT: addvl sp, sp, #1 ; CHECK-NEXT: add sp, sp, #16 ; CHECK-NEXT: ldp x30, x19, [sp, #80] // 16-byte Folded Reload ; CHECK-NEXT: ldr x29, [sp, #64] // 8-byte Folded Reload ; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload ; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload ; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload ; CHECK-NEXT: ldp d15, d14, [sp], #96 // 16-byte Folded Reload ; CHECK-NEXT: ret %elt = extractelement <1 x double> %arg, i32 0 %vec = insertelement poison, double %elt, i32 0 call void @use_v2f64(<1 x double> %arg) store %vec, ptr %ptr ret void } ; ; Full vector arguments ; define void @dont_coalesce_arg_v16i8(<16 x i8> %arg, ptr %ptr) #0 { ; CHECK-LABEL: dont_coalesce_arg_v16i8: ; CHECK: // %bb.0: ; CHECK-NEXT: stp d15, d14, [sp, #-96]! // 16-byte Folded Spill ; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill ; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill ; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill ; CHECK-NEXT: str x29, [sp, #64] // 8-byte Folded Spill ; CHECK-NEXT: stp x30, x19, [sp, #80] // 16-byte Folded Spill ; CHECK-NEXT: sub sp, sp, #16 ; CHECK-NEXT: addvl sp, sp, #-1 ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: add x8, sp, #16 ; CHECK-NEXT: mov x19, x0 ; CHECK-NEXT: str z0, [x8] // 16-byte Folded Spill ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill ; CHECK-NEXT: smstop sm ; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload ; CHECK-NEXT: bl use_v16i8 ; CHECK-NEXT: smstart sm ; CHECK-NEXT: ptrue p0.b ; CHECK-NEXT: add x8, sp, #16 ; CHECK-NEXT: ldr z0, [x8] // 16-byte Folded Reload ; CHECK-NEXT: st1b { z0.b }, p0, [x19] ; CHECK-NEXT: addvl sp, sp, #1 ; CHECK-NEXT: add sp, sp, #16 ; CHECK-NEXT: ldp x30, x19, [sp, #80] // 16-byte Folded Reload ; CHECK-NEXT: ldr x29, [sp, #64] // 8-byte Folded Reload ; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload ; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload ; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload ; CHECK-NEXT: ldp d15, d14, [sp], #96 // 16-byte Folded Reload ; CHECK-NEXT: ret %vec = call @llvm.vector.insert.nxv16i8.v16i8( poison, <16 x i8> %arg, i64 0) call void @use_v16i8(<16 x i8> %arg) store %vec, ptr %ptr ret void } define void @dont_coalesce_arg_v8i16(<8 x i16> %arg, ptr %ptr) #0 { ; CHECK-LABEL: dont_coalesce_arg_v8i16: ; CHECK: // %bb.0: ; CHECK-NEXT: stp d15, d14, [sp, #-96]! // 16-byte Folded Spill ; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill ; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill ; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill ; CHECK-NEXT: str x29, [sp, #64] // 8-byte Folded Spill ; CHECK-NEXT: stp x30, x19, [sp, #80] // 16-byte Folded Spill ; CHECK-NEXT: sub sp, sp, #16 ; CHECK-NEXT: addvl sp, sp, #-1 ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: add x8, sp, #16 ; CHECK-NEXT: mov x19, x0 ; CHECK-NEXT: str z0, [x8] // 16-byte Folded Spill ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill ; CHECK-NEXT: smstop sm ; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload ; CHECK-NEXT: bl use_v8i16 ; CHECK-NEXT: smstart sm ; CHECK-NEXT: ptrue p0.h ; CHECK-NEXT: add x8, sp, #16 ; CHECK-NEXT: ldr z0, [x8] // 16-byte Folded Reload ; CHECK-NEXT: st1h { z0.h }, p0, [x19] ; CHECK-NEXT: addvl sp, sp, #1 ; CHECK-NEXT: add sp, sp, #16 ; CHECK-NEXT: ldp x30, x19, [sp, #80] // 16-byte Folded Reload ; CHECK-NEXT: ldr x29, [sp, #64] // 8-byte Folded Reload ; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload ; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload ; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload ; CHECK-NEXT: ldp d15, d14, [sp], #96 // 16-byte Folded Reload ; CHECK-NEXT: ret %vec = call @llvm.vector.insert.nxv8i16.v8i16( poison, <8 x i16> %arg, i64 0) call void @use_v8i16(<8 x i16> %arg) store %vec, ptr %ptr ret void } define void @dont_coalesce_arg_v4i32(<4 x i32> %arg, ptr %ptr) #0 { ; CHECK-LABEL: dont_coalesce_arg_v4i32: ; CHECK: // %bb.0: ; CHECK-NEXT: stp d15, d14, [sp, #-96]! // 16-byte Folded Spill ; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill ; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill ; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill ; CHECK-NEXT: str x29, [sp, #64] // 8-byte Folded Spill ; CHECK-NEXT: stp x30, x19, [sp, #80] // 16-byte Folded Spill ; CHECK-NEXT: sub sp, sp, #16 ; CHECK-NEXT: addvl sp, sp, #-1 ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: add x8, sp, #16 ; CHECK-NEXT: mov x19, x0 ; CHECK-NEXT: str z0, [x8] // 16-byte Folded Spill ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill ; CHECK-NEXT: smstop sm ; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload ; CHECK-NEXT: bl use_v4i32 ; CHECK-NEXT: smstart sm ; CHECK-NEXT: ptrue p0.s ; CHECK-NEXT: add x8, sp, #16 ; CHECK-NEXT: ldr z0, [x8] // 16-byte Folded Reload ; CHECK-NEXT: st1w { z0.s }, p0, [x19] ; CHECK-NEXT: addvl sp, sp, #1 ; CHECK-NEXT: add sp, sp, #16 ; CHECK-NEXT: ldp x30, x19, [sp, #80] // 16-byte Folded Reload ; CHECK-NEXT: ldr x29, [sp, #64] // 8-byte Folded Reload ; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload ; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload ; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload ; CHECK-NEXT: ldp d15, d14, [sp], #96 // 16-byte Folded Reload ; CHECK-NEXT: ret %vec = call @llvm.vector.insert.nxv4i32.v4i32( poison, <4 x i32> %arg, i64 0) call void @use_v4i32(<4 x i32> %arg) store %vec, ptr %ptr ret void } define void @dont_coalesce_arg_v2i64(<2 x i64> %arg, ptr %ptr) #0 { ; CHECK-LABEL: dont_coalesce_arg_v2i64: ; CHECK: // %bb.0: ; CHECK-NEXT: stp d15, d14, [sp, #-96]! // 16-byte Folded Spill ; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill ; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill ; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill ; CHECK-NEXT: str x29, [sp, #64] // 8-byte Folded Spill ; CHECK-NEXT: stp x30, x19, [sp, #80] // 16-byte Folded Spill ; CHECK-NEXT: sub sp, sp, #16 ; CHECK-NEXT: addvl sp, sp, #-1 ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: add x8, sp, #16 ; CHECK-NEXT: mov x19, x0 ; CHECK-NEXT: str z0, [x8] // 16-byte Folded Spill ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill ; CHECK-NEXT: smstop sm ; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload ; CHECK-NEXT: bl use_v2i64 ; CHECK-NEXT: smstart sm ; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: add x8, sp, #16 ; CHECK-NEXT: ldr z0, [x8] // 16-byte Folded Reload ; CHECK-NEXT: st1d { z0.d }, p0, [x19] ; CHECK-NEXT: addvl sp, sp, #1 ; CHECK-NEXT: add sp, sp, #16 ; CHECK-NEXT: ldp x30, x19, [sp, #80] // 16-byte Folded Reload ; CHECK-NEXT: ldr x29, [sp, #64] // 8-byte Folded Reload ; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload ; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload ; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload ; CHECK-NEXT: ldp d15, d14, [sp], #96 // 16-byte Folded Reload ; CHECK-NEXT: ret %vec = call @llvm.vector.insert.nxv2i64.v2i64( poison, <2 x i64> %arg, i64 0) call void @use_v2i64(<2 x i64> %arg) store %vec, ptr %ptr ret void } define void @dont_coalesce_arg_v8f16(<8 x half> %arg, ptr %ptr) #0 { ; CHECK-LABEL: dont_coalesce_arg_v8f16: ; CHECK: // %bb.0: ; CHECK-NEXT: stp d15, d14, [sp, #-96]! // 16-byte Folded Spill ; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill ; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill ; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill ; CHECK-NEXT: str x29, [sp, #64] // 8-byte Folded Spill ; CHECK-NEXT: stp x30, x19, [sp, #80] // 16-byte Folded Spill ; CHECK-NEXT: sub sp, sp, #16 ; CHECK-NEXT: addvl sp, sp, #-1 ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: add x8, sp, #16 ; CHECK-NEXT: mov x19, x0 ; CHECK-NEXT: str z0, [x8] // 16-byte Folded Spill ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill ; CHECK-NEXT: smstop sm ; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload ; CHECK-NEXT: bl use_v8f16 ; CHECK-NEXT: smstart sm ; CHECK-NEXT: ptrue p0.h ; CHECK-NEXT: add x8, sp, #16 ; CHECK-NEXT: ldr z0, [x8] // 16-byte Folded Reload ; CHECK-NEXT: st1h { z0.h }, p0, [x19] ; CHECK-NEXT: addvl sp, sp, #1 ; CHECK-NEXT: add sp, sp, #16 ; CHECK-NEXT: ldp x30, x19, [sp, #80] // 16-byte Folded Reload ; CHECK-NEXT: ldr x29, [sp, #64] // 8-byte Folded Reload ; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload ; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload ; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload ; CHECK-NEXT: ldp d15, d14, [sp], #96 // 16-byte Folded Reload ; CHECK-NEXT: ret %vec = call @llvm.vector.insert.nxv8f16.v8f16( poison, <8 x half> %arg, i64 0) call void @use_v8f16(<8 x half> %arg) store %vec, ptr %ptr ret void } define void @dont_coalesce_arg_v8bf16(<8 x bfloat> %arg, ptr %ptr) #0 { ; CHECK-LABEL: dont_coalesce_arg_v8bf16: ; CHECK: // %bb.0: ; CHECK-NEXT: stp d15, d14, [sp, #-96]! // 16-byte Folded Spill ; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill ; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill ; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill ; CHECK-NEXT: str x29, [sp, #64] // 8-byte Folded Spill ; CHECK-NEXT: stp x30, x19, [sp, #80] // 16-byte Folded Spill ; CHECK-NEXT: sub sp, sp, #16 ; CHECK-NEXT: addvl sp, sp, #-1 ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: add x8, sp, #16 ; CHECK-NEXT: mov x19, x0 ; CHECK-NEXT: str z0, [x8] // 16-byte Folded Spill ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill ; CHECK-NEXT: smstop sm ; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload ; CHECK-NEXT: bl use_v8bf16 ; CHECK-NEXT: smstart sm ; CHECK-NEXT: ptrue p0.h ; CHECK-NEXT: add x8, sp, #16 ; CHECK-NEXT: ldr z0, [x8] // 16-byte Folded Reload ; CHECK-NEXT: st1h { z0.h }, p0, [x19] ; CHECK-NEXT: addvl sp, sp, #1 ; CHECK-NEXT: add sp, sp, #16 ; CHECK-NEXT: ldp x30, x19, [sp, #80] // 16-byte Folded Reload ; CHECK-NEXT: ldr x29, [sp, #64] // 8-byte Folded Reload ; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload ; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload ; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload ; CHECK-NEXT: ldp d15, d14, [sp], #96 // 16-byte Folded Reload ; CHECK-NEXT: ret %vec = call @llvm.vector.insert.nxv8bf16.v8bf16( poison, <8 x bfloat> %arg, i64 0) call void @use_v8bf16(<8 x bfloat> %arg) store %vec, ptr %ptr ret void } define void @dont_coalesce_arg_v4f32(<4 x float> %arg, ptr %ptr) #0 { ; CHECK-LABEL: dont_coalesce_arg_v4f32: ; CHECK: // %bb.0: ; CHECK-NEXT: stp d15, d14, [sp, #-96]! // 16-byte Folded Spill ; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill ; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill ; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill ; CHECK-NEXT: str x29, [sp, #64] // 8-byte Folded Spill ; CHECK-NEXT: stp x30, x19, [sp, #80] // 16-byte Folded Spill ; CHECK-NEXT: sub sp, sp, #16 ; CHECK-NEXT: addvl sp, sp, #-1 ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: add x8, sp, #16 ; CHECK-NEXT: mov x19, x0 ; CHECK-NEXT: str z0, [x8] // 16-byte Folded Spill ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill ; CHECK-NEXT: smstop sm ; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload ; CHECK-NEXT: bl use_v4f32 ; CHECK-NEXT: smstart sm ; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: add x8, sp, #16 ; CHECK-NEXT: ldr z0, [x8] // 16-byte Folded Reload ; CHECK-NEXT: st1d { z0.d }, p0, [x19] ; CHECK-NEXT: addvl sp, sp, #1 ; CHECK-NEXT: add sp, sp, #16 ; CHECK-NEXT: ldp x30, x19, [sp, #80] // 16-byte Folded Reload ; CHECK-NEXT: ldr x29, [sp, #64] // 8-byte Folded Reload ; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload ; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload ; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload ; CHECK-NEXT: ldp d15, d14, [sp], #96 // 16-byte Folded Reload ; CHECK-NEXT: ret %vec = call @llvm.vector.insert.nxv4f32.v4f32( poison, <4 x float> %arg, i64 0) call void @use_v4f32(<4 x float> %arg) store %vec, ptr %ptr ret void } define void @dont_coalesce_arg_v2f64(<2 x double> %arg, ptr %ptr) #0 { ; CHECK-LABEL: dont_coalesce_arg_v2f64: ; CHECK: // %bb.0: ; CHECK-NEXT: stp d15, d14, [sp, #-96]! // 16-byte Folded Spill ; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill ; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill ; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill ; CHECK-NEXT: str x29, [sp, #64] // 8-byte Folded Spill ; CHECK-NEXT: stp x30, x19, [sp, #80] // 16-byte Folded Spill ; CHECK-NEXT: sub sp, sp, #16 ; CHECK-NEXT: addvl sp, sp, #-1 ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: add x8, sp, #16 ; CHECK-NEXT: mov x19, x0 ; CHECK-NEXT: str z0, [x8] // 16-byte Folded Spill ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill ; CHECK-NEXT: smstop sm ; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload ; CHECK-NEXT: bl use_v2f64 ; CHECK-NEXT: smstart sm ; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: add x8, sp, #16 ; CHECK-NEXT: ldr z0, [x8] // 16-byte Folded Reload ; CHECK-NEXT: st1d { z0.d }, p0, [x19] ; CHECK-NEXT: addvl sp, sp, #1 ; CHECK-NEXT: add sp, sp, #16 ; CHECK-NEXT: ldp x30, x19, [sp, #80] // 16-byte Folded Reload ; CHECK-NEXT: ldr x29, [sp, #64] // 8-byte Folded Reload ; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload ; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload ; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload ; CHECK-NEXT: ldp d15, d14, [sp], #96 // 16-byte Folded Reload ; CHECK-NEXT: ret %vec = call @llvm.vector.insert.nxv2f64.v2f64( poison, <2 x double> %arg, i64 0) call void @use_v2f64(<2 x double> %arg) store %vec, ptr %ptr ret void } ; ; <8 x i1> type will need type promotion. ; define void @dont_coalesce_arg_v8i1(<8 x i1> %arg, ptr %ptr) #0 { ; CHECK-LABEL: dont_coalesce_arg_v8i1: ; CHECK: // %bb.0: ; CHECK-NEXT: stp d15, d14, [sp, #-96]! // 16-byte Folded Spill ; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill ; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill ; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill ; CHECK-NEXT: str x29, [sp, #64] // 8-byte Folded Spill ; CHECK-NEXT: stp x30, x19, [sp, #80] // 16-byte Folded Spill ; CHECK-NEXT: sub sp, sp, #16 ; CHECK-NEXT: addvl sp, sp, #-1 ; CHECK-NEXT: ptrue p0.b ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: mov z1.d, z0.d ; CHECK-NEXT: add x8, sp, #16 ; CHECK-NEXT: mov x19, x0 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: str d0, [sp, #8] // 8-byte Folded Spill ; CHECK-NEXT: and z1.b, z1.b, #0x1 ; CHECK-NEXT: cmpne p0.b, p0/z, z1.b, #0 ; CHECK-NEXT: str p0, [x8, #7, mul vl] // 2-byte Folded Spill ; CHECK-NEXT: smstop sm ; CHECK-NEXT: ldr d0, [sp, #8] // 8-byte Folded Reload ; CHECK-NEXT: bl use_v8i1 ; CHECK-NEXT: smstart sm ; CHECK-NEXT: add x8, sp, #16 ; CHECK-NEXT: ldr p0, [x8, #7, mul vl] // 2-byte Folded Reload ; CHECK-NEXT: str p0, [x19] ; CHECK-NEXT: addvl sp, sp, #1 ; CHECK-NEXT: add sp, sp, #16 ; CHECK-NEXT: ldp x30, x19, [sp, #80] // 16-byte Folded Reload ; CHECK-NEXT: ldr x29, [sp, #64] // 8-byte Folded Reload ; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload ; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload ; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload ; CHECK-NEXT: ldp d15, d14, [sp], #96 // 16-byte Folded Reload ; CHECK-NEXT: ret %vec = call @llvm.vector.insert.nxv8i1.v8i1( poison, <8 x i1> %arg, i64 0) call void @use_v8i1(<8 x i1> %arg) store %vec, ptr %ptr ret void } ; ; Scalar return values ; define void @dont_coalesce_res_i8(ptr %ptr) #0 { ; CHECK-LABEL: dont_coalesce_res_i8: ; CHECK: // %bb.0: ; CHECK-NEXT: stp d15, d14, [sp, #-80]! // 16-byte Folded Spill ; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill ; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill ; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill ; CHECK-NEXT: stp x30, x19, [sp, #64] // 16-byte Folded Spill ; CHECK-NEXT: mov x19, x0 ; CHECK-NEXT: smstop sm ; CHECK-NEXT: bl get_i8 ; CHECK-NEXT: smstart sm ; CHECK-NEXT: ptrue p0.b ; CHECK-NEXT: fmov s0, w0 ; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload ; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload ; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload ; CHECK-NEXT: st1b { z0.b }, p0, [x19] ; CHECK-NEXT: ldp x30, x19, [sp, #64] // 16-byte Folded Reload ; CHECK-NEXT: ldp d15, d14, [sp], #80 // 16-byte Folded Reload ; CHECK-NEXT: ret %res = call i8 @get_i8() %vec = insertelement poison, i8 %res, i32 0 store %vec, ptr %ptr ret void } define void @dont_coalesce_res_i16(ptr %ptr) #0 { ; CHECK-LABEL: dont_coalesce_res_i16: ; CHECK: // %bb.0: ; CHECK-NEXT: stp d15, d14, [sp, #-80]! // 16-byte Folded Spill ; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill ; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill ; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill ; CHECK-NEXT: stp x30, x19, [sp, #64] // 16-byte Folded Spill ; CHECK-NEXT: mov x19, x0 ; CHECK-NEXT: smstop sm ; CHECK-NEXT: bl get_i16 ; CHECK-NEXT: smstart sm ; CHECK-NEXT: ptrue p0.h ; CHECK-NEXT: fmov s0, w0 ; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload ; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload ; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload ; CHECK-NEXT: st1h { z0.h }, p0, [x19] ; CHECK-NEXT: ldp x30, x19, [sp, #64] // 16-byte Folded Reload ; CHECK-NEXT: ldp d15, d14, [sp], #80 // 16-byte Folded Reload ; CHECK-NEXT: ret %res = call i16 @get_i16() %vec = insertelement poison, i16 %res, i32 0 store %vec, ptr %ptr ret void } define void @dont_coalesce_res_i32(ptr %ptr) #0 { ; CHECK-LABEL: dont_coalesce_res_i32: ; CHECK: // %bb.0: ; CHECK-NEXT: stp d15, d14, [sp, #-80]! // 16-byte Folded Spill ; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill ; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill ; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill ; CHECK-NEXT: stp x30, x19, [sp, #64] // 16-byte Folded Spill ; CHECK-NEXT: mov x19, x0 ; CHECK-NEXT: smstop sm ; CHECK-NEXT: bl get_i32 ; CHECK-NEXT: smstart sm ; CHECK-NEXT: ptrue p0.s ; CHECK-NEXT: fmov s0, w0 ; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload ; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload ; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload ; CHECK-NEXT: st1w { z0.s }, p0, [x19] ; CHECK-NEXT: ldp x30, x19, [sp, #64] // 16-byte Folded Reload ; CHECK-NEXT: ldp d15, d14, [sp], #80 // 16-byte Folded Reload ; CHECK-NEXT: ret %res = call i32 @get_i32() %vec = insertelement poison, i32 %res, i32 0 store %vec, ptr %ptr ret void } define void @dont_coalesce_res_i64(ptr %ptr) #0 { ; CHECK-LABEL: dont_coalesce_res_i64: ; CHECK: // %bb.0: ; CHECK-NEXT: stp d15, d14, [sp, #-80]! // 16-byte Folded Spill ; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill ; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill ; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill ; CHECK-NEXT: stp x30, x19, [sp, #64] // 16-byte Folded Spill ; CHECK-NEXT: mov x19, x0 ; CHECK-NEXT: smstop sm ; CHECK-NEXT: bl get_i64 ; CHECK-NEXT: smstart sm ; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: fmov d0, x0 ; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload ; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload ; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload ; CHECK-NEXT: st1d { z0.d }, p0, [x19] ; CHECK-NEXT: ldp x30, x19, [sp, #64] // 16-byte Folded Reload ; CHECK-NEXT: ldp d15, d14, [sp], #80 // 16-byte Folded Reload ; CHECK-NEXT: ret %res = call i64 @get_i64() %vec = insertelement poison, i64 %res, i32 0 store %vec, ptr %ptr ret void } define void @dont_coalesce_res_f16(ptr %ptr) #0 { ; CHECK-LABEL: dont_coalesce_res_f16: ; CHECK: // %bb.0: ; CHECK-NEXT: sub sp, sp, #96 ; CHECK-NEXT: stp d15, d14, [sp, #16] // 16-byte Folded Spill ; CHECK-NEXT: stp d13, d12, [sp, #32] // 16-byte Folded Spill ; CHECK-NEXT: stp d11, d10, [sp, #48] // 16-byte Folded Spill ; CHECK-NEXT: stp d9, d8, [sp, #64] // 16-byte Folded Spill ; CHECK-NEXT: stp x30, x19, [sp, #80] // 16-byte Folded Spill ; CHECK-NEXT: mov x19, x0 ; CHECK-NEXT: smstop sm ; CHECK-NEXT: bl get_f16 ; CHECK-NEXT: str h0, [sp, #14] // 2-byte Folded Spill ; CHECK-NEXT: smstart sm ; CHECK-NEXT: ptrue p0.h ; CHECK-NEXT: ldr h0, [sp, #14] // 2-byte Folded Reload ; CHECK-NEXT: // kill: def $h0 killed $h0 def $z0 ; CHECK-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload ; CHECK-NEXT: ldp d11, d10, [sp, #48] // 16-byte Folded Reload ; CHECK-NEXT: ldp d13, d12, [sp, #32] // 16-byte Folded Reload ; CHECK-NEXT: ldp d15, d14, [sp, #16] // 16-byte Folded Reload ; CHECK-NEXT: st1h { z0.h }, p0, [x19] ; CHECK-NEXT: ldp x30, x19, [sp, #80] // 16-byte Folded Reload ; CHECK-NEXT: add sp, sp, #96 ; CHECK-NEXT: ret %res = call half @get_f16() %vec = insertelement poison, half %res, i32 0 store %vec, ptr %ptr ret void } define void @dont_coalesce_res_f32(ptr %ptr) #0 { ; CHECK-LABEL: dont_coalesce_res_f32: ; CHECK: // %bb.0: ; CHECK-NEXT: sub sp, sp, #96 ; CHECK-NEXT: stp d15, d14, [sp, #16] // 16-byte Folded Spill ; CHECK-NEXT: stp d13, d12, [sp, #32] // 16-byte Folded Spill ; CHECK-NEXT: stp d11, d10, [sp, #48] // 16-byte Folded Spill ; CHECK-NEXT: stp d9, d8, [sp, #64] // 16-byte Folded Spill ; CHECK-NEXT: stp x30, x19, [sp, #80] // 16-byte Folded Spill ; CHECK-NEXT: mov x19, x0 ; CHECK-NEXT: smstop sm ; CHECK-NEXT: bl get_f32 ; CHECK-NEXT: str s0, [sp, #12] // 4-byte Folded Spill ; CHECK-NEXT: smstart sm ; CHECK-NEXT: ptrue p0.s ; CHECK-NEXT: ldr s0, [sp, #12] // 4-byte Folded Reload ; CHECK-NEXT: // kill: def $s0 killed $s0 def $z0 ; CHECK-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload ; CHECK-NEXT: ldp d11, d10, [sp, #48] // 16-byte Folded Reload ; CHECK-NEXT: ldp d13, d12, [sp, #32] // 16-byte Folded Reload ; CHECK-NEXT: ldp d15, d14, [sp, #16] // 16-byte Folded Reload ; CHECK-NEXT: st1w { z0.s }, p0, [x19] ; CHECK-NEXT: ldp x30, x19, [sp, #80] // 16-byte Folded Reload ; CHECK-NEXT: add sp, sp, #96 ; CHECK-NEXT: ret %res = call float @get_f32() %vec = insertelement poison, float %res, i32 0 store %vec, ptr %ptr ret void } define void @dont_coalesce_res_f64(ptr %ptr) #0 { ; CHECK-LABEL: dont_coalesce_res_f64: ; CHECK: // %bb.0: ; CHECK-NEXT: sub sp, sp, #96 ; CHECK-NEXT: stp d15, d14, [sp, #16] // 16-byte Folded Spill ; CHECK-NEXT: stp d13, d12, [sp, #32] // 16-byte Folded Spill ; CHECK-NEXT: stp d11, d10, [sp, #48] // 16-byte Folded Spill ; CHECK-NEXT: stp d9, d8, [sp, #64] // 16-byte Folded Spill ; CHECK-NEXT: stp x30, x19, [sp, #80] // 16-byte Folded Spill ; CHECK-NEXT: mov x19, x0 ; CHECK-NEXT: smstop sm ; CHECK-NEXT: bl get_f64 ; CHECK-NEXT: str d0, [sp, #8] // 8-byte Folded Spill ; CHECK-NEXT: smstart sm ; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: ldr d0, [sp, #8] // 8-byte Folded Reload ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload ; CHECK-NEXT: ldp d11, d10, [sp, #48] // 16-byte Folded Reload ; CHECK-NEXT: ldp d13, d12, [sp, #32] // 16-byte Folded Reload ; CHECK-NEXT: ldp d15, d14, [sp, #16] // 16-byte Folded Reload ; CHECK-NEXT: st1d { z0.d }, p0, [x19] ; CHECK-NEXT: ldp x30, x19, [sp, #80] // 16-byte Folded Reload ; CHECK-NEXT: add sp, sp, #96 ; CHECK-NEXT: ret %res = call double @get_f64() %vec = insertelement poison, double %res, i32 0 store %vec, ptr %ptr ret void } ; ; Single-element vector result values ; define void @dont_coalesce_res_v1i8(ptr %ptr) #0 { ; CHECK-LABEL: dont_coalesce_res_v1i8: ; CHECK: // %bb.0: ; CHECK-NEXT: sub sp, sp, #96 ; CHECK-NEXT: stp d15, d14, [sp, #16] // 16-byte Folded Spill ; CHECK-NEXT: stp d13, d12, [sp, #32] // 16-byte Folded Spill ; CHECK-NEXT: stp d11, d10, [sp, #48] // 16-byte Folded Spill ; CHECK-NEXT: stp d9, d8, [sp, #64] // 16-byte Folded Spill ; CHECK-NEXT: stp x30, x19, [sp, #80] // 16-byte Folded Spill ; CHECK-NEXT: mov x19, x0 ; CHECK-NEXT: smstop sm ; CHECK-NEXT: bl get_v1i8 ; CHECK-NEXT: str d0, [sp, #8] // 8-byte Folded Spill ; CHECK-NEXT: smstart sm ; CHECK-NEXT: ptrue p0.b ; CHECK-NEXT: ldr d0, [sp, #8] // 8-byte Folded Reload ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload ; CHECK-NEXT: ldp d11, d10, [sp, #48] // 16-byte Folded Reload ; CHECK-NEXT: ldp d13, d12, [sp, #32] // 16-byte Folded Reload ; CHECK-NEXT: ldp d15, d14, [sp, #16] // 16-byte Folded Reload ; CHECK-NEXT: st1b { z0.b }, p0, [x19] ; CHECK-NEXT: ldp x30, x19, [sp, #80] // 16-byte Folded Reload ; CHECK-NEXT: add sp, sp, #96 ; CHECK-NEXT: ret %res = call <1 x i8> @get_v1i8() %elt = extractelement <1 x i8> %res, i32 0 %vec = insertelement poison, i8 %elt, i32 0 store %vec, ptr %ptr ret void } define void @dont_coalesce_res_v1i16(ptr %ptr) #0 { ; CHECK-LABEL: dont_coalesce_res_v1i16: ; CHECK: // %bb.0: ; CHECK-NEXT: sub sp, sp, #96 ; CHECK-NEXT: stp d15, d14, [sp, #16] // 16-byte Folded Spill ; CHECK-NEXT: stp d13, d12, [sp, #32] // 16-byte Folded Spill ; CHECK-NEXT: stp d11, d10, [sp, #48] // 16-byte Folded Spill ; CHECK-NEXT: stp d9, d8, [sp, #64] // 16-byte Folded Spill ; CHECK-NEXT: stp x30, x19, [sp, #80] // 16-byte Folded Spill ; CHECK-NEXT: mov x19, x0 ; CHECK-NEXT: smstop sm ; CHECK-NEXT: bl get_v1i16 ; CHECK-NEXT: str d0, [sp, #8] // 8-byte Folded Spill ; CHECK-NEXT: smstart sm ; CHECK-NEXT: ptrue p0.h ; CHECK-NEXT: ldr d0, [sp, #8] // 8-byte Folded Reload ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload ; CHECK-NEXT: ldp d11, d10, [sp, #48] // 16-byte Folded Reload ; CHECK-NEXT: ldp d13, d12, [sp, #32] // 16-byte Folded Reload ; CHECK-NEXT: ldp d15, d14, [sp, #16] // 16-byte Folded Reload ; CHECK-NEXT: st1h { z0.h }, p0, [x19] ; CHECK-NEXT: ldp x30, x19, [sp, #80] // 16-byte Folded Reload ; CHECK-NEXT: add sp, sp, #96 ; CHECK-NEXT: ret %res = call <1 x i16> @get_v1i16() %elt = extractelement <1 x i16> %res, i32 0 %vec = insertelement poison, i16 %elt, i32 0 store %vec, ptr %ptr ret void } define void @dont_coalesce_res_v1i32(ptr %ptr) #0 { ; CHECK-LABEL: dont_coalesce_res_v1i32: ; CHECK: // %bb.0: ; CHECK-NEXT: sub sp, sp, #96 ; CHECK-NEXT: stp d15, d14, [sp, #16] // 16-byte Folded Spill ; CHECK-NEXT: stp d13, d12, [sp, #32] // 16-byte Folded Spill ; CHECK-NEXT: stp d11, d10, [sp, #48] // 16-byte Folded Spill ; CHECK-NEXT: stp d9, d8, [sp, #64] // 16-byte Folded Spill ; CHECK-NEXT: stp x30, x19, [sp, #80] // 16-byte Folded Spill ; CHECK-NEXT: mov x19, x0 ; CHECK-NEXT: smstop sm ; CHECK-NEXT: bl get_v1i32 ; CHECK-NEXT: str d0, [sp, #8] // 8-byte Folded Spill ; CHECK-NEXT: smstart sm ; CHECK-NEXT: ptrue p0.s ; CHECK-NEXT: ldr d0, [sp, #8] // 8-byte Folded Reload ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload ; CHECK-NEXT: ldp d11, d10, [sp, #48] // 16-byte Folded Reload ; CHECK-NEXT: ldp d13, d12, [sp, #32] // 16-byte Folded Reload ; CHECK-NEXT: ldp d15, d14, [sp, #16] // 16-byte Folded Reload ; CHECK-NEXT: st1w { z0.s }, p0, [x19] ; CHECK-NEXT: ldp x30, x19, [sp, #80] // 16-byte Folded Reload ; CHECK-NEXT: add sp, sp, #96 ; CHECK-NEXT: ret %res = call <1 x i32> @get_v1i32() %elt = extractelement <1 x i32> %res, i32 0 %vec = insertelement poison, i32 %elt, i32 0 store %vec, ptr %ptr ret void } define void @dont_coalesce_res_v1i64(ptr %ptr) #0 { ; CHECK-LABEL: dont_coalesce_res_v1i64: ; CHECK: // %bb.0: ; CHECK-NEXT: sub sp, sp, #96 ; CHECK-NEXT: stp d15, d14, [sp, #16] // 16-byte Folded Spill ; CHECK-NEXT: stp d13, d12, [sp, #32] // 16-byte Folded Spill ; CHECK-NEXT: stp d11, d10, [sp, #48] // 16-byte Folded Spill ; CHECK-NEXT: stp d9, d8, [sp, #64] // 16-byte Folded Spill ; CHECK-NEXT: stp x30, x19, [sp, #80] // 16-byte Folded Spill ; CHECK-NEXT: mov x19, x0 ; CHECK-NEXT: smstop sm ; CHECK-NEXT: bl get_v1i64 ; CHECK-NEXT: str d0, [sp, #8] // 8-byte Folded Spill ; CHECK-NEXT: smstart sm ; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: ldr d0, [sp, #8] // 8-byte Folded Reload ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload ; CHECK-NEXT: ldp d11, d10, [sp, #48] // 16-byte Folded Reload ; CHECK-NEXT: ldp d13, d12, [sp, #32] // 16-byte Folded Reload ; CHECK-NEXT: ldp d15, d14, [sp, #16] // 16-byte Folded Reload ; CHECK-NEXT: st1d { z0.d }, p0, [x19] ; CHECK-NEXT: ldp x30, x19, [sp, #80] // 16-byte Folded Reload ; CHECK-NEXT: add sp, sp, #96 ; CHECK-NEXT: ret %res = call <1 x i64> @get_v1i64() %elt = extractelement <1 x i64> %res, i32 0 %vec = insertelement poison, i64 %elt, i32 0 store %vec, ptr %ptr ret void } define void @dont_coalesce_res_v1f16(ptr %ptr) #0 { ; CHECK-LABEL: dont_coalesce_res_v1f16: ; CHECK: // %bb.0: ; CHECK-NEXT: sub sp, sp, #96 ; CHECK-NEXT: stp d15, d14, [sp, #16] // 16-byte Folded Spill ; CHECK-NEXT: stp d13, d12, [sp, #32] // 16-byte Folded Spill ; CHECK-NEXT: stp d11, d10, [sp, #48] // 16-byte Folded Spill ; CHECK-NEXT: stp d9, d8, [sp, #64] // 16-byte Folded Spill ; CHECK-NEXT: stp x30, x19, [sp, #80] // 16-byte Folded Spill ; CHECK-NEXT: mov x19, x0 ; CHECK-NEXT: smstop sm ; CHECK-NEXT: bl get_v1f16 ; CHECK-NEXT: str h0, [sp, #14] // 2-byte Folded Spill ; CHECK-NEXT: smstart sm ; CHECK-NEXT: ptrue p0.h ; CHECK-NEXT: ldr h0, [sp, #14] // 2-byte Folded Reload ; CHECK-NEXT: // kill: def $h0 killed $h0 def $z0 ; CHECK-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload ; CHECK-NEXT: ldp d11, d10, [sp, #48] // 16-byte Folded Reload ; CHECK-NEXT: ldp d13, d12, [sp, #32] // 16-byte Folded Reload ; CHECK-NEXT: ldp d15, d14, [sp, #16] // 16-byte Folded Reload ; CHECK-NEXT: st1h { z0.h }, p0, [x19] ; CHECK-NEXT: ldp x30, x19, [sp, #80] // 16-byte Folded Reload ; CHECK-NEXT: add sp, sp, #96 ; CHECK-NEXT: ret %res = call <1 x half> @get_v1f16() %elt = extractelement <1 x half> %res, i32 0 %vec = insertelement poison, half %elt, i32 0 store %vec, ptr %ptr ret void } define void @dont_coalesce_res_v1f32(ptr %ptr) #0 { ; CHECK-LABEL: dont_coalesce_res_v1f32: ; CHECK: // %bb.0: ; CHECK-NEXT: sub sp, sp, #96 ; CHECK-NEXT: stp d15, d14, [sp, #16] // 16-byte Folded Spill ; CHECK-NEXT: stp d13, d12, [sp, #32] // 16-byte Folded Spill ; CHECK-NEXT: stp d11, d10, [sp, #48] // 16-byte Folded Spill ; CHECK-NEXT: stp d9, d8, [sp, #64] // 16-byte Folded Spill ; CHECK-NEXT: stp x30, x19, [sp, #80] // 16-byte Folded Spill ; CHECK-NEXT: mov x19, x0 ; CHECK-NEXT: smstop sm ; CHECK-NEXT: bl get_v1f32 ; CHECK-NEXT: str d0, [sp, #8] // 8-byte Folded Spill ; CHECK-NEXT: smstart sm ; CHECK-NEXT: ptrue p0.s ; CHECK-NEXT: ldr d0, [sp, #8] // 8-byte Folded Reload ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload ; CHECK-NEXT: ldp d11, d10, [sp, #48] // 16-byte Folded Reload ; CHECK-NEXT: ldp d13, d12, [sp, #32] // 16-byte Folded Reload ; CHECK-NEXT: ldp d15, d14, [sp, #16] // 16-byte Folded Reload ; CHECK-NEXT: st1w { z0.s }, p0, [x19] ; CHECK-NEXT: ldp x30, x19, [sp, #80] // 16-byte Folded Reload ; CHECK-NEXT: add sp, sp, #96 ; CHECK-NEXT: ret %res = call <1 x float> @get_v1f32() %elt = extractelement <1 x float> %res, i32 0 %vec = insertelement poison, float %elt, i32 0 store %vec, ptr %ptr ret void } define void @dont_coalesce_res_v1f64(ptr %ptr) #0 { ; CHECK-LABEL: dont_coalesce_res_v1f64: ; CHECK: // %bb.0: ; CHECK-NEXT: sub sp, sp, #96 ; CHECK-NEXT: stp d15, d14, [sp, #16] // 16-byte Folded Spill ; CHECK-NEXT: stp d13, d12, [sp, #32] // 16-byte Folded Spill ; CHECK-NEXT: stp d11, d10, [sp, #48] // 16-byte Folded Spill ; CHECK-NEXT: stp d9, d8, [sp, #64] // 16-byte Folded Spill ; CHECK-NEXT: stp x30, x19, [sp, #80] // 16-byte Folded Spill ; CHECK-NEXT: mov x19, x0 ; CHECK-NEXT: smstop sm ; CHECK-NEXT: bl get_v1f64 ; CHECK-NEXT: str d0, [sp, #8] // 8-byte Folded Spill ; CHECK-NEXT: smstart sm ; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: ldr d0, [sp, #8] // 8-byte Folded Reload ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload ; CHECK-NEXT: ldp d11, d10, [sp, #48] // 16-byte Folded Reload ; CHECK-NEXT: ldp d13, d12, [sp, #32] // 16-byte Folded Reload ; CHECK-NEXT: ldp d15, d14, [sp, #16] // 16-byte Folded Reload ; CHECK-NEXT: st1d { z0.d }, p0, [x19] ; CHECK-NEXT: ldp x30, x19, [sp, #80] // 16-byte Folded Reload ; CHECK-NEXT: add sp, sp, #96 ; CHECK-NEXT: ret %res = call <1 x double> @get_v1f64() %elt = extractelement <1 x double> %res, i32 0 %vec = insertelement poison, double %elt, i32 0 store %vec, ptr %ptr ret void } ; ; Full vector result values ; define void @dont_coalesce_res_v16i8(ptr %ptr) #0 { ; CHECK-LABEL: dont_coalesce_res_v16i8: ; CHECK: // %bb.0: ; CHECK-NEXT: sub sp, sp, #96 ; CHECK-NEXT: stp d15, d14, [sp, #16] // 16-byte Folded Spill ; CHECK-NEXT: stp d13, d12, [sp, #32] // 16-byte Folded Spill ; CHECK-NEXT: stp d11, d10, [sp, #48] // 16-byte Folded Spill ; CHECK-NEXT: stp d9, d8, [sp, #64] // 16-byte Folded Spill ; CHECK-NEXT: stp x30, x19, [sp, #80] // 16-byte Folded Spill ; CHECK-NEXT: mov x19, x0 ; CHECK-NEXT: smstop sm ; CHECK-NEXT: bl get_v16i8 ; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill ; CHECK-NEXT: smstart sm ; CHECK-NEXT: ptrue p0.b ; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload ; CHECK-NEXT: ldp d11, d10, [sp, #48] // 16-byte Folded Reload ; CHECK-NEXT: ldp d13, d12, [sp, #32] // 16-byte Folded Reload ; CHECK-NEXT: ldp d15, d14, [sp, #16] // 16-byte Folded Reload ; CHECK-NEXT: st1b { z0.b }, p0, [x19] ; CHECK-NEXT: ldp x30, x19, [sp, #80] // 16-byte Folded Reload ; CHECK-NEXT: add sp, sp, #96 ; CHECK-NEXT: ret %res = call <16 x i8> @get_v16i8() %vec = call @llvm.vector.insert.nxv16i8.v16i8( poison, <16 x i8> %res, i64 0) store %vec, ptr %ptr ret void } define void @dont_coalesce_res_v8i16(ptr %ptr) #0 { ; CHECK-LABEL: dont_coalesce_res_v8i16: ; CHECK: // %bb.0: ; CHECK-NEXT: sub sp, sp, #96 ; CHECK-NEXT: stp d15, d14, [sp, #16] // 16-byte Folded Spill ; CHECK-NEXT: stp d13, d12, [sp, #32] // 16-byte Folded Spill ; CHECK-NEXT: stp d11, d10, [sp, #48] // 16-byte Folded Spill ; CHECK-NEXT: stp d9, d8, [sp, #64] // 16-byte Folded Spill ; CHECK-NEXT: stp x30, x19, [sp, #80] // 16-byte Folded Spill ; CHECK-NEXT: mov x19, x0 ; CHECK-NEXT: smstop sm ; CHECK-NEXT: bl get_v8i16 ; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill ; CHECK-NEXT: smstart sm ; CHECK-NEXT: ptrue p0.h ; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload ; CHECK-NEXT: ldp d11, d10, [sp, #48] // 16-byte Folded Reload ; CHECK-NEXT: ldp d13, d12, [sp, #32] // 16-byte Folded Reload ; CHECK-NEXT: ldp d15, d14, [sp, #16] // 16-byte Folded Reload ; CHECK-NEXT: st1h { z0.h }, p0, [x19] ; CHECK-NEXT: ldp x30, x19, [sp, #80] // 16-byte Folded Reload ; CHECK-NEXT: add sp, sp, #96 ; CHECK-NEXT: ret %res = call <8 x i16> @get_v8i16() %vec = call @llvm.vector.insert.nxv8i16.v8i16( poison, <8 x i16> %res, i64 0) store %vec, ptr %ptr ret void } define void @dont_coalesce_res_v4i32(ptr %ptr) #0 { ; CHECK-LABEL: dont_coalesce_res_v4i32: ; CHECK: // %bb.0: ; CHECK-NEXT: sub sp, sp, #96 ; CHECK-NEXT: stp d15, d14, [sp, #16] // 16-byte Folded Spill ; CHECK-NEXT: stp d13, d12, [sp, #32] // 16-byte Folded Spill ; CHECK-NEXT: stp d11, d10, [sp, #48] // 16-byte Folded Spill ; CHECK-NEXT: stp d9, d8, [sp, #64] // 16-byte Folded Spill ; CHECK-NEXT: stp x30, x19, [sp, #80] // 16-byte Folded Spill ; CHECK-NEXT: mov x19, x0 ; CHECK-NEXT: smstop sm ; CHECK-NEXT: bl get_v4i32 ; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill ; CHECK-NEXT: smstart sm ; CHECK-NEXT: ptrue p0.s ; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload ; CHECK-NEXT: ldp d11, d10, [sp, #48] // 16-byte Folded Reload ; CHECK-NEXT: ldp d13, d12, [sp, #32] // 16-byte Folded Reload ; CHECK-NEXT: ldp d15, d14, [sp, #16] // 16-byte Folded Reload ; CHECK-NEXT: st1w { z0.s }, p0, [x19] ; CHECK-NEXT: ldp x30, x19, [sp, #80] // 16-byte Folded Reload ; CHECK-NEXT: add sp, sp, #96 ; CHECK-NEXT: ret %res = call <4 x i32> @get_v4i32() %vec = call @llvm.vector.insert.nxv4i32.v4i32( poison, <4 x i32> %res, i64 0) store %vec, ptr %ptr ret void } define void @dont_coalesce_res_v2i64(ptr %ptr) #0 { ; CHECK-LABEL: dont_coalesce_res_v2i64: ; CHECK: // %bb.0: ; CHECK-NEXT: sub sp, sp, #96 ; CHECK-NEXT: stp d15, d14, [sp, #16] // 16-byte Folded Spill ; CHECK-NEXT: stp d13, d12, [sp, #32] // 16-byte Folded Spill ; CHECK-NEXT: stp d11, d10, [sp, #48] // 16-byte Folded Spill ; CHECK-NEXT: stp d9, d8, [sp, #64] // 16-byte Folded Spill ; CHECK-NEXT: stp x30, x19, [sp, #80] // 16-byte Folded Spill ; CHECK-NEXT: mov x19, x0 ; CHECK-NEXT: smstop sm ; CHECK-NEXT: bl get_v2i64 ; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill ; CHECK-NEXT: smstart sm ; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload ; CHECK-NEXT: ldp d11, d10, [sp, #48] // 16-byte Folded Reload ; CHECK-NEXT: ldp d13, d12, [sp, #32] // 16-byte Folded Reload ; CHECK-NEXT: ldp d15, d14, [sp, #16] // 16-byte Folded Reload ; CHECK-NEXT: st1d { z0.d }, p0, [x19] ; CHECK-NEXT: ldp x30, x19, [sp, #80] // 16-byte Folded Reload ; CHECK-NEXT: add sp, sp, #96 ; CHECK-NEXT: ret %res = call <2 x i64> @get_v2i64() %vec = call @llvm.vector.insert.nxv2i64.v2i64( poison, <2 x i64> %res, i64 0) store %vec, ptr %ptr ret void } define void @dont_coalesce_res_v8f16(ptr %ptr) #0 { ; CHECK-LABEL: dont_coalesce_res_v8f16: ; CHECK: // %bb.0: ; CHECK-NEXT: sub sp, sp, #96 ; CHECK-NEXT: stp d15, d14, [sp, #16] // 16-byte Folded Spill ; CHECK-NEXT: stp d13, d12, [sp, #32] // 16-byte Folded Spill ; CHECK-NEXT: stp d11, d10, [sp, #48] // 16-byte Folded Spill ; CHECK-NEXT: stp d9, d8, [sp, #64] // 16-byte Folded Spill ; CHECK-NEXT: stp x30, x19, [sp, #80] // 16-byte Folded Spill ; CHECK-NEXT: mov x19, x0 ; CHECK-NEXT: smstop sm ; CHECK-NEXT: bl get_v8f16 ; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill ; CHECK-NEXT: smstart sm ; CHECK-NEXT: ptrue p0.h ; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload ; CHECK-NEXT: ldp d11, d10, [sp, #48] // 16-byte Folded Reload ; CHECK-NEXT: ldp d13, d12, [sp, #32] // 16-byte Folded Reload ; CHECK-NEXT: ldp d15, d14, [sp, #16] // 16-byte Folded Reload ; CHECK-NEXT: st1h { z0.h }, p0, [x19] ; CHECK-NEXT: ldp x30, x19, [sp, #80] // 16-byte Folded Reload ; CHECK-NEXT: add sp, sp, #96 ; CHECK-NEXT: ret %res = call <8 x half> @get_v8f16() %vec = call @llvm.vector.insert.nxv8f16.v8f16( poison, <8 x half> %res, i64 0) store %vec, ptr %ptr ret void } define void @dont_coalesce_res_v4f32(ptr %ptr) #0 { ; CHECK-LABEL: dont_coalesce_res_v4f32: ; CHECK: // %bb.0: ; CHECK-NEXT: sub sp, sp, #96 ; CHECK-NEXT: stp d15, d14, [sp, #16] // 16-byte Folded Spill ; CHECK-NEXT: stp d13, d12, [sp, #32] // 16-byte Folded Spill ; CHECK-NEXT: stp d11, d10, [sp, #48] // 16-byte Folded Spill ; CHECK-NEXT: stp d9, d8, [sp, #64] // 16-byte Folded Spill ; CHECK-NEXT: stp x30, x19, [sp, #80] // 16-byte Folded Spill ; CHECK-NEXT: mov x19, x0 ; CHECK-NEXT: smstop sm ; CHECK-NEXT: bl get_v4f32 ; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill ; CHECK-NEXT: smstart sm ; CHECK-NEXT: ptrue p0.s ; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload ; CHECK-NEXT: ldp d11, d10, [sp, #48] // 16-byte Folded Reload ; CHECK-NEXT: ldp d13, d12, [sp, #32] // 16-byte Folded Reload ; CHECK-NEXT: ldp d15, d14, [sp, #16] // 16-byte Folded Reload ; CHECK-NEXT: st1w { z0.s }, p0, [x19] ; CHECK-NEXT: ldp x30, x19, [sp, #80] // 16-byte Folded Reload ; CHECK-NEXT: add sp, sp, #96 ; CHECK-NEXT: ret %res = call <4 x float> @get_v4f32() %vec = call @llvm.vector.insert.nxv4f32.v4f32( poison, <4 x float> %res, i64 0) store %vec, ptr %ptr ret void } define void @dont_coalesce_res_v2f64(ptr %ptr) #0 { ; CHECK-LABEL: dont_coalesce_res_v2f64: ; CHECK: // %bb.0: ; CHECK-NEXT: sub sp, sp, #96 ; CHECK-NEXT: stp d15, d14, [sp, #16] // 16-byte Folded Spill ; CHECK-NEXT: stp d13, d12, [sp, #32] // 16-byte Folded Spill ; CHECK-NEXT: stp d11, d10, [sp, #48] // 16-byte Folded Spill ; CHECK-NEXT: stp d9, d8, [sp, #64] // 16-byte Folded Spill ; CHECK-NEXT: stp x30, x19, [sp, #80] // 16-byte Folded Spill ; CHECK-NEXT: mov x19, x0 ; CHECK-NEXT: smstop sm ; CHECK-NEXT: bl get_v2f64 ; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill ; CHECK-NEXT: smstart sm ; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload ; CHECK-NEXT: ldp d11, d10, [sp, #48] // 16-byte Folded Reload ; CHECK-NEXT: ldp d13, d12, [sp, #32] // 16-byte Folded Reload ; CHECK-NEXT: ldp d15, d14, [sp, #16] // 16-byte Folded Reload ; CHECK-NEXT: st1d { z0.d }, p0, [x19] ; CHECK-NEXT: ldp x30, x19, [sp, #80] // 16-byte Folded Reload ; CHECK-NEXT: add sp, sp, #96 ; CHECK-NEXT: ret %res = call <2 x double> @get_v2f64() %vec = call @llvm.vector.insert.nxv2f64.v2f64( poison, <2 x double> %res, i64 0) store %vec, ptr %ptr ret void } declare half @get_f16() declare float @get_f32() declare double @get_f64() declare <1 x half> @get_v1f16() declare <1 x float> @get_v1f32() declare <1 x double> @get_v1f64() declare <8 x half> @get_v8f16() declare <4 x float> @get_v4f32() declare <2 x double> @get_v2f64() declare i8 @get_i8() declare i16 @get_i16() declare i32 @get_i32() declare i64 @get_i64() declare <1 x i8> @get_v1i8() declare <1 x i16> @get_v1i16() declare <1 x i32> @get_v1i32() declare <2 x i64> @get_v1i64() declare <16 x i8> @get_v16i8() declare <8 x i16> @get_v8i16() declare <4 x i32> @get_v4i32() declare <2 x i64> @get_v2i64() declare void @use_f16(half) declare void @use_f32(float) declare void @use_f64(double) declare void @use_v1f16(<1 x half>) declare void @use_v1f32(<1 x float>) declare void @use_v1f64(<1 x double>) declare void @use_v8f16(<8 x half>) declare void @use_v8bf16(<8 x bfloat>) declare void @use_v4f32(<4 x float>) declare void @use_v2f64(<2 x double>) declare void @use_i8(i8) declare void @use_i16(i16) declare void @use_i32(i32) declare void @use_i64(i64) declare void @use_v1i8(<1 x i8>) declare void @use_v1i16(<1 x i16>) declare void @use_v1i32(<1 x i32>) declare void @use_v1i64(<1 x i64>) declare void @use_v16i8(<16 x i8>) declare void @use_v8i16(<8 x i16>) declare void @use_v4i32(<4 x i32>) declare void @use_v2i64(<2 x i64>) declare void @use_v8i1(<8 x i1>) declare @llvm.vector.insert.nxv8i1.v8i1(, <8 x i1>, i64) declare @llvm.vector.insert.nxv16i8.v16i8(, <16 x i8>, i64) declare @llvm.vector.insert.nxv8i16.v8i16(, <8 x i16>, i64) declare @llvm.vector.insert.nxv4i32.v4i32(, <4 x i32>, i64) declare @llvm.vector.insert.nxv2i64.v2i64(, <2 x i64>, i64) declare @llvm.vector.insert.nxv8f16.v8f16(, <8 x half>, i64) declare @llvm.vector.insert.nxv4f32.v4f32(, <4 x float>, i64) declare @llvm.vector.insert.nxv2f64.v2f64(, <2 x double>, i64) attributes #0 = { nounwind "aarch64_pstate_sm_enabled" "target-features"="+sve,+sme" }