; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme -verify-machineinstrs < %s | FileCheck %s

; This file tests the following combinations related to streaming-enabled functions:
; [ ] N  ->  S    (Normal -> Streaming)
; [ ] S  ->  N    (Streaming -> Normal)
; [ ] S  ->  S    (Streaming -> Streaming)
; [ ] S  ->  SC   (Streaming -> Streaming-compatible)
;
; The following combination is tested in sme-streaming-compatible-interface.ll
; [ ] SC ->  S    (Streaming-compatible -> Streaming)

declare void @normal_callee()
declare void @streaming_callee() "aarch64_pstate_sm_enabled"
declare void @streaming_compatible_callee() "aarch64_pstate_sm_compatible"

; [x] N  ->  S
; [ ] S  ->  N
; [ ] S  ->  S
; [ ] S  ->  SC
; A normal caller has to enter streaming mode around the call (smstart/smstop).
define void @normal_caller_streaming_callee() nounwind {
; CHECK-LABEL: normal_caller_streaming_callee:
; CHECK:       // %bb.0:
; CHECK-NEXT:    stp d15, d14, [sp, #-80]! // 16-byte Folded Spill
; CHECK-NEXT:    stp d13, d12, [sp, #16] // 16-byte Folded Spill
; CHECK-NEXT:    stp d11, d10, [sp, #32] // 16-byte Folded Spill
; CHECK-NEXT:    stp d9, d8, [sp, #48] // 16-byte Folded Spill
; CHECK-NEXT:    str x30, [sp, #64] // 8-byte Folded Spill
; CHECK-NEXT:    smstart sm
; CHECK-NEXT:    bl streaming_callee
; CHECK-NEXT:    smstop sm
; CHECK-NEXT:    ldp d9, d8, [sp, #48] // 16-byte Folded Reload
; CHECK-NEXT:    ldr x30, [sp, #64] // 8-byte Folded Reload
; CHECK-NEXT:    ldp d11, d10, [sp, #32] // 16-byte Folded Reload
; CHECK-NEXT:    ldp d13, d12, [sp, #16] // 16-byte Folded Reload
; CHECK-NEXT:    ldp d15, d14, [sp], #80 // 16-byte Folded Reload
; CHECK-NEXT:    ret
  call void @streaming_callee()
  ret void
}

; [ ] N  ->  S
; [x] S  ->  N
; [ ] S  ->  S
; [ ] S  ->  SC
; A streaming caller has to leave streaming mode around the call (smstop/smstart).
define void @streaming_caller_normal_callee() nounwind "aarch64_pstate_sm_enabled" {
; CHECK-LABEL: streaming_caller_normal_callee:
; CHECK:       // %bb.0:
; CHECK-NEXT:    stp d15, d14, [sp, #-80]! // 16-byte Folded Spill
; CHECK-NEXT:    stp d13, d12, [sp, #16] // 16-byte Folded Spill
; CHECK-NEXT:    stp d11, d10, [sp, #32] // 16-byte Folded Spill
; CHECK-NEXT:    stp d9, d8, [sp, #48] // 16-byte Folded Spill
; CHECK-NEXT:    str x30, [sp, #64] // 8-byte Folded Spill
; CHECK-NEXT:    smstop sm
; CHECK-NEXT:    bl normal_callee
; CHECK-NEXT:    smstart sm
; CHECK-NEXT:    ldp d9, d8, [sp, #48] // 16-byte Folded Reload
; CHECK-NEXT:    ldr x30, [sp, #64] // 8-byte Folded Reload
; CHECK-NEXT:    ldp d11, d10, [sp, #32] // 16-byte Folded Reload
; CHECK-NEXT:    ldp d13, d12, [sp, #16] // 16-byte Folded Reload
; CHECK-NEXT:    ldp d15, d14, [sp], #80 // 16-byte Folded Reload
; CHECK-NEXT:    ret
  call void @normal_callee()
  ret void
}

; [ ] N  ->  S
; [ ] S  ->  N
; [x] S  ->  S
; [ ] S  ->  SC
; Caller and callee agree on the mode, so no smstart/smstop is expected.
define void @streaming_caller_streaming_callee() nounwind "aarch64_pstate_sm_enabled" {
; CHECK-LABEL: streaming_caller_streaming_callee:
; CHECK:       // %bb.0:
; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT:    bl streaming_callee
; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT:    ret
  call void @streaming_callee()
  ret void
}

; [ ] N  ->  S
; [ ] S  ->  N
; [ ] S  ->  S
; [x] S  ->  SC
; A streaming-compatible callee is called directly, without a mode change.
define void @streaming_caller_streaming_compatible_callee() nounwind "aarch64_pstate_sm_enabled" {
; CHECK-LABEL: streaming_caller_streaming_compatible_callee:
; CHECK:       // %bb.0:
; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT:    bl streaming_compatible_callee
; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT:    ret
  call void @streaming_compatible_callee()
  ret void
}

;
; Handle special cases here.
;

; Call to function-pointer (with attribute)
define void @call_to_function_pointer_streaming_enabled(ptr %p) nounwind {
; CHECK-LABEL: call_to_function_pointer_streaming_enabled:
; CHECK:       // %bb.0:
; CHECK-NEXT:    stp d15, d14, [sp, #-80]! // 16-byte Folded Spill
; CHECK-NEXT:    stp d13, d12, [sp, #16] // 16-byte Folded Spill
; CHECK-NEXT:    stp d11, d10, [sp, #32] // 16-byte Folded Spill
; CHECK-NEXT:    stp d9, d8, [sp, #48] // 16-byte Folded Spill
; CHECK-NEXT:    str x30, [sp, #64] // 8-byte Folded Spill
; CHECK-NEXT:    smstart sm
; CHECK-NEXT:    blr x0
; CHECK-NEXT:    smstop sm
; CHECK-NEXT:    ldp d9, d8, [sp, #48] // 16-byte Folded Reload
; CHECK-NEXT:    ldr x30, [sp, #64] // 8-byte Folded Reload
; CHECK-NEXT:    ldp d11, d10, [sp, #32] // 16-byte Folded Reload
; CHECK-NEXT:    ldp d13, d12, [sp, #16] // 16-byte Folded Reload
; CHECK-NEXT:    ldp d15, d14, [sp], #80 // 16-byte Folded Reload
; CHECK-NEXT:    ret
  call void %p() "aarch64_pstate_sm_enabled"
  ret void
}

; Ensure NEON registers are preserved correctly.
define <4 x i32> @smstart_clobber_simdfp(<4 x i32> %x) nounwind {
; CHECK-LABEL: smstart_clobber_simdfp:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sub sp, sp, #96
; CHECK-NEXT:    stp d15, d14, [sp, #16] // 16-byte Folded Spill
; CHECK-NEXT:    stp d13, d12, [sp, #32] // 16-byte Folded Spill
; CHECK-NEXT:    stp d11, d10, [sp, #48] // 16-byte Folded Spill
; CHECK-NEXT:    stp d9, d8, [sp, #64] // 16-byte Folded Spill
; CHECK-NEXT:    str x30, [sp, #80] // 8-byte Folded Spill
; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
; CHECK-NEXT:    smstart sm
; CHECK-NEXT:    bl streaming_callee
; CHECK-NEXT:    smstop sm
; CHECK-NEXT:    ldp d9, d8, [sp, #64] // 16-byte Folded Reload
; CHECK-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
; CHECK-NEXT:    ldp d11, d10, [sp, #48] // 16-byte Folded Reload
; CHECK-NEXT:    ldr x30, [sp, #80] // 8-byte Folded Reload
; CHECK-NEXT:    ldp d13, d12, [sp, #32] // 16-byte Folded Reload
; CHECK-NEXT:    ldp d15, d14, [sp, #16] // 16-byte Folded Reload
; CHECK-NEXT:    add sp, sp, #96
; CHECK-NEXT:    ret
  call void @streaming_callee()
  ret <4 x i32> %x
}

; Ensure SVE registers are preserved correctly.
; NOTE(review): the scalable vector type was missing from this signature;
; <vscale x 4 x i32> is assumed here - confirm against the original test.
define <vscale x 4 x i32> @smstart_clobber_sve(<vscale x 4 x i32> %x) nounwind {
; CHECK-LABEL: smstart_clobber_sve:
; CHECK:       // %bb.0:
; CHECK-NEXT:    stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
; CHECK-NEXT:    addvl sp, sp, #-18
; CHECK-NEXT:    str p15, [sp, #4, mul vl] // 2-byte Folded Spill
; CHECK-NEXT:    str p14, [sp, #5, mul vl] // 2-byte Folded Spill
; CHECK-NEXT:    str p13, [sp, #6, mul vl] // 2-byte Folded Spill
; CHECK-NEXT:    str p12, [sp, #7, mul vl] // 2-byte Folded Spill
; CHECK-NEXT:    str p11, [sp, #8, mul vl] // 2-byte Folded Spill
; CHECK-NEXT:    str p10, [sp, #9, mul vl] // 2-byte Folded Spill
; CHECK-NEXT:    str p9, [sp, #10, mul vl] // 2-byte Folded Spill
; CHECK-NEXT:    str p8, [sp, #11, mul vl] // 2-byte Folded Spill
; CHECK-NEXT:    str p7, [sp, #12, mul vl] // 2-byte Folded Spill
; CHECK-NEXT:    str p6, [sp, #13, mul vl] // 2-byte Folded Spill
; CHECK-NEXT:    str p5, [sp, #14, mul vl] // 2-byte Folded Spill
; CHECK-NEXT:    str p4, [sp, #15, mul vl] // 2-byte Folded Spill
; CHECK-NEXT:    str z23, [sp, #2, mul vl] // 16-byte Folded Spill
; CHECK-NEXT:    str z22, [sp, #3, mul vl] // 16-byte Folded Spill
; CHECK-NEXT:    str z21, [sp, #4, mul vl] // 16-byte Folded Spill
; CHECK-NEXT:    str z20, [sp, #5, mul vl] // 16-byte Folded Spill
; CHECK-NEXT:    str z19, [sp, #6, mul vl] // 16-byte Folded Spill
; CHECK-NEXT:    str z18, [sp, #7, mul vl] // 16-byte Folded Spill
; CHECK-NEXT:    str z17, [sp, #8, mul vl] // 16-byte Folded Spill
; CHECK-NEXT:    str z16, [sp, #9, mul vl] // 16-byte Folded Spill
; CHECK-NEXT:    str z15, [sp, #10, mul vl] // 16-byte Folded Spill
; CHECK-NEXT:    str z14, [sp, #11, mul vl] // 16-byte Folded Spill
; CHECK-NEXT:    str z13, [sp, #12, mul vl] // 16-byte Folded Spill
; CHECK-NEXT:    str z12, [sp, #13, mul vl] // 16-byte Folded Spill
; CHECK-NEXT:    str z11, [sp, #14, mul vl] // 16-byte Folded Spill
; CHECK-NEXT:    str z10, [sp, #15, mul vl] // 16-byte Folded Spill
; CHECK-NEXT:    str z9, [sp, #16, mul vl] // 16-byte Folded Spill
; CHECK-NEXT:    str z8, [sp, #17, mul vl] // 16-byte Folded Spill
; CHECK-NEXT:    addvl sp, sp, #-1
; CHECK-NEXT:    str z0, [sp] // 16-byte Folded Spill
; CHECK-NEXT:    smstart sm
; CHECK-NEXT:    bl streaming_callee
; CHECK-NEXT:    smstop sm
; CHECK-NEXT:    ldr z0, [sp] // 16-byte Folded Reload
; CHECK-NEXT:    addvl sp, sp, #1
; CHECK-NEXT:    ldr p15, [sp, #4, mul vl] // 2-byte Folded Reload
; CHECK-NEXT:    ldr z23, [sp, #2, mul vl] // 16-byte Folded Reload
; CHECK-NEXT:    ldr z22, [sp, #3, mul vl] // 16-byte Folded Reload
; CHECK-NEXT:    ldr p14, [sp, #5, mul vl] // 2-byte Folded Reload
; CHECK-NEXT:    ldr z21, [sp, #4, mul vl] // 16-byte Folded Reload
; CHECK-NEXT:    ldr z20, [sp, #5, mul vl] // 16-byte Folded Reload
; CHECK-NEXT:    ldr p13, [sp, #6, mul vl] // 2-byte Folded Reload
; CHECK-NEXT:    ldr z19, [sp, #6, mul vl] // 16-byte Folded Reload
; CHECK-NEXT:    ldr z18, [sp, #7, mul vl] // 16-byte Folded Reload
; CHECK-NEXT:    ldr p12, [sp, #7, mul vl] // 2-byte Folded Reload
; CHECK-NEXT:    ldr z17, [sp, #8, mul vl] // 16-byte Folded Reload
; CHECK-NEXT:    ldr z16, [sp, #9, mul vl] // 16-byte Folded Reload
; CHECK-NEXT:    ldr p11, [sp, #8, mul vl] // 2-byte Folded Reload
; CHECK-NEXT:    ldr z15, [sp, #10, mul vl] // 16-byte Folded Reload
; CHECK-NEXT:    ldr z14, [sp, #11, mul vl] // 16-byte Folded Reload
; CHECK-NEXT:    ldr p10, [sp, #9, mul vl] // 2-byte Folded Reload
; CHECK-NEXT:    ldr z13, [sp, #12, mul vl] // 16-byte Folded Reload
; CHECK-NEXT:    ldr z12, [sp, #13, mul vl] // 16-byte Folded Reload
; CHECK-NEXT:    ldr p9, [sp, #10, mul vl] // 2-byte Folded Reload
; CHECK-NEXT:    ldr z11, [sp, #14, mul vl] // 16-byte Folded Reload
; CHECK-NEXT:    ldr z10, [sp, #15, mul vl] // 16-byte Folded Reload
; CHECK-NEXT:    ldr p8, [sp, #11, mul vl] // 2-byte Folded Reload
; CHECK-NEXT:    ldr z9, [sp, #16, mul vl] // 16-byte Folded Reload
; CHECK-NEXT:    ldr z8, [sp, #17, mul vl] // 16-byte Folded Reload
; CHECK-NEXT:    ldr p7, [sp, #12, mul vl] // 2-byte Folded Reload
; CHECK-NEXT:    ldr p6, [sp, #13, mul vl] // 2-byte Folded Reload
; CHECK-NEXT:    ldr p5, [sp, #14, mul vl] // 2-byte Folded Reload
; CHECK-NEXT:    ldr p4, [sp, #15, mul vl] // 2-byte Folded Reload
; CHECK-NEXT:    addvl sp, sp, #18
; CHECK-NEXT:    ldp x29, x30, [sp], #16 // 16-byte Folded Reload
; CHECK-NEXT:    ret
  call void @streaming_callee()
  ret <vscale x 4 x i32> %x
}

; Call streaming callee twice; there should be no spills/fills between the two
; calls since the registers should have already been clobbered.
; NOTE(review): the scalable vector type was missing from this signature;
; <vscale x 4 x i32> is assumed here - confirm against the original test.
define <vscale x 4 x i32> @smstart_clobber_sve_duplicate(<vscale x 4 x i32> %x) nounwind {
; CHECK-LABEL: smstart_clobber_sve_duplicate:
; CHECK:       // %bb.0:
; CHECK-NEXT:    stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
; CHECK-NEXT:    addvl sp, sp, #-18
; CHECK-NEXT:    str p15, [sp, #4, mul vl] // 2-byte Folded Spill
; CHECK-NEXT:    str p14, [sp, #5, mul vl] // 2-byte Folded Spill
; CHECK-NEXT:    str p13, [sp, #6, mul vl] // 2-byte Folded Spill
; CHECK-NEXT:    str p12, [sp, #7, mul vl] // 2-byte Folded Spill
; CHECK-NEXT:    str p11, [sp, #8, mul vl] // 2-byte Folded Spill
; CHECK-NEXT:    str p10, [sp, #9, mul vl] // 2-byte Folded Spill
; CHECK-NEXT:    str p9, [sp, #10, mul vl] // 2-byte Folded Spill
; CHECK-NEXT:    str p8, [sp, #11, mul vl] // 2-byte Folded Spill
; CHECK-NEXT:    str p7, [sp, #12, mul vl] // 2-byte Folded Spill
; CHECK-NEXT:    str p6, [sp, #13, mul vl] // 2-byte Folded Spill
; CHECK-NEXT:    str p5, [sp, #14, mul vl] // 2-byte Folded Spill
; CHECK-NEXT:    str p4, [sp, #15, mul vl] // 2-byte Folded Spill
; CHECK-NEXT:    str z23, [sp, #2, mul vl] // 16-byte Folded Spill
; CHECK-NEXT:    str z22, [sp, #3, mul vl] // 16-byte Folded Spill
; CHECK-NEXT:    str z21, [sp, #4, mul vl] // 16-byte Folded Spill
; CHECK-NEXT:    str z20, [sp, #5, mul vl] // 16-byte Folded Spill
; CHECK-NEXT:    str z19, [sp, #6, mul vl] // 16-byte Folded Spill
; CHECK-NEXT:    str z18, [sp, #7, mul vl] // 16-byte Folded Spill
; CHECK-NEXT:    str z17, [sp, #8, mul vl] // 16-byte Folded Spill
; CHECK-NEXT:    str z16, [sp, #9, mul vl] // 16-byte Folded Spill
; CHECK-NEXT:    str z15, [sp, #10, mul vl] // 16-byte Folded Spill
; CHECK-NEXT:    str z14, [sp, #11, mul vl] // 16-byte Folded Spill
; CHECK-NEXT:    str z13, [sp, #12, mul vl] // 16-byte Folded Spill
; CHECK-NEXT:    str z12, [sp, #13, mul vl] // 16-byte Folded Spill
; CHECK-NEXT:    str z11, [sp, #14, mul vl] // 16-byte Folded Spill
; CHECK-NEXT:    str z10, [sp, #15, mul vl] // 16-byte Folded Spill
; CHECK-NEXT:    str z9, [sp, #16, mul vl] // 16-byte Folded Spill
; CHECK-NEXT:    str z8, [sp, #17, mul vl] // 16-byte Folded Spill
; CHECK-NEXT:    addvl sp, sp, #-1
; CHECK-NEXT:    str z0, [sp] // 16-byte Folded Spill
; CHECK-NEXT:    smstart sm
; CHECK-NEXT:    bl streaming_callee
; CHECK-NEXT:    smstop sm
; CHECK-NEXT:    smstart sm
; CHECK-NEXT:    bl streaming_callee
; CHECK-NEXT:    smstop sm
; CHECK-NEXT:    ldr z0, [sp] // 16-byte Folded Reload
; CHECK-NEXT:    addvl sp, sp, #1
; CHECK-NEXT:    ldr p15, [sp, #4, mul vl] // 2-byte Folded Reload
; CHECK-NEXT:    ldr z23, [sp, #2, mul vl] // 16-byte Folded Reload
; CHECK-NEXT:    ldr z22, [sp, #3, mul vl] // 16-byte Folded Reload
; CHECK-NEXT:    ldr p14, [sp, #5, mul vl] // 2-byte Folded Reload
; CHECK-NEXT:    ldr z21, [sp, #4, mul vl] // 16-byte Folded Reload
; CHECK-NEXT:    ldr z20, [sp, #5, mul vl] // 16-byte Folded Reload
; CHECK-NEXT:    ldr p13, [sp, #6, mul vl] // 2-byte Folded Reload
; CHECK-NEXT:    ldr z19, [sp, #6, mul vl] // 16-byte Folded Reload
; CHECK-NEXT:    ldr z18, [sp, #7, mul vl] // 16-byte Folded Reload
; CHECK-NEXT:    ldr p12, [sp, #7, mul vl] // 2-byte Folded Reload
; CHECK-NEXT:    ldr z17, [sp, #8, mul vl] // 16-byte Folded Reload
; CHECK-NEXT:    ldr z16, [sp, #9, mul vl] // 16-byte Folded Reload
; CHECK-NEXT:    ldr p11, [sp, #8, mul vl] // 2-byte Folded Reload
; CHECK-NEXT:    ldr z15, [sp, #10, mul vl] // 16-byte Folded Reload
; CHECK-NEXT:    ldr z14, [sp, #11, mul vl] // 16-byte Folded Reload
; CHECK-NEXT:    ldr p10, [sp, #9, mul vl] // 2-byte Folded Reload
; CHECK-NEXT:    ldr z13, [sp, #12, mul vl] // 16-byte Folded Reload
; CHECK-NEXT:    ldr z12, [sp, #13, mul vl] // 16-byte Folded Reload
; CHECK-NEXT:    ldr p9, [sp, #10, mul vl] // 2-byte Folded Reload
; CHECK-NEXT:    ldr z11, [sp, #14, mul vl] // 16-byte Folded Reload
; CHECK-NEXT:    ldr z10, [sp, #15, mul vl] // 16-byte Folded Reload
; CHECK-NEXT:    ldr p8, [sp, #11, mul vl] // 2-byte Folded Reload
; CHECK-NEXT:    ldr z9, [sp, #16, mul vl] // 16-byte Folded Reload
; CHECK-NEXT:    ldr z8, [sp, #17, mul vl] // 16-byte Folded Reload
; CHECK-NEXT:    ldr p7, [sp, #12, mul vl] // 2-byte Folded Reload
; CHECK-NEXT:    ldr p6, [sp, #13, mul vl] // 2-byte Folded Reload
; CHECK-NEXT:    ldr p5, [sp, #14, mul vl] // 2-byte Folded Reload
; CHECK-NEXT:    ldr p4, [sp, #15, mul vl] // 2-byte Folded Reload
; CHECK-NEXT:    addvl sp, sp, #18
; CHECK-NEXT:    ldp x29, x30, [sp], #16 // 16-byte Folded Reload
; CHECK-NEXT:    ret
  call void @streaming_callee()
  call void @streaming_callee()
  ret <vscale x 4 x i32> %x
}

; Ensure smstart is not removed, because call to llvm.cos is not part of a chain.
define double @call_to_intrinsic_without_chain(double %x) nounwind "aarch64_pstate_sm_enabled" {
; CHECK-LABEL: call_to_intrinsic_without_chain:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    sub sp, sp, #96
; CHECK-NEXT:    stp d15, d14, [sp, #16] // 16-byte Folded Spill
; CHECK-NEXT:    stp d13, d12, [sp, #32] // 16-byte Folded Spill
; CHECK-NEXT:    stp d11, d10, [sp, #48] // 16-byte Folded Spill
; CHECK-NEXT:    stp d9, d8, [sp, #64] // 16-byte Folded Spill
; CHECK-NEXT:    str x30, [sp, #80] // 8-byte Folded Spill
; CHECK-NEXT:    stp d0, d0, [sp] // 16-byte Folded Spill
; CHECK-NEXT:    smstop sm
; CHECK-NEXT:    ldr d0, [sp] // 8-byte Folded Reload
; CHECK-NEXT:    bl cos
; CHECK-NEXT:    str d0, [sp] // 8-byte Folded Spill
; CHECK-NEXT:    smstart sm
; CHECK-NEXT:    ldp d1, d0, [sp] // 16-byte Folded Reload
; CHECK-NEXT:    ldr x30, [sp, #80] // 8-byte Folded Reload
; CHECK-NEXT:    ldp d9, d8, [sp, #64] // 16-byte Folded Reload
; CHECK-NEXT:    ldp d11, d10, [sp, #48] // 16-byte Folded Reload
; CHECK-NEXT:    ldp d13, d12, [sp, #32] // 16-byte Folded Reload
; CHECK-NEXT:    fadd d0, d1, d0
; CHECK-NEXT:    ldp d15, d14, [sp, #16] // 16-byte Folded Reload
; CHECK-NEXT:    add sp, sp, #96
; CHECK-NEXT:    ret
entry:
  %res = call fast double @llvm.cos.f64(double %x)
  %res.fadd = fadd fast double %res, %x
  ret double %res.fadd
}

declare double @llvm.cos.f64(double)

; Ensure that tail call optimization is disabled when the streaming mode
; doesn't match.
define void @disable_tailcallopt() nounwind {
; CHECK-LABEL: disable_tailcallopt:
; CHECK:       // %bb.0:
; CHECK-NEXT:    stp d15, d14, [sp, #-80]! // 16-byte Folded Spill
; CHECK-NEXT:    stp d13, d12, [sp, #16] // 16-byte Folded Spill
; CHECK-NEXT:    stp d11, d10, [sp, #32] // 16-byte Folded Spill
; CHECK-NEXT:    stp d9, d8, [sp, #48] // 16-byte Folded Spill
; CHECK-NEXT:    str x30, [sp, #64] // 8-byte Folded Spill
; CHECK-NEXT:    smstart sm
; CHECK-NEXT:    bl streaming_callee
; CHECK-NEXT:    smstop sm
; CHECK-NEXT:    ldp d9, d8, [sp, #48] // 16-byte Folded Reload
; CHECK-NEXT:    ldr x30, [sp, #64] // 8-byte Folded Reload
; CHECK-NEXT:    ldp d11, d10, [sp, #32] // 16-byte Folded Reload
; CHECK-NEXT:    ldp d13, d12, [sp, #16] // 16-byte Folded Reload
; CHECK-NEXT:    ldp d15, d14, [sp], #80 // 16-byte Folded Reload
; CHECK-NEXT:    ret
  tail call void @streaming_callee()
  ret void
}

; Streaming caller that passes the addresses of three scalable-vector stack
; objects (plus the streaming vector length in bytes) to the non-streaming
; callee @foo, then reads element 0 of the first object after the call.
define i8 @call_to_non_streaming_pass_sve_objects(ptr nocapture noundef readnone %ptr) #0 {
; CHECK-LABEL: call_to_non_streaming_pass_sve_objects:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    stp d15, d14, [sp, #-80]! // 16-byte Folded Spill
; CHECK-NEXT:    stp d13, d12, [sp, #16] // 16-byte Folded Spill
; CHECK-NEXT:    stp d11, d10, [sp, #32] // 16-byte Folded Spill
; CHECK-NEXT:    stp d9, d8, [sp, #48] // 16-byte Folded Spill
; CHECK-NEXT:    stp x29, x30, [sp, #64] // 16-byte Folded Spill
; CHECK-NEXT:    addvl sp, sp, #-3
; CHECK-NEXT:    rdsvl x3, #1
; CHECK-NEXT:    addvl x0, sp, #2
; CHECK-NEXT:    addvl x1, sp, #1
; CHECK-NEXT:    mov x2, sp
; CHECK-NEXT:    smstop sm
; CHECK-NEXT:    bl foo
; CHECK-NEXT:    smstart sm
; CHECK-NEXT:    ptrue p0.b
; CHECK-NEXT:    ld1b { z0.b }, p0/z, [sp, #2, mul vl]
; CHECK-NEXT:    fmov w0, s0
; CHECK-NEXT:    addvl sp, sp, #3
; CHECK-NEXT:    ldp x29, x30, [sp, #64] // 16-byte Folded Reload
; CHECK-NEXT:    ldp d9, d8, [sp, #48] // 16-byte Folded Reload
; CHECK-NEXT:    ldp d11, d10, [sp, #32] // 16-byte Folded Reload
; CHECK-NEXT:    ldp d13, d12, [sp, #16] // 16-byte Folded Reload
; CHECK-NEXT:    ldp d15, d14, [sp], #80 // 16-byte Folded Reload
; CHECK-NEXT:    ret
entry:
  %Data1 = alloca <vscale x 16 x i8>, align 16
  %Data2 = alloca <vscale x 16 x i8>, align 16
  %Data3 = alloca <vscale x 16 x i8>, align 16
  %0 = tail call i64 @llvm.aarch64.sme.cntsb()
  call void @foo(ptr noundef nonnull %Data1, ptr noundef nonnull %Data2, ptr noundef nonnull %Data3, i64 noundef %0)
  %1 = load <vscale x 16 x i8>, ptr %Data1, align 16
  %vecext = extractelement <vscale x 16 x i8> %1, i64 0
  ret i8 %vecext
}

; Streaming caller that forwards its integer and floating-point arguments to
; the non-streaming callee @bar; the FP arguments are spilled before smstop and
; reloaded after it.
define void @call_to_non_streaming_pass_args(ptr nocapture noundef readnone %ptr, i64 %long1, i64 %long2, i32 %int1, i32 %int2, float %float1, float %float2, double %double1, double %double2) #0 {
; CHECK-LABEL: call_to_non_streaming_pass_args:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    sub sp, sp, #112
; CHECK-NEXT:    stp d15, d14, [sp, #32] // 16-byte Folded Spill
; CHECK-NEXT:    stp d13, d12, [sp, #48] // 16-byte Folded Spill
; CHECK-NEXT:    stp d11, d10, [sp, #64] // 16-byte Folded Spill
; CHECK-NEXT:    stp d9, d8, [sp, #80] // 16-byte Folded Spill
; CHECK-NEXT:    str x30, [sp, #96] // 8-byte Folded Spill
; CHECK-NEXT:    stp s1, s0, [sp, #24] // 8-byte Folded Spill
; CHECK-NEXT:    stp d3, d2, [sp, #8] // 16-byte Folded Spill
; CHECK-NEXT:    smstop sm
; CHECK-NEXT:    ldp s1, s0, [sp, #24] // 8-byte Folded Reload
; CHECK-NEXT:    ldp d3, d2, [sp, #8] // 16-byte Folded Reload
; CHECK-NEXT:    bl bar
; CHECK-NEXT:    smstart sm
; CHECK-NEXT:    ldp d9, d8, [sp, #80] // 16-byte Folded Reload
; CHECK-NEXT:    ldr x30, [sp, #96] // 8-byte Folded Reload
; CHECK-NEXT:    ldp d11, d10, [sp, #64] // 16-byte Folded Reload
; CHECK-NEXT:    ldp d13, d12, [sp, #48] // 16-byte Folded Reload
; CHECK-NEXT:    ldp d15, d14, [sp, #32] // 16-byte Folded Reload
; CHECK-NEXT:    add sp, sp, #112
; CHECK-NEXT:    ret
entry:
  call void @bar(ptr noundef nonnull %ptr, i64 %long1, i64 %long2, i32 %int1, i32 %int2, float %float1, float %float2, double %double1, double %double2)
  ret void
}

declare i64 @llvm.aarch64.sme.cntsb()
declare void @foo(ptr noundef, ptr noundef, ptr noundef, i64 noundef)
declare void @bar(ptr noundef, i64 noundef, i64 noundef, i32 noundef, i32 noundef, float noundef, float noundef, double noundef, double noundef)

attributes #0 = { nounwind vscale_range(1,16) "aarch64_pstate_sm_enabled" }