; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple aarch64-none-eabi < %s -verify-machineinstrs -enable-post-misched=false | FileCheck %s
; RUN: llc -mtriple aarch64-none-eabi < %s -verify-machineinstrs -enable-post-misched=false -global-isel | FileCheck %s

; Tests for prolog sequences for stack probing, when using a 64KiB stack guard.
;
; Every function below allocates a single fixed-size stack object and stores
; its address through %out. The expected assembly is checked for both
; SelectionDAG and GlobalISel (see the two llc invocations above). The function
; names encode the allocation size in bytes (and the alignment, where one is
; requested).

; 64k bytes is the largest frame we can probe in one go.
define void @static_65536(ptr %out) #0 {
; CHECK-LABEL: static_65536:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    .cfi_offset w29, -16
; CHECK-NEXT:    sub sp, sp, #16, lsl #12 // =65536
; CHECK-NEXT:    .cfi_def_cfa_offset 65552
; CHECK-NEXT:    str xzr, [sp]
; CHECK-NEXT:    mov x8, sp
; CHECK-NEXT:    str x8, [x0]
; CHECK-NEXT:    add sp, sp, #16, lsl #12 // =65536
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT:    .cfi_def_cfa_offset 0
; CHECK-NEXT:    .cfi_restore w29
; CHECK-NEXT:    ret
entry:
  %v = alloca i8, i64 65536, align 1
  store ptr %v, ptr %out, align 8
  ret void
}

; 64k+16 bytes, still needs just one probe.
define void @static_65552(ptr %out) #0 {
; CHECK-LABEL: static_65552:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    .cfi_offset w29, -16
; CHECK-NEXT:    sub sp, sp, #16, lsl #12 // =65536
; CHECK-NEXT:    .cfi_def_cfa_offset 65552
; CHECK-NEXT:    str xzr, [sp], #-16
; CHECK-NEXT:    .cfi_def_cfa_offset 65568
; CHECK-NEXT:    mov x8, sp
; CHECK-NEXT:    str x8, [x0]
; CHECK-NEXT:    add sp, sp, #16, lsl #12 // =65536
; CHECK-NEXT:    .cfi_def_cfa_offset 32
; CHECK-NEXT:    add sp, sp, #16
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT:    .cfi_def_cfa_offset 0
; CHECK-NEXT:    .cfi_restore w29
; CHECK-NEXT:    ret
entry:
  %v = alloca i8, i64 65552, align 1
  store ptr %v, ptr %out, align 8
  ret void
}

; 64k+1024 bytes, the largest frame which needs just one probe.
define void @static_66560(ptr %out) #0 {
; CHECK-LABEL: static_66560:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    .cfi_offset w29, -16
; CHECK-NEXT:    sub sp, sp, #16, lsl #12 // =65536
; CHECK-NEXT:    .cfi_def_cfa_offset 65552
; CHECK-NEXT:    str xzr, [sp]
; CHECK-NEXT:    sub sp, sp, #1024
; CHECK-NEXT:    .cfi_def_cfa_offset 66576
; CHECK-NEXT:    mov x8, sp
; CHECK-NEXT:    str x8, [x0]
; CHECK-NEXT:    add sp, sp, #16, lsl #12 // =65536
; CHECK-NEXT:    .cfi_def_cfa_offset 1040
; CHECK-NEXT:    add sp, sp, #1024
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT:    .cfi_def_cfa_offset 0
; CHECK-NEXT:    .cfi_restore w29
; CHECK-NEXT:    ret
entry:
  %v = alloca i8, i64 66560, align 1
  store ptr %v, ptr %out, align 8
  ret void
}

; 64k+1024+16 bytes, the smallest frame which needs two probes.
define void @static_66576(ptr %out) #0 {
; CHECK-LABEL: static_66576:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    .cfi_offset w29, -16
; CHECK-NEXT:    sub sp, sp, #16, lsl #12 // =65536
; CHECK-NEXT:    .cfi_def_cfa_offset 65552
; CHECK-NEXT:    str xzr, [sp]
; CHECK-NEXT:    sub sp, sp, #1040
; CHECK-NEXT:    .cfi_def_cfa_offset 66592
; CHECK-NEXT:    str xzr, [sp]
; CHECK-NEXT:    mov x8, sp
; CHECK-NEXT:    str x8, [x0]
; CHECK-NEXT:    add sp, sp, #16, lsl #12 // =65536
; CHECK-NEXT:    .cfi_def_cfa_offset 1056
; CHECK-NEXT:    add sp, sp, #1040
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT:    .cfi_def_cfa_offset 0
; CHECK-NEXT:    .cfi_restore w29
; CHECK-NEXT:    ret
entry:
  %v = alloca i8, i64 66576, align 1
  store ptr %v, ptr %out, align 8
  ret void
}

; 2*64k+1024, the largest frame needing two probes.
define void @static_132096(ptr %out) #0 {
; CHECK-LABEL: static_132096:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    .cfi_offset w29, -16
; CHECK-NEXT:    sub sp, sp, #16, lsl #12 // =65536
; CHECK-NEXT:    .cfi_def_cfa_offset 65552
; CHECK-NEXT:    str xzr, [sp]
; CHECK-NEXT:    sub sp, sp, #16, lsl #12 // =65536
; CHECK-NEXT:    .cfi_def_cfa_offset 131088
; CHECK-NEXT:    str xzr, [sp]
; CHECK-NEXT:    sub sp, sp, #1024
; CHECK-NEXT:    .cfi_def_cfa_offset 132112
; CHECK-NEXT:    mov x8, sp
; CHECK-NEXT:    str x8, [x0]
; CHECK-NEXT:    add sp, sp, #32, lsl #12 // =131072
; CHECK-NEXT:    .cfi_def_cfa_offset 1040
; CHECK-NEXT:    add sp, sp, #1024
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT:    .cfi_def_cfa_offset 0
; CHECK-NEXT:    .cfi_restore w29
; CHECK-NEXT:    ret
entry:
  %v = alloca i8, i64 132096, align 1
  store ptr %v, ptr %out, align 8
  ret void
}

; 5*64k-16, the largest frame probed without a loop.
define void @static_327664(ptr %out) #0 {
; CHECK-LABEL: static_327664:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    .cfi_offset w29, -16
; CHECK-NEXT:    sub sp, sp, #16, lsl #12 // =65536
; CHECK-NEXT:    .cfi_def_cfa_offset 65552
; CHECK-NEXT:    str xzr, [sp]
; CHECK-NEXT:    sub sp, sp, #16, lsl #12 // =65536
; CHECK-NEXT:    .cfi_def_cfa_offset 131088
; CHECK-NEXT:    str xzr, [sp]
; CHECK-NEXT:    sub sp, sp, #16, lsl #12 // =65536
; CHECK-NEXT:    .cfi_def_cfa_offset 196624
; CHECK-NEXT:    str xzr, [sp]
; CHECK-NEXT:    sub sp, sp, #16, lsl #12 // =65536
; CHECK-NEXT:    .cfi_def_cfa_offset 262160
; CHECK-NEXT:    str xzr, [sp]
; CHECK-NEXT:    sub sp, sp, #15, lsl #12 // =61440
; CHECK-NEXT:    .cfi_def_cfa_offset 323600
; CHECK-NEXT:    sub sp, sp, #4080
; CHECK-NEXT:    .cfi_def_cfa_offset 327680
; CHECK-NEXT:    str xzr, [sp]
; CHECK-NEXT:    mov x8, sp
; CHECK-NEXT:    str x8, [x0]
; CHECK-NEXT:    add sp, sp, #79, lsl #12 // =323584
; CHECK-NEXT:    .cfi_def_cfa_offset 4096
; CHECK-NEXT:    add sp, sp, #4080
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT:    .cfi_def_cfa_offset 0
; CHECK-NEXT:    .cfi_restore w29
; CHECK-NEXT:    ret
entry:
  %v = alloca i8, i64 327664, align 1
  store ptr %v, ptr %out, align 8
  ret void
}

; 5*64k, smallest frame probed with a loop.
define void @static_327680(ptr %out) #0 {
; CHECK-LABEL: static_327680:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    .cfi_offset w29, -16
; CHECK-NEXT:    sub x9, sp, #80, lsl #12 // =327680
; CHECK-NEXT:    .cfi_def_cfa w9, 327696
; CHECK-NEXT:  .LBB6_1: // %entry
; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    sub sp, sp, #16, lsl #12 // =65536
; CHECK-NEXT:    str xzr, [sp]
; CHECK-NEXT:    cmp sp, x9
; CHECK-NEXT:    b.ne .LBB6_1
; CHECK-NEXT:  // %bb.2: // %entry
; CHECK-NEXT:    .cfi_def_cfa_register wsp
; CHECK-NEXT:    mov x8, sp
; CHECK-NEXT:    str x8, [x0]
; CHECK-NEXT:    add sp, sp, #80, lsl #12 // =327680
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT:    .cfi_def_cfa_offset 0
; CHECK-NEXT:    .cfi_restore w29
; CHECK-NEXT:    ret
entry:
  %v = alloca i8, i64 327680, align 1
  store ptr %v, ptr %out, align 8
  ret void
}

; 5*64k+1024, large enough to use a loop, but not a multiple of 64KiB
; so has a remainder, but no extra probe.
define void @static_328704(ptr %out) #0 {
; CHECK-LABEL: static_328704:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    .cfi_offset w29, -16
; CHECK-NEXT:    sub x9, sp, #80, lsl #12 // =327680
; CHECK-NEXT:    .cfi_def_cfa w9, 327696
; CHECK-NEXT:  .LBB7_1: // %entry
; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    sub sp, sp, #16, lsl #12 // =65536
; CHECK-NEXT:    str xzr, [sp]
; CHECK-NEXT:    cmp sp, x9
; CHECK-NEXT:    b.ne .LBB7_1
; CHECK-NEXT:  // %bb.2: // %entry
; CHECK-NEXT:    .cfi_def_cfa_register wsp
; CHECK-NEXT:    sub sp, sp, #1024
; CHECK-NEXT:    .cfi_def_cfa_offset 328720
; CHECK-NEXT:    mov x8, sp
; CHECK-NEXT:    str x8, [x0]
; CHECK-NEXT:    add sp, sp, #80, lsl #12 // =327680
; CHECK-NEXT:    .cfi_def_cfa_offset 1040
; CHECK-NEXT:    add sp, sp, #1024
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT:    .cfi_def_cfa_offset 0
; CHECK-NEXT:    .cfi_restore w29
; CHECK-NEXT:    ret
entry:
  %v = alloca i8, i64 328704, align 1
  store ptr %v, ptr %out, align 8
  ret void
}

; 5*64k+1040, large enough to use a loop, has a remainder and
; an extra probe.
define void @static_328720(ptr %out) #0 {
; CHECK-LABEL: static_328720:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    .cfi_offset w29, -16
; CHECK-NEXT:    sub x9, sp, #80, lsl #12 // =327680
; CHECK-NEXT:    .cfi_def_cfa w9, 327696
; CHECK-NEXT:  .LBB8_1: // %entry
; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    sub sp, sp, #16, lsl #12 // =65536
; CHECK-NEXT:    str xzr, [sp]
; CHECK-NEXT:    cmp sp, x9
; CHECK-NEXT:    b.ne .LBB8_1
; CHECK-NEXT:  // %bb.2: // %entry
; CHECK-NEXT:    .cfi_def_cfa_register wsp
; CHECK-NEXT:    sub sp, sp, #1040
; CHECK-NEXT:    .cfi_def_cfa_offset 328736
; CHECK-NEXT:    str xzr, [sp]
; CHECK-NEXT:    mov x8, sp
; CHECK-NEXT:    str x8, [x0]
; CHECK-NEXT:    add sp, sp, #80, lsl #12 // =327680
; CHECK-NEXT:    .cfi_def_cfa_offset 1056
; CHECK-NEXT:    add sp, sp, #1040
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT:    .cfi_def_cfa_offset 0
; CHECK-NEXT:    .cfi_restore w29
; CHECK-NEXT:    ret
entry:
  %v = alloca i8, i64 328720, align 1
  store ptr %v, ptr %out, align 8
  ret void
}

; A small allocation, but with a very large alignment requirement. We do this
; by moving SP far enough that a sufficiently-aligned block will exist
; somewhere in the stack frame, so must probe the whole of that larger SP move.
define void @static_16_align_131072(ptr %out) #0 {
; CHECK-LABEL: static_16_align_131072:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    mov x29, sp
; CHECK-NEXT:    .cfi_def_cfa w29, 16
; CHECK-NEXT:    .cfi_offset w30, -8
; CHECK-NEXT:    .cfi_offset w29, -16
; CHECK-NEXT:    sub x9, sp, #31, lsl #12 // =126976
; CHECK-NEXT:    sub x9, x9, #4080
; CHECK-NEXT:    and x9, x9, #0xfffffffffffe0000
; CHECK-NEXT:  .LBB9_1: // %entry
; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    sub sp, sp, #16, lsl #12 // =65536
; CHECK-NEXT:    cmp sp, x9
; CHECK-NEXT:    b.le .LBB9_3
; CHECK-NEXT:  // %bb.2: // %entry
; CHECK-NEXT:    // in Loop: Header=BB9_1 Depth=1
; CHECK-NEXT:    str xzr, [sp]
; CHECK-NEXT:    b .LBB9_1
; CHECK-NEXT:  .LBB9_3: // %entry
; CHECK-NEXT:    mov sp, x9
; CHECK-NEXT:    ldr xzr, [sp]
; CHECK-NEXT:    mov x8, sp
; CHECK-NEXT:    str x8, [x0]
; CHECK-NEXT:    mov sp, x29
; CHECK-NEXT:    .cfi_def_cfa wsp, 16
; CHECK-NEXT:    ldp x29, x30, [sp], #16 // 16-byte Folded Reload
; CHECK-NEXT:    .cfi_def_cfa_offset 0
; CHECK-NEXT:    .cfi_restore w30
; CHECK-NEXT:    .cfi_restore w29
; CHECK-NEXT:    ret
entry:
  %v = alloca i8, i64 16, align 131072
  store ptr %v, ptr %out, align 8
  ret void
}

; A small allocation, but with a very large alignment requirement which
; is nevertheless small enough as to not need a loop.
define void @static_16_align_8192(ptr %out) #0 {
; CHECK-LABEL: static_16_align_8192:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    mov x29, sp
; CHECK-NEXT:    .cfi_def_cfa w29, 16
; CHECK-NEXT:    .cfi_offset w30, -8
; CHECK-NEXT:    .cfi_offset w29, -16
; CHECK-NEXT:    sub x9, sp, #1, lsl #12 // =4096
; CHECK-NEXT:    sub x9, x9, #4080
; CHECK-NEXT:    and sp, x9, #0xffffffffffffe000
; CHECK-NEXT:    str xzr, [sp]
; CHECK-NEXT:    mov x8, sp
; CHECK-NEXT:    str x8, [x0]
; CHECK-NEXT:    mov sp, x29
; CHECK-NEXT:    .cfi_def_cfa wsp, 16
; CHECK-NEXT:    ldp x29, x30, [sp], #16 // 16-byte Folded Reload
; CHECK-NEXT:    .cfi_def_cfa_offset 0
; CHECK-NEXT:    .cfi_restore w30
; CHECK-NEXT:    .cfi_restore w29
; CHECK-NEXT:    ret
entry:
  %v = alloca i8, i64 16, align 8192
  store ptr %v, ptr %out, align 8
  ret void
}

; A large allocation with a very large alignment requirement which
; is nevertheless small enough as to not need a loop.
define void @static_32752_align_32k(ptr %out) #0 {
; CHECK-LABEL: static_32752_align_32k:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    mov x29, sp
; CHECK-NEXT:    .cfi_def_cfa w29, 16
; CHECK-NEXT:    .cfi_offset w30, -8
; CHECK-NEXT:    .cfi_offset w29, -16
; CHECK-NEXT:    sub x9, sp, #7, lsl #12 // =28672
; CHECK-NEXT:    sub x9, x9, #4080
; CHECK-NEXT:    and sp, x9, #0xffffffffffff8000
; CHECK-NEXT:    str xzr, [sp]
; CHECK-NEXT:    mov x8, sp
; CHECK-NEXT:    str x8, [x0]
; CHECK-NEXT:    mov sp, x29
; CHECK-NEXT:    .cfi_def_cfa wsp, 16
; CHECK-NEXT:    ldp x29, x30, [sp], #16 // 16-byte Folded Reload
; CHECK-NEXT:    .cfi_def_cfa_offset 0
; CHECK-NEXT:    .cfi_restore w30
; CHECK-NEXT:    .cfi_restore w29
; CHECK-NEXT:    ret
entry:
  %v = alloca i8, i64 32752, align 32768
  store ptr %v, ptr %out, align 8
  ret void
}

; Shared by every function above: inline stack probing with a 65536-byte
; (64KiB) probe size, async unwind tables (hence the CFI directives in the
; expected output) and no frame pointer requested.
attributes #0 = { uwtable(async) "probe-stack"="inline-asm" "stack-probe-size"="65536" "frame-pointer"="none" }