; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
; RUN: llc < %s -o - | FileCheck %s
; RUN: llc -mattr=+alu-lsl-fast --aarch64-enable-sink-fold=false < %s -o - | FileCheck %s -check-prefix=LSLFAST

target triple = "aarch64-linux"

declare void @g(...)

; Check that ADDWrs/ADDXrs with shift > 4 is considered relatively
; slow, thus CSE-d.
define void @f0(i1 %c0, i1 %c1, ptr %a, i64 %i) {
; CHECK-LABEL: f0:
; CHECK:       // %bb.0: // %E
; CHECK-NEXT:    tbz w0, #0, .LBB0_5
; CHECK-NEXT:  // %bb.1: // %A
; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    .cfi_offset w30, -16
; CHECK-NEXT:    add x0, x2, x3, lsl #5
; CHECK-NEXT:    tbz w1, #0, .LBB0_3
; CHECK-NEXT:  // %bb.2: // %B
; CHECK-NEXT:    bl g
; CHECK-NEXT:    b .LBB0_4
; CHECK-NEXT:  .LBB0_3: // %C
; CHECK-NEXT:    mov x1, x0
; CHECK-NEXT:    bl g
; CHECK-NEXT:  .LBB0_4:
; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT:  .LBB0_5: // %X
; CHECK-NEXT:    ret
;
; LSLFAST-LABEL: f0:
; LSLFAST:       // %bb.0: // %E
; LSLFAST-NEXT:    tbz w0, #0, .LBB0_5
; LSLFAST-NEXT:  // %bb.1: // %A
; LSLFAST-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
; LSLFAST-NEXT:    .cfi_def_cfa_offset 16
; LSLFAST-NEXT:    .cfi_offset w30, -16
; LSLFAST-NEXT:    add x0, x2, x3, lsl #5
; LSLFAST-NEXT:    tbz w1, #0, .LBB0_3
; LSLFAST-NEXT:  // %bb.2: // %B
; LSLFAST-NEXT:    bl g
; LSLFAST-NEXT:    b .LBB0_4
; LSLFAST-NEXT:  .LBB0_3: // %C
; LSLFAST-NEXT:    mov x1, x0
; LSLFAST-NEXT:    bl g
; LSLFAST-NEXT:  .LBB0_4:
; LSLFAST-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
; LSLFAST-NEXT:  .LBB0_5: // %X
; LSLFAST-NEXT:    ret
E:
  %p0 = getelementptr {i64, i64, i64, i64}, ptr %a, i64 %i
  br i1 %c0, label %A, label %X

A:
  br i1 %c1, label %B, label %C

B:
  call void @g(ptr %p0)
  br label %X

C:
  %p1 = getelementptr {i64, i64, i64, i64}, ptr %a, i64 %i
  call void @g(ptr %p1, ptr %p0)
  br label %X

X:
  ret void
}

; Check that ADDWrs/ADDXrs with shift <= 4 is considered relatively fast on sub-targets
; with feature +alu-lsl-fast, thus *not* CSE-d.
define void @f1(i1 %c0, i1 %c1, ptr %a, i64 %i) {
; CHECK-LABEL: f1:
; CHECK:       // %bb.0: // %E
; CHECK-NEXT:    tbz w0, #0, .LBB1_5
; CHECK-NEXT:  // %bb.1: // %A
; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    .cfi_offset w30, -16
; CHECK-NEXT:    add x0, x2, x3, lsl #4
; CHECK-NEXT:    tbz w1, #0, .LBB1_3
; CHECK-NEXT:  // %bb.2: // %B
; CHECK-NEXT:    bl g
; CHECK-NEXT:    b .LBB1_4
; CHECK-NEXT:  .LBB1_3: // %C
; CHECK-NEXT:    mov x1, x0
; CHECK-NEXT:    bl g
; CHECK-NEXT:  .LBB1_4:
; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT:  .LBB1_5: // %X
; CHECK-NEXT:    ret
;
; LSLFAST-LABEL: f1:
; LSLFAST:       // %bb.0: // %E
; LSLFAST-NEXT:    tbz w0, #0, .LBB1_5
; LSLFAST-NEXT:  // %bb.1: // %A
; LSLFAST-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
; LSLFAST-NEXT:    .cfi_def_cfa_offset 16
; LSLFAST-NEXT:    .cfi_offset w30, -16
; LSLFAST-NEXT:    add x8, x2, x3, lsl #4
; LSLFAST-NEXT:    tbz w1, #0, .LBB1_3
; LSLFAST-NEXT:  // %bb.2: // %B
; LSLFAST-NEXT:    mov x0, x8
; LSLFAST-NEXT:    bl g
; LSLFAST-NEXT:    b .LBB1_4
; LSLFAST-NEXT:  .LBB1_3: // %C
; LSLFAST-NEXT:    add x0, x2, x3, lsl #4
; LSLFAST-NEXT:    mov x1, x8
; LSLFAST-NEXT:    bl g
; LSLFAST-NEXT:  .LBB1_4:
; LSLFAST-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
; LSLFAST-NEXT:  .LBB1_5: // %X
; LSLFAST-NEXT:    ret
E:
  %p0 = getelementptr {i64, i64}, ptr %a, i64 %i
  br i1 %c0, label %A, label %X

A:
  br i1 %c1, label %B, label %C

B:
  call void @g(ptr %p0)
  br label %X

C:
  %p1 = getelementptr {i64, i64}, ptr %a, i64 %i
  call void @g(ptr %p1, ptr %p0)
  br label %X

X:
  ret void
}