# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 2
# RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs -run-pass=prologepilog -o - %s | FileCheck -check-prefix=GCN %s

# We're keeping the IR around for the callees and the calling conventions (CCs).

--- |
  ; Callee declarations. @callee is the target of every chain tail call in the
  ; MIR bodies of this file; @gfx_callee is declared but not referenced by any
  ; MIR body visible in this file.
  declare amdgpu_cs_chain void @callee()
  declare amdgpu_gfx void @gfx_callee()

  ; Empty IR stubs, one per MIR function. Each one only supplies the function
  ; name and the amdgpu_cs_chain calling convention.
  define amdgpu_cs_chain void @preserve_inactive_wwm() {ret void}
  define amdgpu_cs_chain void @preserve_inactive_detected_wwm() {ret void}
  define amdgpu_cs_chain void @dont_preserve_wwm_if_no_chain_calls() {ret void}
  define amdgpu_cs_chain void @dont_preserve_non_wwm() {ret void}
  define amdgpu_cs_chain void @dont_preserve_v0_v7() {ret void}
  define amdgpu_cs_chain void @dont_preserve_sgpr() {ret void}
...
---

# Check that we preserve the inactive lanes of registers v8+ that are listed in
# the MachineFunctionInfo as wwmReservedRegs.

---
# $vgpr8 and $vgpr9 are listed in wwmReservedRegs and the function ends in a
# chain tail call to @callee. The expected output stores both registers to
# scratch at function entry and reloads them right before the tail call. Each
# group of scratch accesses is bracketed by S_XOR_SAVEEXEC_B32 -1 (saves exec in
# $sgpr1 and inverts it, so the accesses run on the lanes that were inactive)
# and an S_MOV_B32 that puts the saved exec back.
# Only the reload of $vgpr8 carries an implicit tied use of its destination;
# $vgpr8 is also the only one of the two registers that the tail call reads.
name: preserve_inactive_wwm
tracksRegLiveness: true
frameInfo:
  hasTailCall: true
machineFunctionInfo:
  stackPtrOffsetReg: '$sgpr32'
  returnsVoid: true
  wwmReservedRegs:
    - '$vgpr8'
    - '$vgpr9'
body: |
  bb.0:
    liveins: $sgpr0, $sgpr35, $vgpr8, $vgpr9

    ; GCN-LABEL: name: preserve_inactive_wwm
    ; GCN: liveins: $sgpr0, $sgpr35, $vgpr8, $vgpr9
    ; GCN-NEXT: {{ $}}
    ; GCN-NEXT: $sgpr1 = S_XOR_SAVEEXEC_B32 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
    ; GCN-NEXT: SCRATCH_STORE_DWORD_ST $vgpr8, 4, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0, addrspace 5)
    ; GCN-NEXT: SCRATCH_STORE_DWORD_ST $vgpr9, 8, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.1, addrspace 5)
    ; GCN-NEXT: $exec_lo = S_MOV_B32 killed $sgpr1
    ; GCN-NEXT: renamable $sgpr4_sgpr5 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @callee + 4, target-flags(amdgpu-gotprel32-hi) @callee + 12, implicit-def dead $scc
    ; GCN-NEXT: renamable $sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM killed renamable $sgpr4_sgpr5, 0, 0 :: (dereferenceable invariant load (p0) from got, addrspace 4)
    ; GCN-NEXT: $sgpr1 = S_XOR_SAVEEXEC_B32 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
    ; GCN-NEXT: $vgpr8 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 4, 0, implicit $exec, implicit $flat_scr, implicit $vgpr8(tied-def 0) :: (load (s32) from %stack.0, addrspace 5)
    ; GCN-NEXT: $vgpr9 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 8, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.1, addrspace 5)
    ; GCN-NEXT: $exec_lo = S_MOV_B32 killed $sgpr1
    ; GCN-NEXT: SI_CS_CHAIN_TC_W32 killed renamable $sgpr4_sgpr5, @callee, 0, -1, amdgpu_allvgprs, implicit $sgpr0, implicit $vgpr8
    renamable $sgpr4_sgpr5 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @callee + 4, target-flags(amdgpu-gotprel32-hi) @callee + 12, implicit-def dead $scc
    renamable $sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM killed renamable $sgpr4_sgpr5, 0, 0 :: (dereferenceable invariant load (p0) from got, addrspace 4)
    SI_CS_CHAIN_TC_W32 killed renamable $sgpr4_sgpr5, @callee, 0, -1, amdgpu_allvgprs, implicit $sgpr0, implicit $vgpr8

...

# Check that the inactive lanes are also preserved for VGPRs that are used as
# SGPR to VGPR spill targets instead of being listed in wwmReservedRegs.

---
# No wwmReservedRegs are given here. Instead $vgpr8 and $vgpr9 are each used as
# the destination of an SI_SPILL_S32_TO_VGPR and later overwritten by a
# V_MOV_B32. The expected output still stores both registers to scratch at
# function entry and reloads them right before the chain tail call, with each
# group of scratch accesses bracketed by S_XOR_SAVEEXEC_B32 -1 (saves exec in
# $sgpr1 and inverts it) and an S_MOV_B32 that puts the saved exec back.
name: preserve_inactive_detected_wwm
tracksRegLiveness: true
frameInfo:
  hasTailCall: true
machineFunctionInfo:
  stackPtrOffsetReg: '$sgpr32'
  returnsVoid: true
body: |
  bb.0:
    liveins: $sgpr0, $sgpr35, $vgpr8, $vgpr9

    ; GCN-LABEL: name: preserve_inactive_detected_wwm
    ; GCN: liveins: $sgpr0, $sgpr35, $vgpr8, $vgpr9
    ; GCN-NEXT: {{ $}}
    ; GCN-NEXT: $sgpr1 = S_XOR_SAVEEXEC_B32 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
    ; GCN-NEXT: SCRATCH_STORE_DWORD_ST $vgpr8, 4, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0, addrspace 5)
    ; GCN-NEXT: SCRATCH_STORE_DWORD_ST $vgpr9, 8, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.1, addrspace 5)
    ; GCN-NEXT: $exec_lo = S_MOV_B32 killed $sgpr1
    ; GCN-NEXT: renamable $vgpr8 = SI_SPILL_S32_TO_VGPR $sgpr35, 0, killed $vgpr8
    ; GCN-NEXT: $sgpr35 = S_MOV_B32 5
    ; GCN-NEXT: $sgpr35 = SI_RESTORE_S32_FROM_VGPR $vgpr8, 0
    ; GCN-NEXT: renamable $vgpr8 = V_MOV_B32_e32 10, implicit $exec
    ; GCN-NEXT: renamable $vgpr9 = SI_SPILL_S32_TO_VGPR $sgpr35, 0, killed $vgpr9
    ; GCN-NEXT: $sgpr35 = S_MOV_B32 5
    ; GCN-NEXT: $sgpr35 = SI_RESTORE_S32_FROM_VGPR $vgpr9, 0
    ; GCN-NEXT: renamable $vgpr9 = V_MOV_B32_e32 10, implicit $exec
    ; GCN-NEXT: renamable $sgpr4_sgpr5 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @callee + 4, target-flags(amdgpu-gotprel32-hi) @callee + 12, implicit-def dead $scc
    ; GCN-NEXT: renamable $sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM killed renamable $sgpr4_sgpr5, 0, 0 :: (dereferenceable invariant load (p0) from got, addrspace 4)
    ; GCN-NEXT: $sgpr1 = S_XOR_SAVEEXEC_B32 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
    ; GCN-NEXT: $vgpr8 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 4, 0, implicit $exec, implicit $flat_scr, implicit $vgpr8(tied-def 0) :: (load (s32) from %stack.0, addrspace 5)
    ; GCN-NEXT: $vgpr9 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 8, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.1, addrspace 5)
    ; GCN-NEXT: $exec_lo = S_MOV_B32 killed $sgpr1
    ; GCN-NEXT: SI_CS_CHAIN_TC_W32 killed renamable $sgpr4_sgpr5, @callee, 0, -1, amdgpu_allvgprs, implicit $sgpr0, implicit $vgpr8
    renamable $vgpr8 = SI_SPILL_S32_TO_VGPR $sgpr35, 0, killed $vgpr8
    $sgpr35 = S_MOV_B32 5
    $sgpr35 = SI_RESTORE_S32_FROM_VGPR $vgpr8, 0
    renamable $vgpr8 = V_MOV_B32_e32 10, implicit $exec
    renamable $vgpr9 = SI_SPILL_S32_TO_VGPR $sgpr35, 0, killed $vgpr9
    $sgpr35 = S_MOV_B32 5
    $sgpr35 = SI_RESTORE_S32_FROM_VGPR $vgpr9, 0
    renamable $vgpr9 = V_MOV_B32_e32 10, implicit $exec
    renamable $sgpr4_sgpr5 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @callee + 4, target-flags(amdgpu-gotprel32-hi) @callee + 12, implicit-def dead $scc
    renamable $sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM killed renamable $sgpr4_sgpr5, 0, 0 :: (dereferenceable invariant load (p0) from got, addrspace 4)
    SI_CS_CHAIN_TC_W32 killed renamable $sgpr4_sgpr5, @callee, 0, -1, amdgpu_allvgprs, implicit $sgpr0, implicit $vgpr8

...

---
# hasTailCall is false and the function ends in S_ENDPGM rather than a chain
# tail call. $vgpr9 is listed in wwmReservedRegs and $vgpr8 is used as an
# SI_SPILL_S32_TO_VGPR destination, yet the expected output contains no scratch
# store/load and no exec manipulation: the instruction sequence is identical to
# the input.
name: dont_preserve_wwm_if_no_chain_calls
tracksRegLiveness: true
frameInfo:
  hasTailCall: false
machineFunctionInfo:
  stackPtrOffsetReg: '$sgpr32'
  returnsVoid: true
  wwmReservedRegs:
    - '$vgpr9'
body: |
  bb.0:
    liveins: $sgpr35, $vgpr8

    ; GCN-LABEL: name: dont_preserve_wwm_if_no_chain_calls
    ; GCN: liveins: $sgpr35, $vgpr8
    ; GCN-NEXT: {{ $}}
    ; GCN-NEXT: renamable $vgpr8 = SI_SPILL_S32_TO_VGPR $sgpr35, 0, killed $vgpr8
    ; GCN-NEXT: $sgpr35 = S_MOV_B32 5
    ; GCN-NEXT: $sgpr35 = SI_RESTORE_S32_FROM_VGPR $vgpr8, 0
    ; GCN-NEXT: renamable $vgpr8 = V_MOV_B32_e32 10, implicit $exec
    ; GCN-NEXT: S_ENDPGM 0
    renamable $vgpr8 = SI_SPILL_S32_TO_VGPR $sgpr35, 0, killed $vgpr8
    $sgpr35 = S_MOV_B32 5
    $sgpr35 = SI_RESTORE_S32_FROM_VGPR $vgpr8, 0
    renamable $vgpr8 = V_MOV_B32_e32 10, implicit $exec
    S_ENDPGM 0
...

---
# No wwmReservedRegs are given and no SGPR is spilled to a VGPR; $vgpr16 and
# $vgpr8 are simply overwritten with V_MOV_B32 before the chain tail call. The
# expected output contains no scratch store/load and no exec manipulation: the
# instruction sequence is identical to the input.
name: dont_preserve_non_wwm
tracksRegLiveness: true
frameInfo:
  hasTailCall: true
machineFunctionInfo:
  stackPtrOffsetReg: '$sgpr32'
  isChainFunction: true
  returnsVoid: true
body: |
  bb.0:
    liveins: $sgpr0, $sgpr35, $vgpr0, $vgpr8, $vgpr16

    ; GCN-LABEL: name: dont_preserve_non_wwm
    ; GCN: liveins: $sgpr0, $sgpr35, $vgpr0, $vgpr8, $vgpr16
    ; GCN-NEXT: {{ $}}
    ; GCN-NEXT: renamable $vgpr16 = V_MOV_B32_e32 16, implicit $exec
    ; GCN-NEXT: renamable $vgpr8 = V_MOV_B32_e32 8, implicit $exec
    ; GCN-NEXT: renamable $sgpr4_sgpr5 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @callee + 4, target-flags(amdgpu-gotprel32-hi) @callee + 12, implicit-def dead $scc
    ; GCN-NEXT: renamable $sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM killed renamable $sgpr4_sgpr5, 0, 0 :: (dereferenceable invariant load (p0) from got, addrspace 4)
    ; GCN-NEXT: SI_CS_CHAIN_TC_W32 killed renamable $sgpr4_sgpr5, @callee, 0, -1, amdgpu_allvgprs, implicit $sgpr0, implicit $vgpr8
    renamable $vgpr16 = V_MOV_B32_e32 16, implicit $exec
    renamable $vgpr8 = V_MOV_B32_e32 8, implicit $exec
    renamable $sgpr4_sgpr5 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @callee + 4, target-flags(amdgpu-gotprel32-hi) @callee + 12, implicit-def dead $scc
    renamable $sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM killed renamable $sgpr4_sgpr5, 0, 0 :: (dereferenceable invariant load (p0) from got, addrspace 4)
    SI_CS_CHAIN_TC_W32 killed renamable $sgpr4_sgpr5, @callee, 0, -1, amdgpu_allvgprs, implicit $sgpr0, implicit $vgpr8

...

---
# Only registers below v8 are involved in WWM-style use here: $vgpr1 is listed
# in wwmReservedRegs, $vgpr0 is used as an SI_SPILL_S32_TO_VGPR destination and
# $vgpr7 is overwritten. $vgpr8 and $vgpr9 are written only by plain COPYs. The
# expected output contains no scratch store/load and no exec manipulation: the
# instruction sequence is identical to the input.
name: dont_preserve_v0_v7
tracksRegLiveness: true
frameInfo:
  hasTailCall: true
machineFunctionInfo:
  stackPtrOffsetReg: '$sgpr32'
  isChainFunction: true
  returnsVoid: true
  wwmReservedRegs:
    - '$vgpr1'
body: |
  bb.0:
    liveins: $sgpr0, $sgpr35, $vgpr0, $vgpr7, $vgpr8, $vgpr9

    ; GCN-LABEL: name: dont_preserve_v0_v7
    ; GCN: liveins: $sgpr0, $sgpr35, $vgpr0, $vgpr7, $vgpr8, $vgpr9
    ; GCN-NEXT: {{ $}}
    ; GCN-NEXT: renamable $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr35, 0, killed $vgpr0
    ; GCN-NEXT: $sgpr35 = S_MOV_B32 5
    ; GCN-NEXT: $sgpr35 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 0
    ; GCN-NEXT: renamable $vgpr0 = V_MOV_B32_e32 10, implicit $exec
    ; GCN-NEXT: renamable $vgpr7 = V_MOV_B32_e32 16, implicit $exec
    ; GCN-NEXT: renamable $vgpr8 = COPY killed renamable $vgpr0
    ; GCN-NEXT: renamable $vgpr9 = COPY killed renamable $vgpr7
    ; GCN-NEXT: renamable $sgpr4_sgpr5 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @callee + 4, target-flags(amdgpu-gotprel32-hi) @callee + 12, implicit-def dead $scc
    ; GCN-NEXT: renamable $sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM killed renamable $sgpr4_sgpr5, 0, 0 :: (dereferenceable invariant load (p0) from got, addrspace 4)
    ; GCN-NEXT: SI_CS_CHAIN_TC_W32 killed renamable $sgpr4_sgpr5, @callee, 0, -1, amdgpu_allvgprs, implicit $sgpr0, implicit $vgpr8, implicit $vgpr9
    renamable $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr35, 0, killed $vgpr0
    $sgpr35 = S_MOV_B32 5
    $sgpr35 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 0
    renamable $vgpr0 = V_MOV_B32_e32 10, implicit $exec
    renamable $vgpr7 = V_MOV_B32_e32 16, implicit $exec
    renamable $vgpr8 = COPY killed renamable $vgpr0
    renamable $vgpr9 = COPY killed renamable $vgpr7
    renamable $sgpr4_sgpr5 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @callee + 4, target-flags(amdgpu-gotprel32-hi) @callee + 12, implicit-def dead $scc
    renamable $sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM killed renamable $sgpr4_sgpr5, 0, 0 :: (dereferenceable invariant load (p0) from got, addrspace 4)
    SI_CS_CHAIN_TC_W32 killed renamable $sgpr4_sgpr5, @callee, 0, -1, amdgpu_allvgprs, implicit $sgpr0, implicit $vgpr8, implicit $vgpr9

...

---
# Only SGPRs are written here ($sgpr1 by S_ADD_I32, $sgpr0 by a COPY) before
# the chain tail call; no VGPR is touched. The expected output contains no
# scratch store/load and no exec manipulation: the instruction sequence is
# identical to the input.
name: dont_preserve_sgpr
tracksRegLiveness: true
frameInfo:
  hasTailCall: true
machineFunctionInfo:
  stackPtrOffsetReg: '$sgpr32'
  returnsVoid: true
body: |
  bb.0 (%ir-block.0):
    liveins: $sgpr0

    ; GCN-LABEL: name: dont_preserve_sgpr
    ; GCN: liveins: $sgpr0
    ; GCN-NEXT: {{ $}}
    ; GCN-NEXT: renamable $sgpr1 = S_ADD_I32 killed renamable $sgpr0, renamable $sgpr0, implicit-def dead $scc
    ; GCN-NEXT: $sgpr0 = COPY killed renamable $sgpr1
    ; GCN-NEXT: renamable $sgpr4_sgpr5 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @callee + 4, target-flags(amdgpu-gotprel32-hi) @callee + 12, implicit-def dead $scc
    ; GCN-NEXT: renamable $sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM killed renamable $sgpr4_sgpr5, 0, 0 :: (dereferenceable invariant load (p0) from got, addrspace 4)
    ; GCN-NEXT: SI_CS_CHAIN_TC_W32 killed renamable $sgpr4_sgpr5, @callee, 0, -1, amdgpu_allvgprs, implicit $sgpr0
    renamable $sgpr1 = S_ADD_I32 killed renamable $sgpr0, renamable $sgpr0, implicit-def dead $scc
    $sgpr0 = COPY killed renamable $sgpr1
    renamable $sgpr4_sgpr5 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @callee + 4, target-flags(amdgpu-gotprel32-hi) @callee + 12, implicit-def dead $scc
    renamable $sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM killed renamable $sgpr4_sgpr5, 0, 0 :: (dereferenceable invariant load (p0) from got, addrspace 4)
    SI_CS_CHAIN_TC_W32 killed renamable $sgpr4_sgpr5, @callee, 0, -1, amdgpu_allvgprs, implicit $sgpr0

...