280 lines
11 KiB
Text
280 lines
11 KiB
Text
|
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
|
||
|
# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 -start-before=si-lower-sgpr-spills -stop-after=virtregrewriter,1 -verify-machineinstrs %s -o - | FileCheck -check-prefix=GCN %s
|
||
|
|
||
|
# Tests to check the conservative liveness extension for the wwm registers during SGPR spill lowering.
|
||
|
|
||
|
# Even though the VGPR can be shared for the wwm-operand (writelane/readlane get inserted for the SGPR spills)
|
||
|
# and the regular operand (%0), they get different registers as we conservatively extend the liveness of the
|
||
|
# wwm-operands.
|
||
|
---
|
||
|
name: test_single_block
|
||
|
tracksRegLiveness: true
|
||
|
frameInfo:
|
||
|
maxAlignment: 4
|
||
|
stack:
|
||
|
- { id: 0, type: spill-slot, size: 4, alignment: 4, stack-id: sgpr-spill }
|
||
|
machineFunctionInfo:
|
||
|
isEntryFunction: false
|
||
|
scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
|
||
|
stackPtrOffsetReg: '$sgpr32'
|
||
|
frameOffsetReg: '$sgpr33'
|
||
|
hasSpilledSGPRs: true
|
||
|
body: |
|
||
|
bb.0:
|
||
|
liveins: $sgpr4, $vgpr2_vgpr3
|
||
|
; GCN-LABEL: name: test_single_block
|
||
|
; GCN: liveins: $sgpr4, $vgpr2_vgpr3
|
||
|
; GCN-NEXT: {{ $}}
|
||
|
; GCN-NEXT: renamable $vgpr0 = IMPLICIT_DEF
|
||
|
; GCN-NEXT: renamable $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr4, 0, killed $vgpr0
|
||
|
; GCN-NEXT: S_NOP 0
|
||
|
; GCN-NEXT: $sgpr4 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 0
|
||
|
; GCN-NEXT: renamable $vgpr1 = V_MOV_B32_e32 20, implicit $exec
|
||
|
; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr2_vgpr3, killed renamable $vgpr1, 0, 0, implicit $exec
|
||
|
; GCN-NEXT: KILL killed renamable $vgpr0
|
||
|
; GCN-NEXT: SI_RETURN
|
||
|
SI_SPILL_S32_SAVE killed $sgpr4, %stack.0, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr32
|
||
|
S_NOP 0
|
||
|
renamable $sgpr4 = SI_SPILL_S32_RESTORE %stack.0, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr32
|
||
|
%0:vgpr_32 = V_MOV_B32_e32 20, implicit $exec
|
||
|
GLOBAL_STORE_DWORD $vgpr2_vgpr3, %0:vgpr_32, 0, 0, implicit $exec
|
||
|
SI_RETURN
|
||
|
...
|
||
|
|
||
|
# Due to the presence of a wwm-operand in the divergent flow, the regular variable (%0) shouldn't get the same register
|
||
|
# allocated for the wwm-operand in writelane/readlane when the SGPR spill is lowered.
|
||
|
|
||
|
---
|
||
|
name: test_if_else
|
||
|
tracksRegLiveness: true
|
||
|
frameInfo:
|
||
|
maxAlignment: 4
|
||
|
stack:
|
||
|
- { id: 0, type: spill-slot, size: 4, alignment: 4, stack-id: sgpr-spill }
|
||
|
machineFunctionInfo:
|
||
|
isEntryFunction: false
|
||
|
scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
|
||
|
stackPtrOffsetReg: '$sgpr32'
|
||
|
frameOffsetReg: '$sgpr33'
|
||
|
hasSpilledSGPRs: true
|
||
|
body: |
|
||
|
; GCN-LABEL: name: test_if_else
|
||
|
; GCN: bb.0:
|
||
|
; GCN-NEXT: successors: %bb.1(0x80000000)
|
||
|
; GCN-NEXT: liveins: $sgpr6, $sgpr10_sgpr11
|
||
|
; GCN-NEXT: {{ $}}
|
||
|
; GCN-NEXT: renamable $vgpr0 = IMPLICIT_DEF
|
||
|
; GCN-NEXT: S_BRANCH %bb.1
|
||
|
; GCN-NEXT: {{ $}}
|
||
|
; GCN-NEXT: bb.1:
|
||
|
; GCN-NEXT: successors: %bb.3(0x40000000), %bb.2(0x40000000)
|
||
|
; GCN-NEXT: liveins: $sgpr6, $vgpr0, $sgpr10_sgpr11
|
||
|
; GCN-NEXT: {{ $}}
|
||
|
; GCN-NEXT: renamable $vgpr1 = V_MOV_B32_e32 10, implicit $exec
|
||
|
; GCN-NEXT: S_CBRANCH_EXECZ %bb.3, implicit $exec
|
||
|
; GCN-NEXT: {{ $}}
|
||
|
; GCN-NEXT: bb.2:
|
||
|
; GCN-NEXT: successors: %bb.3(0x80000000)
|
||
|
; GCN-NEXT: liveins: $sgpr6, $vgpr0, $sgpr10_sgpr11
|
||
|
; GCN-NEXT: {{ $}}
|
||
|
; GCN-NEXT: renamable $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr6, 0, killed $vgpr0
|
||
|
; GCN-NEXT: S_NOP 0
|
||
|
; GCN-NEXT: $sgpr6 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 0
|
||
|
; GCN-NEXT: renamable $vgpr1 = V_MOV_B32_e32 20, implicit $exec
|
||
|
; GCN-NEXT: S_BRANCH %bb.3
|
||
|
; GCN-NEXT: {{ $}}
|
||
|
; GCN-NEXT: bb.3:
|
||
|
; GCN-NEXT: liveins: $vgpr0, $vgpr1, $sgpr10_sgpr11
|
||
|
; GCN-NEXT: {{ $}}
|
||
|
; GCN-NEXT: $sgpr5 = V_READFIRSTLANE_B32 killed $vgpr1, implicit $exec
|
||
|
; GCN-NEXT: S_STORE_DWORD_IMM $sgpr5, $sgpr10_sgpr11, 0, 0
|
||
|
; GCN-NEXT: KILL killed renamable $vgpr0
|
||
|
; GCN-NEXT: SI_RETURN
|
||
|
bb.0:
|
||
|
liveins: $sgpr6, $sgpr10_sgpr11
|
||
|
S_BRANCH %bb.1
|
||
|
bb.1:
|
||
|
liveins: $sgpr6, $sgpr10_sgpr11
|
||
|
%0:vgpr_32 = V_MOV_B32_e32 10, implicit $exec
|
||
|
S_CBRANCH_EXECZ %bb.3, implicit $exec
|
||
|
bb.2:
|
||
|
liveins: $sgpr6, $sgpr10_sgpr11
|
||
|
SI_SPILL_S32_SAVE killed $sgpr6, %stack.0, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr32
|
||
|
S_NOP 0
|
||
|
renamable $sgpr6 = SI_SPILL_S32_RESTORE %stack.0, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr32
|
||
|
%0:vgpr_32 = V_MOV_B32_e32 20, implicit $exec
|
||
|
S_BRANCH %bb.3
|
||
|
bb.3:
|
||
|
liveins: $sgpr10_sgpr11
|
||
|
$sgpr5 = V_READFIRSTLANE_B32 %0:vgpr_32, implicit $exec
|
||
|
S_STORE_DWORD_IMM $sgpr5, $sgpr10_sgpr11, 0, 0
|
||
|
SI_RETURN
|
||
|
...
|
||
|
|
||
|
# The wwm-register usage outside the loop should have the interference marked with
|
||
|
# all the regular virtual registers used in the test. The divergent loop index value (%1)
|
||
|
# can actually share the same VGPR as the wwm-operand. But since we extend the liveness of
|
||
|
# the wwm operand, an interference will always exist between them.
|
||
|
|
||
|
---
|
||
|
name: test_loop
|
||
|
tracksRegLiveness: true
|
||
|
frameInfo:
|
||
|
maxAlignment: 4
|
||
|
stack:
|
||
|
- { id: 0, type: spill-slot, size: 4, alignment: 4, stack-id: sgpr-spill }
|
||
|
machineFunctionInfo:
|
||
|
isEntryFunction: false
|
||
|
scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
|
||
|
stackPtrOffsetReg: '$sgpr32'
|
||
|
frameOffsetReg: '$sgpr33'
|
||
|
hasSpilledSGPRs: true
|
||
|
body: |
|
||
|
; GCN-LABEL: name: test_loop
|
||
|
; GCN: bb.0:
|
||
|
; GCN-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000)
|
||
|
; GCN-NEXT: liveins: $sgpr4, $sgpr10_sgpr11
|
||
|
; GCN-NEXT: {{ $}}
|
||
|
; GCN-NEXT: renamable $vgpr0 = IMPLICIT_DEF
|
||
|
; GCN-NEXT: renamable $vgpr1 = V_MOV_B32_e32 10, implicit $exec
|
||
|
; GCN-NEXT: S_CBRANCH_EXECZ %bb.2, implicit $exec
|
||
|
; GCN-NEXT: {{ $}}
|
||
|
; GCN-NEXT: bb.1:
|
||
|
; GCN-NEXT: successors: %bb.2(0x80000000)
|
||
|
; GCN-NEXT: liveins: $sgpr4, $vgpr0, $sgpr10_sgpr11
|
||
|
; GCN-NEXT: {{ $}}
|
||
|
; GCN-NEXT: renamable $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr4, 0, killed $vgpr0
|
||
|
; GCN-NEXT: S_NOP 0
|
||
|
; GCN-NEXT: $sgpr4 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 0
|
||
|
; GCN-NEXT: renamable $vgpr1 = V_MOV_B32_e32 20, implicit $exec
|
||
|
; GCN-NEXT: S_BRANCH %bb.2
|
||
|
; GCN-NEXT: {{ $}}
|
||
|
; GCN-NEXT: bb.2:
|
||
|
; GCN-NEXT: successors: %bb.3(0x80000000)
|
||
|
; GCN-NEXT: liveins: $sgpr4, $vgpr0, $vgpr1, $sgpr10_sgpr11
|
||
|
; GCN-NEXT: {{ $}}
|
||
|
; GCN-NEXT: S_STORE_DWORD_IMM $sgpr4, $sgpr10_sgpr11, 0, 0
|
||
|
; GCN-NEXT: $sgpr5 = V_READFIRSTLANE_B32 killed $vgpr1, implicit $exec
|
||
|
; GCN-NEXT: S_STORE_DWORD_IMM $sgpr5, $sgpr10_sgpr11, 0, 4
|
||
|
; GCN-NEXT: renamable $vgpr1 = V_MOV_B32_e32 5, implicit $exec
|
||
|
; GCN-NEXT: S_CBRANCH_EXECZ %bb.3, implicit $exec
|
||
|
; GCN-NEXT: S_BRANCH %bb.3
|
||
|
; GCN-NEXT: {{ $}}
|
||
|
; GCN-NEXT: bb.3:
|
||
|
; GCN-NEXT: successors: %bb.5(0x40000000), %bb.4(0x40000000)
|
||
|
; GCN-NEXT: liveins: $vgpr0, $vgpr1
|
||
|
; GCN-NEXT: {{ $}}
|
||
|
; GCN-NEXT: $vcc = V_CMP_EQ_U32_e64 0, $vgpr1, implicit $exec
|
||
|
; GCN-NEXT: $sgpr6_sgpr7 = S_AND_SAVEEXEC_B64 $vcc, implicit-def $exec, implicit-def $scc, implicit $exec
|
||
|
; GCN-NEXT: S_CBRANCH_SCC1 %bb.5, implicit $scc
|
||
|
; GCN-NEXT: {{ $}}
|
||
|
; GCN-NEXT: bb.4:
|
||
|
; GCN-NEXT: successors: %bb.3(0x80000000)
|
||
|
; GCN-NEXT: liveins: $vgpr0, $vgpr1, $sgpr6_sgpr7
|
||
|
; GCN-NEXT: {{ $}}
|
||
|
; GCN-NEXT: renamable $vgpr1 = V_SUB_U32_e32 1, killed $vgpr1, implicit $exec
|
||
|
; GCN-NEXT: renamable $vgpr1 = V_MOV_B32_e32 killed $vgpr1, implicit $exec
|
||
|
; GCN-NEXT: S_BRANCH %bb.3
|
||
|
; GCN-NEXT: {{ $}}
|
||
|
; GCN-NEXT: bb.5:
|
||
|
; GCN-NEXT: liveins: $vgpr0, $sgpr6_sgpr7
|
||
|
; GCN-NEXT: {{ $}}
|
||
|
; GCN-NEXT: $exec = S_OR_B64 $exec, $sgpr6_sgpr7, implicit-def $scc
|
||
|
; GCN-NEXT: KILL killed renamable $vgpr0
|
||
|
; GCN-NEXT: SI_RETURN
|
||
|
bb.0:
|
||
|
liveins: $sgpr4, $sgpr10_sgpr11
|
||
|
%0:vgpr_32 = V_MOV_B32_e32 10, implicit $exec
|
||
|
S_CBRANCH_EXECZ %bb.2, implicit $exec
|
||
|
bb.1:
|
||
|
liveins: $sgpr4, $sgpr10_sgpr11
|
||
|
SI_SPILL_S32_SAVE killed $sgpr4, %stack.0, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr32
|
||
|
S_NOP 0
|
||
|
renamable $sgpr4 = SI_SPILL_S32_RESTORE %stack.0, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr32
|
||
|
%0:vgpr_32 = V_MOV_B32_e32 20, implicit $exec
|
||
|
S_BRANCH %bb.2
|
||
|
bb.2:
|
||
|
liveins: $sgpr4, $sgpr10_sgpr11
|
||
|
S_STORE_DWORD_IMM $sgpr4, $sgpr10_sgpr11, 0, 0
|
||
|
$sgpr5 = V_READFIRSTLANE_B32 %0:vgpr_32, implicit $exec
|
||
|
S_STORE_DWORD_IMM $sgpr5, $sgpr10_sgpr11, 0, 4
|
||
|
%1:vgpr_32 = V_MOV_B32_e32 5, implicit $exec
|
||
|
S_CBRANCH_EXECZ %bb.3, implicit $exec
|
||
|
S_BRANCH %bb.3
|
||
|
bb.3:
|
||
|
$vcc = V_CMP_EQ_U32_e64 0, %1:vgpr_32, implicit $exec
|
||
|
$sgpr6_sgpr7 = S_AND_SAVEEXEC_B64 $vcc, implicit-def $exec, implicit-def $scc, implicit $exec
|
||
|
S_CBRANCH_SCC1 %bb.5, implicit $scc
|
||
|
bb.4:
|
||
|
liveins: $sgpr6_sgpr7
|
||
|
%2:vgpr_32 = V_SUB_U32_e32 1, %1:vgpr_32, implicit $exec
|
||
|
%1:vgpr_32 = V_MOV_B32_e32 %2:vgpr_32, implicit $exec
|
||
|
S_BRANCH %bb.3
|
||
|
bb.5:
|
||
|
liveins: $sgpr6_sgpr7
|
||
|
$exec = S_OR_B64 $exec, $sgpr6_sgpr7, implicit-def $scc
|
||
|
SI_RETURN
|
||
|
...
|
||
|
|
||
|
# There must be one KILL instruction for the wwm-operand in every return block.
|
||
|
# Due to that, the wwm-register allocated should be different from the ones
|
||
|
# allocated for the regular virtual registers.
|
||
|
|
||
|
---
|
||
|
name: test_multiple_return_blocks
|
||
|
tracksRegLiveness: true
|
||
|
frameInfo:
|
||
|
maxAlignment: 4
|
||
|
stack:
|
||
|
- { id: 0, type: spill-slot, size: 4, alignment: 4, stack-id: sgpr-spill }
|
||
|
machineFunctionInfo:
|
||
|
isEntryFunction: false
|
||
|
scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
|
||
|
stackPtrOffsetReg: '$sgpr32'
|
||
|
frameOffsetReg: '$sgpr33'
|
||
|
hasSpilledSGPRs: true
|
||
|
body: |
|
||
|
; GCN-LABEL: name: test_multiple_return_blocks
|
||
|
; GCN: bb.0:
|
||
|
; GCN-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000)
|
||
|
; GCN-NEXT: liveins: $sgpr4, $vgpr2_vgpr3
|
||
|
; GCN-NEXT: {{ $}}
|
||
|
; GCN-NEXT: renamable $vgpr0 = IMPLICIT_DEF
|
||
|
; GCN-NEXT: S_CBRANCH_EXECZ %bb.2, implicit $exec
|
||
|
; GCN-NEXT: {{ $}}
|
||
|
; GCN-NEXT: bb.1:
|
||
|
; GCN-NEXT: liveins: $sgpr4, $vgpr0, $vgpr2_vgpr3
|
||
|
; GCN-NEXT: {{ $}}
|
||
|
; GCN-NEXT: renamable $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr4, 0, killed $vgpr0
|
||
|
; GCN-NEXT: S_NOP 0
|
||
|
; GCN-NEXT: $sgpr4 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 0
|
||
|
; GCN-NEXT: renamable $vgpr1 = V_MOV_B32_e32 10, implicit $exec
|
||
|
; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr2_vgpr3, killed renamable $vgpr1, 0, 0, implicit $exec
|
||
|
; GCN-NEXT: KILL killed renamable $vgpr0
|
||
|
; GCN-NEXT: SI_RETURN
|
||
|
; GCN-NEXT: {{ $}}
|
||
|
; GCN-NEXT: bb.2:
|
||
|
; GCN-NEXT: liveins: $vgpr0, $vgpr2_vgpr3
|
||
|
; GCN-NEXT: {{ $}}
|
||
|
; GCN-NEXT: renamable $vgpr1 = V_MOV_B32_e32 20, implicit $exec
|
||
|
; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr2_vgpr3, killed renamable $vgpr1, 0, 0, implicit $exec
|
||
|
; GCN-NEXT: KILL killed renamable $vgpr0
|
||
|
; GCN-NEXT: SI_RETURN
|
||
|
bb.0:
|
||
|
liveins: $sgpr4, $vgpr2_vgpr3
|
||
|
S_CBRANCH_EXECZ %bb.2, implicit $exec
|
||
|
bb.1:
|
||
|
liveins: $sgpr4, $vgpr2_vgpr3
|
||
|
SI_SPILL_S32_SAVE killed $sgpr4, %stack.0, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr32
|
||
|
S_NOP 0
|
||
|
renamable $sgpr4 = SI_SPILL_S32_RESTORE %stack.0, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr32
|
||
|
%0:vgpr_32 = V_MOV_B32_e32 10, implicit $exec
|
||
|
GLOBAL_STORE_DWORD $vgpr2_vgpr3, %0:vgpr_32, 0, 0, implicit $exec
|
||
|
SI_RETURN
|
||
|
bb.2:
|
||
|
liveins: $vgpr2_vgpr3
|
||
|
%1:vgpr_32 = V_MOV_B32_e32 20, implicit $exec
|
||
|
GLOBAL_STORE_DWORD $vgpr2_vgpr3, %1:vgpr_32, 0, 0, implicit $exec
|
||
|
SI_RETURN
|
||
|
...
|