# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 -run-pass=prologepilog,machine-cp -verify-machineinstrs %s -o - | FileCheck -check-prefix=GCN %s

# The COPY that moves the return value into VGPR0 must not be removed by
# machine-cp. The spill reload of the same register that follows the COPY is
# meant to reload only the register's inactive lanes. Marking the register
# itself as the tied operand of the spill reload prevents the undesired
# optimization.

---
name: wwm_scratch_reg_spill_reload_of_outgoing_reg
tracksRegLiveness: true
machineFunctionInfo:
  wwmReservedRegs: ['$vgpr0']
  isEntryFunction: false
  scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
  stackPtrOffsetReg: '$sgpr32'
  frameOffsetReg: '$sgpr33'
body: |
  bb.0:
    liveins: $sgpr20, $vgpr1
    ; GCN-LABEL: name: wwm_scratch_reg_spill_reload_of_outgoing_reg
    ; GCN: liveins: $sgpr20, $vgpr0, $vgpr1
    ; GCN-NEXT: {{ $}}
    ; GCN-NEXT: $sgpr4_sgpr5 = S_XOR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
    ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5)
    ; GCN-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5
    ; GCN-NEXT: $vgpr0 = IMPLICIT_DEF
    ; GCN-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR killed $sgpr20, 0, $vgpr0
    ; GCN-NEXT: $vgpr0 = COPY killed renamable $vgpr1, implicit $exec
    ; GCN-NEXT: $sgpr4_sgpr5 = S_XOR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
    ; GCN-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit $vgpr0(tied-def 0) :: (load (s32) from %stack.0, addrspace 5)
    ; GCN-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5
    ; GCN-NEXT: SI_RETURN implicit $vgpr0
    $vgpr0 = IMPLICIT_DEF
    $vgpr0 = SI_SPILL_S32_TO_VGPR killed $sgpr20, 0, $vgpr0
    $vgpr0 = COPY killed renamable $vgpr1, implicit $exec
    SI_RETURN implicit $vgpr0
...
# The reload of vgpr0 requires the tied-op because vgpr0 is a subregister of
# the outgoing tuple register vgpr0_vgpr1. vgpr2 does not need the tied-op in
# its reload because it does not hold any return value.
---
name: wwm_scratch_reg_spill_reload_of_outgoing_tuple_subreg
tracksRegLiveness: true
machineFunctionInfo:
  wwmReservedRegs: ['$vgpr0', '$vgpr2']
  isEntryFunction: false
  scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
  stackPtrOffsetReg: '$sgpr32'
  frameOffsetReg: '$sgpr33'
body: |
  bb.0:
    liveins: $sgpr20, $sgpr21, $vgpr1
    ; GCN-LABEL: name: wwm_scratch_reg_spill_reload_of_outgoing_tuple_subreg
    ; GCN: liveins: $sgpr20, $sgpr21, $vgpr0, $vgpr1, $vgpr2
    ; GCN-NEXT: {{ $}}
    ; GCN-NEXT: $sgpr4_sgpr5 = S_XOR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
    ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5)
    ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5)
    ; GCN-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5
    ; GCN-NEXT: $vgpr0 = IMPLICIT_DEF
    ; GCN-NEXT: $vgpr2 = IMPLICIT_DEF
    ; GCN-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR killed $sgpr20, 0, $vgpr0
    ; GCN-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR killed $sgpr21, 0, $vgpr2
    ; GCN-NEXT: $vgpr0 = COPY $vgpr1, implicit $exec
    ; GCN-NEXT: $sgpr4_sgpr5 = S_XOR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
    ; GCN-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit $vgpr0(tied-def 0) :: (load (s32) from %stack.0, addrspace 5)
    ; GCN-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5)
    ; GCN-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5
    ; GCN-NEXT: SI_RETURN implicit $vgpr0_vgpr1
    $vgpr0 = IMPLICIT_DEF
    $vgpr2 = IMPLICIT_DEF
    $vgpr0 = SI_SPILL_S32_TO_VGPR killed $sgpr20, 0, $vgpr0
    $vgpr2 = SI_SPILL_S32_TO_VGPR killed $sgpr21, 0, $vgpr2
    $vgpr0 = COPY $vgpr1, implicit $exec
    SI_RETURN implicit $vgpr0_vgpr1
...

# The tied-op is not required in the spill reload of vgpr2, which is not part
# of the outgoing register vgpr0_vgpr1.
---
name: wwm_scratch_reg_spill_reload_different_outgoing_reg
tracksRegLiveness: true
machineFunctionInfo:
  wwmReservedRegs: ['$vgpr2']
  isEntryFunction: false
  scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
  stackPtrOffsetReg: '$sgpr32'
  frameOffsetReg: '$sgpr33'
body: |
  bb.0:
    liveins: $sgpr20, $vgpr1
    ; GCN-LABEL: name: wwm_scratch_reg_spill_reload_different_outgoing_reg
    ; GCN: liveins: $sgpr20, $vgpr1, $vgpr2
    ; GCN-NEXT: {{ $}}
    ; GCN-NEXT: $sgpr4_sgpr5 = S_XOR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
    ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5)
    ; GCN-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5
    ; GCN-NEXT: $vgpr2 = IMPLICIT_DEF
    ; GCN-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR killed $sgpr20, 0, $vgpr2
    ; GCN-NEXT: $vgpr0 = COPY $vgpr1, implicit $exec
    ; GCN-NEXT: $sgpr4_sgpr5 = S_XOR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
    ; GCN-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5)
    ; GCN-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5
    ; GCN-NEXT: SI_RETURN implicit $vgpr0_vgpr1
    $vgpr2 = IMPLICIT_DEF
    $vgpr2 = SI_SPILL_S32_TO_VGPR killed $sgpr20, 0, $vgpr2
    $vgpr0 = COPY $vgpr1, implicit $exec
    SI_RETURN implicit $vgpr0_vgpr1
...

# The tied-op is not required in the spill reload of vgpr40, which is in the
# CSR range.
---
name: wwm_csr_spill_reload
tracksRegLiveness: true
machineFunctionInfo:
  wwmReservedRegs: ['$vgpr40']
  isEntryFunction: false
  scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
  stackPtrOffsetReg: '$sgpr32'
  frameOffsetReg: '$sgpr33'
body: |
  bb.0:
    liveins: $sgpr20, $vgpr1
    ; GCN-LABEL: name: wwm_csr_spill_reload
    ; GCN: liveins: $sgpr20, $vgpr1, $vgpr40
    ; GCN-NEXT: {{ $}}
    ; GCN-NEXT: $sgpr4_sgpr5 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
    ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5)
    ; GCN-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5
    ; GCN-NEXT: $vgpr40 = IMPLICIT_DEF
    ; GCN-NEXT: $vgpr40 = SI_SPILL_S32_TO_VGPR killed $sgpr20, 0, $vgpr40
    ; GCN-NEXT: $sgpr20 = SI_RESTORE_S32_FROM_VGPR $vgpr40, 0, implicit $exec
    ; GCN-NEXT: $vgpr0 = COPY killed $vgpr1, implicit $exec
    ; GCN-NEXT: $sgpr4_sgpr5 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
    ; GCN-NEXT: $vgpr40 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5)
    ; GCN-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5
    ; GCN-NEXT: SI_RETURN implicit $vgpr0
    $vgpr40 = IMPLICIT_DEF
    $vgpr40 = SI_SPILL_S32_TO_VGPR killed $sgpr20, 0, $vgpr40
    $sgpr20 = SI_RESTORE_S32_FROM_VGPR $vgpr40, 0, implicit $exec
    $vgpr0 = COPY killed $vgpr1, implicit $exec
    SI_RETURN implicit $vgpr0
...