# RUN: llc -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs -run-pass si-insert-waitcnts -o - %s | FileCheck -check-prefix=GFX9 %s
# RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -verify-machineinstrs -run-pass si-insert-waitcnts -o - %s | FileCheck -check-prefix=GFX10 %s

---

# The loop contains a store and a use of a value loaded outside of the loop.
# We expect the waitcnt for the use to be hoisted on GFX9, but not on GFX10+
# because we have the vscnt counter.
#
# bb.0 is the loop preheader and bb.1 is a single-block loop (it lists itself
# as a successor).
# The S_WAITCNT patterns below are matched as substrings, so "S_WAITCNT 39"
# also accepts a longer immediate such as 3952.

# GFX9-LABEL: waitcnt_vm_loop
# GFX9-LABEL: bb.0:
# GFX9: S_WAITCNT 39
# GFX9-LABEL: bb.1:
# GFX9-NOT: S_WAITCNT 39
# GFX9-LABEL: bb.2:

# GFX10-LABEL: waitcnt_vm_loop
# GFX10-LABEL: bb.0:
# GFX10-NOT: S_WAITCNT 16
# GFX10-LABEL: bb.1:
# GFX10: S_WAITCNT 16
# GFX10-LABEL: bb.2:
name:            waitcnt_vm_loop
body:             |
  bb.0:
    successors: %bb.1

    $vgpr0 = BUFFER_LOAD_FORMAT_X_IDXEN killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec
    S_BRANCH %bb.1

  bb.1:
    successors: %bb.1, %bb.2

    BUFFER_STORE_DWORD_OFFEN_exact $vgpr5, $vgpr6, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec
    $vgpr1 = V_ADD_U32_e32 $vgpr0, $vgpr2, implicit $exec
    S_CMP_LG_U32 killed $sgpr3, $sgpr4, implicit-def $scc
    S_CBRANCH_SCC1 %bb.1, implicit killed $scc
    S_BRANCH %bb.2

  bb.2:
    S_ENDPGM 0

...
---

# Same as waitcnt_vm_loop, but the loop preheader (bb.0) has no terminator: it
# falls through into the loop block bb.1.

# GFX9-LABEL: waitcnt_vm_loop_noterm
# GFX9-LABEL: bb.0:
# GFX9: S_WAITCNT 39
# GFX9-LABEL: bb.1:
# GFX9-NOT: S_WAITCNT 39
# GFX9-LABEL: bb.2:

# GFX10-LABEL: waitcnt_vm_loop_noterm
# GFX10-LABEL: bb.0:
# GFX10-NOT: S_WAITCNT 16
# GFX10-LABEL: bb.1:
# GFX10: S_WAITCNT 16
# GFX10-LABEL: bb.2:
name:            waitcnt_vm_loop_noterm
body:             |
  bb.0:
    successors: %bb.1

    $vgpr0 = BUFFER_LOAD_FORMAT_X_IDXEN killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec

  bb.1:
    successors: %bb.1, %bb.2

    BUFFER_STORE_DWORD_OFFEN_exact $vgpr5, $vgpr6, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec
    $vgpr1 = V_ADD_U32_e32 $vgpr0, $vgpr2, implicit $exec
    S_CMP_LG_U32 killed $sgpr3, $sgpr4, implicit-def $scc
    S_CBRANCH_SCC1 %bb.1, implicit killed $scc
    S_BRANCH %bb.2

  bb.2:
    S_ENDPGM 0

...
---

# Same as waitcnt_vm_loop_noterm, but there is a preexisting waitcnt in the
# preheader.
#
# The checks require exactly one matching wait in bb.0 (no second one is added
# next to the preexisting S_WAITCNT 3952) and none in the loop block bb.1.
# Only the GFX9 run is checked for this function.

# GFX9-LABEL: waitcnt_vm_loop_noterm_wait
# GFX9-LABEL: bb.0:
# GFX9: S_WAITCNT 39
# GFX9-NOT: S_WAITCNT 39
# GFX9-LABEL: bb.1:
# GFX9-NOT: S_WAITCNT 39
# GFX9-LABEL: bb.2:
name:            waitcnt_vm_loop_noterm_wait
body:             |
  bb.0:
    successors: %bb.1

    $vgpr0 = BUFFER_LOAD_FORMAT_X_IDXEN killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec
    S_WAITCNT 3952

  bb.1:
    successors: %bb.1, %bb.2

    BUFFER_STORE_DWORD_OFFEN_exact $vgpr5, $vgpr6, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec
    $vgpr1 = V_ADD_U32_e32 $vgpr0, $vgpr2, implicit $exec
    S_CMP_LG_U32 killed $sgpr3, $sgpr4, implicit-def $scc
    S_CBRANCH_SCC1 %bb.1, implicit killed $scc
    S_BRANCH %bb.2

  bb.2:
    S_ENDPGM 0

...
---

# The loop contains a store, a load, and uses values loaded both inside and
# outside the loop ($vgpr7 is loaded in bb.1, $vgpr0 in the preheader bb.0).
# We do not expect the waitcnt to be hoisted out of the loop.

# GFX9-LABEL: waitcnt_vm_loop_load
# GFX9-LABEL: bb.0:
# GFX9-NOT: S_WAITCNT 39
# GFX9-LABEL: bb.1:
# GFX9: S_WAITCNT 39
# GFX9-LABEL: bb.2:

# GFX10-LABEL: waitcnt_vm_loop_load
# GFX10-LABEL: bb.0:
# GFX10-NOT: S_WAITCNT 16
# GFX10-LABEL: bb.1:
# GFX10: S_WAITCNT 16
# GFX10-LABEL: bb.2:
name:            waitcnt_vm_loop_load
body:             |
  bb.0:
    successors: %bb.1

    $vgpr0 = BUFFER_LOAD_FORMAT_X_IDXEN killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec
    S_BRANCH %bb.1

  bb.1:
    successors: %bb.1, %bb.2

    BUFFER_STORE_DWORD_OFFEN_exact $vgpr5, $vgpr6, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec
    $vgpr7 = BUFFER_LOAD_FORMAT_X_IDXEN killed $vgpr7, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec
    $vgpr1 = V_ADD_U32_e32 $vgpr0, $vgpr7, implicit $exec
    S_CMP_LG_U32 killed $sgpr3, $sgpr4, implicit-def $scc
    S_CBRANCH_SCC1 %bb.1, implicit killed $scc
    S_BRANCH %bb.2

  bb.2:
    S_ENDPGM 0

...
---

# The loop contains a use of a value loaded outside of the loop, and neither a
# store nor a load.
# We do not expect the waitcnt to be hoisted out of the loop.

# GFX9-LABEL: waitcnt_vm_loop_no_store
# GFX9-LABEL: bb.0:
# GFX9-NOT: S_WAITCNT 39
# GFX9-LABEL: bb.1:
# GFX9: S_WAITCNT 39
# GFX9-LABEL: bb.2:

# GFX10-LABEL: waitcnt_vm_loop_no_store
# GFX10-LABEL: bb.0:
# GFX10-NOT: S_WAITCNT 16
# GFX10-LABEL: bb.1:
# GFX10: S_WAITCNT 16
# GFX10-LABEL: bb.2:
name:            waitcnt_vm_loop_no_store
body:             |
  bb.0:
    successors: %bb.1

    $vgpr0 = BUFFER_LOAD_FORMAT_X_IDXEN killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec
    S_BRANCH %bb.1

  bb.1:
    successors: %bb.1, %bb.2

    $vgpr1 = V_ADD_U32_e32 $vgpr0, $vgpr2, implicit $exec
    S_CMP_LG_U32 killed $sgpr3, $sgpr4, implicit-def $scc
    S_CBRANCH_SCC1 %bb.1, implicit killed $scc
    S_BRANCH %bb.2

  bb.2:
    S_ENDPGM 0

...
---

# The loop contains a store, no load, and doesn't use any value loaded inside
# or outside of the loop. There is only one use of the loaded value in the
# exit block.
# We don't expect any s_waitcnt vmcnt in the loop body or preheader, but expect
# one in the exit block.
#
# The exit block bb.2 reads $vgpr0, which is loaded in bb.0; the positive
# checks after the bb.2 label cover the wait for that read.


# GFX9-LABEL: waitcnt_vm_loop_no_use
# GFX9-LABEL: bb.0:
# GFX9-NOT: S_WAITCNT 39
# GFX9-LABEL: bb.1:
# GFX9-NOT: S_WAITCNT 39
# GFX9-LABEL: bb.2:
# GFX9: S_WAITCNT 39

# GFX10-LABEL: waitcnt_vm_loop_no_use
# GFX10-LABEL: bb.0:
# GFX10-NOT: S_WAITCNT 16
# GFX10-LABEL: bb.1:
# GFX10-NOT: S_WAITCNT 16
# GFX10-LABEL: bb.2:
# GFX10: S_WAITCNT 16
name:            waitcnt_vm_loop_no_use
body:             |
  bb.0:
    successors: %bb.1

    $vgpr0 = BUFFER_LOAD_FORMAT_X_IDXEN killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec
    S_BRANCH %bb.1

  bb.1:
    successors: %bb.1, %bb.2

    BUFFER_STORE_DWORD_OFFEN_exact $vgpr5, $vgpr6, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec
    $vgpr1 = V_ADD_U32_e32 $vgpr2, $vgpr2, implicit $exec
    S_CMP_LG_U32 killed $sgpr3, $sgpr4, implicit-def $scc
    S_CBRANCH_SCC1 %bb.1, implicit killed $scc
    S_BRANCH %bb.2

  bb.2:
    $vgpr1 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
    S_ENDPGM 0

...
---

# The loop loads a value that is not used in the loop, and uses a value loaded
# outside of the loop.
# We expect the waitcnt to be hoisted out of the loop to wait a single time
# before the loop is executed and avoid waiting for the load to complete on
# each iteration.

# GFX9-LABEL: waitcnt_vm_loop2
# GFX9-LABEL: bb.0:
# GFX9: S_WAITCNT 39
# GFX9-LABEL: bb.1:
# GFX9-NOT: S_WAITCNT 39
# GFX9-LABEL: bb.2:

# GFX10-LABEL: waitcnt_vm_loop2
# GFX10-LABEL: bb.0:
# GFX10: S_WAITCNT 16
# GFX10-LABEL: bb.1:
# GFX10-NOT: S_WAITCNT 16
# GFX10-LABEL: bb.2:
name:            waitcnt_vm_loop2
body:             |
  bb.0:
    successors: %bb.1

    $vgpr0 = BUFFER_LOAD_FORMAT_X_IDXEN killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec
    S_BRANCH %bb.1

  bb.1:
    successors: %bb.1, %bb.2

    $vgpr3 = V_ADD_U32_e32 $vgpr0, $vgpr2, implicit $exec
    $vgpr1 = BUFFER_LOAD_FORMAT_X_IDXEN killed $vgpr4, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec
    S_CMP_LG_U32 killed $sgpr3, $sgpr4, implicit-def $scc
    S_CBRANCH_SCC1 %bb.1, implicit killed $scc
    S_BRANCH %bb.2

  bb.2:
    S_ENDPGM 0

...
---

# Same as waitcnt_vm_loop2 with an additional store in the loop. We still
# expect the waitcnt instructions to be hoisted.

# GFX9-LABEL: waitcnt_vm_loop2_store
# GFX9-LABEL: bb.0:
# GFX9: S_WAITCNT 39
# GFX9-LABEL: bb.1:
# GFX9-NOT: S_WAITCNT 39
# GFX9-LABEL: bb.2:

# GFX10-LABEL: waitcnt_vm_loop2_store
# GFX10-LABEL: bb.0:
# GFX10: S_WAITCNT 16
# GFX10-LABEL: bb.1:
# GFX10-NOT: S_WAITCNT 16
# GFX10-LABEL: bb.2:
name:            waitcnt_vm_loop2_store
body:             |
  bb.0:
    successors: %bb.1

    $vgpr0 = BUFFER_LOAD_FORMAT_X_IDXEN killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec
    S_BRANCH %bb.1

  bb.1:
    successors: %bb.1, %bb.2

    $vgpr3 = V_ADD_U32_e32 $vgpr0, $vgpr2, implicit $exec
    $vgpr1 = BUFFER_LOAD_FORMAT_X_IDXEN killed $vgpr4, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec
    BUFFER_STORE_DWORD_OFFEN_exact $vgpr5, $vgpr6, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec
    S_CMP_LG_U32 killed $sgpr3, $sgpr4, implicit-def $scc
    S_CBRANCH_SCC1 %bb.1, implicit killed $scc
    S_BRANCH %bb.2

  bb.2:
    S_ENDPGM 0

...
---

# Same as waitcnt_vm_loop2 but the value loaded inside the loop ($vgpr1) is
# also used in the loop.
# We do not expect the waitcnt to be hoisted out of the loop.

# GFX9-LABEL: waitcnt_vm_loop2_use_in_loop
# GFX9-LABEL: bb.0:
# GFX9-NOT: S_WAITCNT 39
# GFX9-LABEL: bb.1:
# GFX9: S_WAITCNT 39
# GFX9-LABEL: bb.2:

# GFX10-LABEL: waitcnt_vm_loop2_use_in_loop
# GFX10-LABEL: bb.0:
# GFX10-NOT: S_WAITCNT 16
# GFX10-LABEL: bb.1:
# GFX10: S_WAITCNT 16
# GFX10-LABEL: bb.2:
name:            waitcnt_vm_loop2_use_in_loop
body:             |
  bb.0:
    successors: %bb.1

    $vgpr0 = BUFFER_LOAD_FORMAT_X_IDXEN killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec
    S_BRANCH %bb.1

  bb.1:
    successors: %bb.1, %bb.2

    $vgpr3 = V_ADD_U32_e32 $vgpr0, $vgpr2, implicit $exec
    $vgpr1 = BUFFER_LOAD_FORMAT_X_IDXEN killed $vgpr4, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec
    $vgpr4 = V_ADD_U32_e32 $vgpr5, $vgpr1, implicit $exec
    S_CMP_LG_U32 killed $sgpr3, $sgpr4, implicit-def $scc
    S_CBRANCH_SCC1 %bb.1, implicit killed $scc
    S_BRANCH %bb.2

  bb.2:
    S_ENDPGM 0

...
---

# The loop contains a use of a value loaded outside of the loop, but we already
# waited for that load to complete. The loop also loads a value that is not used
# in the loop. We do not expect any waitcnt in the loop.
#
# bb.0 loads $vgpr0 and uses it right away, bb.1 is a straight-line block that
# does not touch $vgpr0, and bb.2 is the loop.

# GFX9-LABEL: waitcnt_vm_loop2_nowait
# GFX9-LABEL: bb.0:
# GFX9: S_WAITCNT 39
# GFX9-LABEL: bb.1:
# GFX9-NOT: S_WAITCNT 39
# GFX9-LABEL: bb.2:
# GFX9-NOT: S_WAITCNT 39
# GFX9-LABEL: bb.3:

# GFX10-LABEL: waitcnt_vm_loop2_nowait
# GFX10-LABEL: bb.0:
# GFX10: S_WAITCNT 16
# GFX10-LABEL: bb.1:
# GFX10-NOT: S_WAITCNT 16
# GFX10-LABEL: bb.2:
# GFX10-NOT: S_WAITCNT 16
# GFX10-LABEL: bb.3:
name:            waitcnt_vm_loop2_nowait
body:             |
  bb.0:
    successors: %bb.1

    $vgpr0 = BUFFER_LOAD_FORMAT_X_IDXEN killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec
    $vgpr3 = V_ADD_U32_e32 $vgpr0, $vgpr2, implicit $exec
    S_BRANCH %bb.1

  bb.1:
    successors: %bb.2

    $vgpr3 = V_ADD_U32_e32 $vgpr4, $vgpr5, implicit $exec
    $vgpr3 = V_ADD_U32_e32 $vgpr4, $vgpr5, implicit $exec
    $vgpr3 = V_ADD_U32_e32 $vgpr4, $vgpr5, implicit $exec

    S_BRANCH %bb.2

  bb.2:
    successors: %bb.2, %bb.3

    $vgpr3 = V_ADD_U32_e32 $vgpr0, $vgpr2, implicit $exec
    $vgpr1 = BUFFER_LOAD_FORMAT_X_IDXEN killed $vgpr4, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec
    S_CMP_LG_U32 killed $sgpr3, $sgpr4, implicit-def $scc
    S_CBRANCH_SCC1 %bb.2, implicit killed $scc
    S_BRANCH %bb.3

  bb.3:
    S_ENDPGM 0

...
---

# Similar test case but for register intervals: both loads define four-register
# tuples. The loop reads $vgpr0, one register of the tuple loaded in the
# preheader, and does not read the tuple loaded inside the loop, so the checks
# expect the wait in bb.0 and none in bb.1.

# GFX9-LABEL: waitcnt_vm_loop2_reginterval
# GFX9-LABEL: bb.0:
# GFX9: S_WAITCNT 39
# GFX9-LABEL: bb.1:
# GFX9-NOT: S_WAITCNT 39
# GFX9-LABEL: bb.2:

# GFX10-LABEL: waitcnt_vm_loop2_reginterval
# GFX10-LABEL: bb.0:
# GFX10: S_WAITCNT 16
# GFX10-LABEL: bb.1:
# GFX10-NOT: S_WAITCNT 16
# GFX10-LABEL: bb.2:
name:            waitcnt_vm_loop2_reginterval
body:             |
  bb.0:
    successors: %bb.1

    $vgpr0_vgpr1_vgpr2_vgpr3 = GLOBAL_LOAD_DWORDX4 $vgpr10_vgpr11, 0, 0, implicit $exec

    S_BRANCH %bb.1

  bb.1:
    successors: %bb.1, %bb.2

    $vgpr10 = COPY $vgpr0

    $vgpr4_vgpr5_vgpr6_vgpr7 = IMAGE_SAMPLE_V4_V2 $vgpr20_vgpr21, $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11, $sgpr0_sgpr1_sgpr2_sgpr3, 15, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4)
    S_CMP_LG_U32 killed $sgpr3, $sgpr4, implicit-def $scc
    S_CBRANCH_SCC1 %bb.1, implicit killed $scc
    S_BRANCH %bb.2

  bb.2:
    S_ENDPGM 0

...
---

# Similar test case but for register intervals, where the value loaded inside
# the loop is also used in the loop: $vgpr7 is one register of the tuple
# defined by the in-loop IMAGE_SAMPLE and is read by the following COPY.
# We do not expect the waitcnt to be hoisted out of the loop.

# GFX9-LABEL: waitcnt_vm_loop2_reginterval2
# GFX9-LABEL: bb.0:
# GFX9-NOT: S_WAITCNT 39
# GFX9-LABEL: bb.1:
# GFX9: S_WAITCNT 39
# GFX9-LABEL: bb.2:

# GFX10-LABEL: waitcnt_vm_loop2_reginterval2
# GFX10-LABEL: bb.0:
# GFX10-NOT: S_WAITCNT 16
# GFX10-LABEL: bb.1:
# GFX10: S_WAITCNT 16
# GFX10-LABEL: bb.2:
name:            waitcnt_vm_loop2_reginterval2
body:             |
  bb.0:
    successors: %bb.1

    $vgpr0_vgpr1_vgpr2_vgpr3 = GLOBAL_LOAD_DWORDX4 $vgpr10_vgpr11, 0, 0, implicit $exec

    S_BRANCH %bb.1

  bb.1:
    successors: %bb.1, %bb.2

    $vgpr10 = COPY $vgpr0

    $vgpr4_vgpr5_vgpr6_vgpr7 = IMAGE_SAMPLE_V4_V2 $vgpr20_vgpr21, $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11, $sgpr0_sgpr1_sgpr2_sgpr3, 15, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4)
    $vgpr11 = COPY $vgpr7
    S_CMP_LG_U32 killed $sgpr3, $sgpr4, implicit-def $scc
    S_CBRANCH_SCC1 %bb.1, implicit killed $scc
    S_BRANCH %bb.2

  bb.2:
    S_ENDPGM 0

...
---

# The loop loads a value that is not used in the loop, but uses a value loaded
# outside of it. We expect the s_waitcnt instruction to be hoisted.
# A s_waitcnt vmcnt(0) is generated to flush in the preheader, but for this
# specific test case, it would be better to use vmcnt(1) instead. This is
# currently not implemented.
#
# The preheader issues two loads ($vgpr0, then $vgpr1) and the loop only reads
# $vgpr0, which is why waiting for the first load alone would be enough.

# GFX9-LABEL: waitcnt_vm_zero
# GFX9-LABEL: bb.0:
# GFX9: S_WAITCNT 3952
# GFX9-LABEL: bb.1:
# GFX9-NOT: S_WAITCNT 39
# GFX9-LABEL: bb.2:

# GFX10-LABEL: waitcnt_vm_zero
# GFX10-LABEL: bb.0:
# GFX10: S_WAITCNT 16240
# GFX10-LABEL: bb.1:
# GFX10-NOT: S_WAITCNT 16240
# GFX10-LABEL: bb.2:

name:            waitcnt_vm_zero
body:             |
  bb.0:
    successors: %bb.1

    $vgpr0 = BUFFER_LOAD_FORMAT_X_IDXEN killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec
    $vgpr1 = BUFFER_LOAD_FORMAT_X_IDXEN killed $vgpr1, $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, implicit $exec
    S_BRANCH %bb.1

  bb.1:
    successors: %bb.1, %bb.2

    $vgpr1 = V_ADD_U32_e32 $vgpr0, $vgpr3, implicit $exec
    $vgpr2 = BUFFER_LOAD_FORMAT_X_IDXEN killed $vgpr3, $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, implicit $exec
    S_CMP_LG_U32 killed $sgpr3, $sgpr4, implicit-def $scc
    S_CBRANCH_SCC1 %bb.1, implicit killed $scc
    S_BRANCH %bb.2

  bb.2:
    S_ENDPGM 0

...
---

# This test case checks that we flush the vmcnt counter only if necessary
# (i.e. if a waitcnt is needed for the vgpr use we find in the loop)
#
# In bb.0 the second load reads $vgpr0, which the first load defines, so the
# checks expect a wait before the instruction that defines $vgpr4. The loop
# reads only $vgpr0 again, so no further S_WAITCNT is expected after that
# instruction in bb.0 or anywhere in bb.1.

# GFX10-LABEL: waitcnt_vm_necessary
# GFX10-LABEL: bb.0:
# GFX10: S_WAITCNT 16240
# GFX10: $vgpr4
# GFX10-NOT: S_WAITCNT
# GFX10-LABEL: bb.1:
# GFX10-NOT: S_WAITCNT

# GFX9-LABEL: waitcnt_vm_necessary
# GFX9-LABEL: bb.0:
# GFX9: S_WAITCNT 3952
# GFX9: $vgpr4
# GFX9-NOT: S_WAITCNT
# GFX9-LABEL: bb.1:
# GFX9-NOT: S_WAITCNT

name:            waitcnt_vm_necessary
body:             |
  bb.0:
    successors: %bb.1(0x80000000)

    $vgpr0_vgpr1_vgpr2_vgpr3 = GLOBAL_LOAD_DWORDX4 killed $vgpr0_vgpr1, 0, 0, implicit $exec
    $vgpr4 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec

  bb.1:
    successors: %bb.1(0x40000000)

    $vgpr5 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, implicit $exec
    S_CBRANCH_SCC1 %bb.1, implicit killed $scc
    S_ENDPGM 0

...
---

# The loop contains a global store and uses a value that was loaded, with a
# global load, outside of the loop.
# The checks expect the wait in the preheader bb.0 on GFX9 and in the loop
# block bb.1 on GFX10.

# GFX9-LABEL: waitcnt_vm_loop_global_mem
# GFX9-LABEL: bb.0:
# GFX9: S_WAITCNT 39
# GFX9-LABEL: bb.1:
# GFX9-NOT: S_WAITCNT 39
# GFX9-LABEL: bb.2:

# GFX10-LABEL: waitcnt_vm_loop_global_mem
# GFX10-LABEL: bb.0:
# GFX10-NOT: S_WAITCNT 16
# GFX10-LABEL: bb.1:
# GFX10: S_WAITCNT 16
# GFX10-LABEL: bb.2:

name:            waitcnt_vm_loop_global_mem
body:             |
  bb.0:
    successors: %bb.1
    $vgpr0 = GLOBAL_LOAD_DWORD $vgpr1_vgpr2, 0, 0, implicit $exec
    S_BRANCH %bb.1

  bb.1:
    successors: %bb.1, %bb.2

    $vgpr3 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
    GLOBAL_STORE_DWORD $vgpr4_vgpr5, $vgpr6, 0, 0, implicit $exec
    S_CMP_LG_U32 killed $sgpr3, $sgpr4, implicit-def $scc
    S_CBRANCH_SCC1 %bb.1, implicit killed $scc

  bb.2:
    successors: %bb.3
    S_BRANCH %bb.3

  bb.3:
    S_ENDPGM 0

...
---

# Same as the waitcnt_vm_loop_global_mem case, but using scratch memory
# instructions instead.

# GFX9-LABEL: waitcnt_vm_loop_scratch_mem
# GFX9-LABEL: bb.0:
# GFX9: S_WAITCNT 39
# GFX9-LABEL: bb.1:
# GFX9-NOT: S_WAITCNT 39
# GFX9-LABEL: bb.2:

# GFX10-LABEL: waitcnt_vm_loop_scratch_mem
# GFX10-LABEL: bb.0:
# GFX10-NOT: S_WAITCNT 16
# GFX10-LABEL: bb.1:
# GFX10: S_WAITCNT 16
# GFX10-LABEL: bb.2:

name:            waitcnt_vm_loop_scratch_mem
body:             |
  bb.0:
    successors: %bb.1
    $vgpr0 = SCRATCH_LOAD_DWORD $vgpr1, 0, 0, implicit $exec, implicit $flat_scr
    S_BRANCH %bb.1

  bb.1:
    successors: %bb.1, %bb.2

    $vgpr3 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
    SCRATCH_STORE_DWORD $vgpr4, $vgpr6, 0, 0, implicit $exec, implicit $flat_scr
    S_CMP_LG_U32 killed $sgpr3, $sgpr4, implicit-def $scc
    S_CBRANCH_SCC1 %bb.1, implicit killed $scc

  bb.2:
    successors: %bb.3
    S_BRANCH %bb.3

  bb.3:
    S_ENDPGM 0

...
---

# Same as the waitcnt_vm_loop_global_mem case, but using flat memory
# instructions instead.
#
# NOTE(review): the pattern for the gfx1010 run starts with 11 here instead of
# the 16 used by the other tests - presumably because a flat access is also
# tracked by a second counter, which changes the encoded immediate; confirm.

# GFX9-LABEL: waitcnt_vm_loop_flat_mem
# GFX9-LABEL: bb.0:
# GFX9: S_WAITCNT 39
# GFX9-LABEL: bb.1:
# GFX9-NOT: S_WAITCNT 39
# GFX9-LABEL: bb.2:

# GFX10-LABEL: waitcnt_vm_loop_flat_mem
# GFX10-LABEL: bb.0:
# GFX10-NOT: S_WAITCNT 11
# GFX10-LABEL: bb.1:
# GFX10: S_WAITCNT 11
# GFX10-LABEL: bb.2:
name:            waitcnt_vm_loop_flat_mem
body:             |
  bb.0:
    successors: %bb.1
    $vgpr0 = FLAT_LOAD_DWORD $vgpr1_vgpr2, 0, 0, implicit $exec, implicit $flat_scr
    S_BRANCH %bb.1

  bb.1:
    successors: %bb.1, %bb.2

    $vgpr3 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
    FLAT_STORE_DWORD $vgpr4_vgpr5, $vgpr6, 0, 0, implicit $exec, implicit $flat_scr
    S_CMP_LG_U32 killed $sgpr3, $sgpr4, implicit-def $scc
    S_CBRANCH_SCC1 %bb.1, implicit killed $scc

  bb.2:
    successors: %bb.3
    S_BRANCH %bb.3

  bb.3:
    S_ENDPGM 0

...
---

# The loop contains a store, a load, and uses values loaded both inside and
# outside the loop ($vgpr7 is loaded in bb.1, $vgpr0 in the preheader bb.0).
# We do not expect the waitcnt to be hoisted out of the loop.
#
# NOTE(review): the function name says "flat", but every memory instruction
# below is a GLOBAL_* one - confirm whether FLAT_* instructions were intended.

# GFX9-LABEL: waitcnt_vm_loop_flat_load
# GFX9-LABEL: bb.0:
# GFX9-NOT: S_WAITCNT 39
# GFX9-LABEL: bb.1:
# GFX9: S_WAITCNT 39
# GFX9-LABEL: bb.2:

# GFX10-LABEL: waitcnt_vm_loop_flat_load
# GFX10-LABEL: bb.0:
# GFX10-NOT: S_WAITCNT 16
# GFX10-LABEL: bb.1:
# GFX10: S_WAITCNT 16
# GFX10-LABEL: bb.2:
name:            waitcnt_vm_loop_flat_load
body:             |
  bb.0:
    successors: %bb.1

    $vgpr0 = GLOBAL_LOAD_DWORD $vgpr1_vgpr2, 0, 0, implicit $exec
    S_BRANCH %bb.1

  bb.1:
    successors: %bb.1, %bb.2

    GLOBAL_STORE_DWORD $vgpr4_vgpr5, $vgpr6, 0, 0, implicit $exec
    $vgpr7 = GLOBAL_LOAD_DWORD $vgpr1_vgpr2, 0, 0, implicit $exec
    $vgpr1 = V_ADD_U32_e32 $vgpr0, $vgpr7, implicit $exec
    S_CMP_LG_U32 killed $sgpr3, $sgpr4, implicit-def $scc
    S_CBRANCH_SCC1 %bb.1, implicit killed $scc
    S_BRANCH %bb.2

  bb.2:
    S_ENDPGM 0

...