# RUN: llc -mtriple=amdgcn -mcpu=gfx1030 -greedy-regclass-priority-trumps-globalness=0 -start-before greedy -o - %s | FileCheck %s -check-prefixes=GCN,OLD
# RUN: llc -mtriple=amdgcn -mcpu=gfx1030 -greedy-regclass-priority-trumps-globalness=1 -start-before greedy -o - %s | FileCheck %s -check-prefixes=GCN,NEW

# At the time of writing, -greedy-regclass-priority-trumps-globalness makes a
# significant improvement in the total number of vgprs needed to compile
# test1, from 11 down to 7.

# test1 performs an image store in bb.0 and, unless the SCC branch skips it,
# a second image store in bb.1 that reuses the same address pair (%26) and
# 256-bit SGPR tuple (%16) with a different 128-bit data tuple (%30).
# The directives below pin the VGPR count reported with the flag off (11)
# and with the flag on (7).
# GCN-LABEL: test1:
# OLD: NumVgprs: 11{{$}}
# NEW: NumVgprs: 7{{$}}
---
name: test1
tracksRegLiveness: true
body: |
  bb.0:
    successors: %bb.1, %bb.2
    liveins: $vgpr0, $vgpr6

    ; Instruction order is part of the test input; do not reorder.
    %6:vgpr_32 = COPY $vgpr6
    ; Only sub0 of %30 is defined here; the rest of the tuple is filled in bb.1.
    undef %30.sub0:vreg_128 = COPY $vgpr0
    undef %27.sub0:vreg_128 = V_MED3_F32_e64 0, 0, 0, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
    undef %16.sub0:sgpr_256 = S_MOV_B32 0
    undef %26.sub1:vreg_64 = V_LSHRREV_B32_e32 1, %6, implicit $exec
    ; Broadcast %27.sub0 into the other three subregisters of the tuple.
    %27.sub1:vreg_128 = COPY %27.sub0
    %27.sub2:vreg_128 = COPY %27.sub0
    %27.sub3:vreg_128 = COPY %27.sub0
    %26.sub0:vreg_64 = V_MOV_B32_e32 1, implicit $exec
    ; Copy the zero in %16.sub0 into sub1..sub7 so the whole tuple is defined.
    %16.sub1:sgpr_256 = COPY %16.sub0
    %16.sub2:sgpr_256 = COPY %16.sub0
    %16.sub3:sgpr_256 = COPY %16.sub0
    %16.sub4:sgpr_256 = COPY %16.sub0
    %16.sub5:sgpr_256 = COPY %16.sub0
    %16.sub6:sgpr_256 = COPY %16.sub0
    %16.sub7:sgpr_256 = COPY %16.sub0
    IMAGE_STORE_V4_V2_gfx10 %27, %26, %16, 0, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), addrspace 7)
    ; $scc is read as undef: the branch direction is irrelevant to the test,
    ; only the CFG shape (bb.1 may or may not execute) matters.
    S_CBRANCH_SCC1 %bb.2, implicit undef $scc
    S_BRANCH %bb.1

  bb.1:
    ; Complete %30 (sub0 came from $vgpr0 in bb.0) by writing 0 to sub1 and
    ; copying it into sub2 and sub3.
    %30.sub1:vreg_128 = V_MOV_B32_e32 0, implicit $exec
    %30.sub2:vreg_128 = COPY %30.sub1
    %30.sub3:vreg_128 = COPY %30.sub1
    ; Overwrites %26.sub1; %26.sub0 still holds the value written in bb.0.
    %26.sub1:vreg_64 = COPY %30.sub1
    IMAGE_STORE_V4_V2_gfx10 %30, %26, %16, 0, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), addrspace 7)

  bb.2:
    S_ENDPGM 0
...

# test2 loads values in bb.1, derives an exec mask from them, and performs an
# NSA image store in bb.2 only when at least one lane remains active.
# For this input the directives below expect the opposite trend to test1:
# 7 VGPRs with the flag off and 11 with the flag on.
# GCN-LABEL: test2:
# OLD: NumVgprs: 7{{$}}
# NEW: NumVgprs: 11{{$}}
---
name: test2
tracksRegLiveness: true
body: |
  bb.0:
    liveins: $vgpr0, $vgpr2

    ; Instruction order is part of the test input; do not reorder.
    ; bb.0 has no terminator and no explicit successor list.
    %8:vgpr_32 = COPY $vgpr2
    %6:vgpr_32 = COPY $vgpr0

  bb.1:
    successors: %bb.2, %bb.3

    undef %25.sub0:sgpr_256 = S_MOV_B32 0
    %10:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
    ; Only sub0..sub3 of %25 are defined here; the buffer load below reads
    ; just that 128-bit slice (%25.sub0_sub1_sub2_sub3).
    %25.sub1:sgpr_256 = COPY %25.sub0
    %25.sub2:sgpr_256 = COPY %25.sub0
    %25.sub3:sgpr_256 = COPY %25.sub0
    %19:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN %10, %25.sub0_sub1_sub2_sub3, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32))
    %36:vreg_64 = DS_READ2_B32_gfx9 %10, 0, 2, 0, implicit $exec :: (load (s32)), (load (s32))
    ; %30, %3 and %4 stay live into bb.2, where they are the store's
    ; three address operands.
    %30:vgpr_32 = V_LSHLREV_B32_e32 2, %36.sub0, implicit $exec
    %3:vgpr_32 = V_ADD_U32_e32 %30, %6, implicit $exec
    %4:vgpr_32 = V_ADD_U32_e32 %36.sub1, %8, implicit $exec
    %15:vgpr_32 = V_OR_B32_e32 %4, %3, implicit $exec
    %21:sreg_32 = V_READFIRSTLANE_B32 %19, implicit $exec
    %17:sreg_32 = V_CMP_EQ_U32_e64 0, %15, implicit $exec
    S_CMP_LG_U32 %21, 0, implicit-def $scc
    %31:sreg_32_xm0_xexec = S_CSELECT_B32 -1, 0, implicit killed $scc
    %23:sreg_32 = S_AND_B32 %31, %17, implicit-def dead $scc
    ; Save exec in %5, AND it with %23, install the result as the new exec,
    ; and skip bb.2 when no lane is left active.
    %5:sreg_32 = COPY $exec_lo, implicit-def $exec_lo
    %37:sreg_32 = S_AND_B32 %5, %23, implicit-def dead $scc
    $exec_lo = S_MOV_B32_term %37
    S_CBRANCH_EXECZ %bb.3, implicit $exec
    S_BRANCH %bb.2

  bb.2:
    undef %28.sub0:vreg_128 = V_MOV_B32_e32 0, implicit $exec
    ; sub1..sub3 of %25 are written again and sub4..sub7 are defined for the
    ; first time, so the full 256-bit tuple is available to the store.
    %25.sub1:sgpr_256 = COPY %25.sub0
    %25.sub2:sgpr_256 = COPY %25.sub0
    %25.sub3:sgpr_256 = COPY %25.sub0
    %25.sub4:sgpr_256 = COPY %25.sub0
    ; Broadcast %28.sub0 into the other three subregisters of the data tuple.
    %28.sub1:vreg_128 = COPY %28.sub0
    %28.sub2:vreg_128 = COPY %28.sub0
    %28.sub3:vreg_128 = COPY %28.sub0
    %25.sub5:sgpr_256 = COPY %25.sub0
    %25.sub6:sgpr_256 = COPY %25.sub0
    %25.sub7:sgpr_256 = COPY %25.sub0
    ; The address is passed as three separate vgpr_32 operands (%3, %30, %4).
    IMAGE_STORE_V4_V3_nsa_gfx10 %28, %3, %30, %4, %25, 0, 2, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s32))

  bb.3:
    S_ENDPGM 0
...