194 lines
6.9 KiB
YAML
194 lines
6.9 KiB
YAML
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
|
|
# RUN: llc -mtriple=amdgcn -run-pass register-coalescer -verify-machineinstrs -o - %s | FileCheck --check-prefix=GCN %s
|
|
#
|
|
|
|
|
|
---
|
|
# The COPY can be coalesced based on subregister liveness.
# %2 starts as a copy of %1.sub0, is redefined in bb.1 (reading %1.sub0 and
# %1.sub1) and is read in bb.2 together with %1.sub2.
# The expected output contains no COPY of sub0: the redefinition in bb.1
# writes sub0 of the loaded vreg_128 directly and bb.2 reads that sub0.
|
|
name: subrange_coalesce_liveout
|
|
tracksRegLiveness: true
|
|
body: |
|
|
; GCN-LABEL: name: subrange_coalesce_liveout
|
|
; GCN: bb.0:
|
|
; GCN-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
|
|
; GCN-NEXT: liveins: $vgpr0_vgpr1
|
|
; GCN-NEXT: {{ $}}
|
|
; GCN-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
|
|
; GCN-NEXT: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = GLOBAL_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec
|
|
; GCN-NEXT: S_CBRANCH_EXECZ %bb.2, implicit $exec
|
|
; GCN-NEXT: S_BRANCH %bb.1
|
|
; GCN-NEXT: {{ $}}
|
|
; GCN-NEXT: bb.1:
|
|
; GCN-NEXT: successors: %bb.2(0x80000000)
|
|
; GCN-NEXT: {{ $}}
|
|
; GCN-NEXT: [[GLOBAL_LOAD_DWORDX4_]].sub0:vreg_128 = V_AND_B32_e64 [[GLOBAL_LOAD_DWORDX4_]].sub0, [[GLOBAL_LOAD_DWORDX4_]].sub1, implicit $exec
|
|
; GCN-NEXT: S_BRANCH %bb.2
|
|
; GCN-NEXT: {{ $}}
|
|
; GCN-NEXT: bb.2:
|
|
; GCN-NEXT: dead %3:vgpr_32 = V_ADD_U32_e32 [[GLOBAL_LOAD_DWORDX4_]].sub2, [[GLOBAL_LOAD_DWORDX4_]].sub0, implicit $exec
|
|
; GCN-NEXT: S_ENDPGM 0
|
|
bb.0:
|
|
successors: %bb.1, %bb.2
|
|
liveins: $vgpr0_vgpr1
|
|
|
|
%0:vreg_64 = COPY $vgpr0_vgpr1
|
|
%1:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 0, 0, implicit $exec
|
|
%2:vgpr_32 = COPY %1.sub0
|
|
S_CBRANCH_EXECZ %bb.2, implicit $exec
|
|
S_BRANCH %bb.1
|
|
|
|
bb.1:
|
|
successors: %bb.2
|
|
|
|
%2:vgpr_32 = V_AND_B32_e64 %1.sub0, %1.sub1, implicit $exec
|
|
S_BRANCH %bb.2
|
|
|
|
bb.2:
|
|
%4:vgpr_32 = V_ADD_U32_e32 %1.sub2, %2, implicit $exec
|
|
S_ENDPGM 0
|
|
...
|
|
|
|
---
|
|
# An early-clobber def stops the coalescer from coalescing the COPY.
# %2 starts as a copy of %1.sub0 and is redefined in bb.1 by an instruction
# whose def is marked early-clobber and which reads %1.sub0 and %1.sub2.
# The expected output keeps the COPY in bb.0 and keeps %2 as a separate
# vgpr_32 register in bb.1 and bb.2.
|
|
name: subrange_coalesce_early_clobber
|
|
tracksRegLiveness: true
|
|
body: |
|
|
; GCN-LABEL: name: subrange_coalesce_early_clobber
|
|
; GCN: bb.0:
|
|
; GCN-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
|
|
; GCN-NEXT: liveins: $vgpr0_vgpr1
|
|
; GCN-NEXT: {{ $}}
|
|
; GCN-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
|
|
; GCN-NEXT: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = GLOBAL_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec
|
|
; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[GLOBAL_LOAD_DWORDX4_]].sub0
|
|
; GCN-NEXT: S_CBRANCH_EXECZ %bb.2, implicit $exec
|
|
; GCN-NEXT: S_BRANCH %bb.1
|
|
; GCN-NEXT: {{ $}}
|
|
; GCN-NEXT: bb.1:
|
|
; GCN-NEXT: successors: %bb.2(0x80000000)
|
|
; GCN-NEXT: {{ $}}
|
|
; GCN-NEXT: early-clobber [[COPY1]]:vgpr_32 = V_AND_B32_e64 [[GLOBAL_LOAD_DWORDX4_]].sub0, [[GLOBAL_LOAD_DWORDX4_]].sub2, implicit $exec
|
|
; GCN-NEXT: S_BRANCH %bb.2
|
|
; GCN-NEXT: {{ $}}
|
|
; GCN-NEXT: bb.2:
|
|
; GCN-NEXT: dead %3:vgpr_32 = V_ADD_U32_e32 [[GLOBAL_LOAD_DWORDX4_]].sub2, [[COPY1]], implicit $exec
|
|
; GCN-NEXT: S_ENDPGM 0
|
|
bb.0:
|
|
successors: %bb.1, %bb.2
|
|
liveins: $vgpr0_vgpr1
|
|
|
|
%0:vreg_64 = COPY $vgpr0_vgpr1
|
|
%1:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 0, 0, implicit $exec
|
|
%2:vgpr_32 = COPY %1.sub0
|
|
S_CBRANCH_EXECZ %bb.2, implicit $exec
|
|
S_BRANCH %bb.1
|
|
|
|
bb.1:
|
|
successors: %bb.2
|
|
|
|
early-clobber %2:vgpr_32 = V_AND_B32_e64 %1.sub0, %1.sub2, implicit $exec
|
|
S_BRANCH %bb.2
|
|
|
|
bb.2:
|
|
%4:vgpr_32 = V_ADD_U32_e32 %1.sub2, %2, implicit $exec
|
|
S_ENDPGM 0
|
|
...
|
|
|
|
---
|
|
# A lane that does not conflict with the copied lane (sub1) is redefined;
# the COPY is still coalescable.
# %2 starts as a copy of %1.sub0 and is redefined in bb.1; %1.sub1 is then
# overwritten from %2, and bb.2 reads both %1.sub1 and %2.
# The expected output contains no COPY of sub0 and uses sub0 of the loaded
# vreg_128 wherever %2 appears in the input.
|
|
name: subrange_coalesce_unrelated_sub_redefined
|
|
tracksRegLiveness: true
|
|
body: |
|
|
; GCN-LABEL: name: subrange_coalesce_unrelated_sub_redefined
|
|
; GCN: bb.0:
|
|
; GCN-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
|
|
; GCN-NEXT: liveins: $vgpr0_vgpr1
|
|
; GCN-NEXT: {{ $}}
|
|
; GCN-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
|
|
; GCN-NEXT: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = GLOBAL_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec
|
|
; GCN-NEXT: S_CBRANCH_EXECZ %bb.2, implicit $exec
|
|
; GCN-NEXT: S_BRANCH %bb.1
|
|
; GCN-NEXT: {{ $}}
|
|
; GCN-NEXT: bb.1:
|
|
; GCN-NEXT: successors: %bb.2(0x80000000)
|
|
; GCN-NEXT: {{ $}}
|
|
; GCN-NEXT: [[GLOBAL_LOAD_DWORDX4_]].sub0:vreg_128 = V_AND_B32_e64 [[GLOBAL_LOAD_DWORDX4_]].sub0, [[GLOBAL_LOAD_DWORDX4_]].sub1, implicit $exec
|
|
; GCN-NEXT: [[GLOBAL_LOAD_DWORDX4_]].sub1:vreg_128 = V_AND_B32_e64 [[GLOBAL_LOAD_DWORDX4_]].sub0, [[GLOBAL_LOAD_DWORDX4_]].sub0, implicit $exec
|
|
; GCN-NEXT: S_BRANCH %bb.2
|
|
; GCN-NEXT: {{ $}}
|
|
; GCN-NEXT: bb.2:
|
|
; GCN-NEXT: dead %3:vgpr_32 = V_ADD_U32_e32 [[GLOBAL_LOAD_DWORDX4_]].sub1, [[GLOBAL_LOAD_DWORDX4_]].sub0, implicit $exec
|
|
; GCN-NEXT: S_ENDPGM 0
|
|
bb.0:
|
|
successors: %bb.1, %bb.2
|
|
liveins: $vgpr0_vgpr1
|
|
|
|
%0:vreg_64 = COPY $vgpr0_vgpr1
|
|
%1:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 0, 0, implicit $exec
|
|
%2:vgpr_32 = COPY %1.sub0
|
|
S_CBRANCH_EXECZ %bb.2, implicit $exec
|
|
S_BRANCH %bb.1
|
|
|
|
bb.1:
|
|
successors: %bb.2
|
|
|
|
%2:vgpr_32 = V_AND_B32_e64 %1.sub0, %1.sub1, implicit $exec
|
|
; %1.sub1 was re-defined
|
|
%1.sub1:vreg_128 = V_AND_B32_e64 %2, %2, implicit $exec
|
|
S_BRANCH %bb.2
|
|
|
|
bb.2:
|
|
%4:vgpr_32 = V_ADD_U32_e32 %1.sub1, %2, implicit $exec
|
|
S_ENDPGM 0
|
|
...
|
|
|
|
---
|
|
# A more complex example showing that lane conflicts can be resolved
|
|
# based on subranges.
# bb.1 lists itself as a successor, so it forms a loop; inside it %2 is both
# read and redefined, and %1.sub2 is overwritten from %2. bb.2 then reads
# %1.sub1 and %1.sub2.
# The expected output contains no COPY of sub0 and uses sub0 of the loaded
# vreg_128 wherever %2 appears in the input.
|
|
name: subrange_coalesce_complex_pattern
|
|
tracksRegLiveness: true
|
|
body: |
|
|
; GCN-LABEL: name: subrange_coalesce_complex_pattern
|
|
; GCN: bb.0:
|
|
; GCN-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
|
|
; GCN-NEXT: liveins: $vgpr0_vgpr1
|
|
; GCN-NEXT: {{ $}}
|
|
; GCN-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
|
|
; GCN-NEXT: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = GLOBAL_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec
|
|
; GCN-NEXT: S_CBRANCH_EXECZ %bb.2, implicit $exec
|
|
; GCN-NEXT: S_BRANCH %bb.1
|
|
; GCN-NEXT: {{ $}}
|
|
; GCN-NEXT: bb.1:
|
|
; GCN-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
|
|
; GCN-NEXT: {{ $}}
|
|
; GCN-NEXT: [[GLOBAL_LOAD_DWORDX4_]].sub0:vreg_128 = V_AND_B32_e64 [[GLOBAL_LOAD_DWORDX4_]].sub1, [[GLOBAL_LOAD_DWORDX4_]].sub0, implicit $exec
|
|
; GCN-NEXT: [[GLOBAL_LOAD_DWORDX4_]].sub2:vreg_128 = V_AND_B32_e64 [[GLOBAL_LOAD_DWORDX4_]].sub0, [[GLOBAL_LOAD_DWORDX4_]].sub0, implicit $exec
|
|
; GCN-NEXT: S_CBRANCH_EXECZ %bb.1, implicit $exec
|
|
; GCN-NEXT: S_BRANCH %bb.2
|
|
; GCN-NEXT: {{ $}}
|
|
; GCN-NEXT: bb.2:
|
|
; GCN-NEXT: dead %3:vgpr_32 = V_ADD_U32_e32 [[GLOBAL_LOAD_DWORDX4_]].sub1, [[GLOBAL_LOAD_DWORDX4_]].sub2, implicit $exec
|
|
; GCN-NEXT: S_ENDPGM 0
|
|
bb.0:
|
|
successors: %bb.1, %bb.2
|
|
liveins: $vgpr0_vgpr1
|
|
|
|
%0:vreg_64 = COPY $vgpr0_vgpr1
|
|
%1:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 0, 0, implicit $exec
|
|
%2:vgpr_32 = COPY %1.sub0
|
|
S_CBRANCH_EXECZ %bb.2, implicit $exec
|
|
S_BRANCH %bb.1
|
|
|
|
bb.1:
|
|
successors: %bb.1, %bb.2
|
|
|
|
%2:vgpr_32 = V_AND_B32_e64 %1.sub1, %2, implicit $exec
|
|
%1.sub2:vreg_128 = V_AND_B32_e64 %2, %2, implicit $exec
|
|
S_CBRANCH_EXECZ %bb.1, implicit $exec
|
|
S_BRANCH %bb.2
|
|
|
|
bb.2:
|
|
%4:vgpr_32 = V_ADD_U32_e32 %1.sub1, %1.sub2, implicit $exec
|
|
S_ENDPGM 0
|
|
|
|
...
|