570 lines
40 KiB
YAML
570 lines
40 KiB
YAML
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
|
|
# RUN: llc -mtriple=amdgcn -mcpu=gfx908 -misched-cluster=false -run-pass=machine-scheduler -verify-misched -o - %s | FileCheck %s
|
|
|
|
--- |
|
|
define amdgpu_kernel void @no_sched_barrier(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) { ret void }
|
|
define amdgpu_kernel void @sched_barrier_mask_0(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) { ret void }
|
|
define amdgpu_kernel void @sched_barrier_mask_1(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) { ret void }
|
|
define amdgpu_kernel void @sched_barrier_mask_2(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) { ret void }
|
|
define amdgpu_kernel void @sched_barrier_mask_4(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) { ret void }
|
|
define amdgpu_kernel void @sched_barrier_mask_8(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) { ret void }
|
|
define amdgpu_kernel void @sched_barrier_mask_16(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) { ret void }
|
|
define amdgpu_kernel void @sched_barrier_mask_32(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) { ret void }
|
|
define amdgpu_kernel void @sched_barrier_mask_64(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) { ret void }
|
|
define amdgpu_kernel void @sched_barrier_mask_128(ptr addrspace(3) noalias %out, ptr addrspace(3) noalias %in) { ret void }
|
|
define amdgpu_kernel void @sched_barrier_mask_256(ptr addrspace(3) noalias %out, ptr addrspace(3) noalias %in) { ret void }
|
|
define amdgpu_kernel void @sched_barrier_mask_512(ptr addrspace(3) noalias %out, ptr addrspace(3) noalias %in) { ret void }
|
|
define amdgpu_kernel void @sched_barrier_masks_8_12(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) { ret void }
|
|
define amdgpu_kernel void @sched_barrier_mask_4_bundle(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) { ret void }
|
|
define amdgpu_kernel void @sched_barrier_mask_0_bundle(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) { ret void }
|
|
|
|
!0 = distinct !{!0}
|
|
!1 = !{!1, !0}
|
|
...
|
|
|
|
---
|
|
name: no_sched_barrier
|
|
tracksRegLiveness: true
|
|
body: |
|
|
bb.0:
|
|
; CHECK-LABEL: name: no_sched_barrier
|
|
; CHECK: [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
|
|
; CHECK-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
|
|
; CHECK-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[DEF]], [[DEF1]], 0, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1)
|
|
; CHECK-NEXT: [[V_MUL_LO_U32_e64_:%[0-9]+]]:vgpr_32 = nsw V_MUL_LO_U32_e64 [[GLOBAL_LOAD_DWORD_SADDR]], [[GLOBAL_LOAD_DWORD_SADDR]], implicit $exec
|
|
; CHECK-NEXT: GLOBAL_STORE_DWORD_SADDR [[DEF1]], [[V_MUL_LO_U32_e64_]], [[DEF]], 0, 0, implicit $exec :: (store (s32) into %ir.out, !noalias !0, addrspace 1)
|
|
; CHECK-NEXT: S_NOP 0
|
|
; CHECK-NEXT: [[GLOBAL_LOAD_DWORD_SADDR1:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[DEF]], [[DEF1]], 512, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1)
|
|
; CHECK-NEXT: [[V_MUL_LO_U32_e64_1:%[0-9]+]]:vgpr_32 = nsw V_MUL_LO_U32_e64 [[GLOBAL_LOAD_DWORD_SADDR1]], [[GLOBAL_LOAD_DWORD_SADDR1]], implicit $exec
|
|
; CHECK-NEXT: GLOBAL_STORE_DWORD_SADDR [[DEF1]], [[V_MUL_LO_U32_e64_1]], [[DEF]], 512, 0, implicit $exec :: (store (s32) into %ir.out, !noalias !0, addrspace 1)
|
|
; CHECK-NEXT: S_ENDPGM 0
|
|
%0:sreg_64 = IMPLICIT_DEF
|
|
%1:vgpr_32 = IMPLICIT_DEF
|
|
%3:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR %0, %1, 0, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1)
|
|
%4:vgpr_32 = nsw V_MUL_LO_U32_e64 %3, %3, implicit $exec
|
|
GLOBAL_STORE_DWORD_SADDR %1, %4, %0, 0, 0, implicit $exec :: (store (s32) into %ir.out, !noalias !0, addrspace 1)
|
|
S_NOP 0
|
|
%5:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR %0, %1, 512, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1)
|
|
%6:vgpr_32 = nsw V_MUL_LO_U32_e64 %5, %5, implicit $exec
|
|
GLOBAL_STORE_DWORD_SADDR %1, %6, %0, 512, 0, implicit $exec :: (store (s32) into %ir.out, !noalias !0, addrspace 1)
|
|
S_ENDPGM 0
|
|
...
|
|
|
|
# MASK = 0x0000 0000: No instructions may be scheduled across SCHED_BARRIER.
|
|
|
|
---
|
|
name: sched_barrier_mask_0
|
|
tracksRegLiveness: true
|
|
body: |
|
|
bb.0:
|
|
; CHECK-LABEL: name: sched_barrier_mask_0
|
|
; CHECK: [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
|
|
; CHECK-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
|
|
; CHECK-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[DEF]], [[DEF1]], 0, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1)
|
|
; CHECK-NEXT: [[V_MUL_LO_U32_e64_:%[0-9]+]]:vgpr_32 = nsw V_MUL_LO_U32_e64 [[GLOBAL_LOAD_DWORD_SADDR]], [[GLOBAL_LOAD_DWORD_SADDR]], implicit $exec
|
|
; CHECK-NEXT: GLOBAL_STORE_DWORD_SADDR [[DEF1]], [[V_MUL_LO_U32_e64_]], [[DEF]], 0, 0, implicit $exec :: (store (s32) into %ir.out, !noalias !0, addrspace 1)
|
|
; CHECK-NEXT: S_NOP 0
|
|
; CHECK-NEXT: SCHED_BARRIER 0
|
|
; CHECK-NEXT: [[GLOBAL_LOAD_DWORD_SADDR1:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[DEF]], [[DEF1]], 512, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1)
|
|
; CHECK-NEXT: [[V_MUL_LO_U32_e64_1:%[0-9]+]]:vgpr_32 = nsw V_MUL_LO_U32_e64 [[GLOBAL_LOAD_DWORD_SADDR1]], [[GLOBAL_LOAD_DWORD_SADDR1]], implicit $exec
|
|
; CHECK-NEXT: GLOBAL_STORE_DWORD_SADDR [[DEF1]], [[V_MUL_LO_U32_e64_1]], [[DEF]], 512, 0, implicit $exec :: (store (s32) into %ir.out, !noalias !0, addrspace 1)
|
|
; CHECK-NEXT: S_ENDPGM 0
|
|
%0:sreg_64 = IMPLICIT_DEF
|
|
%1:vgpr_32 = IMPLICIT_DEF
|
|
%3:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR %0, %1, 0, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1)
|
|
%4:vgpr_32 = nsw V_MUL_LO_U32_e64 %3, %3, implicit $exec
|
|
GLOBAL_STORE_DWORD_SADDR %1, %4, %0, 0, 0, implicit $exec :: (store (s32) into %ir.out, !noalias !0, addrspace 1)
|
|
S_NOP 0
|
|
SCHED_BARRIER 0
|
|
%5:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR %0, %1, 512, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1)
|
|
%6:vgpr_32 = nsw V_MUL_LO_U32_e64 %5, %5, implicit $exec
|
|
GLOBAL_STORE_DWORD_SADDR %1, %6, %0, 512, 0, implicit $exec :: (store (s32) into %ir.out, !noalias !0, addrspace 1)
|
|
S_ENDPGM 0
|
|
...
|
|
|
|
# MASK = 0x0000 0001: ALL, non-memory, non-side-effect producing instructions may be
|
|
# scheduled across SCHED_BARRIER, i.e. allow ALU instructions to pass.
|
|
|
|
---
|
|
name: sched_barrier_mask_1
|
|
tracksRegLiveness: true
|
|
body: |
|
|
bb.0:
|
|
; CHECK-LABEL: name: sched_barrier_mask_1
|
|
; CHECK: [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
|
|
; CHECK-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
|
|
; CHECK-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[DEF]], [[DEF1]], 0, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1)
|
|
; CHECK-NEXT: [[V_MUL_LO_U32_e64_:%[0-9]+]]:vgpr_32 = nsw V_MUL_LO_U32_e64 [[GLOBAL_LOAD_DWORD_SADDR]], [[GLOBAL_LOAD_DWORD_SADDR]], implicit $exec
|
|
; CHECK-NEXT: GLOBAL_STORE_DWORD_SADDR [[DEF1]], [[V_MUL_LO_U32_e64_]], [[DEF]], 0, 0, implicit $exec :: (store (s32) into %ir.out, !noalias !0, addrspace 1)
|
|
; CHECK-NEXT: SCHED_BARRIER 1
|
|
; CHECK-NEXT: [[GLOBAL_LOAD_DWORD_SADDR1:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[DEF]], [[DEF1]], 512, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1)
|
|
; CHECK-NEXT: [[V_MUL_LO_U32_e64_1:%[0-9]+]]:vgpr_32 = nsw V_MUL_LO_U32_e64 [[GLOBAL_LOAD_DWORD_SADDR1]], [[GLOBAL_LOAD_DWORD_SADDR1]], implicit $exec
|
|
; CHECK-NEXT: S_NOP 0
|
|
; CHECK-NEXT: GLOBAL_STORE_DWORD_SADDR [[DEF1]], [[V_MUL_LO_U32_e64_1]], [[DEF]], 512, 0, implicit $exec :: (store (s32) into %ir.out, !noalias !0, addrspace 1)
|
|
; CHECK-NEXT: S_ENDPGM 0
|
|
%0:sreg_64 = IMPLICIT_DEF
|
|
%1:vgpr_32 = IMPLICIT_DEF
|
|
%3:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR %0, %1, 0, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1)
|
|
%4:vgpr_32 = nsw V_MUL_LO_U32_e64 %3, %3, implicit $exec
|
|
GLOBAL_STORE_DWORD_SADDR %1, %4, %0, 0, 0, implicit $exec :: (store (s32) into %ir.out, !noalias !0, addrspace 1)
|
|
S_NOP 0
|
|
SCHED_BARRIER 1
|
|
%5:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR %0, %1, 512, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1)
|
|
%6:vgpr_32 = nsw V_MUL_LO_U32_e64 %5, %5, implicit $exec
|
|
GLOBAL_STORE_DWORD_SADDR %1, %6, %0, 512, 0, implicit $exec :: (store (s32) into %ir.out, !noalias !0, addrspace 1)
|
|
S_ENDPGM 0
|
|
...
|
|
|
|
# MASK = 0x0000 0002: VALU instructions may be scheduled across SCHED_BARRIER.
|
|
|
|
---
|
|
name: sched_barrier_mask_2
|
|
tracksRegLiveness: true
|
|
body: |
|
|
bb.0:
|
|
; CHECK-LABEL: name: sched_barrier_mask_2
|
|
; CHECK: [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
|
|
; CHECK-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
|
|
; CHECK-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[DEF]], [[DEF1]], 0, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1)
|
|
; CHECK-NEXT: [[V_MUL_LO_U32_e64_:%[0-9]+]]:vgpr_32 = nsw V_MUL_LO_U32_e64 [[GLOBAL_LOAD_DWORD_SADDR]], [[GLOBAL_LOAD_DWORD_SADDR]], implicit $exec
|
|
; CHECK-NEXT: GLOBAL_STORE_DWORD_SADDR [[DEF1]], [[V_MUL_LO_U32_e64_]], [[DEF]], 0, 0, implicit $exec :: (store (s32) into %ir.out, !noalias !0, addrspace 1)
|
|
; CHECK-NEXT: S_NOP 0
|
|
; CHECK-NEXT: SCHED_BARRIER 2
|
|
; CHECK-NEXT: [[GLOBAL_LOAD_DWORD_SADDR1:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[DEF]], [[DEF1]], 512, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1)
|
|
; CHECK-NEXT: [[V_MUL_LO_U32_e64_1:%[0-9]+]]:vgpr_32 = nsw V_MUL_LO_U32_e64 [[GLOBAL_LOAD_DWORD_SADDR]], [[DEF1]], implicit $exec
|
|
; CHECK-NEXT: [[V_MUL_LO_U32_e64_2:%[0-9]+]]:vgpr_32 = nsw V_MUL_LO_U32_e64 [[GLOBAL_LOAD_DWORD_SADDR]], [[DEF1]], implicit $exec
|
|
; CHECK-NEXT: [[V_MUL_LO_U32_e64_3:%[0-9]+]]:vgpr_32 = nsw V_MUL_LO_U32_e64 [[GLOBAL_LOAD_DWORD_SADDR1]], [[GLOBAL_LOAD_DWORD_SADDR1]], implicit $exec
|
|
; CHECK-NEXT: GLOBAL_STORE_DWORD_SADDR [[DEF1]], [[V_MUL_LO_U32_e64_3]], [[DEF]], 512, 0, implicit $exec :: (store (s32) into %ir.out, !noalias !0, addrspace 1)
|
|
; CHECK-NEXT: S_ENDPGM 0, implicit [[V_MUL_LO_U32_e64_1]], implicit [[V_MUL_LO_U32_e64_2]]
|
|
%0:sreg_64 = IMPLICIT_DEF
|
|
%1:vgpr_32 = IMPLICIT_DEF
|
|
%2:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR %0, %1, 0, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1)
|
|
%3:vgpr_32 = nsw V_MUL_LO_U32_e64 %2, %2, implicit $exec
|
|
GLOBAL_STORE_DWORD_SADDR %1, %3, %0, 0, 0, implicit $exec :: (store (s32) into %ir.out, !noalias !0, addrspace 1)
|
|
%4:vgpr_32 = nsw V_MUL_LO_U32_e64 %2, %1, implicit $exec
|
|
%5:vgpr_32 = nsw V_MUL_LO_U32_e64 %2, %1, implicit $exec
|
|
S_NOP 0
|
|
SCHED_BARRIER 2
|
|
%6:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR %0, %1, 512, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1)
|
|
%7:vgpr_32 = nsw V_MUL_LO_U32_e64 %6, %6, implicit $exec
|
|
GLOBAL_STORE_DWORD_SADDR %1, %7, %0, 512, 0, implicit $exec :: (store (s32) into %ir.out, !noalias !0, addrspace 1)
|
|
S_ENDPGM 0, implicit %4, implicit %5
|
|
...
|
|
|
|
# MASK = 0x0000 0004: SALU instructions may be scheduled across SCHED_BARRIER.
|
|
|
|
---
|
|
name: sched_barrier_mask_4
|
|
tracksRegLiveness: true
|
|
body: |
|
|
bb.0:
|
|
; CHECK-LABEL: name: sched_barrier_mask_4
|
|
; CHECK: [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
|
|
; CHECK-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
|
|
; CHECK-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[DEF]], [[DEF1]], 0, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1)
|
|
; CHECK-NEXT: [[DEF2:%[0-9]+]]:areg_128 = IMPLICIT_DEF
|
|
; CHECK-NEXT: [[V_MFMA_F32_4X4X1F32_e64_:%[0-9]+]]:areg_128 = V_MFMA_F32_4X4X1F32_e64 [[DEF1]], [[GLOBAL_LOAD_DWORD_SADDR]], [[DEF2]], 0, 0, 0, implicit $mode, implicit $exec
|
|
; CHECK-NEXT: [[V_MUL_LO_U32_e64_:%[0-9]+]]:vgpr_32 = nsw V_MUL_LO_U32_e64 [[GLOBAL_LOAD_DWORD_SADDR]], [[GLOBAL_LOAD_DWORD_SADDR]], implicit $exec
|
|
; CHECK-NEXT: [[V_MFMA_F32_4X4X1F32_e64_1:%[0-9]+]]:areg_128 = V_MFMA_F32_4X4X1F32_e64 [[DEF1]], [[GLOBAL_LOAD_DWORD_SADDR]], [[V_MFMA_F32_4X4X1F32_e64_]], 0, 0, 0, implicit $mode, implicit $exec
|
|
; CHECK-NEXT: [[V_MUL_LO_U32_e64_1:%[0-9]+]]:vgpr_32 = nsw V_MUL_LO_U32_e64 [[GLOBAL_LOAD_DWORD_SADDR]], [[DEF1]], implicit $exec
|
|
; CHECK-NEXT: [[V_MFMA_F32_4X4X1F32_e64_2:%[0-9]+]]:areg_128 = V_MFMA_F32_4X4X1F32_e64 [[DEF1]], [[GLOBAL_LOAD_DWORD_SADDR]], [[V_MFMA_F32_4X4X1F32_e64_1]], 0, 0, 0, implicit $mode, implicit $exec
|
|
; CHECK-NEXT: [[V_MUL_LO_U32_e64_2:%[0-9]+]]:vgpr_32 = nsw V_MUL_LO_U32_e64 [[GLOBAL_LOAD_DWORD_SADDR]], [[DEF1]], implicit $exec
|
|
; CHECK-NEXT: [[V_MFMA_F32_4X4X1F32_e64_3:%[0-9]+]]:areg_128 = V_MFMA_F32_4X4X1F32_e64 [[DEF1]], [[GLOBAL_LOAD_DWORD_SADDR]], [[V_MFMA_F32_4X4X1F32_e64_2]], 0, 0, 0, implicit $mode, implicit $exec
|
|
; CHECK-NEXT: GLOBAL_STORE_DWORD_SADDR [[DEF1]], [[V_MUL_LO_U32_e64_]], [[DEF]], 0, 0, implicit $exec :: (store (s32) into %ir.out, !noalias !0, addrspace 1)
|
|
; CHECK-NEXT: [[V_MFMA_F32_4X4X1F32_e64_4:%[0-9]+]]:areg_128 = V_MFMA_F32_4X4X1F32_e64 [[DEF1]], [[GLOBAL_LOAD_DWORD_SADDR]], [[V_MFMA_F32_4X4X1F32_e64_3]], 0, 0, 0, implicit $mode, implicit $exec
|
|
; CHECK-NEXT: SCHED_BARRIER 4
|
|
; CHECK-NEXT: [[GLOBAL_LOAD_DWORD_SADDR1:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[DEF]], [[DEF1]], 512, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1)
|
|
; CHECK-NEXT: [[V_MUL_LO_U32_e64_3:%[0-9]+]]:vgpr_32 = nsw V_MUL_LO_U32_e64 [[GLOBAL_LOAD_DWORD_SADDR1]], [[GLOBAL_LOAD_DWORD_SADDR1]], implicit $exec
|
|
; CHECK-NEXT: S_NOP 0
|
|
; CHECK-NEXT: GLOBAL_STORE_DWORD_SADDR [[DEF1]], [[V_MUL_LO_U32_e64_3]], [[DEF]], 512, 0, implicit $exec :: (store (s32) into %ir.out, !noalias !0, addrspace 1)
|
|
; CHECK-NEXT: S_ENDPGM 0, implicit [[V_MUL_LO_U32_e64_1]], implicit [[V_MUL_LO_U32_e64_2]], implicit [[V_MFMA_F32_4X4X1F32_e64_4]]
|
|
%0:sreg_64 = IMPLICIT_DEF
|
|
%1:vgpr_32 = IMPLICIT_DEF
|
|
%2:areg_128 = IMPLICIT_DEF
|
|
%3:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR %0, %1, 0, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1)
|
|
%4:vgpr_32 = nsw V_MUL_LO_U32_e64 %3, %3, implicit $exec
|
|
GLOBAL_STORE_DWORD_SADDR %1, %4, %0, 0, 0, implicit $exec :: (store (s32) into %ir.out, !noalias !0, addrspace 1)
|
|
%5:vgpr_32 = nsw V_MUL_LO_U32_e64 %3, %1, implicit $exec
|
|
%6:vgpr_32 = nsw V_MUL_LO_U32_e64 %3, %1, implicit $exec
|
|
S_NOP 0
|
|
%7:areg_128 = V_MFMA_F32_4X4X1F32_e64 %1, %3, %2, 0, 0, 0, implicit $mode, implicit $exec
|
|
%8:areg_128 = V_MFMA_F32_4X4X1F32_e64 %1, %3, %7, 0, 0, 0, implicit $mode, implicit $exec
|
|
%9:areg_128 = V_MFMA_F32_4X4X1F32_e64 %1, %3, %8, 0, 0, 0, implicit $mode, implicit $exec
|
|
%10:areg_128 = V_MFMA_F32_4X4X1F32_e64 %1, %3, %9, 0, 0, 0, implicit $mode, implicit $exec
|
|
%11:areg_128 = V_MFMA_F32_4X4X1F32_e64 %1, %3, %10, 0, 0, 0, implicit $mode, implicit $exec
|
|
SCHED_BARRIER 4
|
|
%12:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR %0, %1, 512, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1)
|
|
%13:vgpr_32 = nsw V_MUL_LO_U32_e64 %12, %12, implicit $exec
|
|
GLOBAL_STORE_DWORD_SADDR %1, %13, %0, 512, 0, implicit $exec :: (store (s32) into %ir.out, !noalias !0, addrspace 1)
|
|
S_ENDPGM 0, implicit %5, implicit %6, implicit %11
|
|
...
|
|
|
|
# MASK = 0x0000 0008: MFMA instructions may be scheduled across SCHED_BARRIER.
|
|
|
|
---
|
|
name: sched_barrier_mask_8
|
|
tracksRegLiveness: true
|
|
body: |
|
|
bb.0:
|
|
; CHECK-LABEL: name: sched_barrier_mask_8
|
|
; CHECK: [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
|
|
; CHECK-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
|
|
; CHECK-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[DEF]], [[DEF1]], 0, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1)
|
|
; CHECK-NEXT: [[DEF2:%[0-9]+]]:areg_128 = IMPLICIT_DEF
|
|
; CHECK-NEXT: [[V_MUL_LO_U32_e64_:%[0-9]+]]:vgpr_32 = nsw V_MUL_LO_U32_e64 [[GLOBAL_LOAD_DWORD_SADDR]], [[GLOBAL_LOAD_DWORD_SADDR]], implicit $exec
|
|
; CHECK-NEXT: [[V_MUL_LO_U32_e64_1:%[0-9]+]]:vgpr_32 = nsw V_MUL_LO_U32_e64 [[GLOBAL_LOAD_DWORD_SADDR]], [[DEF1]], implicit $exec
|
|
; CHECK-NEXT: [[V_MFMA_F32_4X4X1F32_e64_:%[0-9]+]]:areg_128 = V_MFMA_F32_4X4X1F32_e64 [[DEF1]], [[GLOBAL_LOAD_DWORD_SADDR]], [[DEF2]], 0, 0, 0, implicit $mode, implicit $exec
|
|
; CHECK-NEXT: [[V_MUL_LO_U32_e64_2:%[0-9]+]]:vgpr_32 = nsw V_MUL_LO_U32_e64 [[GLOBAL_LOAD_DWORD_SADDR]], [[DEF1]], implicit $exec
|
|
; CHECK-NEXT: [[V_MFMA_F32_4X4X1F32_e64_1:%[0-9]+]]:areg_128 = V_MFMA_F32_4X4X1F32_e64 [[DEF1]], [[GLOBAL_LOAD_DWORD_SADDR]], [[V_MFMA_F32_4X4X1F32_e64_]], 0, 0, 0, implicit $mode, implicit $exec
|
|
; CHECK-NEXT: GLOBAL_STORE_DWORD_SADDR [[DEF1]], [[V_MUL_LO_U32_e64_]], [[DEF]], 0, 0, implicit $exec :: (store (s32) into %ir.out, !noalias !0, addrspace 1)
|
|
; CHECK-NEXT: S_NOP 0
|
|
; CHECK-NEXT: [[V_MFMA_F32_4X4X1F32_e64_2:%[0-9]+]]:areg_128 = V_MFMA_F32_4X4X1F32_e64 [[DEF1]], [[GLOBAL_LOAD_DWORD_SADDR]], [[V_MFMA_F32_4X4X1F32_e64_1]], 0, 0, 0, implicit $mode, implicit $exec
|
|
; CHECK-NEXT: SCHED_BARRIER 8
|
|
; CHECK-NEXT: [[GLOBAL_LOAD_DWORD_SADDR1:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[DEF]], [[DEF1]], 512, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1)
|
|
; CHECK-NEXT: [[V_MFMA_F32_4X4X1F32_e64_3:%[0-9]+]]:areg_128 = V_MFMA_F32_4X4X1F32_e64 [[DEF1]], [[GLOBAL_LOAD_DWORD_SADDR]], [[V_MFMA_F32_4X4X1F32_e64_2]], 0, 0, 0, implicit $mode, implicit $exec
|
|
; CHECK-NEXT: [[V_MUL_LO_U32_e64_3:%[0-9]+]]:vgpr_32 = nsw V_MUL_LO_U32_e64 [[GLOBAL_LOAD_DWORD_SADDR1]], [[GLOBAL_LOAD_DWORD_SADDR1]], implicit $exec
|
|
; CHECK-NEXT: [[V_MFMA_F32_4X4X1F32_e64_4:%[0-9]+]]:areg_128 = V_MFMA_F32_4X4X1F32_e64 [[DEF1]], [[GLOBAL_LOAD_DWORD_SADDR]], [[V_MFMA_F32_4X4X1F32_e64_3]], 0, 0, 0, implicit $mode, implicit $exec
|
|
; CHECK-NEXT: GLOBAL_STORE_DWORD_SADDR [[DEF1]], [[V_MUL_LO_U32_e64_3]], [[DEF]], 512, 0, implicit $exec :: (store (s32) into %ir.out, !noalias !0, addrspace 1)
|
|
; CHECK-NEXT: S_ENDPGM 0, implicit [[V_MUL_LO_U32_e64_1]], implicit [[V_MUL_LO_U32_e64_2]], implicit [[V_MFMA_F32_4X4X1F32_e64_4]]
|
|
%0:sreg_64 = IMPLICIT_DEF
|
|
%1:vgpr_32 = IMPLICIT_DEF
|
|
%2:areg_128 = IMPLICIT_DEF
|
|
%3:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR %0, %1, 0, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1)
|
|
%4:vgpr_32 = nsw V_MUL_LO_U32_e64 %3, %3, implicit $exec
|
|
GLOBAL_STORE_DWORD_SADDR %1, %4, %0, 0, 0, implicit $exec :: (store (s32) into %ir.out, !noalias !0, addrspace 1)
|
|
%5:vgpr_32 = nsw V_MUL_LO_U32_e64 %3, %1, implicit $exec
|
|
%6:vgpr_32 = nsw V_MUL_LO_U32_e64 %3, %1, implicit $exec
|
|
S_NOP 0
|
|
%7:areg_128 = V_MFMA_F32_4X4X1F32_e64 %1, %3, %2, 0, 0, 0, implicit $mode, implicit $exec
|
|
%8:areg_128 = V_MFMA_F32_4X4X1F32_e64 %1, %3, %7, 0, 0, 0, implicit $mode, implicit $exec
|
|
%9:areg_128 = V_MFMA_F32_4X4X1F32_e64 %1, %3, %8, 0, 0, 0, implicit $mode, implicit $exec
|
|
%10:areg_128 = V_MFMA_F32_4X4X1F32_e64 %1, %3, %9, 0, 0, 0, implicit $mode, implicit $exec
|
|
%11:areg_128 = V_MFMA_F32_4X4X1F32_e64 %1, %3, %10, 0, 0, 0, implicit $mode, implicit $exec
|
|
SCHED_BARRIER 8
|
|
%12:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR %0, %1, 512, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1)
|
|
%13:vgpr_32 = nsw V_MUL_LO_U32_e64 %12, %12, implicit $exec
|
|
GLOBAL_STORE_DWORD_SADDR %1, %13, %0, 512, 0, implicit $exec :: (store (s32) into %ir.out, !noalias !0, addrspace 1)
|
|
S_ENDPGM 0, implicit %5, implicit %6, implicit %11
|
|
...
|
|
|
|
# MASK = 0x0000 0010: ALL VMEM instructions may be scheduled across SCHED_BARRIER.
|
|
|
|
---
|
|
name: sched_barrier_mask_16
|
|
tracksRegLiveness: true
|
|
body: |
|
|
bb.0:
|
|
; CHECK-LABEL: name: sched_barrier_mask_16
|
|
; CHECK: [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
|
|
; CHECK-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
|
|
; CHECK-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[DEF]], [[DEF1]], 0, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1)
|
|
; CHECK-NEXT: [[GLOBAL_LOAD_DWORD_SADDR1:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[DEF]], [[DEF1]], 512, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1)
|
|
; CHECK-NEXT: [[V_MUL_LO_U32_e64_:%[0-9]+]]:vgpr_32 = nsw V_MUL_LO_U32_e64 [[GLOBAL_LOAD_DWORD_SADDR]], [[GLOBAL_LOAD_DWORD_SADDR]], implicit $exec
|
|
; CHECK-NEXT: GLOBAL_STORE_DWORD_SADDR [[DEF1]], [[V_MUL_LO_U32_e64_]], [[DEF]], 0, 0, implicit $exec :: (store (s32) into %ir.out, !noalias !0, addrspace 1)
|
|
; CHECK-NEXT: S_NOP 0
|
|
; CHECK-NEXT: SCHED_BARRIER 16
|
|
; CHECK-NEXT: [[V_MUL_LO_U32_e64_1:%[0-9]+]]:vgpr_32 = nsw V_MUL_LO_U32_e64 [[GLOBAL_LOAD_DWORD_SADDR1]], [[GLOBAL_LOAD_DWORD_SADDR1]], implicit $exec
|
|
; CHECK-NEXT: GLOBAL_STORE_DWORD_SADDR [[DEF1]], [[V_MUL_LO_U32_e64_1]], [[DEF]], 512, 0, implicit $exec :: (store (s32) into %ir.out, !noalias !0, addrspace 1)
|
|
; CHECK-NEXT: S_ENDPGM 0
|
|
%0:sreg_64 = IMPLICIT_DEF
|
|
%1:vgpr_32 = IMPLICIT_DEF
|
|
%3:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR %0, %1, 0, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1)
|
|
%4:vgpr_32 = nsw V_MUL_LO_U32_e64 %3, %3, implicit $exec
|
|
GLOBAL_STORE_DWORD_SADDR %1, %4, %0, 0, 0, implicit $exec :: (store (s32) into %ir.out, !noalias !0, addrspace 1)
|
|
S_NOP 0
|
|
SCHED_BARRIER 16
|
|
%5:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR %0, %1, 512, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1)
|
|
%6:vgpr_32 = nsw V_MUL_LO_U32_e64 %5, %5, implicit $exec
|
|
GLOBAL_STORE_DWORD_SADDR %1, %6, %0, 512, 0, implicit $exec :: (store (s32) into %ir.out, !noalias !0, addrspace 1)
|
|
S_ENDPGM 0
|
|
...
|
|
|
|
# MASK = 0x0000 0020: VMEM read instructions may be scheduled across SCHED_BARRIER.
|
|
|
|
---
|
|
name: sched_barrier_mask_32
|
|
tracksRegLiveness: true
|
|
body: |
|
|
bb.0:
|
|
; CHECK-LABEL: name: sched_barrier_mask_32
|
|
; CHECK: [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
|
|
; CHECK-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
|
|
; CHECK-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[DEF]], [[DEF1]], 0, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1)
|
|
; CHECK-NEXT: [[GLOBAL_LOAD_DWORD_SADDR1:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[DEF]], [[DEF1]], 512, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1)
|
|
; CHECK-NEXT: [[V_MUL_LO_U32_e64_:%[0-9]+]]:vgpr_32 = nsw V_MUL_LO_U32_e64 [[GLOBAL_LOAD_DWORD_SADDR]], [[GLOBAL_LOAD_DWORD_SADDR]], implicit $exec
|
|
; CHECK-NEXT: GLOBAL_STORE_DWORD_SADDR [[DEF1]], [[V_MUL_LO_U32_e64_]], [[DEF]], 0, 0, implicit $exec :: (store (s32) into %ir.out, !noalias !0, addrspace 1)
|
|
; CHECK-NEXT: S_NOP 0
|
|
; CHECK-NEXT: SCHED_BARRIER 32
|
|
; CHECK-NEXT: [[V_MUL_LO_U32_e64_1:%[0-9]+]]:vgpr_32 = nsw V_MUL_LO_U32_e64 [[GLOBAL_LOAD_DWORD_SADDR1]], [[GLOBAL_LOAD_DWORD_SADDR1]], implicit $exec
|
|
; CHECK-NEXT: GLOBAL_STORE_DWORD_SADDR [[DEF1]], [[V_MUL_LO_U32_e64_1]], [[DEF]], 512, 0, implicit $exec :: (store (s32) into %ir.out, !noalias !0, addrspace 1)
|
|
; CHECK-NEXT: S_ENDPGM 0
|
|
%0:sreg_64 = IMPLICIT_DEF
|
|
%1:vgpr_32 = IMPLICIT_DEF
|
|
%3:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR %0, %1, 0, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1)
|
|
%4:vgpr_32 = nsw V_MUL_LO_U32_e64 %3, %3, implicit $exec
|
|
GLOBAL_STORE_DWORD_SADDR %1, %4, %0, 0, 0, implicit $exec :: (store (s32) into %ir.out, !noalias !0, addrspace 1)
|
|
S_NOP 0
|
|
SCHED_BARRIER 32
|
|
%5:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR %0, %1, 512, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1)
|
|
%6:vgpr_32 = nsw V_MUL_LO_U32_e64 %5, %5, implicit $exec
|
|
GLOBAL_STORE_DWORD_SADDR %1, %6, %0, 512, 0, implicit $exec :: (store (s32) into %ir.out, !noalias !0, addrspace 1)
|
|
S_ENDPGM 0
|
|
...
|
|
|
|
# MASK = 0x0000 0040: VMEM write instructions may be scheduled across SCHED_BARRIER.
|
|
|
|
---
|
|
name: sched_barrier_mask_64
|
|
tracksRegLiveness: true
|
|
body: |
|
|
bb.0:
|
|
; CHECK-LABEL: name: sched_barrier_mask_64
|
|
; CHECK: [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
|
|
; CHECK-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
|
|
; CHECK-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[DEF]], [[DEF1]], 0, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1)
|
|
; CHECK-NEXT: [[V_MUL_LO_U32_e64_:%[0-9]+]]:vgpr_32 = nsw V_MUL_LO_U32_e64 [[GLOBAL_LOAD_DWORD_SADDR]], [[GLOBAL_LOAD_DWORD_SADDR]], implicit $exec
|
|
; CHECK-NEXT: GLOBAL_STORE_DWORD_SADDR [[DEF1]], [[V_MUL_LO_U32_e64_]], [[DEF]], 0, 0, implicit $exec :: (store (s32) into %ir.out, !noalias !0, addrspace 1)
|
|
; CHECK-NEXT: S_NOP 0
|
|
; CHECK-NEXT: SCHED_BARRIER 64
|
|
; CHECK-NEXT: [[GLOBAL_LOAD_DWORD_SADDR1:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[DEF]], [[DEF1]], 512, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1)
|
|
; CHECK-NEXT: [[V_MUL_LO_U32_e64_1:%[0-9]+]]:vgpr_32 = nsw V_MUL_LO_U32_e64 [[GLOBAL_LOAD_DWORD_SADDR1]], [[GLOBAL_LOAD_DWORD_SADDR1]], implicit $exec
|
|
; CHECK-NEXT: GLOBAL_STORE_DWORD_SADDR [[DEF1]], [[V_MUL_LO_U32_e64_1]], [[DEF]], 512, 0, implicit $exec :: (store (s32) into %ir.out, !noalias !0, addrspace 1)
|
|
; CHECK-NEXT: S_ENDPGM 0
|
|
%0:sreg_64 = IMPLICIT_DEF
|
|
%1:vgpr_32 = IMPLICIT_DEF
|
|
%3:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR %0, %1, 0, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1)
|
|
%4:vgpr_32 = nsw V_MUL_LO_U32_e64 %3, %3, implicit $exec
|
|
GLOBAL_STORE_DWORD_SADDR %1, %4, %0, 0, 0, implicit $exec :: (store (s32) into %ir.out, !noalias !0, addrspace 1)
|
|
S_NOP 0
|
|
SCHED_BARRIER 64
|
|
%5:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR %0, %1, 512, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1)
|
|
%6:vgpr_32 = nsw V_MUL_LO_U32_e64 %5, %5, implicit $exec
|
|
GLOBAL_STORE_DWORD_SADDR %1, %6, %0, 512, 0, implicit $exec :: (store (s32) into %ir.out, !noalias !0, addrspace 1)
|
|
S_ENDPGM 0
|
|
...
|
|
|
|
# MASK = 0x0000 0080: ALL DS instructions may be scheduled across SCHED_BARRIER.
|
|
|
|
---
|
|
name: sched_barrier_mask_128
|
|
tracksRegLiveness: true
|
|
body: |
|
|
bb.0:
|
|
; CHECK-LABEL: name: sched_barrier_mask_128
|
|
; CHECK: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
|
|
; CHECK-NEXT: [[DS_READ_U16_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_U16_gfx9 [[DEF]], 0, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 3)
|
|
; CHECK-NEXT: [[V_MUL_LO_U32_e64_:%[0-9]+]]:vgpr_32 = nsw V_MUL_LO_U32_e64 [[DS_READ_U16_gfx9_]], [[DS_READ_U16_gfx9_]], implicit $exec
|
|
; CHECK-NEXT: [[DS_READ_U16_gfx9_1:%[0-9]+]]:vgpr_32 = DS_READ_U16_gfx9 [[DEF]], 0, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 3)
|
|
; CHECK-NEXT: DS_WRITE_B32 [[V_MUL_LO_U32_e64_]], [[DEF]], 0, 16, implicit $m0, implicit $exec :: (store (s32) into %ir.out, !noalias !0, addrspace 3)
|
|
; CHECK-NEXT: S_NOP 0
|
|
; CHECK-NEXT: SCHED_BARRIER 128
|
|
; CHECK-NEXT: [[V_MUL_LO_U32_e64_1:%[0-9]+]]:vgpr_32 = nsw V_MUL_LO_U32_e64 [[DS_READ_U16_gfx9_1]], [[DS_READ_U16_gfx9_1]], implicit $exec
|
|
; CHECK-NEXT: dead [[DEF1:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
|
|
; CHECK-NEXT: DS_WRITE_B32 [[V_MUL_LO_U32_e64_1]], [[V_MUL_LO_U32_e64_]], 0, 16, implicit $m0, implicit $exec :: (store (s32) into %ir.out, !noalias !0, addrspace 3)
|
|
; CHECK-NEXT: S_ENDPGM 0
|
|
%0:sreg_64 = IMPLICIT_DEF
|
|
%1:vgpr_32 = IMPLICIT_DEF
|
|
%2:vgpr_32 = DS_READ_U16_gfx9 %1, 0, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 3)
|
|
%3:vgpr_32 = nsw V_MUL_LO_U32_e64 %2, %2, implicit $exec
|
|
DS_WRITE_B32 %3, %1, 0, 16, implicit $m0, implicit $exec :: (store (s32) into %ir.out, !noalias !0, addrspace 3)
|
|
S_NOP 0
|
|
SCHED_BARRIER 128
|
|
%4:vgpr_32 = DS_READ_U16_gfx9 %1, 0, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 3)
|
|
%5:vgpr_32 = nsw V_MUL_LO_U32_e64 %4, %4, implicit $exec
|
|
DS_WRITE_B32 %5, %3, 0, 16, implicit $m0, implicit $exec :: (store (s32) into %ir.out, !noalias !0, addrspace 3)
|
|
S_ENDPGM 0
|
|
...
|
|
|
|
# MASK = 0x0000 0100: ALL DS read instructions may be scheduled across SCHED_BARRIER.
|
|
|
|
---
|
|
name: sched_barrier_mask_256
|
|
tracksRegLiveness: true
|
|
body: |
|
|
bb.0:
|
|
; CHECK-LABEL: name: sched_barrier_mask_256
|
|
; CHECK: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
|
|
; CHECK-NEXT: [[DS_READ_U16_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_U16_gfx9 [[DEF]], 0, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 3)
|
|
; CHECK-NEXT: [[V_MUL_LO_U32_e64_:%[0-9]+]]:vgpr_32 = nsw V_MUL_LO_U32_e64 [[DS_READ_U16_gfx9_]], [[DS_READ_U16_gfx9_]], implicit $exec
|
|
; CHECK-NEXT: [[DS_READ_U16_gfx9_1:%[0-9]+]]:vgpr_32 = DS_READ_U16_gfx9 [[DEF]], 0, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 3)
|
|
; CHECK-NEXT: DS_WRITE_B32 [[V_MUL_LO_U32_e64_]], [[DEF]], 0, 16, implicit $m0, implicit $exec :: (store (s32) into %ir.out, !noalias !0, addrspace 3)
|
|
; CHECK-NEXT: S_NOP 0
|
|
; CHECK-NEXT: SCHED_BARRIER 256
|
|
; CHECK-NEXT: [[V_MUL_LO_U32_e64_1:%[0-9]+]]:vgpr_32 = nsw V_MUL_LO_U32_e64 [[DS_READ_U16_gfx9_1]], [[DS_READ_U16_gfx9_1]], implicit $exec
|
|
; CHECK-NEXT: dead [[DEF1:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
|
|
; CHECK-NEXT: DS_WRITE_B32 [[V_MUL_LO_U32_e64_1]], [[V_MUL_LO_U32_e64_]], 0, 16, implicit $m0, implicit $exec :: (store (s32) into %ir.out, !noalias !0, addrspace 3)
|
|
; CHECK-NEXT: S_ENDPGM 0
|
|
%0:sreg_64 = IMPLICIT_DEF
|
|
%1:vgpr_32 = IMPLICIT_DEF
|
|
%2:vgpr_32 = DS_READ_U16_gfx9 %1, 0, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 3)
|
|
%3:vgpr_32 = nsw V_MUL_LO_U32_e64 %2, %2, implicit $exec
|
|
DS_WRITE_B32 %3, %1, 0, 16, implicit $m0, implicit $exec :: (store (s32) into %ir.out, !noalias !0, addrspace 3)
|
|
S_NOP 0
|
|
SCHED_BARRIER 256
|
|
%4:vgpr_32 = DS_READ_U16_gfx9 %1, 0, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 3)
|
|
%5:vgpr_32 = nsw V_MUL_LO_U32_e64 %4, %4, implicit $exec
|
|
DS_WRITE_B32 %5, %3, 0, 16, implicit $m0, implicit $exec :: (store (s32) into %ir.out, !noalias !0, addrspace 3)
|
|
S_ENDPGM 0
|
|
...
|
|
|
|
# MASK = 0x0000 0200: ALL DS write instructions may be scheduled across SCHED_BARRIER.
|
|
|
|
---
|
|
name: sched_barrier_mask_512
|
|
tracksRegLiveness: true
|
|
body: |
|
|
bb.0:
|
|
; CHECK-LABEL: name: sched_barrier_mask_512
|
|
; CHECK: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
|
|
; CHECK-NEXT: [[DS_READ_U16_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_U16_gfx9 [[DEF]], 0, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 3)
|
|
; CHECK-NEXT: [[V_MUL_LO_U32_e64_:%[0-9]+]]:vgpr_32 = nsw V_MUL_LO_U32_e64 [[DS_READ_U16_gfx9_]], [[DS_READ_U16_gfx9_]], implicit $exec
|
|
; CHECK-NEXT: DS_WRITE_B32 [[V_MUL_LO_U32_e64_]], [[DEF]], 0, 16, implicit $m0, implicit $exec :: (store (s32) into %ir.out, !noalias !0, addrspace 3)
|
|
; CHECK-NEXT: S_NOP 0
|
|
; CHECK-NEXT: SCHED_BARRIER 512
|
|
; CHECK-NEXT: [[DS_READ_U16_gfx9_1:%[0-9]+]]:vgpr_32 = DS_READ_U16_gfx9 [[DEF]], 0, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 3)
|
|
; CHECK-NEXT: [[V_MUL_LO_U32_e64_1:%[0-9]+]]:vgpr_32 = nsw V_MUL_LO_U32_e64 [[DS_READ_U16_gfx9_1]], [[DS_READ_U16_gfx9_1]], implicit $exec
|
|
; CHECK-NEXT: dead [[DEF1:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
|
|
; CHECK-NEXT: DS_WRITE_B32 [[V_MUL_LO_U32_e64_1]], [[V_MUL_LO_U32_e64_]], 0, 16, implicit $m0, implicit $exec :: (store (s32) into %ir.out, !noalias !0, addrspace 3)
|
|
; CHECK-NEXT: S_ENDPGM 0
|
|
%0:sreg_64 = IMPLICIT_DEF
|
|
%1:vgpr_32 = IMPLICIT_DEF
|
|
%2:vgpr_32 = DS_READ_U16_gfx9 %1, 0, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 3)
|
|
%3:vgpr_32 = nsw V_MUL_LO_U32_e64 %2, %2, implicit $exec
|
|
DS_WRITE_B32 %3, %1, 0, 16, implicit $m0, implicit $exec :: (store (s32) into %ir.out, !noalias !0, addrspace 3)
|
|
S_NOP 0
|
|
SCHED_BARRIER 512
|
|
%4:vgpr_32 = DS_READ_U16_gfx9 %1, 0, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 3)
|
|
%5:vgpr_32 = nsw V_MUL_LO_U32_e64 %4, %4, implicit $exec
|
|
DS_WRITE_B32 %5, %3, 0, 16, implicit $m0, implicit $exec :: (store (s32) into %ir.out, !noalias !0, addrspace 3)
|
|
S_ENDPGM 0
|
|
...
|
|
|
|
# MASK = 0x0000 0008: MFMA instructions may be scheduled across SCHED_BARRIER.
|
|
# MASK = 0x0000 000C: MFMA and SALU may be scheduled across SCHED_BARRIER.
|
|
# Check that S_NOP can move moved before the first SCHED_BARRIER but not the second.
|
|
|
|
---
|
|
name: sched_barrier_masks_8_12
|
|
tracksRegLiveness: true
|
|
body: |
|
|
bb.0:
|
|
; CHECK-LABEL: name: sched_barrier_masks_8_12
|
|
; CHECK: [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
|
|
; CHECK-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
|
|
; CHECK-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[DEF]], [[DEF1]], 0, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1)
|
|
; CHECK-NEXT: [[DEF2:%[0-9]+]]:areg_128 = IMPLICIT_DEF
|
|
; CHECK-NEXT: S_NOP 0
|
|
; CHECK-NEXT: [[V_MUL_LO_U32_e64_:%[0-9]+]]:vgpr_32 = nsw V_MUL_LO_U32_e64 [[GLOBAL_LOAD_DWORD_SADDR]], [[GLOBAL_LOAD_DWORD_SADDR]], implicit $exec
|
|
; CHECK-NEXT: [[V_MUL_LO_U32_e64_1:%[0-9]+]]:vgpr_32 = nsw V_MUL_LO_U32_e64 [[GLOBAL_LOAD_DWORD_SADDR]], [[DEF1]], implicit $exec
|
|
; CHECK-NEXT: [[V_MFMA_F32_4X4X1F32_e64_:%[0-9]+]]:areg_128 = V_MFMA_F32_4X4X1F32_e64 [[DEF1]], [[GLOBAL_LOAD_DWORD_SADDR]], [[DEF2]], 0, 0, 0, implicit $mode, implicit $exec
|
|
; CHECK-NEXT: [[V_MUL_LO_U32_e64_2:%[0-9]+]]:vgpr_32 = nsw V_MUL_LO_U32_e64 [[GLOBAL_LOAD_DWORD_SADDR]], [[DEF1]], implicit $exec
|
|
; CHECK-NEXT: [[V_MFMA_F32_4X4X1F32_e64_1:%[0-9]+]]:areg_128 = V_MFMA_F32_4X4X1F32_e64 [[DEF1]], [[GLOBAL_LOAD_DWORD_SADDR]], [[V_MFMA_F32_4X4X1F32_e64_]], 0, 0, 0, implicit $mode, implicit $exec
|
|
; CHECK-NEXT: GLOBAL_STORE_DWORD_SADDR [[DEF1]], [[V_MUL_LO_U32_e64_]], [[DEF]], 0, 0, implicit $exec :: (store (s32) into %ir.out, !noalias !0, addrspace 1)
|
|
; CHECK-NEXT: SCHED_BARRIER 12
|
|
; CHECK-NEXT: [[V_MFMA_F32_4X4X1F32_e64_2:%[0-9]+]]:areg_128 = V_MFMA_F32_4X4X1F32_e64 [[DEF1]], [[GLOBAL_LOAD_DWORD_SADDR]], [[V_MFMA_F32_4X4X1F32_e64_1]], 0, 0, 0, implicit $mode, implicit $exec
|
|
; CHECK-NEXT: SCHED_BARRIER 8
|
|
; CHECK-NEXT: S_NOP 0
|
|
; CHECK-NEXT: [[GLOBAL_LOAD_DWORD_SADDR1:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[DEF]], [[DEF1]], 512, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1)
|
|
; CHECK-NEXT: [[V_MFMA_F32_4X4X1F32_e64_3:%[0-9]+]]:areg_128 = V_MFMA_F32_4X4X1F32_e64 [[DEF1]], [[GLOBAL_LOAD_DWORD_SADDR]], [[V_MFMA_F32_4X4X1F32_e64_2]], 0, 0, 0, implicit $mode, implicit $exec
|
|
; CHECK-NEXT: [[V_MUL_LO_U32_e64_3:%[0-9]+]]:vgpr_32 = nsw V_MUL_LO_U32_e64 [[GLOBAL_LOAD_DWORD_SADDR1]], [[GLOBAL_LOAD_DWORD_SADDR1]], implicit $exec
|
|
; CHECK-NEXT: [[V_MFMA_F32_4X4X1F32_e64_4:%[0-9]+]]:areg_128 = V_MFMA_F32_4X4X1F32_e64 [[DEF1]], [[GLOBAL_LOAD_DWORD_SADDR]], [[V_MFMA_F32_4X4X1F32_e64_3]], 0, 0, 0, implicit $mode, implicit $exec
|
|
; CHECK-NEXT: GLOBAL_STORE_DWORD_SADDR [[DEF1]], [[V_MUL_LO_U32_e64_3]], [[DEF]], 512, 0, implicit $exec :: (store (s32) into %ir.out, !noalias !0, addrspace 1)
|
|
; CHECK-NEXT: S_ENDPGM 0, implicit [[V_MUL_LO_U32_e64_1]], implicit [[V_MUL_LO_U32_e64_2]], implicit [[V_MFMA_F32_4X4X1F32_e64_4]]
|
|
%0:sreg_64 = IMPLICIT_DEF
|
|
%1:vgpr_32 = IMPLICIT_DEF
|
|
%2:areg_128 = IMPLICIT_DEF
|
|
%3:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR %0, %1, 0, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1)
|
|
%4:vgpr_32 = nsw V_MUL_LO_U32_e64 %3, %3, implicit $exec
|
|
GLOBAL_STORE_DWORD_SADDR %1, %4, %0, 0, 0, implicit $exec :: (store (s32) into %ir.out, !noalias !0, addrspace 1)
|
|
%5:vgpr_32 = nsw V_MUL_LO_U32_e64 %3, %1, implicit $exec
|
|
%6:vgpr_32 = nsw V_MUL_LO_U32_e64 %3, %1, implicit $exec
|
|
SCHED_BARRIER 12
|
|
%7:areg_128 = V_MFMA_F32_4X4X1F32_e64 %1, %3, %2, 0, 0, 0, implicit $mode, implicit $exec
|
|
%8:areg_128 = V_MFMA_F32_4X4X1F32_e64 %1, %3, %7, 0, 0, 0, implicit $mode, implicit $exec
|
|
%9:areg_128 = V_MFMA_F32_4X4X1F32_e64 %1, %3, %8, 0, 0, 0, implicit $mode, implicit $exec
|
|
%10:areg_128 = V_MFMA_F32_4X4X1F32_e64 %1, %3, %9, 0, 0, 0, implicit $mode, implicit $exec
|
|
%11:areg_128 = V_MFMA_F32_4X4X1F32_e64 %1, %3, %10, 0, 0, 0, implicit $mode, implicit $exec
|
|
S_NOP 0
|
|
SCHED_BARRIER 8
|
|
S_NOP 0
|
|
%12:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR %0, %1, 512, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1)
|
|
%13:vgpr_32 = nsw V_MUL_LO_U32_e64 %12, %12, implicit $exec
|
|
GLOBAL_STORE_DWORD_SADDR %1, %13, %0, 512, 0, implicit $exec :: (store (s32) into %ir.out, !noalias !0, addrspace 1)
|
|
S_ENDPGM 0, implicit %5, implicit %6, implicit %11
|
|
...
|
|
|
|
# MASK = 0x0000 0004: SALU instructions may be scheduled across SCHED_BARRIER.
|
|
|
|
---
|
|
name: sched_barrier_mask_4_bundle
|
|
tracksRegLiveness: true
|
|
body: |
|
|
bb.0:
|
|
; CHECK-LABEL: name: sched_barrier_mask_4_bundle
|
|
; CHECK: [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
|
|
; CHECK-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
|
|
; CHECK-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[DEF]], [[DEF1]], 512, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1)
|
|
; CHECK-NEXT: [[GLOBAL_LOAD_DWORD_SADDR1:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[DEF]], [[DEF1]], 0, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1)
|
|
; CHECK-NEXT: [[V_MUL_LO_U32_e64_:%[0-9]+]]:vgpr_32 = nsw V_MUL_LO_U32_e64 [[GLOBAL_LOAD_DWORD_SADDR]], [[GLOBAL_LOAD_DWORD_SADDR]], implicit $exec
|
|
; CHECK-NEXT: [[V_MUL_LO_U32_e64_1:%[0-9]+]]:vgpr_32 = nsw V_MUL_LO_U32_e64 [[GLOBAL_LOAD_DWORD_SADDR1]], [[GLOBAL_LOAD_DWORD_SADDR1]], implicit $exec
|
|
; CHECK-NEXT: BUNDLE [[GLOBAL_LOAD_DWORD_SADDR]], implicit [[GLOBAL_LOAD_DWORD_SADDR1]] {
|
|
; CHECK-NEXT: S_NOP 0
|
|
; CHECK-NEXT: S_NOP 0
|
|
; CHECK-NEXT: }
|
|
; CHECK-NEXT: SCHED_BARRIER 4
|
|
; CHECK-NEXT: GLOBAL_STORE_DWORD_SADDR [[DEF1]], [[V_MUL_LO_U32_e64_]], [[DEF]], 512, 0, implicit $exec :: (store (s32) into %ir.out, !noalias !0, addrspace 1)
|
|
; CHECK-NEXT: GLOBAL_STORE_DWORD_SADDR [[DEF1]], [[V_MUL_LO_U32_e64_1]], [[DEF]], 0, 0, implicit $exec :: (store (s32) into %ir.out, !noalias !0, addrspace 1)
|
|
; CHECK-NEXT: S_ENDPGM 0
|
|
%0:sreg_64 = IMPLICIT_DEF
|
|
%1:vgpr_32 = IMPLICIT_DEF
|
|
%3:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR %0, %1, 0, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1)
|
|
%5:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR %0, %1, 512, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1)
|
|
%4:vgpr_32 = nsw V_MUL_LO_U32_e64 %3, %3, implicit $exec
|
|
%6:vgpr_32 = nsw V_MUL_LO_U32_e64 %5, %5, implicit $exec
|
|
SCHED_BARRIER 4
|
|
BUNDLE implicit %3, %5 {
|
|
S_NOP 0
|
|
S_NOP 0
|
|
}
|
|
GLOBAL_STORE_DWORD_SADDR %1, %6, %0, 512, 0, implicit $exec :: (store (s32) into %ir.out, !noalias !0, addrspace 1)
|
|
GLOBAL_STORE_DWORD_SADDR %1, %4, %0, 0, 0, implicit $exec :: (store (s32) into %ir.out, !noalias !0, addrspace 1)
|
|
S_ENDPGM 0
|
|
...
|
|
|
|
# MASK = 0x0000 0000: No instructions may be scheduled across SCHED_BARRIER.
|
|
|
|
---
|
|
name: sched_barrier_mask_0_bundle
|
|
tracksRegLiveness: true
|
|
body: |
|
|
bb.0:
|
|
; CHECK-LABEL: name: sched_barrier_mask_0_bundle
|
|
; CHECK: [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
|
|
; CHECK-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
|
|
; CHECK-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[DEF]], [[DEF1]], 0, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1)
|
|
; CHECK-NEXT: [[GLOBAL_LOAD_DWORD_SADDR1:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[DEF]], [[DEF1]], 512, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1)
|
|
; CHECK-NEXT: [[V_MUL_LO_U32_e64_:%[0-9]+]]:vgpr_32 = nsw V_MUL_LO_U32_e64 [[GLOBAL_LOAD_DWORD_SADDR]], [[GLOBAL_LOAD_DWORD_SADDR]], implicit $exec
|
|
; CHECK-NEXT: [[V_MUL_LO_U32_e64_1:%[0-9]+]]:vgpr_32 = nsw V_MUL_LO_U32_e64 [[GLOBAL_LOAD_DWORD_SADDR1]], [[GLOBAL_LOAD_DWORD_SADDR1]], implicit $exec
|
|
; CHECK-NEXT: SCHED_BARRIER 0
|
|
; CHECK-NEXT: BUNDLE [[GLOBAL_LOAD_DWORD_SADDR1]], implicit [[GLOBAL_LOAD_DWORD_SADDR]] {
|
|
; CHECK-NEXT: S_NOP 0
|
|
; CHECK-NEXT: S_NOP 0
|
|
; CHECK-NEXT: }
|
|
; CHECK-NEXT: GLOBAL_STORE_DWORD_SADDR [[DEF1]], [[V_MUL_LO_U32_e64_1]], [[DEF]], 512, 0, implicit $exec :: (store (s32) into %ir.out, !noalias !0, addrspace 1)
|
|
; CHECK-NEXT: GLOBAL_STORE_DWORD_SADDR [[DEF1]], [[V_MUL_LO_U32_e64_]], [[DEF]], 0, 0, implicit $exec :: (store (s32) into %ir.out, !noalias !0, addrspace 1)
|
|
; CHECK-NEXT: S_ENDPGM 0
|
|
%0:sreg_64 = IMPLICIT_DEF
|
|
%1:vgpr_32 = IMPLICIT_DEF
|
|
%3:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR %0, %1, 0, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1)
|
|
%5:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR %0, %1, 512, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1)
|
|
%4:vgpr_32 = nsw V_MUL_LO_U32_e64 %3, %3, implicit $exec
|
|
%6:vgpr_32 = nsw V_MUL_LO_U32_e64 %5, %5, implicit $exec
|
|
SCHED_BARRIER 0
|
|
BUNDLE implicit %3, %5 {
|
|
S_NOP 0
|
|
S_NOP 0
|
|
}
|
|
GLOBAL_STORE_DWORD_SADDR %1, %6, %0, 512, 0, implicit $exec :: (store (s32) into %ir.out, !noalias !0, addrspace 1)
|
|
GLOBAL_STORE_DWORD_SADDR %1, %4, %0, 0, 0, implicit $exec :: (store (s32) into %ir.out, !noalias !0, addrspace 1)
|
|
S_ENDPGM 0
|
|
...
|