; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX9-NO-BACKOFF %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX9-BACKOFF %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX940-BACKOFF %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -mattr=-back-off-barrier -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX9-NO-BACKOFF %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX10-BACKOFF %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX11-BACKOFF %s

; Subtargets must wait for outstanding memory instructions before a barrier if
; they cannot back off of the barrier.
;
; The gfx900 invocation and the gfx90a invocation that disables the
; back-off-barrier attribute share one check prefix, so both are expected to
; produce identical code. The check lines below are generated by the script
; named in the first line of this file; regenerate them with that script
; instead of editing them by hand.

; Plain load -> barrier -> store of the loaded value, with no fences.
; In the expected output, the runs sharing the no-back-off prefix wait on
; vmcnt/lgkmcnt before s_barrier, while the gfx90a, gfx940 and gfx1010 runs
; emit s_barrier first and wait only before the store that uses the loaded
; value.
; NOTE(review): the gfx1100 checks place both s_waitcnt and s_waitcnt_vscnt
; ahead of s_barrier, unlike the other back-off runs. The checks are
; autogenerated, so this presumably reflects llc output at generation time;
; confirm that it is intended for that subtarget.
define void @back_off_barrier_no_fence(ptr %in, ptr %out) #0 {
; GFX9-NO-BACKOFF-LABEL: back_off_barrier_no_fence:
; GFX9-NO-BACKOFF:       ; %bb.0:
; GFX9-NO-BACKOFF-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NO-BACKOFF-NEXT:    flat_load_dword v0, v[0:1]
; GFX9-NO-BACKOFF-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX9-NO-BACKOFF-NEXT:    s_barrier
; GFX9-NO-BACKOFF-NEXT:    flat_store_dword v[2:3], v0
; GFX9-NO-BACKOFF-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX9-NO-BACKOFF-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-BACKOFF-LABEL: back_off_barrier_no_fence:
; GFX9-BACKOFF:       ; %bb.0:
; GFX9-BACKOFF-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-BACKOFF-NEXT:    flat_load_dword v0, v[0:1]
; GFX9-BACKOFF-NEXT:    s_barrier
; GFX9-BACKOFF-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX9-BACKOFF-NEXT:    flat_store_dword v[2:3], v0
; GFX9-BACKOFF-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX9-BACKOFF-NEXT:    s_setpc_b64 s[30:31]
;
; GFX940-BACKOFF-LABEL: back_off_barrier_no_fence:
; GFX940-BACKOFF:       ; %bb.0:
; GFX940-BACKOFF-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-BACKOFF-NEXT:    flat_load_dword v0, v[0:1]
; GFX940-BACKOFF-NEXT:    s_barrier
; GFX940-BACKOFF-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX940-BACKOFF-NEXT:    flat_store_dword v[2:3], v0 sc0 sc1
; GFX940-BACKOFF-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX940-BACKOFF-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-BACKOFF-LABEL: back_off_barrier_no_fence:
; GFX10-BACKOFF:       ; %bb.0:
; GFX10-BACKOFF-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-BACKOFF-NEXT:    flat_load_dword v0, v[0:1]
; GFX10-BACKOFF-NEXT:    s_barrier
; GFX10-BACKOFF-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX10-BACKOFF-NEXT:    flat_store_dword v[2:3], v0
; GFX10-BACKOFF-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-BACKOFF-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-BACKOFF-LABEL: back_off_barrier_no_fence:
; GFX11-BACKOFF:       ; %bb.0:
; GFX11-BACKOFF-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-BACKOFF-NEXT:    flat_load_b32 v0, v[0:1]
; GFX11-BACKOFF-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX11-BACKOFF-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX11-BACKOFF-NEXT:    s_barrier
; GFX11-BACKOFF-NEXT:    flat_store_b32 v[2:3], v0
; GFX11-BACKOFF-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-BACKOFF-NEXT:    s_setpc_b64 s[30:31]
  %load = load i32, ptr %in
  call void @llvm.amdgcn.s.barrier()
  store i32 %load, ptr %out
  ret void
}

; Same load -> barrier -> store sequence, but the barrier is bracketed by a
; workgroup-scope release fence before it and a workgroup-scope acquire fence
; after it.
; In the expected output, the gfx90a and gfx940 runs wait only on lgkmcnt
; before s_barrier and defer the vmcnt wait until after it. The gfx1010 and
; gfx1100 runs wait on vmcnt/lgkmcnt and vscnt before s_barrier and emit
; buffer_gl0_inv after it. The runs sharing the no-back-off prefix produce the
; same code as in the function without fences.
define void @back_off_barrier_with_fence(ptr %in, ptr %out) #0 {
; GFX9-NO-BACKOFF-LABEL: back_off_barrier_with_fence:
; GFX9-NO-BACKOFF:       ; %bb.0:
; GFX9-NO-BACKOFF-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NO-BACKOFF-NEXT:    flat_load_dword v0, v[0:1]
; GFX9-NO-BACKOFF-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX9-NO-BACKOFF-NEXT:    s_barrier
; GFX9-NO-BACKOFF-NEXT:    flat_store_dword v[2:3], v0
; GFX9-NO-BACKOFF-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX9-NO-BACKOFF-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-BACKOFF-LABEL: back_off_barrier_with_fence:
; GFX9-BACKOFF:       ; %bb.0:
; GFX9-BACKOFF-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-BACKOFF-NEXT:    flat_load_dword v0, v[0:1]
; GFX9-BACKOFF-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-BACKOFF-NEXT:    s_barrier
; GFX9-BACKOFF-NEXT:    s_waitcnt vmcnt(0)
; GFX9-BACKOFF-NEXT:    flat_store_dword v[2:3], v0
; GFX9-BACKOFF-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX9-BACKOFF-NEXT:    s_setpc_b64 s[30:31]
;
; GFX940-BACKOFF-LABEL: back_off_barrier_with_fence:
; GFX940-BACKOFF:       ; %bb.0:
; GFX940-BACKOFF-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-BACKOFF-NEXT:    flat_load_dword v0, v[0:1]
; GFX940-BACKOFF-NEXT:    s_waitcnt lgkmcnt(0)
; GFX940-BACKOFF-NEXT:    s_barrier
; GFX940-BACKOFF-NEXT:    s_waitcnt vmcnt(0)
; GFX940-BACKOFF-NEXT:    flat_store_dword v[2:3], v0 sc0 sc1
; GFX940-BACKOFF-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX940-BACKOFF-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-BACKOFF-LABEL: back_off_barrier_with_fence:
; GFX10-BACKOFF:       ; %bb.0:
; GFX10-BACKOFF-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-BACKOFF-NEXT:    flat_load_dword v0, v[0:1]
; GFX10-BACKOFF-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX10-BACKOFF-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-BACKOFF-NEXT:    s_barrier
; GFX10-BACKOFF-NEXT:    buffer_gl0_inv
; GFX10-BACKOFF-NEXT:    flat_store_dword v[2:3], v0
; GFX10-BACKOFF-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-BACKOFF-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-BACKOFF-LABEL: back_off_barrier_with_fence:
; GFX11-BACKOFF:       ; %bb.0:
; GFX11-BACKOFF-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-BACKOFF-NEXT:    flat_load_b32 v0, v[0:1]
; GFX11-BACKOFF-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX11-BACKOFF-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX11-BACKOFF-NEXT:    s_barrier
; GFX11-BACKOFF-NEXT:    buffer_gl0_inv
; GFX11-BACKOFF-NEXT:    flat_store_b32 v[2:3], v0
; GFX11-BACKOFF-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-BACKOFF-NEXT:    s_setpc_b64 s[30:31]
  %load = load i32, ptr %in
  fence syncscope("workgroup") release
  call void @llvm.amdgcn.s.barrier()
  fence syncscope("workgroup") acquire
  store i32 %load, ptr %out
  ret void
}

; Barrier intrinsic called by both test functions.
declare void @llvm.amdgcn.s.barrier()

; Attribute group #0 is applied to both test functions above.
attributes #0 = { nounwind }