; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py ; RUN: llc -mtriple=amdgcn -mcpu=gfx908 -verify-machineinstrs -amdgpu-atomic-optimizer-strategy=None -stop-after=amdgpu-isel -o - %s | FileCheck -check-prefix=GCN %s define amdgpu_cs void @mmo_offsets0(ptr addrspace(6) inreg noalias align(16) dereferenceable(18446744073709551615) %arg0, i32 %arg1) { ; GCN-LABEL: name: mmo_offsets0 ; GCN: bb.0.bb.0: ; GCN-NEXT: liveins: $sgpr0, $vgpr0 ; GCN-NEXT: {{ $}} ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GCN-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr0 ; GCN-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[S_MOV_B32_]], %subreg.sub1 ; GCN-NEXT: [[S_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[REG_SEQUENCE]], 0, 0 :: (dereferenceable invariant load (s128) from %ir.arg0, addrspace 6) ; GCN-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[REG_SEQUENCE]], 0, 0 :: (dereferenceable invariant load (s64) from %ir.arg0, align 16, addrspace 6) ; GCN-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[S_LOAD_DWORDX2_IMM]].sub1 ; GCN-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[S_LOAD_DWORDX2_IMM]].sub0 ; GCN-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 8 ; GCN-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY1]], killed [[S_MOV_B32_1]], implicit-def dead $scc ; GCN-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE killed [[S_ADD_I32_]], %subreg.sub0, [[S_MOV_B32_]], %subreg.sub1 ; GCN-NEXT: [[S_LOAD_DWORDX2_IMM1:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM killed [[REG_SEQUENCE1]], 0, 0 :: (dereferenceable invariant load (s64) from %ir.arg0 + 8, basealign 16, addrspace 6) ; GCN-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[S_LOAD_DWORDX2_IMM1]].sub1 ; GCN-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[S_LOAD_DWORDX2_IMM1]].sub0 ; GCN-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY3]], %subreg.sub0, killed 
[[COPY2]], %subreg.sub1, killed [[COPY5]], %subreg.sub2, killed [[COPY4]], %subreg.sub3 ; GCN-NEXT: [[BUFFER_LOAD_DWORDX4_OFFSET:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 16, 0, 0, implicit $exec :: (dereferenceable load (s128), align 1, addrspace 8) ; GCN-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec ; GCN-NEXT: [[BUFFER_LOAD_DWORDX4_IDXEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 16, 0, 0, implicit $exec :: (dereferenceable load (s128), align 1, addrspace 8) ; GCN-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (s128), align 1, addrspace 8) ; GCN-NEXT: [[BUFFER_LOAD_DWORDX4_IDXEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_IDXEN [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 16, 0, 0, implicit $exec :: (dereferenceable load (s128), align 1, addrspace 8) ; GCN-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */ ; GCN-NEXT: BUFFER_STORE_DWORDX4_OFFSET_exact killed [[BUFFER_LOAD_DWORDX4_OFFSET]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 32, 0, 0, implicit $exec :: (dereferenceable store (s128), align 1, addrspace 8) ; GCN-NEXT: BUFFER_STORE_DWORDX4_OFFEN_exact killed [[BUFFER_LOAD_DWORDX4_OFFEN]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable store (s128), align 1, addrspace 8) ; GCN-NEXT: BUFFER_STORE_DWORDX4_IDXEN_exact killed [[BUFFER_LOAD_DWORDX4_IDXEN]], [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 32, 0, 0, implicit $exec :: (dereferenceable store (s128), align 1, addrspace 8) ; GCN-NEXT: BUFFER_STORE_DWORDX4_IDXEN_exact killed [[BUFFER_LOAD_DWORDX4_IDXEN1]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 32, 0, 0, implicit $exec :: (dereferenceable store (s128), align 1, addrspace 8) ; GCN-NEXT: INLINEASM &"", 1 /* sideeffect 
attdialect */ ; GCN-NEXT: [[BUFFER_LOAD_FORMAT_XYZW_OFFSET:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_OFFSET [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 48, 0, 0, implicit $exec :: (dereferenceable load (s128), align 1, addrspace 8) ; GCN-NEXT: [[BUFFER_LOAD_FORMAT_XYZW_IDXEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 48, 0, 0, implicit $exec :: (dereferenceable load (s128), align 1, addrspace 8) ; GCN-NEXT: [[BUFFER_LOAD_FORMAT_XYZW_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_OFFEN [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (s128), align 1, addrspace 8) ; GCN-NEXT: [[BUFFER_LOAD_FORMAT_XYZW_IDXEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_IDXEN [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 48, 0, 0, implicit $exec :: (dereferenceable load (s128), align 1, addrspace 8) ; GCN-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */ ; GCN-NEXT: BUFFER_STORE_FORMAT_XYZW_OFFSET_exact killed [[BUFFER_LOAD_FORMAT_XYZW_OFFSET]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 64, 0, 0, implicit $exec :: (dereferenceable store (s128), align 1, addrspace 8) ; GCN-NEXT: BUFFER_STORE_FORMAT_XYZW_OFFEN_exact killed [[BUFFER_LOAD_FORMAT_XYZW_OFFEN]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable store (s128), align 1, addrspace 8) ; GCN-NEXT: BUFFER_STORE_FORMAT_XYZW_IDXEN_exact killed [[BUFFER_LOAD_FORMAT_XYZW_IDXEN]], [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 64, 0, 0, implicit $exec :: (dereferenceable store (s128), align 1, addrspace 8) ; GCN-NEXT: BUFFER_STORE_FORMAT_XYZW_IDXEN_exact killed [[BUFFER_LOAD_FORMAT_XYZW_IDXEN1]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 64, 0, 0, implicit $exec :: (dereferenceable store (s128), align 1, addrspace 8) ; GCN-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */ ; GCN-NEXT: BUFFER_ATOMIC_ADD_OFFSET [[COPY]], [[S_LOAD_DWORDX4_IMM]], 
[[S_MOV_B32_]], 80, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8) ; GCN-NEXT: BUFFER_ATOMIC_ADD_OFFEN [[COPY]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8) ; GCN-NEXT: BUFFER_ATOMIC_ADD_IDXEN [[COPY]], [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 80, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8) ; GCN-NEXT: BUFFER_ATOMIC_ADD_IDXEN [[COPY]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 80, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8) ; GCN-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */ ; GCN-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY]], %subreg.sub1 ; GCN-NEXT: BUFFER_ATOMIC_CMPSWAP_OFFSET [[REG_SEQUENCE3]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 96, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8) ; GCN-NEXT: BUFFER_ATOMIC_CMPSWAP_OFFEN [[REG_SEQUENCE3]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8) ; GCN-NEXT: BUFFER_ATOMIC_CMPSWAP_IDXEN [[REG_SEQUENCE3]], [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 96, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8) ; GCN-NEXT: BUFFER_ATOMIC_CMPSWAP_IDXEN [[REG_SEQUENCE3]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 96, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8) ; GCN-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */ ; GCN-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1065353216, implicit $exec ; GCN-NEXT: BUFFER_ATOMIC_ADD_F32_OFFSET [[V_MOV_B32_e32_1]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 112, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8) ; GCN-NEXT: 
BUFFER_ATOMIC_ADD_F32_OFFEN [[V_MOV_B32_e32_1]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8) ; GCN-NEXT: BUFFER_ATOMIC_ADD_F32_IDXEN [[V_MOV_B32_e32_1]], [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 112, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8) ; GCN-NEXT: BUFFER_ATOMIC_ADD_F32_IDXEN [[V_MOV_B32_e32_1]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 112, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8) ; GCN-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */ ; GCN-NEXT: [[BUFFER_LOAD_DWORDX4_OFFSET1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 128, 0, 0, implicit $exec :: (dereferenceable load (s128), align 1, addrspace 8) ; GCN-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 64 ; GCN-NEXT: [[BUFFER_LOAD_DWORDX4_OFFSET2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_2]], 64, 0, 0, implicit $exec :: (dereferenceable load (s128), align 1, addrspace 8) ; GCN-NEXT: [[S_MOV_B32_3:%[0-9]+]]:sreg_32 = S_MOV_B32 128 ; GCN-NEXT: [[BUFFER_LOAD_DWORDX4_OFFSET3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_3]], 0, 0, 0, implicit $exec :: (dereferenceable load (s128), align 1, addrspace 8) ; GCN-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_3]], 0, 0, 0, implicit $exec :: (dereferenceable load (s128), align 1, addrspace 8) ; GCN-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY [[COPY]] ; GCN-NEXT: [[BUFFER_LOAD_DWORDX4_OFFSET4:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[S_LOAD_DWORDX4_IMM]], [[COPY6]], 128, 0, 0, implicit $exec :: (dereferenceable load (s128), align 1, addrspace 8) ; GCN-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */ ; GCN-NEXT: 
[[BUFFER_LOAD_DWORDX4_OFFSET5:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE2]], [[S_MOV_B32_]], 128, 0, 0, implicit $exec :: (dereferenceable load (s128) from %ir.tmp1, align 1, addrspace 8) ; GCN-NEXT: [[BUFFER_LOAD_DWORDX4_OFFSET6:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE2]], [[S_MOV_B32_2]], 64, 0, 0, implicit $exec :: (dereferenceable load (s128) from %ir.tmp1, align 1, addrspace 8) ; GCN-NEXT: [[BUFFER_LOAD_DWORDX4_OFFSET7:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE2]], [[S_MOV_B32_3]], 0, 0, 0, implicit $exec :: (dereferenceable load (s128) from %ir.tmp1, align 1, addrspace 8) ; GCN-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY]], [[REG_SEQUENCE2]], [[S_MOV_B32_3]], 0, 0, 0, implicit $exec :: (dereferenceable load (s128) from %ir.tmp1, align 1, addrspace 8) ; GCN-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[COPY]] ; GCN-NEXT: [[BUFFER_LOAD_DWORDX4_OFFSET8:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE2]], [[COPY7]], 128, 0, 0, implicit $exec :: (dereferenceable load (s128) from %ir.tmp1, align 1, addrspace 8) ; GCN-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */ ; GCN-NEXT: [[BUFFER_LOAD_FORMAT_XYZW_OFFSET1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_OFFSET [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 144, 0, 0, implicit $exec :: (dereferenceable load (s128), align 1, addrspace 8) ; GCN-NEXT: [[S_MOV_B32_4:%[0-9]+]]:sreg_32 = S_MOV_B32 72 ; GCN-NEXT: [[BUFFER_LOAD_FORMAT_XYZW_OFFSET2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_OFFSET [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_4]], 72, 0, 0, implicit $exec :: (dereferenceable load (s128), align 1, addrspace 8) ; GCN-NEXT: [[S_MOV_B32_5:%[0-9]+]]:sreg_32 = S_MOV_B32 144 ; GCN-NEXT: [[BUFFER_LOAD_FORMAT_XYZW_OFFSET3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_OFFSET [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_5]], 0, 0, 0, implicit $exec :: (dereferenceable load (s128), align 1, addrspace 8) ; GCN-NEXT: 
[[BUFFER_LOAD_FORMAT_XYZW_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_OFFEN [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_5]], 0, 0, 0, implicit $exec :: (dereferenceable load (s128), align 1, addrspace 8) ; GCN-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[COPY]] ; GCN-NEXT: [[BUFFER_LOAD_FORMAT_XYZW_OFFSET4:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_OFFSET [[S_LOAD_DWORDX4_IMM]], [[COPY8]], 144, 0, 0, implicit $exec :: (dereferenceable load (s128), align 1, addrspace 8) ; GCN-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */ ; GCN-NEXT: [[BUFFER_LOAD_FORMAT_XYZW_OFFSET5:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_OFFSET [[REG_SEQUENCE2]], [[S_MOV_B32_]], 144, 0, 0, implicit $exec :: (dereferenceable load (s128) from %ir.tmp1, align 1, addrspace 8) ; GCN-NEXT: [[BUFFER_LOAD_FORMAT_XYZW_OFFSET6:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_OFFSET [[REG_SEQUENCE2]], [[S_MOV_B32_4]], 72, 0, 0, implicit $exec :: (dereferenceable load (s128) from %ir.tmp1, align 1, addrspace 8) ; GCN-NEXT: [[BUFFER_LOAD_FORMAT_XYZW_OFFSET7:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_OFFSET [[REG_SEQUENCE2]], [[S_MOV_B32_5]], 0, 0, 0, implicit $exec :: (dereferenceable load (s128) from %ir.tmp1, align 1, addrspace 8) ; GCN-NEXT: [[BUFFER_LOAD_FORMAT_XYZW_OFFEN2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_OFFEN [[COPY]], [[REG_SEQUENCE2]], [[S_MOV_B32_5]], 0, 0, 0, implicit $exec :: (dereferenceable load (s128) from %ir.tmp1, align 1, addrspace 8) ; GCN-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[COPY]] ; GCN-NEXT: [[BUFFER_LOAD_FORMAT_XYZW_OFFSET8:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_OFFSET [[REG_SEQUENCE2]], [[COPY9]], 144, 0, 0, implicit $exec :: (dereferenceable load (s128) from %ir.tmp1, align 1, addrspace 8) ; GCN-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */ ; GCN-NEXT: BUFFER_ATOMIC_ADD_OFFSET [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 160, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8) ; GCN-NEXT: 
[[S_MOV_B32_6:%[0-9]+]]:sreg_32 = S_MOV_B32 80 ; GCN-NEXT: BUFFER_ATOMIC_ADD_OFFSET [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_6]], 80, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8) ; GCN-NEXT: [[S_MOV_B32_7:%[0-9]+]]:sreg_32 = S_MOV_B32 160 ; GCN-NEXT: BUFFER_ATOMIC_ADD_OFFSET [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_7]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8) ; GCN-NEXT: BUFFER_ATOMIC_ADD_OFFEN [[COPY]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_7]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8) ; GCN-NEXT: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[COPY]] ; GCN-NEXT: BUFFER_ATOMIC_ADD_OFFSET [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[COPY10]], 160, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8) ; GCN-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */ ; GCN-NEXT: BUFFER_ATOMIC_ADD_OFFSET [[COPY]], [[REG_SEQUENCE2]], [[S_MOV_B32_]], 160, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.tmp1, align 1, addrspace 8) ; GCN-NEXT: BUFFER_ATOMIC_ADD_OFFSET [[COPY]], [[REG_SEQUENCE2]], [[S_MOV_B32_6]], 80, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.tmp1, align 1, addrspace 8) ; GCN-NEXT: BUFFER_ATOMIC_ADD_OFFSET [[COPY]], [[REG_SEQUENCE2]], [[S_MOV_B32_7]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.tmp1, align 1, addrspace 8) ; GCN-NEXT: BUFFER_ATOMIC_ADD_OFFEN [[COPY]], [[COPY]], [[REG_SEQUENCE2]], [[S_MOV_B32_7]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.tmp1, align 1, addrspace 8) ; GCN-NEXT: [[COPY11:%[0-9]+]]:sreg_32 = COPY [[COPY]] ; GCN-NEXT: BUFFER_ATOMIC_ADD_OFFSET [[COPY]], [[REG_SEQUENCE2]], [[COPY11]], 160, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.tmp1, align 1, addrspace 8) ; GCN-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */ ; GCN-NEXT: 
BUFFER_ATOMIC_CMPSWAP_OFFSET [[REG_SEQUENCE3]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 176, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8) ; GCN-NEXT: [[S_MOV_B32_8:%[0-9]+]]:sreg_32 = S_MOV_B32 88 ; GCN-NEXT: BUFFER_ATOMIC_CMPSWAP_OFFSET [[REG_SEQUENCE3]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_8]], 88, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8) ; GCN-NEXT: [[S_MOV_B32_9:%[0-9]+]]:sreg_32 = S_MOV_B32 176 ; GCN-NEXT: BUFFER_ATOMIC_CMPSWAP_OFFSET [[REG_SEQUENCE3]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_9]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8) ; GCN-NEXT: BUFFER_ATOMIC_CMPSWAP_OFFEN [[REG_SEQUENCE3]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_9]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8) ; GCN-NEXT: [[COPY12:%[0-9]+]]:sreg_32 = COPY [[COPY]] ; GCN-NEXT: BUFFER_ATOMIC_CMPSWAP_OFFSET [[REG_SEQUENCE3]], [[S_LOAD_DWORDX4_IMM]], [[COPY12]], 176, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8) ; GCN-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */ ; GCN-NEXT: BUFFER_ATOMIC_CMPSWAP_OFFSET [[REG_SEQUENCE3]], [[REG_SEQUENCE2]], [[S_MOV_B32_]], 176, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.tmp1, align 1, addrspace 8) ; GCN-NEXT: BUFFER_ATOMIC_CMPSWAP_OFFSET [[REG_SEQUENCE3]], [[REG_SEQUENCE2]], [[S_MOV_B32_8]], 88, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.tmp1, align 1, addrspace 8) ; GCN-NEXT: BUFFER_ATOMIC_CMPSWAP_OFFSET [[REG_SEQUENCE3]], [[REG_SEQUENCE2]], [[S_MOV_B32_9]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.tmp1, align 1, addrspace 8) ; GCN-NEXT: BUFFER_ATOMIC_CMPSWAP_OFFEN [[REG_SEQUENCE3]], [[COPY]], [[REG_SEQUENCE2]], [[S_MOV_B32_9]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.tmp1, align 1, addrspace 8) ; GCN-NEXT: 
[[COPY13:%[0-9]+]]:sreg_32 = COPY [[COPY]] ; GCN-NEXT: BUFFER_ATOMIC_CMPSWAP_OFFSET [[REG_SEQUENCE3]], [[REG_SEQUENCE2]], [[COPY13]], 176, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.tmp1, align 1, addrspace 8) ; GCN-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */ ; GCN-NEXT: BUFFER_STORE_DWORDX4_OFFSET_exact killed [[BUFFER_LOAD_DWORDX4_OFFSET1]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 192, 0, 0, implicit $exec :: (dereferenceable store (s128), align 1, addrspace 8) ; GCN-NEXT: [[S_MOV_B32_10:%[0-9]+]]:sreg_32 = S_MOV_B32 96 ; GCN-NEXT: BUFFER_STORE_DWORDX4_OFFSET_exact killed [[BUFFER_LOAD_DWORDX4_OFFSET2]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_10]], 96, 0, 0, implicit $exec :: (dereferenceable store (s128), align 1, addrspace 8) ; GCN-NEXT: [[S_MOV_B32_11:%[0-9]+]]:sreg_32 = S_MOV_B32 192 ; GCN-NEXT: BUFFER_STORE_DWORDX4_OFFSET_exact killed [[BUFFER_LOAD_DWORDX4_OFFSET3]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_11]], 0, 0, 0, implicit $exec :: (dereferenceable store (s128), align 1, addrspace 8) ; GCN-NEXT: BUFFER_STORE_DWORDX4_OFFEN_exact killed [[BUFFER_LOAD_DWORDX4_OFFEN1]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_11]], 0, 0, 0, implicit $exec :: (dereferenceable store (s128), align 1, addrspace 8) ; GCN-NEXT: [[COPY14:%[0-9]+]]:sreg_32 = COPY [[COPY]] ; GCN-NEXT: BUFFER_STORE_DWORDX4_OFFSET_exact killed [[BUFFER_LOAD_DWORDX4_OFFSET4]], [[S_LOAD_DWORDX4_IMM]], [[COPY14]], 192, 0, 0, implicit $exec :: (dereferenceable store (s128), align 1, addrspace 8) ; GCN-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */ ; GCN-NEXT: BUFFER_STORE_DWORDX4_OFFSET_exact killed [[BUFFER_LOAD_DWORDX4_OFFSET5]], [[REG_SEQUENCE2]], [[S_MOV_B32_]], 192, 0, 0, implicit $exec :: (dereferenceable store (s128) into %ir.tmp1, align 1, addrspace 8) ; GCN-NEXT: BUFFER_STORE_DWORDX4_OFFSET_exact killed [[BUFFER_LOAD_DWORDX4_OFFSET6]], [[REG_SEQUENCE2]], [[S_MOV_B32_10]], 96, 0, 0, implicit $exec :: (dereferenceable store (s128) into %ir.tmp1, align 1, 
addrspace 8) ; GCN-NEXT: BUFFER_STORE_DWORDX4_OFFSET_exact killed [[BUFFER_LOAD_DWORDX4_OFFSET7]], [[REG_SEQUENCE2]], [[S_MOV_B32_11]], 0, 0, 0, implicit $exec :: (dereferenceable store (s128) into %ir.tmp1, align 1, addrspace 8) ; GCN-NEXT: BUFFER_STORE_DWORDX4_OFFEN_exact killed [[BUFFER_LOAD_DWORDX4_OFFEN2]], [[COPY]], [[REG_SEQUENCE2]], [[S_MOV_B32_11]], 0, 0, 0, implicit $exec :: (dereferenceable store (s128) into %ir.tmp1, align 1, addrspace 8) ; GCN-NEXT: [[COPY15:%[0-9]+]]:sreg_32 = COPY [[COPY]] ; GCN-NEXT: BUFFER_STORE_DWORDX4_OFFSET_exact killed [[BUFFER_LOAD_DWORDX4_OFFSET8]], [[REG_SEQUENCE2]], [[COPY15]], 192, 0, 0, implicit $exec :: (dereferenceable store (s128) into %ir.tmp1, align 1, addrspace 8) ; GCN-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */ ; GCN-NEXT: BUFFER_STORE_FORMAT_XYZW_OFFSET_exact killed [[BUFFER_LOAD_FORMAT_XYZW_OFFSET1]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 208, 0, 0, implicit $exec :: (dereferenceable store (s128), align 1, addrspace 8) ; GCN-NEXT: [[S_MOV_B32_12:%[0-9]+]]:sreg_32 = S_MOV_B32 104 ; GCN-NEXT: BUFFER_STORE_FORMAT_XYZW_OFFSET_exact killed [[BUFFER_LOAD_FORMAT_XYZW_OFFSET2]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_12]], 104, 0, 0, implicit $exec :: (dereferenceable store (s128), align 1, addrspace 8) ; GCN-NEXT: [[S_MOV_B32_13:%[0-9]+]]:sreg_32 = S_MOV_B32 208 ; GCN-NEXT: BUFFER_STORE_FORMAT_XYZW_OFFSET_exact killed [[BUFFER_LOAD_FORMAT_XYZW_OFFSET3]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_13]], 0, 0, 0, implicit $exec :: (dereferenceable store (s128), align 1, addrspace 8) ; GCN-NEXT: BUFFER_STORE_FORMAT_XYZW_OFFEN_exact killed [[BUFFER_LOAD_FORMAT_XYZW_OFFEN1]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_13]], 0, 0, 0, implicit $exec :: (dereferenceable store (s128), align 1, addrspace 8) ; GCN-NEXT: [[COPY16:%[0-9]+]]:sreg_32 = COPY [[COPY]] ; GCN-NEXT: BUFFER_STORE_FORMAT_XYZW_OFFSET_exact killed [[BUFFER_LOAD_FORMAT_XYZW_OFFSET4]], [[S_LOAD_DWORDX4_IMM]], [[COPY16]], 208, 0, 0, implicit $exec :: 
(dereferenceable store (s128), align 1, addrspace 8) ; GCN-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */ ; GCN-NEXT: BUFFER_STORE_FORMAT_XYZW_OFFSET_exact killed [[BUFFER_LOAD_FORMAT_XYZW_OFFSET5]], [[REG_SEQUENCE2]], [[S_MOV_B32_]], 208, 0, 0, implicit $exec :: (dereferenceable store (s128) into %ir.tmp1, align 1, addrspace 8) ; GCN-NEXT: BUFFER_STORE_FORMAT_XYZW_OFFSET_exact killed [[BUFFER_LOAD_FORMAT_XYZW_OFFSET6]], [[REG_SEQUENCE2]], [[S_MOV_B32_12]], 104, 0, 0, implicit $exec :: (dereferenceable store (s128) into %ir.tmp1, align 1, addrspace 8) ; GCN-NEXT: BUFFER_STORE_FORMAT_XYZW_OFFSET_exact killed [[BUFFER_LOAD_FORMAT_XYZW_OFFSET7]], [[REG_SEQUENCE2]], [[S_MOV_B32_13]], 0, 0, 0, implicit $exec :: (dereferenceable store (s128) into %ir.tmp1, align 1, addrspace 8) ; GCN-NEXT: BUFFER_STORE_FORMAT_XYZW_OFFEN_exact killed [[BUFFER_LOAD_FORMAT_XYZW_OFFEN2]], [[COPY]], [[REG_SEQUENCE2]], [[S_MOV_B32_13]], 0, 0, 0, implicit $exec :: (dereferenceable store (s128) into %ir.tmp1, align 1, addrspace 8) ; GCN-NEXT: [[COPY17:%[0-9]+]]:sreg_32 = COPY [[COPY]] ; GCN-NEXT: BUFFER_STORE_FORMAT_XYZW_OFFSET_exact killed [[BUFFER_LOAD_FORMAT_XYZW_OFFSET8]], [[REG_SEQUENCE2]], [[COPY17]], 208, 0, 0, implicit $exec :: (dereferenceable store (s128) into %ir.tmp1, align 1, addrspace 8) ; GCN-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */ ; GCN-NEXT: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] ; GCN-NEXT: [[BUFFER_LOAD_DWORDX4_IDXEN2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_IDXEN [[COPY18]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 224, 0, 0, implicit $exec :: (dereferenceable load (s128), align 1, addrspace 8) ; GCN-NEXT: [[S_MOV_B32_14:%[0-9]+]]:sreg_32 = S_MOV_B32 112 ; GCN-NEXT: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] ; GCN-NEXT: [[BUFFER_LOAD_DWORDX4_IDXEN3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_IDXEN [[COPY19]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_14]], 112, 0, 0, implicit $exec :: (dereferenceable load (s128), align 1, addrspace 8) ; GCN-NEXT: 
[[S_MOV_B32_15:%[0-9]+]]:sreg_32 = S_MOV_B32 224 ; GCN-NEXT: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] ; GCN-NEXT: [[BUFFER_LOAD_DWORDX4_IDXEN4:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_IDXEN [[COPY20]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_15]], 0, 0, 0, implicit $exec :: (dereferenceable load (s128), align 1, addrspace 8) ; GCN-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[COPY]], %subreg.sub1 ; GCN-NEXT: [[BUFFER_LOAD_DWORDX4_BOTHEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_BOTHEN [[REG_SEQUENCE4]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_15]], 0, 0, 0, implicit $exec :: (dereferenceable load (s128), align 1, addrspace 8) ; GCN-NEXT: [[COPY21:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] ; GCN-NEXT: [[COPY22:%[0-9]+]]:sreg_32 = COPY [[COPY]] ; GCN-NEXT: [[BUFFER_LOAD_DWORDX4_IDXEN5:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_IDXEN [[COPY21]], [[S_LOAD_DWORDX4_IMM]], [[COPY22]], 224, 0, 0, implicit $exec :: (dereferenceable load (s128), align 1, addrspace 8) ; GCN-NEXT: [[BUFFER_LOAD_DWORDX4_IDXEN6:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 224, 0, 0, implicit $exec :: (dereferenceable load (s128), align 1, addrspace 8) ; GCN-NEXT: [[BUFFER_LOAD_DWORDX4_IDXEN7:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_IDXEN [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 224, 0, 0, implicit $exec :: (dereferenceable load (s128), align 1, addrspace 8) ; GCN-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */ ; GCN-NEXT: [[COPY23:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] ; GCN-NEXT: [[BUFFER_LOAD_DWORDX4_IDXEN8:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_IDXEN [[COPY23]], [[REG_SEQUENCE2]], [[S_MOV_B32_]], 224, 0, 0, implicit $exec :: (dereferenceable load (s128) from %ir.tmp1, align 1, addrspace 8) ; GCN-NEXT: [[COPY24:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] ; GCN-NEXT: [[BUFFER_LOAD_DWORDX4_IDXEN9:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_IDXEN [[COPY24]], [[REG_SEQUENCE2]], 
[[S_MOV_B32_14]], 112, 0, 0, implicit $exec :: (dereferenceable load (s128) from %ir.tmp1, align 1, addrspace 8) ; GCN-NEXT: [[COPY25:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] ; GCN-NEXT: [[BUFFER_LOAD_DWORDX4_IDXEN10:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_IDXEN [[COPY25]], [[REG_SEQUENCE2]], [[S_MOV_B32_15]], 0, 0, 0, implicit $exec :: (dereferenceable load (s128) from %ir.tmp1, align 1, addrspace 8) ; GCN-NEXT: [[BUFFER_LOAD_DWORDX4_BOTHEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_BOTHEN [[REG_SEQUENCE4]], [[REG_SEQUENCE2]], [[S_MOV_B32_15]], 0, 0, 0, implicit $exec :: (dereferenceable load (s128) from %ir.tmp1, align 1, addrspace 8) ; GCN-NEXT: [[COPY26:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] ; GCN-NEXT: [[COPY27:%[0-9]+]]:sreg_32 = COPY [[COPY]] ; GCN-NEXT: [[BUFFER_LOAD_DWORDX4_IDXEN11:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_IDXEN [[COPY26]], [[REG_SEQUENCE2]], [[COPY27]], 224, 0, 0, implicit $exec :: (dereferenceable load (s128) from %ir.tmp1, align 1, addrspace 8) ; GCN-NEXT: [[BUFFER_LOAD_DWORDX4_IDXEN12:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_IDXEN [[V_MOV_B32_e32_]], [[REG_SEQUENCE2]], [[S_MOV_B32_]], 224, 0, 0, implicit $exec :: (dereferenceable load (s128) from %ir.tmp1, align 1, addrspace 8) ; GCN-NEXT: [[BUFFER_LOAD_DWORDX4_IDXEN13:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_IDXEN [[COPY]], [[REG_SEQUENCE2]], [[S_MOV_B32_]], 224, 0, 0, implicit $exec :: (dereferenceable load (s128) from %ir.tmp1, align 1, addrspace 8) ; GCN-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */ ; GCN-NEXT: [[COPY28:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] ; GCN-NEXT: [[BUFFER_LOAD_FORMAT_XYZW_IDXEN2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_IDXEN [[COPY28]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 240, 0, 0, implicit $exec :: (dereferenceable load (s128), align 1, addrspace 8) ; GCN-NEXT: [[S_MOV_B32_16:%[0-9]+]]:sreg_32 = S_MOV_B32 120 ; GCN-NEXT: [[COPY29:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] ; GCN-NEXT: [[BUFFER_LOAD_FORMAT_XYZW_IDXEN3:%[0-9]+]]:vreg_128 = 
BUFFER_LOAD_FORMAT_XYZW_IDXEN [[COPY29]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_16]], 120, 0, 0, implicit $exec :: (dereferenceable load (s128), align 1, addrspace 8) ; GCN-NEXT: [[S_MOV_B32_17:%[0-9]+]]:sreg_32 = S_MOV_B32 240 ; GCN-NEXT: [[COPY30:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] ; GCN-NEXT: [[BUFFER_LOAD_FORMAT_XYZW_IDXEN4:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_IDXEN [[COPY30]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_17]], 0, 0, 0, implicit $exec :: (dereferenceable load (s128), align 1, addrspace 8) ; GCN-NEXT: [[BUFFER_LOAD_FORMAT_XYZW_BOTHEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_BOTHEN [[REG_SEQUENCE4]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_17]], 0, 0, 0, implicit $exec :: (dereferenceable load (s128), align 1, addrspace 8) ; GCN-NEXT: [[COPY31:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] ; GCN-NEXT: [[COPY32:%[0-9]+]]:sreg_32 = COPY [[COPY]] ; GCN-NEXT: [[BUFFER_LOAD_FORMAT_XYZW_IDXEN5:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_IDXEN [[COPY31]], [[S_LOAD_DWORDX4_IMM]], [[COPY32]], 240, 0, 0, implicit $exec :: (dereferenceable load (s128), align 1, addrspace 8) ; GCN-NEXT: [[BUFFER_LOAD_FORMAT_XYZW_IDXEN6:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 240, 0, 0, implicit $exec :: (dereferenceable load (s128), align 1, addrspace 8) ; GCN-NEXT: [[BUFFER_LOAD_FORMAT_XYZW_IDXEN7:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_IDXEN [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 240, 0, 0, implicit $exec :: (dereferenceable load (s128), align 1, addrspace 8) ; GCN-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */ ; GCN-NEXT: [[COPY33:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] ; GCN-NEXT: [[BUFFER_LOAD_FORMAT_XYZW_IDXEN8:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_IDXEN [[COPY33]], [[REG_SEQUENCE2]], [[S_MOV_B32_]], 240, 0, 0, implicit $exec :: (dereferenceable load (s128) from %ir.tmp1, align 1, addrspace 8) ; GCN-NEXT: [[COPY34:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] ; GCN-NEXT: 
[[BUFFER_LOAD_FORMAT_XYZW_IDXEN9:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_IDXEN [[COPY34]], [[REG_SEQUENCE2]], [[S_MOV_B32_16]], 120, 0, 0, implicit $exec :: (dereferenceable load (s128) from %ir.tmp1, align 1, addrspace 8) ; GCN-NEXT: [[COPY35:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] ; GCN-NEXT: [[BUFFER_LOAD_FORMAT_XYZW_IDXEN10:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_IDXEN [[COPY35]], [[REG_SEQUENCE2]], [[S_MOV_B32_17]], 0, 0, 0, implicit $exec :: (dereferenceable load (s128) from %ir.tmp1, align 1, addrspace 8) ; GCN-NEXT: [[BUFFER_LOAD_FORMAT_XYZW_BOTHEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_BOTHEN [[REG_SEQUENCE4]], [[REG_SEQUENCE2]], [[S_MOV_B32_17]], 0, 0, 0, implicit $exec :: (dereferenceable load (s128) from %ir.tmp1, align 1, addrspace 8) ; GCN-NEXT: [[COPY36:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] ; GCN-NEXT: [[COPY37:%[0-9]+]]:sreg_32 = COPY [[COPY]] ; GCN-NEXT: [[BUFFER_LOAD_FORMAT_XYZW_IDXEN11:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_IDXEN [[COPY36]], [[REG_SEQUENCE2]], [[COPY37]], 240, 0, 0, implicit $exec :: (dereferenceable load (s128) from %ir.tmp1, align 1, addrspace 8) ; GCN-NEXT: [[BUFFER_LOAD_FORMAT_XYZW_IDXEN12:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_IDXEN [[V_MOV_B32_e32_]], [[REG_SEQUENCE2]], [[S_MOV_B32_]], 240, 0, 0, implicit $exec :: (dereferenceable load (s128) from %ir.tmp1, align 1, addrspace 8) ; GCN-NEXT: [[BUFFER_LOAD_FORMAT_XYZW_IDXEN13:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_IDXEN [[COPY]], [[REG_SEQUENCE2]], [[S_MOV_B32_]], 240, 0, 0, implicit $exec :: (dereferenceable load (s128) from %ir.tmp1, align 1, addrspace 8) ; GCN-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */ ; GCN-NEXT: [[COPY38:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] ; GCN-NEXT: BUFFER_ATOMIC_ADD_IDXEN [[COPY]], [[COPY38]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 256, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8) ; GCN-NEXT: [[COPY39:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] ; GCN-NEXT: 
BUFFER_ATOMIC_ADD_IDXEN [[COPY]], [[COPY39]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_3]], 128, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8) ; GCN-NEXT: [[S_MOV_B32_18:%[0-9]+]]:sreg_32 = S_MOV_B32 256 ; GCN-NEXT: [[COPY40:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] ; GCN-NEXT: BUFFER_ATOMIC_ADD_IDXEN [[COPY]], [[COPY40]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_18]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8) ; GCN-NEXT: BUFFER_ATOMIC_ADD_BOTHEN [[COPY]], [[REG_SEQUENCE4]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_18]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8) ; GCN-NEXT: [[COPY41:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] ; GCN-NEXT: [[COPY42:%[0-9]+]]:sreg_32 = COPY [[COPY]] ; GCN-NEXT: BUFFER_ATOMIC_ADD_IDXEN [[COPY]], [[COPY41]], [[S_LOAD_DWORDX4_IMM]], [[COPY42]], 256, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8) ; GCN-NEXT: BUFFER_ATOMIC_ADD_IDXEN [[COPY]], [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 256, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8) ; GCN-NEXT: BUFFER_ATOMIC_ADD_IDXEN [[COPY]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 256, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8) ; GCN-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */ ; GCN-NEXT: [[COPY43:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] ; GCN-NEXT: BUFFER_ATOMIC_ADD_IDXEN [[COPY]], [[COPY43]], [[REG_SEQUENCE2]], [[S_MOV_B32_]], 256, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.tmp1, align 1, addrspace 8) ; GCN-NEXT: [[COPY44:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] ; GCN-NEXT: BUFFER_ATOMIC_ADD_IDXEN [[COPY]], [[COPY44]], [[REG_SEQUENCE2]], [[S_MOV_B32_3]], 128, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.tmp1, align 1, addrspace 8) ; GCN-NEXT: [[COPY45:%[0-9]+]]:vgpr_32 = COPY 
[[S_MOV_B32_]] ; GCN-NEXT: BUFFER_ATOMIC_ADD_IDXEN [[COPY]], [[COPY45]], [[REG_SEQUENCE2]], [[S_MOV_B32_18]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.tmp1, align 1, addrspace 8) ; GCN-NEXT: BUFFER_ATOMIC_ADD_BOTHEN [[COPY]], [[REG_SEQUENCE4]], [[REG_SEQUENCE2]], [[S_MOV_B32_18]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.tmp1, align 1, addrspace 8) ; GCN-NEXT: [[COPY46:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] ; GCN-NEXT: [[COPY47:%[0-9]+]]:sreg_32 = COPY [[COPY]] ; GCN-NEXT: BUFFER_ATOMIC_ADD_IDXEN [[COPY]], [[COPY46]], [[REG_SEQUENCE2]], [[COPY47]], 256, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.tmp1, align 1, addrspace 8) ; GCN-NEXT: BUFFER_ATOMIC_ADD_IDXEN [[COPY]], [[V_MOV_B32_e32_]], [[REG_SEQUENCE2]], [[S_MOV_B32_]], 256, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.tmp1, align 1, addrspace 8) ; GCN-NEXT: BUFFER_ATOMIC_ADD_IDXEN [[COPY]], [[COPY]], [[REG_SEQUENCE2]], [[S_MOV_B32_]], 256, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.tmp1, align 1, addrspace 8) ; GCN-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */ ; GCN-NEXT: [[COPY48:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] ; GCN-NEXT: BUFFER_ATOMIC_CMPSWAP_IDXEN [[REG_SEQUENCE3]], [[COPY48]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 272, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8) ; GCN-NEXT: [[S_MOV_B32_19:%[0-9]+]]:sreg_32 = S_MOV_B32 136 ; GCN-NEXT: [[COPY49:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] ; GCN-NEXT: BUFFER_ATOMIC_CMPSWAP_IDXEN [[REG_SEQUENCE3]], [[COPY49]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_19]], 136, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8) ; GCN-NEXT: [[S_MOV_B32_20:%[0-9]+]]:sreg_32 = S_MOV_B32 272 ; GCN-NEXT: [[COPY50:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] ; GCN-NEXT: BUFFER_ATOMIC_CMPSWAP_IDXEN [[REG_SEQUENCE3]], [[COPY50]], [[S_LOAD_DWORDX4_IMM]], 
[[S_MOV_B32_20]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8) ; GCN-NEXT: BUFFER_ATOMIC_CMPSWAP_BOTHEN [[REG_SEQUENCE3]], [[REG_SEQUENCE4]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_20]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8) ; GCN-NEXT: [[COPY51:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] ; GCN-NEXT: [[COPY52:%[0-9]+]]:sreg_32 = COPY [[COPY]] ; GCN-NEXT: BUFFER_ATOMIC_CMPSWAP_IDXEN [[REG_SEQUENCE3]], [[COPY51]], [[S_LOAD_DWORDX4_IMM]], [[COPY52]], 272, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8) ; GCN-NEXT: BUFFER_ATOMIC_CMPSWAP_IDXEN [[REG_SEQUENCE3]], [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 272, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8) ; GCN-NEXT: BUFFER_ATOMIC_CMPSWAP_IDXEN [[REG_SEQUENCE3]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 272, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8) ; GCN-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */ ; GCN-NEXT: [[COPY53:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] ; GCN-NEXT: BUFFER_ATOMIC_CMPSWAP_IDXEN [[REG_SEQUENCE3]], [[COPY53]], [[REG_SEQUENCE2]], [[S_MOV_B32_]], 272, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.tmp1, align 1, addrspace 8) ; GCN-NEXT: [[COPY54:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] ; GCN-NEXT: BUFFER_ATOMIC_CMPSWAP_IDXEN [[REG_SEQUENCE3]], [[COPY54]], [[REG_SEQUENCE2]], [[S_MOV_B32_19]], 136, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.tmp1, align 1, addrspace 8) ; GCN-NEXT: [[COPY55:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] ; GCN-NEXT: BUFFER_ATOMIC_CMPSWAP_IDXEN [[REG_SEQUENCE3]], [[COPY55]], [[REG_SEQUENCE2]], [[S_MOV_B32_20]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.tmp1, align 1, addrspace 8) ; GCN-NEXT: BUFFER_ATOMIC_CMPSWAP_BOTHEN [[REG_SEQUENCE3]], [[REG_SEQUENCE4]], 
[[REG_SEQUENCE2]], [[S_MOV_B32_20]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.tmp1, align 1, addrspace 8) ; GCN-NEXT: [[COPY56:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] ; GCN-NEXT: [[COPY57:%[0-9]+]]:sreg_32 = COPY [[COPY]] ; GCN-NEXT: BUFFER_ATOMIC_CMPSWAP_IDXEN [[REG_SEQUENCE3]], [[COPY56]], [[REG_SEQUENCE2]], [[COPY57]], 272, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.tmp1, align 1, addrspace 8) ; GCN-NEXT: BUFFER_ATOMIC_CMPSWAP_IDXEN [[REG_SEQUENCE3]], [[V_MOV_B32_e32_]], [[REG_SEQUENCE2]], [[S_MOV_B32_]], 272, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.tmp1, align 1, addrspace 8) ; GCN-NEXT: BUFFER_ATOMIC_CMPSWAP_IDXEN [[REG_SEQUENCE3]], [[COPY]], [[REG_SEQUENCE2]], [[S_MOV_B32_]], 272, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.tmp1, align 1, addrspace 8) ; GCN-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */ ; GCN-NEXT: [[COPY58:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] ; GCN-NEXT: BUFFER_STORE_DWORDX4_IDXEN_exact killed [[BUFFER_LOAD_DWORDX4_IDXEN2]], [[COPY58]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 288, 0, 0, implicit $exec :: (dereferenceable store (s128), align 1, addrspace 8) ; GCN-NEXT: [[COPY59:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] ; GCN-NEXT: BUFFER_STORE_DWORDX4_IDXEN_exact killed [[BUFFER_LOAD_DWORDX4_IDXEN3]], [[COPY59]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_5]], 144, 0, 0, implicit $exec :: (dereferenceable store (s128), align 1, addrspace 8) ; GCN-NEXT: [[S_MOV_B32_21:%[0-9]+]]:sreg_32 = S_MOV_B32 288 ; GCN-NEXT: [[COPY60:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] ; GCN-NEXT: BUFFER_STORE_DWORDX4_IDXEN_exact killed [[BUFFER_LOAD_DWORDX4_IDXEN4]], [[COPY60]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_21]], 0, 0, 0, implicit $exec :: (dereferenceable store (s128), align 1, addrspace 8) ; GCN-NEXT: BUFFER_STORE_DWORDX4_BOTHEN_exact killed [[BUFFER_LOAD_DWORDX4_BOTHEN]], [[REG_SEQUENCE4]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_21]], 0, 0, 0, 
implicit $exec :: (dereferenceable store (s128), align 1, addrspace 8) ; GCN-NEXT: [[COPY61:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] ; GCN-NEXT: [[COPY62:%[0-9]+]]:sreg_32 = COPY [[COPY]] ; GCN-NEXT: BUFFER_STORE_DWORDX4_IDXEN_exact killed [[BUFFER_LOAD_DWORDX4_IDXEN5]], [[COPY61]], [[S_LOAD_DWORDX4_IMM]], [[COPY62]], 288, 0, 0, implicit $exec :: (dereferenceable store (s128), align 1, addrspace 8) ; GCN-NEXT: BUFFER_STORE_DWORDX4_IDXEN_exact killed [[BUFFER_LOAD_DWORDX4_IDXEN6]], [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 288, 0, 0, implicit $exec :: (dereferenceable store (s128), align 1, addrspace 8) ; GCN-NEXT: BUFFER_STORE_DWORDX4_IDXEN_exact killed [[BUFFER_LOAD_DWORDX4_IDXEN7]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 288, 0, 0, implicit $exec :: (dereferenceable store (s128), align 1, addrspace 8) ; GCN-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */ ; GCN-NEXT: [[COPY63:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] ; GCN-NEXT: BUFFER_STORE_DWORDX4_IDXEN_exact killed [[BUFFER_LOAD_DWORDX4_IDXEN8]], [[COPY63]], [[REG_SEQUENCE2]], [[S_MOV_B32_]], 288, 0, 0, implicit $exec :: (dereferenceable store (s128) into %ir.tmp1, align 1, addrspace 8) ; GCN-NEXT: [[COPY64:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] ; GCN-NEXT: BUFFER_STORE_DWORDX4_IDXEN_exact killed [[BUFFER_LOAD_DWORDX4_IDXEN9]], [[COPY64]], [[REG_SEQUENCE2]], [[S_MOV_B32_5]], 144, 0, 0, implicit $exec :: (dereferenceable store (s128) into %ir.tmp1, align 1, addrspace 8) ; GCN-NEXT: [[COPY65:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] ; GCN-NEXT: BUFFER_STORE_DWORDX4_IDXEN_exact killed [[BUFFER_LOAD_DWORDX4_IDXEN10]], [[COPY65]], [[REG_SEQUENCE2]], [[S_MOV_B32_21]], 0, 0, 0, implicit $exec :: (dereferenceable store (s128) into %ir.tmp1, align 1, addrspace 8) ; GCN-NEXT: BUFFER_STORE_DWORDX4_BOTHEN_exact killed [[BUFFER_LOAD_DWORDX4_BOTHEN1]], [[REG_SEQUENCE4]], [[REG_SEQUENCE2]], [[S_MOV_B32_21]], 0, 0, 0, implicit $exec :: (dereferenceable store (s128) into %ir.tmp1, align 1, addrspace 
8) ; GCN-NEXT: [[COPY66:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] ; GCN-NEXT: [[COPY67:%[0-9]+]]:sreg_32 = COPY [[COPY]] ; GCN-NEXT: BUFFER_STORE_DWORDX4_IDXEN_exact killed [[BUFFER_LOAD_DWORDX4_IDXEN11]], [[COPY66]], [[REG_SEQUENCE2]], [[COPY67]], 288, 0, 0, implicit $exec :: (dereferenceable store (s128) into %ir.tmp1, align 1, addrspace 8) ; GCN-NEXT: BUFFER_STORE_DWORDX4_IDXEN_exact killed [[BUFFER_LOAD_DWORDX4_IDXEN12]], [[V_MOV_B32_e32_]], [[REG_SEQUENCE2]], [[S_MOV_B32_]], 288, 0, 0, implicit $exec :: (dereferenceable store (s128) into %ir.tmp1, align 1, addrspace 8) ; GCN-NEXT: BUFFER_STORE_DWORDX4_IDXEN_exact killed [[BUFFER_LOAD_DWORDX4_IDXEN13]], [[COPY]], [[REG_SEQUENCE2]], [[S_MOV_B32_]], 288, 0, 0, implicit $exec :: (dereferenceable store (s128) into %ir.tmp1, align 1, addrspace 8) ; GCN-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */ ; GCN-NEXT: [[COPY68:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] ; GCN-NEXT: BUFFER_STORE_FORMAT_XYZW_IDXEN_exact killed [[BUFFER_LOAD_FORMAT_XYZW_IDXEN2]], [[COPY68]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 304, 0, 0, implicit $exec :: (dereferenceable store (s128), align 1, addrspace 8) ; GCN-NEXT: [[S_MOV_B32_22:%[0-9]+]]:sreg_32 = S_MOV_B32 152 ; GCN-NEXT: [[COPY69:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] ; GCN-NEXT: BUFFER_STORE_FORMAT_XYZW_IDXEN_exact killed [[BUFFER_LOAD_FORMAT_XYZW_IDXEN3]], [[COPY69]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_22]], 152, 0, 0, implicit $exec :: (dereferenceable store (s128), align 1, addrspace 8) ; GCN-NEXT: [[S_MOV_B32_23:%[0-9]+]]:sreg_32 = S_MOV_B32 304 ; GCN-NEXT: [[COPY70:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] ; GCN-NEXT: BUFFER_STORE_FORMAT_XYZW_IDXEN_exact killed [[BUFFER_LOAD_FORMAT_XYZW_IDXEN4]], [[COPY70]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_23]], 0, 0, 0, implicit $exec :: (dereferenceable store (s128), align 1, addrspace 8) ; GCN-NEXT: BUFFER_STORE_FORMAT_XYZW_BOTHEN_exact killed [[BUFFER_LOAD_FORMAT_XYZW_BOTHEN]], [[REG_SEQUENCE4]], [[S_LOAD_DWORDX4_IMM]], 
[[S_MOV_B32_23]], 0, 0, 0, implicit $exec :: (dereferenceable store (s128), align 1, addrspace 8) ; GCN-NEXT: [[COPY71:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] ; GCN-NEXT: [[COPY72:%[0-9]+]]:sreg_32 = COPY [[COPY]] ; GCN-NEXT: BUFFER_STORE_FORMAT_XYZW_IDXEN_exact killed [[BUFFER_LOAD_FORMAT_XYZW_IDXEN5]], [[COPY71]], [[S_LOAD_DWORDX4_IMM]], [[COPY72]], 304, 0, 0, implicit $exec :: (dereferenceable store (s128), align 1, addrspace 8) ; GCN-NEXT: BUFFER_STORE_FORMAT_XYZW_IDXEN_exact killed [[BUFFER_LOAD_FORMAT_XYZW_IDXEN6]], [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 304, 0, 0, implicit $exec :: (dereferenceable store (s128), align 1, addrspace 8) ; GCN-NEXT: BUFFER_STORE_FORMAT_XYZW_IDXEN_exact killed [[BUFFER_LOAD_FORMAT_XYZW_IDXEN7]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 304, 0, 0, implicit $exec :: (dereferenceable store (s128), align 1, addrspace 8) ; GCN-NEXT: [[COPY73:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] ; GCN-NEXT: BUFFER_STORE_FORMAT_XYZW_IDXEN_exact killed [[BUFFER_LOAD_FORMAT_XYZW_IDXEN8]], [[COPY73]], [[REG_SEQUENCE2]], [[S_MOV_B32_]], 304, 0, 0, implicit $exec :: (dereferenceable store (s128) into %ir.tmp1, align 1, addrspace 8) ; GCN-NEXT: [[COPY74:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] ; GCN-NEXT: BUFFER_STORE_FORMAT_XYZW_IDXEN_exact killed [[BUFFER_LOAD_FORMAT_XYZW_IDXEN9]], [[COPY74]], [[REG_SEQUENCE2]], [[S_MOV_B32_22]], 152, 0, 0, implicit $exec :: (dereferenceable store (s128) into %ir.tmp1, align 1, addrspace 8) ; GCN-NEXT: [[COPY75:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] ; GCN-NEXT: BUFFER_STORE_FORMAT_XYZW_IDXEN_exact killed [[BUFFER_LOAD_FORMAT_XYZW_IDXEN10]], [[COPY75]], [[REG_SEQUENCE2]], [[S_MOV_B32_23]], 0, 0, 0, implicit $exec :: (dereferenceable store (s128) into %ir.tmp1, align 1, addrspace 8) ; GCN-NEXT: BUFFER_STORE_FORMAT_XYZW_BOTHEN_exact killed [[BUFFER_LOAD_FORMAT_XYZW_BOTHEN1]], [[REG_SEQUENCE4]], [[REG_SEQUENCE2]], [[S_MOV_B32_23]], 0, 0, 0, implicit $exec :: (dereferenceable store (s128) into 
%ir.tmp1, align 1, addrspace 8) ; GCN-NEXT: [[COPY76:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] ; GCN-NEXT: [[COPY77:%[0-9]+]]:sreg_32 = COPY [[COPY]] ; GCN-NEXT: BUFFER_STORE_FORMAT_XYZW_IDXEN_exact killed [[BUFFER_LOAD_FORMAT_XYZW_IDXEN11]], [[COPY76]], [[REG_SEQUENCE2]], [[COPY77]], 304, 0, 0, implicit $exec :: (dereferenceable store (s128) into %ir.tmp1, align 1, addrspace 8) ; GCN-NEXT: BUFFER_STORE_FORMAT_XYZW_IDXEN_exact killed [[BUFFER_LOAD_FORMAT_XYZW_IDXEN12]], [[V_MOV_B32_e32_]], [[REG_SEQUENCE2]], [[S_MOV_B32_]], 304, 0, 0, implicit $exec :: (dereferenceable store (s128) into %ir.tmp1, align 1, addrspace 8) ; GCN-NEXT: BUFFER_STORE_FORMAT_XYZW_IDXEN_exact killed [[BUFFER_LOAD_FORMAT_XYZW_IDXEN13]], [[COPY]], [[REG_SEQUENCE2]], [[S_MOV_B32_]], 304, 0, 0, implicit $exec :: (dereferenceable store (s128) into %ir.tmp1, align 1, addrspace 8) ; GCN-NEXT: S_ENDPGM 0 bb.0: %tmp0 = load <4 x i32>, ptr addrspace(6) %arg0, align 16, !invariant.load !0 %tmp1 = load ptr addrspace(8), ptr addrspace(6) %arg0, align 16, !invariant.load !0 %buffer0 = call nsz <4 x float> @llvm.amdgcn.buffer.load.v4f32(<4 x i32> %tmp0, i32 0, i32 16, i1 false, i1 false) #0 %buffer1 = call nsz <4 x float> @llvm.amdgcn.buffer.load.v4f32(<4 x i32> %tmp0, i32 0, i32 %arg1, i1 false, i1 false) #0 %buffer2 = call nsz <4 x float> @llvm.amdgcn.buffer.load.v4f32(<4 x i32> %tmp0, i32 1, i32 16, i1 false, i1 false) #0 %buffer3 = call nsz <4 x float> @llvm.amdgcn.buffer.load.v4f32(<4 x i32> %tmp0, i32 %arg1, i32 16, i1 false, i1 false) #0 ; Insert inline asm to keep the different instruction types from being mixed. This makes the output easier to read. 
call void asm sideeffect "", "" () call void @llvm.amdgcn.buffer.store.v4f32(<4 x float> %buffer0, <4 x i32> %tmp0, i32 0, i32 32, i1 false, i1 false) #1 call void @llvm.amdgcn.buffer.store.v4f32(<4 x float> %buffer1, <4 x i32> %tmp0, i32 0, i32 %arg1, i1 false, i1 false) #1 call void @llvm.amdgcn.buffer.store.v4f32(<4 x float> %buffer2, <4 x i32> %tmp0, i32 1, i32 32, i1 false, i1 false) #1 call void @llvm.amdgcn.buffer.store.v4f32(<4 x float> %buffer3, <4 x i32> %tmp0, i32 %arg1, i32 32, i1 false, i1 false) #1 call void asm sideeffect "", "" () %buffer_format0 = call nsz <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32> %tmp0, i32 0, i32 48, i1 false, i1 false) #0 %buffer_format1 = call nsz <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32> %tmp0, i32 0, i32 %arg1, i1 false, i1 false) #0 %buffer_format2 = call nsz <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32> %tmp0, i32 1, i32 48, i1 false, i1 false) #0 %buffer_format3 = call nsz <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32> %tmp0, i32 %arg1, i32 48, i1 false, i1 false) #0 call void asm sideeffect "", "" () call void @llvm.amdgcn.buffer.store.format.v4f32(<4 x float> %buffer_format0, <4 x i32> %tmp0, i32 0, i32 64, i1 false, i1 false) #1 call void @llvm.amdgcn.buffer.store.format.v4f32(<4 x float> %buffer_format1, <4 x i32> %tmp0, i32 0, i32 %arg1, i1 false, i1 false) #1 call void @llvm.amdgcn.buffer.store.format.v4f32(<4 x float> %buffer_format2, <4 x i32> %tmp0, i32 1, i32 64, i1 false, i1 false) #1 call void @llvm.amdgcn.buffer.store.format.v4f32(<4 x float> %buffer_format3, <4 x i32> %tmp0, i32 %arg1, i32 64, i1 false, i1 false) #1 call void asm sideeffect "", "" () %atomic_add0 = call i32 @llvm.amdgcn.buffer.atomic.add.i32(i32 %arg1, <4 x i32> %tmp0, i32 0, i32 80, i1 false) #2 %atomic_add1 = call i32 @llvm.amdgcn.buffer.atomic.add.i32(i32 %arg1, <4 x i32> %tmp0, i32 0, i32 %arg1, i1 false) #2 %atomic_add2 = call i32 @llvm.amdgcn.buffer.atomic.add.i32(i32 %arg1, 
<4 x i32> %tmp0, i32 1, i32 80, i1 false) #2 %atomic_add3 = call i32 @llvm.amdgcn.buffer.atomic.add.i32(i32 %arg1, <4 x i32> %tmp0, i32 %arg1, i32 80, i1 false) #2 call void asm sideeffect "", "" () %atomic_cmpswap0 = call i32 @llvm.amdgcn.buffer.atomic.cmpswap(i32 %arg1, i32 %arg1, <4 x i32> %tmp0, i32 0, i32 96, i1 false) #2 %atomic_cmpswap1 = call i32 @llvm.amdgcn.buffer.atomic.cmpswap(i32 %arg1, i32 %arg1, <4 x i32> %tmp0, i32 0, i32 %arg1, i1 false) #2 %atomic_cmpswap2 = call i32 @llvm.amdgcn.buffer.atomic.cmpswap(i32 %arg1, i32 %arg1, <4 x i32> %tmp0, i32 1, i32 96, i1 false) #2 %atomic_cmpswap3 = call i32 @llvm.amdgcn.buffer.atomic.cmpswap(i32 %arg1, i32 %arg1, <4 x i32> %tmp0, i32 %arg1, i32 96, i1 false) #2 call void asm sideeffect "", "" () %fadd1 = call float @llvm.amdgcn.buffer.atomic.fadd.f32(float 1.0, <4 x i32> %tmp0, i32 0, i32 112, i1 false) #2 %fadd2 = call float @llvm.amdgcn.buffer.atomic.fadd.f32(float 1.0, <4 x i32> %tmp0, i32 0, i32 %arg1, i1 false) #2 %fadd3 = call float @llvm.amdgcn.buffer.atomic.fadd.f32(float 1.0, <4 x i32> %tmp0, i32 1, i32 112, i1 false) #2 %fadd4 = call float @llvm.amdgcn.buffer.atomic.fadd.f32(float 1.0, <4 x i32> %tmp0, i32 %arg1, i32 112, i1 false) #2 call void asm sideeffect "", "" () ; rsrc, offset, soffset, cachepolicy %raw_buffer0 = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %tmp0, i32 128, i32 0, i32 0) #0 %raw_buffer1 = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %tmp0, i32 64, i32 64, i32 0) #0 %raw_buffer2 = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %tmp0, i32 0, i32 128, i32 0) #0 %raw_buffer3 = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %tmp0, i32 %arg1, i32 128, i32 0) #0 %raw_buffer4 = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %tmp0, i32 128, i32 %arg1, i32 0) #0 call void asm sideeffect "", "" () ; rsrc, offset, soffset, cachepolicy %raw_ptr_buffer0 = call <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.v4f32(ptr 
addrspace(8) %tmp1, i32 128, i32 0, i32 0) #3 %raw_ptr_buffer1 = call <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.v4f32(ptr addrspace(8) %tmp1, i32 64, i32 64, i32 0) #3 %raw_ptr_buffer2 = call <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.v4f32(ptr addrspace(8) %tmp1, i32 0, i32 128, i32 0) #3 %raw_ptr_buffer3 = call <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.v4f32(ptr addrspace(8) %tmp1, i32 %arg1, i32 128, i32 0) #3 %raw_ptr_buffer4 = call <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.v4f32(ptr addrspace(8) %tmp1, i32 128, i32 %arg1, i32 0) #3 call void asm sideeffect "", "" () %raw_buffer_format0 = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %tmp0, i32 144, i32 0, i32 0) #0 %raw_buffer_format1 = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %tmp0, i32 72, i32 72, i32 0) #0 %raw_buffer_format2 = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %tmp0, i32 0, i32 144, i32 0) #0 %raw_buffer_format3 = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %tmp0, i32 %arg1, i32 144, i32 0) #0 %raw_buffer_format4 = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %tmp0, i32 144, i32 %arg1, i32 0) #0 call void asm sideeffect "", "" () %raw_buffer_format_ptr0 = call <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.format.v4f32(ptr addrspace(8) %tmp1, i32 144, i32 0, i32 0) #3 %raw_buffer_format_ptr1 = call <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.format.v4f32(ptr addrspace(8) %tmp1, i32 72, i32 72, i32 0) #3 %raw_buffer_format_ptr2 = call <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.format.v4f32(ptr addrspace(8) %tmp1, i32 0, i32 144, i32 0) #3 %raw_buffer_format_ptr3 = call <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.format.v4f32(ptr addrspace(8) %tmp1, i32 %arg1, i32 144, i32 0) #3 %raw_buffer_format_ptr4 = call <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.format.v4f32(ptr addrspace(8) %tmp1, i32 144, i32 %arg1, i32 0) #3 call void asm sideeffect "", "" () 
%raw_atomic_add0 = call i32 @llvm.amdgcn.raw.buffer.atomic.add.i32(i32 %arg1, <4 x i32> %tmp0, i32 160, i32 0, i32 0) #2 %raw_atomic_add1 = call i32 @llvm.amdgcn.raw.buffer.atomic.add.i32(i32 %arg1, <4 x i32> %tmp0, i32 80, i32 80, i32 0) #2 %raw_atomic_add2 = call i32 @llvm.amdgcn.raw.buffer.atomic.add.i32(i32 %arg1, <4 x i32> %tmp0, i32 0, i32 160, i32 0) #2 %raw_atomic_add3 = call i32 @llvm.amdgcn.raw.buffer.atomic.add.i32(i32 %arg1, <4 x i32> %tmp0, i32 %arg1, i32 160, i32 0) #2 %raw_atomic_add4 = call i32 @llvm.amdgcn.raw.buffer.atomic.add.i32(i32 %arg1, <4 x i32> %tmp0, i32 160, i32 %arg1, i32 0) #2 call void asm sideeffect "", "" () %raw_ptr_atomic_add0 = call i32 @llvm.amdgcn.raw.ptr.buffer.atomic.add.i32(i32 %arg1, ptr addrspace(8) %tmp1, i32 160, i32 0, i32 0) #5 %raw_ptr_atomic_add1 = call i32 @llvm.amdgcn.raw.ptr.buffer.atomic.add.i32(i32 %arg1, ptr addrspace(8) %tmp1, i32 80, i32 80, i32 0) #5 %raw_ptr_atomic_add2 = call i32 @llvm.amdgcn.raw.ptr.buffer.atomic.add.i32(i32 %arg1, ptr addrspace(8) %tmp1, i32 0, i32 160, i32 0) #5 %raw_ptr_atomic_add3 = call i32 @llvm.amdgcn.raw.ptr.buffer.atomic.add.i32(i32 %arg1, ptr addrspace(8) %tmp1, i32 %arg1, i32 160, i32 0) #5 %raw_ptr_atomic_add4 = call i32 @llvm.amdgcn.raw.ptr.buffer.atomic.add.i32(i32 %arg1, ptr addrspace(8) %tmp1, i32 160, i32 %arg1, i32 0) #5 call void asm sideeffect "", "" () %raw_atomic_cmpswap0 = call i32 @llvm.amdgcn.raw.buffer.atomic.cmpswap.i32(i32 %arg1, i32 %arg1, <4 x i32> %tmp0, i32 176, i32 0, i32 0) #2 %raw_atomic_cmpswap1 = call i32 @llvm.amdgcn.raw.buffer.atomic.cmpswap.i32(i32 %arg1, i32 %arg1, <4 x i32> %tmp0, i32 88, i32 88, i32 0) #2 %raw_atomic_cmpswap2 = call i32 @llvm.amdgcn.raw.buffer.atomic.cmpswap.i32(i32 %arg1, i32 %arg1, <4 x i32> %tmp0, i32 0, i32 176, i32 0) #2 %raw_atomic_cmpswap3 = call i32 @llvm.amdgcn.raw.buffer.atomic.cmpswap.i32(i32 %arg1, i32 %arg1, <4 x i32> %tmp0, i32 %arg1, i32 176, i32 0) #2 %raw_atomic_cmpswap4 = call i32 
@llvm.amdgcn.raw.buffer.atomic.cmpswap.i32(i32 %arg1, i32 %arg1, <4 x i32> %tmp0, i32 176, i32 %arg1, i32 0) #2 call void asm sideeffect "", "" () %raw_ptr_atomic_cmpswap0 = call i32 @llvm.amdgcn.raw.ptr.buffer.atomic.cmpswap.i32(i32 %arg1, i32 %arg1, ptr addrspace(8) %tmp1, i32 176, i32 0, i32 0) #5 %raw_ptr_atomic_cmpswap1 = call i32 @llvm.amdgcn.raw.ptr.buffer.atomic.cmpswap.i32(i32 %arg1, i32 %arg1, ptr addrspace(8) %tmp1, i32 88, i32 88, i32 0) #5 %raw_ptr_atomic_cmpswap2 = call i32 @llvm.amdgcn.raw.ptr.buffer.atomic.cmpswap.i32(i32 %arg1, i32 %arg1, ptr addrspace(8) %tmp1, i32 0, i32 176, i32 0) #5 %raw_ptr_atomic_cmpswap3 = call i32 @llvm.amdgcn.raw.ptr.buffer.atomic.cmpswap.i32(i32 %arg1, i32 %arg1, ptr addrspace(8) %tmp1, i32 %arg1, i32 176, i32 0) #5 %raw_ptr_atomic_cmpswap4 = call i32 @llvm.amdgcn.raw.ptr.buffer.atomic.cmpswap.i32(i32 %arg1, i32 %arg1, ptr addrspace(8) %tmp1, i32 176, i32 %arg1, i32 0) #5 call void asm sideeffect "", "" () call void @llvm.amdgcn.raw.buffer.store.v4f32(<4 x float> %raw_buffer0, <4 x i32> %tmp0, i32 192, i32 0, i32 0) #2 call void @llvm.amdgcn.raw.buffer.store.v4f32(<4 x float> %raw_buffer1, <4 x i32> %tmp0, i32 96, i32 96, i32 0) #2 call void @llvm.amdgcn.raw.buffer.store.v4f32(<4 x float> %raw_buffer2, <4 x i32> %tmp0, i32 0, i32 192, i32 0) #2 call void @llvm.amdgcn.raw.buffer.store.v4f32(<4 x float> %raw_buffer3, <4 x i32> %tmp0, i32 %arg1, i32 192, i32 0) #2 call void @llvm.amdgcn.raw.buffer.store.v4f32(<4 x float> %raw_buffer4, <4 x i32> %tmp0, i32 192, i32 %arg1, i32 0) #2 call void asm sideeffect "", "" () call void @llvm.amdgcn.raw.ptr.buffer.store.v4f32(<4 x float> %raw_ptr_buffer0, ptr addrspace(8) %tmp1, i32 192, i32 0, i32 0) #5 call void @llvm.amdgcn.raw.ptr.buffer.store.v4f32(<4 x float> %raw_ptr_buffer1, ptr addrspace(8) %tmp1, i32 96, i32 96, i32 0) #5 call void @llvm.amdgcn.raw.ptr.buffer.store.v4f32(<4 x float> %raw_ptr_buffer2, ptr addrspace(8) %tmp1, i32 0, i32 192, i32 0) #5 call void 
@llvm.amdgcn.raw.ptr.buffer.store.v4f32(<4 x float> %raw_ptr_buffer3, ptr addrspace(8) %tmp1, i32 %arg1, i32 192, i32 0) #5 call void @llvm.amdgcn.raw.ptr.buffer.store.v4f32(<4 x float> %raw_ptr_buffer4, ptr addrspace(8) %tmp1, i32 192, i32 %arg1, i32 0) #5 call void asm sideeffect "", "" () call void @llvm.amdgcn.raw.buffer.store.format.v4f32(<4 x float> %raw_buffer_format0, <4 x i32> %tmp0, i32 208, i32 0, i32 0) #2 call void @llvm.amdgcn.raw.buffer.store.format.v4f32(<4 x float> %raw_buffer_format1, <4 x i32> %tmp0, i32 104, i32 104, i32 0) #2 call void @llvm.amdgcn.raw.buffer.store.format.v4f32(<4 x float> %raw_buffer_format2, <4 x i32> %tmp0, i32 0, i32 208, i32 0) #2 call void @llvm.amdgcn.raw.buffer.store.format.v4f32(<4 x float> %raw_buffer_format3, <4 x i32> %tmp0, i32 %arg1, i32 208, i32 0) #2 call void @llvm.amdgcn.raw.buffer.store.format.v4f32(<4 x float> %raw_buffer_format4, <4 x i32> %tmp0, i32 208, i32 %arg1, i32 0) #2 call void asm sideeffect "", "" () call void @llvm.amdgcn.raw.ptr.buffer.store.format.v4f32(<4 x float> %raw_buffer_format_ptr0, ptr addrspace(8) %tmp1, i32 208, i32 0, i32 0) #4 call void @llvm.amdgcn.raw.ptr.buffer.store.format.v4f32(<4 x float> %raw_buffer_format_ptr1, ptr addrspace(8) %tmp1, i32 104, i32 104, i32 0) #4 call void @llvm.amdgcn.raw.ptr.buffer.store.format.v4f32(<4 x float> %raw_buffer_format_ptr2, ptr addrspace(8) %tmp1, i32 0, i32 208, i32 0) #4 call void @llvm.amdgcn.raw.ptr.buffer.store.format.v4f32(<4 x float> %raw_buffer_format_ptr3, ptr addrspace(8) %tmp1, i32 %arg1, i32 208, i32 0) #4 call void @llvm.amdgcn.raw.ptr.buffer.store.format.v4f32(<4 x float> %raw_buffer_format_ptr4, ptr addrspace(8) %tmp1, i32 208, i32 %arg1, i32 0) #4 call void asm sideeffect "", "" () ; rsrc, vindex, offset, soffset, cachepolicy %struct_buffer0 = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %tmp0, i32 0, i32 224, i32 0, i32 0) #0 %struct_buffer1 = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> 
%tmp0, i32 0, i32 112, i32 112, i32 0) #0 %struct_buffer2 = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %tmp0, i32 0, i32 0, i32 224, i32 0) #0 %struct_buffer3 = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %tmp0, i32 0, i32 %arg1, i32 224, i32 0) #0 %struct_buffer4 = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %tmp0, i32 0, i32 224, i32 %arg1, i32 0) #0 %struct_buffer5 = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %tmp0, i32 1, i32 224, i32 0, i32 0) #0 %struct_buffer6 = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %tmp0, i32 %arg1, i32 224, i32 0, i32 0) #0 call void asm sideeffect "", "" () %struct_ptr_buffer0 = call <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.v4f32(ptr addrspace(8) %tmp1, i32 0, i32 224, i32 0, i32 0) #3 %struct_ptr_buffer1 = call <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.v4f32(ptr addrspace(8) %tmp1, i32 0, i32 112, i32 112, i32 0) #3 %struct_ptr_buffer2 = call <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.v4f32(ptr addrspace(8) %tmp1, i32 0, i32 0, i32 224, i32 0) #3 %struct_ptr_buffer3 = call <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.v4f32(ptr addrspace(8) %tmp1, i32 0, i32 %arg1, i32 224, i32 0) #3 %struct_ptr_buffer4 = call <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.v4f32(ptr addrspace(8) %tmp1, i32 0, i32 224, i32 %arg1, i32 0) #3 %struct_ptr_buffer5 = call <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.v4f32(ptr addrspace(8) %tmp1, i32 1, i32 224, i32 0, i32 0) #3 %struct_ptr_buffer6 = call <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.v4f32(ptr addrspace(8) %tmp1, i32 %arg1, i32 224, i32 0, i32 0) #3 call void asm sideeffect "", "" () %struct_buffer_format0 = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %tmp0, i32 0, i32 240, i32 0, i32 0) #0 %struct_buffer_format1 = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %tmp0, i32 0, i32 120, i32 120, i32 0) #0 
%struct_buffer_format2 = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %tmp0, i32 0, i32 0, i32 240, i32 0) #0 %struct_buffer_format3 = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %tmp0, i32 0, i32 %arg1, i32 240, i32 0) #0 %struct_buffer_format4 = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %tmp0, i32 0, i32 240, i32 %arg1, i32 0) #0 %struct_buffer_format5 = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %tmp0, i32 1, i32 240, i32 0, i32 0) #0 %struct_buffer_format6 = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %tmp0, i32 %arg1, i32 240, i32 0, i32 0) #0 call void asm sideeffect "", "" () %struct_buffer_format_ptr0 = call <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v4f32(ptr addrspace(8) %tmp1, i32 0, i32 240, i32 0, i32 0) #3 %struct_buffer_format_ptr1 = call <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v4f32(ptr addrspace(8) %tmp1, i32 0, i32 120, i32 120, i32 0) #3 %struct_buffer_format_ptr2 = call <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v4f32(ptr addrspace(8) %tmp1, i32 0, i32 0, i32 240, i32 0) #3 %struct_buffer_format_ptr3 = call <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v4f32(ptr addrspace(8) %tmp1, i32 0, i32 %arg1, i32 240, i32 0) #3 %struct_buffer_format_ptr4 = call <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v4f32(ptr addrspace(8) %tmp1, i32 0, i32 240, i32 %arg1, i32 0) #3 %struct_buffer_format_ptr5 = call <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v4f32(ptr addrspace(8) %tmp1, i32 1, i32 240, i32 0, i32 0) #3 %struct_buffer_format_ptr6 = call <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v4f32(ptr addrspace(8) %tmp1, i32 %arg1, i32 240, i32 0, i32 0) #3 call void asm sideeffect "", "" () %struct_atomic_add0 = call i32 @llvm.amdgcn.struct.buffer.atomic.add.i32(i32 %arg1, <4 x i32> %tmp0, i32 0, i32 256, i32 0, i32 0) #2 %struct_atomic_add1 
= call i32 @llvm.amdgcn.struct.buffer.atomic.add.i32(i32 %arg1, <4 x i32> %tmp0, i32 0, i32 128, i32 128, i32 0) #2 %struct_atomic_add2 = call i32 @llvm.amdgcn.struct.buffer.atomic.add.i32(i32 %arg1, <4 x i32> %tmp0, i32 0, i32 0, i32 256, i32 0) #2 %struct_atomic_add3 = call i32 @llvm.amdgcn.struct.buffer.atomic.add.i32(i32 %arg1, <4 x i32> %tmp0, i32 0, i32 %arg1, i32 256, i32 0) #2 %struct_atomic_add4 = call i32 @llvm.amdgcn.struct.buffer.atomic.add.i32(i32 %arg1, <4 x i32> %tmp0, i32 0, i32 256, i32 %arg1, i32 0) #2 %struct_atomic_add5 = call i32 @llvm.amdgcn.struct.buffer.atomic.add.i32(i32 %arg1, <4 x i32> %tmp0, i32 1, i32 256, i32 0, i32 0) #2 %struct_atomic_add6 = call i32 @llvm.amdgcn.struct.buffer.atomic.add.i32(i32 %arg1, <4 x i32> %tmp0, i32 %arg1, i32 256, i32 0, i32 0) #2 call void asm sideeffect "", "" () %struct_atomic_add_ptr0 = call i32 @llvm.amdgcn.struct.ptr.buffer.atomic.add.i32(i32 %arg1, ptr addrspace(8) %tmp1, i32 0, i32 256, i32 0, i32 0) #5 %struct_atomic_add_ptr1 = call i32 @llvm.amdgcn.struct.ptr.buffer.atomic.add.i32(i32 %arg1, ptr addrspace(8) %tmp1, i32 0, i32 128, i32 128, i32 0) #5 %struct_atomic_add_ptr2 = call i32 @llvm.amdgcn.struct.ptr.buffer.atomic.add.i32(i32 %arg1, ptr addrspace(8) %tmp1, i32 0, i32 0, i32 256, i32 0) #5 %struct_atomic_add_ptr3 = call i32 @llvm.amdgcn.struct.ptr.buffer.atomic.add.i32(i32 %arg1, ptr addrspace(8) %tmp1, i32 0, i32 %arg1, i32 256, i32 0) #5 %struct_atomic_add_ptr4 = call i32 @llvm.amdgcn.struct.ptr.buffer.atomic.add.i32(i32 %arg1, ptr addrspace(8) %tmp1, i32 0, i32 256, i32 %arg1, i32 0) #5 %struct_atomic_add_ptr5 = call i32 @llvm.amdgcn.struct.ptr.buffer.atomic.add.i32(i32 %arg1, ptr addrspace(8) %tmp1, i32 1, i32 256, i32 0, i32 0) #5 %struct_atomic_add_ptr6 = call i32 @llvm.amdgcn.struct.ptr.buffer.atomic.add.i32(i32 %arg1, ptr addrspace(8) %tmp1, i32 %arg1, i32 256, i32 0, i32 0) #5 call void asm sideeffect "", "" () %struct_atomic_cmpswap0 = call i32 
@llvm.amdgcn.struct.buffer.atomic.cmpswap.i32(i32 %arg1, i32 %arg1, <4 x i32> %tmp0, i32 0, i32 272, i32 0, i32 0) #2 %struct_atomic_cmpswap1 = call i32 @llvm.amdgcn.struct.buffer.atomic.cmpswap.i32(i32 %arg1, i32 %arg1, <4 x i32> %tmp0, i32 0, i32 136, i32 136, i32 0) #2 %struct_atomic_cmpswap2 = call i32 @llvm.amdgcn.struct.buffer.atomic.cmpswap.i32(i32 %arg1, i32 %arg1, <4 x i32> %tmp0, i32 0, i32 0, i32 272, i32 0) #2 %struct_atomic_cmpswap3 = call i32 @llvm.amdgcn.struct.buffer.atomic.cmpswap.i32(i32 %arg1, i32 %arg1, <4 x i32> %tmp0, i32 0, i32 %arg1, i32 272, i32 0) #2 %struct_atomic_cmpswap4 = call i32 @llvm.amdgcn.struct.buffer.atomic.cmpswap.i32(i32 %arg1, i32 %arg1, <4 x i32> %tmp0, i32 0, i32 272, i32 %arg1, i32 0) #2 %struct_atomic_cmpswap5 = call i32 @llvm.amdgcn.struct.buffer.atomic.cmpswap.i32(i32 %arg1, i32 %arg1, <4 x i32> %tmp0, i32 1, i32 272, i32 0, i32 0) #2 %struct_atomic_cmpswap6 = call i32 @llvm.amdgcn.struct.buffer.atomic.cmpswap.i32(i32 %arg1, i32 %arg1, <4 x i32> %tmp0, i32 %arg1, i32 272, i32 0, i32 0) #2 call void asm sideeffect "", "" () %struct_atomic_cmpswap_ptr0 = call i32 @llvm.amdgcn.struct.ptr.buffer.atomic.cmpswap.i32(i32 %arg1, i32 %arg1, ptr addrspace(8) %tmp1, i32 0, i32 272, i32 0, i32 0) #5 %struct_atomic_cmpswap_ptr1 = call i32 @llvm.amdgcn.struct.ptr.buffer.atomic.cmpswap.i32(i32 %arg1, i32 %arg1, ptr addrspace(8) %tmp1, i32 0, i32 136, i32 136, i32 0) #5 %struct_atomic_cmpswap_ptr2 = call i32 @llvm.amdgcn.struct.ptr.buffer.atomic.cmpswap.i32(i32 %arg1, i32 %arg1, ptr addrspace(8) %tmp1, i32 0, i32 0, i32 272, i32 0) #5 %struct_atomic_cmpswap_ptr3 = call i32 @llvm.amdgcn.struct.ptr.buffer.atomic.cmpswap.i32(i32 %arg1, i32 %arg1, ptr addrspace(8) %tmp1, i32 0, i32 %arg1, i32 272, i32 0) #5 %struct_atomic_cmpswap_ptr4 = call i32 @llvm.amdgcn.struct.ptr.buffer.atomic.cmpswap.i32(i32 %arg1, i32 %arg1, ptr addrspace(8) %tmp1, i32 0, i32 272, i32 %arg1, i32 0) #5 %struct_atomic_cmpswap_ptr5 = call i32 
@llvm.amdgcn.struct.ptr.buffer.atomic.cmpswap.i32(i32 %arg1, i32 %arg1, ptr addrspace(8) %tmp1, i32 1, i32 272, i32 0, i32 0) #5 %struct_atomic_cmpswap_ptr6 = call i32 @llvm.amdgcn.struct.ptr.buffer.atomic.cmpswap.i32(i32 %arg1, i32 %arg1, ptr addrspace(8) %tmp1, i32 %arg1, i32 272, i32 0, i32 0) #5 call void asm sideeffect "", "" () call void @llvm.amdgcn.struct.buffer.store.v4f32(<4 x float> %struct_buffer0, <4 x i32> %tmp0, i32 0, i32 288, i32 0, i32 0) #2 call void @llvm.amdgcn.struct.buffer.store.v4f32(<4 x float> %struct_buffer1, <4 x i32> %tmp0, i32 0, i32 144, i32 144, i32 0) #2 call void @llvm.amdgcn.struct.buffer.store.v4f32(<4 x float> %struct_buffer2, <4 x i32> %tmp0, i32 0, i32 0, i32 288, i32 0) #2 call void @llvm.amdgcn.struct.buffer.store.v4f32(<4 x float> %struct_buffer3, <4 x i32> %tmp0, i32 0, i32 %arg1, i32 288, i32 0) #2 call void @llvm.amdgcn.struct.buffer.store.v4f32(<4 x float> %struct_buffer4, <4 x i32> %tmp0, i32 0, i32 288, i32 %arg1, i32 0) #2 call void @llvm.amdgcn.struct.buffer.store.v4f32(<4 x float> %struct_buffer5, <4 x i32> %tmp0, i32 1, i32 288, i32 0, i32 0) #2 call void @llvm.amdgcn.struct.buffer.store.v4f32(<4 x float> %struct_buffer6, <4 x i32> %tmp0, i32 %arg1, i32 288, i32 0, i32 0) #2 call void asm sideeffect "", "" () call void @llvm.amdgcn.struct.ptr.buffer.store.v4f32(<4 x float> %struct_ptr_buffer0, ptr addrspace(8) %tmp1, i32 0, i32 288, i32 0, i32 0) #4 call void @llvm.amdgcn.struct.ptr.buffer.store.v4f32(<4 x float> %struct_ptr_buffer1, ptr addrspace(8) %tmp1, i32 0, i32 144, i32 144, i32 0) #4 call void @llvm.amdgcn.struct.ptr.buffer.store.v4f32(<4 x float> %struct_ptr_buffer2, ptr addrspace(8) %tmp1, i32 0, i32 0, i32 288, i32 0) #4 call void @llvm.amdgcn.struct.ptr.buffer.store.v4f32(<4 x float> %struct_ptr_buffer3, ptr addrspace(8) %tmp1, i32 0, i32 %arg1, i32 288, i32 0) #4 call void @llvm.amdgcn.struct.ptr.buffer.store.v4f32(<4 x float> %struct_ptr_buffer4, ptr addrspace(8) %tmp1, i32 0, i32 288, i32 %arg1, i32 
0) #4 call void @llvm.amdgcn.struct.ptr.buffer.store.v4f32(<4 x float> %struct_ptr_buffer5, ptr addrspace(8) %tmp1, i32 1, i32 288, i32 0, i32 0) #4 call void @llvm.amdgcn.struct.ptr.buffer.store.v4f32(<4 x float> %struct_ptr_buffer6, ptr addrspace(8) %tmp1, i32 %arg1, i32 288, i32 0, i32 0) #4 call void asm sideeffect "", "" () call void @llvm.amdgcn.struct.buffer.store.format.v4f32(<4 x float> %struct_buffer_format0, <4 x i32> %tmp0, i32 0, i32 304, i32 0, i32 0) #2 call void @llvm.amdgcn.struct.buffer.store.format.v4f32(<4 x float> %struct_buffer_format1, <4 x i32> %tmp0, i32 0, i32 152, i32 152, i32 0) #2 call void @llvm.amdgcn.struct.buffer.store.format.v4f32(<4 x float> %struct_buffer_format2, <4 x i32> %tmp0, i32 0, i32 0, i32 304, i32 0) #2 call void @llvm.amdgcn.struct.buffer.store.format.v4f32(<4 x float> %struct_buffer_format3, <4 x i32> %tmp0, i32 0, i32 %arg1, i32 304, i32 0) #2 call void @llvm.amdgcn.struct.buffer.store.format.v4f32(<4 x float> %struct_buffer_format4, <4 x i32> %tmp0, i32 0, i32 304, i32 %arg1, i32 0) #2 call void @llvm.amdgcn.struct.buffer.store.format.v4f32(<4 x float> %struct_buffer_format5, <4 x i32> %tmp0, i32 1, i32 304, i32 0, i32 0) #2 call void @llvm.amdgcn.struct.buffer.store.format.v4f32(<4 x float> %struct_buffer_format6, <4 x i32> %tmp0, i32 %arg1, i32 304, i32 0, i32 0) #2 call void @llvm.amdgcn.struct.ptr.buffer.store.format.v4f32(<4 x float> %struct_buffer_format_ptr0, ptr addrspace(8) %tmp1, i32 0, i32 304, i32 0, i32 0) #4 call void @llvm.amdgcn.struct.ptr.buffer.store.format.v4f32(<4 x float> %struct_buffer_format_ptr1, ptr addrspace(8) %tmp1, i32 0, i32 152, i32 152, i32 0) #4 call void @llvm.amdgcn.struct.ptr.buffer.store.format.v4f32(<4 x float> %struct_buffer_format_ptr2, ptr addrspace(8) %tmp1, i32 0, i32 0, i32 304, i32 0) #4 call void @llvm.amdgcn.struct.ptr.buffer.store.format.v4f32(<4 x float> %struct_buffer_format_ptr3, ptr addrspace(8) %tmp1, i32 0, i32 %arg1, i32 304, i32 0) #4 call void 
@llvm.amdgcn.struct.ptr.buffer.store.format.v4f32(<4 x float> %struct_buffer_format_ptr4, ptr addrspace(8) %tmp1, i32 0, i32 304, i32 %arg1, i32 0) #4
  call void @llvm.amdgcn.struct.ptr.buffer.store.format.v4f32(<4 x float> %struct_buffer_format_ptr5, ptr addrspace(8) %tmp1, i32 1, i32 304, i32 0, i32 0) #4
  call void @llvm.amdgcn.struct.ptr.buffer.store.format.v4f32(<4 x float> %struct_buffer_format_ptr6, ptr addrspace(8) %tmp1, i32 %arg1, i32 304, i32 0, i32 0) #4
  ret void
}

; ---------------------------------------------------------------------------
; Intrinsic declarations.
;
; The struct.buffer.* and struct.ptr.buffer.* atomic/store intrinsics are
; called at the end of the function above. The remaining declarations are
; presumably used by the earlier part of that function - confirm against the
; full body before removing any of them.
; ---------------------------------------------------------------------------

; llvm.amdgcn.buffer.*: the only group declared here whose trailing parameters
; are i1. Note that cmpswap is declared without a type suffix in this group.
declare <4 x float> @llvm.amdgcn.buffer.load.v4f32(<4 x i32>, i32, i32, i1, i1) #0
declare void @llvm.amdgcn.buffer.store.v4f32(<4 x float>, <4 x i32>, i32, i32, i1, i1) #1
declare <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32>, i32, i32, i1, i1) #0
declare void @llvm.amdgcn.buffer.store.format.v4f32(<4 x float>, <4 x i32>, i32, i32, i1, i1) #1
declare i32 @llvm.amdgcn.buffer.atomic.add.i32(i32, <4 x i32>, i32, i32, i1) #2
declare i32 @llvm.amdgcn.buffer.atomic.cmpswap(i32, i32, <4 x i32>, i32, i32, i1) #2
declare float @llvm.amdgcn.buffer.atomic.fadd.f32(float, <4 x i32>, i32, i32, i1) #2

; llvm.amdgcn.raw.buffer.*: buffer operand typed <4 x i32>, followed by three
; i32 parameters. The stores in this group carry #2 (nounwind only), not #1.
declare <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32>, i32, i32, i32) #0
declare <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32>, i32, i32, i32) #0
declare i32 @llvm.amdgcn.raw.buffer.atomic.add.i32(i32, <4 x i32>, i32, i32, i32) #2
declare i32 @llvm.amdgcn.raw.buffer.atomic.cmpswap.i32(i32, i32, <4 x i32>, i32, i32, i32) #2
declare void @llvm.amdgcn.raw.buffer.store.v4f32(<4 x float>, <4 x i32>, i32, i32, i32) #2
declare void @llvm.amdgcn.raw.buffer.store.format.v4f32(<4 x float>, <4 x i32>, i32, i32, i32) #2

; llvm.amdgcn.raw.ptr.buffer.*: same parameter lists as the raw.buffer.*
; declarations, with ptr addrspace(8) in place of <4 x i32>. These use the
; memory(argmem: ...) attribute groups #3, #4 and #5.
declare <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.v4f32(ptr addrspace(8), i32, i32, i32) #3
declare <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.format.v4f32(ptr addrspace(8), i32, i32, i32) #3
declare i32 @llvm.amdgcn.raw.ptr.buffer.atomic.add.i32(i32, ptr addrspace(8), i32, i32, i32) #5
declare i32 @llvm.amdgcn.raw.ptr.buffer.atomic.cmpswap.i32(i32, i32, ptr addrspace(8), i32, i32, i32) #5
declare void @llvm.amdgcn.raw.ptr.buffer.store.v4f32(<4 x float>, ptr addrspace(8), i32, i32, i32) #4
declare void @llvm.amdgcn.raw.ptr.buffer.store.format.v4f32(<4 x float>, ptr addrspace(8), i32, i32, i32) #4

; llvm.amdgcn.struct.buffer.*: one more i32 parameter than the corresponding
; raw.buffer.* declaration. The stores in this group also carry #2.
declare <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32>, i32, i32, i32, i32) #0
declare <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32>, i32, i32, i32, i32) #0
declare i32 @llvm.amdgcn.struct.buffer.atomic.add.i32(i32, <4 x i32>, i32, i32, i32, i32) #2
declare i32 @llvm.amdgcn.struct.buffer.atomic.cmpswap.i32(i32, i32, <4 x i32>, i32, i32, i32, i32) #2
declare void @llvm.amdgcn.struct.buffer.store.v4f32(<4 x float>, <4 x i32>, i32, i32, i32, i32) #2
declare void @llvm.amdgcn.struct.buffer.store.format.v4f32(<4 x float>, <4 x i32>, i32, i32, i32, i32) #2

; llvm.amdgcn.struct.ptr.buffer.*: same parameter lists as the struct.buffer.*
; declarations, with ptr addrspace(8) in place of <4 x i32>.
declare <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.v4f32(ptr addrspace(8), i32, i32, i32, i32) #3
declare <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v4f32(ptr addrspace(8), i32, i32, i32, i32) #3
declare i32 @llvm.amdgcn.struct.ptr.buffer.atomic.add.i32(i32, ptr addrspace(8), i32, i32, i32, i32) #5
declare i32 @llvm.amdgcn.struct.ptr.buffer.atomic.cmpswap.i32(i32, i32, ptr addrspace(8), i32, i32, i32, i32) #5
declare void @llvm.amdgcn.struct.ptr.buffer.store.v4f32(<4 x float>, ptr addrspace(8), i32, i32, i32, i32) #4
declare void @llvm.amdgcn.struct.ptr.buffer.store.format.v4f32(<4 x float>, ptr addrspace(8), i32, i32, i32, i32) #4

; Attribute groups referenced by the declarations above and by the call sites.
; #0-#2 appear on the <4 x i32> declarations; #3-#5 appear on the
; ptr addrspace(8) declarations and restrict the memory effect to argmem.
attributes #0 = { nounwind readonly }
attributes #1 = { nounwind writeonly }
attributes #2 = { nounwind }
attributes #3 = { nounwind memory(argmem: read) }
attributes #4 = { nounwind memory(argmem: write) }
attributes #5 = { nounwind memory(argmem: readwrite) }

; Empty metadata node.
!0 = !{}