; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9 %s
; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -mattr=+wavefrontsize64 < %s | FileCheck -check-prefixes=GCN,GFX10 %s
; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -mattr=+wavefrontsize64 < %s | FileCheck -check-prefixes=GFX11 %s
; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1200 -mattr=+wavefrontsize64 < %s | FileCheck -check-prefixes=GFX12 %s

; Test using saddr addressing mode of global_*store_* flat instructions.
;
; Every function below is compiled for gfx900, gfx1010, gfx1100 and gfx1200 (see
; the llc invocations above). The gfx900 and gfx1010 runs share one common check
; prefix and additionally carry a per-target prefix that is used where their
; expected output differs; the gfx1100 and gfx1200 runs each use only their own
; prefix. The expected output for gfx1100 and gfx1200 ends every function with
; s_nop 0 and a MSG_DEALLOC_VGPRS s_sendmsg before s_endpgm.

; i8 store to a base pointer passed inreg (checked as s[2:3]) plus a
; zero-extended 32-bit offset that is itself loaded from memory. The expected
; output waits for that load and then uses the saddr form of the byte store with
; the loaded offset in v0 and the data in v2.
define amdgpu_ps void @global_store_saddr_i8_zext_vgpr(ptr addrspace(1) inreg %sbase, ptr addrspace(1) %voffset.ptr, i8 %data) {
; GCN-LABEL: global_store_saddr_i8_zext_vgpr:
; GCN: ; %bb.0:
; GCN-NEXT: global_load_dword v0, v[0:1], off
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: global_store_byte v0, v2, s[2:3]
; GCN-NEXT: s_endpgm
;
; GFX11-LABEL: global_store_saddr_i8_zext_vgpr:
; GFX11: ; %bb.0:
; GFX11-NEXT: global_load_b32 v0, v[0:1], off
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: global_store_b8 v0, v2, s[2:3]
; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
;
; GFX12-LABEL: global_store_saddr_i8_zext_vgpr:
; GFX12: ; %bb.0:
; GFX12-NEXT: global_load_b32 v0, v[0:1], off
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: global_store_b8 v0, v2, s[2:3]
; GFX12-NEXT: s_nop 0
; GFX12-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX12-NEXT: s_endpgm
  %voffset = load i32, ptr addrspace(1) %voffset.ptr
  %zext.offset = zext i32 %voffset to i64
  %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
  store i8 %data, ptr addrspace(1) %gep0
  ret void
}

; Maximum positive offset on gfx10
; The constant 2047 from the second getelementptr is expected to appear as the
; store's immediate offset on every tested target.
define amdgpu_ps void @global_store_saddr_i8_zext_vgpr_offset_2047(ptr addrspace(1) inreg %sbase, ptr addrspace(1) %voffset.ptr, i8 %data) {
; GCN-LABEL: global_store_saddr_i8_zext_vgpr_offset_2047:
; GCN: ; %bb.0:
; GCN-NEXT: global_load_dword v0, v[0:1], off
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: global_store_byte v0, v2, s[2:3] offset:2047
; GCN-NEXT: s_endpgm
;
; GFX11-LABEL: global_store_saddr_i8_zext_vgpr_offset_2047:
; GFX11: ; %bb.0:
; GFX11-NEXT: global_load_b32 v0, v[0:1], off
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: global_store_b8 v0, v2, s[2:3] offset:2047
; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
;
; GFX12-LABEL: global_store_saddr_i8_zext_vgpr_offset_2047:
; GFX12: ; %bb.0:
; GFX12-NEXT: global_load_b32 v0, v[0:1], off
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: global_store_b8 v0, v2, s[2:3] offset:2047
; GFX12-NEXT: s_nop 0
; GFX12-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX12-NEXT: s_endpgm
  %voffset = load i32, ptr addrspace(1) %voffset.ptr
  %zext.offset = zext i32 %voffset to i64
  %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
  %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 2047
  store i8 %data, ptr addrspace(1) %gep1
  ret void
}

; Maximum negative offset on gfx10
; The constant -2048 from the second getelementptr is expected to appear as the
; store's immediate offset on every tested target.
define amdgpu_ps void @global_store_saddr_i8_zext_vgpr_offset_neg2048(ptr addrspace(1) inreg %sbase, ptr addrspace(1) %voffset.ptr, i8 %data) {
; GCN-LABEL: global_store_saddr_i8_zext_vgpr_offset_neg2048:
; GCN: ; %bb.0:
; GCN-NEXT: global_load_dword v0, v[0:1], off
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: global_store_byte v0, v2, s[2:3] offset:-2048
; GCN-NEXT: s_endpgm
;
; GFX11-LABEL: global_store_saddr_i8_zext_vgpr_offset_neg2048:
; GFX11: ; %bb.0:
; GFX11-NEXT: global_load_b32 v0, v[0:1], off
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: global_store_b8 v0, v2, s[2:3] offset:-2048
; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
;
; GFX12-LABEL: global_store_saddr_i8_zext_vgpr_offset_neg2048:
; GFX12: ; %bb.0:
; GFX12-NEXT: global_load_b32 v0, v[0:1], off
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: global_store_b8 v0, v2, s[2:3] offset:-2048
; GFX12-NEXT: s_nop 0
; GFX12-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX12-NEXT: s_endpgm
  %voffset = load i32, ptr addrspace(1) %voffset.ptr
  %zext.offset = zext i32 %voffset to i64
  %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
  %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 -2048
  store i8 %data, ptr addrspace(1) %gep1
  ret void
}

; --------------------------------------------------------------------------------
; Uniformity edge cases
; --------------------------------------------------------------------------------

; The two tests in this section load their global base pointer from this
; addrspace(3) variable instead of receiving it as an inreg argument.
@ptr.in.lds = internal addrspace(3) global ptr addrspace(1) undef

; Base pointer is uniform, but also in VGPRs
; The expected output reads the 64-bit base with a DS load into v[2:3], copies
; it into s[0:1] with two v_readfirstlane_b32 instructions and then uses the
; saddr form of the store. Only the gfx900 checks contain an additional
; s_nop 4 between the readfirstlanes and the store.
define amdgpu_ps void @global_store_saddr_uniform_ptr_in_vgprs(i32 %voffset, i8 %data) {
; GFX9-LABEL: global_store_saddr_uniform_ptr_in_vgprs:
; GFX9: ; %bb.0:
; GFX9-NEXT: v_mov_b32_e32 v2, 0
; GFX9-NEXT: ds_read_b64 v[2:3], v2
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: v_readfirstlane_b32 s0, v2
; GFX9-NEXT: v_readfirstlane_b32 s1, v3
; GFX9-NEXT: s_nop 4
; GFX9-NEXT: global_store_byte v0, v1, s[0:1]
; GFX9-NEXT: s_endpgm
;
; GFX10-LABEL: global_store_saddr_uniform_ptr_in_vgprs:
; GFX10: ; %bb.0:
; GFX10-NEXT: v_mov_b32_e32 v2, 0
; GFX10-NEXT: ds_read_b64 v[2:3], v2
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: v_readfirstlane_b32 s0, v2
; GFX10-NEXT: v_readfirstlane_b32 s1, v3
; GFX10-NEXT: global_store_byte v0, v1, s[0:1]
; GFX10-NEXT: s_endpgm
;
; GFX11-LABEL: global_store_saddr_uniform_ptr_in_vgprs:
; GFX11: ; %bb.0:
; GFX11-NEXT: v_mov_b32_e32 v2, 0
; GFX11-NEXT: ds_load_b64 v[2:3], v2
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: v_readfirstlane_b32 s0, v2
; GFX11-NEXT: v_readfirstlane_b32 s1, v3
; GFX11-NEXT: global_store_b8 v0, v1, s[0:1]
; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
;
; GFX12-LABEL: global_store_saddr_uniform_ptr_in_vgprs:
; GFX12: ; %bb.0:
; GFX12-NEXT: v_mov_b32_e32 v2, 0
; GFX12-NEXT: ds_load_b64 v[2:3], v2
; GFX12-NEXT: s_wait_dscnt 0x0
; GFX12-NEXT: v_readfirstlane_b32 s0, v2
; GFX12-NEXT: v_readfirstlane_b32 s1, v3
; GFX12-NEXT: global_store_b8 v0, v1, s[0:1]
; GFX12-NEXT: s_nop 0
; GFX12-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX12-NEXT: s_endpgm
  %sbase = load ptr addrspace(1), ptr addrspace(3) @ptr.in.lds
  %zext.offset = zext i32 %voffset to i64
  %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
  store i8 %data, ptr addrspace(1) %gep0
  ret void
}

; Base pointer is uniform, but also in VGPRs, with imm offset
; Same sequence as the previous test; the constant -120 from the second
; getelementptr is expected to appear as the store's immediate offset.
define amdgpu_ps void @global_store_saddr_uniform_ptr_in_vgprs_immoffset(i32 %voffset, i8 %data) {
; GFX9-LABEL: global_store_saddr_uniform_ptr_in_vgprs_immoffset:
; GFX9: ; %bb.0:
; GFX9-NEXT: v_mov_b32_e32 v2, 0
; GFX9-NEXT: ds_read_b64 v[2:3], v2
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: v_readfirstlane_b32 s0, v2
; GFX9-NEXT: v_readfirstlane_b32 s1, v3
; GFX9-NEXT: s_nop 4
; GFX9-NEXT: global_store_byte v0, v1, s[0:1] offset:-120
; GFX9-NEXT: s_endpgm
;
; GFX10-LABEL: global_store_saddr_uniform_ptr_in_vgprs_immoffset:
; GFX10: ; %bb.0:
; GFX10-NEXT: v_mov_b32_e32 v2, 0
; GFX10-NEXT: ds_read_b64 v[2:3], v2
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: v_readfirstlane_b32 s0, v2
; GFX10-NEXT: v_readfirstlane_b32 s1, v3
; GFX10-NEXT: global_store_byte v0, v1, s[0:1] offset:-120
; GFX10-NEXT: s_endpgm
;
; GFX11-LABEL: global_store_saddr_uniform_ptr_in_vgprs_immoffset:
; GFX11: ; %bb.0:
; GFX11-NEXT: v_mov_b32_e32 v2, 0
; GFX11-NEXT: ds_load_b64 v[2:3], v2
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: v_readfirstlane_b32 s0, v2
; GFX11-NEXT: v_readfirstlane_b32 s1, v3
; GFX11-NEXT: global_store_b8 v0, v1, s[0:1] offset:-120
; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
;
; GFX12-LABEL: global_store_saddr_uniform_ptr_in_vgprs_immoffset:
; GFX12: ; %bb.0:
; GFX12-NEXT: v_mov_b32_e32 v2, 0
; GFX12-NEXT: ds_load_b64 v[2:3], v2
; GFX12-NEXT: s_wait_dscnt 0x0
; GFX12-NEXT: v_readfirstlane_b32 s0, v2
; GFX12-NEXT: v_readfirstlane_b32 s1, v3
; GFX12-NEXT: global_store_b8 v0, v1, s[0:1] offset:-120
; GFX12-NEXT: s_nop 0
; GFX12-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX12-NEXT: s_endpgm
  %sbase = load ptr addrspace(1), ptr addrspace(3) @ptr.in.lds
  %zext.offset = zext i32 %voffset to i64
  %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
  %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 -120
  store i8 %data, ptr addrspace(1) %gep1
  ret void
}

; --------------------------------------------------------------------------------
; Stress various type stores
; --------------------------------------------------------------------------------
;
; Each data type is tested twice: once with only the zero-extended 32-bit
; offset, and once (suffix _offset_neg128) with an additional constant offset of
; -128 bytes that is expected to appear as the store's immediate offset. In all
; of these functions the base pointer is passed inreg (checked as s[2:3]), the
; offset is checked in v0 and the data starts at v1.

; i16 data: expected as a 16-bit store (global_store_short / global_store_b16).
define amdgpu_ps void @global_store_saddr_i16_zext_vgpr(ptr addrspace(1) inreg %sbase, i32 %voffset, i16 %data) {
; GCN-LABEL: global_store_saddr_i16_zext_vgpr:
; GCN: ; %bb.0:
; GCN-NEXT: global_store_short v0, v1, s[2:3]
; GCN-NEXT: s_endpgm
;
; GFX11-LABEL: global_store_saddr_i16_zext_vgpr:
; GFX11: ; %bb.0:
; GFX11-NEXT: global_store_b16 v0, v1, s[2:3]
; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
;
; GFX12-LABEL: global_store_saddr_i16_zext_vgpr:
; GFX12: ; %bb.0:
; GFX12-NEXT: global_store_b16 v0, v1, s[2:3]
; GFX12-NEXT: s_nop 0
; GFX12-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX12-NEXT: s_endpgm
  %zext.offset = zext i32 %voffset to i64
  %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
  store i16 %data, ptr addrspace(1) %gep0
  ret void
}

define amdgpu_ps void @global_store_saddr_i16_zext_vgpr_offset_neg128(ptr addrspace(1) inreg %sbase, i32 %voffset, i16 %data) {
; GCN-LABEL: global_store_saddr_i16_zext_vgpr_offset_neg128:
; GCN: ; %bb.0:
; GCN-NEXT: global_store_short v0, v1, s[2:3] offset:-128
; GCN-NEXT: s_endpgm
;
; GFX11-LABEL: global_store_saddr_i16_zext_vgpr_offset_neg128:
; GFX11: ; %bb.0:
; GFX11-NEXT: global_store_b16 v0, v1, s[2:3] offset:-128
; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
;
; GFX12-LABEL: global_store_saddr_i16_zext_vgpr_offset_neg128:
; GFX12: ; %bb.0:
; GFX12-NEXT: global_store_b16 v0, v1, s[2:3] offset:-128
; GFX12-NEXT: s_nop 0
; GFX12-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX12-NEXT: s_endpgm
  %zext.offset = zext i32 %voffset to i64
  %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
  %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 -128
  store i16 %data, ptr addrspace(1) %gep1
  ret void
}

; half data: expected as the same 16-bit store as i16.
define amdgpu_ps void @global_store_saddr_f16_zext_vgpr(ptr addrspace(1) inreg %sbase, i32 %voffset, half %data) {
; GCN-LABEL: global_store_saddr_f16_zext_vgpr:
; GCN: ; %bb.0:
; GCN-NEXT: global_store_short v0, v1, s[2:3]
; GCN-NEXT: s_endpgm
;
; GFX11-LABEL: global_store_saddr_f16_zext_vgpr:
; GFX11: ; %bb.0:
; GFX11-NEXT: global_store_b16 v0, v1, s[2:3]
; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
;
; GFX12-LABEL: global_store_saddr_f16_zext_vgpr:
; GFX12: ; %bb.0:
; GFX12-NEXT: global_store_b16 v0, v1, s[2:3]
; GFX12-NEXT: s_nop 0
; GFX12-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX12-NEXT: s_endpgm
  %zext.offset = zext i32 %voffset to i64
  %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
  store half %data, ptr addrspace(1) %gep0
  ret void
}

define amdgpu_ps void @global_store_saddr_f16_zext_vgpr_offset_neg128(ptr addrspace(1) inreg %sbase, i32 %voffset, half %data) {
; GCN-LABEL: global_store_saddr_f16_zext_vgpr_offset_neg128:
; GCN: ; %bb.0:
; GCN-NEXT: global_store_short v0, v1, s[2:3] offset:-128
; GCN-NEXT: s_endpgm
;
; GFX11-LABEL: global_store_saddr_f16_zext_vgpr_offset_neg128:
; GFX11: ; %bb.0:
; GFX11-NEXT: global_store_b16 v0, v1, s[2:3] offset:-128
; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
;
; GFX12-LABEL: global_store_saddr_f16_zext_vgpr_offset_neg128:
; GFX12: ; %bb.0:
; GFX12-NEXT: global_store_b16 v0, v1, s[2:3] offset:-128
; GFX12-NEXT: s_nop 0
; GFX12-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX12-NEXT: s_endpgm
  %zext.offset = zext i32 %voffset to i64
  %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
  %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 -128
  store half %data, ptr addrspace(1) %gep1
  ret void
}

; i32 data: expected as a 32-bit store (global_store_dword / global_store_b32).
define amdgpu_ps void @global_store_saddr_i32_zext_vgpr(ptr addrspace(1) inreg %sbase, i32 %voffset, i32 %data) {
; GCN-LABEL: global_store_saddr_i32_zext_vgpr:
; GCN: ; %bb.0:
; GCN-NEXT: global_store_dword v0, v1, s[2:3]
; GCN-NEXT: s_endpgm
;
; GFX11-LABEL: global_store_saddr_i32_zext_vgpr:
; GFX11: ; %bb.0:
; GFX11-NEXT: global_store_b32 v0, v1, s[2:3]
; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
;
; GFX12-LABEL: global_store_saddr_i32_zext_vgpr:
; GFX12: ; %bb.0:
; GFX12-NEXT: global_store_b32 v0, v1, s[2:3]
; GFX12-NEXT: s_nop 0
; GFX12-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX12-NEXT: s_endpgm
  %zext.offset = zext i32 %voffset to i64
  %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
  store i32 %data, ptr addrspace(1) %gep0
  ret void
}

define amdgpu_ps void @global_store_saddr_i32_zext_vgpr_offset_neg128(ptr addrspace(1) inreg %sbase, i32 %voffset, i32 %data) {
; GCN-LABEL: global_store_saddr_i32_zext_vgpr_offset_neg128:
; GCN: ; %bb.0:
; GCN-NEXT: global_store_dword v0, v1, s[2:3] offset:-128
; GCN-NEXT: s_endpgm
;
; GFX11-LABEL: global_store_saddr_i32_zext_vgpr_offset_neg128:
; GFX11: ; %bb.0:
; GFX11-NEXT: global_store_b32 v0, v1, s[2:3] offset:-128
; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
;
; GFX12-LABEL: global_store_saddr_i32_zext_vgpr_offset_neg128:
; GFX12: ; %bb.0:
; GFX12-NEXT: global_store_b32 v0, v1, s[2:3] offset:-128
; GFX12-NEXT: s_nop 0
; GFX12-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX12-NEXT: s_endpgm
  %zext.offset = zext i32 %voffset to i64
  %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
  %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 -128
  store i32 %data, ptr addrspace(1) %gep1
  ret void
}

; float data: expected as the same 32-bit store as i32.
define amdgpu_ps void @global_store_saddr_f32_zext_vgpr(ptr addrspace(1) inreg %sbase, i32 %voffset, float %data) {
; GCN-LABEL: global_store_saddr_f32_zext_vgpr:
; GCN: ; %bb.0:
; GCN-NEXT: global_store_dword v0, v1, s[2:3]
; GCN-NEXT: s_endpgm
;
; GFX11-LABEL: global_store_saddr_f32_zext_vgpr:
; GFX11: ; %bb.0:
; GFX11-NEXT: global_store_b32 v0, v1, s[2:3]
; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
;
; GFX12-LABEL: global_store_saddr_f32_zext_vgpr:
; GFX12: ; %bb.0:
; GFX12-NEXT: global_store_b32 v0, v1, s[2:3]
; GFX12-NEXT: s_nop 0
; GFX12-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX12-NEXT: s_endpgm
  %zext.offset = zext i32 %voffset to i64
  %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
  store float %data, ptr addrspace(1) %gep0
  ret void
}

define amdgpu_ps void @global_store_saddr_f32_zext_vgpr_offset_neg128(ptr addrspace(1) inreg %sbase, i32 %voffset, float %data) {
; GCN-LABEL: global_store_saddr_f32_zext_vgpr_offset_neg128:
; GCN: ; %bb.0:
; GCN-NEXT: global_store_dword v0, v1, s[2:3] offset:-128
; GCN-NEXT: s_endpgm
;
; GFX11-LABEL: global_store_saddr_f32_zext_vgpr_offset_neg128:
; GFX11: ; %bb.0:
; GFX11-NEXT: global_store_b32 v0, v1, s[2:3] offset:-128
; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
;
; GFX12-LABEL: global_store_saddr_f32_zext_vgpr_offset_neg128:
; GFX12: ; %bb.0:
; GFX12-NEXT: global_store_b32 v0, v1, s[2:3] offset:-128
; GFX12-NEXT: s_nop 0
; GFX12-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX12-NEXT: s_endpgm
  %zext.offset = zext i32 %voffset to i64
  %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
  %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 -128
  store float %data, ptr addrspace(1) %gep1
  ret void
}

; addrspace(3) pointer data: expected as the same 32-bit store as i32.
define amdgpu_ps void @global_store_saddr_p3_zext_vgpr(ptr addrspace(1) inreg %sbase, i32 %voffset, ptr addrspace(3) %data) {
; GCN-LABEL: global_store_saddr_p3_zext_vgpr:
; GCN: ; %bb.0:
; GCN-NEXT: global_store_dword v0, v1, s[2:3]
; GCN-NEXT: s_endpgm
;
; GFX11-LABEL: global_store_saddr_p3_zext_vgpr:
; GFX11: ; %bb.0:
; GFX11-NEXT: global_store_b32 v0, v1, s[2:3]
; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
;
; GFX12-LABEL: global_store_saddr_p3_zext_vgpr:
; GFX12: ; %bb.0:
; GFX12-NEXT: global_store_b32 v0, v1, s[2:3]
; GFX12-NEXT: s_nop 0
; GFX12-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX12-NEXT: s_endpgm
  %zext.offset = zext i32 %voffset to i64
  %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
  store ptr addrspace(3) %data, ptr addrspace(1) %gep0
  ret void
}

define amdgpu_ps void @global_store_saddr_p3_zext_vgpr_offset_neg128(ptr addrspace(1) inreg %sbase, i32 %voffset, ptr addrspace(3) %data) {
; GCN-LABEL: global_store_saddr_p3_zext_vgpr_offset_neg128:
; GCN: ; %bb.0:
; GCN-NEXT: global_store_dword v0, v1, s[2:3] offset:-128
; GCN-NEXT: s_endpgm
;
; GFX11-LABEL: global_store_saddr_p3_zext_vgpr_offset_neg128:
; GFX11: ; %bb.0:
; GFX11-NEXT: global_store_b32 v0, v1, s[2:3] offset:-128
; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
;
; GFX12-LABEL: global_store_saddr_p3_zext_vgpr_offset_neg128:
; GFX12: ; %bb.0:
; GFX12-NEXT: global_store_b32 v0, v1, s[2:3] offset:-128
; GFX12-NEXT: s_nop 0
; GFX12-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX12-NEXT: s_endpgm
  %zext.offset = zext i32 %voffset to i64
  %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
  %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 -128
  store ptr addrspace(3) %data, ptr addrspace(1) %gep1
  ret void
}

; i64 data: expected as a 64-bit store of v[1:2] (global_store_dwordx2 /
; global_store_b64).
define amdgpu_ps void @global_store_saddr_i64_zext_vgpr(ptr addrspace(1) inreg %sbase, i32 %voffset, i64 %data) {
; GCN-LABEL: global_store_saddr_i64_zext_vgpr:
; GCN: ; %bb.0:
; GCN-NEXT: global_store_dwordx2 v0, v[1:2], s[2:3]
; GCN-NEXT: s_endpgm
;
; GFX11-LABEL: global_store_saddr_i64_zext_vgpr:
; GFX11: ; %bb.0:
; GFX11-NEXT: global_store_b64 v0, v[1:2], s[2:3]
; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
;
; GFX12-LABEL: global_store_saddr_i64_zext_vgpr:
; GFX12: ; %bb.0:
; GFX12-NEXT: global_store_b64 v0, v[1:2], s[2:3]
; GFX12-NEXT: s_nop 0
; GFX12-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX12-NEXT: s_endpgm
  %zext.offset = zext i32 %voffset to i64
  %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
  store i64 %data, ptr addrspace(1) %gep0
  ret void
}

define amdgpu_ps void @global_store_saddr_i64_zext_vgpr_offset_neg128(ptr addrspace(1) inreg %sbase, i32 %voffset, i64 %data) {
; GCN-LABEL: global_store_saddr_i64_zext_vgpr_offset_neg128:
; GCN: ; %bb.0:
; GCN-NEXT: global_store_dwordx2 v0, v[1:2], s[2:3] offset:-128
; GCN-NEXT: s_endpgm
;
; GFX11-LABEL: global_store_saddr_i64_zext_vgpr_offset_neg128:
; GFX11: ; %bb.0:
; GFX11-NEXT: global_store_b64 v0, v[1:2], s[2:3] offset:-128
; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
;
; GFX12-LABEL: global_store_saddr_i64_zext_vgpr_offset_neg128:
; GFX12: ; %bb.0:
; GFX12-NEXT: global_store_b64 v0, v[1:2], s[2:3] offset:-128
; GFX12-NEXT: s_nop 0
; GFX12-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX12-NEXT: s_endpgm
  %zext.offset = zext i32 %voffset to i64
  %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
  %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 -128
  store i64 %data, ptr addrspace(1) %gep1
  ret void
}

; double data: expected as the same 64-bit store as i64.
define amdgpu_ps void @global_store_saddr_f64_zext_vgpr(ptr addrspace(1) inreg %sbase, i32 %voffset, double %data) {
; GCN-LABEL: global_store_saddr_f64_zext_vgpr:
; GCN: ; %bb.0:
; GCN-NEXT: global_store_dwordx2 v0, v[1:2], s[2:3]
; GCN-NEXT: s_endpgm
;
; GFX11-LABEL: global_store_saddr_f64_zext_vgpr:
; GFX11: ; %bb.0:
; GFX11-NEXT: global_store_b64 v0, v[1:2], s[2:3]
; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
;
; GFX12-LABEL: global_store_saddr_f64_zext_vgpr:
; GFX12: ; %bb.0:
; GFX12-NEXT: global_store_b64 v0, v[1:2], s[2:3]
; GFX12-NEXT: s_nop 0
; GFX12-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX12-NEXT: s_endpgm
  %zext.offset = zext i32 %voffset to i64
  %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
  store double %data, ptr addrspace(1) %gep0
  ret void
}

define amdgpu_ps void @global_store_saddr_f64_zext_vgpr_offset_neg128(ptr addrspace(1) inreg %sbase, i32 %voffset, double %data) {
; GCN-LABEL: global_store_saddr_f64_zext_vgpr_offset_neg128:
; GCN: ; %bb.0:
; GCN-NEXT: global_store_dwordx2 v0, v[1:2], s[2:3] offset:-128
; GCN-NEXT: s_endpgm
;
; GFX11-LABEL: global_store_saddr_f64_zext_vgpr_offset_neg128:
; GFX11: ; %bb.0:
; GFX11-NEXT: global_store_b64 v0, v[1:2], s[2:3] offset:-128
; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
;
; GFX12-LABEL: global_store_saddr_f64_zext_vgpr_offset_neg128:
; GFX12: ; %bb.0:
; GFX12-NEXT: global_store_b64 v0, v[1:2], s[2:3] offset:-128
; GFX12-NEXT: s_nop 0
; GFX12-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX12-NEXT: s_endpgm
  %zext.offset = zext i32 %voffset to i64
  %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
  %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 -128
  store double %data, ptr addrspace(1) %gep1
  ret void
}

; <2 x i32> data: expected as the same 64-bit store as i64.
define amdgpu_ps void @global_store_saddr_v2i32_zext_vgpr(ptr addrspace(1) inreg %sbase, i32 %voffset, <2 x i32> %data) {
; GCN-LABEL: global_store_saddr_v2i32_zext_vgpr:
; GCN: ; %bb.0:
; GCN-NEXT: global_store_dwordx2 v0, v[1:2], s[2:3]
; GCN-NEXT: s_endpgm
;
; GFX11-LABEL: global_store_saddr_v2i32_zext_vgpr:
; GFX11: ; %bb.0:
; GFX11-NEXT: global_store_b64 v0, v[1:2], s[2:3]
; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
;
; GFX12-LABEL: global_store_saddr_v2i32_zext_vgpr:
; GFX12: ; %bb.0:
; GFX12-NEXT: global_store_b64 v0, v[1:2], s[2:3]
; GFX12-NEXT: s_nop 0
; GFX12-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX12-NEXT: s_endpgm
  %zext.offset = zext i32 %voffset to i64
  %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
  store <2 x i32> %data, ptr addrspace(1) %gep0
  ret void
}

define amdgpu_ps void @global_store_saddr_v2i32_zext_vgpr_offset_neg128(ptr addrspace(1) inreg %sbase, i32 %voffset, <2 x i32> %data) {
; GCN-LABEL: global_store_saddr_v2i32_zext_vgpr_offset_neg128:
; GCN: ; %bb.0:
; GCN-NEXT: global_store_dwordx2 v0, v[1:2], s[2:3] offset:-128
; GCN-NEXT: s_endpgm
;
; GFX11-LABEL: global_store_saddr_v2i32_zext_vgpr_offset_neg128:
; GFX11: ; %bb.0:
; GFX11-NEXT: global_store_b64 v0, v[1:2], s[2:3] offset:-128
; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
;
; GFX12-LABEL: global_store_saddr_v2i32_zext_vgpr_offset_neg128:
; GFX12: ; %bb.0:
; GFX12-NEXT: global_store_b64 v0, v[1:2], s[2:3] offset:-128
; GFX12-NEXT: s_nop 0
; GFX12-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX12-NEXT: s_endpgm
  %zext.offset = zext i32 %voffset to i64
  %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
  %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 -128
  store <2 x i32> %data, ptr addrspace(1) %gep1
  ret void
}

; <2 x float> data: expected as the same 64-bit store as i64.
define amdgpu_ps void @global_store_saddr_v2f32_zext_vgpr(ptr addrspace(1) inreg %sbase, i32 %voffset, <2 x float> %data) {
; GCN-LABEL: global_store_saddr_v2f32_zext_vgpr:
; GCN: ; %bb.0:
; GCN-NEXT: global_store_dwordx2 v0, v[1:2], s[2:3]
; GCN-NEXT: s_endpgm
;
; GFX11-LABEL: global_store_saddr_v2f32_zext_vgpr:
; GFX11: ; %bb.0:
; GFX11-NEXT: global_store_b64 v0, v[1:2], s[2:3]
; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
;
; GFX12-LABEL: global_store_saddr_v2f32_zext_vgpr:
; GFX12: ; %bb.0:
; GFX12-NEXT: global_store_b64 v0, v[1:2], s[2:3]
; GFX12-NEXT: s_nop 0
; GFX12-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX12-NEXT: s_endpgm
  %zext.offset = zext i32 %voffset to i64
  %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
  store <2 x float> %data, ptr addrspace(1) %gep0
  ret void
}

define amdgpu_ps void @global_store_saddr_v2f32_zext_vgpr_offset_neg128(ptr addrspace(1) inreg %sbase, i32 %voffset, <2 x float> %data) {
; GCN-LABEL: global_store_saddr_v2f32_zext_vgpr_offset_neg128:
; GCN: ; %bb.0:
; GCN-NEXT: global_store_dwordx2 v0, v[1:2], s[2:3] offset:-128
; GCN-NEXT: s_endpgm
;
; GFX11-LABEL: global_store_saddr_v2f32_zext_vgpr_offset_neg128:
; GFX11: ; %bb.0:
; GFX11-NEXT: global_store_b64 v0, v[1:2], s[2:3] offset:-128
; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
;
; GFX12-LABEL: global_store_saddr_v2f32_zext_vgpr_offset_neg128:
; GFX12: ; %bb.0:
; GFX12-NEXT: global_store_b64 v0, v[1:2], s[2:3] offset:-128
; GFX12-NEXT: s_nop 0
; GFX12-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX12-NEXT: s_endpgm
  %zext.offset = zext i32 %voffset to i64
  %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
  %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 -128
  store <2 x float> %data, ptr addrspace(1) %gep1
  ret void
}

; <4 x i16> data: expected as the same 64-bit store as i64.
define amdgpu_ps void @global_store_saddr_v4i16_zext_vgpr(ptr addrspace(1) inreg %sbase, i32 %voffset, <4 x i16> %data) {
; GCN-LABEL: global_store_saddr_v4i16_zext_vgpr:
; GCN: ; %bb.0:
; GCN-NEXT: global_store_dwordx2 v0, v[1:2], s[2:3]
; GCN-NEXT: s_endpgm
;
; GFX11-LABEL: global_store_saddr_v4i16_zext_vgpr:
; GFX11: ; %bb.0:
; GFX11-NEXT: global_store_b64 v0, v[1:2], s[2:3]
; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
;
; GFX12-LABEL: global_store_saddr_v4i16_zext_vgpr:
; GFX12: ; %bb.0:
; GFX12-NEXT: global_store_b64 v0, v[1:2], s[2:3]
; GFX12-NEXT: s_nop 0
; GFX12-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX12-NEXT: s_endpgm
  %zext.offset = zext i32 %voffset to i64
  %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
  store <4 x i16> %data, ptr addrspace(1) %gep0
  ret void
}

define amdgpu_ps void @global_store_saddr_v4i16_zext_vgpr_offset_neg128(ptr addrspace(1) inreg %sbase, i32 %voffset, <4 x i16> %data) {
; GCN-LABEL: global_store_saddr_v4i16_zext_vgpr_offset_neg128:
; GCN: ; %bb.0:
; GCN-NEXT: global_store_dwordx2 v0, v[1:2], s[2:3] offset:-128
; GCN-NEXT: s_endpgm
;
; GFX11-LABEL: global_store_saddr_v4i16_zext_vgpr_offset_neg128:
; GFX11: ; %bb.0:
; GFX11-NEXT: global_store_b64 v0, v[1:2], s[2:3] offset:-128
; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
;
; GFX12-LABEL: global_store_saddr_v4i16_zext_vgpr_offset_neg128:
; GFX12: ; %bb.0:
; GFX12-NEXT: global_store_b64 v0, v[1:2], s[2:3] offset:-128
; GFX12-NEXT: s_nop 0
; GFX12-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX12-NEXT: s_endpgm
  %zext.offset = zext i32 %voffset to i64
  %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
  %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 -128
  store <4 x i16> %data, ptr addrspace(1) %gep1
  ret void
}

; <4 x half> data: expected as the same 64-bit store as i64.
define amdgpu_ps void @global_store_saddr_v4f16_zext_vgpr(ptr addrspace(1) inreg %sbase, i32 %voffset, <4 x half> %data) {
; GCN-LABEL: global_store_saddr_v4f16_zext_vgpr:
; GCN: ; %bb.0:
; GCN-NEXT: global_store_dwordx2 v0, v[1:2], s[2:3]
; GCN-NEXT: s_endpgm
;
; GFX11-LABEL: global_store_saddr_v4f16_zext_vgpr:
; GFX11: ; %bb.0:
; GFX11-NEXT: global_store_b64 v0, v[1:2], s[2:3]
; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
;
; GFX12-LABEL: global_store_saddr_v4f16_zext_vgpr:
; GFX12: ; %bb.0:
; GFX12-NEXT: global_store_b64 v0, v[1:2], s[2:3]
; GFX12-NEXT: s_nop 0
; GFX12-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX12-NEXT: s_endpgm
  %zext.offset = zext i32 %voffset to i64
  %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
  store <4 x half> %data, ptr addrspace(1) %gep0
  ret void
}

define amdgpu_ps void @global_store_saddr_v4f16_zext_vgpr_offset_neg128(ptr addrspace(1) inreg %sbase, i32 %voffset, <4 x half> %data) {
; GCN-LABEL: global_store_saddr_v4f16_zext_vgpr_offset_neg128:
; GCN: ; %bb.0:
; GCN-NEXT: global_store_dwordx2 v0, v[1:2], s[2:3] offset:-128
; GCN-NEXT: s_endpgm
;
; GFX11-LABEL: global_store_saddr_v4f16_zext_vgpr_offset_neg128:
; GFX11: ; %bb.0:
; GFX11-NEXT: global_store_b64 v0, v[1:2], s[2:3] offset:-128
; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
;
; GFX12-LABEL: global_store_saddr_v4f16_zext_vgpr_offset_neg128:
; GFX12: ; %bb.0:
; GFX12-NEXT: global_store_b64 v0, v[1:2], s[2:3] offset:-128
; GFX12-NEXT: s_nop 0
; GFX12-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX12-NEXT: s_endpgm
  %zext.offset = zext i32 %voffset to i64
  %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
  %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 -128
  store <4 x half> %data, ptr addrspace(1) %gep1
  ret void
}

; addrspace(1) pointer data: expected as the same 64-bit store as i64.
define amdgpu_ps void @global_store_saddr_p1_zext_vgpr(ptr addrspace(1) inreg %sbase, i32 %voffset, ptr addrspace(1) %data) {
; GCN-LABEL: global_store_saddr_p1_zext_vgpr:
; GCN: ; %bb.0:
; GCN-NEXT: global_store_dwordx2 v0, v[1:2], s[2:3]
; GCN-NEXT: s_endpgm
;
; GFX11-LABEL: global_store_saddr_p1_zext_vgpr:
; GFX11: ; %bb.0:
; GFX11-NEXT: global_store_b64 v0, v[1:2], s[2:3]
; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
;
; GFX12-LABEL: global_store_saddr_p1_zext_vgpr:
; GFX12: ; %bb.0:
; GFX12-NEXT: global_store_b64 v0, v[1:2], s[2:3]
; GFX12-NEXT: s_nop 0
; GFX12-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX12-NEXT: s_endpgm
  %zext.offset = zext i32 %voffset to i64
  %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
  store ptr addrspace(1) %data, ptr addrspace(1) %gep0
  ret void
}

define amdgpu_ps void @global_store_saddr_p1_zext_vgpr_offset_neg128(ptr addrspace(1) inreg %sbase, i32 %voffset, ptr addrspace(1) %data) {
; GCN-LABEL: global_store_saddr_p1_zext_vgpr_offset_neg128:
; GCN: ; %bb.0:
; GCN-NEXT: global_store_dwordx2 v0, v[1:2], s[2:3] offset:-128
; GCN-NEXT: s_endpgm
;
; GFX11-LABEL: global_store_saddr_p1_zext_vgpr_offset_neg128:
; GFX11: ; %bb.0:
; GFX11-NEXT: global_store_b64 v0, v[1:2], s[2:3] offset:-128
; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
;
; GFX12-LABEL: global_store_saddr_p1_zext_vgpr_offset_neg128:
; GFX12: ; %bb.0:
; GFX12-NEXT: global_store_b64 v0, v[1:2], s[2:3] offset:-128
; GFX12-NEXT: s_nop 0
; GFX12-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX12-NEXT: s_endpgm
  %zext.offset = zext i32 %voffset to i64
  %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
  %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 -128
  store ptr addrspace(1) %data, ptr addrspace(1) %gep1
  ret void
}

; <3 x i32> data: expected as a 96-bit store of v[1:3] (global_store_dwordx3 /
; global_store_b96).
define amdgpu_ps void @global_store_saddr_v3i32_zext_vgpr(ptr addrspace(1) inreg %sbase, i32 %voffset, <3 x i32> %data) {
; GCN-LABEL: global_store_saddr_v3i32_zext_vgpr:
; GCN: ; %bb.0:
; GCN-NEXT: global_store_dwordx3 v0, v[1:3], s[2:3]
; GCN-NEXT: s_endpgm
;
; GFX11-LABEL: global_store_saddr_v3i32_zext_vgpr:
; GFX11: ; %bb.0:
; GFX11-NEXT: global_store_b96 v0, v[1:3], s[2:3]
; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
;
; GFX12-LABEL: global_store_saddr_v3i32_zext_vgpr:
; GFX12: ; %bb.0:
; GFX12-NEXT: global_store_b96 v0, v[1:3], s[2:3]
; GFX12-NEXT: s_nop 0
; GFX12-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX12-NEXT: s_endpgm
  %zext.offset = zext i32 %voffset to i64
  %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
  store <3 x i32> %data, ptr addrspace(1) %gep0
  ret void
}

define amdgpu_ps void @global_store_saddr_v3i32_zext_vgpr_offset_neg128(ptr addrspace(1) inreg %sbase, i32 %voffset, <3 x i32> %data) {
; GCN-LABEL: global_store_saddr_v3i32_zext_vgpr_offset_neg128:
; GCN: ; %bb.0:
; GCN-NEXT: global_store_dwordx3 v0, v[1:3], s[2:3] offset:-128
; GCN-NEXT: s_endpgm
;
; GFX11-LABEL: global_store_saddr_v3i32_zext_vgpr_offset_neg128:
; GFX11: ; %bb.0:
; GFX11-NEXT: global_store_b96 v0, v[1:3], s[2:3] offset:-128
; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
;
; GFX12-LABEL: global_store_saddr_v3i32_zext_vgpr_offset_neg128:
; GFX12: ; %bb.0:
; GFX12-NEXT: global_store_b96 v0, v[1:3], s[2:3] offset:-128
; GFX12-NEXT: s_nop 0
; GFX12-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX12-NEXT: s_endpgm
  %zext.offset = zext i32 %voffset to i64
  %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
  %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 -128
  store <3 x i32> %data, ptr addrspace(1) %gep1
  ret void
}

; <3 x float> data: expected as the same 96-bit store as <3 x i32>.
define amdgpu_ps void @global_store_saddr_v3f32_zext_vgpr(ptr addrspace(1) inreg %sbase, i32 %voffset, <3 x float> %data) {
; GCN-LABEL: global_store_saddr_v3f32_zext_vgpr:
; GCN: ; %bb.0:
; GCN-NEXT: global_store_dwordx3 v0, v[1:3], s[2:3]
; GCN-NEXT: s_endpgm
;
; GFX11-LABEL: global_store_saddr_v3f32_zext_vgpr:
; GFX11: ; %bb.0:
; GFX11-NEXT: global_store_b96 v0, v[1:3], s[2:3]
; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
;
; GFX12-LABEL: global_store_saddr_v3f32_zext_vgpr:
; GFX12: ; %bb.0:
; GFX12-NEXT: global_store_b96 v0, v[1:3], s[2:3]
; GFX12-NEXT: s_nop 0
; GFX12-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX12-NEXT: s_endpgm
  %zext.offset = zext i32 %voffset to i64
  %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
  store <3 x float> %data, ptr addrspace(1) %gep0
  ret void
}

define amdgpu_ps void @global_store_saddr_v3f32_zext_vgpr_offset_neg128(ptr addrspace(1) inreg %sbase, i32 %voffset, <3 x float> %data) {
; GCN-LABEL: global_store_saddr_v3f32_zext_vgpr_offset_neg128:
; GCN: ; %bb.0:
; GCN-NEXT: global_store_dwordx3 v0, v[1:3], s[2:3] offset:-128
; GCN-NEXT: s_endpgm
;
; GFX11-LABEL: global_store_saddr_v3f32_zext_vgpr_offset_neg128:
; GFX11: ; %bb.0:
; GFX11-NEXT: global_store_b96 v0, v[1:3], s[2:3] offset:-128
; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
;
; GFX12-LABEL: global_store_saddr_v3f32_zext_vgpr_offset_neg128:
; GFX12: ; %bb.0:
; GFX12-NEXT: global_store_b96 v0, v[1:3], s[2:3] offset:-128
; GFX12-NEXT: s_nop 0
; GFX12-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX12-NEXT: s_endpgm
  %zext.offset = zext i32 %voffset to i64
  %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
  %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 -128
  store <3 x float> %data, ptr addrspace(1) %gep1
  ret void
}

; <6 x i16> data: expected as the same 96-bit store as <3 x i32>.
define amdgpu_ps void @global_store_saddr_v6i16_zext_vgpr(ptr addrspace(1) inreg %sbase, i32 %voffset, <6 x i16> %data) {
; GCN-LABEL: global_store_saddr_v6i16_zext_vgpr:
; GCN: ; %bb.0:
; GCN-NEXT: global_store_dwordx3 v0, v[1:3], s[2:3]
; GCN-NEXT: s_endpgm
;
; GFX11-LABEL: global_store_saddr_v6i16_zext_vgpr:
; GFX11: ; %bb.0:
; GFX11-NEXT: global_store_b96 v0, v[1:3], s[2:3]
; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
;
; GFX12-LABEL: global_store_saddr_v6i16_zext_vgpr:
; GFX12: ; %bb.0:
; GFX12-NEXT: global_store_b96 v0, v[1:3], s[2:3]
; GFX12-NEXT: s_nop 0
; GFX12-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX12-NEXT: s_endpgm
  %zext.offset = zext i32 %voffset to i64
  %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
  store <6 x i16> %data, ptr addrspace(1) %gep0
  ret void
}

define amdgpu_ps void @global_store_saddr_v6i16_zext_vgpr_offset_neg128(ptr addrspace(1) inreg %sbase, i32 %voffset, <6 x i16> %data) {
; GCN-LABEL: global_store_saddr_v6i16_zext_vgpr_offset_neg128:
; GCN: ; %bb.0:
; GCN-NEXT: global_store_dwordx3 v0, v[1:3], s[2:3] offset:-128
; GCN-NEXT: s_endpgm
;
; GFX11-LABEL: global_store_saddr_v6i16_zext_vgpr_offset_neg128:
; GFX11: ; %bb.0:
; GFX11-NEXT: global_store_b96 v0, v[1:3], s[2:3] offset:-128
; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
;
; GFX12-LABEL: global_store_saddr_v6i16_zext_vgpr_offset_neg128:
; GFX12: ; %bb.0:
; GFX12-NEXT: global_store_b96 v0, v[1:3], s[2:3] offset:-128
; GFX12-NEXT: s_nop 0
; GFX12-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX12-NEXT: s_endpgm
  %zext.offset = zext i32 %voffset to i64
  %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
  %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 -128
  store <6 x i16> %data, ptr addrspace(1) %gep1
  ret void
}

; <6 x half> data: expected as the same 96-bit store as <3 x i32>.
define amdgpu_ps void @global_store_saddr_v6f16_zext_vgpr(ptr addrspace(1) inreg %sbase, i32 %voffset, <6 x half> %data) {
; GCN-LABEL: global_store_saddr_v6f16_zext_vgpr:
; GCN: ; %bb.0:
; GCN-NEXT: global_store_dwordx3 v0, v[1:3], s[2:3]
; GCN-NEXT: s_endpgm
;
; GFX11-LABEL: global_store_saddr_v6f16_zext_vgpr:
; GFX11: ; %bb.0:
; GFX11-NEXT: global_store_b96 v0, v[1:3], s[2:3]
; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
;
; GFX12-LABEL: global_store_saddr_v6f16_zext_vgpr:
; GFX12: ; %bb.0:
; GFX12-NEXT: global_store_b96 v0, v[1:3], s[2:3]
; GFX12-NEXT: s_nop 0
; GFX12-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX12-NEXT: s_endpgm
  %zext.offset = zext i32 %voffset to i64
  %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
  store <6 x half> %data, ptr addrspace(1) %gep0
  ret void
}

define amdgpu_ps void @global_store_saddr_v6f16_zext_vgpr_offset_neg128(ptr addrspace(1) inreg %sbase, i32 %voffset, <6 x half> %data) {
; GCN-LABEL: global_store_saddr_v6f16_zext_vgpr_offset_neg128:
; GCN: ; %bb.0:
; GCN-NEXT: global_store_dwordx3 v0, v[1:3], s[2:3] offset:-128
; GCN-NEXT: s_endpgm
;
; GFX11-LABEL: global_store_saddr_v6f16_zext_vgpr_offset_neg128:
; GFX11: ; %bb.0:
; GFX11-NEXT: global_store_b96 v0, v[1:3], s[2:3] offset:-128
; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
;
; GFX12-LABEL: global_store_saddr_v6f16_zext_vgpr_offset_neg128:
; GFX12: ; %bb.0:
; GFX12-NEXT: global_store_b96 v0, v[1:3], s[2:3] offset:-128
; GFX12-NEXT: s_nop 0
; GFX12-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX12-NEXT: s_endpgm
  %zext.offset = zext i32 %voffset to i64
  %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
  %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 -128
  store <6 x half> %data, ptr addrspace(1) %gep1
  ret void
}

; <4 x i32> data: expected as a 128-bit store of v[1:4] (global_store_dwordx4 /
; global_store_b128).
define amdgpu_ps void @global_store_saddr_v4i32_zext_vgpr(ptr addrspace(1) inreg %sbase, i32 %voffset, <4 x i32> %data) {
; GCN-LABEL: global_store_saddr_v4i32_zext_vgpr:
; GCN: ; %bb.0:
; GCN-NEXT: global_store_dwordx4 v0, v[1:4], s[2:3]
; GCN-NEXT: s_endpgm
;
; GFX11-LABEL: global_store_saddr_v4i32_zext_vgpr:
; GFX11: ; %bb.0:
; GFX11-NEXT: global_store_b128 v0, v[1:4], s[2:3]
; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
;
; GFX12-LABEL: global_store_saddr_v4i32_zext_vgpr:
; GFX12: ; %bb.0:
; GFX12-NEXT: global_store_b128 v0, v[1:4], s[2:3]
; GFX12-NEXT: s_nop 0
; GFX12-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX12-NEXT: s_endpgm
  %zext.offset = zext i32 %voffset to i64
  %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
  store <4 x i32> %data, ptr addrspace(1) %gep0
  ret void
}

define amdgpu_ps void @global_store_saddr_v4i32_zext_vgpr_offset_neg128(ptr addrspace(1) inreg %sbase, i32 %voffset, <4 x i32> %data) { ; GCN-LABEL: global_store_saddr_v4i32_zext_vgpr_offset_neg128: ; GCN: ; %bb.0: ; 
GCN-NEXT: global_store_dwordx4 v0, v[1:4], s[2:3] offset:-128 ; GCN-NEXT: s_endpgm ; ; GFX11-LABEL: global_store_saddr_v4i32_zext_vgpr_offset_neg128: ; GFX11: ; %bb.0: ; GFX11-NEXT: global_store_b128 v0, v[1:4], s[2:3] offset:-128 ; GFX11-NEXT: s_nop 0 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm ; ; GFX12-LABEL: global_store_saddr_v4i32_zext_vgpr_offset_neg128: ; GFX12: ; %bb.0: ; GFX12-NEXT: global_store_b128 v0, v[1:4], s[2:3] offset:-128 ; GFX12-NEXT: s_nop 0 ; GFX12-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX12-NEXT: s_endpgm %zext.offset = zext i32 %voffset to i64 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 -128 store <4 x i32> %data, ptr addrspace(1) %gep1 ret void } define amdgpu_ps void @global_store_saddr_v4f32_zext_vgpr(ptr addrspace(1) inreg %sbase, i32 %voffset, <4 x float> %data) { ; GCN-LABEL: global_store_saddr_v4f32_zext_vgpr: ; GCN: ; %bb.0: ; GCN-NEXT: global_store_dwordx4 v0, v[1:4], s[2:3] ; GCN-NEXT: s_endpgm ; ; GFX11-LABEL: global_store_saddr_v4f32_zext_vgpr: ; GFX11: ; %bb.0: ; GFX11-NEXT: global_store_b128 v0, v[1:4], s[2:3] ; GFX11-NEXT: s_nop 0 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm ; ; GFX12-LABEL: global_store_saddr_v4f32_zext_vgpr: ; GFX12: ; %bb.0: ; GFX12-NEXT: global_store_b128 v0, v[1:4], s[2:3] ; GFX12-NEXT: s_nop 0 ; GFX12-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX12-NEXT: s_endpgm %zext.offset = zext i32 %voffset to i64 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset store <4 x float> %data, ptr addrspace(1) %gep0 ret void } define amdgpu_ps void @global_store_saddr_v4f32_zext_vgpr_offset_neg128(ptr addrspace(1) inreg %sbase, i32 %voffset, <4 x float> %data) { ; GCN-LABEL: global_store_saddr_v4f32_zext_vgpr_offset_neg128: ; GCN: ; %bb.0: ; GCN-NEXT: global_store_dwordx4 v0, v[1:4], s[2:3] offset:-128 ; GCN-NEXT: s_endpgm ; ; 
GFX11-LABEL: global_store_saddr_v4f32_zext_vgpr_offset_neg128: ; GFX11: ; %bb.0: ; GFX11-NEXT: global_store_b128 v0, v[1:4], s[2:3] offset:-128 ; GFX11-NEXT: s_nop 0 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm ; ; GFX12-LABEL: global_store_saddr_v4f32_zext_vgpr_offset_neg128: ; GFX12: ; %bb.0: ; GFX12-NEXT: global_store_b128 v0, v[1:4], s[2:3] offset:-128 ; GFX12-NEXT: s_nop 0 ; GFX12-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX12-NEXT: s_endpgm %zext.offset = zext i32 %voffset to i64 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 -128 store <4 x float> %data, ptr addrspace(1) %gep1 ret void } define amdgpu_ps void @global_store_saddr_v2i64_zext_vgpr(ptr addrspace(1) inreg %sbase, i32 %voffset, <2 x i64> %data) { ; GCN-LABEL: global_store_saddr_v2i64_zext_vgpr: ; GCN: ; %bb.0: ; GCN-NEXT: global_store_dwordx4 v0, v[1:4], s[2:3] ; GCN-NEXT: s_endpgm ; ; GFX11-LABEL: global_store_saddr_v2i64_zext_vgpr: ; GFX11: ; %bb.0: ; GFX11-NEXT: global_store_b128 v0, v[1:4], s[2:3] ; GFX11-NEXT: s_nop 0 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm ; ; GFX12-LABEL: global_store_saddr_v2i64_zext_vgpr: ; GFX12: ; %bb.0: ; GFX12-NEXT: global_store_b128 v0, v[1:4], s[2:3] ; GFX12-NEXT: s_nop 0 ; GFX12-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX12-NEXT: s_endpgm %zext.offset = zext i32 %voffset to i64 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset store <2 x i64> %data, ptr addrspace(1) %gep0 ret void } define amdgpu_ps void @global_store_saddr_v2i64_zext_vgpr_offset_neg128(ptr addrspace(1) inreg %sbase, i32 %voffset, <2 x i64> %data) { ; GCN-LABEL: global_store_saddr_v2i64_zext_vgpr_offset_neg128: ; GCN: ; %bb.0: ; GCN-NEXT: global_store_dwordx4 v0, v[1:4], s[2:3] offset:-128 ; GCN-NEXT: s_endpgm ; ; GFX11-LABEL: global_store_saddr_v2i64_zext_vgpr_offset_neg128: ; GFX11: ; %bb.0: ; GFX11-NEXT: 
global_store_b128 v0, v[1:4], s[2:3] offset:-128 ; GFX11-NEXT: s_nop 0 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm ; ; GFX12-LABEL: global_store_saddr_v2i64_zext_vgpr_offset_neg128: ; GFX12: ; %bb.0: ; GFX12-NEXT: global_store_b128 v0, v[1:4], s[2:3] offset:-128 ; GFX12-NEXT: s_nop 0 ; GFX12-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX12-NEXT: s_endpgm %zext.offset = zext i32 %voffset to i64 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 -128 store <2 x i64> %data, ptr addrspace(1) %gep1 ret void } define amdgpu_ps void @global_store_saddr_v2f64_zext_vgpr(ptr addrspace(1) inreg %sbase, i32 %voffset, <2 x double> %data) { ; GCN-LABEL: global_store_saddr_v2f64_zext_vgpr: ; GCN: ; %bb.0: ; GCN-NEXT: global_store_dwordx4 v0, v[1:4], s[2:3] ; GCN-NEXT: s_endpgm ; ; GFX11-LABEL: global_store_saddr_v2f64_zext_vgpr: ; GFX11: ; %bb.0: ; GFX11-NEXT: global_store_b128 v0, v[1:4], s[2:3] ; GFX11-NEXT: s_nop 0 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm ; ; GFX12-LABEL: global_store_saddr_v2f64_zext_vgpr: ; GFX12: ; %bb.0: ; GFX12-NEXT: global_store_b128 v0, v[1:4], s[2:3] ; GFX12-NEXT: s_nop 0 ; GFX12-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX12-NEXT: s_endpgm %zext.offset = zext i32 %voffset to i64 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset store <2 x double> %data, ptr addrspace(1) %gep0 ret void } define amdgpu_ps void @global_store_saddr_v2f64_zext_vgpr_offset_neg128(ptr addrspace(1) inreg %sbase, i32 %voffset, <2 x double> %data) { ; GCN-LABEL: global_store_saddr_v2f64_zext_vgpr_offset_neg128: ; GCN: ; %bb.0: ; GCN-NEXT: global_store_dwordx4 v0, v[1:4], s[2:3] offset:-128 ; GCN-NEXT: s_endpgm ; ; GFX11-LABEL: global_store_saddr_v2f64_zext_vgpr_offset_neg128: ; GFX11: ; %bb.0: ; GFX11-NEXT: global_store_b128 v0, v[1:4], s[2:3] offset:-128 ; GFX11-NEXT: s_nop 0 ; GFX11-NEXT: 
s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm ; ; GFX12-LABEL: global_store_saddr_v2f64_zext_vgpr_offset_neg128: ; GFX12: ; %bb.0: ; GFX12-NEXT: global_store_b128 v0, v[1:4], s[2:3] offset:-128 ; GFX12-NEXT: s_nop 0 ; GFX12-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX12-NEXT: s_endpgm %zext.offset = zext i32 %voffset to i64 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 -128 store <2 x double> %data, ptr addrspace(1) %gep1 ret void } define amdgpu_ps void @global_store_saddr_v8i16_zext_vgpr(ptr addrspace(1) inreg %sbase, i32 %voffset, <8 x i16> %data) { ; GCN-LABEL: global_store_saddr_v8i16_zext_vgpr: ; GCN: ; %bb.0: ; GCN-NEXT: global_store_dwordx4 v0, v[1:4], s[2:3] ; GCN-NEXT: s_endpgm ; ; GFX11-LABEL: global_store_saddr_v8i16_zext_vgpr: ; GFX11: ; %bb.0: ; GFX11-NEXT: global_store_b128 v0, v[1:4], s[2:3] ; GFX11-NEXT: s_nop 0 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm ; ; GFX12-LABEL: global_store_saddr_v8i16_zext_vgpr: ; GFX12: ; %bb.0: ; GFX12-NEXT: global_store_b128 v0, v[1:4], s[2:3] ; GFX12-NEXT: s_nop 0 ; GFX12-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX12-NEXT: s_endpgm %zext.offset = zext i32 %voffset to i64 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset store <8 x i16> %data, ptr addrspace(1) %gep0 ret void } define amdgpu_ps void @global_store_saddr_v8i16_zext_vgpr_offset_neg128(ptr addrspace(1) inreg %sbase, i32 %voffset, <8 x i16> %data) { ; GCN-LABEL: global_store_saddr_v8i16_zext_vgpr_offset_neg128: ; GCN: ; %bb.0: ; GCN-NEXT: global_store_dwordx4 v0, v[1:4], s[2:3] offset:-128 ; GCN-NEXT: s_endpgm ; ; GFX11-LABEL: global_store_saddr_v8i16_zext_vgpr_offset_neg128: ; GFX11: ; %bb.0: ; GFX11-NEXT: global_store_b128 v0, v[1:4], s[2:3] offset:-128 ; GFX11-NEXT: s_nop 0 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm ; ; GFX12-LABEL: 
global_store_saddr_v8i16_zext_vgpr_offset_neg128: ; GFX12: ; %bb.0: ; GFX12-NEXT: global_store_b128 v0, v[1:4], s[2:3] offset:-128 ; GFX12-NEXT: s_nop 0 ; GFX12-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX12-NEXT: s_endpgm %zext.offset = zext i32 %voffset to i64 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 -128 store <8 x i16> %data, ptr addrspace(1) %gep1 ret void } define amdgpu_ps void @global_store_saddr_v8f16_zext_vgpr(ptr addrspace(1) inreg %sbase, i32 %voffset, <8 x half> %data) { ; GCN-LABEL: global_store_saddr_v8f16_zext_vgpr: ; GCN: ; %bb.0: ; GCN-NEXT: global_store_dwordx4 v0, v[1:4], s[2:3] ; GCN-NEXT: s_endpgm ; ; GFX11-LABEL: global_store_saddr_v8f16_zext_vgpr: ; GFX11: ; %bb.0: ; GFX11-NEXT: global_store_b128 v0, v[1:4], s[2:3] ; GFX11-NEXT: s_nop 0 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm ; ; GFX12-LABEL: global_store_saddr_v8f16_zext_vgpr: ; GFX12: ; %bb.0: ; GFX12-NEXT: global_store_b128 v0, v[1:4], s[2:3] ; GFX12-NEXT: s_nop 0 ; GFX12-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX12-NEXT: s_endpgm %zext.offset = zext i32 %voffset to i64 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset store <8 x half> %data, ptr addrspace(1) %gep0 ret void } define amdgpu_ps void @global_store_saddr_v8f16_zext_vgpr_offset_neg128(ptr addrspace(1) inreg %sbase, i32 %voffset, <8 x half> %data) { ; GCN-LABEL: global_store_saddr_v8f16_zext_vgpr_offset_neg128: ; GCN: ; %bb.0: ; GCN-NEXT: global_store_dwordx4 v0, v[1:4], s[2:3] offset:-128 ; GCN-NEXT: s_endpgm ; ; GFX11-LABEL: global_store_saddr_v8f16_zext_vgpr_offset_neg128: ; GFX11: ; %bb.0: ; GFX11-NEXT: global_store_b128 v0, v[1:4], s[2:3] offset:-128 ; GFX11-NEXT: s_nop 0 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm ; ; GFX12-LABEL: global_store_saddr_v8f16_zext_vgpr_offset_neg128: ; GFX12: ; %bb.0: ; GFX12-NEXT: 
global_store_b128 v0, v[1:4], s[2:3] offset:-128 ; GFX12-NEXT: s_nop 0 ; GFX12-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX12-NEXT: s_endpgm %zext.offset = zext i32 %voffset to i64 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 -128 store <8 x half> %data, ptr addrspace(1) %gep1 ret void } define amdgpu_ps void @global_store_saddr_v2p1_zext_vgpr(ptr addrspace(1) inreg %sbase, i32 %voffset, <2 x ptr addrspace(1)> %data) { ; GCN-LABEL: global_store_saddr_v2p1_zext_vgpr: ; GCN: ; %bb.0: ; GCN-NEXT: global_store_dwordx4 v0, v[1:4], s[2:3] ; GCN-NEXT: s_endpgm ; ; GFX11-LABEL: global_store_saddr_v2p1_zext_vgpr: ; GFX11: ; %bb.0: ; GFX11-NEXT: global_store_b128 v0, v[1:4], s[2:3] ; GFX11-NEXT: s_nop 0 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm ; ; GFX12-LABEL: global_store_saddr_v2p1_zext_vgpr: ; GFX12: ; %bb.0: ; GFX12-NEXT: global_store_b128 v0, v[1:4], s[2:3] ; GFX12-NEXT: s_nop 0 ; GFX12-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX12-NEXT: s_endpgm %zext.offset = zext i32 %voffset to i64 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset store <2 x ptr addrspace(1)> %data, ptr addrspace(1) %gep0 ret void } define amdgpu_ps void @global_store_saddr_v2p1_zext_vgpr_offset_neg128(ptr addrspace(1) inreg %sbase, i32 %voffset, <2 x ptr addrspace(1)> %data) { ; GCN-LABEL: global_store_saddr_v2p1_zext_vgpr_offset_neg128: ; GCN: ; %bb.0: ; GCN-NEXT: global_store_dwordx4 v0, v[1:4], s[2:3] offset:-128 ; GCN-NEXT: s_endpgm ; ; GFX11-LABEL: global_store_saddr_v2p1_zext_vgpr_offset_neg128: ; GFX11: ; %bb.0: ; GFX11-NEXT: global_store_b128 v0, v[1:4], s[2:3] offset:-128 ; GFX11-NEXT: s_nop 0 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm ; ; GFX12-LABEL: global_store_saddr_v2p1_zext_vgpr_offset_neg128: ; GFX12: ; %bb.0: ; GFX12-NEXT: global_store_b128 v0, v[1:4], s[2:3] offset:-128 ; GFX12-NEXT: 
s_nop 0 ; GFX12-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX12-NEXT: s_endpgm %zext.offset = zext i32 %voffset to i64 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 -128 store <2 x ptr addrspace(1)> %data, ptr addrspace(1) %gep1 ret void } define amdgpu_ps void @global_store_saddr_v4p3_zext_vgpr(ptr addrspace(1) inreg %sbase, i32 %voffset, <4 x ptr addrspace(3)> %data) { ; GCN-LABEL: global_store_saddr_v4p3_zext_vgpr: ; GCN: ; %bb.0: ; GCN-NEXT: global_store_dwordx4 v0, v[1:4], s[2:3] ; GCN-NEXT: s_endpgm ; ; GFX11-LABEL: global_store_saddr_v4p3_zext_vgpr: ; GFX11: ; %bb.0: ; GFX11-NEXT: global_store_b128 v0, v[1:4], s[2:3] ; GFX11-NEXT: s_nop 0 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm ; ; GFX12-LABEL: global_store_saddr_v4p3_zext_vgpr: ; GFX12: ; %bb.0: ; GFX12-NEXT: global_store_b128 v0, v[1:4], s[2:3] ; GFX12-NEXT: s_nop 0 ; GFX12-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX12-NEXT: s_endpgm %zext.offset = zext i32 %voffset to i64 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset store <4 x ptr addrspace(3)> %data, ptr addrspace(1) %gep0 ret void } define amdgpu_ps void @global_store_saddr_v4p3_zext_vgpr_offset_neg128(ptr addrspace(1) inreg %sbase, i32 %voffset, <4 x ptr addrspace(3)> %data) { ; GCN-LABEL: global_store_saddr_v4p3_zext_vgpr_offset_neg128: ; GCN: ; %bb.0: ; GCN-NEXT: global_store_dwordx4 v0, v[1:4], s[2:3] offset:-128 ; GCN-NEXT: s_endpgm ; ; GFX11-LABEL: global_store_saddr_v4p3_zext_vgpr_offset_neg128: ; GFX11: ; %bb.0: ; GFX11-NEXT: global_store_b128 v0, v[1:4], s[2:3] offset:-128 ; GFX11-NEXT: s_nop 0 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm ; ; GFX12-LABEL: global_store_saddr_v4p3_zext_vgpr_offset_neg128: ; GFX12: ; %bb.0: ; GFX12-NEXT: global_store_b128 v0, v[1:4], s[2:3] offset:-128 ; GFX12-NEXT: s_nop 0 ; GFX12-NEXT: s_sendmsg 
sendmsg(MSG_DEALLOC_VGPRS) ; GFX12-NEXT: s_endpgm %zext.offset = zext i32 %voffset to i64 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 -128 store <4 x ptr addrspace(3)> %data, ptr addrspace(1) %gep1 ret void } ; -------------------------------------------------------------------------------- ; Atomic store ; -------------------------------------------------------------------------------- define amdgpu_ps void @atomic_global_store_saddr_i32_zext_vgpr(ptr addrspace(1) inreg %sbase, i32 %voffset, i32 %data) { ; GCN-LABEL: atomic_global_store_saddr_i32_zext_vgpr: ; GCN: ; %bb.0: ; GCN-NEXT: global_store_dword v0, v1, s[2:3] ; GCN-NEXT: s_endpgm ; ; GFX11-LABEL: atomic_global_store_saddr_i32_zext_vgpr: ; GFX11: ; %bb.0: ; GFX11-NEXT: global_store_b32 v0, v1, s[2:3] ; GFX11-NEXT: s_nop 0 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm ; ; GFX12-LABEL: atomic_global_store_saddr_i32_zext_vgpr: ; GFX12: ; %bb.0: ; GFX12-NEXT: global_store_b32 v0, v1, s[2:3] ; GFX12-NEXT: s_nop 0 ; GFX12-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX12-NEXT: s_endpgm %zext.offset = zext i32 %voffset to i64 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset store atomic i32 %data, ptr addrspace(1) %gep0 seq_cst, align 4 ret void } define amdgpu_ps void @atomic_global_store_saddr_i32_zext_vgpr_offset_neg128(ptr addrspace(1) inreg %sbase, i32 %voffset, i32 %data) { ; GCN-LABEL: atomic_global_store_saddr_i32_zext_vgpr_offset_neg128: ; GCN: ; %bb.0: ; GCN-NEXT: global_store_dword v0, v1, s[2:3] offset:-128 ; GCN-NEXT: s_endpgm ; ; GFX11-LABEL: atomic_global_store_saddr_i32_zext_vgpr_offset_neg128: ; GFX11: ; %bb.0: ; GFX11-NEXT: global_store_b32 v0, v1, s[2:3] offset:-128 ; GFX11-NEXT: s_nop 0 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm ; ; GFX12-LABEL: atomic_global_store_saddr_i32_zext_vgpr_offset_neg128: ; GFX12: ; 
%bb.0: ; GFX12-NEXT: global_store_b32 v0, v1, s[2:3] offset:-128 ; GFX12-NEXT: s_nop 0 ; GFX12-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX12-NEXT: s_endpgm %zext.offset = zext i32 %voffset to i64 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 -128 store atomic i32 %data, ptr addrspace(1) %gep1 seq_cst, align 4 ret void } define amdgpu_ps void @atomic_global_store_saddr_i64_zext_vgpr(ptr addrspace(1) inreg %sbase, i32 %voffset, i64 %data) { ; GCN-LABEL: atomic_global_store_saddr_i64_zext_vgpr: ; GCN: ; %bb.0: ; GCN-NEXT: global_store_dwordx2 v0, v[1:2], s[2:3] ; GCN-NEXT: s_endpgm ; ; GFX11-LABEL: atomic_global_store_saddr_i64_zext_vgpr: ; GFX11: ; %bb.0: ; GFX11-NEXT: global_store_b64 v0, v[1:2], s[2:3] ; GFX11-NEXT: s_nop 0 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm ; ; GFX12-LABEL: atomic_global_store_saddr_i64_zext_vgpr: ; GFX12: ; %bb.0: ; GFX12-NEXT: global_store_b64 v0, v[1:2], s[2:3] ; GFX12-NEXT: s_nop 0 ; GFX12-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX12-NEXT: s_endpgm %zext.offset = zext i32 %voffset to i64 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset store atomic i64 %data, ptr addrspace(1) %gep0 seq_cst, align 8 ret void } define amdgpu_ps void @atomic_global_store_saddr_i64_zext_vgpr_offset_neg128(ptr addrspace(1) inreg %sbase, i32 %voffset, i64 %data) { ; GCN-LABEL: atomic_global_store_saddr_i64_zext_vgpr_offset_neg128: ; GCN: ; %bb.0: ; GCN-NEXT: global_store_dwordx2 v0, v[1:2], s[2:3] offset:-128 ; GCN-NEXT: s_endpgm ; ; GFX11-LABEL: atomic_global_store_saddr_i64_zext_vgpr_offset_neg128: ; GFX11: ; %bb.0: ; GFX11-NEXT: global_store_b64 v0, v[1:2], s[2:3] offset:-128 ; GFX11-NEXT: s_nop 0 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm ; ; GFX12-LABEL: atomic_global_store_saddr_i64_zext_vgpr_offset_neg128: ; GFX12: ; %bb.0: ; GFX12-NEXT: global_store_b64 v0, 
v[1:2], s[2:3] offset:-128 ; GFX12-NEXT: s_nop 0 ; GFX12-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX12-NEXT: s_endpgm %zext.offset = zext i32 %voffset to i64 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 -128 store atomic i64 %data, ptr addrspace(1) %gep1 seq_cst, align 8 ret void } ; -------------------------------------------------------------------------------- ; D16 HI store (hi 16) ; -------------------------------------------------------------------------------- define amdgpu_ps void @global_store_saddr_i16_d16hi_zext_vgpr(ptr addrspace(1) inreg %sbase, i32 %voffset, <2 x i16> %data) { ; GCN-LABEL: global_store_saddr_i16_d16hi_zext_vgpr: ; GCN: ; %bb.0: ; GCN-NEXT: global_store_short_d16_hi v0, v1, s[2:3] ; GCN-NEXT: s_endpgm ; ; GFX11-LABEL: global_store_saddr_i16_d16hi_zext_vgpr: ; GFX11: ; %bb.0: ; GFX11-NEXT: global_store_d16_hi_b16 v0, v1, s[2:3] ; GFX11-NEXT: s_nop 0 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm ; ; GFX12-LABEL: global_store_saddr_i16_d16hi_zext_vgpr: ; GFX12: ; %bb.0: ; GFX12-NEXT: global_store_d16_hi_b16 v0, v1, s[2:3] ; GFX12-NEXT: s_nop 0 ; GFX12-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX12-NEXT: s_endpgm %zext.offset = zext i32 %voffset to i64 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset %data.hi = extractelement <2 x i16> %data, i32 1 store i16 %data.hi, ptr addrspace(1) %gep0 ret void } define amdgpu_ps void @global_store_saddr_i16_d16hi_zext_vgpr_offset_neg128(ptr addrspace(1) inreg %sbase, i32 %voffset, <2 x i16> %data) { ; GCN-LABEL: global_store_saddr_i16_d16hi_zext_vgpr_offset_neg128: ; GCN: ; %bb.0: ; GCN-NEXT: global_store_short_d16_hi v0, v1, s[2:3] offset:-128 ; GCN-NEXT: s_endpgm ; ; GFX11-LABEL: global_store_saddr_i16_d16hi_zext_vgpr_offset_neg128: ; GFX11: ; %bb.0: ; GFX11-NEXT: global_store_d16_hi_b16 v0, v1, s[2:3] offset:-128 ; GFX11-NEXT: s_nop 0 ; 
GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm ; ; GFX12-LABEL: global_store_saddr_i16_d16hi_zext_vgpr_offset_neg128: ; GFX12: ; %bb.0: ; GFX12-NEXT: global_store_d16_hi_b16 v0, v1, s[2:3] offset:-128 ; GFX12-NEXT: s_nop 0 ; GFX12-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX12-NEXT: s_endpgm %zext.offset = zext i32 %voffset to i64 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 -128 %data.hi = extractelement <2 x i16> %data, i32 1 store i16 %data.hi, ptr addrspace(1) %gep1 ret void } define amdgpu_ps void @global_store_saddr_i16_d16hi_trunci8_zext_vgpr(ptr addrspace(1) inreg %sbase, i32 %voffset, <2 x i16> %data) { ; GCN-LABEL: global_store_saddr_i16_d16hi_trunci8_zext_vgpr: ; GCN: ; %bb.0: ; GCN-NEXT: global_store_byte_d16_hi v0, v1, s[2:3] ; GCN-NEXT: s_endpgm ; ; GFX11-LABEL: global_store_saddr_i16_d16hi_trunci8_zext_vgpr: ; GFX11: ; %bb.0: ; GFX11-NEXT: global_store_d16_hi_b8 v0, v1, s[2:3] ; GFX11-NEXT: s_nop 0 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm ; ; GFX12-LABEL: global_store_saddr_i16_d16hi_trunci8_zext_vgpr: ; GFX12: ; %bb.0: ; GFX12-NEXT: global_store_d16_hi_b8 v0, v1, s[2:3] ; GFX12-NEXT: s_nop 0 ; GFX12-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX12-NEXT: s_endpgm %zext.offset = zext i32 %voffset to i64 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset %data.hi = extractelement <2 x i16> %data, i32 1 %data.hi.trunc = trunc i16 %data.hi to i8 store i8 %data.hi.trunc, ptr addrspace(1) %gep0 ret void } define amdgpu_ps void @global_store_saddr_i16_d16hi_trunci8_zext_vgpr_offset_neg128(ptr addrspace(1) inreg %sbase, i32 %voffset, <2 x i16> %data) { ; GCN-LABEL: global_store_saddr_i16_d16hi_trunci8_zext_vgpr_offset_neg128: ; GCN: ; %bb.0: ; GCN-NEXT: global_store_byte_d16_hi v0, v1, s[2:3] offset:-128 ; GCN-NEXT: s_endpgm ; ; GFX11-LABEL: 
global_store_saddr_i16_d16hi_trunci8_zext_vgpr_offset_neg128: ; GFX11: ; %bb.0: ; GFX11-NEXT: global_store_d16_hi_b8 v0, v1, s[2:3] offset:-128 ; GFX11-NEXT: s_nop 0 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm ; ; GFX12-LABEL: global_store_saddr_i16_d16hi_trunci8_zext_vgpr_offset_neg128: ; GFX12: ; %bb.0: ; GFX12-NEXT: global_store_d16_hi_b8 v0, v1, s[2:3] offset:-128 ; GFX12-NEXT: s_nop 0 ; GFX12-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX12-NEXT: s_endpgm %zext.offset = zext i32 %voffset to i64 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 -128 %data.hi = extractelement <2 x i16> %data, i32 1 %data.hi.trunc = trunc i16 %data.hi to i8 store i8 %data.hi.trunc, ptr addrspace(1) %gep1 ret void }