; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
; RUN: llc -mtriple=amdgcn -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=CIGFX89,CI %s
; RUN: llc -mtriple=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=CIGFX89,GFX89,VI %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx900 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=CIGFX89,GFX89,GFX9 %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX11 %s

; Each function below receives one argument and stores it (or a value derived
; from it) through an addrspace(1) pointer, so the generated code shows which
; registers carry the argument on hawaii, fiji, gfx900 and gfx1100.

; Plain i1 argument - v0 is ANDed with 1 before the byte store.
define void @void_func_i1(i1 %arg0) #0 {
; CIGFX89-LABEL: void_func_i1:
; CIGFX89:       ; %bb.0:
; CIGFX89-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CIGFX89-NEXT:    v_and_b32_e32 v0, 1, v0
; CIGFX89-NEXT:    s_mov_b32 s7, 0xf000
; CIGFX89-NEXT:    s_mov_b32 s6, -1
; CIGFX89-NEXT:    buffer_store_byte v0, off, s[4:7], 0
; CIGFX89-NEXT:    s_waitcnt vmcnt(0)
; CIGFX89-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_i1:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    v_and_b32_e32 v0, 1, v0
; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
; GFX11-NEXT:    s_mov_b32 s2, -1
; GFX11-NEXT:    buffer_store_b8 v0, off, s[0:3], 0
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  store i1 %arg0, ptr addrspace(1) undef
  ret void
}

; zeroext i1 - v0 is not masked; the add of 12 is emitted as an OR.
define void @void_func_i1_zeroext(i1 zeroext %arg0) #0 {
; CIGFX89-LABEL: void_func_i1_zeroext:
; CIGFX89:       ; %bb.0:
; CIGFX89-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CIGFX89-NEXT:    v_or_b32_e32 v0, 12, v0
; CIGFX89-NEXT:    s_mov_b32 s7, 0xf000
; CIGFX89-NEXT:    s_mov_b32 s6, -1
; CIGFX89-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; CIGFX89-NEXT:    s_waitcnt vmcnt(0)
; CIGFX89-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_i1_zeroext:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    v_or_b32_e32 v0, 12, v0
; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
; GFX11-NEXT:    s_mov_b32 s2, -1
; GFX11-NEXT:    buffer_store_b32 v0, off, s[0:3], 0
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %ext = zext i1 %arg0 to i32
  %add = add i32 %ext, 12
  store i32 %add, ptr addrspace(1) undef
  ret void
}

; signext i1 - v0 feeds the 32-bit add directly, with no extension instruction.
define void @void_func_i1_signext(i1 signext %arg0) #0 {
; CI-LABEL: void_func_i1_signext:
; CI:       ; %bb.0:
; CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CI-NEXT:    v_add_i32_e32 v0, vcc, 12, v0
; CI-NEXT:    s_mov_b32 s7, 0xf000
; CI-NEXT:    s_mov_b32 s6, -1
; CI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; CI-NEXT:    s_waitcnt vmcnt(0)
; CI-NEXT:    s_setpc_b64 s[30:31]
;
; VI-LABEL: void_func_i1_signext:
; VI:       ; %bb.0:
; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-NEXT:    v_add_u32_e32 v0, vcc, 12, v0
; VI-NEXT:    s_mov_b32 s7, 0xf000
; VI-NEXT:    s_mov_b32 s6, -1
; VI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; VI-NEXT:    s_waitcnt vmcnt(0)
; VI-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: void_func_i1_signext:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_add_u32_e32 v0, 12, v0
; GFX9-NEXT:    s_mov_b32 s7, 0xf000
; GFX9-NEXT:    s_mov_b32 s6, -1
; GFX9-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_i1_signext:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    v_add_nc_u32_e32 v0, 12, v0
; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
; GFX11-NEXT:    s_mov_b32 s2, -1
; GFX11-NEXT:    buffer_store_b32 v0, off, s[0:3], 0
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %ext = sext i1 %arg0 to i32
  %add = add i32 %ext, 12
  store i32 %add, ptr addrspace(1) undef
  ret void
}

; i1 argument used as a branch condition - v0 is ANDed with 1 and compared,
; and the inverted result becomes the exec mask for %bb1.
define void @i1_arg_i1_use(i1 %arg) #0 {
; CIGFX89-LABEL: i1_arg_i1_use:
; CIGFX89:       ; %bb.0: ; %bb
; CIGFX89-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CIGFX89-NEXT:    v_and_b32_e32 v0, 1, v0
; CIGFX89-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v0
; CIGFX89-NEXT:    s_xor_b64 s[6:7], vcc, -1
; CIGFX89-NEXT:    s_and_saveexec_b64 s[4:5], s[6:7]
; CIGFX89-NEXT:    s_cbranch_execz .LBB3_2
; CIGFX89-NEXT:  ; %bb.1: ; %bb1
; CIGFX89-NEXT:    s_mov_b32 s7, 0xf000
; CIGFX89-NEXT:    s_mov_b32 s6, -1
; CIGFX89-NEXT:    v_mov_b32_e32 v0, 0
; CIGFX89-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; CIGFX89-NEXT:    s_waitcnt vmcnt(0)
; CIGFX89-NEXT:  .LBB3_2: ; %bb2
; CIGFX89-NEXT:    s_or_b64 exec, exec, s[4:5]
; CIGFX89-NEXT:    s_waitcnt vmcnt(0)
; CIGFX89-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: i1_arg_i1_use:
; GFX11:       ; %bb.0: ; %bb
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    v_and_b32_e32 v0, 1, v0
; GFX11-NEXT:    s_mov_b32 s2, -1
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v0
; GFX11-NEXT:    s_xor_b32 s1, vcc_lo, -1
; GFX11-NEXT:    s_and_saveexec_b32 s0, s1
; GFX11-NEXT:    s_cbranch_execz .LBB3_2
; GFX11-NEXT:  ; %bb.1: ; %bb1
; GFX11-NEXT:    v_mov_b32_e32 v0, 0
; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
; GFX11-NEXT:    buffer_store_b32 v0, off, s[0:3], 0 dlc
; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX11-NEXT:  .LBB3_2: ; %bb2
; GFX11-NEXT:    s_or_b32 exec_lo, exec_lo, s0
; GFX11-NEXT:    s_setpc_b64 s[30:31]
bb:
  br i1 %arg, label %bb2, label %bb1

bb1:
  store volatile i32 0, ptr addrspace(1) undef
  br label %bb2

bb2:
  ret void
}

; Plain i8 argument - v0 is stored directly with a byte store.
define void @void_func_i8(i8 %arg0) #0 {
; CIGFX89-LABEL: void_func_i8:
; CIGFX89:       ; %bb.0:
; CIGFX89-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CIGFX89-NEXT:    s_mov_b32 s7, 0xf000
; CIGFX89-NEXT:    s_mov_b32 s6, -1
; CIGFX89-NEXT:    buffer_store_byte v0, off, s[4:7], 0
; CIGFX89-NEXT:    s_waitcnt vmcnt(0)
; CIGFX89-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_i8:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
; GFX11-NEXT:    s_mov_b32 s2, -1
; GFX11-NEXT:    buffer_store_b8 v0, off, s[0:3], 0
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  store i8 %arg0, ptr addrspace(1) undef
  ret void
}

; zeroext i8 - v0 feeds the 32-bit add directly, with no extension instruction.
define void @void_func_i8_zeroext(i8 zeroext %arg0) #0 {
; CI-LABEL: void_func_i8_zeroext:
; CI:       ; %bb.0:
; CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CI-NEXT:    v_add_i32_e32 v0, vcc, 12, v0
; CI-NEXT:    s_mov_b32 s7, 0xf000
; CI-NEXT:    s_mov_b32 s6, -1
; CI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; CI-NEXT:    s_waitcnt vmcnt(0)
; CI-NEXT:    s_setpc_b64 s[30:31]
;
; VI-LABEL: void_func_i8_zeroext:
; VI:       ; %bb.0:
; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-NEXT:    v_add_u32_e32 v0, vcc, 12, v0
; VI-NEXT:    s_mov_b32 s7, 0xf000
; VI-NEXT:    s_mov_b32 s6, -1
; VI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; VI-NEXT:    s_waitcnt vmcnt(0)
; VI-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: void_func_i8_zeroext:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_add_u32_e32 v0, 12, v0
; GFX9-NEXT:    s_mov_b32 s7, 0xf000
; GFX9-NEXT:    s_mov_b32 s6, -1
; GFX9-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_i8_zeroext:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    v_add_nc_u32_e32 v0, 12, v0
; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
; GFX11-NEXT:    s_mov_b32 s2, -1
; GFX11-NEXT:    buffer_store_b32 v0, off, s[0:3], 0
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %ext = zext i8 %arg0 to i32
  %add = add i32 %ext, 12
  store i32 %add, ptr addrspace(1) undef
  ret void
}

; signext i8 - v0 feeds the 32-bit add directly, with no extension instruction.
define void @void_func_i8_signext(i8 signext %arg0) #0 {
; CI-LABEL: void_func_i8_signext:
; CI:       ; %bb.0:
; CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CI-NEXT:    v_add_i32_e32 v0, vcc, 12, v0
; CI-NEXT:    s_mov_b32 s7, 0xf000
; CI-NEXT:    s_mov_b32 s6, -1
; CI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; CI-NEXT:    s_waitcnt vmcnt(0)
; CI-NEXT:    s_setpc_b64 s[30:31]
;
; VI-LABEL: void_func_i8_signext:
; VI:       ; %bb.0:
; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-NEXT:    v_add_u32_e32 v0, vcc, 12, v0
; VI-NEXT:    s_mov_b32 s7, 0xf000
; VI-NEXT:    s_mov_b32 s6, -1
; VI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; VI-NEXT:    s_waitcnt vmcnt(0)
; VI-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: void_func_i8_signext:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_add_u32_e32 v0, 12, v0
; GFX9-NEXT:    s_mov_b32 s7, 0xf000
; GFX9-NEXT:    s_mov_b32 s6, -1
; GFX9-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_i8_signext:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    v_add_nc_u32_e32 v0, 12, v0
; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
; GFX11-NEXT:    s_mov_b32 s2, -1
; GFX11-NEXT:    buffer_store_b32 v0, off, s[0:3], 0
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %ext = sext i8 %arg0 to i32
  %add = add i32 %ext, 12
  store i32 %add, ptr addrspace(1) undef
  ret void
}

; Plain i16 argument - v0 is stored directly with a 16-bit store.
define void @void_func_i16(i16 %arg0) #0 {
; CIGFX89-LABEL: void_func_i16:
; CIGFX89:       ; %bb.0:
; CIGFX89-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CIGFX89-NEXT:    s_mov_b32 s7, 0xf000
; CIGFX89-NEXT:    s_mov_b32 s6, -1
; CIGFX89-NEXT:    buffer_store_short v0, off, s[4:7], 0
; CIGFX89-NEXT:    s_waitcnt vmcnt(0)
; CIGFX89-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_i16:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
; GFX11-NEXT:    s_mov_b32 s2, -1
; GFX11-NEXT:    buffer_store_b16 v0, off, s[0:3], 0
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  store i16 %arg0, ptr addrspace(1) undef
  ret void
}

; zeroext i16 - v0 feeds the 32-bit add directly, with no extension instruction.
define void @void_func_i16_zeroext(i16 zeroext %arg0) #0 {
; CI-LABEL: void_func_i16_zeroext:
; CI:       ; %bb.0:
; CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CI-NEXT:    v_add_i32_e32 v0, vcc, 12, v0
; CI-NEXT:    s_mov_b32 s7, 0xf000
; CI-NEXT:    s_mov_b32 s6, -1
; CI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; CI-NEXT:    s_waitcnt vmcnt(0)
; CI-NEXT:    s_setpc_b64 s[30:31]
;
; VI-LABEL: void_func_i16_zeroext:
; VI:       ; %bb.0:
; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-NEXT:    v_add_u32_e32 v0, vcc, 12, v0
; VI-NEXT:    s_mov_b32 s7, 0xf000
; VI-NEXT:    s_mov_b32 s6, -1
; VI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; VI-NEXT:    s_waitcnt vmcnt(0)
; VI-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: void_func_i16_zeroext:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_add_u32_e32 v0, 12, v0
; GFX9-NEXT:    s_mov_b32 s7, 0xf000
; GFX9-NEXT:    s_mov_b32 s6, -1
; GFX9-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_i16_zeroext:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    v_add_nc_u32_e32 v0, 12, v0
; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
; GFX11-NEXT:    s_mov_b32 s2, -1
; GFX11-NEXT:    buffer_store_b32 v0, off, s[0:3], 0
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %ext = zext i16 %arg0 to i32
  %add = add i32 %ext, 12
  store i32 %add, ptr addrspace(1) undef
  ret void
}

; signext i16 - v0 feeds the 32-bit add directly, with no extension instruction.
define void @void_func_i16_signext(i16 signext %arg0) #0 {
; CI-LABEL: void_func_i16_signext:
; CI:       ; %bb.0:
; CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CI-NEXT:    v_add_i32_e32 v0, vcc, 12, v0
; CI-NEXT:    s_mov_b32 s7, 0xf000
; CI-NEXT:    s_mov_b32 s6, -1
; CI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; CI-NEXT:    s_waitcnt vmcnt(0)
; CI-NEXT:    s_setpc_b64 s[30:31]
;
; VI-LABEL: void_func_i16_signext:
; VI:       ; %bb.0:
; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-NEXT:    v_add_u32_e32 v0, vcc, 12, v0
; VI-NEXT:    s_mov_b32 s7, 0xf000
; VI-NEXT:    s_mov_b32 s6, -1
; VI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; VI-NEXT:    s_waitcnt vmcnt(0)
; VI-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: void_func_i16_signext:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_add_u32_e32 v0, 12, v0
; GFX9-NEXT:    s_mov_b32 s7, 0xf000
; GFX9-NEXT:    s_mov_b32 s6, -1
; GFX9-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_i16_signext:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    v_add_nc_u32_e32 v0, 12, v0
; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
; GFX11-NEXT:    s_mov_b32 s2, -1
; GFX11-NEXT:    buffer_store_b32 v0, off, s[0:3], 0
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %ext = sext i16 %arg0 to i32
  %add = add i32 %ext, 12
  store i32 %add, ptr addrspace(1) undef
  ret void
}

; i32 argument - arrives in v0.
define void @void_func_i32(i32 %arg0) #0 {
; CIGFX89-LABEL: void_func_i32:
; CIGFX89:       ; %bb.0:
; CIGFX89-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CIGFX89-NEXT:    s_mov_b32 s7, 0xf000
; CIGFX89-NEXT:    s_mov_b32 s6, -1
; CIGFX89-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; CIGFX89-NEXT:    s_waitcnt vmcnt(0)
; CIGFX89-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_i32:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
; GFX11-NEXT:    s_mov_b32 s2, -1
; GFX11-NEXT:    buffer_store_b32 v0, off, s[0:3], 0
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  store i32 %arg0, ptr addrspace(1) undef
  ret void
}

; i64 argument - arrives in v[0:1].
define void @void_func_i64(i64 %arg0) #0 {
; CIGFX89-LABEL: void_func_i64:
; CIGFX89:       ; %bb.0:
; CIGFX89-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CIGFX89-NEXT:    s_mov_b32 s7, 0xf000
; CIGFX89-NEXT:    s_mov_b32 s6, -1
; CIGFX89-NEXT:    buffer_store_dwordx2 v[0:1], off, s[4:7], 0
; CIGFX89-NEXT:    s_waitcnt vmcnt(0)
; CIGFX89-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_i64:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
; GFX11-NEXT:    s_mov_b32 s2, -1
; GFX11-NEXT:    buffer_store_b64 v[0:1], off, s[0:3], 0
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  store i64 %arg0, ptr addrspace(1) undef
  ret void
}

; half argument - hawaii converts v0 with v_cvt_f16_f32 before the store; the
; other targets store v0 directly.
define void @void_func_f16(half %arg0) #0 {
; CI-LABEL: void_func_f16:
; CI:       ; %bb.0:
; CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CI-NEXT:    v_cvt_f16_f32_e32 v0, v0
; CI-NEXT:    s_mov_b32 s7, 0xf000
; CI-NEXT:    s_mov_b32 s6, -1
; CI-NEXT:    buffer_store_short v0, off, s[4:7], 0
; CI-NEXT:    s_waitcnt vmcnt(0)
; CI-NEXT:    s_setpc_b64 s[30:31]
;
; GFX89-LABEL: void_func_f16:
; GFX89:       ; %bb.0:
; GFX89-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX89-NEXT:    s_mov_b32 s7, 0xf000
; GFX89-NEXT:    s_mov_b32 s6, -1
; GFX89-NEXT:    buffer_store_short v0, off, s[4:7], 0
; GFX89-NEXT:    s_waitcnt vmcnt(0)
; GFX89-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_f16:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
; GFX11-NEXT:    s_mov_b32 s2, -1
; GFX11-NEXT:    buffer_store_b16 v0, off, s[0:3], 0
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  store half %arg0, ptr addrspace(1) undef
  ret void
}

; float argument - arrives in v0.
define void @void_func_f32(float %arg0) #0 {
; CIGFX89-LABEL: void_func_f32:
; CIGFX89:       ; %bb.0:
; CIGFX89-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CIGFX89-NEXT:    s_mov_b32 s7, 0xf000
; CIGFX89-NEXT:    s_mov_b32 s6, -1
; CIGFX89-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; CIGFX89-NEXT:    s_waitcnt vmcnt(0)
; CIGFX89-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_f32:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
; GFX11-NEXT:    s_mov_b32 s2, -1
; GFX11-NEXT:    buffer_store_b32 v0, off, s[0:3], 0
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  store float %arg0, ptr addrspace(1) undef
  ret void
}

; double argument - arrives in v[0:1].
define void @void_func_f64(double %arg0) #0 {
; CIGFX89-LABEL: void_func_f64:
; CIGFX89:       ; %bb.0:
; CIGFX89-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CIGFX89-NEXT:    s_mov_b32 s7, 0xf000
; CIGFX89-NEXT:    s_mov_b32 s6, -1
; CIGFX89-NEXT:    buffer_store_dwordx2 v[0:1], off, s[4:7], 0
; CIGFX89-NEXT:    s_waitcnt vmcnt(0)
; CIGFX89-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_f64:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
; GFX11-NEXT:    s_mov_b32 s2, -1
; GFX11-NEXT:    buffer_store_b64 v[0:1], off, s[0:3], 0
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  store double %arg0, ptr addrspace(1) undef
  ret void
}

; <2 x i32> argument - arrives in v[0:1].
define void @void_func_v2i32(<2 x i32> %arg0) #0 {
; CIGFX89-LABEL: void_func_v2i32:
; CIGFX89:       ; %bb.0:
; CIGFX89-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CIGFX89-NEXT:    s_mov_b32 s7, 0xf000
; CIGFX89-NEXT:    s_mov_b32 s6, -1
; CIGFX89-NEXT:    buffer_store_dwordx2 v[0:1], off, s[4:7], 0
; CIGFX89-NEXT:    s_waitcnt vmcnt(0)
; CIGFX89-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_v2i32:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
; GFX11-NEXT:    s_mov_b32 s2, -1
; GFX11-NEXT:    buffer_store_b64 v[0:1], off, s[0:3], 0
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  store <2 x i32> %arg0, ptr addrspace(1) undef
  ret void
}

; <3 x i32> argument - arrives in v[0:2].
define void @void_func_v3i32(<3 x i32> %arg0) #0 {
; CIGFX89-LABEL: void_func_v3i32:
; CIGFX89:       ; %bb.0:
; CIGFX89-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CIGFX89-NEXT:    s_mov_b32 s7, 0xf000
; CIGFX89-NEXT:    s_mov_b32 s6, -1
; CIGFX89-NEXT:    buffer_store_dwordx3 v[0:2], off, s[4:7], 0
; CIGFX89-NEXT:    s_waitcnt vmcnt(0)
; CIGFX89-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_v3i32:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
; GFX11-NEXT:    s_mov_b32 s2, -1
; GFX11-NEXT:    buffer_store_b96 v[0:2], off, s[0:3], 0
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  store <3 x i32> %arg0, ptr addrspace(1) undef
  ret void
}

; <4 x i32> argument - arrives in v[0:3].
define void @void_func_v4i32(<4 x i32> %arg0) #0 {
; CIGFX89-LABEL: void_func_v4i32:
; CIGFX89:       ; %bb.0:
; CIGFX89-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CIGFX89-NEXT:    s_mov_b32 s7, 0xf000
; CIGFX89-NEXT:    s_mov_b32 s6, -1
; CIGFX89-NEXT:    buffer_store_dwordx4 v[0:3], off, s[4:7], 0
; CIGFX89-NEXT:    s_waitcnt vmcnt(0)
; CIGFX89-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_v4i32:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
; GFX11-NEXT:    s_mov_b32 s2, -1
; GFX11-NEXT:    buffer_store_b128 v[0:3], off, s[0:3], 0
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  store <4 x i32> %arg0, ptr addrspace(1) undef
  ret void
}

; <5 x i32> argument - arrives in v[0:4]; stored as one dword plus one dwordx4.
define void @void_func_v5i32(<5 x i32> %arg0) #0 {
; CIGFX89-LABEL: void_func_v5i32:
; CIGFX89:       ; %bb.0:
; CIGFX89-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CIGFX89-NEXT:    s_mov_b32 s7, 0xf000
; CIGFX89-NEXT:    s_mov_b32 s6, -1
; CIGFX89-NEXT:    buffer_store_dword v4, off, s[4:7], 0
; CIGFX89-NEXT:    buffer_store_dwordx4 v[0:3], off, s[4:7], 0
; CIGFX89-NEXT:    s_waitcnt vmcnt(0)
; CIGFX89-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_v5i32:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
; GFX11-NEXT:    s_mov_b32 s2, -1
; GFX11-NEXT:    s_clause 0x1
; GFX11-NEXT:    buffer_store_b32 v4, off, s[0:3], 0
; GFX11-NEXT:    buffer_store_b128 v[0:3], off, s[0:3], 0
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  store <5 x i32> %arg0, ptr addrspace(1) undef
  ret void
}

; <8 x i32> argument - arrives in v[0:7].
define void @void_func_v8i32(<8 x i32> %arg0) #0 {
; CIGFX89-LABEL: void_func_v8i32:
; CIGFX89:       ; %bb.0:
; CIGFX89-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CIGFX89-NEXT:    s_mov_b32 s7, 0xf000
; CIGFX89-NEXT:    s_mov_b32 s6, -1
; CIGFX89-NEXT:    buffer_store_dwordx4 v[4:7], off, s[4:7], 0
; CIGFX89-NEXT:    buffer_store_dwordx4 v[0:3], off, s[4:7], 0
; CIGFX89-NEXT:    s_waitcnt vmcnt(0)
; CIGFX89-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_v8i32:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
; GFX11-NEXT:    s_mov_b32 s2, -1
; GFX11-NEXT:    s_clause 0x1
; GFX11-NEXT:    buffer_store_b128 v[4:7], off, s[0:3], 0
; GFX11-NEXT:    buffer_store_b128 v[0:3], off, s[0:3], 0
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  store <8 x i32> %arg0, ptr addrspace(1) undef
  ret void
}

; <16 x i32> argument - arrives in v[0:15].
define void @void_func_v16i32(<16 x i32> %arg0) #0 {
; CIGFX89-LABEL: void_func_v16i32:
; CIGFX89:       ; %bb.0:
; CIGFX89-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CIGFX89-NEXT:    s_mov_b32 s7, 0xf000
; CIGFX89-NEXT:    s_mov_b32 s6, -1
; CIGFX89-NEXT:    buffer_store_dwordx4 v[12:15], off, s[4:7], 0
; CIGFX89-NEXT:    buffer_store_dwordx4 v[8:11], off, s[4:7], 0
; CIGFX89-NEXT:    buffer_store_dwordx4 v[4:7], off, s[4:7], 0
; CIGFX89-NEXT:    buffer_store_dwordx4 v[0:3], off, s[4:7], 0
; CIGFX89-NEXT:    s_waitcnt vmcnt(0)
; CIGFX89-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_v16i32:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
; GFX11-NEXT:    s_mov_b32 s2, -1
; GFX11-NEXT:    s_clause 0x3
; GFX11-NEXT:    buffer_store_b128 v[12:15], off, s[0:3], 0
; GFX11-NEXT:    buffer_store_b128 v[8:11], off, s[0:3], 0
; GFX11-NEXT:    buffer_store_b128 v[4:7], off, s[0:3], 0
; GFX11-NEXT:    buffer_store_b128 v[0:3], off, s[0:3], 0
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  store <16 x i32> %arg0, ptr addrspace(1) undef
  ret void
}

; <32 x i32> argument - v31 is loaded via s32 before the final stores.
define void @void_func_v32i32(<32 x i32> %arg0) #0 {
; CIGFX89-LABEL: void_func_v32i32:
; CIGFX89:       ; %bb.0:
; CIGFX89-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CIGFX89-NEXT:    buffer_load_dword v31, off, s[0:3], s32
; CIGFX89-NEXT:    s_mov_b32 s7, 0xf000
; CIGFX89-NEXT:    s_mov_b32 s6, -1
; CIGFX89-NEXT:    buffer_store_dwordx4 v[24:27], off, s[4:7], 0
; CIGFX89-NEXT:    buffer_store_dwordx4 v[20:23], off, s[4:7], 0
; CIGFX89-NEXT:    buffer_store_dwordx4 v[16:19], off, s[4:7], 0
; CIGFX89-NEXT:    buffer_store_dwordx4 v[12:15], off, s[4:7], 0
; CIGFX89-NEXT:    buffer_store_dwordx4 v[8:11], off, s[4:7], 0
; CIGFX89-NEXT:    buffer_store_dwordx4 v[4:7], off, s[4:7], 0
; CIGFX89-NEXT:    s_waitcnt vmcnt(6)
; CIGFX89-NEXT:    buffer_store_dwordx4 v[28:31], off, s[4:7], 0
; CIGFX89-NEXT:    buffer_store_dwordx4 v[0:3], off, s[4:7], 0
; CIGFX89-NEXT:    s_waitcnt vmcnt(0)
; CIGFX89-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_v32i32:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    scratch_load_b32 v31, off, s32
; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
; GFX11-NEXT:    s_mov_b32 s2, -1
; GFX11-NEXT:    s_clause 0x3
; GFX11-NEXT:    buffer_store_b128 v[20:23], off, s[0:3], 0
; GFX11-NEXT:    buffer_store_b128 v[16:19], off, s[0:3], 0
; GFX11-NEXT:    buffer_store_b128 v[12:15], off, s[0:3], 0
; GFX11-NEXT:    buffer_store_b128 v[8:11], off, s[0:3], 0
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    s_clause 0x3
; GFX11-NEXT:    buffer_store_b128 v[28:31], off, s[0:3], 0
; GFX11-NEXT:    buffer_store_b128 v[24:27], off, s[0:3], 0
; GFX11-NEXT:    buffer_store_b128 v[4:7], off, s[0:3], 0
; GFX11-NEXT:    buffer_store_b128 v[0:3], off, s[0:3], 0
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  store <32 x i32> %arg0, ptr addrspace(1) undef
  ret void
}

; 1 over register limit
define void @void_func_v33i32(<33 x i32> %arg0) #0 {
; CI-LABEL: void_func_v33i32:
; CI:       ; %bb.0:
; CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CI-NEXT:    s_mov_b32 s7, 0xf000
; CI-NEXT:    s_mov_b32 s6, -1
; CI-NEXT:    buffer_store_dwordx4 v[24:27], off, s[4:7], 0
; CI-NEXT:    buffer_store_dwordx4 v[20:23], off, s[4:7], 0
; CI-NEXT:    buffer_store_dwordx4 v[16:19], off, s[4:7], 0
; CI-NEXT:    buffer_load_dword v31, off, s[0:3], s32
; CI-NEXT:    buffer_load_dword v16, off, s[0:3], s32 offset:4
; CI-NEXT:    buffer_store_dwordx4 v[12:15], off, s[4:7], 0
; CI-NEXT:    buffer_store_dwordx4 v[8:11], off, s[4:7], 0
; CI-NEXT:    buffer_store_dwordx4 v[4:7], off, s[4:7], 0
; CI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[4:7], 0
; CI-NEXT:    s_waitcnt vmcnt(5)
; CI-NEXT:    buffer_store_dwordx4 v[28:31], off, s[4:7], 0
; CI-NEXT:    s_waitcnt vmcnt(5)
; CI-NEXT:    buffer_store_dword v16, off, s[4:7], 0
; CI-NEXT:    s_waitcnt vmcnt(0)
; CI-NEXT:    s_setpc_b64 s[30:31]
;
; VI-LABEL: void_func_v33i32:
; VI:       ; %bb.0:
; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-NEXT:    s_mov_b32 s7, 0xf000
; VI-NEXT:    s_mov_b32 s6, -1
; VI-NEXT:    buffer_store_dwordx4 v[24:27], off, s[4:7], 0
; VI-NEXT:    buffer_store_dwordx4 v[20:23], off, s[4:7], 0
; VI-NEXT:    buffer_store_dwordx4 v[16:19], off, s[4:7], 0
; VI-NEXT:    buffer_load_dword v31, off, s[0:3], s32
; VI-NEXT:    buffer_load_dword v16, off, s[0:3], s32 offset:4
; VI-NEXT:    buffer_store_dwordx4 v[12:15], off, s[4:7], 0
; VI-NEXT:    buffer_store_dwordx4 v[8:11], off, s[4:7], 0
; VI-NEXT:    buffer_store_dwordx4 v[4:7], off, s[4:7], 0
; VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[4:7], 0
; VI-NEXT:    s_waitcnt vmcnt(5)
; VI-NEXT:    buffer_store_dwordx4 v[28:31], off, s[4:7], 0
; VI-NEXT:    s_waitcnt vmcnt(5)
; VI-NEXT:    buffer_store_dword v16, off, s[4:7], 0
; VI-NEXT:    s_waitcnt vmcnt(0)
; VI-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: void_func_v33i32:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    s_mov_b32 s7, 0xf000
; GFX9-NEXT:    s_mov_b32 s6, -1
; GFX9-NEXT:    buffer_store_dwordx4 v[24:27], off, s[4:7], 0
; GFX9-NEXT:    buffer_store_dwordx4 v[20:23], off, s[4:7], 0
; GFX9-NEXT:    buffer_store_dwordx4 v[16:19], off, s[4:7], 0
; GFX9-NEXT:    buffer_load_dword v31, off, s[0:3], s32
; GFX9-NEXT:    s_nop 0
; GFX9-NEXT:    buffer_load_dword v16, off, s[0:3], s32 offset:4
; GFX9-NEXT:    s_nop 0
; GFX9-NEXT:    buffer_store_dwordx4 v[12:15], off, s[4:7], 0
; GFX9-NEXT:    buffer_store_dwordx4 v[8:11], off, s[4:7], 0
; GFX9-NEXT:    buffer_store_dwordx4 v[4:7], off, s[4:7], 0
; GFX9-NEXT:    buffer_store_dwordx4 v[0:3], off, s[4:7], 0
; GFX9-NEXT:    s_waitcnt vmcnt(5)
; GFX9-NEXT:    buffer_store_dwordx4 v[28:31], off, s[4:7], 0
; GFX9-NEXT:    s_waitcnt vmcnt(5)
; GFX9-NEXT:    buffer_store_dword v16, off, s[4:7], 0
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_v33i32:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_clause 0x1
; GFX11-NEXT:    scratch_load_b32 v31, off, s32
; GFX11-NEXT:    scratch_load_b32 v32, off, s32 offset:4
; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
; GFX11-NEXT:    s_mov_b32 s2, -1
; GFX11-NEXT:    s_clause 0x5
; GFX11-NEXT:    buffer_store_b128 v[20:23], off, s[0:3], 0
; GFX11-NEXT:    buffer_store_b128 v[16:19], off, s[0:3], 0
; GFX11-NEXT:    buffer_store_b128 v[12:15], off, s[0:3], 0
; GFX11-NEXT:    buffer_store_b128 v[8:11], off, s[0:3], 0
; GFX11-NEXT:    buffer_store_b128 v[4:7], off, s[0:3], 0
; GFX11-NEXT:    buffer_store_b128 v[0:3], off, s[0:3], 0
; GFX11-NEXT:    s_waitcnt vmcnt(1)
; GFX11-NEXT:    s_clause 0x1
; GFX11-NEXT:    buffer_store_b128 v[28:31], off, s[0:3], 0
; GFX11-NEXT:    buffer_store_b128 v[24:27], off, s[0:3], 0
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    buffer_store_b32 v32, off, s[0:3], 0
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  store <33 x i32> %arg0, ptr addrspace(1) undef
  ret void
}

; <2 x i64> argument - arrives in v[0:3].
define void @void_func_v2i64(<2 x i64> %arg0) #0 {
; CIGFX89-LABEL: void_func_v2i64:
; CIGFX89:       ; %bb.0:
; CIGFX89-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CIGFX89-NEXT:    s_mov_b32 s7, 0xf000
; CIGFX89-NEXT:    s_mov_b32 s6, -1
; CIGFX89-NEXT:    buffer_store_dwordx4 v[0:3], off, s[4:7], 0
; CIGFX89-NEXT:    s_waitcnt vmcnt(0)
; CIGFX89-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_v2i64:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
; GFX11-NEXT:    s_mov_b32 s2, -1
; GFX11-NEXT:    buffer_store_b128 v[0:3], off, s[0:3], 0
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  store <2 x i64> %arg0, ptr addrspace(1) undef
  ret void
}

; <3 x i64> argument - arrives in v[0:5].
define void @void_func_v3i64(<3 x i64> %arg0) #0 {
; CIGFX89-LABEL: void_func_v3i64:
; CIGFX89:       ; %bb.0:
; CIGFX89-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CIGFX89-NEXT:    s_mov_b32 s7, 0xf000
; CIGFX89-NEXT:    s_mov_b32 s6, -1
; CIGFX89-NEXT:    buffer_store_dwordx2 v[4:5], off, s[4:7], 0
; CIGFX89-NEXT:    buffer_store_dwordx4 v[0:3], off, s[4:7], 0
; CIGFX89-NEXT:    s_waitcnt vmcnt(0)
; CIGFX89-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_v3i64:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
; GFX11-NEXT:    s_mov_b32 s2, -1
; GFX11-NEXT:    s_clause 0x1
; GFX11-NEXT:    buffer_store_b64 v[4:5], off, s[0:3], 0
; GFX11-NEXT:    buffer_store_b128 v[0:3], off, s[0:3], 0
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  store <3 x i64> %arg0, ptr addrspace(1) undef
  ret void
}

; <4 x i64> argument - arrives in v[0:7].
define void @void_func_v4i64(<4 x i64> %arg0) #0 {
; CIGFX89-LABEL: void_func_v4i64:
; CIGFX89:       ; %bb.0:
; CIGFX89-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CIGFX89-NEXT:    s_mov_b32 s7, 0xf000
; CIGFX89-NEXT:    s_mov_b32 s6, -1
; CIGFX89-NEXT:    buffer_store_dwordx4 v[4:7], off, s[4:7], 0
; CIGFX89-NEXT:    buffer_store_dwordx4 v[0:3], off, s[4:7], 0
; CIGFX89-NEXT:    s_waitcnt vmcnt(0)
; CIGFX89-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_v4i64:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
; GFX11-NEXT:    s_mov_b32 s2, -1
; GFX11-NEXT:    s_clause 0x1
; GFX11-NEXT:    buffer_store_b128 v[4:7], off, s[0:3], 0
; GFX11-NEXT:    buffer_store_b128 v[0:3], off, s[0:3], 0
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  store <4 x i64> %arg0, ptr addrspace(1) undef
  ret void
}

; <5 x i64> argument - arrives in v[0:9].
define void @void_func_v5i64(<5 x i64> %arg0) #0 {
; CIGFX89-LABEL: void_func_v5i64:
; CIGFX89:       ; %bb.0:
; CIGFX89-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CIGFX89-NEXT:    s_mov_b32 s7, 0xf000
; CIGFX89-NEXT:    s_mov_b32 s6, -1
; CIGFX89-NEXT:    buffer_store_dwordx4 v[4:7], off, s[4:7], 0
; CIGFX89-NEXT:    buffer_store_dwordx4 v[0:3], off, s[4:7], 0
; CIGFX89-NEXT:    buffer_store_dwordx2 v[8:9], off, s[4:7], 0
; CIGFX89-NEXT:    s_waitcnt vmcnt(0)
; CIGFX89-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_v5i64:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
; GFX11-NEXT:    s_mov_b32 s2, -1
; GFX11-NEXT:    s_clause 0x2
; GFX11-NEXT:    buffer_store_b128 v[4:7], off, s[0:3], 0
; GFX11-NEXT:    buffer_store_b128 v[0:3], off, s[0:3], 0
; GFX11-NEXT:    buffer_store_b64 v[8:9], off, s[0:3], 0
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  store <5 x i64> %arg0, ptr addrspace(1) undef
  ret void
}

; <8 x i64> argument - arrives in v[0:15].
define void @void_func_v8i64(<8 x i64> %arg0) #0 {
; CIGFX89-LABEL: void_func_v8i64:
; CIGFX89:       ; %bb.0:
; CIGFX89-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CIGFX89-NEXT:    s_mov_b32 s7, 0xf000
; CIGFX89-NEXT:    s_mov_b32 s6, -1
; CIGFX89-NEXT:    buffer_store_dwordx4 v[12:15], off, s[4:7], 0
; CIGFX89-NEXT:    buffer_store_dwordx4 v[8:11], off, s[4:7], 0
; CIGFX89-NEXT:    buffer_store_dwordx4 v[4:7], off, s[4:7], 0
; CIGFX89-NEXT:    buffer_store_dwordx4 v[0:3], off, s[4:7], 0
; CIGFX89-NEXT:    s_waitcnt vmcnt(0)
; CIGFX89-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_v8i64:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
; GFX11-NEXT:    s_mov_b32 s2, -1
; GFX11-NEXT:    s_clause 0x3
; GFX11-NEXT:    buffer_store_b128 v[12:15], off, s[0:3], 0
; GFX11-NEXT:    buffer_store_b128 v[8:11], off, s[0:3], 0
; GFX11-NEXT:    buffer_store_b128 v[4:7], off, s[0:3], 0
; GFX11-NEXT:    buffer_store_b128 v[0:3], off, s[0:3], 0
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  store <8 x i64> %arg0, ptr addrspace(1) undef
  ret void
}

; <16 x i64> argument - as with <32 x i32>, v31 is loaded via s32.
define void @void_func_v16i64(<16 x i64> %arg0) #0 {
; CIGFX89-LABEL: void_func_v16i64:
; CIGFX89:       ; %bb.0:
; CIGFX89-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CIGFX89-NEXT:    buffer_load_dword v31, off, s[0:3], s32
; CIGFX89-NEXT:    s_mov_b32 s7, 0xf000
; CIGFX89-NEXT:    s_mov_b32 s6, -1
; CIGFX89-NEXT:    buffer_store_dwordx4 v[24:27], off, s[4:7], 0
; CIGFX89-NEXT:    buffer_store_dwordx4 v[20:23], off, s[4:7], 0
; CIGFX89-NEXT:    buffer_store_dwordx4 v[16:19], off, s[4:7], 0
; CIGFX89-NEXT:    buffer_store_dwordx4 v[12:15], off, s[4:7], 0
; CIGFX89-NEXT:    buffer_store_dwordx4 v[8:11], off, s[4:7], 0
; CIGFX89-NEXT:    buffer_store_dwordx4 v[4:7], off, s[4:7], 0
; CIGFX89-NEXT:    s_waitcnt vmcnt(6)
; CIGFX89-NEXT:    buffer_store_dwordx4 v[28:31], off, s[4:7], 0
; CIGFX89-NEXT:    buffer_store_dwordx4 v[0:3], off, s[4:7], 0
; CIGFX89-NEXT:    s_waitcnt vmcnt(0)
; CIGFX89-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_v16i64:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    scratch_load_b32 v31, off, s32
; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
; GFX11-NEXT:    s_mov_b32 s2, -1
; GFX11-NEXT:    s_clause 0x3
; GFX11-NEXT:    buffer_store_b128 v[20:23], off, s[0:3], 0
; GFX11-NEXT:    buffer_store_b128 v[16:19], off, s[0:3], 0
; GFX11-NEXT:    buffer_store_b128 v[12:15], off, s[0:3], 0
; GFX11-NEXT:    buffer_store_b128 v[8:11], off, s[0:3], 0
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    s_clause 0x3
; GFX11-NEXT:    buffer_store_b128 v[28:31], off, s[0:3], 0
; GFX11-NEXT:    buffer_store_b128 v[24:27], off, s[0:3], 0
; GFX11-NEXT:    buffer_store_b128 v[4:7], off, s[0:3], 0
; GFX11-NEXT:    buffer_store_b128 v[0:3], off, s[0:3], 0
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  store <16 x i64> %arg0, ptr addrspace(1) undef
  ret void
}

; <2 x i8> argument - the elements arrive in v0 and v1 and are packed into
; 16 bits before a short store to the null pointer.
define void @void_func_v2i8(<2 x i8> %arg0) #0 {
; CI-LABEL: void_func_v2i8:
; CI:       ; %bb.0:
; CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CI-NEXT:    v_lshlrev_b32_e32 v1, 8, v1
; CI-NEXT:    v_and_b32_e32 v0, 0xff, v0
; CI-NEXT:    s_mov_b32 s4, 0
; CI-NEXT:    v_or_b32_e32 v0, v0, v1
; CI-NEXT:    s_mov_b32 s7, 0xf000
; CI-NEXT:    s_mov_b32 s6, -1
; CI-NEXT:    s_mov_b32 s5, s4
; CI-NEXT:    buffer_store_short v0, off, s[4:7], 0
; CI-NEXT:    s_waitcnt vmcnt(0)
; CI-NEXT:    s_setpc_b64 s[30:31]
;
; GFX89-LABEL: void_func_v2i8:
; GFX89:       ; %bb.0:
; GFX89-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX89-NEXT:    v_lshlrev_b16_e32 v1, 8, v1
; GFX89-NEXT:    s_mov_b32 s4, 0
; GFX89-NEXT:    v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
; GFX89-NEXT:    s_mov_b32 s7, 0xf000
; GFX89-NEXT:    s_mov_b32 s6, -1
; GFX89-NEXT:    s_mov_b32 s5, s4
; GFX89-NEXT:    buffer_store_short v0, off, s[4:7], 0
; GFX89-NEXT:    s_waitcnt vmcnt(0)
; GFX89-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_v2i8:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    v_lshlrev_b16 v1, 8, v1
; GFX11-NEXT:    v_and_b32_e32 v0, 0xff, v0
; GFX11-NEXT:    s_mov_b32 s0, 0
; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
; GFX11-NEXT:    s_mov_b32 s2, -1
; GFX11-NEXT:    s_mov_b32 s1, s0
; GFX11-NEXT:    v_or_b32_e32 v0, v0, v1
; GFX11-NEXT:    buffer_store_b16 v0, off, s[0:3], 0
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  store <2 x i8> %arg0, ptr addrspace(1) null
  ret void
}

; <2 x i16> argument - hawaii packs v0 and v1 into one dword; the other
; targets store v0 directly.
define void @void_func_v2i16(<2 x i16> %arg0) #0 {
; CI-LABEL: void_func_v2i16:
; CI:       ; %bb.0:
; CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CI-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
; CI-NEXT:    v_and_b32_e32 v0, 0xffff, v0
; CI-NEXT:    v_or_b32_e32 v0, v0, v1
; CI-NEXT:    s_mov_b32 s7, 0xf000
; CI-NEXT:    s_mov_b32 s6, -1
; CI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; CI-NEXT:    s_waitcnt vmcnt(0)
; CI-NEXT:    s_setpc_b64 s[30:31]
;
; GFX89-LABEL: void_func_v2i16:
; GFX89:       ; %bb.0:
; GFX89-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX89-NEXT:    s_mov_b32 s7, 0xf000
; GFX89-NEXT:    s_mov_b32 s6, -1
; GFX89-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; GFX89-NEXT:    s_waitcnt vmcnt(0)
; GFX89-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_v2i16:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
; GFX11-NEXT:    s_mov_b32 s2, -1
; GFX11-NEXT:    buffer_store_b32 v0, off, s[0:3], 0
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  store <2 x i16> %arg0, ptr addrspace(1) undef
  ret void
}

; <3 x i8> argument - v0 and v1 are packed for a short store, and v2 is
; written with a separate byte store.
define void @void_func_v3i8(<3 x i8> %arg0) #0 {
; CI-LABEL: void_func_v3i8:
; CI:       ; %bb.0:
; CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CI-NEXT:    v_lshlrev_b32_e32 v1, 8, v1
; CI-NEXT:    v_and_b32_e32 v0, 0xff, v0
; CI-NEXT:    s_mov_b32 s5, 0
; CI-NEXT:    s_mov_b32 s4, 2
; CI-NEXT:    s_mov_b32 s7, 0xf000
; CI-NEXT:    s_mov_b32 s6, -1
; CI-NEXT:    v_or_b32_e32 v0, v0, v1
; CI-NEXT:    buffer_store_byte v2, off, s[4:7], 0
; CI-NEXT:    s_mov_b32 s4, s5
; CI-NEXT:    buffer_store_short v0, off, s[4:7], 0
; CI-NEXT:    s_waitcnt vmcnt(0)
; CI-NEXT:    s_setpc_b64 s[30:31]
;
; GFX89-LABEL: void_func_v3i8:
; GFX89:       ; %bb.0:
; GFX89-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX89-NEXT:    v_lshlrev_b16_e32 v1, 8, v1
; GFX89-NEXT:    s_mov_b32 s5, 0
; GFX89-NEXT:    s_mov_b32 s4, 2
; GFX89-NEXT:    s_mov_b32 s7, 0xf000
; GFX89-NEXT:    s_mov_b32 s6, -1
; GFX89-NEXT:    v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
; GFX89-NEXT:    buffer_store_byte v2, off, s[4:7], 0
; GFX89-NEXT:    s_mov_b32 s4, s5
; GFX89-NEXT:    buffer_store_short v0, off, s[4:7], 0
; GFX89-NEXT:    s_waitcnt vmcnt(0)
; GFX89-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_v3i8:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    v_lshlrev_b16 v1, 8, v1
; GFX11-NEXT:    v_and_b32_e32 v0, 0xff, v0
; GFX11-NEXT:    s_mov_b32 s1, 0
; GFX11-NEXT:    s_mov_b32 s0, 2
; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
; GFX11-NEXT:    s_mov_b32 s2, -1
; GFX11-NEXT:    v_or_b32_e32 v0, v0, v1
; GFX11-NEXT:    buffer_store_b8 v2, off, s[0:3], 0
; GFX11-NEXT:    s_mov_b32 s0, s1
; GFX11-NEXT:    buffer_store_b16 v0, off, s[0:3], 0
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  store <3 x i8> %arg0, ptr addrspace(1) null
  ret void
}

define void @void_func_v4i8(<4 x i8> %arg0) #0 {
; CI-LABEL: void_func_v4i8:
; CI:       ; %bb.0:
; CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CI-NEXT:    v_and_b32_e32 v2, 0xff, v2
; CI-NEXT:    v_lshlrev_b32_e32 v1, 8, v1
; CI-NEXT:    v_and_b32_e32 v0, 0xff, v0
; CI-NEXT:    v_lshlrev_b32_e32 v3, 24, v3
; CI-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
; CI-NEXT:    v_or_b32_e32 v0, v0, v1
; CI-NEXT:    v_or_b32_e32 v2, v3, v2
; CI-NEXT:    v_and_b32_e32 v0, 0xffff, v0
; CI-NEXT:    s_mov_b32 s4, 0
; CI-NEXT:    v_or_b32_e32 v0, v0, v2
; CI-NEXT:    s_mov_b32 s7, 0xf000
; CI-NEXT:    s_mov_b32 s6, -1
; CI-NEXT:    s_mov_b32 s5, s4
; CI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; CI-NEXT:    s_waitcnt vmcnt(0)
; CI-NEXT:    s_setpc_b64 s[30:31]
;
; GFX89-LABEL: void_func_v4i8:
; GFX89:       ; %bb.0:
; GFX89-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX89-NEXT:    v_lshlrev_b16_e32 v1, 8, v1
; GFX89-NEXT:    v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
; GFX89-NEXT:    v_lshlrev_b16_e32 v1, 8, v3
; GFX89-NEXT:    v_or_b32_sdwa v1, v2, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
; GFX89-NEXT:    s_mov_b32 s4, 0
; GFX89-NEXT:    v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX89-NEXT:    s_mov_b32 s7, 0xf000
; GFX89-NEXT:    s_mov_b32 s6, -1
; GFX89-NEXT:    s_mov_b32 s5, s4
; GFX89-NEXT:    buffer_store_dword v0, off, s[4:7], 
0 ; GFX89-NEXT: s_waitcnt vmcnt(0) ; GFX89-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: void_func_v4i8: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: v_lshlrev_b16 v1, 8, v1 ; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v0 ; GFX11-NEXT: v_lshlrev_b16 v3, 8, v3 ; GFX11-NEXT: v_and_b32_e32 v2, 0xff, v2 ; GFX11-NEXT: s_mov_b32 s0, 0 ; GFX11-NEXT: s_mov_b32 s3, 0x31016000 ; GFX11-NEXT: v_or_b32_e32 v0, v0, v1 ; GFX11-NEXT: s_mov_b32 s2, -1 ; GFX11-NEXT: v_or_b32_e32 v1, v2, v3 ; GFX11-NEXT: s_mov_b32 s1, s0 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) ; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0 ; GFX11-NEXT: v_lshlrev_b32_e32 v1, 16, v1 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-NEXT: v_or_b32_e32 v0, v0, v1 ; GFX11-NEXT: buffer_store_b32 v0, off, s[0:3], 0 ; GFX11-NEXT: s_setpc_b64 s[30:31] store <4 x i8> %arg0, ptr addrspace(1) null ret void } define void @void_func_v5i8(<5 x i8> %arg0) #0 { ; CI-LABEL: void_func_v5i8: ; CI: ; %bb.0: ; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; CI-NEXT: v_and_b32_e32 v2, 0xff, v2 ; CI-NEXT: v_lshlrev_b32_e32 v1, 8, v1 ; CI-NEXT: v_and_b32_e32 v0, 0xff, v0 ; CI-NEXT: v_lshlrev_b32_e32 v3, 24, v3 ; CI-NEXT: v_lshlrev_b32_e32 v2, 16, v2 ; CI-NEXT: v_or_b32_e32 v0, v0, v1 ; CI-NEXT: v_or_b32_e32 v2, v3, v2 ; CI-NEXT: v_and_b32_e32 v0, 0xffff, v0 ; CI-NEXT: s_mov_b32 s5, 0 ; CI-NEXT: s_mov_b32 s4, 4 ; CI-NEXT: s_mov_b32 s7, 0xf000 ; CI-NEXT: s_mov_b32 s6, -1 ; CI-NEXT: v_or_b32_e32 v0, v0, v2 ; CI-NEXT: buffer_store_byte v4, off, s[4:7], 0 ; CI-NEXT: s_mov_b32 s4, s5 ; CI-NEXT: buffer_store_dword v0, off, s[4:7], 0 ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: s_setpc_b64 s[30:31] ; ; GFX89-LABEL: void_func_v5i8: ; GFX89: ; %bb.0: ; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX89-NEXT: v_lshlrev_b16_e32 v1, 8, v1 ; GFX89-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; GFX89-NEXT: 
v_lshlrev_b16_e32 v1, 8, v3 ; GFX89-NEXT: v_or_b32_sdwa v1, v2, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; GFX89-NEXT: s_mov_b32 s5, 0 ; GFX89-NEXT: s_mov_b32 s4, 4 ; GFX89-NEXT: s_mov_b32 s7, 0xf000 ; GFX89-NEXT: s_mov_b32 s6, -1 ; GFX89-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD ; GFX89-NEXT: buffer_store_byte v4, off, s[4:7], 0 ; GFX89-NEXT: s_mov_b32 s4, s5 ; GFX89-NEXT: buffer_store_dword v0, off, s[4:7], 0 ; GFX89-NEXT: s_waitcnt vmcnt(0) ; GFX89-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: void_func_v5i8: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: v_lshlrev_b16 v1, 8, v1 ; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v0 ; GFX11-NEXT: v_lshlrev_b16 v3, 8, v3 ; GFX11-NEXT: v_and_b32_e32 v2, 0xff, v2 ; GFX11-NEXT: s_mov_b32 s1, 0 ; GFX11-NEXT: s_mov_b32 s0, 4 ; GFX11-NEXT: v_or_b32_e32 v0, v0, v1 ; GFX11-NEXT: s_mov_b32 s3, 0x31016000 ; GFX11-NEXT: v_or_b32_e32 v1, v2, v3 ; GFX11-NEXT: s_mov_b32 s2, -1 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_3) | instid1(VALU_DEP_1) ; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0 ; GFX11-NEXT: buffer_store_b8 v4, off, s[0:3], 0 ; GFX11-NEXT: v_lshlrev_b32_e32 v1, 16, v1 ; GFX11-NEXT: s_mov_b32 s0, s1 ; GFX11-NEXT: v_or_b32_e32 v0, v0, v1 ; GFX11-NEXT: buffer_store_b32 v0, off, s[0:3], 0 ; GFX11-NEXT: s_setpc_b64 s[30:31] store <5 x i8> %arg0, ptr addrspace(1) null ret void } define void @void_func_v8i8(<8 x i8> %arg0) #0 { ; CI-LABEL: void_func_v8i8: ; CI: ; %bb.0: ; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; CI-NEXT: v_and_b32_e32 v6, 0xff, v6 ; CI-NEXT: v_lshlrev_b32_e32 v5, 8, v5 ; CI-NEXT: v_and_b32_e32 v4, 0xff, v4 ; CI-NEXT: v_and_b32_e32 v2, 0xff, v2 ; CI-NEXT: v_lshlrev_b32_e32 v1, 8, v1 ; CI-NEXT: v_and_b32_e32 v0, 0xff, v0 ; CI-NEXT: v_lshlrev_b32_e32 v7, 24, v7 ; CI-NEXT: v_lshlrev_b32_e32 v6, 16, v6 ; CI-NEXT: v_or_b32_e32 v4, v4, v5 ; CI-NEXT: v_lshlrev_b32_e32 v3, 24, v3 ; 
CI-NEXT: v_lshlrev_b32_e32 v2, 16, v2 ; CI-NEXT: v_or_b32_e32 v0, v0, v1 ; CI-NEXT: v_or_b32_e32 v6, v7, v6 ; CI-NEXT: v_and_b32_e32 v4, 0xffff, v4 ; CI-NEXT: v_or_b32_e32 v2, v3, v2 ; CI-NEXT: v_and_b32_e32 v0, 0xffff, v0 ; CI-NEXT: s_mov_b32 s4, 0 ; CI-NEXT: v_or_b32_e32 v4, v4, v6 ; CI-NEXT: v_or_b32_e32 v3, v0, v2 ; CI-NEXT: s_mov_b32 s7, 0xf000 ; CI-NEXT: s_mov_b32 s6, -1 ; CI-NEXT: s_mov_b32 s5, s4 ; CI-NEXT: buffer_store_dwordx2 v[3:4], off, s[4:7], 0 ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: s_setpc_b64 s[30:31] ; ; GFX89-LABEL: void_func_v8i8: ; GFX89: ; %bb.0: ; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX89-NEXT: v_lshlrev_b16_e32 v5, 8, v5 ; GFX89-NEXT: v_lshlrev_b16_e32 v1, 8, v1 ; GFX89-NEXT: v_or_b32_sdwa v4, v4, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; GFX89-NEXT: v_lshlrev_b16_e32 v5, 8, v7 ; GFX89-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; GFX89-NEXT: v_lshlrev_b16_e32 v1, 8, v3 ; GFX89-NEXT: v_or_b32_sdwa v5, v6, v5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; GFX89-NEXT: v_or_b32_sdwa v1, v2, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; GFX89-NEXT: s_mov_b32 s4, 0 ; GFX89-NEXT: v_or_b32_sdwa v4, v4, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD ; GFX89-NEXT: v_or_b32_sdwa v3, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD ; GFX89-NEXT: s_mov_b32 s7, 0xf000 ; GFX89-NEXT: s_mov_b32 s6, -1 ; GFX89-NEXT: s_mov_b32 s5, s4 ; GFX89-NEXT: buffer_store_dwordx2 v[3:4], off, s[4:7], 0 ; GFX89-NEXT: s_waitcnt vmcnt(0) ; GFX89-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: void_func_v8i8: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: v_lshlrev_b16 v5, 8, v5 ; GFX11-NEXT: v_and_b32_e32 v4, 0xff, v4 ; GFX11-NEXT: v_lshlrev_b16 v7, 8, v7 ; GFX11-NEXT: v_and_b32_e32 v6, 0xff, v6 ; GFX11-NEXT: v_lshlrev_b16 v1, 8, v1 ; 
GFX11-NEXT: v_and_b32_e32 v0, 0xff, v0 ; GFX11-NEXT: v_lshlrev_b16 v3, 8, v3 ; GFX11-NEXT: v_and_b32_e32 v2, 0xff, v2 ; GFX11-NEXT: v_or_b32_e32 v4, v4, v5 ; GFX11-NEXT: v_or_b32_e32 v5, v6, v7 ; GFX11-NEXT: v_or_b32_e32 v0, v0, v1 ; GFX11-NEXT: s_mov_b32 s0, 0 ; GFX11-NEXT: v_or_b32_e32 v1, v2, v3 ; GFX11-NEXT: v_and_b32_e32 v2, 0xffff, v4 ; GFX11-NEXT: v_lshlrev_b32_e32 v3, 16, v5 ; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0 ; GFX11-NEXT: s_mov_b32 s3, 0x31016000 ; GFX11-NEXT: v_lshlrev_b32_e32 v4, 16, v1 ; GFX11-NEXT: s_mov_b32 s2, -1 ; GFX11-NEXT: v_or_b32_e32 v1, v2, v3 ; GFX11-NEXT: s_mov_b32 s1, s0 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) ; GFX11-NEXT: v_or_b32_e32 v0, v0, v4 ; GFX11-NEXT: buffer_store_b64 v[0:1], off, s[0:3], 0 ; GFX11-NEXT: s_setpc_b64 s[30:31] store <8 x i8> %arg0, ptr addrspace(1) null ret void } define void @void_func_v16i8(<16 x i8> %arg0) #0 { ; CI-LABEL: void_func_v16i8: ; CI: ; %bb.0: ; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; CI-NEXT: v_and_b32_e32 v14, 0xff, v14 ; CI-NEXT: v_lshlrev_b32_e32 v13, 8, v13 ; CI-NEXT: v_and_b32_e32 v12, 0xff, v12 ; CI-NEXT: v_and_b32_e32 v10, 0xff, v10 ; CI-NEXT: v_lshlrev_b32_e32 v9, 8, v9 ; CI-NEXT: v_and_b32_e32 v8, 0xff, v8 ; CI-NEXT: v_and_b32_e32 v6, 0xff, v6 ; CI-NEXT: v_lshlrev_b32_e32 v5, 8, v5 ; CI-NEXT: v_and_b32_e32 v4, 0xff, v4 ; CI-NEXT: v_and_b32_e32 v2, 0xff, v2 ; CI-NEXT: v_lshlrev_b32_e32 v1, 8, v1 ; CI-NEXT: v_and_b32_e32 v0, 0xff, v0 ; CI-NEXT: v_lshlrev_b32_e32 v15, 24, v15 ; CI-NEXT: v_lshlrev_b32_e32 v14, 16, v14 ; CI-NEXT: v_or_b32_e32 v12, v12, v13 ; CI-NEXT: v_lshlrev_b32_e32 v11, 24, v11 ; CI-NEXT: v_lshlrev_b32_e32 v10, 16, v10 ; CI-NEXT: v_or_b32_e32 v8, v8, v9 ; CI-NEXT: v_lshlrev_b32_e32 v7, 24, v7 ; CI-NEXT: v_lshlrev_b32_e32 v6, 16, v6 ; CI-NEXT: v_or_b32_e32 v4, v4, v5 ; CI-NEXT: v_lshlrev_b32_e32 v3, 24, v3 ; CI-NEXT: v_lshlrev_b32_e32 v2, 16, v2 ; CI-NEXT: v_or_b32_e32 v0, v0, v1 ; CI-NEXT: v_or_b32_e32 v14, v15, v14 ; CI-NEXT: v_and_b32_e32 v12, 0xffff, 
v12 ; CI-NEXT: v_or_b32_e32 v10, v11, v10 ; CI-NEXT: v_and_b32_e32 v8, 0xffff, v8 ; CI-NEXT: v_or_b32_e32 v6, v7, v6 ; CI-NEXT: v_and_b32_e32 v4, 0xffff, v4 ; CI-NEXT: v_or_b32_e32 v2, v3, v2 ; CI-NEXT: v_and_b32_e32 v0, 0xffff, v0 ; CI-NEXT: s_mov_b32 s4, 0 ; CI-NEXT: v_or_b32_e32 v12, v12, v14 ; CI-NEXT: v_or_b32_e32 v11, v8, v10 ; CI-NEXT: v_or_b32_e32 v10, v4, v6 ; CI-NEXT: v_or_b32_e32 v9, v0, v2 ; CI-NEXT: s_mov_b32 s7, 0xf000 ; CI-NEXT: s_mov_b32 s6, -1 ; CI-NEXT: s_mov_b32 s5, s4 ; CI-NEXT: buffer_store_dwordx4 v[9:12], off, s[4:7], 0 ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: s_setpc_b64 s[30:31] ; ; GFX89-LABEL: void_func_v16i8: ; GFX89: ; %bb.0: ; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX89-NEXT: v_lshlrev_b16_e32 v13, 8, v13 ; GFX89-NEXT: v_lshlrev_b16_e32 v9, 8, v9 ; GFX89-NEXT: v_lshlrev_b16_e32 v5, 8, v5 ; GFX89-NEXT: v_lshlrev_b16_e32 v1, 8, v1 ; GFX89-NEXT: v_or_b32_sdwa v12, v12, v13 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; GFX89-NEXT: v_lshlrev_b16_e32 v13, 8, v15 ; GFX89-NEXT: v_or_b32_sdwa v8, v8, v9 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; GFX89-NEXT: v_lshlrev_b16_e32 v9, 8, v11 ; GFX89-NEXT: v_or_b32_sdwa v4, v4, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; GFX89-NEXT: v_lshlrev_b16_e32 v5, 8, v7 ; GFX89-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; GFX89-NEXT: v_lshlrev_b16_e32 v1, 8, v3 ; GFX89-NEXT: v_or_b32_sdwa v13, v14, v13 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; GFX89-NEXT: v_or_b32_sdwa v9, v10, v9 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; GFX89-NEXT: v_or_b32_sdwa v5, v6, v5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; GFX89-NEXT: v_or_b32_sdwa v1, v2, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; GFX89-NEXT: s_mov_b32 s4, 0 ; GFX89-NEXT: v_or_b32_sdwa v12, v12, v13 
dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD ; GFX89-NEXT: v_or_b32_sdwa v11, v8, v9 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD ; GFX89-NEXT: v_or_b32_sdwa v10, v4, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD ; GFX89-NEXT: v_or_b32_sdwa v9, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD ; GFX89-NEXT: s_mov_b32 s7, 0xf000 ; GFX89-NEXT: s_mov_b32 s6, -1 ; GFX89-NEXT: s_mov_b32 s5, s4 ; GFX89-NEXT: buffer_store_dwordx4 v[9:12], off, s[4:7], 0 ; GFX89-NEXT: s_waitcnt vmcnt(0) ; GFX89-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: void_func_v16i8: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: v_lshlrev_b16 v13, 8, v13 ; GFX11-NEXT: v_and_b32_e32 v12, 0xff, v12 ; GFX11-NEXT: v_lshlrev_b16 v15, 8, v15 ; GFX11-NEXT: v_and_b32_e32 v14, 0xff, v14 ; GFX11-NEXT: v_lshlrev_b16 v9, 8, v9 ; GFX11-NEXT: v_and_b32_e32 v8, 0xff, v8 ; GFX11-NEXT: v_lshlrev_b16 v11, 8, v11 ; GFX11-NEXT: v_and_b32_e32 v10, 0xff, v10 ; GFX11-NEXT: v_lshlrev_b16 v5, 8, v5 ; GFX11-NEXT: v_and_b32_e32 v4, 0xff, v4 ; GFX11-NEXT: v_lshlrev_b16 v7, 8, v7 ; GFX11-NEXT: v_and_b32_e32 v6, 0xff, v6 ; GFX11-NEXT: v_lshlrev_b16 v1, 8, v1 ; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v0 ; GFX11-NEXT: v_lshlrev_b16 v3, 8, v3 ; GFX11-NEXT: v_and_b32_e32 v2, 0xff, v2 ; GFX11-NEXT: v_or_b32_e32 v12, v12, v13 ; GFX11-NEXT: v_or_b32_e32 v13, v14, v15 ; GFX11-NEXT: v_or_b32_e32 v8, v8, v9 ; GFX11-NEXT: v_or_b32_e32 v10, v10, v11 ; GFX11-NEXT: v_or_b32_e32 v4, v4, v5 ; GFX11-NEXT: v_or_b32_e32 v5, v6, v7 ; GFX11-NEXT: v_or_b32_e32 v0, v0, v1 ; GFX11-NEXT: v_or_b32_e32 v1, v2, v3 ; GFX11-NEXT: v_and_b32_e32 v9, 0xffff, v12 ; GFX11-NEXT: v_lshlrev_b32_e32 v12, 16, v13 ; GFX11-NEXT: v_and_b32_e32 v8, 0xffff, v8 ; GFX11-NEXT: v_lshlrev_b32_e32 v2, 16, v10 ; GFX11-NEXT: v_and_b32_e32 v4, 0xffff, v4 ; GFX11-NEXT: v_lshlrev_b32_e32 v5, 16, v5 ; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0 ; GFX11-NEXT: 
v_lshlrev_b32_e32 v6, 16, v1 ; GFX11-NEXT: v_or_b32_e32 v3, v9, v12 ; GFX11-NEXT: v_or_b32_e32 v2, v8, v2 ; GFX11-NEXT: v_or_b32_e32 v1, v4, v5 ; GFX11-NEXT: s_mov_b32 s0, 0 ; GFX11-NEXT: v_or_b32_e32 v0, v0, v6 ; GFX11-NEXT: s_mov_b32 s3, 0x31016000 ; GFX11-NEXT: s_mov_b32 s2, -1 ; GFX11-NEXT: s_mov_b32 s1, s0 ; GFX11-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0 ; GFX11-NEXT: s_setpc_b64 s[30:31] store <16 x i8> %arg0, ptr addrspace(1) null ret void } define void @void_func_v32i8(<32 x i8> %arg0) #0 { ; CI-LABEL: void_func_v32i8: ; CI: ; %bb.0: ; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; CI-NEXT: v_and_b32_e32 v2, 0xff, v2 ; CI-NEXT: v_lshlrev_b32_e32 v3, 24, v3 ; CI-NEXT: v_lshlrev_b32_e32 v2, 16, v2 ; CI-NEXT: v_or_b32_e32 v2, v3, v2 ; CI-NEXT: buffer_load_dword v3, off, s[0:3], s32 ; CI-NEXT: v_lshlrev_b32_e32 v5, 8, v5 ; CI-NEXT: v_and_b32_e32 v4, 0xff, v4 ; CI-NEXT: v_and_b32_e32 v6, 0xff, v6 ; CI-NEXT: v_or_b32_e32 v4, v4, v5 ; CI-NEXT: v_lshlrev_b32_e32 v5, 24, v7 ; CI-NEXT: v_lshlrev_b32_e32 v6, 16, v6 ; CI-NEXT: v_lshlrev_b32_e32 v13, 8, v13 ; CI-NEXT: v_and_b32_e32 v12, 0xff, v12 ; CI-NEXT: v_or_b32_e32 v5, v5, v6 ; CI-NEXT: v_and_b32_e32 v4, 0xffff, v4 ; CI-NEXT: v_lshlrev_b32_e32 v1, 8, v1 ; CI-NEXT: v_and_b32_e32 v0, 0xff, v0 ; CI-NEXT: v_or_b32_e32 v12, v12, v13 ; CI-NEXT: v_lshlrev_b32_e32 v9, 8, v9 ; CI-NEXT: v_and_b32_e32 v8, 0xff, v8 ; CI-NEXT: v_and_b32_e32 v13, 0xff, v14 ; CI-NEXT: v_and_b32_e32 v10, 0xff, v10 ; CI-NEXT: v_or_b32_e32 v7, v4, v5 ; CI-NEXT: v_or_b32_e32 v0, v0, v1 ; CI-NEXT: v_lshlrev_b32_e32 v1, 8, v29 ; CI-NEXT: v_and_b32_e32 v4, 0xff, v28 ; CI-NEXT: v_and_b32_e32 v6, 0xff, v26 ; CI-NEXT: v_or_b32_e32 v8, v8, v9 ; CI-NEXT: v_lshlrev_b32_e32 v9, 24, v15 ; CI-NEXT: v_lshlrev_b32_e32 v13, 16, v13 ; CI-NEXT: v_lshlrev_b32_e32 v11, 24, v11 ; CI-NEXT: v_lshlrev_b32_e32 v10, 16, v10 ; CI-NEXT: v_or_b32_e32 v1, v4, v1 ; CI-NEXT: v_and_b32_e32 v4, 0xff, v30 ; CI-NEXT: v_lshlrev_b32_e32 v5, 24, v27 ; CI-NEXT: v_lshlrev_b32_e32 
v6, 16, v6 ; CI-NEXT: v_and_b32_e32 v0, 0xffff, v0 ; CI-NEXT: v_or_b32_e32 v9, v9, v13 ; CI-NEXT: v_or_b32_e32 v10, v11, v10 ; CI-NEXT: v_and_b32_e32 v11, 0xffff, v12 ; CI-NEXT: v_and_b32_e32 v8, 0xffff, v8 ; CI-NEXT: v_lshlrev_b32_e32 v4, 16, v4 ; CI-NEXT: v_or_b32_e32 v5, v5, v6 ; CI-NEXT: v_or_b32_e32 v6, v0, v2 ; CI-NEXT: v_or_b32_e32 v9, v11, v9 ; CI-NEXT: v_or_b32_e32 v8, v8, v10 ; CI-NEXT: v_lshlrev_b32_e32 v10, 8, v25 ; CI-NEXT: v_and_b32_e32 v11, 0xff, v24 ; CI-NEXT: v_and_b32_e32 v1, 0xffff, v1 ; CI-NEXT: s_mov_b32 s5, 0 ; CI-NEXT: s_mov_b32 s4, 16 ; CI-NEXT: s_mov_b32 s7, 0xf000 ; CI-NEXT: s_mov_b32 s6, -1 ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: v_lshlrev_b32_e32 v0, 24, v3 ; CI-NEXT: v_or_b32_e32 v0, v0, v4 ; CI-NEXT: v_or_b32_e32 v3, v1, v0 ; CI-NEXT: v_or_b32_e32 v0, v11, v10 ; CI-NEXT: v_and_b32_e32 v0, 0xffff, v0 ; CI-NEXT: v_and_b32_e32 v1, 0xff, v22 ; CI-NEXT: v_or_b32_e32 v2, v0, v5 ; CI-NEXT: v_lshlrev_b32_e32 v0, 24, v23 ; CI-NEXT: v_lshlrev_b32_e32 v1, 16, v1 ; CI-NEXT: v_or_b32_e32 v0, v0, v1 ; CI-NEXT: v_lshlrev_b32_e32 v1, 8, v21 ; CI-NEXT: v_and_b32_e32 v4, 0xff, v20 ; CI-NEXT: v_or_b32_e32 v1, v4, v1 ; CI-NEXT: v_and_b32_e32 v1, 0xffff, v1 ; CI-NEXT: v_and_b32_e32 v4, 0xff, v18 ; CI-NEXT: v_or_b32_e32 v1, v1, v0 ; CI-NEXT: v_lshlrev_b32_e32 v0, 24, v19 ; CI-NEXT: v_lshlrev_b32_e32 v4, 16, v4 ; CI-NEXT: v_or_b32_e32 v0, v0, v4 ; CI-NEXT: v_lshlrev_b32_e32 v4, 8, v17 ; CI-NEXT: v_and_b32_e32 v5, 0xff, v16 ; CI-NEXT: v_or_b32_e32 v4, v5, v4 ; CI-NEXT: v_and_b32_e32 v4, 0xffff, v4 ; CI-NEXT: v_or_b32_e32 v0, v4, v0 ; CI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 ; CI-NEXT: s_mov_b32 s4, s5 ; CI-NEXT: buffer_store_dwordx4 v[6:9], off, s[4:7], 0 ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: s_setpc_b64 s[30:31] ; ; GFX89-LABEL: void_func_v32i8: ; GFX89: ; %bb.0: ; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX89-NEXT: v_lshlrev_b16_e32 v13, 8, v13 ; GFX89-NEXT: v_or_b32_sdwa v12, v12, v13 dst_sel:DWORD dst_unused:UNUSED_PAD 
src0_sel:BYTE_0 src1_sel:DWORD ; GFX89-NEXT: v_lshlrev_b16_e32 v13, 8, v15 ; GFX89-NEXT: v_or_b32_sdwa v13, v14, v13 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; GFX89-NEXT: buffer_load_ubyte v14, off, s[0:3], s32 ; GFX89-NEXT: v_lshlrev_b16_e32 v5, 8, v5 ; GFX89-NEXT: v_lshlrev_b16_e32 v1, 8, v1 ; GFX89-NEXT: v_or_b32_sdwa v4, v4, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; GFX89-NEXT: v_lshlrev_b16_e32 v5, 8, v7 ; GFX89-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; GFX89-NEXT: v_lshlrev_b16_e32 v1, 8, v3 ; GFX89-NEXT: v_lshlrev_b16_e32 v9, 8, v9 ; GFX89-NEXT: v_or_b32_sdwa v5, v6, v5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; GFX89-NEXT: v_or_b32_sdwa v6, v2, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; GFX89-NEXT: v_lshlrev_b16_e32 v1, 8, v29 ; GFX89-NEXT: v_or_b32_sdwa v8, v8, v9 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; GFX89-NEXT: v_lshlrev_b16_e32 v9, 8, v11 ; GFX89-NEXT: v_or_b32_sdwa v7, v28, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; GFX89-NEXT: v_lshlrev_b16_e32 v1, 8, v25 ; GFX89-NEXT: v_or_b32_sdwa v9, v10, v9 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; GFX89-NEXT: v_or_b32_sdwa v10, v24, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; GFX89-NEXT: v_lshlrev_b16_e32 v1, 8, v27 ; GFX89-NEXT: v_lshlrev_b16_e32 v2, 8, v23 ; GFX89-NEXT: v_or_b32_sdwa v11, v26, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; GFX89-NEXT: v_lshlrev_b16_e32 v1, 8, v21 ; GFX89-NEXT: v_lshlrev_b16_e32 v3, 8, v17 ; GFX89-NEXT: v_lshlrev_b16_e32 v15, 8, v19 ; GFX89-NEXT: v_or_b32_sdwa v19, v22, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; GFX89-NEXT: v_or_b32_sdwa v2, v8, v9 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD ; GFX89-NEXT: 
v_or_b32_sdwa v17, v20, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; GFX89-NEXT: v_or_b32_sdwa v16, v16, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; GFX89-NEXT: v_or_b32_sdwa v15, v18, v15 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; GFX89-NEXT: s_mov_b32 s5, 0 ; GFX89-NEXT: s_mov_b32 s4, 16 ; GFX89-NEXT: s_mov_b32 s7, 0xf000 ; GFX89-NEXT: s_mov_b32 s6, -1 ; GFX89-NEXT: v_or_b32_sdwa v1, v4, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD ; GFX89-NEXT: v_or_b32_sdwa v0, v0, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD ; GFX89-NEXT: v_or_b32_sdwa v6, v10, v11 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD ; GFX89-NEXT: v_or_b32_sdwa v5, v17, v19 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD ; GFX89-NEXT: v_or_b32_sdwa v4, v16, v15 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD ; GFX89-NEXT: v_or_b32_sdwa v3, v12, v13 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD ; GFX89-NEXT: s_waitcnt vmcnt(0) ; GFX89-NEXT: v_lshlrev_b16_e32 v8, 8, v14 ; GFX89-NEXT: v_or_b32_sdwa v8, v30, v8 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; GFX89-NEXT: v_or_b32_sdwa v7, v7, v8 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD ; GFX89-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0 ; GFX89-NEXT: s_mov_b32 s4, s5 ; GFX89-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 ; GFX89-NEXT: s_waitcnt vmcnt(0) ; GFX89-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: void_func_v32i8: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: scratch_load_u8 v31, off, s32 ; GFX11-NEXT: v_lshlrev_b16 v1, 8, v1 ; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v0 ; GFX11-NEXT: v_lshlrev_b16 v3, 8, v3 ; GFX11-NEXT: v_and_b32_e32 v2, 0xff, v2 ; GFX11-NEXT: v_lshlrev_b16 v9, 8, v9 ; GFX11-NEXT: v_and_b32_e32 v8, 0xff, v8 ; GFX11-NEXT: 
v_lshlrev_b16 v11, 8, v11 ; GFX11-NEXT: v_and_b32_e32 v10, 0xff, v10 ; GFX11-NEXT: v_lshlrev_b16 v17, 8, v17 ; GFX11-NEXT: v_and_b32_e32 v16, 0xff, v16 ; GFX11-NEXT: v_or_b32_e32 v0, v0, v1 ; GFX11-NEXT: v_or_b32_e32 v1, v2, v3 ; GFX11-NEXT: v_lshlrev_b16 v13, 8, v13 ; GFX11-NEXT: v_and_b32_e32 v12, 0xff, v12 ; GFX11-NEXT: v_lshlrev_b16 v15, 8, v15 ; GFX11-NEXT: v_and_b32_e32 v14, 0xff, v14 ; GFX11-NEXT: v_lshlrev_b16 v5, 8, v5 ; GFX11-NEXT: v_and_b32_e32 v4, 0xff, v4 ; GFX11-NEXT: v_lshlrev_b16 v7, 8, v7 ; GFX11-NEXT: v_and_b32_e32 v6, 0xff, v6 ; GFX11-NEXT: v_lshlrev_b16 v29, 8, v29 ; GFX11-NEXT: v_and_b32_e32 v28, 0xff, v28 ; GFX11-NEXT: v_and_b32_e32 v30, 0xff, v30 ; GFX11-NEXT: v_lshlrev_b16 v25, 8, v25 ; GFX11-NEXT: v_and_b32_e32 v24, 0xff, v24 ; GFX11-NEXT: v_lshlrev_b16 v27, 8, v27 ; GFX11-NEXT: v_and_b32_e32 v26, 0xff, v26 ; GFX11-NEXT: v_lshlrev_b16 v21, 8, v21 ; GFX11-NEXT: v_and_b32_e32 v20, 0xff, v20 ; GFX11-NEXT: v_lshlrev_b16 v23, 8, v23 ; GFX11-NEXT: v_and_b32_e32 v22, 0xff, v22 ; GFX11-NEXT: v_lshlrev_b16 v19, 8, v19 ; GFX11-NEXT: v_and_b32_e32 v18, 0xff, v18 ; GFX11-NEXT: v_or_b32_e32 v8, v8, v9 ; GFX11-NEXT: v_or_b32_e32 v9, v10, v11 ; GFX11-NEXT: v_or_b32_e32 v11, v16, v17 ; GFX11-NEXT: v_lshlrev_b32_e32 v17, 16, v1 ; GFX11-NEXT: v_or_b32_e32 v12, v12, v13 ; GFX11-NEXT: v_or_b32_e32 v13, v14, v15 ; GFX11-NEXT: v_or_b32_e32 v4, v4, v5 ; GFX11-NEXT: v_or_b32_e32 v5, v6, v7 ; GFX11-NEXT: v_or_b32_e32 v2, v28, v29 ; GFX11-NEXT: v_or_b32_e32 v3, v24, v25 ; GFX11-NEXT: v_or_b32_e32 v6, v26, v27 ; GFX11-NEXT: v_or_b32_e32 v7, v20, v21 ; GFX11-NEXT: v_or_b32_e32 v10, v22, v23 ; GFX11-NEXT: v_or_b32_e32 v14, v18, v19 ; GFX11-NEXT: v_and_b32_e32 v15, 0xffff, v4 ; GFX11-NEXT: v_lshlrev_b32_e32 v16, 16, v5 ; GFX11-NEXT: v_and_b32_e32 v18, 0xffff, v2 ; GFX11-NEXT: v_and_b32_e32 v4, 0xffff, v3 ; GFX11-NEXT: v_lshlrev_b32_e32 v5, 16, v6 ; GFX11-NEXT: v_and_b32_e32 v7, 0xffff, v7 ; GFX11-NEXT: v_lshlrev_b32_e32 v10, 16, v10 ; GFX11-NEXT: v_and_b32_e32 v11, 
0xffff, v11 ; GFX11-NEXT: v_lshlrev_b32_e32 v14, 16, v14 ; GFX11-NEXT: v_and_b32_e32 v12, 0xffff, v12 ; GFX11-NEXT: v_lshlrev_b32_e32 v13, 16, v13 ; GFX11-NEXT: v_and_b32_e32 v8, 0xffff, v8 ; GFX11-NEXT: v_lshlrev_b32_e32 v9, 16, v9 ; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0 ; GFX11-NEXT: v_or_b32_e32 v6, v4, v5 ; GFX11-NEXT: v_or_b32_e32 v5, v7, v10 ; GFX11-NEXT: v_or_b32_e32 v4, v11, v14 ; GFX11-NEXT: v_or_b32_e32 v3, v12, v13 ; GFX11-NEXT: v_or_b32_e32 v2, v8, v9 ; GFX11-NEXT: v_or_b32_e32 v0, v0, v17 ; GFX11-NEXT: s_mov_b32 s1, 0 ; GFX11-NEXT: s_mov_b32 s0, 16 ; GFX11-NEXT: s_mov_b32 s3, 0x31016000 ; GFX11-NEXT: s_mov_b32 s2, -1 ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: v_lshlrev_b16 v1, 8, v31 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-NEXT: v_or_b32_e32 v1, v30, v1 ; GFX11-NEXT: v_lshlrev_b32_e32 v1, 16, v1 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-NEXT: v_or_b32_e32 v7, v18, v1 ; GFX11-NEXT: v_or_b32_e32 v1, v15, v16 ; GFX11-NEXT: buffer_store_b128 v[4:7], off, s[0:3], 0 ; GFX11-NEXT: s_mov_b32 s0, s1 ; GFX11-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0 ; GFX11-NEXT: s_setpc_b64 s[30:31] store <32 x i8> %arg0, ptr addrspace(1) null ret void } define void @void_func_v3i16(<3 x i16> %arg0) #0 { ; CI-LABEL: void_func_v3i16: ; CI: ; %bb.0: ; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; CI-NEXT: v_lshlrev_b32_e32 v1, 16, v1 ; CI-NEXT: v_and_b32_e32 v0, 0xffff, v0 ; CI-NEXT: s_mov_b32 s7, 0xf000 ; CI-NEXT: s_mov_b32 s6, -1 ; CI-NEXT: v_or_b32_e32 v0, v0, v1 ; CI-NEXT: buffer_store_short v2, off, s[4:7], 0 ; CI-NEXT: buffer_store_dword v0, off, s[4:7], 0 ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: s_setpc_b64 s[30:31] ; ; GFX89-LABEL: void_func_v3i16: ; GFX89: ; %bb.0: ; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX89-NEXT: s_mov_b32 s7, 0xf000 ; GFX89-NEXT: s_mov_b32 s6, -1 ; GFX89-NEXT: buffer_store_short v1, off, s[4:7], 0 ; GFX89-NEXT: buffer_store_dword v0, off, s[4:7], 0 ; 
GFX89-NEXT: s_waitcnt vmcnt(0) ; GFX89-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: void_func_v3i16: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: s_mov_b32 s3, 0x31016000 ; GFX11-NEXT: s_mov_b32 s2, -1 ; GFX11-NEXT: s_clause 0x1 ; GFX11-NEXT: buffer_store_b16 v1, off, s[0:3], 0 ; GFX11-NEXT: buffer_store_b32 v0, off, s[0:3], 0 ; GFX11-NEXT: s_setpc_b64 s[30:31] store <3 x i16> %arg0, ptr addrspace(1) undef ret void } define void @void_func_v4i16(<4 x i16> %arg0) #0 { ; CI-LABEL: void_func_v4i16: ; CI: ; %bb.0: ; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; CI-NEXT: v_lshlrev_b32_e32 v3, 16, v3 ; CI-NEXT: v_and_b32_e32 v2, 0xffff, v2 ; CI-NEXT: v_lshlrev_b32_e32 v1, 16, v1 ; CI-NEXT: v_and_b32_e32 v0, 0xffff, v0 ; CI-NEXT: v_or_b32_e32 v2, v2, v3 ; CI-NEXT: v_or_b32_e32 v1, v0, v1 ; CI-NEXT: s_mov_b32 s7, 0xf000 ; CI-NEXT: s_mov_b32 s6, -1 ; CI-NEXT: buffer_store_dwordx2 v[1:2], off, s[4:7], 0 ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: s_setpc_b64 s[30:31] ; ; GFX89-LABEL: void_func_v4i16: ; GFX89: ; %bb.0: ; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX89-NEXT: s_mov_b32 s7, 0xf000 ; GFX89-NEXT: s_mov_b32 s6, -1 ; GFX89-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 ; GFX89-NEXT: s_waitcnt vmcnt(0) ; GFX89-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: void_func_v4i16: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: s_mov_b32 s3, 0x31016000 ; GFX11-NEXT: s_mov_b32 s2, -1 ; GFX11-NEXT: buffer_store_b64 v[0:1], off, s[0:3], 0 ; GFX11-NEXT: s_setpc_b64 s[30:31] store <4 x i16> %arg0, ptr addrspace(1) undef ret void } define void @void_func_v5i16(<5 x i16> %arg0) #0 { ; CI-LABEL: void_func_v5i16: ; CI: ; %bb.0: ; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; CI-NEXT: v_lshlrev_b32_e32 v3, 16, v3 ; CI-NEXT: v_and_b32_e32 v2, 0xffff, v2 ; CI-NEXT: v_lshlrev_b32_e32 v1, 16, v1 ; CI-NEXT: v_and_b32_e32 v0, 0xffff, v0 ; CI-NEXT: s_mov_b32 s7, 0xf000 ; CI-NEXT: s_mov_b32 s6, -1 
; CI-NEXT: v_or_b32_e32 v2, v2, v3 ; CI-NEXT: v_or_b32_e32 v1, v0, v1 ; CI-NEXT: buffer_store_short v4, off, s[4:7], 0 ; CI-NEXT: buffer_store_dwordx2 v[1:2], off, s[4:7], 0 ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: s_setpc_b64 s[30:31] ; ; GFX89-LABEL: void_func_v5i16: ; GFX89: ; %bb.0: ; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX89-NEXT: s_mov_b32 s7, 0xf000 ; GFX89-NEXT: s_mov_b32 s6, -1 ; GFX89-NEXT: buffer_store_short v2, off, s[4:7], 0 ; GFX89-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 ; GFX89-NEXT: s_waitcnt vmcnt(0) ; GFX89-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: void_func_v5i16: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: s_mov_b32 s3, 0x31016000 ; GFX11-NEXT: s_mov_b32 s2, -1 ; GFX11-NEXT: s_clause 0x1 ; GFX11-NEXT: buffer_store_b16 v2, off, s[0:3], 0 ; GFX11-NEXT: buffer_store_b64 v[0:1], off, s[0:3], 0 ; GFX11-NEXT: s_setpc_b64 s[30:31] store <5 x i16> %arg0, ptr addrspace(1) undef ret void } define void @void_func_v8i16(<8 x i16> %arg0) #0 { ; CI-LABEL: void_func_v8i16: ; CI: ; %bb.0: ; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; CI-NEXT: v_lshlrev_b32_e32 v7, 16, v7 ; CI-NEXT: v_and_b32_e32 v6, 0xffff, v6 ; CI-NEXT: v_lshlrev_b32_e32 v5, 16, v5 ; CI-NEXT: v_and_b32_e32 v4, 0xffff, v4 ; CI-NEXT: v_lshlrev_b32_e32 v3, 16, v3 ; CI-NEXT: v_and_b32_e32 v2, 0xffff, v2 ; CI-NEXT: v_lshlrev_b32_e32 v1, 16, v1 ; CI-NEXT: v_and_b32_e32 v0, 0xffff, v0 ; CI-NEXT: v_or_b32_e32 v6, v6, v7 ; CI-NEXT: v_or_b32_e32 v5, v4, v5 ; CI-NEXT: v_or_b32_e32 v4, v2, v3 ; CI-NEXT: v_or_b32_e32 v3, v0, v1 ; CI-NEXT: s_mov_b32 s7, 0xf000 ; CI-NEXT: s_mov_b32 s6, -1 ; CI-NEXT: buffer_store_dwordx4 v[3:6], off, s[4:7], 0 ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: s_setpc_b64 s[30:31] ; ; GFX89-LABEL: void_func_v8i16: ; GFX89: ; %bb.0: ; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX89-NEXT: s_mov_b32 s7, 0xf000 ; GFX89-NEXT: s_mov_b32 s6, -1 ; GFX89-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 ; 
GFX89-NEXT: s_waitcnt vmcnt(0) ; GFX89-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: void_func_v8i16: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: s_mov_b32 s3, 0x31016000 ; GFX11-NEXT: s_mov_b32 s2, -1 ; GFX11-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0 ; GFX11-NEXT: s_setpc_b64 s[30:31] store <8 x i16> %arg0, ptr addrspace(1) undef ret void } define void @void_func_v16i16(<16 x i16> %arg0) #0 { ; CI-LABEL: void_func_v16i16: ; CI: ; %bb.0: ; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; CI-NEXT: v_lshlrev_b32_e32 v5, 16, v5 ; CI-NEXT: v_and_b32_e32 v4, 0xffff, v4 ; CI-NEXT: v_lshlrev_b32_e32 v3, 16, v3 ; CI-NEXT: v_and_b32_e32 v2, 0xffff, v2 ; CI-NEXT: v_lshlrev_b32_e32 v1, 16, v1 ; CI-NEXT: v_and_b32_e32 v0, 0xffff, v0 ; CI-NEXT: v_or_b32_e32 v5, v4, v5 ; CI-NEXT: v_or_b32_e32 v4, v2, v3 ; CI-NEXT: v_or_b32_e32 v3, v0, v1 ; CI-NEXT: v_lshlrev_b32_e32 v0, 16, v15 ; CI-NEXT: v_and_b32_e32 v1, 0xffff, v14 ; CI-NEXT: v_or_b32_e32 v14, v1, v0 ; CI-NEXT: v_lshlrev_b32_e32 v0, 16, v13 ; CI-NEXT: v_and_b32_e32 v1, 0xffff, v12 ; CI-NEXT: v_or_b32_e32 v13, v1, v0 ; CI-NEXT: v_lshlrev_b32_e32 v0, 16, v11 ; CI-NEXT: v_and_b32_e32 v1, 0xffff, v10 ; CI-NEXT: v_or_b32_e32 v12, v1, v0 ; CI-NEXT: v_lshlrev_b32_e32 v0, 16, v9 ; CI-NEXT: v_and_b32_e32 v1, 0xffff, v8 ; CI-NEXT: v_lshlrev_b32_e32 v7, 16, v7 ; CI-NEXT: v_and_b32_e32 v6, 0xffff, v6 ; CI-NEXT: v_or_b32_e32 v11, v1, v0 ; CI-NEXT: s_mov_b32 s7, 0xf000 ; CI-NEXT: s_mov_b32 s6, -1 ; CI-NEXT: v_or_b32_e32 v6, v6, v7 ; CI-NEXT: buffer_store_dwordx4 v[11:14], off, s[4:7], 0 ; CI-NEXT: buffer_store_dwordx4 v[3:6], off, s[4:7], 0 ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: s_setpc_b64 s[30:31] ; ; GFX89-LABEL: void_func_v16i16: ; GFX89: ; %bb.0: ; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX89-NEXT: s_mov_b32 s7, 0xf000 ; GFX89-NEXT: s_mov_b32 s6, -1 ; GFX89-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0 ; GFX89-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 ; 
GFX89-NEXT: s_waitcnt vmcnt(0) ; GFX89-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: void_func_v16i16: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: s_mov_b32 s3, 0x31016000 ; GFX11-NEXT: s_mov_b32 s2, -1 ; GFX11-NEXT: s_clause 0x1 ; GFX11-NEXT: buffer_store_b128 v[4:7], off, s[0:3], 0 ; GFX11-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0 ; GFX11-NEXT: s_setpc_b64 s[30:31] store <16 x i16> %arg0, ptr addrspace(1) undef ret void } define void @void_func_v2i24(<2 x i24> %arg0) #0 { ; CI-LABEL: void_func_v2i24: ; CI: ; %bb.0: ; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; CI-NEXT: v_add_i32_e32 v0, vcc, v0, v1 ; CI-NEXT: v_lshrrev_b32_e32 v1, 16, v0 ; CI-NEXT: s_mov_b32 s7, 0xf000 ; CI-NEXT: s_mov_b32 s6, -1 ; CI-NEXT: buffer_store_byte v1, off, s[4:7], 0 ; CI-NEXT: buffer_store_short v0, off, s[4:7], 0 ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: s_setpc_b64 s[30:31] ; ; VI-LABEL: void_func_v2i24: ; VI: ; %bb.0: ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; VI-NEXT: v_add_u32_e32 v0, vcc, v0, v1 ; VI-NEXT: v_lshrrev_b32_e32 v1, 16, v0 ; VI-NEXT: s_mov_b32 s7, 0xf000 ; VI-NEXT: s_mov_b32 s6, -1 ; VI-NEXT: buffer_store_byte v1, off, s[4:7], 0 ; VI-NEXT: buffer_store_short v0, off, s[4:7], 0 ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: s_setpc_b64 s[30:31] ; ; GFX9-LABEL: void_func_v2i24: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX9-NEXT: v_add_u32_e32 v0, v0, v1 ; GFX9-NEXT: v_lshrrev_b32_e32 v1, 16, v0 ; GFX9-NEXT: s_mov_b32 s7, 0xf000 ; GFX9-NEXT: s_mov_b32 s6, -1 ; GFX9-NEXT: buffer_store_byte v1, off, s[4:7], 0 ; GFX9-NEXT: buffer_store_short v0, off, s[4:7], 0 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: void_func_v2i24: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: v_add_nc_u32_e32 v0, v0, v1 ; GFX11-NEXT: s_mov_b32 s3, 0x31016000 ; GFX11-NEXT: s_mov_b32 s2, -1 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) ; 
GFX11-NEXT: v_lshrrev_b32_e32 v1, 16, v0 ; GFX11-NEXT: s_clause 0x1 ; GFX11-NEXT: buffer_store_b8 v1, off, s[0:3], 0 ; GFX11-NEXT: buffer_store_b16 v0, off, s[0:3], 0 ; GFX11-NEXT: s_setpc_b64 s[30:31] %elt0 = extractelement <2 x i24> %arg0, i32 0 %elt1 = extractelement <2 x i24> %arg0, i32 1 %add = add i24 %elt0, %elt1 store i24 %add, ptr addrspace(1) undef ret void } define void @void_func_v2f32(<2 x float> %arg0) #0 { ; CIGFX89-LABEL: void_func_v2f32: ; CIGFX89: ; %bb.0: ; CIGFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; CIGFX89-NEXT: s_mov_b32 s7, 0xf000 ; CIGFX89-NEXT: s_mov_b32 s6, -1 ; CIGFX89-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 ; CIGFX89-NEXT: s_waitcnt vmcnt(0) ; CIGFX89-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: void_func_v2f32: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: s_mov_b32 s3, 0x31016000 ; GFX11-NEXT: s_mov_b32 s2, -1 ; GFX11-NEXT: buffer_store_b64 v[0:1], off, s[0:3], 0 ; GFX11-NEXT: s_setpc_b64 s[30:31] store <2 x float> %arg0, ptr addrspace(1) undef ret void } define void @void_func_v3f32(<3 x float> %arg0) #0 { ; CIGFX89-LABEL: void_func_v3f32: ; CIGFX89: ; %bb.0: ; CIGFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; CIGFX89-NEXT: s_mov_b32 s7, 0xf000 ; CIGFX89-NEXT: s_mov_b32 s6, -1 ; CIGFX89-NEXT: buffer_store_dwordx3 v[0:2], off, s[4:7], 0 ; CIGFX89-NEXT: s_waitcnt vmcnt(0) ; CIGFX89-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: void_func_v3f32: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: s_mov_b32 s3, 0x31016000 ; GFX11-NEXT: s_mov_b32 s2, -1 ; GFX11-NEXT: buffer_store_b96 v[0:2], off, s[0:3], 0 ; GFX11-NEXT: s_setpc_b64 s[30:31] store <3 x float> %arg0, ptr addrspace(1) undef ret void } define void @void_func_v4f32(<4 x float> %arg0) #0 { ; CIGFX89-LABEL: void_func_v4f32: ; CIGFX89: ; %bb.0: ; CIGFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; CIGFX89-NEXT: s_mov_b32 s7, 0xf000 ; CIGFX89-NEXT: s_mov_b32 s6, -1 ; CIGFX89-NEXT: 
buffer_store_dwordx4 v[0:3], off, s[4:7], 0 ; CIGFX89-NEXT: s_waitcnt vmcnt(0) ; CIGFX89-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: void_func_v4f32: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: s_mov_b32 s3, 0x31016000 ; GFX11-NEXT: s_mov_b32 s2, -1 ; GFX11-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0 ; GFX11-NEXT: s_setpc_b64 s[30:31] store <4 x float> %arg0, ptr addrspace(1) undef ret void } define void @void_func_v8f32(<8 x float> %arg0) #0 { ; CIGFX89-LABEL: void_func_v8f32: ; CIGFX89: ; %bb.0: ; CIGFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; CIGFX89-NEXT: s_mov_b32 s7, 0xf000 ; CIGFX89-NEXT: s_mov_b32 s6, -1 ; CIGFX89-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0 ; CIGFX89-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 ; CIGFX89-NEXT: s_waitcnt vmcnt(0) ; CIGFX89-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: void_func_v8f32: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: s_mov_b32 s3, 0x31016000 ; GFX11-NEXT: s_mov_b32 s2, -1 ; GFX11-NEXT: s_clause 0x1 ; GFX11-NEXT: buffer_store_b128 v[4:7], off, s[0:3], 0 ; GFX11-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0 ; GFX11-NEXT: s_setpc_b64 s[30:31] store <8 x float> %arg0, ptr addrspace(1) undef ret void } define void @void_func_v16f32(<16 x float> %arg0) #0 { ; CIGFX89-LABEL: void_func_v16f32: ; CIGFX89: ; %bb.0: ; CIGFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; CIGFX89-NEXT: s_mov_b32 s7, 0xf000 ; CIGFX89-NEXT: s_mov_b32 s6, -1 ; CIGFX89-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0 ; CIGFX89-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0 ; CIGFX89-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0 ; CIGFX89-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 ; CIGFX89-NEXT: s_waitcnt vmcnt(0) ; CIGFX89-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: void_func_v16f32: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: s_mov_b32 s3, 0x31016000 ; GFX11-NEXT: s_mov_b32 s2, 
-1 ; GFX11-NEXT: s_clause 0x3 ; GFX11-NEXT: buffer_store_b128 v[12:15], off, s[0:3], 0 ; GFX11-NEXT: buffer_store_b128 v[8:11], off, s[0:3], 0 ; GFX11-NEXT: buffer_store_b128 v[4:7], off, s[0:3], 0 ; GFX11-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0 ; GFX11-NEXT: s_setpc_b64 s[30:31] store <16 x float> %arg0, ptr addrspace(1) undef ret void } define void @void_func_v2f64(<2 x double> %arg0) #0 { ; CIGFX89-LABEL: void_func_v2f64: ; CIGFX89: ; %bb.0: ; CIGFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; CIGFX89-NEXT: s_mov_b32 s7, 0xf000 ; CIGFX89-NEXT: s_mov_b32 s6, -1 ; CIGFX89-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 ; CIGFX89-NEXT: s_waitcnt vmcnt(0) ; CIGFX89-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: void_func_v2f64: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: s_mov_b32 s3, 0x31016000 ; GFX11-NEXT: s_mov_b32 s2, -1 ; GFX11-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0 ; GFX11-NEXT: s_setpc_b64 s[30:31] store <2 x double> %arg0, ptr addrspace(1) undef ret void } define void @void_func_v3f64(<3 x double> %arg0) #0 { ; CIGFX89-LABEL: void_func_v3f64: ; CIGFX89: ; %bb.0: ; CIGFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; CIGFX89-NEXT: s_mov_b32 s7, 0xf000 ; CIGFX89-NEXT: s_mov_b32 s6, -1 ; CIGFX89-NEXT: buffer_store_dwordx2 v[4:5], off, s[4:7], 0 ; CIGFX89-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 ; CIGFX89-NEXT: s_waitcnt vmcnt(0) ; CIGFX89-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: void_func_v3f64: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: s_mov_b32 s3, 0x31016000 ; GFX11-NEXT: s_mov_b32 s2, -1 ; GFX11-NEXT: s_clause 0x1 ; GFX11-NEXT: buffer_store_b64 v[4:5], off, s[0:3], 0 ; GFX11-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0 ; GFX11-NEXT: s_setpc_b64 s[30:31] store <3 x double> %arg0, ptr addrspace(1) undef ret void } define void @void_func_v4f64(<4 x double> %arg0) #0 { ; CIGFX89-LABEL: void_func_v4f64: ; CIGFX89: ; %bb.0: ; CIGFX89-NEXT: 
s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; CIGFX89-NEXT: s_mov_b32 s7, 0xf000 ; CIGFX89-NEXT: s_mov_b32 s6, -1 ; CIGFX89-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0 ; CIGFX89-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 ; CIGFX89-NEXT: s_waitcnt vmcnt(0) ; CIGFX89-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: void_func_v4f64: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: s_mov_b32 s3, 0x31016000 ; GFX11-NEXT: s_mov_b32 s2, -1 ; GFX11-NEXT: s_clause 0x1 ; GFX11-NEXT: buffer_store_b128 v[4:7], off, s[0:3], 0 ; GFX11-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0 ; GFX11-NEXT: s_setpc_b64 s[30:31] store <4 x double> %arg0, ptr addrspace(1) undef ret void } define void @void_func_v8f64(<8 x double> %arg0) #0 { ; CIGFX89-LABEL: void_func_v8f64: ; CIGFX89: ; %bb.0: ; CIGFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; CIGFX89-NEXT: s_mov_b32 s7, 0xf000 ; CIGFX89-NEXT: s_mov_b32 s6, -1 ; CIGFX89-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0 ; CIGFX89-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0 ; CIGFX89-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0 ; CIGFX89-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 ; CIGFX89-NEXT: s_waitcnt vmcnt(0) ; CIGFX89-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: void_func_v8f64: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: s_mov_b32 s3, 0x31016000 ; GFX11-NEXT: s_mov_b32 s2, -1 ; GFX11-NEXT: s_clause 0x3 ; GFX11-NEXT: buffer_store_b128 v[12:15], off, s[0:3], 0 ; GFX11-NEXT: buffer_store_b128 v[8:11], off, s[0:3], 0 ; GFX11-NEXT: buffer_store_b128 v[4:7], off, s[0:3], 0 ; GFX11-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0 ; GFX11-NEXT: s_setpc_b64 s[30:31] store <8 x double> %arg0, ptr addrspace(1) undef ret void } define void @void_func_v16f64(<16 x double> %arg0) #0 { ; CIGFX89-LABEL: void_func_v16f64: ; CIGFX89: ; %bb.0: ; CIGFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; CIGFX89-NEXT: buffer_load_dword v31, off, s[0:3], 
s32 ; CIGFX89-NEXT: s_mov_b32 s7, 0xf000 ; CIGFX89-NEXT: s_mov_b32 s6, -1 ; CIGFX89-NEXT: buffer_store_dwordx4 v[24:27], off, s[4:7], 0 ; CIGFX89-NEXT: buffer_store_dwordx4 v[20:23], off, s[4:7], 0 ; CIGFX89-NEXT: buffer_store_dwordx4 v[16:19], off, s[4:7], 0 ; CIGFX89-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0 ; CIGFX89-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0 ; CIGFX89-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0 ; CIGFX89-NEXT: s_waitcnt vmcnt(6) ; CIGFX89-NEXT: buffer_store_dwordx4 v[28:31], off, s[4:7], 0 ; CIGFX89-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 ; CIGFX89-NEXT: s_waitcnt vmcnt(0) ; CIGFX89-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: void_func_v16f64: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: scratch_load_b32 v31, off, s32 ; GFX11-NEXT: s_mov_b32 s3, 0x31016000 ; GFX11-NEXT: s_mov_b32 s2, -1 ; GFX11-NEXT: s_clause 0x3 ; GFX11-NEXT: buffer_store_b128 v[20:23], off, s[0:3], 0 ; GFX11-NEXT: buffer_store_b128 v[16:19], off, s[0:3], 0 ; GFX11-NEXT: buffer_store_b128 v[12:15], off, s[0:3], 0 ; GFX11-NEXT: buffer_store_b128 v[8:11], off, s[0:3], 0 ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: s_clause 0x3 ; GFX11-NEXT: buffer_store_b128 v[28:31], off, s[0:3], 0 ; GFX11-NEXT: buffer_store_b128 v[24:27], off, s[0:3], 0 ; GFX11-NEXT: buffer_store_b128 v[4:7], off, s[0:3], 0 ; GFX11-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0 ; GFX11-NEXT: s_setpc_b64 s[30:31] store <16 x double> %arg0, ptr addrspace(1) undef ret void } define void @void_func_v2f16(<2 x half> %arg0) #0 { ; CI-LABEL: void_func_v2f16: ; CI: ; %bb.0: ; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; CI-NEXT: v_cvt_f16_f32_e32 v1, v1 ; CI-NEXT: v_cvt_f16_f32_e32 v0, v0 ; CI-NEXT: s_mov_b32 s7, 0xf000 ; CI-NEXT: s_mov_b32 s6, -1 ; CI-NEXT: v_lshlrev_b32_e32 v1, 16, v1 ; CI-NEXT: v_or_b32_e32 v0, v0, v1 ; CI-NEXT: buffer_store_dword v0, off, s[4:7], 0 ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: s_setpc_b64 s[30:31] ; ; 
GFX89-LABEL: void_func_v2f16: ; GFX89: ; %bb.0: ; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX89-NEXT: s_mov_b32 s7, 0xf000 ; GFX89-NEXT: s_mov_b32 s6, -1 ; GFX89-NEXT: buffer_store_dword v0, off, s[4:7], 0 ; GFX89-NEXT: s_waitcnt vmcnt(0) ; GFX89-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: void_func_v2f16: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: s_mov_b32 s3, 0x31016000 ; GFX11-NEXT: s_mov_b32 s2, -1 ; GFX11-NEXT: buffer_store_b32 v0, off, s[0:3], 0 ; GFX11-NEXT: s_setpc_b64 s[30:31] store <2 x half> %arg0, ptr addrspace(1) undef ret void } ; FIXME: The ABI differs depending on whether f16 is a legal type. define void @void_func_v3f16(<3 x half> %arg0) #0 { ; CI-LABEL: void_func_v3f16: ; CI: ; %bb.0: ; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; CI-NEXT: v_cvt_f16_f32_e32 v1, v1 ; CI-NEXT: v_cvt_f16_f32_e32 v0, v0 ; CI-NEXT: v_cvt_f16_f32_e32 v2, v2 ; CI-NEXT: s_mov_b32 s7, 0xf000 ; CI-NEXT: v_lshlrev_b32_e32 v1, 16, v1 ; CI-NEXT: s_mov_b32 s6, -1 ; CI-NEXT: v_or_b32_e32 v0, v0, v1 ; CI-NEXT: buffer_store_short v2, off, s[4:7], 0 ; CI-NEXT: buffer_store_dword v0, off, s[4:7], 0 ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: s_setpc_b64 s[30:31] ; ; GFX89-LABEL: void_func_v3f16: ; GFX89: ; %bb.0: ; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX89-NEXT: s_mov_b32 s7, 0xf000 ; GFX89-NEXT: s_mov_b32 s6, -1 ; GFX89-NEXT: buffer_store_short v1, off, s[4:7], 0 ; GFX89-NEXT: buffer_store_dword v0, off, s[4:7], 0 ; GFX89-NEXT: s_waitcnt vmcnt(0) ; GFX89-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: void_func_v3f16: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: s_mov_b32 s3, 0x31016000 ; GFX11-NEXT: s_mov_b32 s2, -1 ; GFX11-NEXT: s_clause 0x1 ; GFX11-NEXT: buffer_store_b16 v1, off, s[0:3], 0 ; GFX11-NEXT: buffer_store_b32 v0, off, s[0:3], 0 ; GFX11-NEXT: s_setpc_b64 s[30:31] store <3 x half> %arg0, ptr addrspace(1) undef ret void } define void @void_func_v4f16(<4 x half> %arg0) #0 { ; CI-LABEL: 
void_func_v4f16: ; CI: ; %bb.0: ; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; CI-NEXT: v_cvt_f16_f32_e32 v3, v3 ; CI-NEXT: v_cvt_f16_f32_e32 v2, v2 ; CI-NEXT: v_cvt_f16_f32_e32 v4, v1 ; CI-NEXT: v_cvt_f16_f32_e32 v0, v0 ; CI-NEXT: v_lshlrev_b32_e32 v1, 16, v3 ; CI-NEXT: v_or_b32_e32 v1, v2, v1 ; CI-NEXT: v_lshlrev_b32_e32 v2, 16, v4 ; CI-NEXT: v_or_b32_e32 v0, v0, v2 ; CI-NEXT: s_mov_b32 s7, 0xf000 ; CI-NEXT: s_mov_b32 s6, -1 ; CI-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: s_setpc_b64 s[30:31] ; ; GFX89-LABEL: void_func_v4f16: ; GFX89: ; %bb.0: ; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX89-NEXT: s_mov_b32 s7, 0xf000 ; GFX89-NEXT: s_mov_b32 s6, -1 ; GFX89-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 ; GFX89-NEXT: s_waitcnt vmcnt(0) ; GFX89-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: void_func_v4f16: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: s_mov_b32 s3, 0x31016000 ; GFX11-NEXT: s_mov_b32 s2, -1 ; GFX11-NEXT: buffer_store_b64 v[0:1], off, s[0:3], 0 ; GFX11-NEXT: s_setpc_b64 s[30:31] store <4 x half> %arg0, ptr addrspace(1) undef ret void } define void @void_func_v8f16(<8 x half> %arg0) #0 { ; CI-LABEL: void_func_v8f16: ; CI: ; %bb.0: ; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; CI-NEXT: v_cvt_f16_f32_e32 v7, v7 ; CI-NEXT: v_cvt_f16_f32_e32 v6, v6 ; CI-NEXT: v_cvt_f16_f32_e32 v8, v5 ; CI-NEXT: v_cvt_f16_f32_e32 v3, v3 ; CI-NEXT: v_cvt_f16_f32_e32 v1, v1 ; CI-NEXT: v_cvt_f16_f32_e32 v4, v4 ; CI-NEXT: v_cvt_f16_f32_e32 v2, v2 ; CI-NEXT: v_cvt_f16_f32_e32 v0, v0 ; CI-NEXT: v_lshlrev_b32_e32 v5, 16, v7 ; CI-NEXT: v_or_b32_e32 v5, v6, v5 ; CI-NEXT: v_lshlrev_b32_e32 v6, 16, v8 ; CI-NEXT: v_lshlrev_b32_e32 v3, 16, v3 ; CI-NEXT: v_lshlrev_b32_e32 v1, 16, v1 ; CI-NEXT: v_or_b32_e32 v4, v4, v6 ; CI-NEXT: v_or_b32_e32 v3, v2, v3 ; CI-NEXT: v_or_b32_e32 v2, v0, v1 ; CI-NEXT: s_mov_b32 s7, 0xf000 ; CI-NEXT: s_mov_b32 s6, -1 ; CI-NEXT: 
buffer_store_dwordx4 v[2:5], off, s[4:7], 0 ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: s_setpc_b64 s[30:31] ; ; GFX89-LABEL: void_func_v8f16: ; GFX89: ; %bb.0: ; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX89-NEXT: s_mov_b32 s7, 0xf000 ; GFX89-NEXT: s_mov_b32 s6, -1 ; GFX89-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 ; GFX89-NEXT: s_waitcnt vmcnt(0) ; GFX89-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: void_func_v8f16: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: s_mov_b32 s3, 0x31016000 ; GFX11-NEXT: s_mov_b32 s2, -1 ; GFX11-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0 ; GFX11-NEXT: s_setpc_b64 s[30:31] store <8 x half> %arg0, ptr addrspace(1) undef ret void } define void @void_func_v16f16(<16 x half> %arg0) #0 { ; CI-LABEL: void_func_v16f16: ; CI: ; %bb.0: ; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; CI-NEXT: v_cvt_f16_f32_e32 v7, v7 ; CI-NEXT: v_cvt_f16_f32_e32 v3, v3 ; CI-NEXT: v_cvt_f16_f32_e32 v1, v1 ; CI-NEXT: v_cvt_f16_f32_e32 v6, v6 ; CI-NEXT: v_cvt_f16_f32_e32 v16, v5 ; CI-NEXT: v_cvt_f16_f32_e32 v2, v2 ; CI-NEXT: v_cvt_f16_f32_e32 v0, v0 ; CI-NEXT: v_cvt_f16_f32_e32 v4, v4 ; CI-NEXT: v_lshlrev_b32_e32 v5, 16, v7 ; CI-NEXT: v_lshlrev_b32_e32 v3, 16, v3 ; CI-NEXT: v_lshlrev_b32_e32 v1, 16, v1 ; CI-NEXT: v_or_b32_e32 v5, v6, v5 ; CI-NEXT: v_lshlrev_b32_e32 v6, 16, v16 ; CI-NEXT: v_or_b32_e32 v3, v2, v3 ; CI-NEXT: v_or_b32_e32 v2, v0, v1 ; CI-NEXT: v_cvt_f16_f32_e32 v0, v15 ; CI-NEXT: v_or_b32_e32 v4, v4, v6 ; CI-NEXT: v_cvt_f16_f32_e32 v1, v14 ; CI-NEXT: v_cvt_f16_f32_e32 v6, v13 ; CI-NEXT: v_cvt_f16_f32_e32 v7, v12 ; CI-NEXT: v_lshlrev_b32_e32 v0, 16, v0 ; CI-NEXT: v_or_b32_e32 v13, v1, v0 ; CI-NEXT: v_lshlrev_b32_e32 v0, 16, v6 ; CI-NEXT: v_or_b32_e32 v12, v7, v0 ; CI-NEXT: v_cvt_f16_f32_e32 v0, v11 ; CI-NEXT: v_cvt_f16_f32_e32 v1, v10 ; CI-NEXT: v_cvt_f16_f32_e32 v6, v9 ; CI-NEXT: v_cvt_f16_f32_e32 v7, v8 ; CI-NEXT: v_lshlrev_b32_e32 v0, 16, v0 ; CI-NEXT: v_or_b32_e32 v11, v1, v0 ; 
CI-NEXT: v_lshlrev_b32_e32 v0, 16, v6 ; CI-NEXT: v_or_b32_e32 v10, v7, v0 ; CI-NEXT: s_mov_b32 s7, 0xf000 ; CI-NEXT: s_mov_b32 s6, -1 ; CI-NEXT: buffer_store_dwordx4 v[10:13], off, s[4:7], 0 ; CI-NEXT: buffer_store_dwordx4 v[2:5], off, s[4:7], 0 ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: s_setpc_b64 s[30:31] ; ; GFX89-LABEL: void_func_v16f16: ; GFX89: ; %bb.0: ; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX89-NEXT: s_mov_b32 s7, 0xf000 ; GFX89-NEXT: s_mov_b32 s6, -1 ; GFX89-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0 ; GFX89-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 ; GFX89-NEXT: s_waitcnt vmcnt(0) ; GFX89-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: void_func_v16f16: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: s_mov_b32 s3, 0x31016000 ; GFX11-NEXT: s_mov_b32 s2, -1 ; GFX11-NEXT: s_clause 0x1 ; GFX11-NEXT: buffer_store_b128 v[4:7], off, s[0:3], 0 ; GFX11-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0 ; GFX11-NEXT: s_setpc_b64 s[30:31] store <16 x half> %arg0, ptr addrspace(1) undef ret void } ; Make sure there is no alignment requirement for passed vgprs. 
; Takes three scalar arguments (i32, i64, i32) and writes each one out with
; its own volatile store. The expected output stores v0, then the register
; pair v[1:2], then v3, with a wait after every store.
define void @void_func_i32_i64_i32(i32 %arg0, i64 %arg1, i32 %arg2) #0 {
; CIGFX89-LABEL: void_func_i32_i64_i32:
; CIGFX89:       ; %bb.0:
; CIGFX89-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CIGFX89-NEXT:    s_mov_b32 s7, 0xf000
; CIGFX89-NEXT:    s_mov_b32 s6, -1
; CIGFX89-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; CIGFX89-NEXT:    s_waitcnt vmcnt(0)
; CIGFX89-NEXT:    buffer_store_dwordx2 v[1:2], off, s[4:7], 0
; CIGFX89-NEXT:    s_waitcnt vmcnt(0)
; CIGFX89-NEXT:    buffer_store_dword v3, off, s[4:7], 0
; CIGFX89-NEXT:    s_waitcnt vmcnt(0)
; CIGFX89-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_i32_i64_i32:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
; GFX11-NEXT:    s_mov_b32 s2, -1
; GFX11-NEXT:    buffer_store_b32 v0, off, s[0:3], 0 dlc
; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX11-NEXT:    buffer_store_b64 v[1:2], off, s[0:3], 0 dlc
; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX11-NEXT:    buffer_store_b32 v3, off, s[0:3], 0 dlc
; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  store volatile i32 %arg0, ptr addrspace(1) undef
  store volatile i64 %arg1, ptr addrspace(1) undef
  store volatile i32 %arg2, ptr addrspace(1) undef
  ret void
}

; Takes a single-member struct { i32 } and stores it whole. The expected
; output is one dword store of v0 on every target.
define void @void_func_struct_i32({ i32 } %arg0) #0 {
; CIGFX89-LABEL: void_func_struct_i32:
; CIGFX89:       ; %bb.0:
; CIGFX89-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CIGFX89-NEXT:    s_mov_b32 s7, 0xf000
; CIGFX89-NEXT:    s_mov_b32 s6, -1
; CIGFX89-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; CIGFX89-NEXT:    s_waitcnt vmcnt(0)
; CIGFX89-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_struct_i32:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
; GFX11-NEXT:    s_mov_b32 s2, -1
; GFX11-NEXT:    buffer_store_b32 v0, off, s[0:3], 0
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  store { i32 } %arg0, ptr addrspace(1) undef
  ret void
}

; Takes a two-member struct { i8, i32 } as a direct (non-byval) argument and
; stores it whole. The expected output stores v1 as a dword and v0 as a byte.
define void @void_func_struct_i8_i32({ i8, i32 } %arg0) #0 {
; CIGFX89-LABEL: void_func_struct_i8_i32:
; CIGFX89:       ; %bb.0:
; CIGFX89-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CIGFX89-NEXT:    s_mov_b32 s7, 0xf000
; CIGFX89-NEXT:    s_mov_b32 s6, -1
; CIGFX89-NEXT:    buffer_store_dword v1, off, s[4:7], 0
; CIGFX89-NEXT:    buffer_store_byte v0, off, s[4:7], 0
; CIGFX89-NEXT:    s_waitcnt vmcnt(0)
; CIGFX89-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_struct_i8_i32:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
; GFX11-NEXT:    s_mov_b32 s2, -1
; GFX11-NEXT:    s_clause 0x1
; GFX11-NEXT:    buffer_store_b32 v1, off, s[0:3], 0
; GFX11-NEXT:    buffer_store_b8 v0, off, s[0:3], 0
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  store { i8, i32 } %arg0, ptr addrspace(1) undef
  ret void
}

; Takes the same { i8, i32 } struct, but passed byval through an addrspace(5)
; pointer, then loads it and stores it. The expected output loads both members
; relative to s32 (the byte at offset 0, the dword at offset 4) before the
; stores.
define void @void_func_byval_struct_i8_i32(ptr addrspace(5) byval({ i8, i32 }) %arg0) #0 {
; CIGFX89-LABEL: void_func_byval_struct_i8_i32:
; CIGFX89:       ; %bb.0:
; CIGFX89-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CIGFX89-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:4
; CIGFX89-NEXT:    buffer_load_ubyte v1, off, s[0:3], s32
; CIGFX89-NEXT:    s_mov_b32 s7, 0xf000
; CIGFX89-NEXT:    s_mov_b32 s6, -1
; CIGFX89-NEXT:    s_waitcnt vmcnt(1)
; CIGFX89-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; CIGFX89-NEXT:    s_waitcnt vmcnt(1)
; CIGFX89-NEXT:    buffer_store_byte v1, off, s[4:7], 0
; CIGFX89-NEXT:    s_waitcnt vmcnt(0)
; CIGFX89-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_byval_struct_i8_i32:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_clause 0x1
; GFX11-NEXT:    scratch_load_b32 v0, off, s32 offset:4
; GFX11-NEXT:    scratch_load_u8 v1, off, s32
; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
; GFX11-NEXT:    s_mov_b32 s2, -1
; GFX11-NEXT:    s_waitcnt vmcnt(1)
; GFX11-NEXT:    buffer_store_b32 v0, off, s[0:3], 0
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    buffer_store_b8 v1, off, s[0:3], 0
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %arg0.load = load { i8, i32 }, ptr addrspace(5) %arg0
  store { i8, i32 } %arg0.load, ptr addrspace(1) undef
  ret void
}

; Takes two byval { i8, i32 } structs followed by a plain i32. Every load and
; store in the body is volatile, and the expected output has a wait after each
; memory operation. The two structs are read at s32 offsets 0/4 and 8/12, and
; the trailing i32 is written with a DS store (addrspace(3)) that uses v0.
define void @void_func_byval_struct_i8_i32_x2(ptr addrspace(5) byval({ i8, i32 }) %arg0, ptr addrspace(5) byval({ i8, i32 }) %arg1, i32 %arg2) #0 {
; CI-LABEL: void_func_byval_struct_i8_i32_x2:
; CI:       ; %bb.0:
; CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CI-NEXT:    buffer_load_ubyte v1, off, s[0:3], s32 glc
; CI-NEXT:    s_waitcnt vmcnt(0)
; CI-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:4 glc
; CI-NEXT:    s_waitcnt vmcnt(0)
; CI-NEXT:    buffer_load_ubyte v3, off, s[0:3], s32 offset:8 glc
; CI-NEXT:    s_waitcnt vmcnt(0)
; CI-NEXT:    buffer_load_dword v4, off, s[0:3], s32 offset:12 glc
; CI-NEXT:    s_waitcnt vmcnt(0)
; CI-NEXT:    s_mov_b32 s7, 0xf000
; CI-NEXT:    s_mov_b32 s6, -1
; CI-NEXT:    s_mov_b32 m0, -1
; CI-NEXT:    buffer_store_dword v2, off, s[4:7], 0
; CI-NEXT:    s_waitcnt vmcnt(0)
; CI-NEXT:    buffer_store_byte v1, off, s[4:7], 0
; CI-NEXT:    s_waitcnt vmcnt(0)
; CI-NEXT:    buffer_store_dword v4, off, s[4:7], 0
; CI-NEXT:    s_waitcnt vmcnt(0)
; CI-NEXT:    buffer_store_byte v3, off, s[4:7], 0
; CI-NEXT:    s_waitcnt vmcnt(0)
; CI-NEXT:    ds_write_b32 v0, v0
; CI-NEXT:    s_waitcnt lgkmcnt(0)
; CI-NEXT:    s_setpc_b64 s[30:31]
;
; VI-LABEL: void_func_byval_struct_i8_i32_x2:
; VI:       ; %bb.0:
; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-NEXT:    buffer_load_ubyte v1, off, s[0:3], s32 glc
; VI-NEXT:    s_waitcnt vmcnt(0)
; VI-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:4 glc
; VI-NEXT:    s_waitcnt vmcnt(0)
; VI-NEXT:    buffer_load_ubyte v3, off, s[0:3], s32 offset:8 glc
; VI-NEXT:    s_waitcnt vmcnt(0)
; VI-NEXT:    buffer_load_dword v4, off, s[0:3], s32 offset:12 glc
; VI-NEXT:    s_waitcnt vmcnt(0)
; VI-NEXT:    s_mov_b32 s7, 0xf000
; VI-NEXT:    s_mov_b32 s6, -1
; VI-NEXT:    s_mov_b32 m0, -1
; VI-NEXT:    buffer_store_dword v2, off, s[4:7], 0
; VI-NEXT:    s_waitcnt vmcnt(0)
; VI-NEXT:    buffer_store_byte v1, off, s[4:7], 0
; VI-NEXT:    s_waitcnt vmcnt(0)
; VI-NEXT:    buffer_store_dword v4, off, s[4:7], 0
; VI-NEXT:    s_waitcnt vmcnt(0)
; VI-NEXT:    buffer_store_byte v3, off, s[4:7], 0
; VI-NEXT:    s_waitcnt vmcnt(0)
; VI-NEXT:    ds_write_b32 v0, v0
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: void_func_byval_struct_i8_i32_x2:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    buffer_load_ubyte v1, off, s[0:3], s32 glc
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:4 glc
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    buffer_load_ubyte v3, off, s[0:3], s32 offset:8 glc
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    buffer_load_dword v4, off, s[0:3], s32 offset:12 glc
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    s_mov_b32 s7, 0xf000
; GFX9-NEXT:    s_mov_b32 s6, -1
; GFX9-NEXT:    buffer_store_dword v2, off, s[4:7], 0
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    buffer_store_byte v1, off, s[4:7], 0
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    buffer_store_dword v4, off, s[4:7], 0
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    buffer_store_byte v3, off, s[4:7], 0
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    ds_write_b32 v0, v0
; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_byval_struct_i8_i32_x2:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    scratch_load_u8 v1, off, s32 glc dlc
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    scratch_load_b32 v2, off, s32 offset:4 glc dlc
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    scratch_load_u8 v3, off, s32 offset:8 glc dlc
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    scratch_load_b32 v4, off, s32 offset:12 glc dlc
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
; GFX11-NEXT:    s_mov_b32 s2, -1
; GFX11-NEXT:    buffer_store_b32 v2, off, s[0:3], 0 dlc
; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX11-NEXT:    buffer_store_b8 v1, off, s[0:3], 0 dlc
; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX11-NEXT:    buffer_store_b32 v4, off, s[0:3], 0 dlc
; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX11-NEXT:    buffer_store_b8 v3, off, s[0:3], 0 dlc
; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX11-NEXT:    ds_store_b32 v0, v0
; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %arg0.load = load volatile { i8, i32 }, ptr addrspace(5) %arg0
  %arg1.load = load volatile { i8, i32 }, ptr addrspace(5) %arg1
  store volatile { i8, i32 } %arg0.load, ptr addrspace(1) undef
  store volatile { i8, i32 } %arg1.load, ptr addrspace(1) undef
  store volatile i32 %arg2, ptr addrspace(3) undef
  ret void
}

; Takes two byval scalars, an i32 and an i64, loads both and stores both. The
; expected output reads the i32 at s32 offset 0 and the i64 at offset 8: as two
; dword loads (offsets 8 and 12) under the CIGFX89 prefix, and as a single
; 64-bit scratch load under the GFX11 prefix.
define void @void_func_byval_i32_byval_i64(ptr addrspace(5) byval(i32) %arg0, ptr addrspace(5) byval(i64) %arg1) #0 {
; CIGFX89-LABEL: void_func_byval_i32_byval_i64:
; CIGFX89:       ; %bb.0:
; CIGFX89-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CIGFX89-NEXT:    buffer_load_dword v2, off, s[0:3], s32
; CIGFX89-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:8
; CIGFX89-NEXT:    buffer_load_dword v1, off, s[0:3], s32 offset:12
; CIGFX89-NEXT:    s_mov_b32 s7, 0xf000
; CIGFX89-NEXT:    s_mov_b32 s6, -1
; CIGFX89-NEXT:    s_waitcnt vmcnt(2)
; CIGFX89-NEXT:    buffer_store_dword v2, off, s[4:7], 0
; CIGFX89-NEXT:    s_waitcnt vmcnt(1)
; CIGFX89-NEXT:    buffer_store_dwordx2 v[0:1], off, s[4:7], 0
; CIGFX89-NEXT:    s_waitcnt vmcnt(0)
; CIGFX89-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_byval_i32_byval_i64:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_clause 0x1
; GFX11-NEXT:    scratch_load_b32 v2, off, s32
; GFX11-NEXT:    scratch_load_b64 v[0:1], off, s32 offset:8
; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
; GFX11-NEXT:    s_mov_b32 s2, -1
; GFX11-NEXT:    s_waitcnt vmcnt(1)
; GFX11-NEXT:    buffer_store_b32 v2, off, s[0:3], 0
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    buffer_store_b64 v[0:1], off, s[0:3], 0
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %arg0.load = load i32, ptr addrspace(5) %arg0
  %arg1.load = load i64, ptr addrspace(5) %arg1
  store i32 %arg0.load, ptr addrspace(1) undef
  store i64 %arg1.load, ptr addrspace(1) undef
  ret void
}

define void
@void_func_v32i32_i32_i64(<32 x i32> %arg0, i32 %arg1, i64 %arg2) #0 { ; A <32 x i32> followed by an i32 and an i64, each written back with a volatile store. The checks expect v31 (stored as part of v[28:31]) to be loaded from s32 with no offset, the i32 from offset 4 and the i64 from offsets 8 and 12. ; CI-LABEL: void_func_v32i32_i32_i64: ; CI: ; %bb.0: ; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; CI-NEXT: buffer_load_dword v31, off, s[0:3], s32 ; CI-NEXT: s_mov_b32 s7, 0xf000 ; CI-NEXT: s_mov_b32 s6, -1 ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: buffer_store_dwordx4 v[28:31], off, s[4:7], 0 ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: buffer_store_dwordx4 v[24:27], off, s[4:7], 0 ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: buffer_store_dwordx4 v[20:23], off, s[4:7], 0 ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: buffer_store_dwordx4 v[16:19], off, s[4:7], 0 ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: buffer_load_dword v17, off, s[0:3], s32 offset:12 ; CI-NEXT: buffer_load_dword v16, off, s[0:3], s32 offset:8 ; CI-NEXT: buffer_load_dword v20, off, s[0:3], s32 offset:4 ; CI-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0 ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0 ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0 ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: buffer_store_dword v20, off, s[4:7], 0 ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: buffer_store_dwordx2 v[16:17], off, s[4:7], 0 ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: s_setpc_b64 s[30:31] ; ; VI-LABEL: void_func_v32i32_i32_i64: ; VI: ; %bb.0: ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; VI-NEXT: buffer_load_dword v31, off, s[0:3], s32 ; VI-NEXT: s_mov_b32 s7, 0xf000 ; VI-NEXT: s_mov_b32 s6, -1 ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_store_dwordx4 v[28:31], off, s[4:7], 0 ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_store_dwordx4 v[24:27], off, s[4:7], 0 ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_store_dwordx4 v[20:23], off, s[4:7], 0 ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_store_dwordx4 v[16:19], off, s[4:7], 0 ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT:
buffer_load_dword v17, off, s[0:3], s32 offset:12 ; VI-NEXT: buffer_load_dword v16, off, s[0:3], s32 offset:8 ; VI-NEXT: buffer_load_dword v20, off, s[0:3], s32 offset:4 ; VI-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0 ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0 ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0 ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_store_dword v20, off, s[4:7], 0 ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_store_dwordx2 v[16:17], off, s[4:7], 0 ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: s_setpc_b64 s[30:31] ; ; GFX9-LABEL: void_func_v32i32_i32_i64: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX9-NEXT: buffer_load_dword v31, off, s[0:3], s32 ; GFX9-NEXT: s_mov_b32 s7, 0xf000 ; GFX9-NEXT: s_mov_b32 s6, -1 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_store_dwordx4 v[28:31], off, s[4:7], 0 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_store_dwordx4 v[24:27], off, s[4:7], 0 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_store_dwordx4 v[20:23], off, s[4:7], 0 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_store_dwordx4 v[16:19], off, s[4:7], 0 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_load_dword v17, off, s[0:3], s32 offset:12 ; GFX9-NEXT: buffer_load_dword v16, off, s[0:3], s32 offset:8 ; GFX9-NEXT: buffer_load_dword v20, off, s[0:3], s32 offset:4 ; GFX9-NEXT: s_nop 0 ; GFX9-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_store_dword v20, off, s[4:7], 0 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; 
GFX9-NEXT: buffer_store_dwordx2 v[16:17], off, s[4:7], 0 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: void_func_v32i32_i32_i64: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: s_clause 0x3 ; GFX11-NEXT: scratch_load_b32 v31, off, s32 ; GFX11-NEXT: scratch_load_b32 v33, off, s32 offset:12 ; GFX11-NEXT: scratch_load_b32 v34, off, s32 offset:4 ; GFX11-NEXT: scratch_load_b32 v32, off, s32 offset:8 ; GFX11-NEXT: s_mov_b32 s3, 0x31016000 ; GFX11-NEXT: s_mov_b32 s2, -1 ; GFX11-NEXT: s_waitcnt vmcnt(3) ; GFX11-NEXT: buffer_store_b128 v[28:31], off, s[0:3], 0 dlc ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: buffer_store_b128 v[24:27], off, s[0:3], 0 dlc ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: buffer_store_b128 v[20:23], off, s[0:3], 0 dlc ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: buffer_store_b128 v[16:19], off, s[0:3], 0 dlc ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: buffer_store_b128 v[12:15], off, s[0:3], 0 dlc ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: buffer_store_b128 v[8:11], off, s[0:3], 0 dlc ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: buffer_store_b128 v[4:7], off, s[0:3], 0 dlc ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0 dlc ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: s_waitcnt vmcnt(1) ; GFX11-NEXT: buffer_store_b32 v34, off, s[0:3], 0 dlc ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: buffer_store_b64 v[32:33], off, s[0:3], 0 dlc ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: s_setpc_b64 s[30:31] store volatile <32 x i32> %arg0, ptr addrspace(1) undef store volatile i32 %arg1, ptr addrspace(1) undef store volatile i64 %arg2, ptr addrspace(1) undef ret void } ; FIXME: Different ext load types on CI vs. 
VI define void @void_func_v32i32_i1_i8_i16_bf16(<32 x i32> %arg0, i1 %arg1, i8 %arg2, i16 %arg3, half %arg4, bfloat %arg5) #0 { ; A <32 x i32> followed by i1, i8, i16, half and bfloat (the function name omits the half). The checks expect the five small arguments at s32 offsets 4, 8, 12, 16 and 20. On CI the i8, i16, half and bfloat slots are read with buffer_load_dword, the half is narrowed with v_cvt_f16_f32 and the bfloat is shifted right by 16, whereas VI and GFX9 read the same slots with buffer_load_ushort; this is the difference the FIXME before this function refers to. ; CI-LABEL: void_func_v32i32_i1_i8_i16_bf16: ; CI: ; %bb.0: ; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; CI-NEXT: buffer_load_dword v31, off, s[0:3], s32 ; CI-NEXT: s_mov_b32 s7, 0xf000 ; CI-NEXT: s_mov_b32 s6, -1 ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: buffer_store_dwordx4 v[28:31], off, s[4:7], 0 ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: buffer_store_dwordx4 v[24:27], off, s[4:7], 0 ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: buffer_store_dwordx4 v[20:23], off, s[4:7], 0 ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: buffer_store_dwordx4 v[16:19], off, s[4:7], 0 ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: buffer_load_dword v20, off, s[0:3], s32 offset:20 ; CI-NEXT: buffer_load_dword v16, off, s[0:3], s32 offset:16 ; CI-NEXT: buffer_load_ubyte v17, off, s[0:3], s32 offset:4 ; CI-NEXT: buffer_load_dword v18, off, s[0:3], s32 offset:8 ; CI-NEXT: buffer_load_dword v19, off, s[0:3], s32 offset:12 ; CI-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0 ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0 ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0 ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: v_cvt_f16_f32_e32 v16, v16 ; CI-NEXT: v_and_b32_e32 v0, 1, v17 ; CI-NEXT: v_lshrrev_b32_e32 v20, 16, v20 ; CI-NEXT: buffer_store_byte v0, off, s[4:7], 0 ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: buffer_store_byte v18, off, s[4:7], 0 ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: buffer_store_short v19, off, s[4:7], 0 ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: buffer_store_short v16, off, s[4:7], 0 ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: buffer_store_short v20, off, s[4:7], 0 ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: s_setpc_b64 s[30:31] ; ; VI-LABEL: void_func_v32i32_i1_i8_i16_bf16: ; VI: ; %bb.0: ; VI-NEXT:
s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; VI-NEXT: buffer_load_dword v31, off, s[0:3], s32 ; VI-NEXT: s_mov_b32 s7, 0xf000 ; VI-NEXT: s_mov_b32 s6, -1 ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_store_dwordx4 v[28:31], off, s[4:7], 0 ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_store_dwordx4 v[24:27], off, s[4:7], 0 ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_store_dwordx4 v[20:23], off, s[4:7], 0 ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_load_ubyte v20, off, s[0:3], s32 offset:4 ; VI-NEXT: buffer_store_dwordx4 v[16:19], off, s[4:7], 0 ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_load_ushort v16, off, s[0:3], s32 offset:8 ; VI-NEXT: buffer_load_ushort v17, off, s[0:3], s32 offset:12 ; VI-NEXT: buffer_load_ushort v18, off, s[0:3], s32 offset:16 ; VI-NEXT: buffer_load_ushort v19, off, s[0:3], s32 offset:20 ; VI-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0 ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0 ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0 ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: v_and_b32_e32 v0, 1, v20 ; VI-NEXT: buffer_store_byte v0, off, s[4:7], 0 ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_store_byte v16, off, s[4:7], 0 ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_store_short v17, off, s[4:7], 0 ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_store_short v18, off, s[4:7], 0 ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_store_short v19, off, s[4:7], 0 ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: s_setpc_b64 s[30:31] ; ; GFX9-LABEL: void_func_v32i32_i1_i8_i16_bf16: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX9-NEXT: buffer_load_dword v31, off, s[0:3], s32 ; GFX9-NEXT: s_mov_b32 s7, 0xf000 ; GFX9-NEXT: s_mov_b32 s6, -1 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_store_dwordx4 v[28:31], off, s[4:7], 0 ; GFX9-NEXT: s_waitcnt 
vmcnt(0) ; GFX9-NEXT: buffer_store_dwordx4 v[24:27], off, s[4:7], 0 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_store_dwordx4 v[20:23], off, s[4:7], 0 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_load_ubyte v20, off, s[0:3], s32 offset:4 ; GFX9-NEXT: s_nop 0 ; GFX9-NEXT: buffer_store_dwordx4 v[16:19], off, s[4:7], 0 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_load_ushort v16, off, s[0:3], s32 offset:8 ; GFX9-NEXT: buffer_load_ushort v17, off, s[0:3], s32 offset:12 ; GFX9-NEXT: buffer_load_ushort v18, off, s[0:3], s32 offset:16 ; GFX9-NEXT: buffer_load_ushort v19, off, s[0:3], s32 offset:20 ; GFX9-NEXT: s_nop 0 ; GFX9-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: v_and_b32_e32 v0, 1, v20 ; GFX9-NEXT: buffer_store_byte v0, off, s[4:7], 0 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_store_byte v16, off, s[4:7], 0 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_store_short v17, off, s[4:7], 0 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_store_short v18, off, s[4:7], 0 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_store_short v19, off, s[4:7], 0 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: void_func_v32i32_i1_i8_i16_bf16: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: s_clause 0x5 ; GFX11-NEXT: scratch_load_b32 v31, off, s32 ; GFX11-NEXT: scratch_load_u8 v32, off, s32 offset:4 ; GFX11-NEXT: scratch_load_u16 v33, off, s32 offset:8 ; GFX11-NEXT: scratch_load_u16 v34, off, s32 offset:12 ; GFX11-NEXT: scratch_load_u16 v35, off, s32 offset:16 ; GFX11-NEXT: scratch_load_u16 v36, off, s32 offset:20 ; GFX11-NEXT: s_mov_b32 s3, 0x31016000 ; 
GFX11-NEXT: s_mov_b32 s2, -1 ; GFX11-NEXT: s_waitcnt vmcnt(5) ; GFX11-NEXT: buffer_store_b128 v[28:31], off, s[0:3], 0 dlc ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: buffer_store_b128 v[24:27], off, s[0:3], 0 dlc ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: buffer_store_b128 v[20:23], off, s[0:3], 0 dlc ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: buffer_store_b128 v[16:19], off, s[0:3], 0 dlc ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: s_waitcnt vmcnt(4) ; GFX11-NEXT: v_and_b32_e32 v16, 1, v32 ; GFX11-NEXT: buffer_store_b128 v[12:15], off, s[0:3], 0 dlc ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: buffer_store_b128 v[8:11], off, s[0:3], 0 dlc ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: buffer_store_b128 v[4:7], off, s[0:3], 0 dlc ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0 dlc ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: buffer_store_b8 v16, off, s[0:3], 0 dlc ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: s_waitcnt vmcnt(3) ; GFX11-NEXT: buffer_store_b8 v33, off, s[0:3], 0 dlc ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: s_waitcnt vmcnt(2) ; GFX11-NEXT: buffer_store_b16 v34, off, s[0:3], 0 dlc ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: s_waitcnt vmcnt(1) ; GFX11-NEXT: buffer_store_b16 v35, off, s[0:3], 0 dlc ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: buffer_store_b16 v36, off, s[0:3], 0 dlc ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: s_setpc_b64 s[30:31] store volatile <32 x i32> %arg0, ptr addrspace(1) undef store volatile i1 %arg1, ptr addrspace(1) undef store volatile i8 %arg2, ptr addrspace(1) undef store volatile i16 %arg3, ptr addrspace(1) undef store volatile half %arg4, ptr addrspace(1) undef store volatile bfloat %arg5, ptr addrspace(1) undef ret void } define void @void_func_v32i32_v2i32_v2f32(<32 x i32> %arg0, <2 x i32> %arg1, <2 x float> %arg2) #0 { ; A <32 x i32> followed by a <2 x i32> and a <2 x float>. The checks expect the <2 x i32> at s32 offsets 4 and 8 and the <2 x float> at offsets 12 and 16, each written back with a single 64-bit store. ;
CI-LABEL: void_func_v32i32_v2i32_v2f32: ; CI: ; %bb.0: ; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; CI-NEXT: buffer_load_dword v31, off, s[0:3], s32 ; CI-NEXT: s_mov_b32 s7, 0xf000 ; CI-NEXT: s_mov_b32 s6, -1 ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: buffer_store_dwordx4 v[28:31], off, s[4:7], 0 ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: buffer_store_dwordx4 v[24:27], off, s[4:7], 0 ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: buffer_store_dwordx4 v[20:23], off, s[4:7], 0 ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: buffer_store_dwordx4 v[16:19], off, s[4:7], 0 ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: buffer_load_dword v17, off, s[0:3], s32 offset:8 ; CI-NEXT: buffer_load_dword v16, off, s[0:3], s32 offset:4 ; CI-NEXT: buffer_load_dword v19, off, s[0:3], s32 offset:16 ; CI-NEXT: buffer_load_dword v18, off, s[0:3], s32 offset:12 ; CI-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0 ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0 ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0 ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: buffer_store_dwordx2 v[16:17], off, s[4:7], 0 ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: buffer_store_dwordx2 v[18:19], off, s[4:7], 0 ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: s_setpc_b64 s[30:31] ; ; VI-LABEL: void_func_v32i32_v2i32_v2f32: ; VI: ; %bb.0: ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; VI-NEXT: buffer_load_dword v31, off, s[0:3], s32 ; VI-NEXT: s_mov_b32 s7, 0xf000 ; VI-NEXT: s_mov_b32 s6, -1 ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_store_dwordx4 v[28:31], off, s[4:7], 0 ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_store_dwordx4 v[24:27], off, s[4:7], 0 ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_store_dwordx4 v[20:23], off, s[4:7], 0 ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_store_dwordx4 v[16:19], off, s[4:7], 0 ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: 
buffer_load_dword v17, off, s[0:3], s32 offset:8 ; VI-NEXT: buffer_load_dword v16, off, s[0:3], s32 offset:4 ; VI-NEXT: buffer_load_dword v19, off, s[0:3], s32 offset:16 ; VI-NEXT: buffer_load_dword v18, off, s[0:3], s32 offset:12 ; VI-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0 ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0 ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0 ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_store_dwordx2 v[16:17], off, s[4:7], 0 ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_store_dwordx2 v[18:19], off, s[4:7], 0 ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: s_setpc_b64 s[30:31] ; ; GFX9-LABEL: void_func_v32i32_v2i32_v2f32: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX9-NEXT: buffer_load_dword v31, off, s[0:3], s32 ; GFX9-NEXT: s_mov_b32 s7, 0xf000 ; GFX9-NEXT: s_mov_b32 s6, -1 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_store_dwordx4 v[28:31], off, s[4:7], 0 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_store_dwordx4 v[24:27], off, s[4:7], 0 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_store_dwordx4 v[20:23], off, s[4:7], 0 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_store_dwordx4 v[16:19], off, s[4:7], 0 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_load_dword v17, off, s[0:3], s32 offset:8 ; GFX9-NEXT: buffer_load_dword v16, off, s[0:3], s32 offset:4 ; GFX9-NEXT: buffer_load_dword v19, off, s[0:3], s32 offset:16 ; GFX9-NEXT: buffer_load_dword v18, off, s[0:3], s32 offset:12 ; GFX9-NEXT: s_nop 0 ; GFX9-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_store_dwordx4 v[0:3], off, 
s[4:7], 0 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_store_dwordx2 v[16:17], off, s[4:7], 0 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_store_dwordx2 v[18:19], off, s[4:7], 0 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: void_func_v32i32_v2i32_v2f32: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: s_clause 0x4 ; GFX11-NEXT: scratch_load_b32 v31, off, s32 ; GFX11-NEXT: scratch_load_b32 v33, off, s32 offset:8 ; GFX11-NEXT: scratch_load_b32 v32, off, s32 offset:4 ; GFX11-NEXT: scratch_load_b32 v35, off, s32 offset:16 ; GFX11-NEXT: scratch_load_b32 v34, off, s32 offset:12 ; GFX11-NEXT: s_mov_b32 s3, 0x31016000 ; GFX11-NEXT: s_mov_b32 s2, -1 ; GFX11-NEXT: s_waitcnt vmcnt(4) ; GFX11-NEXT: buffer_store_b128 v[28:31], off, s[0:3], 0 dlc ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: buffer_store_b128 v[24:27], off, s[0:3], 0 dlc ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: buffer_store_b128 v[20:23], off, s[0:3], 0 dlc ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: buffer_store_b128 v[16:19], off, s[0:3], 0 dlc ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: buffer_store_b128 v[12:15], off, s[0:3], 0 dlc ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: buffer_store_b128 v[8:11], off, s[0:3], 0 dlc ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: buffer_store_b128 v[4:7], off, s[0:3], 0 dlc ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0 dlc ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: s_waitcnt vmcnt(2) ; GFX11-NEXT: buffer_store_b64 v[32:33], off, s[0:3], 0 dlc ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: buffer_store_b64 v[34:35], off, s[0:3], 0 dlc ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: s_setpc_b64 s[30:31] store volatile <32 x i32> %arg0, ptr addrspace(1) undef store volatile <2 x i32> %arg1, ptr addrspace(1) undef store volatile 
<2 x float> %arg2, ptr addrspace(1) undef ret void } define void @void_func_v32i32_v2i16_v2f16_v2bf16_v4bf16(<32 x i32> %arg0, <2 x i16> %arg1, <2 x half> %arg2, <2 x bfloat> %arg3, <4 x bfloat> %arg4) #0 { ; A <32 x i32> followed by <2 x i16>, <2 x half>, <2 x bfloat> and <4 x bfloat>. The VI, GFX9 and GFX11 checks read packed dwords at s32 offsets 4 through 20 and emit three 32-bit stores plus one 64-bit store. The CI checks instead read one dword per element at offsets 4 through 40 and emit ten 16-bit stores. ; CI-LABEL: void_func_v32i32_v2i16_v2f16_v2bf16_v4bf16: ; CI: ; %bb.0: ; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; CI-NEXT: buffer_load_dword v31, off, s[0:3], s32 ; CI-NEXT: s_mov_b32 s7, 0xf000 ; CI-NEXT: s_mov_b32 s6, -1 ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: buffer_store_dwordx4 v[28:31], off, s[4:7], 0 ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: buffer_store_dwordx4 v[24:27], off, s[4:7], 0 ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: buffer_store_dwordx4 v[20:23], off, s[4:7], 0 ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: buffer_store_dwordx4 v[16:19], off, s[4:7], 0 ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: buffer_load_dword v16, off, s[0:3], s32 offset:32 ; CI-NEXT: buffer_load_dword v17, off, s[0:3], s32 offset:36 ; CI-NEXT: buffer_load_dword v18, off, s[0:3], s32 offset:40 ; CI-NEXT: buffer_load_dword v19, off, s[0:3], s32 offset:20 ; CI-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0 ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: buffer_load_dword v20, off, s[0:3], s32 offset:28 ; CI-NEXT: buffer_load_dword v12, off, s[0:3], s32 offset:24 ; CI-NEXT: buffer_load_dword v13, off, s[0:3], s32 offset:12 ; CI-NEXT: buffer_load_dword v14, off, s[0:3], s32 offset:16 ; CI-NEXT: buffer_load_dword v15, off, s[0:3], s32 offset:8 ; CI-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0 ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: buffer_load_dword v8, off, s[0:3], s32 offset:4 ; CI-NEXT: v_lshrrev_b32_e32 v10, 16, v16 ; CI-NEXT: v_lshrrev_b32_e32 v11, 16, v17 ; CI-NEXT: v_lshrrev_b32_e32 v16, 16, v18 ; CI-NEXT: v_lshrrev_b32_e32 v17, 16, v19 ; CI-NEXT: v_lshrrev_b32_e32 v12, 16, v12 ; CI-NEXT: v_lshrrev_b32_e32 v9, 16, v20 ; CI-NEXT: v_cvt_f16_f32_e32 v13, v13 ; CI-NEXT: v_cvt_f16_f32_e32 v14, v14 ; CI-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0 ; CI-NEXT:
s_waitcnt vmcnt(0) ; CI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: buffer_store_short v15, off, s[4:7], 0 ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: buffer_store_short v8, off, s[4:7], 0 ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: buffer_store_short v14, off, s[4:7], 0 ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: buffer_store_short v13, off, s[4:7], 0 ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: buffer_store_short v12, off, s[4:7], 0 ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: buffer_store_short v17, off, s[4:7], 0 ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: buffer_store_short v16, off, s[4:7], 0 ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: buffer_store_short v11, off, s[4:7], 0 ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: buffer_store_short v10, off, s[4:7], 0 ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: buffer_store_short v9, off, s[4:7], 0 ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: s_setpc_b64 s[30:31] ; ; VI-LABEL: void_func_v32i32_v2i16_v2f16_v2bf16_v4bf16: ; VI: ; %bb.0: ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; VI-NEXT: buffer_load_dword v31, off, s[0:3], s32 ; VI-NEXT: s_mov_b32 s7, 0xf000 ; VI-NEXT: s_mov_b32 s6, -1 ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_store_dwordx4 v[28:31], off, s[4:7], 0 ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_store_dwordx4 v[24:27], off, s[4:7], 0 ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_store_dwordx4 v[20:23], off, s[4:7], 0 ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_store_dwordx4 v[16:19], off, s[4:7], 0 ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_load_dword v18, off, s[0:3], s32 offset:4 ; VI-NEXT: buffer_load_dword v19, off, s[0:3], s32 offset:8 ; VI-NEXT: buffer_load_dword v17, off, s[0:3], s32 offset:20 ; VI-NEXT: buffer_load_dword v16, off, s[0:3], s32 offset:16 ; VI-NEXT: buffer_load_dword v20, off, s[0:3], s32 offset:12 ; VI-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0 ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0 ; 
VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0 ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_store_dword v18, off, s[4:7], 0 ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_store_dword v19, off, s[4:7], 0 ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_store_dword v20, off, s[4:7], 0 ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_store_dwordx2 v[16:17], off, s[4:7], 0 ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: s_setpc_b64 s[30:31] ; ; GFX9-LABEL: void_func_v32i32_v2i16_v2f16_v2bf16_v4bf16: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX9-NEXT: buffer_load_dword v31, off, s[0:3], s32 ; GFX9-NEXT: s_mov_b32 s7, 0xf000 ; GFX9-NEXT: s_mov_b32 s6, -1 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_store_dwordx4 v[28:31], off, s[4:7], 0 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_store_dwordx4 v[24:27], off, s[4:7], 0 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_store_dwordx4 v[20:23], off, s[4:7], 0 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_store_dwordx4 v[16:19], off, s[4:7], 0 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_load_dword v18, off, s[0:3], s32 offset:4 ; GFX9-NEXT: buffer_load_dword v19, off, s[0:3], s32 offset:8 ; GFX9-NEXT: buffer_load_dword v17, off, s[0:3], s32 offset:20 ; GFX9-NEXT: buffer_load_dword v16, off, s[0:3], s32 offset:16 ; GFX9-NEXT: buffer_load_dword v20, off, s[0:3], s32 offset:12 ; GFX9-NEXT: s_nop 0 ; GFX9-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_store_dword v18, off, s[4:7], 0 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: 
buffer_store_dword v19, off, s[4:7], 0 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_store_dword v20, off, s[4:7], 0 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_store_dwordx2 v[16:17], off, s[4:7], 0 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: void_func_v32i32_v2i16_v2f16_v2bf16_v4bf16: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: s_clause 0x5 ; GFX11-NEXT: scratch_load_b32 v31, off, s32 ; GFX11-NEXT: scratch_load_b32 v33, off, s32 offset:20 ; GFX11-NEXT: scratch_load_b32 v34, off, s32 offset:4 ; GFX11-NEXT: scratch_load_b32 v35, off, s32 offset:8 ; GFX11-NEXT: scratch_load_b32 v36, off, s32 offset:12 ; GFX11-NEXT: scratch_load_b32 v32, off, s32 offset:16 ; GFX11-NEXT: s_mov_b32 s3, 0x31016000 ; GFX11-NEXT: s_mov_b32 s2, -1 ; GFX11-NEXT: s_waitcnt vmcnt(5) ; GFX11-NEXT: buffer_store_b128 v[28:31], off, s[0:3], 0 dlc ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: buffer_store_b128 v[24:27], off, s[0:3], 0 dlc ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: buffer_store_b128 v[20:23], off, s[0:3], 0 dlc ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: buffer_store_b128 v[16:19], off, s[0:3], 0 dlc ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: buffer_store_b128 v[12:15], off, s[0:3], 0 dlc ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: buffer_store_b128 v[8:11], off, s[0:3], 0 dlc ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: buffer_store_b128 v[4:7], off, s[0:3], 0 dlc ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0 dlc ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: s_waitcnt vmcnt(3) ; GFX11-NEXT: buffer_store_b32 v34, off, s[0:3], 0 dlc ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: s_waitcnt vmcnt(2) ; GFX11-NEXT: buffer_store_b32 v35, off, s[0:3], 0 dlc ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: s_waitcnt vmcnt(1) ; GFX11-NEXT: buffer_store_b32 v36, off, 
s[0:3], 0 dlc ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: buffer_store_b64 v[32:33], off, s[0:3], 0 dlc ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: s_setpc_b64 s[30:31] store volatile <32 x i32> %arg0, ptr addrspace(1) undef store volatile <2 x i16> %arg1, ptr addrspace(1) undef store volatile <2 x half> %arg2, ptr addrspace(1) undef store volatile <2 x bfloat> %arg3, ptr addrspace(1) undef store volatile <4 x bfloat> %arg4, ptr addrspace(1) undef ret void } define void @void_func_v32i32_v2i64_v2f64(<32 x i32> %arg0, <2 x i64> %arg1, <2 x double> %arg2) #0 { ; A <32 x i32> followed by a <2 x i64> and a <2 x double>. The checks expect the <2 x i64> at s32 offsets 4 through 16 and the <2 x double> at offsets 20 through 32, each written back with a single 128-bit store. ; CI-LABEL: void_func_v32i32_v2i64_v2f64: ; CI: ; %bb.0: ; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; CI-NEXT: buffer_load_dword v31, off, s[0:3], s32 ; CI-NEXT: s_mov_b32 s7, 0xf000 ; CI-NEXT: s_mov_b32 s6, -1 ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: buffer_store_dwordx4 v[28:31], off, s[4:7], 0 ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: buffer_store_dwordx4 v[24:27], off, s[4:7], 0 ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: buffer_store_dwordx4 v[20:23], off, s[4:7], 0 ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: buffer_store_dwordx4 v[16:19], off, s[4:7], 0 ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: buffer_load_dword v19, off, s[0:3], s32 offset:16 ; CI-NEXT: buffer_load_dword v18, off, s[0:3], s32 offset:12 ; CI-NEXT: buffer_load_dword v17, off, s[0:3], s32 offset:8 ; CI-NEXT: buffer_load_dword v16, off, s[0:3], s32 offset:4 ; CI-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0 ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: buffer_load_dword v15, off, s[0:3], s32 offset:32 ; CI-NEXT: buffer_load_dword v14, off, s[0:3], s32 offset:28 ; CI-NEXT: buffer_load_dword v13, off, s[0:3], s32 offset:24 ; CI-NEXT: buffer_load_dword v12, off, s[0:3], s32 offset:20 ; CI-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0 ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0 ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: buffer_store_dwordx4 v[0:3],
off, s[4:7], 0 ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: buffer_store_dwordx4 v[16:19], off, s[4:7], 0 ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0 ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: s_setpc_b64 s[30:31] ; ; VI-LABEL: void_func_v32i32_v2i64_v2f64: ; VI: ; %bb.0: ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; VI-NEXT: buffer_load_dword v31, off, s[0:3], s32 ; VI-NEXT: s_mov_b32 s7, 0xf000 ; VI-NEXT: s_mov_b32 s6, -1 ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_store_dwordx4 v[28:31], off, s[4:7], 0 ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_store_dwordx4 v[24:27], off, s[4:7], 0 ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_store_dwordx4 v[20:23], off, s[4:7], 0 ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_store_dwordx4 v[16:19], off, s[4:7], 0 ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_load_dword v19, off, s[0:3], s32 offset:16 ; VI-NEXT: buffer_load_dword v18, off, s[0:3], s32 offset:12 ; VI-NEXT: buffer_load_dword v17, off, s[0:3], s32 offset:8 ; VI-NEXT: buffer_load_dword v16, off, s[0:3], s32 offset:4 ; VI-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0 ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_load_dword v15, off, s[0:3], s32 offset:32 ; VI-NEXT: buffer_load_dword v14, off, s[0:3], s32 offset:28 ; VI-NEXT: buffer_load_dword v13, off, s[0:3], s32 offset:24 ; VI-NEXT: buffer_load_dword v12, off, s[0:3], s32 offset:20 ; VI-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0 ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0 ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_store_dwordx4 v[16:19], off, s[4:7], 0 ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0 ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: s_setpc_b64 s[30:31] ; ; GFX9-LABEL: void_func_v32i32_v2i64_v2f64: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) 
lgkmcnt(0) ; GFX9-NEXT: buffer_load_dword v31, off, s[0:3], s32 ; GFX9-NEXT: s_mov_b32 s7, 0xf000 ; GFX9-NEXT: s_mov_b32 s6, -1 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_store_dwordx4 v[28:31], off, s[4:7], 0 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_store_dwordx4 v[24:27], off, s[4:7], 0 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_store_dwordx4 v[20:23], off, s[4:7], 0 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_store_dwordx4 v[16:19], off, s[4:7], 0 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_load_dword v19, off, s[0:3], s32 offset:16 ; GFX9-NEXT: buffer_load_dword v18, off, s[0:3], s32 offset:12 ; GFX9-NEXT: buffer_load_dword v17, off, s[0:3], s32 offset:8 ; GFX9-NEXT: buffer_load_dword v16, off, s[0:3], s32 offset:4 ; GFX9-NEXT: s_nop 0 ; GFX9-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_load_dword v15, off, s[0:3], s32 offset:32 ; GFX9-NEXT: buffer_load_dword v14, off, s[0:3], s32 offset:28 ; GFX9-NEXT: buffer_load_dword v13, off, s[0:3], s32 offset:24 ; GFX9-NEXT: buffer_load_dword v12, off, s[0:3], s32 offset:20 ; GFX9-NEXT: s_nop 0 ; GFX9-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_store_dwordx4 v[16:19], off, s[4:7], 0 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: void_func_v32i32_v2i64_v2f64: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: s_clause 0x8 ; GFX11-NEXT: scratch_load_b32 v31, off, s32 ; GFX11-NEXT: scratch_load_b32 v35, off, s32 offset:32 ; GFX11-NEXT: scratch_load_b32 v34, off, s32 offset:28 ; GFX11-NEXT: scratch_load_b32 v33, off, s32 offset:24 
; GFX11-NEXT: scratch_load_b32 v39, off, s32 offset:16
; GFX11-NEXT: scratch_load_b32 v38, off, s32 offset:12
; GFX11-NEXT: scratch_load_b32 v37, off, s32 offset:8
; GFX11-NEXT: scratch_load_b32 v36, off, s32 offset:4
; GFX11-NEXT: scratch_load_b32 v32, off, s32 offset:20
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: s_waitcnt vmcnt(8)
; GFX11-NEXT: buffer_store_b128 v[28:31], off, s[0:3], 0 dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: buffer_store_b128 v[24:27], off, s[0:3], 0 dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: buffer_store_b128 v[20:23], off, s[0:3], 0 dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: buffer_store_b128 v[16:19], off, s[0:3], 0 dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: buffer_store_b128 v[12:15], off, s[0:3], 0 dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: buffer_store_b128 v[8:11], off, s[0:3], 0 dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: buffer_store_b128 v[4:7], off, s[0:3], 0 dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0 dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_waitcnt vmcnt(1)
; GFX11-NEXT: buffer_store_b128 v[36:39], off, s[0:3], 0 dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: buffer_store_b128 v[32:35], off, s[0:3], 0 dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_setpc_b64 s[30:31]
  store volatile <32 x i32> %arg0, ptr addrspace(1) undef
  store volatile <2 x i64> %arg1, ptr addrspace(1) undef
  store volatile <2 x double> %arg2, ptr addrspace(1) undef
  ret void
}

; A <32 x i32> argument followed by a <4 x i32> and a <4 x float>.
; The assertions below expect v0-v30 to be stored as received, v31 to be
; reloaded from the stack slot at s32, and the two 4-element vectors to be
; reloaded from s32 offsets 4 through 16 and 20 through 32 before their
; volatile stores. The function body only performs the three volatile stores
; to an undef global pointer, so nothing else should appear in the output.
define void @void_func_v32i32_v4i32_v4f32(<32 x i32> %arg0, <4 x i32> %arg1, <4 x float> %arg2) #0 {
; CI-LABEL: void_func_v32i32_v4i32_v4f32:
; CI: ; %bb.0:
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CI-NEXT: buffer_load_dword v31, off, s[0:3], s32
; CI-NEXT: s_mov_b32 s7, 0xf000
; CI-NEXT: s_mov_b32 s6, -1
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: buffer_store_dwordx4 v[28:31], off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: buffer_store_dwordx4 v[24:27], off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: buffer_store_dwordx4 v[20:23], off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: buffer_store_dwordx4 v[16:19], off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: buffer_load_dword v19, off, s[0:3], s32 offset:16
; CI-NEXT: buffer_load_dword v18, off, s[0:3], s32 offset:12
; CI-NEXT: buffer_load_dword v17, off, s[0:3], s32 offset:8
; CI-NEXT: buffer_load_dword v16, off, s[0:3], s32 offset:4
; CI-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: buffer_load_dword v15, off, s[0:3], s32 offset:32
; CI-NEXT: buffer_load_dword v14, off, s[0:3], s32 offset:28
; CI-NEXT: buffer_load_dword v13, off, s[0:3], s32 offset:24
; CI-NEXT: buffer_load_dword v12, off, s[0:3], s32 offset:20
; CI-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: buffer_store_dwordx4 v[16:19], off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: s_setpc_b64 s[30:31]
;
; VI-LABEL: void_func_v32i32_v4i32_v4f32:
; VI: ; %bb.0:
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-NEXT: buffer_load_dword v31, off, s[0:3], s32
; VI-NEXT: s_mov_b32 s7, 0xf000
; VI-NEXT: s_mov_b32 s6, -1
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: buffer_store_dwordx4 v[28:31], off, s[4:7], 0
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: buffer_store_dwordx4 v[24:27], off, s[4:7], 0
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: buffer_store_dwordx4 v[20:23], off, s[4:7], 0
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: buffer_store_dwordx4 v[16:19], off, s[4:7], 0
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: buffer_load_dword v19, off, s[0:3], s32 offset:16
; VI-NEXT: buffer_load_dword v18, off, s[0:3], s32 offset:12
; VI-NEXT: buffer_load_dword v17, off, s[0:3], s32 offset:8
; VI-NEXT: buffer_load_dword v16, off, s[0:3], s32 offset:4
; VI-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: buffer_load_dword v15, off, s[0:3], s32 offset:32
; VI-NEXT: buffer_load_dword v14, off, s[0:3], s32 offset:28
; VI-NEXT: buffer_load_dword v13, off, s[0:3], s32 offset:24
; VI-NEXT: buffer_load_dword v12, off, s[0:3], s32 offset:20
; VI-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: buffer_store_dwordx4 v[16:19], off, s[4:7], 0
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: void_func_v32i32_v4i32_v4f32:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: buffer_load_dword v31, off, s[0:3], s32
; GFX9-NEXT: s_mov_b32 s7, 0xf000
; GFX9-NEXT: s_mov_b32 s6, -1
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: buffer_store_dwordx4 v[28:31], off, s[4:7], 0
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: buffer_store_dwordx4 v[24:27], off, s[4:7], 0
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: buffer_store_dwordx4 v[20:23], off, s[4:7], 0
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: buffer_store_dwordx4 v[16:19], off, s[4:7], 0
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: buffer_load_dword v19, off, s[0:3], s32 offset:16
; GFX9-NEXT: buffer_load_dword v18, off, s[0:3], s32 offset:12
; GFX9-NEXT: buffer_load_dword v17, off, s[0:3], s32 offset:8
; GFX9-NEXT: buffer_load_dword v16, off, s[0:3], s32 offset:4
; GFX9-NEXT: s_nop 0
; GFX9-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: buffer_load_dword v15, off, s[0:3], s32 offset:32
; GFX9-NEXT: buffer_load_dword v14, off, s[0:3], s32 offset:28
; GFX9-NEXT: buffer_load_dword v13, off, s[0:3], s32 offset:24
; GFX9-NEXT: buffer_load_dword v12, off, s[0:3], s32 offset:20
; GFX9-NEXT: s_nop 0
; GFX9-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: buffer_store_dwordx4 v[16:19], off, s[4:7], 0
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_v32i32_v4i32_v4f32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_clause 0x8
; GFX11-NEXT: scratch_load_b32 v31, off, s32
; GFX11-NEXT: scratch_load_b32 v35, off, s32 offset:16
; GFX11-NEXT: scratch_load_b32 v34, off, s32 offset:12
; GFX11-NEXT: scratch_load_b32 v33, off, s32 offset:8
; GFX11-NEXT: scratch_load_b32 v32, off, s32 offset:4
; GFX11-NEXT: scratch_load_b32 v39, off, s32 offset:32
; GFX11-NEXT: scratch_load_b32 v38, off, s32 offset:28
; GFX11-NEXT: scratch_load_b32 v37, off, s32 offset:24
; GFX11-NEXT: scratch_load_b32 v36, off, s32 offset:20
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: s_waitcnt vmcnt(8)
; GFX11-NEXT: buffer_store_b128 v[28:31], off, s[0:3], 0 dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: buffer_store_b128 v[24:27], off, s[0:3], 0 dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: buffer_store_b128 v[20:23], off, s[0:3], 0 dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: buffer_store_b128 v[16:19], off, s[0:3], 0 dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: buffer_store_b128 v[12:15], off, s[0:3], 0 dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: buffer_store_b128 v[8:11], off, s[0:3], 0 dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: buffer_store_b128 v[4:7], off, s[0:3], 0 dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0 dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_waitcnt vmcnt(4)
; GFX11-NEXT: buffer_store_b128 v[32:35], off, s[0:3], 0 dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: buffer_store_b128 v[36:39], off, s[0:3], 0 dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_setpc_b64 s[30:31]
  store volatile <32 x i32> %arg0, ptr addrspace(1) undef
  store volatile <4 x i32> %arg1, ptr addrspace(1) undef
  store volatile <4 x float> %arg2, ptr addrspace(1) undef
  ret void
}

; A <32 x i32> argument followed by a <8 x i32> and a <8 x float>.
; The assertions expect v31 to be reloaded from the stack slot at s32, the
; <8 x i32> to be reloaded from s32 offsets 4 through 32 and the <8 x float>
; from offsets 36 through 64. Each 8-element vector is written back as two
; 128-bit volatile stores, the upper four dwords first.
define void @void_func_v32i32_v8i32_v8f32(<32 x i32> %arg0, <8 x i32> %arg1, <8 x float> %arg2) #0 {
; CI-LABEL: void_func_v32i32_v8i32_v8f32:
; CI: ; %bb.0:
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CI-NEXT: buffer_load_dword v31, off, s[0:3], s32
; CI-NEXT: s_mov_b32 s7, 0xf000
; CI-NEXT: s_mov_b32 s6, -1
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: buffer_store_dwordx4 v[28:31], off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: buffer_store_dwordx4 v[24:27], off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: buffer_store_dwordx4 v[20:23], off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: buffer_store_dwordx4 v[16:19], off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: buffer_load_dword v19, off, s[0:3], s32 offset:64
; CI-NEXT: buffer_load_dword v18, off, s[0:3], s32 offset:60
; CI-NEXT: buffer_load_dword v17, off, s[0:3], s32 offset:56
; CI-NEXT: buffer_load_dword v16, off, s[0:3], s32 offset:52
; CI-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: buffer_load_dword v15, off, s[0:3], s32 offset:16
; CI-NEXT: buffer_load_dword v14, off, s[0:3], s32 offset:12
; CI-NEXT: buffer_load_dword v13, off, s[0:3], s32 offset:8
; CI-NEXT: buffer_load_dword v12, off, s[0:3], s32 offset:4
; CI-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: buffer_load_dword v11, off, s[0:3], s32 offset:32
; CI-NEXT: buffer_load_dword v10, off, s[0:3], s32 offset:28
; CI-NEXT: buffer_load_dword v9, off, s[0:3], s32 offset:24
; CI-NEXT: buffer_load_dword v8, off, s[0:3], s32 offset:20
; CI-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:48
; CI-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:44
; CI-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:40
; CI-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:36
; CI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: buffer_store_dwordx4 v[16:19], off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: s_setpc_b64 s[30:31]
;
; VI-LABEL: void_func_v32i32_v8i32_v8f32:
; VI: ; %bb.0:
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-NEXT: buffer_load_dword v31, off, s[0:3], s32
; VI-NEXT: s_mov_b32 s7, 0xf000
; VI-NEXT: s_mov_b32 s6, -1
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: buffer_store_dwordx4 v[28:31], off, s[4:7], 0
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: buffer_store_dwordx4 v[24:27], off, s[4:7], 0
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: buffer_store_dwordx4 v[20:23], off, s[4:7], 0
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: buffer_store_dwordx4 v[16:19], off, s[4:7], 0
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: buffer_load_dword v19, off, s[0:3], s32 offset:64
; VI-NEXT: buffer_load_dword v18, off, s[0:3], s32 offset:60
; VI-NEXT: buffer_load_dword v17, off, s[0:3], s32 offset:56
; VI-NEXT: buffer_load_dword v16, off, s[0:3], s32 offset:52
; VI-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: buffer_load_dword v15, off, s[0:3], s32 offset:16
; VI-NEXT: buffer_load_dword v14, off, s[0:3], s32 offset:12
; VI-NEXT: buffer_load_dword v13, off, s[0:3], s32 offset:8
; VI-NEXT: buffer_load_dword v12, off, s[0:3], s32 offset:4
; VI-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: buffer_load_dword v11, off, s[0:3], s32 offset:32
; VI-NEXT: buffer_load_dword v10, off, s[0:3], s32 offset:28
; VI-NEXT: buffer_load_dword v9, off, s[0:3], s32 offset:24
; VI-NEXT: buffer_load_dword v8, off, s[0:3], s32 offset:20
; VI-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:48
; VI-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:44
; VI-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:40
; VI-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:36
; VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: buffer_store_dwordx4 v[16:19], off, s[4:7], 0
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: void_func_v32i32_v8i32_v8f32:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: buffer_load_dword v31, off, s[0:3], s32
; GFX9-NEXT: s_mov_b32 s7, 0xf000
; GFX9-NEXT: s_mov_b32 s6, -1
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: buffer_store_dwordx4 v[28:31], off, s[4:7], 0
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: buffer_store_dwordx4 v[24:27], off, s[4:7], 0
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: buffer_store_dwordx4 v[20:23], off, s[4:7], 0
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: buffer_store_dwordx4 v[16:19], off, s[4:7], 0
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: buffer_load_dword v19, off, s[0:3], s32 offset:64
; GFX9-NEXT: buffer_load_dword v18, off, s[0:3], s32 offset:60
; GFX9-NEXT: buffer_load_dword v17, off, s[0:3], s32 offset:56
; GFX9-NEXT: buffer_load_dword v16, off, s[0:3], s32 offset:52
; GFX9-NEXT: s_nop 0
; GFX9-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: buffer_load_dword v15, off, s[0:3], s32 offset:16
; GFX9-NEXT: buffer_load_dword v14, off, s[0:3], s32 offset:12
; GFX9-NEXT: buffer_load_dword v13, off, s[0:3], s32 offset:8
; GFX9-NEXT: buffer_load_dword v12, off, s[0:3], s32 offset:4
; GFX9-NEXT: s_nop 0
; GFX9-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: buffer_load_dword v11, off, s[0:3], s32 offset:32
; GFX9-NEXT: buffer_load_dword v10, off, s[0:3], s32 offset:28
; GFX9-NEXT: buffer_load_dword v9, off, s[0:3], s32 offset:24
; GFX9-NEXT: buffer_load_dword v8, off, s[0:3], s32 offset:20
; GFX9-NEXT: s_nop 0
; GFX9-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:48
; GFX9-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:44
; GFX9-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:40
; GFX9-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:36
; GFX9-NEXT: s_nop 0
; GFX9-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: buffer_store_dwordx4 v[16:19], off, s[4:7], 0
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_v32i32_v8i32_v8f32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_clause 0x10
; GFX11-NEXT: scratch_load_b32 v35, off, s32 offset:48
; GFX11-NEXT: scratch_load_b32 v34, off, s32 offset:44
; GFX11-NEXT: scratch_load_b32 v33, off, s32 offset:40
; GFX11-NEXT: scratch_load_b32 v39, off, s32 offset:64
; GFX11-NEXT: scratch_load_b32 v38, off, s32 offset:60
; GFX11-NEXT: scratch_load_b32 v31, off, s32
; GFX11-NEXT: scratch_load_b32 v37, off, s32 offset:56
; GFX11-NEXT: scratch_load_b32 v51, off, s32 offset:16
; GFX11-NEXT: scratch_load_b32 v50, off, s32 offset:12
; GFX11-NEXT: scratch_load_b32 v49, off, s32 offset:8
; GFX11-NEXT: scratch_load_b32 v55, off, s32 offset:32
; GFX11-NEXT: scratch_load_b32 v54, off, s32 offset:28
; GFX11-NEXT: scratch_load_b32 v53, off, s32 offset:24
; GFX11-NEXT: scratch_load_b32 v52, off, s32 offset:20
; GFX11-NEXT: scratch_load_b32 v48, off, s32 offset:4
; GFX11-NEXT: scratch_load_b32 v36, off, s32 offset:52
; GFX11-NEXT: scratch_load_b32 v32, off, s32 offset:36
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: s_waitcnt vmcnt(11)
; GFX11-NEXT: buffer_store_b128 v[28:31], off, s[0:3], 0 dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: buffer_store_b128 v[24:27], off, s[0:3], 0 dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: buffer_store_b128 v[20:23], off, s[0:3], 0 dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: buffer_store_b128 v[16:19], off, s[0:3], 0 dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: buffer_store_b128 v[12:15], off, s[0:3], 0 dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: buffer_store_b128 v[8:11], off, s[0:3], 0 dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: buffer_store_b128 v[4:7], off, s[0:3], 0 dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0 dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_waitcnt vmcnt(3)
; GFX11-NEXT: buffer_store_b128 v[52:55], off, s[0:3], 0 dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_waitcnt vmcnt(2)
; GFX11-NEXT: buffer_store_b128 v[48:51], off, s[0:3], 0 dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_waitcnt vmcnt(1)
; GFX11-NEXT: buffer_store_b128 v[36:39], off, s[0:3], 0 dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: buffer_store_b128 v[32:35], off, s[0:3], 0 dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_setpc_b64 s[30:31]
  store volatile <32 x i32> %arg0, ptr addrspace(1) undef
  store volatile <8 x i32> %arg1, ptr addrspace(1) undef
  store volatile <8 x float> %arg2, ptr addrspace(1) undef
  ret void
}

; A <32 x i32> argument followed by a <16 x i32> and a <16 x float>.
; The assertions expect v31 to be reloaded from the stack slot at s32, the
; <16 x i32> to be reloaded from s32 offsets 4 through 64 and the
; <16 x float> from offsets 68 through 128. On the pre-GFX11 targets the
; reloads reuse v0-v19 once the matching part of %arg0 has been stored; the
; GFX11 assertions instead expect every stack dword to be loaded up front
; into registers above v31.
define void @void_func_v32i32_v16i32_v16f32(<32 x i32> %arg0, <16 x i32> %arg1, <16 x float> %arg2) #0 {
; CI-LABEL: void_func_v32i32_v16i32_v16f32:
; CI: ; %bb.0:
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CI-NEXT: buffer_load_dword v31, off, s[0:3], s32
; CI-NEXT: s_mov_b32 s7, 0xf000
; CI-NEXT: s_mov_b32 s6, -1
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: buffer_store_dwordx4 v[28:31], off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: buffer_store_dwordx4 v[24:27], off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: buffer_store_dwordx4 v[20:23], off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: buffer_store_dwordx4 v[16:19], off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: buffer_load_dword v19, off, s[0:3], s32 offset:64
; CI-NEXT: buffer_load_dword v18, off, s[0:3], s32 offset:60
; CI-NEXT: buffer_load_dword v17, off, s[0:3], s32 offset:56
; CI-NEXT: buffer_load_dword v16, off, s[0:3], s32 offset:52
; CI-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: buffer_load_dword v15, off, s[0:3], s32 offset:48
; CI-NEXT: buffer_load_dword v14, off, s[0:3], s32 offset:44
; CI-NEXT: buffer_load_dword v13, off, s[0:3], s32 offset:40
; CI-NEXT: buffer_load_dword v12, off, s[0:3], s32 offset:36
; CI-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: buffer_load_dword v11, off, s[0:3], s32 offset:32
; CI-NEXT: buffer_load_dword v10, off, s[0:3], s32 offset:28
; CI-NEXT: buffer_load_dword v9, off, s[0:3], s32 offset:24
; CI-NEXT: buffer_load_dword v8, off, s[0:3], s32 offset:20
; CI-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:96
; CI-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:92
; CI-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:88
; CI-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:84
; CI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:112
; CI-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:108
; CI-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:104
; CI-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:100
; CI-NEXT: buffer_store_dwordx4 v[16:19], off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: buffer_load_dword v19, off, s[0:3], s32 offset:128
; CI-NEXT: buffer_load_dword v18, off, s[0:3], s32 offset:124
; CI-NEXT: buffer_load_dword v17, off, s[0:3], s32 offset:120
; CI-NEXT: buffer_load_dword v16, off, s[0:3], s32 offset:116
; CI-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: buffer_load_dword v15, off, s[0:3], s32 offset:16
; CI-NEXT: buffer_load_dword v14, off, s[0:3], s32 offset:12
; CI-NEXT: buffer_load_dword v13, off, s[0:3], s32 offset:8
; CI-NEXT: buffer_load_dword v12, off, s[0:3], s32 offset:4
; CI-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: buffer_load_dword v11, off, s[0:3], s32 offset:80
; CI-NEXT: buffer_load_dword v10, off, s[0:3], s32 offset:76
; CI-NEXT: buffer_load_dword v9, off, s[0:3], s32 offset:72
; CI-NEXT: buffer_load_dword v8, off, s[0:3], s32 offset:68
; CI-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: buffer_store_dwordx4 v[16:19], off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: s_setpc_b64 s[30:31]
;
; VI-LABEL: void_func_v32i32_v16i32_v16f32:
; VI: ; %bb.0:
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-NEXT: buffer_load_dword v31, off, s[0:3], s32
; VI-NEXT: s_mov_b32 s7, 0xf000
; VI-NEXT: s_mov_b32 s6, -1
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: buffer_store_dwordx4 v[28:31], off, s[4:7], 0
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: buffer_store_dwordx4 v[24:27], off, s[4:7], 0
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: buffer_store_dwordx4 v[20:23], off, s[4:7], 0
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: buffer_store_dwordx4 v[16:19], off, s[4:7], 0
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: buffer_load_dword v19, off, s[0:3], s32 offset:64
; VI-NEXT: buffer_load_dword v18, off, s[0:3], s32 offset:60
; VI-NEXT: buffer_load_dword v17, off, s[0:3], s32 offset:56
; VI-NEXT: buffer_load_dword v16, off, s[0:3], s32 offset:52
; VI-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: buffer_load_dword v15, off, s[0:3], s32 offset:48
; VI-NEXT: buffer_load_dword v14, off, s[0:3], s32 offset:44
; VI-NEXT: buffer_load_dword v13, off, s[0:3], s32 offset:40
; VI-NEXT: buffer_load_dword v12, off, s[0:3], s32 offset:36
; VI-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: buffer_load_dword v11, off, s[0:3], s32 offset:32
; VI-NEXT: buffer_load_dword v10, off, s[0:3], s32 offset:28
; VI-NEXT: buffer_load_dword v9, off, s[0:3], s32 offset:24
; VI-NEXT: buffer_load_dword v8, off, s[0:3], s32 offset:20
; VI-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:96
; VI-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:92
; VI-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:88
; VI-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:84
; VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:112
; VI-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:108
; VI-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:104
; VI-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:100
; VI-NEXT: buffer_store_dwordx4 v[16:19], off, s[4:7], 0
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: buffer_load_dword v19, off, s[0:3], s32 offset:128
; VI-NEXT: buffer_load_dword v18, off, s[0:3], s32 offset:124
; VI-NEXT: buffer_load_dword v17, off, s[0:3], s32 offset:120
; VI-NEXT: buffer_load_dword v16, off, s[0:3], s32 offset:116
; VI-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: buffer_load_dword v15, off, s[0:3], s32 offset:16
; VI-NEXT: buffer_load_dword v14, off, s[0:3], s32 offset:12
; VI-NEXT: buffer_load_dword v13, off, s[0:3], s32 offset:8
; VI-NEXT: buffer_load_dword v12, off, s[0:3], s32 offset:4
; VI-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: buffer_load_dword v11, off, s[0:3], s32 offset:80
; VI-NEXT: buffer_load_dword v10, off, s[0:3], s32 offset:76
; VI-NEXT: buffer_load_dword v9, off, s[0:3], s32 offset:72
; VI-NEXT: buffer_load_dword v8, off, s[0:3], s32 offset:68
; VI-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: buffer_store_dwordx4 v[16:19], off, s[4:7], 0
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: void_func_v32i32_v16i32_v16f32:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: buffer_load_dword v31, off, s[0:3], s32
; GFX9-NEXT: s_mov_b32 s7, 0xf000
; GFX9-NEXT: s_mov_b32 s6, -1
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: buffer_store_dwordx4 v[28:31], off, s[4:7], 0
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: buffer_store_dwordx4 v[24:27], off, s[4:7], 0
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: buffer_store_dwordx4 v[20:23], off, s[4:7], 0
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: buffer_store_dwordx4 v[16:19], off, s[4:7], 0
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: buffer_load_dword v19, off, s[0:3], s32 offset:64
; GFX9-NEXT: buffer_load_dword v18, off, s[0:3], s32 offset:60
; GFX9-NEXT: buffer_load_dword v17, off, s[0:3], s32 offset:56
; GFX9-NEXT: buffer_load_dword v16, off, s[0:3], s32 offset:52
; GFX9-NEXT: s_nop 0
; GFX9-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: buffer_load_dword v15, off, s[0:3], s32 offset:48
; GFX9-NEXT: buffer_load_dword v14, off, s[0:3], s32 offset:44
; GFX9-NEXT: buffer_load_dword v13, off, s[0:3], s32 offset:40
; GFX9-NEXT: buffer_load_dword v12, off, s[0:3], s32 offset:36
; GFX9-NEXT: s_nop 0
; GFX9-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: buffer_load_dword v11, off, s[0:3], s32 offset:32
; GFX9-NEXT: buffer_load_dword v10, off, s[0:3], s32 offset:28
; GFX9-NEXT: buffer_load_dword v9, off, s[0:3], s32 offset:24
; GFX9-NEXT: buffer_load_dword v8, off, s[0:3], s32 offset:20
; GFX9-NEXT: s_nop 0
; GFX9-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:96
; GFX9-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:92
; GFX9-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:88
; GFX9-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:84
; GFX9-NEXT: s_nop 0
; GFX9-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:112
; GFX9-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:108
; GFX9-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:104
; GFX9-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:100
; GFX9-NEXT: s_nop 0
; GFX9-NEXT: buffer_store_dwordx4 v[16:19], off, s[4:7], 0
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: buffer_load_dword v19, off, s[0:3], s32 offset:128
; GFX9-NEXT: buffer_load_dword v18, off, s[0:3], s32 offset:124
; GFX9-NEXT: buffer_load_dword v17, off, s[0:3], s32 offset:120
; GFX9-NEXT: buffer_load_dword v16, off, s[0:3], s32 offset:116
; GFX9-NEXT: s_nop 0
; GFX9-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: buffer_load_dword v15, off, s[0:3], s32 offset:16
; GFX9-NEXT: buffer_load_dword v14, off, s[0:3], s32 offset:12
; GFX9-NEXT: buffer_load_dword v13, off, s[0:3], s32 offset:8
; GFX9-NEXT: buffer_load_dword v12, off, s[0:3], s32 offset:4
; GFX9-NEXT: s_nop 0
; GFX9-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: buffer_load_dword v11, off, s[0:3], s32 offset:80
; GFX9-NEXT: buffer_load_dword v10, off, s[0:3], s32 offset:76
; GFX9-NEXT: buffer_load_dword v9, off, s[0:3], s32 offset:72
; GFX9-NEXT: buffer_load_dword v8, off, s[0:3], s32 offset:68
; GFX9-NEXT: s_nop 0
; GFX9-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: buffer_store_dwordx4 v[16:19], off, s[4:7], 0
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_v32i32_v16i32_v16f32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_clause 0x20
; GFX11-NEXT: scratch_load_b32 v35, off, s32 offset:80
; GFX11-NEXT: scratch_load_b32 v34, off, s32 offset:76
; GFX11-NEXT: scratch_load_b32 v33, off, s32 offset:72
; GFX11-NEXT: scratch_load_b32 v39, off, s32 offset:96
; GFX11-NEXT: scratch_load_b32 v38, off, s32 offset:92
; GFX11-NEXT: scratch_load_b32 v37, off, s32 offset:88
; GFX11-NEXT: scratch_load_b32 v51, off, s32 offset:112
; GFX11-NEXT: scratch_load_b32 v50, off, s32 offset:108
; GFX11-NEXT: scratch_load_b32 v49, off, s32 offset:104
; GFX11-NEXT: scratch_load_b32 v55, off, s32 offset:128
; GFX11-NEXT: scratch_load_b32 v54, off, s32 offset:124
; GFX11-NEXT: scratch_load_b32 v53, off, s32 offset:120
; GFX11-NEXT: scratch_load_b32 v67, off, s32 offset:16
; GFX11-NEXT: scratch_load_b32 v66, off, s32 offset:12
; GFX11-NEXT: scratch_load_b32 v65, off, s32 offset:8
; GFX11-NEXT: scratch_load_b32 v71, off, s32 offset:32
; GFX11-NEXT: scratch_load_b32 v70, off, s32 offset:28
; GFX11-NEXT: scratch_load_b32 v31, off, s32
; GFX11-NEXT: scratch_load_b32 v69, off, s32 offset:24
; GFX11-NEXT: scratch_load_b32 v83, off, s32 offset:48
; GFX11-NEXT: scratch_load_b32 v82, off, s32 offset:44
; GFX11-NEXT: scratch_load_b32 v81, off, s32 offset:40
; GFX11-NEXT: scratch_load_b32 v87, off, s32 offset:64
; GFX11-NEXT: scratch_load_b32 v86, off, s32 offset:60
; GFX11-NEXT: scratch_load_b32 v85, off, s32 offset:56
; GFX11-NEXT: scratch_load_b32 v84, off, s32 offset:52
; GFX11-NEXT: scratch_load_b32 v80, off, s32 offset:36
; GFX11-NEXT: scratch_load_b32 v68, off, s32 offset:20
; GFX11-NEXT: scratch_load_b32 v64, off, s32 offset:4
; GFX11-NEXT: scratch_load_b32 v52, off, s32 offset:116
; GFX11-NEXT: scratch_load_b32 v48, off, s32 offset:100
; GFX11-NEXT: scratch_load_b32 v36, off, s32 offset:84
; GFX11-NEXT: scratch_load_b32 v32, off, s32 offset:68
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: s_waitcnt vmcnt(15)
; GFX11-NEXT: buffer_store_b128 v[28:31], off, s[0:3], 0 dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: buffer_store_b128 v[24:27], off, s[0:3], 0 dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: buffer_store_b128 v[20:23], off, s[0:3], 0 dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: buffer_store_b128 v[16:19], off, s[0:3], 0 dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: buffer_store_b128 v[12:15], off, s[0:3], 0 dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: buffer_store_b128 v[8:11], off, s[0:3], 0 dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: buffer_store_b128 v[4:7], off, s[0:3], 0 dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0 dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_waitcnt vmcnt(7)
; GFX11-NEXT: buffer_store_b128 v[84:87], off, s[0:3], 0 dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_waitcnt vmcnt(6)
; GFX11-NEXT: buffer_store_b128 v[80:83], off, s[0:3], 0 dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_waitcnt vmcnt(5)
; GFX11-NEXT: buffer_store_b128 v[68:71], off, s[0:3], 0 dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_waitcnt vmcnt(4)
; GFX11-NEXT: buffer_store_b128 v[64:67], off, s[0:3], 0 dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_waitcnt vmcnt(3)
; GFX11-NEXT: buffer_store_b128 v[52:55], off, s[0:3], 0 dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_waitcnt vmcnt(2)
; GFX11-NEXT: buffer_store_b128 v[48:51], off, s[0:3], 0 dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_waitcnt vmcnt(1)
; GFX11-NEXT: buffer_store_b128 v[36:39], off, s[0:3], 0 dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: buffer_store_b128 v[32:35], off, s[0:3], 0 dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_setpc_b64 s[30:31]
  store volatile <32 x i32>
%arg0, ptr addrspace(1) undef
  store volatile <16 x i32> %arg1, ptr addrspace(1) undef
  store volatile <16 x float> %arg2, ptr addrspace(1) undef
  ret void
}

; Make sure v3 isn't a wasted register because of v3 types being promoted to v4
; The assertions expect the three <3 x float> elements to be stored from
; v0, v1 and v2 and the trailing i32 argument to be stored from v3, i.e. the
; scalar immediately follows the vector in the VGPR sequence. All four values
; are written with volatile stores to an undef LDS (addrspace 3) pointer.
define void @void_func_v3f32_wasted_reg(<3 x float> %arg0, i32 %arg1) #0 {
; CI-LABEL: void_func_v3f32_wasted_reg:
; CI: ; %bb.0:
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CI-NEXT: s_mov_b32 m0, -1
; CI-NEXT: ds_write_b32 v0, v0
; CI-NEXT: ds_write_b32 v0, v1
; CI-NEXT: ds_write_b32 v0, v2
; CI-NEXT: ds_write_b32 v0, v3
; CI-NEXT: s_waitcnt lgkmcnt(0)
; CI-NEXT: s_setpc_b64 s[30:31]
;
; VI-LABEL: void_func_v3f32_wasted_reg:
; VI: ; %bb.0:
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-NEXT: s_mov_b32 m0, -1
; VI-NEXT: ds_write_b32 v0, v0
; VI-NEXT: ds_write_b32 v0, v1
; VI-NEXT: ds_write_b32 v0, v2
; VI-NEXT: ds_write_b32 v0, v3
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: void_func_v3f32_wasted_reg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: ds_write_b32 v0, v0
; GFX9-NEXT: ds_write_b32 v0, v1
; GFX9-NEXT: ds_write_b32 v0, v2
; GFX9-NEXT: ds_write_b32 v0, v3
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_v3f32_wasted_reg:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: ds_store_b32 v0, v0
; GFX11-NEXT: ds_store_b32 v0, v1
; GFX11-NEXT: ds_store_b32 v0, v2
; GFX11-NEXT: ds_store_b32 v0, v3
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: s_setpc_b64 s[30:31]
  %arg0.0 = extractelement <3 x float> %arg0, i32 0
  %arg0.1 = extractelement <3 x float> %arg0, i32 1
  %arg0.2 = extractelement <3 x float> %arg0, i32 2
  store volatile float %arg0.0, ptr addrspace(3) undef
  store volatile float %arg0.1, ptr addrspace(3) undef
  store volatile float %arg0.2, ptr addrspace(3) undef
  store volatile i32 %arg1, ptr addrspace(3) undef
  ret void
}

; Integer counterpart of the <3 x float> case: the assertions expect the
; three <3 x i32> elements to be stored from v0, v1 and v2 and the trailing
; i32 argument from v3, so no VGPR is skipped after the 3-element vector.
define void @void_func_v3i32_wasted_reg(<3 x i32> %arg0, i32 %arg1) #0 {
; CI-LABEL: void_func_v3i32_wasted_reg:
; CI: ; %bb.0:
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CI-NEXT: s_mov_b32 m0, -1
; CI-NEXT: ds_write_b32 v0, v0
; CI-NEXT: ds_write_b32 v0, v1
; CI-NEXT: ds_write_b32 v0, v2
; CI-NEXT: ds_write_b32 v0, v3
; CI-NEXT: s_waitcnt lgkmcnt(0)
; CI-NEXT: s_setpc_b64 s[30:31]
;
; VI-LABEL: void_func_v3i32_wasted_reg:
; VI: ; %bb.0:
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-NEXT: s_mov_b32 m0, -1
; VI-NEXT: ds_write_b32 v0, v0
; VI-NEXT: ds_write_b32 v0, v1
; VI-NEXT: ds_write_b32 v0, v2
; VI-NEXT: ds_write_b32 v0, v3
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: void_func_v3i32_wasted_reg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: ds_write_b32 v0, v0
; GFX9-NEXT: ds_write_b32 v0, v1
; GFX9-NEXT: ds_write_b32 v0, v2
; GFX9-NEXT: ds_write_b32 v0, v3
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_v3i32_wasted_reg:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: ds_store_b32 v0, v0
; GFX11-NEXT: ds_store_b32 v0, v1
; GFX11-NEXT: ds_store_b32 v0, v2
; GFX11-NEXT: ds_store_b32 v0, v3
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: s_setpc_b64 s[30:31]
  %arg0.0 = extractelement <3 x i32> %arg0, i32 0
  %arg0.1 = extractelement <3 x i32> %arg0, i32 1
  %arg0.2 = extractelement <3 x i32> %arg0, i32 2
  store volatile i32 %arg0.0, ptr addrspace(3) undef
  store volatile i32 %arg0.1, ptr addrspace(3) undef
  store volatile i32 %arg0.2, ptr addrspace(3) undef
  store volatile i32 %arg1, ptr addrspace(3) undef
  ret void
}

; Check there is no crash.
; Volatile store of a <16 x i8> argument to `ptr addrspace(1) undef`. The checks below expect
; sixteen single-byte stores, from v15 down to v0, each followed by a wait instruction.
define void @void_func_volatile_v16i8(<16 x i8> %arg0) #0 { ; CIGFX89-LABEL: void_func_volatile_v16i8: ; CIGFX89: ; %bb.0: ; CIGFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; CIGFX89-NEXT: s_mov_b32 s7, 0xf000 ; CIGFX89-NEXT: s_mov_b32 s6, -1 ; CIGFX89-NEXT: buffer_store_byte v15, off, s[4:7], 0 ; CIGFX89-NEXT: s_waitcnt vmcnt(0) ; CIGFX89-NEXT: buffer_store_byte v14, off, s[4:7], 0 ; CIGFX89-NEXT: s_waitcnt vmcnt(0) ; CIGFX89-NEXT: buffer_store_byte v13, off, s[4:7], 0 ; CIGFX89-NEXT: s_waitcnt vmcnt(0) ; CIGFX89-NEXT: buffer_store_byte v12, off, s[4:7], 0 ; CIGFX89-NEXT: s_waitcnt vmcnt(0) ; CIGFX89-NEXT: buffer_store_byte v11, off, s[4:7], 0 ; CIGFX89-NEXT: s_waitcnt vmcnt(0) ; CIGFX89-NEXT: buffer_store_byte v10, off, s[4:7], 0 ; CIGFX89-NEXT: s_waitcnt vmcnt(0) ; CIGFX89-NEXT: buffer_store_byte v9, off, s[4:7], 0 ; CIGFX89-NEXT: s_waitcnt vmcnt(0) ; CIGFX89-NEXT: buffer_store_byte v8, off, s[4:7], 0 ; CIGFX89-NEXT: s_waitcnt vmcnt(0) ; CIGFX89-NEXT: buffer_store_byte v7, off, s[4:7], 0 ; CIGFX89-NEXT: s_waitcnt vmcnt(0) ; CIGFX89-NEXT: buffer_store_byte v6, off, s[4:7], 0 ; CIGFX89-NEXT: s_waitcnt vmcnt(0) ; CIGFX89-NEXT: buffer_store_byte v5, off, s[4:7], 0 ; CIGFX89-NEXT: s_waitcnt vmcnt(0) ; CIGFX89-NEXT: buffer_store_byte v4, off, s[4:7], 0 ; CIGFX89-NEXT: s_waitcnt vmcnt(0) ; CIGFX89-NEXT: buffer_store_byte v3, off, s[4:7], 0 ; CIGFX89-NEXT: s_waitcnt vmcnt(0) ; CIGFX89-NEXT: buffer_store_byte v2, off, s[4:7], 0 ; CIGFX89-NEXT: s_waitcnt vmcnt(0) ; CIGFX89-NEXT: buffer_store_byte v1, off, s[4:7], 0 ; CIGFX89-NEXT: s_waitcnt vmcnt(0) ; CIGFX89-NEXT: buffer_store_byte v0, off, s[4:7], 0 ; CIGFX89-NEXT: s_waitcnt vmcnt(0) ; CIGFX89-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: void_func_volatile_v16i8: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: s_mov_b32 s3, 0x31016000 ; GFX11-NEXT: s_mov_b32 s2, -1 ; GFX11-NEXT: buffer_store_b8 v15, off, s[0:3], 0 dlc ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: 
buffer_store_b8 v14, off, s[0:3], 0 dlc ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: buffer_store_b8 v13, off, s[0:3], 0 dlc ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: buffer_store_b8 v12, off, s[0:3], 0 dlc ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: buffer_store_b8 v11, off, s[0:3], 0 dlc ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: buffer_store_b8 v10, off, s[0:3], 0 dlc ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: buffer_store_b8 v9, off, s[0:3], 0 dlc ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: buffer_store_b8 v8, off, s[0:3], 0 dlc ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: buffer_store_b8 v7, off, s[0:3], 0 dlc ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: buffer_store_b8 v6, off, s[0:3], 0 dlc ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: buffer_store_b8 v5, off, s[0:3], 0 dlc ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: buffer_store_b8 v4, off, s[0:3], 0 dlc ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: buffer_store_b8 v3, off, s[0:3], 0 dlc ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: buffer_store_b8 v2, off, s[0:3], 0 dlc ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: buffer_store_b8 v1, off, s[0:3], 0 dlc ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: buffer_store_b8 v0, off, s[0:3], 0 dlc ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: s_setpc_b64 s[30:31] store volatile <16 x i8> %arg0, ptr addrspace(1) undef ret void } ; Check there is no crash. 
; <32 x i32> followed by <16 x i8>. The checks below expect v31 to be loaded relative to s32,
; plus sixteen further values (offsets 4 through 64) that are then written out with byte stores.
define void @void_func_v32i32_v16i8(<32 x i32> %arg0, <16 x i8> %arg1) #0 { ; CI-LABEL: void_func_v32i32_v16i8: ; CI: ; %bb.0: ; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; CI-NEXT: buffer_load_dword v31, off, s[0:3], s32 ; CI-NEXT: s_mov_b32 s7, 0xf000 ; CI-NEXT: s_mov_b32 s6, -1 ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: buffer_store_dwordx4 v[28:31], off, s[4:7], 0 ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: buffer_store_dwordx4 v[24:27], off, s[4:7], 0 ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: buffer_store_dwordx4 v[20:23], off, s[4:7], 0 ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: buffer_store_dwordx4 v[16:19], off, s[4:7], 0 ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: buffer_load_dword v16, off, s[0:3], s32 offset:64 ; CI-NEXT: buffer_load_dword v17, off, s[0:3], s32 offset:48 ; CI-NEXT: buffer_load_dword v18, off, s[0:3], s32 offset:52 ; CI-NEXT: buffer_load_dword v19, off, s[0:3], s32 offset:56 ; CI-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0 ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: buffer_load_dword v12, off, s[0:3], s32 offset:36 ; CI-NEXT: buffer_load_dword v13, off, s[0:3], s32 offset:40 ; CI-NEXT: buffer_load_dword v14, off, s[0:3], s32 offset:44 ; CI-NEXT: buffer_load_dword v15, off, s[0:3], s32 offset:28 ; CI-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0 ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: buffer_load_dword v8, off, s[0:3], s32 offset:32 ; CI-NEXT: buffer_load_dword v9, off, s[0:3], s32 offset:20 ; CI-NEXT: buffer_load_dword v10, off, s[0:3], s32 offset:24 ; CI-NEXT: buffer_load_dword v11, off, s[0:3], s32 offset:16 ; CI-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0 ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:12 ; CI-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:8 ; CI-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:4 ; CI-NEXT: buffer_load_dword v20, off, s[0:3], s32 offset:60 ; CI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: 
buffer_store_byte v16, off, s[4:7], 0 ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: buffer_store_byte v20, off, s[4:7], 0 ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: buffer_store_byte v19, off, s[4:7], 0 ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: buffer_store_byte v18, off, s[4:7], 0 ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: buffer_store_byte v17, off, s[4:7], 0 ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: buffer_store_byte v14, off, s[4:7], 0 ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: buffer_store_byte v13, off, s[4:7], 0 ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: buffer_store_byte v12, off, s[4:7], 0 ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: buffer_store_byte v8, off, s[4:7], 0 ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: buffer_store_byte v15, off, s[4:7], 0 ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: buffer_store_byte v10, off, s[4:7], 0 ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: buffer_store_byte v9, off, s[4:7], 0 ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: buffer_store_byte v11, off, s[4:7], 0 ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: buffer_store_byte v4, off, s[4:7], 0 ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: buffer_store_byte v5, off, s[4:7], 0 ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: buffer_store_byte v6, off, s[4:7], 0 ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: s_setpc_b64 s[30:31] ; ; VI-LABEL: void_func_v32i32_v16i8: ; VI: ; %bb.0: ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; VI-NEXT: buffer_load_dword v31, off, s[0:3], s32 ; VI-NEXT: s_mov_b32 s7, 0xf000 ; VI-NEXT: s_mov_b32 s6, -1 ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_store_dwordx4 v[28:31], off, s[4:7], 0 ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_store_dwordx4 v[24:27], off, s[4:7], 0 ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_store_dwordx4 v[20:23], off, s[4:7], 0 ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_store_dwordx4 v[16:19], off, s[4:7], 0 ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_load_ubyte v16, off, s[0:3], s32 offset:64 ; VI-NEXT: buffer_load_ubyte v17, off, s[0:3], s32 offset:48 ; 
VI-NEXT: buffer_load_ubyte v18, off, s[0:3], s32 offset:52 ; VI-NEXT: buffer_load_ubyte v19, off, s[0:3], s32 offset:56 ; VI-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0 ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_load_ubyte v12, off, s[0:3], s32 offset:36 ; VI-NEXT: buffer_load_ubyte v13, off, s[0:3], s32 offset:40 ; VI-NEXT: buffer_load_ubyte v14, off, s[0:3], s32 offset:44 ; VI-NEXT: buffer_load_ubyte v15, off, s[0:3], s32 offset:28 ; VI-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0 ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_load_ubyte v8, off, s[0:3], s32 offset:32 ; VI-NEXT: buffer_load_ubyte v9, off, s[0:3], s32 offset:20 ; VI-NEXT: buffer_load_ubyte v10, off, s[0:3], s32 offset:24 ; VI-NEXT: buffer_load_ubyte v11, off, s[0:3], s32 offset:16 ; VI-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0 ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_load_ubyte v4, off, s[0:3], s32 offset:12 ; VI-NEXT: buffer_load_ubyte v5, off, s[0:3], s32 offset:8 ; VI-NEXT: buffer_load_ubyte v6, off, s[0:3], s32 offset:4 ; VI-NEXT: buffer_load_ubyte v20, off, s[0:3], s32 offset:60 ; VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_store_byte v16, off, s[4:7], 0 ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_store_byte v20, off, s[4:7], 0 ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_store_byte v19, off, s[4:7], 0 ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_store_byte v18, off, s[4:7], 0 ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_store_byte v17, off, s[4:7], 0 ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_store_byte v14, off, s[4:7], 0 ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_store_byte v13, off, s[4:7], 0 ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_store_byte v12, off, s[4:7], 0 ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_store_byte v8, off, s[4:7], 0 ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_store_byte v15, off, s[4:7], 0 ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: 
buffer_store_byte v10, off, s[4:7], 0 ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_store_byte v9, off, s[4:7], 0 ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_store_byte v11, off, s[4:7], 0 ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_store_byte v4, off, s[4:7], 0 ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_store_byte v5, off, s[4:7], 0 ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_store_byte v6, off, s[4:7], 0 ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: s_setpc_b64 s[30:31] ; ; GFX9-LABEL: void_func_v32i32_v16i8: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX9-NEXT: buffer_load_dword v31, off, s[0:3], s32 ; GFX9-NEXT: s_mov_b32 s7, 0xf000 ; GFX9-NEXT: s_mov_b32 s6, -1 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_store_dwordx4 v[28:31], off, s[4:7], 0 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_store_dwordx4 v[24:27], off, s[4:7], 0 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_store_dwordx4 v[20:23], off, s[4:7], 0 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_store_dwordx4 v[16:19], off, s[4:7], 0 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_load_ubyte v16, off, s[0:3], s32 offset:64 ; GFX9-NEXT: buffer_load_ubyte v17, off, s[0:3], s32 offset:48 ; GFX9-NEXT: buffer_load_ubyte v18, off, s[0:3], s32 offset:52 ; GFX9-NEXT: buffer_load_ubyte v19, off, s[0:3], s32 offset:56 ; GFX9-NEXT: buffer_load_ubyte v20, off, s[0:3], s32 offset:60 ; GFX9-NEXT: s_nop 0 ; GFX9-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_load_ubyte v12, off, s[0:3], s32 offset:36 ; GFX9-NEXT: buffer_load_ubyte v13, off, s[0:3], s32 offset:40 ; GFX9-NEXT: buffer_load_ubyte v14, off, s[0:3], s32 offset:44 ; GFX9-NEXT: buffer_load_ubyte v15, off, s[0:3], s32 offset:28 ; GFX9-NEXT: s_nop 0 ; GFX9-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_load_ubyte v8, off, s[0:3], s32 offset:32 ; GFX9-NEXT: buffer_load_ubyte 
v9, off, s[0:3], s32 offset:20 ; GFX9-NEXT: buffer_load_ubyte v10, off, s[0:3], s32 offset:24 ; GFX9-NEXT: buffer_load_ubyte v11, off, s[0:3], s32 offset:16 ; GFX9-NEXT: s_nop 0 ; GFX9-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_load_ubyte v4, off, s[0:3], s32 offset:12 ; GFX9-NEXT: buffer_load_ubyte v5, off, s[0:3], s32 offset:8 ; GFX9-NEXT: buffer_load_ubyte v6, off, s[0:3], s32 offset:4 ; GFX9-NEXT: s_nop 0 ; GFX9-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_store_byte v16, off, s[4:7], 0 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_store_byte v20, off, s[4:7], 0 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_store_byte v19, off, s[4:7], 0 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_store_byte v18, off, s[4:7], 0 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_store_byte v17, off, s[4:7], 0 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_store_byte v14, off, s[4:7], 0 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_store_byte v13, off, s[4:7], 0 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_store_byte v12, off, s[4:7], 0 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_store_byte v8, off, s[4:7], 0 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_store_byte v15, off, s[4:7], 0 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_store_byte v10, off, s[4:7], 0 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_store_byte v9, off, s[4:7], 0 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_store_byte v11, off, s[4:7], 0 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_store_byte v4, off, s[4:7], 0 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_store_byte v5, off, s[4:7], 0 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_store_byte v6, off, s[4:7], 0 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: void_func_v32i32_v16i8: ; GFX11: ; %bb.0: ; GFX11-NEXT: 
s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: s_clause 0x10 ; GFX11-NEXT: scratch_load_b32 v31, off, s32 ; GFX11-NEXT: scratch_load_u8 v32, off, s32 offset:64 ; GFX11-NEXT: scratch_load_u8 v33, off, s32 offset:60 ; GFX11-NEXT: scratch_load_u8 v34, off, s32 offset:56 ; GFX11-NEXT: scratch_load_u8 v35, off, s32 offset:52 ; GFX11-NEXT: scratch_load_u8 v36, off, s32 offset:48 ; GFX11-NEXT: scratch_load_u8 v37, off, s32 offset:44 ; GFX11-NEXT: scratch_load_u8 v38, off, s32 offset:40 ; GFX11-NEXT: scratch_load_u8 v39, off, s32 offset:36 ; GFX11-NEXT: scratch_load_u8 v48, off, s32 offset:32 ; GFX11-NEXT: scratch_load_u8 v49, off, s32 offset:28 ; GFX11-NEXT: scratch_load_u8 v50, off, s32 offset:24 ; GFX11-NEXT: scratch_load_u8 v51, off, s32 offset:20 ; GFX11-NEXT: scratch_load_u8 v52, off, s32 offset:16 ; GFX11-NEXT: scratch_load_u8 v53, off, s32 offset:12 ; GFX11-NEXT: scratch_load_u8 v54, off, s32 offset:8 ; GFX11-NEXT: scratch_load_u8 v55, off, s32 offset:4 ; GFX11-NEXT: s_mov_b32 s3, 0x31016000 ; GFX11-NEXT: s_mov_b32 s2, -1 ; GFX11-NEXT: s_waitcnt vmcnt(16) ; GFX11-NEXT: buffer_store_b128 v[28:31], off, s[0:3], 0 dlc ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: buffer_store_b128 v[24:27], off, s[0:3], 0 dlc ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: buffer_store_b128 v[20:23], off, s[0:3], 0 dlc ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: buffer_store_b128 v[16:19], off, s[0:3], 0 dlc ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: buffer_store_b128 v[12:15], off, s[0:3], 0 dlc ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: buffer_store_b128 v[8:11], off, s[0:3], 0 dlc ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: buffer_store_b128 v[4:7], off, s[0:3], 0 dlc ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0 dlc ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: s_waitcnt vmcnt(15) ; GFX11-NEXT: buffer_store_b8 v32, off, s[0:3], 0 dlc ; GFX11-NEXT: s_waitcnt_vscnt 
null, 0x0 ; GFX11-NEXT: s_waitcnt vmcnt(14) ; GFX11-NEXT: buffer_store_b8 v33, off, s[0:3], 0 dlc ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: s_waitcnt vmcnt(13) ; GFX11-NEXT: buffer_store_b8 v34, off, s[0:3], 0 dlc ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: s_waitcnt vmcnt(12) ; GFX11-NEXT: buffer_store_b8 v35, off, s[0:3], 0 dlc ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: s_waitcnt vmcnt(11) ; GFX11-NEXT: buffer_store_b8 v36, off, s[0:3], 0 dlc ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: s_waitcnt vmcnt(10) ; GFX11-NEXT: buffer_store_b8 v37, off, s[0:3], 0 dlc ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: s_waitcnt vmcnt(9) ; GFX11-NEXT: buffer_store_b8 v38, off, s[0:3], 0 dlc ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: s_waitcnt vmcnt(8) ; GFX11-NEXT: buffer_store_b8 v39, off, s[0:3], 0 dlc ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: s_waitcnt vmcnt(7) ; GFX11-NEXT: buffer_store_b8 v48, off, s[0:3], 0 dlc ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: s_waitcnt vmcnt(6) ; GFX11-NEXT: buffer_store_b8 v49, off, s[0:3], 0 dlc ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: s_waitcnt vmcnt(5) ; GFX11-NEXT: buffer_store_b8 v50, off, s[0:3], 0 dlc ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: s_waitcnt vmcnt(4) ; GFX11-NEXT: buffer_store_b8 v51, off, s[0:3], 0 dlc ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: s_waitcnt vmcnt(3) ; GFX11-NEXT: buffer_store_b8 v52, off, s[0:3], 0 dlc ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: s_waitcnt vmcnt(2) ; GFX11-NEXT: buffer_store_b8 v53, off, s[0:3], 0 dlc ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: s_waitcnt vmcnt(1) ; GFX11-NEXT: buffer_store_b8 v54, off, s[0:3], 0 dlc ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: buffer_store_b8 v55, off, s[0:3], 0 dlc ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: s_setpc_b64 s[30:31] store volatile <32 x i32> %arg0, ptr 
addrspace(1) undef store volatile <16 x i8> %arg1, ptr addrspace(1) undef ret void } define void @void_func_bf16(bfloat %arg0) #0 { ; CI-LABEL: void_func_bf16: ; CI: ; %bb.0: ; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; CI-NEXT: v_lshrrev_b32_e32 v0, 16, v0 ; CI-NEXT: s_mov_b32 s7, 0xf000 ; CI-NEXT: s_mov_b32 s6, -1 ; CI-NEXT: buffer_store_short v0, off, s[4:7], 0 ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: s_setpc_b64 s[30:31] ; ; GFX89-LABEL: void_func_bf16: ; GFX89: ; %bb.0: ; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX89-NEXT: s_mov_b32 s7, 0xf000 ; GFX89-NEXT: s_mov_b32 s6, -1 ; GFX89-NEXT: buffer_store_short v0, off, s[4:7], 0 ; GFX89-NEXT: s_waitcnt vmcnt(0) ; GFX89-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: void_func_bf16: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: s_mov_b32 s3, 0x31016000 ; GFX11-NEXT: s_mov_b32 s2, -1 ; GFX11-NEXT: buffer_store_b16 v0, off, s[0:3], 0 ; GFX11-NEXT: s_setpc_b64 s[30:31] store bfloat %arg0, ptr addrspace(1) undef ret void } ; In the bf16 vector tests from here on, the CI checks combine pairs of elements with v_lshrrev_b32 and v_alignbit_b32 before storing, while the GFX89 and GFX11 checks store the incoming registers unchanged. define void @void_func_v2bf16(<2 x bfloat> %arg0) #0 { ; CI-LABEL: void_func_v2bf16: ; CI: ; %bb.0: ; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; CI-NEXT: v_lshrrev_b32_e32 v1, 16, v1 ; CI-NEXT: v_alignbit_b32 v0, v1, v0, 16 ; CI-NEXT: s_mov_b32 s7, 0xf000 ; CI-NEXT: s_mov_b32 s6, -1 ; CI-NEXT: buffer_store_dword v0, off, s[4:7], 0 ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: s_setpc_b64 s[30:31] ; ; GFX89-LABEL: void_func_v2bf16: ; GFX89: ; %bb.0: ; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX89-NEXT: s_mov_b32 s7, 0xf000 ; GFX89-NEXT: s_mov_b32 s6, -1 ; GFX89-NEXT: buffer_store_dword v0, off, s[4:7], 0 ; GFX89-NEXT: s_waitcnt vmcnt(0) ; GFX89-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: void_func_v2bf16: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: s_mov_b32 s3, 0x31016000 ; GFX11-NEXT: s_mov_b32 s2, -1 ; GFX11-NEXT: buffer_store_b32 v0, off, s[0:3], 0 ; GFX11-NEXT: s_setpc_b64 s[30:31] 
store <2 x bfloat> %arg0, ptr addrspace(1) undef ret void } define void @void_func_v3bf16(<3 x bfloat> %arg0) #0 { ; CI-LABEL: void_func_v3bf16: ; CI: ; %bb.0: ; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; CI-NEXT: v_lshrrev_b32_e32 v1, 16, v1 ; CI-NEXT: v_alignbit_b32 v0, v1, v0, 16 ; CI-NEXT: v_lshrrev_b32_e32 v1, 16, v2 ; CI-NEXT: s_mov_b32 s7, 0xf000 ; CI-NEXT: s_mov_b32 s6, -1 ; CI-NEXT: buffer_store_short v1, off, s[4:7], 0 ; CI-NEXT: buffer_store_dword v0, off, s[4:7], 0 ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: s_setpc_b64 s[30:31] ; ; GFX89-LABEL: void_func_v3bf16: ; GFX89: ; %bb.0: ; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX89-NEXT: s_mov_b32 s7, 0xf000 ; GFX89-NEXT: s_mov_b32 s6, -1 ; GFX89-NEXT: buffer_store_short v1, off, s[4:7], 0 ; GFX89-NEXT: buffer_store_dword v0, off, s[4:7], 0 ; GFX89-NEXT: s_waitcnt vmcnt(0) ; GFX89-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: void_func_v3bf16: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: s_mov_b32 s3, 0x31016000 ; GFX11-NEXT: s_mov_b32 s2, -1 ; GFX11-NEXT: s_clause 0x1 ; GFX11-NEXT: buffer_store_b16 v1, off, s[0:3], 0 ; GFX11-NEXT: buffer_store_b32 v0, off, s[0:3], 0 ; GFX11-NEXT: s_setpc_b64 s[30:31] store <3 x bfloat> %arg0, ptr addrspace(1) undef ret void } define void @void_func_v4bf16(<4 x bfloat> %arg0) #0 { ; CI-LABEL: void_func_v4bf16: ; CI: ; %bb.0: ; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; CI-NEXT: v_lshrrev_b32_e32 v3, 16, v3 ; CI-NEXT: v_lshrrev_b32_e32 v1, 16, v1 ; CI-NEXT: v_alignbit_b32 v2, v3, v2, 16 ; CI-NEXT: v_alignbit_b32 v1, v1, v0, 16 ; CI-NEXT: s_mov_b32 s7, 0xf000 ; CI-NEXT: s_mov_b32 s6, -1 ; CI-NEXT: buffer_store_dwordx2 v[1:2], off, s[4:7], 0 ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: s_setpc_b64 s[30:31] ; ; GFX89-LABEL: void_func_v4bf16: ; GFX89: ; %bb.0: ; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX89-NEXT: s_mov_b32 s7, 0xf000 ; GFX89-NEXT: s_mov_b32 s6, -1 ; GFX89-NEXT: buffer_store_dwordx2 
v[0:1], off, s[4:7], 0 ; GFX89-NEXT: s_waitcnt vmcnt(0) ; GFX89-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: void_func_v4bf16: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: s_mov_b32 s3, 0x31016000 ; GFX11-NEXT: s_mov_b32 s2, -1 ; GFX11-NEXT: buffer_store_b64 v[0:1], off, s[0:3], 0 ; GFX11-NEXT: s_setpc_b64 s[30:31] store <4 x bfloat> %arg0, ptr addrspace(1) undef ret void } define void @void_func_v8bf16(<8 x bfloat> %arg0) #0 { ; CI-LABEL: void_func_v8bf16: ; CI: ; %bb.0: ; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; CI-NEXT: v_lshrrev_b32_e32 v7, 16, v7 ; CI-NEXT: v_lshrrev_b32_e32 v5, 16, v5 ; CI-NEXT: v_lshrrev_b32_e32 v3, 16, v3 ; CI-NEXT: v_lshrrev_b32_e32 v1, 16, v1 ; CI-NEXT: v_alignbit_b32 v6, v7, v6, 16 ; CI-NEXT: v_alignbit_b32 v5, v5, v4, 16 ; CI-NEXT: v_alignbit_b32 v4, v3, v2, 16 ; CI-NEXT: v_alignbit_b32 v3, v1, v0, 16 ; CI-NEXT: s_mov_b32 s7, 0xf000 ; CI-NEXT: s_mov_b32 s6, -1 ; CI-NEXT: buffer_store_dwordx4 v[3:6], off, s[4:7], 0 ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: s_setpc_b64 s[30:31] ; ; GFX89-LABEL: void_func_v8bf16: ; GFX89: ; %bb.0: ; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX89-NEXT: s_mov_b32 s7, 0xf000 ; GFX89-NEXT: s_mov_b32 s6, -1 ; GFX89-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 ; GFX89-NEXT: s_waitcnt vmcnt(0) ; GFX89-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: void_func_v8bf16: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: s_mov_b32 s3, 0x31016000 ; GFX11-NEXT: s_mov_b32 s2, -1 ; GFX11-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0 ; GFX11-NEXT: s_setpc_b64 s[30:31] store <8 x bfloat> %arg0, ptr addrspace(1) undef ret void } define void @void_func_v16bf16(<16 x bfloat> %arg0) #0 { ; CI-LABEL: void_func_v16bf16: ; CI: ; %bb.0: ; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; CI-NEXT: v_lshrrev_b32_e32 v5, 16, v5 ; CI-NEXT: v_lshrrev_b32_e32 v3, 16, v3 ; CI-NEXT: v_lshrrev_b32_e32 v1, 16, v1 ; CI-NEXT: v_alignbit_b32 v5, 
v5, v4, 16 ; CI-NEXT: v_alignbit_b32 v4, v3, v2, 16 ; CI-NEXT: v_alignbit_b32 v3, v1, v0, 16 ; CI-NEXT: v_lshrrev_b32_e32 v0, 16, v15 ; CI-NEXT: v_alignbit_b32 v14, v0, v14, 16 ; CI-NEXT: v_lshrrev_b32_e32 v0, 16, v13 ; CI-NEXT: v_alignbit_b32 v13, v0, v12, 16 ; CI-NEXT: v_lshrrev_b32_e32 v0, 16, v11 ; CI-NEXT: v_alignbit_b32 v12, v0, v10, 16 ; CI-NEXT: v_lshrrev_b32_e32 v0, 16, v9 ; CI-NEXT: v_lshrrev_b32_e32 v7, 16, v7 ; CI-NEXT: v_alignbit_b32 v11, v0, v8, 16 ; CI-NEXT: s_mov_b32 s7, 0xf000 ; CI-NEXT: s_mov_b32 s6, -1 ; CI-NEXT: v_alignbit_b32 v6, v7, v6, 16 ; CI-NEXT: buffer_store_dwordx4 v[11:14], off, s[4:7], 0 ; CI-NEXT: buffer_store_dwordx4 v[3:6], off, s[4:7], 0 ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: s_setpc_b64 s[30:31] ; ; GFX89-LABEL: void_func_v16bf16: ; GFX89: ; %bb.0: ; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX89-NEXT: s_mov_b32 s7, 0xf000 ; GFX89-NEXT: s_mov_b32 s6, -1 ; GFX89-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0 ; GFX89-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 ; GFX89-NEXT: s_waitcnt vmcnt(0) ; GFX89-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: void_func_v16bf16: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: s_mov_b32 s3, 0x31016000 ; GFX11-NEXT: s_mov_b32 s2, -1 ; GFX11-NEXT: s_clause 0x1 ; GFX11-NEXT: buffer_store_b128 v[4:7], off, s[0:3], 0 ; GFX11-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0 ; GFX11-NEXT: s_setpc_b64 s[30:31] store <16 x bfloat> %arg0, ptr addrspace(1) undef ret void } attributes #0 = { nounwind }