937 lines
44 KiB
LLVM
937 lines
44 KiB
LLVM
|
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
; RUN: llc -global-isel=1 -mtriple=amdgcn--amdpal -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefix=GISEL-GFX11 %s
; RUN: llc -global-isel=1 -mtriple=amdgcn--amdpal -mcpu=gfx1030 -verify-machineinstrs < %s | FileCheck -check-prefix=GISEL-GFX10 %s
; RUN: llc -global-isel=0 -mtriple=amdgcn--amdpal -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefix=DAGISEL-GFX11 %s
; RUN: llc -global-isel=0 -mtriple=amdgcn--amdpal -mcpu=gfx1030 -verify-machineinstrs < %s | FileCheck -check-prefix=DAGISEL-GFX10 %s

; Variadic amdgpu_gfx function declared without a body. The tests below call
; it with their own arguments so that those values have a use.
declare amdgpu_gfx void @use(...)
|
||
|
|
||
|
; Chain function whose body is a bare `ret void`. It takes one `inreg` struct
; argument and one plain struct argument and uses neither. For all four run
; configurations the expected output is just the entry s_waitcnt followed by
; s_endpgm.
define amdgpu_cs_chain void @amdgpu_cs_chain_no_stack({ptr, i32, <4 x i32>} inreg %a, {ptr, i32, <4 x i32>} %b) {
; GISEL-GFX11-LABEL: amdgpu_cs_chain_no_stack:
; GISEL-GFX11: ; %bb.0:
; GISEL-GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-GFX11-NEXT: s_endpgm
;
; GISEL-GFX10-LABEL: amdgpu_cs_chain_no_stack:
; GISEL-GFX10: ; %bb.0:
; GISEL-GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-GFX10-NEXT: s_endpgm
;
; DAGISEL-GFX11-LABEL: amdgpu_cs_chain_no_stack:
; DAGISEL-GFX11: ; %bb.0:
; DAGISEL-GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; DAGISEL-GFX11-NEXT: s_endpgm
;
; DAGISEL-GFX10-LABEL: amdgpu_cs_chain_no_stack:
; DAGISEL-GFX10: ; %bb.0:
; DAGISEL-GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; DAGISEL-GFX10-NEXT: s_endpgm
  ret void
}
|
||
|
|
||
|
; Chain function that forwards both of its <4 x i32> arguments to @use.
; According to the assertions below:
;   * the `inreg` vector is read from s0-s3 and copied into v0-v3;
;   * the plain vector is read from v8-v11 and copied into v4-v7;
;   * s32 is set to 0 before the s_swappc_b64 call;
;   * the gfx1030 runs additionally copy s[48:51] into s[0:3] before the call;
;   * the function ends with s_endpgm after the call returns.
define amdgpu_cs_chain void @amdgpu_cs_chain_simple_call(<4 x i32> inreg %sgpr, <4 x i32> %vgpr) {
; GISEL-GFX11-LABEL: amdgpu_cs_chain_simple_call:
; GISEL-GFX11: ; %bb.0:
; GISEL-GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-GFX11-NEXT: v_dual_mov_b32 v4, v8 :: v_dual_mov_b32 v5, v9
; GISEL-GFX11-NEXT: v_dual_mov_b32 v6, v10 :: v_dual_mov_b32 v7, v11
; GISEL-GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
; GISEL-GFX11-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3
; GISEL-GFX11-NEXT: s_mov_b32 s4, use@abs32@lo
; GISEL-GFX11-NEXT: s_mov_b32 s5, use@abs32@hi
; GISEL-GFX11-NEXT: s_mov_b32 s32, 0
; GISEL-GFX11-NEXT: s_swappc_b64 s[30:31], s[4:5]
; GISEL-GFX11-NEXT: s_endpgm
;
; GISEL-GFX10-LABEL: amdgpu_cs_chain_simple_call:
; GISEL-GFX10: ; %bb.0:
; GISEL-GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-GFX10-NEXT: v_mov_b32_e32 v4, v8
; GISEL-GFX10-NEXT: v_mov_b32_e32 v5, v9
; GISEL-GFX10-NEXT: v_mov_b32_e32 v6, v10
; GISEL-GFX10-NEXT: v_mov_b32_e32 v7, v11
; GISEL-GFX10-NEXT: v_mov_b32_e32 v0, s0
; GISEL-GFX10-NEXT: v_mov_b32_e32 v1, s1
; GISEL-GFX10-NEXT: v_mov_b32_e32 v2, s2
; GISEL-GFX10-NEXT: v_mov_b32_e32 v3, s3
; GISEL-GFX10-NEXT: s_mov_b64 s[0:1], s[48:49]
; GISEL-GFX10-NEXT: s_mov_b32 s4, use@abs32@lo
; GISEL-GFX10-NEXT: s_mov_b32 s5, use@abs32@hi
; GISEL-GFX10-NEXT: s_mov_b64 s[2:3], s[50:51]
; GISEL-GFX10-NEXT: s_mov_b32 s32, 0
; GISEL-GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5]
; GISEL-GFX10-NEXT: s_endpgm
;
; DAGISEL-GFX11-LABEL: amdgpu_cs_chain_simple_call:
; DAGISEL-GFX11: ; %bb.0:
; DAGISEL-GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; DAGISEL-GFX11-NEXT: v_dual_mov_b32 v7, v11 :: v_dual_mov_b32 v6, v10
; DAGISEL-GFX11-NEXT: v_dual_mov_b32 v5, v9 :: v_dual_mov_b32 v4, v8
; DAGISEL-GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
; DAGISEL-GFX11-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3
; DAGISEL-GFX11-NEXT: s_mov_b32 s5, use@abs32@hi
; DAGISEL-GFX11-NEXT: s_mov_b32 s4, use@abs32@lo
; DAGISEL-GFX11-NEXT: s_mov_b32 s32, 0
; DAGISEL-GFX11-NEXT: s_swappc_b64 s[30:31], s[4:5]
; DAGISEL-GFX11-NEXT: s_endpgm
;
; DAGISEL-GFX10-LABEL: amdgpu_cs_chain_simple_call:
; DAGISEL-GFX10: ; %bb.0:
; DAGISEL-GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v7, v11
; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v6, v10
; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v5, v9
; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v4, v8
; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v0, s0
; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v1, s1
; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v2, s2
; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v3, s3
; DAGISEL-GFX10-NEXT: s_mov_b64 s[0:1], s[48:49]
; DAGISEL-GFX10-NEXT: s_mov_b32 s5, use@abs32@hi
; DAGISEL-GFX10-NEXT: s_mov_b32 s4, use@abs32@lo
; DAGISEL-GFX10-NEXT: s_mov_b64 s[2:3], s[50:51]
; DAGISEL-GFX10-NEXT: s_mov_b32 s32, 0
; DAGISEL-GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5]
; DAGISEL-GFX10-NEXT: s_endpgm
  call amdgpu_gfx void @use(<4 x i32> %sgpr, <4 x i32> %vgpr)
  ret void
}
|
||
|
|
||
|
; Chain function that forwards two <24 x i32> vectors to @use, which is more
; than fits in the VGPRs used for the outgoing call. According to the
; assertions below:
;   * the `inreg` vector is read from s0-s23 and copied into v0-v23;
;   * the first eight elements of the plain vector (v8-v15) are parked in
;     v32-v39 and then moved into v24-v31;
;   * the remaining sixteen elements (v16-v31) are stored to scratch at
;     s32 + 0 through s32 + 60, with s32 first set to 0;
;   * the gfx1030 runs address scratch through s[48:51] and copy that
;     descriptor into s[0:3] before the call.
define amdgpu_cs_chain void @amdgpu_cs_chain_spill(<24 x i32> inreg %sgprs, <24 x i32> %vgprs) {
; GISEL-GFX11-LABEL: amdgpu_cs_chain_spill:
; GISEL-GFX11: ; %bb.0:
; GISEL-GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-GFX11-NEXT: s_mov_b32 s32, 0
; GISEL-GFX11-NEXT: v_dual_mov_b32 v32, v8 :: v_dual_mov_b32 v33, v9
; GISEL-GFX11-NEXT: s_add_u32 s24, s32, 4
; GISEL-GFX11-NEXT: scratch_store_b32 off, v16, s32
; GISEL-GFX11-NEXT: scratch_store_b32 off, v17, s24
; GISEL-GFX11-NEXT: s_add_u32 s24, s32, 8
; GISEL-GFX11-NEXT: s_add_u32 s25, s32, 12
; GISEL-GFX11-NEXT: scratch_store_b32 off, v18, s24
; GISEL-GFX11-NEXT: scratch_store_b32 off, v19, s25
; GISEL-GFX11-NEXT: s_add_u32 s24, s32, 16
; GISEL-GFX11-NEXT: s_add_u32 s25, s32, 20
; GISEL-GFX11-NEXT: scratch_store_b32 off, v20, s24
; GISEL-GFX11-NEXT: scratch_store_b32 off, v21, s25
; GISEL-GFX11-NEXT: s_add_u32 s24, s32, 24
; GISEL-GFX11-NEXT: s_add_u32 s25, s32, 28
; GISEL-GFX11-NEXT: scratch_store_b32 off, v22, s24
; GISEL-GFX11-NEXT: scratch_store_b32 off, v23, s25
; GISEL-GFX11-NEXT: s_add_u32 s24, s32, 32
; GISEL-GFX11-NEXT: s_add_u32 s25, s32, 36
; GISEL-GFX11-NEXT: scratch_store_b32 off, v24, s24
; GISEL-GFX11-NEXT: scratch_store_b32 off, v25, s25
; GISEL-GFX11-NEXT: s_add_u32 s24, s32, 40
; GISEL-GFX11-NEXT: v_dual_mov_b32 v34, v10 :: v_dual_mov_b32 v35, v11
; GISEL-GFX11-NEXT: v_dual_mov_b32 v36, v12 :: v_dual_mov_b32 v37, v13
; GISEL-GFX11-NEXT: v_dual_mov_b32 v38, v14 :: v_dual_mov_b32 v39, v15
; GISEL-GFX11-NEXT: s_add_u32 s25, s32, 44
; GISEL-GFX11-NEXT: scratch_store_b32 off, v26, s24
; GISEL-GFX11-NEXT: scratch_store_b32 off, v27, s25
; GISEL-GFX11-NEXT: s_add_u32 s24, s32, 48
; GISEL-GFX11-NEXT: s_add_u32 s25, s32, 52
; GISEL-GFX11-NEXT: scratch_store_b32 off, v28, s24
; GISEL-GFX11-NEXT: scratch_store_b32 off, v29, s25
; GISEL-GFX11-NEXT: s_add_u32 s24, s32, 56
; GISEL-GFX11-NEXT: s_add_u32 s25, s32, 60
; GISEL-GFX11-NEXT: scratch_store_b32 off, v30, s24
; GISEL-GFX11-NEXT: scratch_store_b32 off, v31, s25
; GISEL-GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
; GISEL-GFX11-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3
; GISEL-GFX11-NEXT: v_dual_mov_b32 v4, s4 :: v_dual_mov_b32 v5, s5
; GISEL-GFX11-NEXT: v_dual_mov_b32 v6, s6 :: v_dual_mov_b32 v7, s7
; GISEL-GFX11-NEXT: v_dual_mov_b32 v8, s8 :: v_dual_mov_b32 v9, s9
; GISEL-GFX11-NEXT: v_dual_mov_b32 v10, s10 :: v_dual_mov_b32 v11, s11
; GISEL-GFX11-NEXT: v_dual_mov_b32 v12, s12 :: v_dual_mov_b32 v13, s13
; GISEL-GFX11-NEXT: v_dual_mov_b32 v14, s14 :: v_dual_mov_b32 v15, s15
; GISEL-GFX11-NEXT: v_dual_mov_b32 v16, s16 :: v_dual_mov_b32 v17, s17
; GISEL-GFX11-NEXT: v_dual_mov_b32 v18, s18 :: v_dual_mov_b32 v19, s19
; GISEL-GFX11-NEXT: v_dual_mov_b32 v20, s20 :: v_dual_mov_b32 v21, s21
; GISEL-GFX11-NEXT: v_dual_mov_b32 v22, s22 :: v_dual_mov_b32 v23, s23
; GISEL-GFX11-NEXT: v_dual_mov_b32 v24, v32 :: v_dual_mov_b32 v25, v33
; GISEL-GFX11-NEXT: v_dual_mov_b32 v26, v34 :: v_dual_mov_b32 v27, v35
; GISEL-GFX11-NEXT: v_dual_mov_b32 v28, v36 :: v_dual_mov_b32 v29, v37
; GISEL-GFX11-NEXT: v_dual_mov_b32 v30, v38 :: v_dual_mov_b32 v31, v39
; GISEL-GFX11-NEXT: s_mov_b32 s24, use@abs32@lo
; GISEL-GFX11-NEXT: s_mov_b32 s25, use@abs32@hi
; GISEL-GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GISEL-GFX11-NEXT: s_swappc_b64 s[30:31], s[24:25]
; GISEL-GFX11-NEXT: s_endpgm
;
; GISEL-GFX10-LABEL: amdgpu_cs_chain_spill:
; GISEL-GFX10: ; %bb.0:
; GISEL-GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-GFX10-NEXT: v_mov_b32_e32 v32, v8
; GISEL-GFX10-NEXT: v_mov_b32_e32 v33, v9
; GISEL-GFX10-NEXT: v_mov_b32_e32 v34, v10
; GISEL-GFX10-NEXT: v_mov_b32_e32 v35, v11
; GISEL-GFX10-NEXT: v_mov_b32_e32 v36, v12
; GISEL-GFX10-NEXT: v_mov_b32_e32 v37, v13
; GISEL-GFX10-NEXT: v_mov_b32_e32 v38, v14
; GISEL-GFX10-NEXT: v_mov_b32_e32 v39, v15
; GISEL-GFX10-NEXT: s_mov_b32 s32, 0
; GISEL-GFX10-NEXT: buffer_store_dword v16, off, s[48:51], s32
; GISEL-GFX10-NEXT: buffer_store_dword v17, off, s[48:51], s32 offset:4
; GISEL-GFX10-NEXT: buffer_store_dword v18, off, s[48:51], s32 offset:8
; GISEL-GFX10-NEXT: buffer_store_dword v19, off, s[48:51], s32 offset:12
; GISEL-GFX10-NEXT: buffer_store_dword v20, off, s[48:51], s32 offset:16
; GISEL-GFX10-NEXT: buffer_store_dword v21, off, s[48:51], s32 offset:20
; GISEL-GFX10-NEXT: buffer_store_dword v22, off, s[48:51], s32 offset:24
; GISEL-GFX10-NEXT: buffer_store_dword v23, off, s[48:51], s32 offset:28
; GISEL-GFX10-NEXT: buffer_store_dword v24, off, s[48:51], s32 offset:32
; GISEL-GFX10-NEXT: buffer_store_dword v25, off, s[48:51], s32 offset:36
; GISEL-GFX10-NEXT: buffer_store_dword v26, off, s[48:51], s32 offset:40
; GISEL-GFX10-NEXT: buffer_store_dword v27, off, s[48:51], s32 offset:44
; GISEL-GFX10-NEXT: buffer_store_dword v28, off, s[48:51], s32 offset:48
; GISEL-GFX10-NEXT: buffer_store_dword v29, off, s[48:51], s32 offset:52
; GISEL-GFX10-NEXT: buffer_store_dword v30, off, s[48:51], s32 offset:56
; GISEL-GFX10-NEXT: buffer_store_dword v31, off, s[48:51], s32 offset:60
; GISEL-GFX10-NEXT: v_mov_b32_e32 v0, s0
; GISEL-GFX10-NEXT: v_mov_b32_e32 v1, s1
; GISEL-GFX10-NEXT: v_mov_b32_e32 v2, s2
; GISEL-GFX10-NEXT: v_mov_b32_e32 v3, s3
; GISEL-GFX10-NEXT: v_mov_b32_e32 v4, s4
; GISEL-GFX10-NEXT: v_mov_b32_e32 v5, s5
; GISEL-GFX10-NEXT: v_mov_b32_e32 v6, s6
; GISEL-GFX10-NEXT: v_mov_b32_e32 v7, s7
; GISEL-GFX10-NEXT: v_mov_b32_e32 v8, s8
; GISEL-GFX10-NEXT: v_mov_b32_e32 v9, s9
; GISEL-GFX10-NEXT: v_mov_b32_e32 v10, s10
; GISEL-GFX10-NEXT: v_mov_b32_e32 v11, s11
; GISEL-GFX10-NEXT: v_mov_b32_e32 v12, s12
; GISEL-GFX10-NEXT: v_mov_b32_e32 v13, s13
; GISEL-GFX10-NEXT: v_mov_b32_e32 v14, s14
; GISEL-GFX10-NEXT: v_mov_b32_e32 v15, s15
; GISEL-GFX10-NEXT: v_mov_b32_e32 v16, s16
; GISEL-GFX10-NEXT: v_mov_b32_e32 v17, s17
; GISEL-GFX10-NEXT: v_mov_b32_e32 v18, s18
; GISEL-GFX10-NEXT: v_mov_b32_e32 v19, s19
; GISEL-GFX10-NEXT: v_mov_b32_e32 v20, s20
; GISEL-GFX10-NEXT: v_mov_b32_e32 v21, s21
; GISEL-GFX10-NEXT: v_mov_b32_e32 v22, s22
; GISEL-GFX10-NEXT: v_mov_b32_e32 v23, s23
; GISEL-GFX10-NEXT: v_mov_b32_e32 v24, v32
; GISEL-GFX10-NEXT: v_mov_b32_e32 v25, v33
; GISEL-GFX10-NEXT: v_mov_b32_e32 v26, v34
; GISEL-GFX10-NEXT: v_mov_b32_e32 v27, v35
; GISEL-GFX10-NEXT: v_mov_b32_e32 v28, v36
; GISEL-GFX10-NEXT: v_mov_b32_e32 v29, v37
; GISEL-GFX10-NEXT: v_mov_b32_e32 v30, v38
; GISEL-GFX10-NEXT: v_mov_b32_e32 v31, v39
; GISEL-GFX10-NEXT: s_mov_b64 s[0:1], s[48:49]
; GISEL-GFX10-NEXT: s_mov_b32 s24, use@abs32@lo
; GISEL-GFX10-NEXT: s_mov_b32 s25, use@abs32@hi
; GISEL-GFX10-NEXT: s_mov_b64 s[2:3], s[50:51]
; GISEL-GFX10-NEXT: s_swappc_b64 s[30:31], s[24:25]
; GISEL-GFX10-NEXT: s_endpgm
;
; DAGISEL-GFX11-LABEL: amdgpu_cs_chain_spill:
; DAGISEL-GFX11: ; %bb.0:
; DAGISEL-GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; DAGISEL-GFX11-NEXT: s_mov_b32 s32, 0
; DAGISEL-GFX11-NEXT: v_dual_mov_b32 v32, v15 :: v_dual_mov_b32 v33, v14
; DAGISEL-GFX11-NEXT: s_add_i32 s24, s32, 60
; DAGISEL-GFX11-NEXT: scratch_store_b32 off, v16, s32
; DAGISEL-GFX11-NEXT: scratch_store_b32 off, v31, s24
; DAGISEL-GFX11-NEXT: s_add_i32 s24, s32, 56
; DAGISEL-GFX11-NEXT: s_add_i32 s25, s32, 52
; DAGISEL-GFX11-NEXT: scratch_store_b32 off, v30, s24
; DAGISEL-GFX11-NEXT: scratch_store_b32 off, v29, s25
; DAGISEL-GFX11-NEXT: s_add_i32 s24, s32, 48
; DAGISEL-GFX11-NEXT: s_add_i32 s25, s32, 44
; DAGISEL-GFX11-NEXT: scratch_store_b32 off, v28, s24
; DAGISEL-GFX11-NEXT: scratch_store_b32 off, v27, s25
; DAGISEL-GFX11-NEXT: s_add_i32 s24, s32, 40
; DAGISEL-GFX11-NEXT: s_add_i32 s25, s32, 36
; DAGISEL-GFX11-NEXT: scratch_store_b32 off, v26, s24
; DAGISEL-GFX11-NEXT: scratch_store_b32 off, v25, s25
; DAGISEL-GFX11-NEXT: s_add_i32 s24, s32, 32
; DAGISEL-GFX11-NEXT: s_add_i32 s25, s32, 28
; DAGISEL-GFX11-NEXT: scratch_store_b32 off, v24, s24
; DAGISEL-GFX11-NEXT: scratch_store_b32 off, v23, s25
; DAGISEL-GFX11-NEXT: s_add_i32 s24, s32, 24
; DAGISEL-GFX11-NEXT: v_dual_mov_b32 v34, v13 :: v_dual_mov_b32 v35, v12
; DAGISEL-GFX11-NEXT: v_dual_mov_b32 v36, v11 :: v_dual_mov_b32 v37, v10
; DAGISEL-GFX11-NEXT: v_dual_mov_b32 v38, v9 :: v_dual_mov_b32 v39, v8
; DAGISEL-GFX11-NEXT: s_add_i32 s25, s32, 20
; DAGISEL-GFX11-NEXT: scratch_store_b32 off, v22, s24
; DAGISEL-GFX11-NEXT: scratch_store_b32 off, v21, s25
; DAGISEL-GFX11-NEXT: s_add_i32 s24, s32, 16
; DAGISEL-GFX11-NEXT: s_add_i32 s25, s32, 12
; DAGISEL-GFX11-NEXT: scratch_store_b32 off, v20, s24
; DAGISEL-GFX11-NEXT: scratch_store_b32 off, v19, s25
; DAGISEL-GFX11-NEXT: s_add_i32 s24, s32, 8
; DAGISEL-GFX11-NEXT: s_add_i32 s25, s32, 4
; DAGISEL-GFX11-NEXT: scratch_store_b32 off, v18, s24
; DAGISEL-GFX11-NEXT: scratch_store_b32 off, v17, s25
; DAGISEL-GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
; DAGISEL-GFX11-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3
; DAGISEL-GFX11-NEXT: v_dual_mov_b32 v4, s4 :: v_dual_mov_b32 v5, s5
; DAGISEL-GFX11-NEXT: v_dual_mov_b32 v6, s6 :: v_dual_mov_b32 v7, s7
; DAGISEL-GFX11-NEXT: v_dual_mov_b32 v8, s8 :: v_dual_mov_b32 v9, s9
; DAGISEL-GFX11-NEXT: v_dual_mov_b32 v10, s10 :: v_dual_mov_b32 v11, s11
; DAGISEL-GFX11-NEXT: v_dual_mov_b32 v12, s12 :: v_dual_mov_b32 v13, s13
; DAGISEL-GFX11-NEXT: v_dual_mov_b32 v14, s14 :: v_dual_mov_b32 v15, s15
; DAGISEL-GFX11-NEXT: v_dual_mov_b32 v16, s16 :: v_dual_mov_b32 v17, s17
; DAGISEL-GFX11-NEXT: v_dual_mov_b32 v18, s18 :: v_dual_mov_b32 v19, s19
; DAGISEL-GFX11-NEXT: v_dual_mov_b32 v20, s20 :: v_dual_mov_b32 v21, s21
; DAGISEL-GFX11-NEXT: v_dual_mov_b32 v22, s22 :: v_dual_mov_b32 v23, s23
; DAGISEL-GFX11-NEXT: v_dual_mov_b32 v24, v39 :: v_dual_mov_b32 v25, v38
; DAGISEL-GFX11-NEXT: v_dual_mov_b32 v26, v37 :: v_dual_mov_b32 v27, v36
; DAGISEL-GFX11-NEXT: v_dual_mov_b32 v28, v35 :: v_dual_mov_b32 v29, v34
; DAGISEL-GFX11-NEXT: v_dual_mov_b32 v30, v33 :: v_dual_mov_b32 v31, v32
; DAGISEL-GFX11-NEXT: s_mov_b32 s25, use@abs32@hi
; DAGISEL-GFX11-NEXT: s_mov_b32 s24, use@abs32@lo
; DAGISEL-GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; DAGISEL-GFX11-NEXT: s_swappc_b64 s[30:31], s[24:25]
; DAGISEL-GFX11-NEXT: s_endpgm
;
; DAGISEL-GFX10-LABEL: amdgpu_cs_chain_spill:
; DAGISEL-GFX10: ; %bb.0:
; DAGISEL-GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v32, v15
; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v33, v14
; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v34, v13
; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v35, v12
; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v36, v11
; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v37, v10
; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v38, v9
; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v39, v8
; DAGISEL-GFX10-NEXT: s_mov_b32 s32, 0
; DAGISEL-GFX10-NEXT: buffer_store_dword v16, off, s[48:51], s32
; DAGISEL-GFX10-NEXT: buffer_store_dword v17, off, s[48:51], s32 offset:4
; DAGISEL-GFX10-NEXT: buffer_store_dword v18, off, s[48:51], s32 offset:8
; DAGISEL-GFX10-NEXT: buffer_store_dword v19, off, s[48:51], s32 offset:12
; DAGISEL-GFX10-NEXT: buffer_store_dword v20, off, s[48:51], s32 offset:16
; DAGISEL-GFX10-NEXT: buffer_store_dword v21, off, s[48:51], s32 offset:20
; DAGISEL-GFX10-NEXT: buffer_store_dword v22, off, s[48:51], s32 offset:24
; DAGISEL-GFX10-NEXT: buffer_store_dword v23, off, s[48:51], s32 offset:28
; DAGISEL-GFX10-NEXT: buffer_store_dword v24, off, s[48:51], s32 offset:32
; DAGISEL-GFX10-NEXT: buffer_store_dword v25, off, s[48:51], s32 offset:36
; DAGISEL-GFX10-NEXT: buffer_store_dword v26, off, s[48:51], s32 offset:40
; DAGISEL-GFX10-NEXT: buffer_store_dword v27, off, s[48:51], s32 offset:44
; DAGISEL-GFX10-NEXT: buffer_store_dword v28, off, s[48:51], s32 offset:48
; DAGISEL-GFX10-NEXT: buffer_store_dword v29, off, s[48:51], s32 offset:52
; DAGISEL-GFX10-NEXT: buffer_store_dword v30, off, s[48:51], s32 offset:56
; DAGISEL-GFX10-NEXT: buffer_store_dword v31, off, s[48:51], s32 offset:60
; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v0, s0
; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v1, s1
; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v2, s2
; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v3, s3
; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v4, s4
; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v5, s5
; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v6, s6
; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v7, s7
; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v8, s8
; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v9, s9
; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v10, s10
; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v11, s11
; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v12, s12
; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v13, s13
; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v14, s14
; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v15, s15
; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v16, s16
; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v17, s17
; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v18, s18
; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v19, s19
; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v20, s20
; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v21, s21
; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v22, s22
; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v23, s23
; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v24, v39
; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v25, v38
; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v26, v37
; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v27, v36
; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v28, v35
; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v29, v34
; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v30, v33
; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v31, v32
; DAGISEL-GFX10-NEXT: s_mov_b64 s[0:1], s[48:49]
; DAGISEL-GFX10-NEXT: s_mov_b32 s25, use@abs32@hi
; DAGISEL-GFX10-NEXT: s_mov_b32 s24, use@abs32@lo
; DAGISEL-GFX10-NEXT: s_mov_b64 s[2:3], s[50:51]
; DAGISEL-GFX10-NEXT: s_swappc_b64 s[30:31], s[24:25]
; DAGISEL-GFX10-NEXT: s_endpgm
  call amdgpu_gfx void @use(<24 x i32> %sgprs, <24 x i32> %vgprs)
  ret void
}
|
||
|
|
||
|
; Chain function with a stack object: allocates a [3 x i32] in addrspace(5),
; stores 42 to it and passes its address to @use. According to the assertions
; below:
;   * the store of 42 lands at scratch offset 4;
;   * the pointer passed in v0 is the constant 4;
;   * s32 is set to 16 on the gfx1100 runs and to 0x200 on the gfx1030 runs
;     before the call;
;   * the gfx1030 runs address scratch through s[48:51] and copy that
;     descriptor into s[0:3] before the call.
define amdgpu_cs_chain void @alloca_and_call() {
; GISEL-GFX11-LABEL: alloca_and_call:
; GISEL-GFX11: ; %bb.0: ; %.entry
; GISEL-GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-GFX11-NEXT: v_mov_b32_e32 v0, 42
; GISEL-GFX11-NEXT: s_mov_b32 s0, use@abs32@lo
; GISEL-GFX11-NEXT: s_mov_b32 s1, use@abs32@hi
; GISEL-GFX11-NEXT: s_mov_b32 s32, 16
; GISEL-GFX11-NEXT: scratch_store_b32 off, v0, off offset:4
; GISEL-GFX11-NEXT: v_mov_b32_e32 v0, 4
; GISEL-GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1]
; GISEL-GFX11-NEXT: s_endpgm
;
; GISEL-GFX10-LABEL: alloca_and_call:
; GISEL-GFX10: ; %bb.0: ; %.entry
; GISEL-GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-GFX10-NEXT: v_mov_b32_e32 v0, 42
; GISEL-GFX10-NEXT: s_mov_b64 s[0:1], s[48:49]
; GISEL-GFX10-NEXT: s_mov_b32 s4, use@abs32@lo
; GISEL-GFX10-NEXT: s_mov_b32 s5, use@abs32@hi
; GISEL-GFX10-NEXT: s_mov_b64 s[2:3], s[50:51]
; GISEL-GFX10-NEXT: buffer_store_dword v0, off, s[48:51], 0 offset:4
; GISEL-GFX10-NEXT: v_mov_b32_e32 v0, 4
; GISEL-GFX10-NEXT: s_movk_i32 s32, 0x200
; GISEL-GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5]
; GISEL-GFX10-NEXT: s_endpgm
;
; DAGISEL-GFX11-LABEL: alloca_and_call:
; DAGISEL-GFX11: ; %bb.0: ; %.entry
; DAGISEL-GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; DAGISEL-GFX11-NEXT: v_mov_b32_e32 v0, 42
; DAGISEL-GFX11-NEXT: s_mov_b32 s1, use@abs32@hi
; DAGISEL-GFX11-NEXT: s_mov_b32 s0, use@abs32@lo
; DAGISEL-GFX11-NEXT: s_mov_b32 s32, 16
; DAGISEL-GFX11-NEXT: scratch_store_b32 off, v0, off offset:4
; DAGISEL-GFX11-NEXT: v_mov_b32_e32 v0, 4
; DAGISEL-GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1]
; DAGISEL-GFX11-NEXT: s_endpgm
;
; DAGISEL-GFX10-LABEL: alloca_and_call:
; DAGISEL-GFX10: ; %bb.0: ; %.entry
; DAGISEL-GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v0, 42
; DAGISEL-GFX10-NEXT: s_mov_b64 s[0:1], s[48:49]
; DAGISEL-GFX10-NEXT: s_mov_b32 s5, use@abs32@hi
; DAGISEL-GFX10-NEXT: s_mov_b32 s4, use@abs32@lo
; DAGISEL-GFX10-NEXT: s_mov_b64 s[2:3], s[50:51]
; DAGISEL-GFX10-NEXT: buffer_store_dword v0, off, s[48:51], 0 offset:4
; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v0, 4
; DAGISEL-GFX10-NEXT: s_movk_i32 s32, 0x200
; DAGISEL-GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5]
; DAGISEL-GFX10-NEXT: s_endpgm
.entry:
  %v = alloca [3 x i32], addrspace(5)
  store i32 42, ptr addrspace(5) %v
  call amdgpu_gfx void @use(ptr addrspace(5) %v)
  ret void
}
|
||
|
|
||
|
; amdgpu_cs entry point that hands control to @chain_callee through the
; llvm.amdgcn.cs.chain intrinsic, passing %a as the `inreg` operand, %b as the
; plain operand, -1 as the second operand and 0 as the trailing i32.
; According to the assertions below:
;   * no entry s_waitcnt is expected for this calling convention;
;   * s0 is saved in s3 before the inline asm and restored afterwards;
;   * %b arrives in v0-v2 and leaves in v8-v10, with v0 routed through v3
;     across the inline asm;
;   * exec_lo is set to -1 and control is transferred with s_setpc_b64 (a jump,
;     not a call);
;   * the gfx1030 runs load four dwords into s[100:103], adjust them, and copy
;     them into s[48:51] before the jump.
define amdgpu_cs void @cs_to_chain(<3 x i32> inreg %a, <3 x i32> %b) {
; GISEL-GFX11-LABEL: cs_to_chain:
; GISEL-GFX11: ; %bb.0:
; GISEL-GFX11-NEXT: v_dual_mov_b32 v3, v0 :: v_dual_mov_b32 v10, v2
; GISEL-GFX11-NEXT: s_mov_b32 s3, s0
; GISEL-GFX11-NEXT: ;;#ASMSTART
; GISEL-GFX11-NEXT: s_nop
; GISEL-GFX11-NEXT: ;;#ASMEND
; GISEL-GFX11-NEXT: s_mov_b32 s4, chain_callee@abs32@lo
; GISEL-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GISEL-GFX11-NEXT: v_dual_mov_b32 v8, v3 :: v_dual_mov_b32 v9, v1
; GISEL-GFX11-NEXT: s_mov_b32 s5, chain_callee@abs32@hi
; GISEL-GFX11-NEXT: s_mov_b32 s0, s3
; GISEL-GFX11-NEXT: s_mov_b32 exec_lo, -1
; GISEL-GFX11-NEXT: s_setpc_b64 s[4:5]
;
; GISEL-GFX10-LABEL: cs_to_chain:
; GISEL-GFX10: ; %bb.0:
; GISEL-GFX10-NEXT: s_getpc_b64 s[100:101]
; GISEL-GFX10-NEXT: s_mov_b32 s100, s0
; GISEL-GFX10-NEXT: v_mov_b32_e32 v3, v0
; GISEL-GFX10-NEXT: s_load_dwordx4 s[100:103], s[100:101], 0x10
; GISEL-GFX10-NEXT: v_mov_b32_e32 v9, v1
; GISEL-GFX10-NEXT: v_mov_b32_e32 v10, v2
; GISEL-GFX10-NEXT: s_mov_b32 s4, chain_callee@abs32@lo
; GISEL-GFX10-NEXT: s_mov_b32 s5, chain_callee@abs32@hi
; GISEL-GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GISEL-GFX10-NEXT: s_bitset0_b32 s103, 21
; GISEL-GFX10-NEXT: s_add_u32 s100, s100, s3
; GISEL-GFX10-NEXT: s_mov_b32 s3, s0
; GISEL-GFX10-NEXT: ;;#ASMSTART
; GISEL-GFX10-NEXT: s_nop
; GISEL-GFX10-NEXT: ;;#ASMEND
; GISEL-GFX10-NEXT: s_addc_u32 s101, s101, 0
; GISEL-GFX10-NEXT: v_mov_b32_e32 v8, v3
; GISEL-GFX10-NEXT: s_mov_b64 s[48:49], s[100:101]
; GISEL-GFX10-NEXT: s_mov_b32 s0, s3
; GISEL-GFX10-NEXT: s_mov_b64 s[50:51], s[102:103]
; GISEL-GFX10-NEXT: s_mov_b32 exec_lo, -1
; GISEL-GFX10-NEXT: s_setpc_b64 s[4:5]
;
; DAGISEL-GFX11-LABEL: cs_to_chain:
; DAGISEL-GFX11: ; %bb.0:
; DAGISEL-GFX11-NEXT: v_dual_mov_b32 v3, v0 :: v_dual_mov_b32 v10, v2
; DAGISEL-GFX11-NEXT: s_mov_b32 s3, s0
; DAGISEL-GFX11-NEXT: ;;#ASMSTART
; DAGISEL-GFX11-NEXT: s_nop
; DAGISEL-GFX11-NEXT: ;;#ASMEND
; DAGISEL-GFX11-NEXT: s_mov_b32 s5, chain_callee@abs32@hi
; DAGISEL-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; DAGISEL-GFX11-NEXT: v_dual_mov_b32 v8, v3 :: v_dual_mov_b32 v9, v1
; DAGISEL-GFX11-NEXT: s_mov_b32 s4, chain_callee@abs32@lo
; DAGISEL-GFX11-NEXT: s_mov_b32 s0, s3
; DAGISEL-GFX11-NEXT: s_mov_b32 exec_lo, -1
; DAGISEL-GFX11-NEXT: s_setpc_b64 s[4:5]
;
; DAGISEL-GFX10-LABEL: cs_to_chain:
; DAGISEL-GFX10: ; %bb.0:
; DAGISEL-GFX10-NEXT: s_getpc_b64 s[100:101]
; DAGISEL-GFX10-NEXT: s_mov_b32 s100, s0
; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v3, v0
; DAGISEL-GFX10-NEXT: s_load_dwordx4 s[100:103], s[100:101], 0x10
; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v9, v1
; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v10, v2
; DAGISEL-GFX10-NEXT: s_mov_b32 s5, chain_callee@abs32@hi
; DAGISEL-GFX10-NEXT: s_mov_b32 s4, chain_callee@abs32@lo
; DAGISEL-GFX10-NEXT: s_waitcnt lgkmcnt(0)
; DAGISEL-GFX10-NEXT: s_bitset0_b32 s103, 21
; DAGISEL-GFX10-NEXT: s_add_u32 s100, s100, s3
; DAGISEL-GFX10-NEXT: s_mov_b32 s3, s0
; DAGISEL-GFX10-NEXT: ;;#ASMSTART
; DAGISEL-GFX10-NEXT: s_nop
; DAGISEL-GFX10-NEXT: ;;#ASMEND
; DAGISEL-GFX10-NEXT: s_addc_u32 s101, s101, 0
; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v8, v3
; DAGISEL-GFX10-NEXT: s_mov_b64 s[48:49], s[100:101]
; DAGISEL-GFX10-NEXT: s_mov_b64 s[50:51], s[102:103]
; DAGISEL-GFX10-NEXT: s_mov_b32 s0, s3
; DAGISEL-GFX10-NEXT: s_mov_b32 exec_lo, -1
; DAGISEL-GFX10-NEXT: s_setpc_b64 s[4:5]
  ; Inline asm that clobbers v0, v8, v16 and s0 right before the chain call.
  call void asm "s_nop", "~{v0},~{v8},~{v16},~{s0}"()
  call void(ptr, i32, <3 x i32>, <3 x i32>, i32, ...) @llvm.amdgcn.cs.chain.v3i32(ptr @chain_callee, i32 -1, <3 x i32> inreg %a, <3 x i32> %b, i32 0)
  unreachable
}
|
||
|
|
||
|
; Chain function that hands control to @chain_callee through the
; llvm.amdgcn.cs.chain intrinsic, with the same operands as the amdgpu_cs
; variant: %a as the `inreg` operand, %b as the plain operand, -1 as the second
; operand and 0 as the trailing i32.
; According to the assertions below:
;   * s0 is saved in s3 and v8 in v1 before the inline asm, and both are
;     restored afterwards;
;   * exec_lo is set to -1 and control is transferred with s_setpc_b64;
;   * the expected gfx1030 and gfx1100 instruction sequences are the same
;     within each instruction selector, with no copy into s[48:51].
define amdgpu_cs_chain void @chain_to_chain(<3 x i32> inreg %a, <3 x i32> %b) {
; GISEL-GFX11-LABEL: chain_to_chain:
; GISEL-GFX11: ; %bb.0:
; GISEL-GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-GFX11-NEXT: v_mov_b32_e32 v1, v8
; GISEL-GFX11-NEXT: s_mov_b32 s3, s0
; GISEL-GFX11-NEXT: ;;#ASMSTART
; GISEL-GFX11-NEXT: s_nop
; GISEL-GFX11-NEXT: ;;#ASMEND
; GISEL-GFX11-NEXT: s_mov_b32 s4, chain_callee@abs32@lo
; GISEL-GFX11-NEXT: s_mov_b32 s5, chain_callee@abs32@hi
; GISEL-GFX11-NEXT: v_mov_b32_e32 v8, v1
; GISEL-GFX11-NEXT: s_mov_b32 s0, s3
; GISEL-GFX11-NEXT: s_mov_b32 exec_lo, -1
; GISEL-GFX11-NEXT: s_setpc_b64 s[4:5]
;
; GISEL-GFX10-LABEL: chain_to_chain:
; GISEL-GFX10: ; %bb.0:
; GISEL-GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-GFX10-NEXT: v_mov_b32_e32 v1, v8
; GISEL-GFX10-NEXT: s_mov_b32 s3, s0
; GISEL-GFX10-NEXT: ;;#ASMSTART
; GISEL-GFX10-NEXT: s_nop
; GISEL-GFX10-NEXT: ;;#ASMEND
; GISEL-GFX10-NEXT: s_mov_b32 s4, chain_callee@abs32@lo
; GISEL-GFX10-NEXT: s_mov_b32 s5, chain_callee@abs32@hi
; GISEL-GFX10-NEXT: v_mov_b32_e32 v8, v1
; GISEL-GFX10-NEXT: s_mov_b32 s0, s3
; GISEL-GFX10-NEXT: s_mov_b32 exec_lo, -1
; GISEL-GFX10-NEXT: s_setpc_b64 s[4:5]
;
; DAGISEL-GFX11-LABEL: chain_to_chain:
; DAGISEL-GFX11: ; %bb.0:
; DAGISEL-GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; DAGISEL-GFX11-NEXT: v_mov_b32_e32 v1, v8
; DAGISEL-GFX11-NEXT: s_mov_b32 s3, s0
; DAGISEL-GFX11-NEXT: ;;#ASMSTART
; DAGISEL-GFX11-NEXT: s_nop
; DAGISEL-GFX11-NEXT: ;;#ASMEND
; DAGISEL-GFX11-NEXT: s_mov_b32 s5, chain_callee@abs32@hi
; DAGISEL-GFX11-NEXT: s_mov_b32 s4, chain_callee@abs32@lo
; DAGISEL-GFX11-NEXT: v_mov_b32_e32 v8, v1
; DAGISEL-GFX11-NEXT: s_mov_b32 s0, s3
; DAGISEL-GFX11-NEXT: s_mov_b32 exec_lo, -1
; DAGISEL-GFX11-NEXT: s_setpc_b64 s[4:5]
;
; DAGISEL-GFX10-LABEL: chain_to_chain:
; DAGISEL-GFX10: ; %bb.0:
; DAGISEL-GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v1, v8
; DAGISEL-GFX10-NEXT: s_mov_b32 s3, s0
; DAGISEL-GFX10-NEXT: ;;#ASMSTART
; DAGISEL-GFX10-NEXT: s_nop
; DAGISEL-GFX10-NEXT: ;;#ASMEND
; DAGISEL-GFX10-NEXT: s_mov_b32 s5, chain_callee@abs32@hi
; DAGISEL-GFX10-NEXT: s_mov_b32 s4, chain_callee@abs32@lo
; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v8, v1
; DAGISEL-GFX10-NEXT: s_mov_b32 s0, s3
; DAGISEL-GFX10-NEXT: s_mov_b32 exec_lo, -1
; DAGISEL-GFX10-NEXT: s_setpc_b64 s[4:5]
  ; Inline asm that clobbers v0, v8, v16 and s0 right before the chain call.
  call void asm "s_nop", "~{v0},~{v8},~{v16},~{s0}"()
  call void(ptr, i32, <3 x i32>, <3 x i32>, i32, ...) @llvm.amdgcn.cs.chain.v3i32(ptr @chain_callee, i32 -1, <3 x i32> inreg %a, <3 x i32> %b, i32 0)
  unreachable
}
|
||
|
|
||
|
; Chain call whose first VGPR argument element is set.inactive(3, 4) passed
; through wwm. All four run configurations expect the same shape: 3 is written
; to v1, exec_lo is inverted, 4 is written to v1, exec_lo is inverted back, and
; the value is moved into v8 before the jump to chain_callee.
define amdgpu_cs_chain void @chain_to_chain_wwm(<3 x i32> inreg %a, <3 x i32> %b) {
; GISEL-GFX11-LABEL: chain_to_chain_wwm:
; GISEL-GFX11: ; %bb.0:
; GISEL-GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-GFX11-NEXT: s_mov_b32 s3, s0
; GISEL-GFX11-NEXT: v_mov_b32_e32 v1, 3
; GISEL-GFX11-NEXT: s_not_b32 exec_lo, exec_lo
; GISEL-GFX11-NEXT: v_mov_b32_e32 v1, 4
; GISEL-GFX11-NEXT: s_not_b32 exec_lo, exec_lo
; GISEL-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GISEL-GFX11-NEXT: v_mov_b32_e32 v2, v1
; GISEL-GFX11-NEXT: ;;#ASMSTART
; GISEL-GFX11-NEXT: s_nop
; GISEL-GFX11-NEXT: ;;#ASMEND
; GISEL-GFX11-NEXT: s_mov_b32 s4, chain_callee@abs32@lo
; GISEL-GFX11-NEXT: s_mov_b32 s5, chain_callee@abs32@hi
; GISEL-GFX11-NEXT: s_mov_b32 s0, s3
; GISEL-GFX11-NEXT: v_mov_b32_e32 v8, v2
; GISEL-GFX11-NEXT: s_mov_b32 exec_lo, -1
; GISEL-GFX11-NEXT: s_setpc_b64 s[4:5]
;
; GISEL-GFX10-LABEL: chain_to_chain_wwm:
; GISEL-GFX10: ; %bb.0:
; GISEL-GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-GFX10-NEXT: s_mov_b32 s3, s0
; GISEL-GFX10-NEXT: v_mov_b32_e32 v1, 3
; GISEL-GFX10-NEXT: s_not_b32 exec_lo, exec_lo
; GISEL-GFX10-NEXT: v_mov_b32_e32 v1, 4
; GISEL-GFX10-NEXT: s_not_b32 exec_lo, exec_lo
; GISEL-GFX10-NEXT: v_mov_b32_e32 v2, v1
; GISEL-GFX10-NEXT: ;;#ASMSTART
; GISEL-GFX10-NEXT: s_nop
; GISEL-GFX10-NEXT: ;;#ASMEND
; GISEL-GFX10-NEXT: s_mov_b32 s4, chain_callee@abs32@lo
; GISEL-GFX10-NEXT: s_mov_b32 s5, chain_callee@abs32@hi
; GISEL-GFX10-NEXT: s_mov_b32 s0, s3
; GISEL-GFX10-NEXT: v_mov_b32_e32 v8, v2
; GISEL-GFX10-NEXT: s_mov_b32 exec_lo, -1
; GISEL-GFX10-NEXT: s_setpc_b64 s[4:5]
;
; DAGISEL-GFX11-LABEL: chain_to_chain_wwm:
; DAGISEL-GFX11: ; %bb.0:
; DAGISEL-GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; DAGISEL-GFX11-NEXT: s_mov_b32 s3, s0
; DAGISEL-GFX11-NEXT: v_mov_b32_e32 v1, 3
; DAGISEL-GFX11-NEXT: s_not_b32 exec_lo, exec_lo
; DAGISEL-GFX11-NEXT: v_mov_b32_e32 v1, 4
; DAGISEL-GFX11-NEXT: s_not_b32 exec_lo, exec_lo
; DAGISEL-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; DAGISEL-GFX11-NEXT: v_mov_b32_e32 v2, v1
; DAGISEL-GFX11-NEXT: ;;#ASMSTART
; DAGISEL-GFX11-NEXT: s_nop
; DAGISEL-GFX11-NEXT: ;;#ASMEND
; DAGISEL-GFX11-NEXT: s_mov_b32 s5, chain_callee@abs32@hi
; DAGISEL-GFX11-NEXT: s_mov_b32 s4, chain_callee@abs32@lo
; DAGISEL-GFX11-NEXT: s_mov_b32 s0, s3
; DAGISEL-GFX11-NEXT: v_mov_b32_e32 v8, v2
; DAGISEL-GFX11-NEXT: s_mov_b32 exec_lo, -1
; DAGISEL-GFX11-NEXT: s_setpc_b64 s[4:5]
;
; DAGISEL-GFX10-LABEL: chain_to_chain_wwm:
; DAGISEL-GFX10: ; %bb.0:
; DAGISEL-GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; DAGISEL-GFX10-NEXT: s_mov_b32 s3, s0
; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v1, 3
; DAGISEL-GFX10-NEXT: s_not_b32 exec_lo, exec_lo
; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v1, 4
; DAGISEL-GFX10-NEXT: s_not_b32 exec_lo, exec_lo
; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v2, v1
; DAGISEL-GFX10-NEXT: ;;#ASMSTART
; DAGISEL-GFX10-NEXT: s_nop
; DAGISEL-GFX10-NEXT: ;;#ASMEND
; DAGISEL-GFX10-NEXT: s_mov_b32 s5, chain_callee@abs32@hi
; DAGISEL-GFX10-NEXT: s_mov_b32 s4, chain_callee@abs32@lo
; DAGISEL-GFX10-NEXT: s_mov_b32 s0, s3
; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v8, v2
; DAGISEL-GFX10-NEXT: s_mov_b32 exec_lo, -1
; DAGISEL-GFX10-NEXT: s_setpc_b64 s[4:5]
  ; Value 3 for the first operand, 4 for the second (see set.inactive).
  %lane_val = call i32 @llvm.amdgcn.set.inactive(i32 3, i32 4)
  ; Clobber v0, v8, v16 and s0 between producing the value and consuming it.
  call void asm "s_nop", "~{v0},~{v8},~{v16},~{s0}"()
  %wwm_val = call i32 @llvm.amdgcn.wwm(i32 %lane_val)
  ; Replace element 0 of the incoming VGPR vector with the wwm result.
  %vgpr_args = insertelement <3 x i32> %b, i32 %wwm_val, i32 0
  call void(ptr, i32, <3 x i32>, <3 x i32>, i32, ...) @llvm.amdgcn.cs.chain.v3i32(ptr @chain_callee, i32 -1, <3 x i32> inreg %a, <3 x i32> %vgpr_args, i32 0)
  unreachable
}
|
||
|
|
||
|
; Chain call preceded by inline asm that clobbers v0 through v7 as well as
; v8, v16 and s0. The expected code copies the incoming v8 to v11 and s0 to s3
; ahead of the asm, then copies them back before jumping to chain_callee.
define amdgpu_cs_chain void @chain_to_chain_use_all_v0_v7(<3 x i32> inreg %a, <3 x i32> %b) {
|
||
|
; GISEL-GFX11-LABEL: chain_to_chain_use_all_v0_v7:
|
||
|
; GISEL-GFX11: ; %bb.0:
|
||
|
; GISEL-GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||
|
; GISEL-GFX11-NEXT: v_mov_b32_e32 v11, v8
|
||
|
; GISEL-GFX11-NEXT: s_mov_b32 s3, s0
|
||
|
; GISEL-GFX11-NEXT: ;;#ASMSTART
|
||
|
; GISEL-GFX11-NEXT: s_nop
|
||
|
; GISEL-GFX11-NEXT: ;;#ASMEND
|
||
|
; GISEL-GFX11-NEXT: s_mov_b32 s4, chain_callee@abs32@lo
|
||
|
; GISEL-GFX11-NEXT: s_mov_b32 s5, chain_callee@abs32@hi
|
||
|
; GISEL-GFX11-NEXT: v_mov_b32_e32 v8, v11
|
||
|
; GISEL-GFX11-NEXT: s_mov_b32 s0, s3
|
||
|
; GISEL-GFX11-NEXT: s_mov_b32 exec_lo, -1
|
||
|
; GISEL-GFX11-NEXT: s_setpc_b64 s[4:5]
|
||
|
;
|
||
|
; GISEL-GFX10-LABEL: chain_to_chain_use_all_v0_v7:
|
||
|
; GISEL-GFX10: ; %bb.0:
|
||
|
; GISEL-GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||
|
; GISEL-GFX10-NEXT: v_mov_b32_e32 v11, v8
|
||
|
; GISEL-GFX10-NEXT: s_mov_b32 s3, s0
|
||
|
; GISEL-GFX10-NEXT: ;;#ASMSTART
|
||
|
; GISEL-GFX10-NEXT: s_nop
|
||
|
; GISEL-GFX10-NEXT: ;;#ASMEND
|
||
|
; GISEL-GFX10-NEXT: s_mov_b32 s4, chain_callee@abs32@lo
|
||
|
; GISEL-GFX10-NEXT: s_mov_b32 s5, chain_callee@abs32@hi
|
||
|
; GISEL-GFX10-NEXT: v_mov_b32_e32 v8, v11
|
||
|
; GISEL-GFX10-NEXT: s_mov_b32 s0, s3
|
||
|
; GISEL-GFX10-NEXT: s_mov_b32 exec_lo, -1
|
||
|
; GISEL-GFX10-NEXT: s_setpc_b64 s[4:5]
|
||
|
;
|
||
|
; DAGISEL-GFX11-LABEL: chain_to_chain_use_all_v0_v7:
|
||
|
; DAGISEL-GFX11: ; %bb.0:
|
||
|
; DAGISEL-GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||
|
; DAGISEL-GFX11-NEXT: v_mov_b32_e32 v11, v8
|
||
|
; DAGISEL-GFX11-NEXT: s_mov_b32 s3, s0
|
||
|
; DAGISEL-GFX11-NEXT: ;;#ASMSTART
|
||
|
; DAGISEL-GFX11-NEXT: s_nop
|
||
|
; DAGISEL-GFX11-NEXT: ;;#ASMEND
|
||
|
; DAGISEL-GFX11-NEXT: s_mov_b32 s5, chain_callee@abs32@hi
|
||
|
; DAGISEL-GFX11-NEXT: s_mov_b32 s4, chain_callee@abs32@lo
|
||
|
; DAGISEL-GFX11-NEXT: v_mov_b32_e32 v8, v11
|
||
|
; DAGISEL-GFX11-NEXT: s_mov_b32 s0, s3
|
||
|
; DAGISEL-GFX11-NEXT: s_mov_b32 exec_lo, -1
|
||
|
; DAGISEL-GFX11-NEXT: s_setpc_b64 s[4:5]
|
||
|
;
|
||
|
; DAGISEL-GFX10-LABEL: chain_to_chain_use_all_v0_v7:
|
||
|
; DAGISEL-GFX10: ; %bb.0:
|
||
|
; DAGISEL-GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||
|
; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v11, v8
|
||
|
; DAGISEL-GFX10-NEXT: s_mov_b32 s3, s0
|
||
|
; DAGISEL-GFX10-NEXT: ;;#ASMSTART
|
||
|
; DAGISEL-GFX10-NEXT: s_nop
|
||
|
; DAGISEL-GFX10-NEXT: ;;#ASMEND
|
||
|
; DAGISEL-GFX10-NEXT: s_mov_b32 s5, chain_callee@abs32@hi
|
||
|
; DAGISEL-GFX10-NEXT: s_mov_b32 s4, chain_callee@abs32@lo
|
||
|
; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v8, v11
|
||
|
; DAGISEL-GFX10-NEXT: s_mov_b32 s0, s3
|
||
|
; DAGISEL-GFX10-NEXT: s_mov_b32 exec_lo, -1
|
||
|
; DAGISEL-GFX10-NEXT: s_setpc_b64 s[4:5]
|
||
|
; Clobber list names every register from v0 to v8, plus v16 and s0.
call void asm "s_nop", "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v16},~{s0}"()
|
||
|
; Forward the incoming arguments unchanged to chain_callee.
call void(ptr, i32, <3 x i32>, <3 x i32>, i32, ...) @llvm.amdgcn.cs.chain.v3i32(ptr @chain_callee, i32 -1, <3 x i32> inreg %a, <3 x i32> %b, i32 0)
|
||
|
unreachable
|
||
|
}
|
||
|
|
||
|
; Chain call that forwards fewer values than it received: only elements 0 and 1
; of each incoming <3 x i32> are passed to chain_callee_2 through the v2i32
; form of the chain intrinsic.
define amdgpu_cs_chain void @chain_to_chain_fewer_args(<3 x i32> inreg %a, <3 x i32> %b) {
; GISEL-GFX11-LABEL: chain_to_chain_fewer_args:
; GISEL-GFX11: ; %bb.0:
; GISEL-GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-GFX11-NEXT: v_mov_b32_e32 v1, v8
; GISEL-GFX11-NEXT: s_mov_b32 s2, s0
; GISEL-GFX11-NEXT: ;;#ASMSTART
; GISEL-GFX11-NEXT: s_nop
; GISEL-GFX11-NEXT: ;;#ASMEND
; GISEL-GFX11-NEXT: s_mov_b32 s4, chain_callee_2@abs32@lo
; GISEL-GFX11-NEXT: s_mov_b32 s5, chain_callee_2@abs32@hi
; GISEL-GFX11-NEXT: v_mov_b32_e32 v8, v1
; GISEL-GFX11-NEXT: s_mov_b32 s0, s2
; GISEL-GFX11-NEXT: s_mov_b32 exec_lo, -1
; GISEL-GFX11-NEXT: s_setpc_b64 s[4:5]
;
; GISEL-GFX10-LABEL: chain_to_chain_fewer_args:
; GISEL-GFX10: ; %bb.0:
; GISEL-GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-GFX10-NEXT: v_mov_b32_e32 v1, v8
; GISEL-GFX10-NEXT: s_mov_b32 s2, s0
; GISEL-GFX10-NEXT: ;;#ASMSTART
; GISEL-GFX10-NEXT: s_nop
; GISEL-GFX10-NEXT: ;;#ASMEND
; GISEL-GFX10-NEXT: s_mov_b32 s4, chain_callee_2@abs32@lo
; GISEL-GFX10-NEXT: s_mov_b32 s5, chain_callee_2@abs32@hi
; GISEL-GFX10-NEXT: v_mov_b32_e32 v8, v1
; GISEL-GFX10-NEXT: s_mov_b32 s0, s2
; GISEL-GFX10-NEXT: s_mov_b32 exec_lo, -1
; GISEL-GFX10-NEXT: s_setpc_b64 s[4:5]
;
; DAGISEL-GFX11-LABEL: chain_to_chain_fewer_args:
; DAGISEL-GFX11: ; %bb.0:
; DAGISEL-GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; DAGISEL-GFX11-NEXT: v_mov_b32_e32 v1, v8
; DAGISEL-GFX11-NEXT: s_mov_b32 s2, s0
; DAGISEL-GFX11-NEXT: ;;#ASMSTART
; DAGISEL-GFX11-NEXT: s_nop
; DAGISEL-GFX11-NEXT: ;;#ASMEND
; DAGISEL-GFX11-NEXT: s_mov_b32 s5, chain_callee_2@abs32@hi
; DAGISEL-GFX11-NEXT: s_mov_b32 s4, chain_callee_2@abs32@lo
; DAGISEL-GFX11-NEXT: v_mov_b32_e32 v8, v1
; DAGISEL-GFX11-NEXT: s_mov_b32 s0, s2
; DAGISEL-GFX11-NEXT: s_mov_b32 exec_lo, -1
; DAGISEL-GFX11-NEXT: s_setpc_b64 s[4:5]
;
; DAGISEL-GFX10-LABEL: chain_to_chain_fewer_args:
; DAGISEL-GFX10: ; %bb.0:
; DAGISEL-GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v1, v8
; DAGISEL-GFX10-NEXT: s_mov_b32 s2, s0
; DAGISEL-GFX10-NEXT: ;;#ASMSTART
; DAGISEL-GFX10-NEXT: s_nop
; DAGISEL-GFX10-NEXT: ;;#ASMEND
; DAGISEL-GFX10-NEXT: s_mov_b32 s5, chain_callee_2@abs32@hi
; DAGISEL-GFX10-NEXT: s_mov_b32 s4, chain_callee_2@abs32@lo
; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v8, v1
; DAGISEL-GFX10-NEXT: s_mov_b32 s0, s2
; DAGISEL-GFX10-NEXT: s_mov_b32 exec_lo, -1
; DAGISEL-GFX10-NEXT: s_setpc_b64 s[4:5]
  ; Mask <0, 1> keeps the first two elements of each incoming vector.
  %sgpr_pair = shufflevector <3 x i32> %a, <3 x i32> zeroinitializer, <2 x i32> <i32 0, i32 1>
  %vgpr_pair = shufflevector <3 x i32> %b, <3 x i32> zeroinitializer, <2 x i32> <i32 0, i32 1>
  ; Clobber v0, v8, v16 and s0 ahead of the chain call.
  call void asm "s_nop", "~{v0},~{v8},~{v16},~{s0}"()
  call void(ptr, i32, <2 x i32>, <2 x i32>, i32, ...) @llvm.amdgcn.cs.chain.v2i32(ptr @chain_callee_2, i32 -1, <2 x i32> inreg %sgpr_pair, <2 x i32> %vgpr_pair, i32 0)
  unreachable
}
|
||
|
|
||
|
; Chain call that forwards more values than it received: each incoming
; <3 x i32> is widened to <4 x i32> and passed to chain_callee_4 through the
; v4i32 form of the chain intrinsic. Mask index 3 selects element 0 of the
; zeroinitializer operand, so the extra element is 0; the expected code
; materialises it as "s_mov_b32 s3, 0" and a move of 0 into v11.
;
; The callee is chain_callee_4, whose declared parameters are <4 x i32> and so
; match the arguments passed here (chain_callee_2 takes <2 x i32>).
; NOTE(review): the callee symbol in the expected lines below was adjusted by
; hand to match; rerun utils/update_llc_test_checks.py to confirm the output.
define amdgpu_cs_chain void @chain_to_chain_more_args(<3 x i32> inreg %a, <3 x i32> %b) {
; GISEL-GFX11-LABEL: chain_to_chain_more_args:
; GISEL-GFX11: ; %bb.0:
; GISEL-GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-GFX11-NEXT: v_mov_b32_e32 v1, v8
; GISEL-GFX11-NEXT: s_mov_b32 s3, s0
; GISEL-GFX11-NEXT: ;;#ASMSTART
; GISEL-GFX11-NEXT: s_nop
; GISEL-GFX11-NEXT: ;;#ASMEND
; GISEL-GFX11-NEXT: s_mov_b32 s4, chain_callee_4@abs32@lo
; GISEL-GFX11-NEXT: s_mov_b32 s5, chain_callee_4@abs32@hi
; GISEL-GFX11-NEXT: v_dual_mov_b32 v8, v1 :: v_dual_mov_b32 v11, 0
; GISEL-GFX11-NEXT: s_mov_b32 s0, s3
; GISEL-GFX11-NEXT: s_mov_b32 s3, 0
; GISEL-GFX11-NEXT: s_mov_b32 exec_lo, -1
; GISEL-GFX11-NEXT: s_setpc_b64 s[4:5]
;
; GISEL-GFX10-LABEL: chain_to_chain_more_args:
; GISEL-GFX10: ; %bb.0:
; GISEL-GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-GFX10-NEXT: v_mov_b32_e32 v1, v8
; GISEL-GFX10-NEXT: s_mov_b32 s3, s0
; GISEL-GFX10-NEXT: ;;#ASMSTART
; GISEL-GFX10-NEXT: s_nop
; GISEL-GFX10-NEXT: ;;#ASMEND
; GISEL-GFX10-NEXT: v_mov_b32_e32 v11, 0
; GISEL-GFX10-NEXT: s_mov_b32 s4, chain_callee_4@abs32@lo
; GISEL-GFX10-NEXT: v_mov_b32_e32 v8, v1
; GISEL-GFX10-NEXT: s_mov_b32 s5, chain_callee_4@abs32@hi
; GISEL-GFX10-NEXT: s_mov_b32 s0, s3
; GISEL-GFX10-NEXT: s_mov_b32 s3, 0
; GISEL-GFX10-NEXT: s_mov_b32 exec_lo, -1
; GISEL-GFX10-NEXT: s_setpc_b64 s[4:5]
;
; DAGISEL-GFX11-LABEL: chain_to_chain_more_args:
; DAGISEL-GFX11: ; %bb.0:
; DAGISEL-GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; DAGISEL-GFX11-NEXT: v_mov_b32_e32 v1, v8
; DAGISEL-GFX11-NEXT: s_mov_b32 s3, s0
; DAGISEL-GFX11-NEXT: ;;#ASMSTART
; DAGISEL-GFX11-NEXT: s_nop
; DAGISEL-GFX11-NEXT: ;;#ASMEND
; DAGISEL-GFX11-NEXT: s_mov_b32 s5, chain_callee_4@abs32@hi
; DAGISEL-GFX11-NEXT: s_mov_b32 s4, chain_callee_4@abs32@lo
; DAGISEL-GFX11-NEXT: v_dual_mov_b32 v8, v1 :: v_dual_mov_b32 v11, 0
; DAGISEL-GFX11-NEXT: s_mov_b32 s0, s3
; DAGISEL-GFX11-NEXT: s_mov_b32 s3, 0
; DAGISEL-GFX11-NEXT: s_mov_b32 exec_lo, -1
; DAGISEL-GFX11-NEXT: s_setpc_b64 s[4:5]
;
; DAGISEL-GFX10-LABEL: chain_to_chain_more_args:
; DAGISEL-GFX10: ; %bb.0:
; DAGISEL-GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v1, v8
; DAGISEL-GFX10-NEXT: s_mov_b32 s3, s0
; DAGISEL-GFX10-NEXT: ;;#ASMSTART
; DAGISEL-GFX10-NEXT: s_nop
; DAGISEL-GFX10-NEXT: ;;#ASMEND
; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v11, 0
; DAGISEL-GFX10-NEXT: s_mov_b32 s5, chain_callee_4@abs32@hi
; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v8, v1
; DAGISEL-GFX10-NEXT: s_mov_b32 s4, chain_callee_4@abs32@lo
; DAGISEL-GFX10-NEXT: s_mov_b32 s0, s3
; DAGISEL-GFX10-NEXT: s_mov_b32 s3, 0
; DAGISEL-GFX10-NEXT: s_mov_b32 exec_lo, -1
; DAGISEL-GFX10-NEXT: s_setpc_b64 s[4:5]
  ; Mask <0, 1, 2, 3>: three elements from the incoming vector, then element 0
  ; of the zeroinitializer operand.
  %s = shufflevector <3 x i32> %a, <3 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %v = shufflevector <3 x i32> %b, <3 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  ; Clobber v0, v8, v16 and s0 ahead of the chain call.
  call void asm "s_nop", "~{v0},~{v8},~{v16},~{s0}"()
  call void(ptr, i32, <4 x i32>, <4 x i32>, i32, ...) @llvm.amdgcn.cs.chain.v4i32(ptr @chain_callee_4, i32 -1, <4 x i32> inreg %s, <4 x i32> %v, i32 0)
  unreachable
}
|
||
|
|
||
|
; Volatile store of <1, 2, 3, 4> with align 32 into element %idx of an
; [8 x <4 x i32>] addrspace(5) alloca that is itself align 32. The expected
; code forms the address as (v8 << 4) + 32, materialises the constants and
; stores them; none of the checked instructions modifies a stack pointer.
define amdgpu_cs_chain void @amdgpu_cs_chain_dont_realign_stack(i32 %idx) {
; GISEL-GFX11-LABEL: amdgpu_cs_chain_dont_realign_stack:
; GISEL-GFX11: ; %bb.0:
; GISEL-GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-GFX11-NEXT: s_mov_b32 s3, 4
; GISEL-GFX11-NEXT: s_mov_b32 s2, 3
; GISEL-GFX11-NEXT: s_mov_b32 s1, 2
; GISEL-GFX11-NEXT: s_mov_b32 s0, 1
; GISEL-GFX11-NEXT: v_lshlrev_b32_e32 v0, 4, v8
; GISEL-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GISEL-GFX11-NEXT: v_add_nc_u32_e32 v4, 32, v0
; GISEL-GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v3, s3
; GISEL-GFX11-NEXT: v_dual_mov_b32 v1, s1 :: v_dual_mov_b32 v2, s2
; GISEL-GFX11-NEXT: scratch_store_b128 v4, v[0:3], off dlc
; GISEL-GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GISEL-GFX11-NEXT: s_endpgm
;
; GISEL-GFX10-LABEL: amdgpu_cs_chain_dont_realign_stack:
; GISEL-GFX10: ; %bb.0:
; GISEL-GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-GFX10-NEXT: v_lshlrev_b32_e32 v0, 4, v8
; GISEL-GFX10-NEXT: v_mov_b32_e32 v1, 1
; GISEL-GFX10-NEXT: v_mov_b32_e32 v2, 2
; GISEL-GFX10-NEXT: v_mov_b32_e32 v3, 3
; GISEL-GFX10-NEXT: v_mov_b32_e32 v4, 4
; GISEL-GFX10-NEXT: v_add_nc_u32_e32 v0, 32, v0
; GISEL-GFX10-NEXT: buffer_store_dword v1, v0, s[48:51], 0 offen
; GISEL-GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GISEL-GFX10-NEXT: buffer_store_dword v2, v0, s[48:51], 0 offen offset:4
; GISEL-GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GISEL-GFX10-NEXT: buffer_store_dword v3, v0, s[48:51], 0 offen offset:8
; GISEL-GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GISEL-GFX10-NEXT: buffer_store_dword v4, v0, s[48:51], 0 offen offset:12
; GISEL-GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GISEL-GFX10-NEXT: s_endpgm
;
; DAGISEL-GFX11-LABEL: amdgpu_cs_chain_dont_realign_stack:
; DAGISEL-GFX11: ; %bb.0:
; DAGISEL-GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; DAGISEL-GFX11-NEXT: v_dual_mov_b32 v0, 1 :: v_dual_mov_b32 v1, 2
; DAGISEL-GFX11-NEXT: v_dual_mov_b32 v2, 3 :: v_dual_mov_b32 v3, 4
; DAGISEL-GFX11-NEXT: v_lshl_add_u32 v4, v8, 4, 32
; DAGISEL-GFX11-NEXT: scratch_store_b128 v4, v[0:3], off dlc
; DAGISEL-GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; DAGISEL-GFX11-NEXT: s_endpgm
;
; DAGISEL-GFX10-LABEL: amdgpu_cs_chain_dont_realign_stack:
; DAGISEL-GFX10: ; %bb.0:
; DAGISEL-GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v0, 4
; DAGISEL-GFX10-NEXT: v_lshl_add_u32 v1, v8, 4, 32
; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v2, 3
; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v3, 2
; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v4, 1
; DAGISEL-GFX10-NEXT: buffer_store_dword v0, v1, s[48:51], 0 offen offset:12
; DAGISEL-GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; DAGISEL-GFX10-NEXT: buffer_store_dword v2, v1, s[48:51], 0 offen offset:8
; DAGISEL-GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; DAGISEL-GFX10-NEXT: buffer_store_dword v3, v1, s[48:51], 0 offen offset:4
; DAGISEL-GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; DAGISEL-GFX10-NEXT: buffer_store_dword v4, v1, s[48:51], 0 offen
; DAGISEL-GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; DAGISEL-GFX10-NEXT: s_endpgm
  ; Eight 16-byte slots; the object requests align 32.
  %buf = alloca [8 x <4 x i32>], align 32, addrspace(5)
  ; Address of slot %idx within the buffer.
  %slot = getelementptr inbounds [8 x <4 x i32>], ptr addrspace(5) %buf, i32 0, i32 %idx
  store volatile <4 x i32> <i32 1, i32 2, i32 3, i32 4>, ptr addrspace(5) %slot, align 32
  ret void
}
|
||
|
|
||
|
; llvm.amdgcn.cs.chain overloads, one per argument vector width (2, 3, 4).
declare void @llvm.amdgcn.cs.chain.v2i32(ptr, i32, <2 x i32>, <2 x i32>, i32, ...)
declare void @llvm.amdgcn.cs.chain.v3i32(ptr, i32, <3 x i32>, <3 x i32>, i32, ...)
declare void @llvm.amdgcn.cs.chain.v4i32(ptr, i32, <4 x i32>, <4 x i32>, i32, ...)

; Intrinsics called by chain_to_chain_wwm.
declare i32 @llvm.amdgcn.set.inactive(i32, i32)
declare i32 @llvm.amdgcn.wwm(i32)

; External amdgpu_cs_chain callees taking 2-, 3- and 4-element vectors.
declare amdgpu_cs_chain void @chain_callee_2(<2 x i32> inreg, <2 x i32>)
declare amdgpu_cs_chain void @chain_callee(<3 x i32> inreg, <3 x i32>)
declare amdgpu_cs_chain void @chain_callee_4(<4 x i32> inreg, <4 x i32>)
|