617 lines
24 KiB
LLVM
617 lines
24 KiB
LLVM
|
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||
|
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck %s
|
||
|
|
||
|
; LDS is allocated per-kernel. Module scope variables are gathered into a struct which is
|
||
|
; allocated at address zero, if used by the kernel. Kernel scope variables are gathered into
|
||
|
; a per-kernel struct and allocated immediately after the module scope.
|
||
|
; This test checks that the module and kernel scope variables are allocated in deterministic
|
||
|
; order without spurious alignment padding between the two
|
||
|
|
||
|
; External LDS is checked because it influences LDS padding in general and because it will
|
||
|
; not be moved into either module or kernel struct
|
||
|
|
||
|
@module_variable = addrspace(3) global i16 undef
|
||
|
|
||
|
; Variables are allocated into module scope block when used by a non-kernel function
|
||
|
define void @use_module() #0 {
|
||
|
; CHECK-LABEL: use_module:
|
||
|
; CHECK: ; %bb.0:
|
||
|
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||
|
; CHECK-NEXT: v_mov_b32_e32 v0, 0
|
||
|
; CHECK-NEXT: ds_write_b16 v0, v0
|
||
|
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
|
||
|
; CHECK-NEXT: s_setpc_b64 s[30:31]
|
||
|
store i16 0, ptr addrspace(3) @module_variable
|
||
|
ret void
|
||
|
}
|
||
|
|
||
|
; Variables only used by kernels are specialised and allocated per-kernel
|
||
|
@kernel_normal = addrspace(3) global i16 undef
|
||
|
@kernel_overalign = addrspace(3) global i16 undef, align 4
|
||
|
|
||
|
; External LDS shall not introduce padding between module and kernel scope variables
|
||
|
@extern_normal = external addrspace(3) global [0 x float]
|
||
|
@extern_overalign = external addrspace(3) global [0 x float], align 8
|
||
|
|
||
|
|
||
|
; External LDS does not influence the frame when called indirectly either
|
||
|
define void @use_extern_normal() #0 {
|
||
|
; CHECK-LABEL: use_extern_normal:
|
||
|
; CHECK: ; %bb.0:
|
||
|
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||
|
; CHECK-NEXT: s_getpc_b64 s[6:7]
|
||
|
; CHECK-NEXT: s_add_u32 s6, s6, llvm.amdgcn.dynlds.offset.table@rel32@lo+4
|
||
|
; CHECK-NEXT: s_addc_u32 s7, s7, llvm.amdgcn.dynlds.offset.table@rel32@hi+12
|
||
|
; CHECK-NEXT: s_mov_b32 s4, s15
|
||
|
; CHECK-NEXT: s_ashr_i32 s5, s15, 31
|
||
|
; CHECK-NEXT: v_mov_b32_e32 v0, 0x4048f5c3
|
||
|
; CHECK-NEXT: s_lshl_b64 s[4:5], s[4:5], 2
|
||
|
; CHECK-NEXT: s_add_u32 s4, s4, s6
|
||
|
; CHECK-NEXT: s_addc_u32 s5, s5, s7
|
||
|
; CHECK-NEXT: s_load_dword s4, s[4:5], 0x0
|
||
|
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
|
||
|
; CHECK-NEXT: v_mov_b32_e32 v1, s4
|
||
|
; CHECK-NEXT: ds_write_b32 v1, v0
|
||
|
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
|
||
|
; CHECK-NEXT: s_setpc_b64 s[30:31]
|
||
|
%arrayidx = getelementptr inbounds [0 x float], ptr addrspace(3) @extern_normal, i32 0, i32 0
|
||
|
store float 0x40091EB860000000, ptr addrspace(3) %arrayidx
|
||
|
ret void
|
||
|
}
|
||
|
|
||
|
define void @use_extern_overalign() #0 {
|
||
|
; CHECK-LABEL: use_extern_overalign:
|
||
|
; CHECK: ; %bb.0:
|
||
|
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||
|
; CHECK-NEXT: s_getpc_b64 s[6:7]
|
||
|
; CHECK-NEXT: s_add_u32 s6, s6, llvm.amdgcn.dynlds.offset.table@rel32@lo+4
|
||
|
; CHECK-NEXT: s_addc_u32 s7, s7, llvm.amdgcn.dynlds.offset.table@rel32@hi+12
|
||
|
; CHECK-NEXT: s_mov_b32 s4, s15
|
||
|
; CHECK-NEXT: s_ashr_i32 s5, s15, 31
|
||
|
; CHECK-NEXT: v_mov_b32_e32 v0, 0x42280000
|
||
|
; CHECK-NEXT: s_lshl_b64 s[4:5], s[4:5], 2
|
||
|
; CHECK-NEXT: s_add_u32 s4, s4, s6
|
||
|
; CHECK-NEXT: s_addc_u32 s5, s5, s7
|
||
|
; CHECK-NEXT: s_load_dword s4, s[4:5], 0x0
|
||
|
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
|
||
|
; CHECK-NEXT: v_mov_b32_e32 v1, s4
|
||
|
; CHECK-NEXT: ds_write_b32 v1, v0 offset:4
|
||
|
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
|
||
|
; CHECK-NEXT: s_setpc_b64 s[30:31]
|
||
|
%arrayidx = getelementptr inbounds [0 x float], ptr addrspace(3) @extern_overalign, i32 0, i32 1
|
||
|
store float 4.200000e+01, ptr addrspace(3) %arrayidx
|
||
|
ret void
|
||
|
}
|
||
|
|
||
|
|
||
|
; First 2^3 of 2^4 cases encoded into function names
|
||
|
; no use of extern variable from nested function
|
||
|
; module_variable used/not-used
|
||
|
; kernel variable normal/overaligned
|
||
|
; extern variable normal/overaligned
|
||
|
|
||
|
define amdgpu_kernel void @module_0_kernel_normal_extern_normal(i32 %idx) {
|
||
|
; CHECK-LABEL: module_0_kernel_normal_extern_normal:
|
||
|
; CHECK: ; %bb.0:
|
||
|
; CHECK-NEXT: s_load_dword s0, s[4:5], 0x0
|
||
|
; CHECK-NEXT: v_mov_b32_e32 v0, 0
|
||
|
; CHECK-NEXT: v_mov_b32_e32 v1, 2
|
||
|
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
|
||
|
; CHECK-NEXT: s_lshl_b32 s0, s0, 2
|
||
|
; CHECK-NEXT: s_add_i32 s0, s0, 4
|
||
|
; CHECK-NEXT: v_mov_b32_e32 v2, s0
|
||
|
; CHECK-NEXT: ds_write_b16 v0, v1
|
||
|
; CHECK-NEXT: ds_write_b32 v2, v0
|
||
|
; CHECK-NEXT: s_endpgm
|
||
|
store i16 2, ptr addrspace(3) @kernel_normal
|
||
|
|
||
|
%arrayidx1 = getelementptr inbounds [0 x float], ptr addrspace(3) @extern_normal, i32 0, i32 %idx
|
||
|
store float 0.0, ptr addrspace(3) %arrayidx1
|
||
|
ret void
|
||
|
}
|
||
|
|
||
|
define amdgpu_kernel void @module_1_kernel_normal_extern_normal(i32 %idx) {
|
||
|
; CHECK-LABEL: module_1_kernel_normal_extern_normal:
|
||
|
; CHECK: ; %bb.0:
|
||
|
; CHECK-NEXT: s_add_u32 s6, s6, s9
|
||
|
; CHECK-NEXT: s_mov_b32 s32, 0
|
||
|
; CHECK-NEXT: s_addc_u32 s7, s7, 0
|
||
|
; CHECK-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s6
|
||
|
; CHECK-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s7
|
||
|
; CHECK-NEXT: s_add_u32 s0, s0, s9
|
||
|
; CHECK-NEXT: s_addc_u32 s1, s1, 0
|
||
|
; CHECK-NEXT: s_add_u32 s8, s4, 8
|
||
|
; CHECK-NEXT: s_addc_u32 s9, s5, 0
|
||
|
; CHECK-NEXT: s_getpc_b64 s[6:7]
|
||
|
; CHECK-NEXT: s_add_u32 s6, s6, use_module@gotpcrel32@lo+4
|
||
|
; CHECK-NEXT: s_addc_u32 s7, s7, use_module@gotpcrel32@hi+12
|
||
|
; CHECK-NEXT: s_load_dwordx2 s[10:11], s[6:7], 0x0
|
||
|
; CHECK-NEXT: s_load_dword s12, s[4:5], 0x0
|
||
|
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
|
||
|
; CHECK-NEXT: s_swappc_b64 s[30:31], s[10:11]
|
||
|
; CHECK-NEXT: s_lshl_b32 s4, s12, 2
|
||
|
; CHECK-NEXT: v_mov_b32_e32 v0, 0
|
||
|
; CHECK-NEXT: v_mov_b32_e32 v1, 1
|
||
|
; CHECK-NEXT: s_add_i32 s4, s4, 4
|
||
|
; CHECK-NEXT: v_mov_b32_e32 v2, 2
|
||
|
; CHECK-NEXT: v_mov_b32_e32 v3, s4
|
||
|
; CHECK-NEXT: ds_write_b16 v0, v1
|
||
|
; CHECK-NEXT: ds_write_b16 v0, v2 offset:2
|
||
|
; CHECK-NEXT: ds_write_b32 v3, v0
|
||
|
; CHECK-NEXT: s_endpgm
|
||
|
call void @use_module()
|
||
|
store i16 1, ptr addrspace(3) @module_variable
|
||
|
|
||
|
store i16 2, ptr addrspace(3) @kernel_normal
|
||
|
|
||
|
%arrayidx1 = getelementptr inbounds [0 x float], ptr addrspace(3) @extern_normal, i32 0, i32 %idx
|
||
|
store float 0.0, ptr addrspace(3) %arrayidx1
|
||
|
ret void
|
||
|
}
|
||
|
|
||
|
define amdgpu_kernel void @module_0_kernel_overalign_extern_normal(i32 %idx) {
|
||
|
; CHECK-LABEL: module_0_kernel_overalign_extern_normal:
|
||
|
; CHECK: ; %bb.0:
|
||
|
; CHECK-NEXT: s_load_dword s0, s[4:5], 0x0
|
||
|
; CHECK-NEXT: v_mov_b32_e32 v0, 0
|
||
|
; CHECK-NEXT: v_mov_b32_e32 v1, 2
|
||
|
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
|
||
|
; CHECK-NEXT: s_lshl_b32 s0, s0, 2
|
||
|
; CHECK-NEXT: s_add_i32 s0, s0, 4
|
||
|
; CHECK-NEXT: v_mov_b32_e32 v2, s0
|
||
|
; CHECK-NEXT: ds_write_b16 v0, v1
|
||
|
; CHECK-NEXT: ds_write_b32 v2, v0
|
||
|
; CHECK-NEXT: s_endpgm
|
||
|
store i16 2, ptr addrspace(3) @kernel_overalign
|
||
|
|
||
|
%arrayidx1 = getelementptr inbounds [0 x float], ptr addrspace(3) @extern_normal, i32 0, i32 %idx
|
||
|
store float 0.0, ptr addrspace(3) %arrayidx1
|
||
|
ret void
|
||
|
}
|
||
|
|
||
|
define amdgpu_kernel void @module_1_kernel_overalign_extern_normal(i32 %idx) {
|
||
|
; CHECK-LABEL: module_1_kernel_overalign_extern_normal:
|
||
|
; CHECK: ; %bb.0:
|
||
|
; CHECK-NEXT: s_add_u32 s6, s6, s9
|
||
|
; CHECK-NEXT: s_mov_b32 s32, 0
|
||
|
; CHECK-NEXT: s_addc_u32 s7, s7, 0
|
||
|
; CHECK-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s6
|
||
|
; CHECK-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s7
|
||
|
; CHECK-NEXT: s_add_u32 s0, s0, s9
|
||
|
; CHECK-NEXT: s_addc_u32 s1, s1, 0
|
||
|
; CHECK-NEXT: s_add_u32 s8, s4, 8
|
||
|
; CHECK-NEXT: s_addc_u32 s9, s5, 0
|
||
|
; CHECK-NEXT: s_getpc_b64 s[6:7]
|
||
|
; CHECK-NEXT: s_add_u32 s6, s6, use_module@gotpcrel32@lo+4
|
||
|
; CHECK-NEXT: s_addc_u32 s7, s7, use_module@gotpcrel32@hi+12
|
||
|
; CHECK-NEXT: s_load_dwordx2 s[10:11], s[6:7], 0x0
|
||
|
; CHECK-NEXT: s_load_dword s12, s[4:5], 0x0
|
||
|
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
|
||
|
; CHECK-NEXT: s_swappc_b64 s[30:31], s[10:11]
|
||
|
; CHECK-NEXT: s_lshl_b32 s4, s12, 2
|
||
|
; CHECK-NEXT: v_mov_b32_e32 v0, 0
|
||
|
; CHECK-NEXT: v_mov_b32_e32 v1, 1
|
||
|
; CHECK-NEXT: s_add_i32 s4, s4, 8
|
||
|
; CHECK-NEXT: v_mov_b32_e32 v2, 2
|
||
|
; CHECK-NEXT: v_mov_b32_e32 v3, s4
|
||
|
; CHECK-NEXT: ds_write_b16 v0, v1
|
||
|
; CHECK-NEXT: ds_write_b16 v0, v2 offset:4
|
||
|
; CHECK-NEXT: ds_write_b32 v3, v0
|
||
|
; CHECK-NEXT: s_endpgm
|
||
|
call void @use_module()
|
||
|
store i16 1, ptr addrspace(3) @module_variable
|
||
|
|
||
|
store i16 2, ptr addrspace(3) @kernel_overalign
|
||
|
|
||
|
%arrayidx1 = getelementptr inbounds [0 x float], ptr addrspace(3) @extern_normal, i32 0, i32 %idx
|
||
|
store float 0.0, ptr addrspace(3) %arrayidx1
|
||
|
ret void
|
||
|
}
|
||
|
|
||
|
define amdgpu_kernel void @module_0_kernel_normal_extern_overalign(i32 %idx) {
|
||
|
; CHECK-LABEL: module_0_kernel_normal_extern_overalign:
|
||
|
; CHECK: ; %bb.0:
|
||
|
; CHECK-NEXT: s_load_dword s0, s[4:5], 0x0
|
||
|
; CHECK-NEXT: v_mov_b32_e32 v0, 0
|
||
|
; CHECK-NEXT: v_mov_b32_e32 v1, 2
|
||
|
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
|
||
|
; CHECK-NEXT: s_lshl_b32 s0, s0, 2
|
||
|
; CHECK-NEXT: s_add_i32 s0, s0, 8
|
||
|
; CHECK-NEXT: v_mov_b32_e32 v2, s0
|
||
|
; CHECK-NEXT: ds_write_b16 v0, v1
|
||
|
; CHECK-NEXT: ds_write_b32 v2, v0
|
||
|
; CHECK-NEXT: s_endpgm
|
||
|
store i16 2, ptr addrspace(3) @kernel_normal
|
||
|
|
||
|
%arrayidx1 = getelementptr inbounds [0 x float], ptr addrspace(3) @extern_overalign, i32 0, i32 %idx
|
||
|
store float 0.0, ptr addrspace(3) %arrayidx1
|
||
|
ret void
|
||
|
}
|
||
|
|
||
|
define amdgpu_kernel void @module_1_kernel_normal_extern_overalign(i32 %idx) {
|
||
|
; CHECK-LABEL: module_1_kernel_normal_extern_overalign:
|
||
|
; CHECK: ; %bb.0:
|
||
|
; CHECK-NEXT: s_add_u32 s6, s6, s9
|
||
|
; CHECK-NEXT: s_mov_b32 s32, 0
|
||
|
; CHECK-NEXT: s_addc_u32 s7, s7, 0
|
||
|
; CHECK-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s6
|
||
|
; CHECK-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s7
|
||
|
; CHECK-NEXT: s_add_u32 s0, s0, s9
|
||
|
; CHECK-NEXT: s_addc_u32 s1, s1, 0
|
||
|
; CHECK-NEXT: s_add_u32 s8, s4, 8
|
||
|
; CHECK-NEXT: s_addc_u32 s9, s5, 0
|
||
|
; CHECK-NEXT: s_getpc_b64 s[6:7]
|
||
|
; CHECK-NEXT: s_add_u32 s6, s6, use_module@gotpcrel32@lo+4
|
||
|
; CHECK-NEXT: s_addc_u32 s7, s7, use_module@gotpcrel32@hi+12
|
||
|
; CHECK-NEXT: s_load_dwordx2 s[10:11], s[6:7], 0x0
|
||
|
; CHECK-NEXT: s_load_dword s12, s[4:5], 0x0
|
||
|
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
|
||
|
; CHECK-NEXT: s_swappc_b64 s[30:31], s[10:11]
|
||
|
; CHECK-NEXT: s_lshl_b32 s4, s12, 2
|
||
|
; CHECK-NEXT: v_mov_b32_e32 v0, 0
|
||
|
; CHECK-NEXT: v_mov_b32_e32 v1, 1
|
||
|
; CHECK-NEXT: s_add_i32 s4, s4, 8
|
||
|
; CHECK-NEXT: v_mov_b32_e32 v2, 2
|
||
|
; CHECK-NEXT: v_mov_b32_e32 v3, s4
|
||
|
; CHECK-NEXT: ds_write_b16 v0, v1
|
||
|
; CHECK-NEXT: ds_write_b16 v0, v2 offset:2
|
||
|
; CHECK-NEXT: ds_write_b32 v3, v0
|
||
|
; CHECK-NEXT: s_endpgm
|
||
|
call void @use_module()
|
||
|
store i16 1, ptr addrspace(3) @module_variable
|
||
|
|
||
|
store i16 2, ptr addrspace(3) @kernel_normal
|
||
|
|
||
|
%arrayidx1 = getelementptr inbounds [0 x float], ptr addrspace(3) @extern_overalign, i32 0, i32 %idx
|
||
|
store float 0.0, ptr addrspace(3) %arrayidx1
|
||
|
ret void
|
||
|
}
|
||
|
|
||
|
define amdgpu_kernel void @module_0_kernel_overalign_extern_overalign(i32 %idx) {
|
||
|
; CHECK-LABEL: module_0_kernel_overalign_extern_overalign:
|
||
|
; CHECK: ; %bb.0:
|
||
|
; CHECK-NEXT: s_load_dword s0, s[4:5], 0x0
|
||
|
; CHECK-NEXT: v_mov_b32_e32 v0, 0
|
||
|
; CHECK-NEXT: v_mov_b32_e32 v1, 2
|
||
|
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
|
||
|
; CHECK-NEXT: s_lshl_b32 s0, s0, 2
|
||
|
; CHECK-NEXT: s_add_i32 s0, s0, 8
|
||
|
; CHECK-NEXT: v_mov_b32_e32 v2, s0
|
||
|
; CHECK-NEXT: ds_write_b16 v0, v1
|
||
|
; CHECK-NEXT: ds_write_b32 v2, v0
|
||
|
; CHECK-NEXT: s_endpgm
|
||
|
store i16 2, ptr addrspace(3) @kernel_overalign
|
||
|
|
||
|
%arrayidx1 = getelementptr inbounds [0 x float], ptr addrspace(3) @extern_overalign, i32 0, i32 %idx
|
||
|
store float 0.0, ptr addrspace(3) %arrayidx1
|
||
|
ret void
|
||
|
}
|
||
|
|
||
|
define amdgpu_kernel void @module_1_kernel_overalign_extern_overalign(i32 %idx) {
|
||
|
; CHECK-LABEL: module_1_kernel_overalign_extern_overalign:
|
||
|
; CHECK: ; %bb.0:
|
||
|
; CHECK-NEXT: s_add_u32 s6, s6, s9
|
||
|
; CHECK-NEXT: s_mov_b32 s32, 0
|
||
|
; CHECK-NEXT: s_addc_u32 s7, s7, 0
|
||
|
; CHECK-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s6
|
||
|
; CHECK-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s7
|
||
|
; CHECK-NEXT: s_add_u32 s0, s0, s9
|
||
|
; CHECK-NEXT: s_addc_u32 s1, s1, 0
|
||
|
; CHECK-NEXT: s_add_u32 s8, s4, 8
|
||
|
; CHECK-NEXT: s_addc_u32 s9, s5, 0
|
||
|
; CHECK-NEXT: s_getpc_b64 s[6:7]
|
||
|
; CHECK-NEXT: s_add_u32 s6, s6, use_module@gotpcrel32@lo+4
|
||
|
; CHECK-NEXT: s_addc_u32 s7, s7, use_module@gotpcrel32@hi+12
|
||
|
; CHECK-NEXT: s_load_dwordx2 s[10:11], s[6:7], 0x0
|
||
|
; CHECK-NEXT: s_load_dword s12, s[4:5], 0x0
|
||
|
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
|
||
|
; CHECK-NEXT: s_swappc_b64 s[30:31], s[10:11]
|
||
|
; CHECK-NEXT: s_lshl_b32 s4, s12, 2
|
||
|
; CHECK-NEXT: v_mov_b32_e32 v0, 0
|
||
|
; CHECK-NEXT: v_mov_b32_e32 v1, 1
|
||
|
; CHECK-NEXT: s_add_i32 s4, s4, 8
|
||
|
; CHECK-NEXT: v_mov_b32_e32 v2, 2
|
||
|
; CHECK-NEXT: v_mov_b32_e32 v3, s4
|
||
|
; CHECK-NEXT: ds_write_b16 v0, v1
|
||
|
; CHECK-NEXT: ds_write_b16 v0, v2 offset:4
|
||
|
; CHECK-NEXT: ds_write_b32 v3, v0
|
||
|
; CHECK-NEXT: s_endpgm
|
||
|
call void @use_module()
|
||
|
store i16 1, ptr addrspace(3) @module_variable
|
||
|
|
||
|
store i16 2, ptr addrspace(3) @kernel_overalign
|
||
|
|
||
|
%arrayidx1 = getelementptr inbounds [0 x float], ptr addrspace(3) @extern_overalign, i32 0, i32 %idx
|
||
|
store float 0.0, ptr addrspace(3) %arrayidx1
|
||
|
ret void
|
||
|
}
|
||
|
|
||
|
|
||
|
;; Second 2^3 of 2^4 cases encoded into function names
|
||
|
; with extern variable from nested function
|
||
|
; module_variable used/not-used
|
||
|
; kernel variable normal/overaligned
|
||
|
; extern variable normal/overaligned
|
||
|
|
||
|
define amdgpu_kernel void @module_0_kernel_normal_indirect_extern_normal(i32 %idx) {
|
||
|
; CHECK-LABEL: module_0_kernel_normal_indirect_extern_normal:
|
||
|
; CHECK: ; %bb.0:
|
||
|
; CHECK-NEXT: s_add_u32 s6, s6, s9
|
||
|
; CHECK-NEXT: s_mov_b32 s32, 0
|
||
|
; CHECK-NEXT: s_addc_u32 s7, s7, 0
|
||
|
; CHECK-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s6
|
||
|
; CHECK-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s7
|
||
|
; CHECK-NEXT: s_add_u32 s0, s0, s9
|
||
|
; CHECK-NEXT: s_addc_u32 s1, s1, 0
|
||
|
; CHECK-NEXT: s_add_u32 s8, s4, 8
|
||
|
; CHECK-NEXT: s_addc_u32 s9, s5, 0
|
||
|
; CHECK-NEXT: s_getpc_b64 s[4:5]
|
||
|
; CHECK-NEXT: s_add_u32 s4, s4, use_extern_normal@gotpcrel32@lo+4
|
||
|
; CHECK-NEXT: s_addc_u32 s5, s5, use_extern_normal@gotpcrel32@hi+12
|
||
|
; CHECK-NEXT: v_mov_b32_e32 v0, 0
|
||
|
; CHECK-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0
|
||
|
; CHECK-NEXT: v_mov_b32_e32 v1, 2
|
||
|
; CHECK-NEXT: s_mov_b32 s15, 0
|
||
|
; CHECK-NEXT: ds_write_b16 v0, v1
|
||
|
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
|
||
|
; CHECK-NEXT: s_swappc_b64 s[30:31], s[4:5]
|
||
|
; CHECK-NEXT: s_endpgm
|
||
|
store i16 2, ptr addrspace(3) @kernel_normal
|
||
|
|
||
|
call void @use_extern_normal()
|
||
|
ret void
|
||
|
}
|
||
|
|
||
|
define amdgpu_kernel void @module_1_kernel_normal_indirect_extern_normal(i32 %idx) {
|
||
|
; CHECK-LABEL: module_1_kernel_normal_indirect_extern_normal:
|
||
|
; CHECK: ; %bb.0:
|
||
|
; CHECK-NEXT: s_add_u32 s6, s6, s9
|
||
|
; CHECK-NEXT: s_mov_b32 s32, 0
|
||
|
; CHECK-NEXT: s_addc_u32 s7, s7, 0
|
||
|
; CHECK-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s6
|
||
|
; CHECK-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s7
|
||
|
; CHECK-NEXT: s_add_u32 s0, s0, s9
|
||
|
; CHECK-NEXT: s_addc_u32 s1, s1, 0
|
||
|
; CHECK-NEXT: s_add_u32 s8, s4, 8
|
||
|
; CHECK-NEXT: s_addc_u32 s9, s5, 0
|
||
|
; CHECK-NEXT: s_getpc_b64 s[4:5]
|
||
|
; CHECK-NEXT: s_add_u32 s4, s4, use_module@gotpcrel32@lo+4
|
||
|
; CHECK-NEXT: s_addc_u32 s5, s5, use_module@gotpcrel32@hi+12
|
||
|
; CHECK-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0
|
||
|
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
|
||
|
; CHECK-NEXT: s_swappc_b64 s[30:31], s[4:5]
|
||
|
; CHECK-NEXT: s_getpc_b64 s[4:5]
|
||
|
; CHECK-NEXT: s_add_u32 s4, s4, use_extern_normal@gotpcrel32@lo+4
|
||
|
; CHECK-NEXT: s_addc_u32 s5, s5, use_extern_normal@gotpcrel32@hi+12
|
||
|
; CHECK-NEXT: v_mov_b32_e32 v0, 0
|
||
|
; CHECK-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0
|
||
|
; CHECK-NEXT: v_mov_b32_e32 v1, 1
|
||
|
; CHECK-NEXT: v_mov_b32_e32 v2, 2
|
||
|
; CHECK-NEXT: s_mov_b32 s15, 4
|
||
|
; CHECK-NEXT: ds_write_b16 v0, v1
|
||
|
; CHECK-NEXT: ds_write_b16 v0, v2 offset:2
|
||
|
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
|
||
|
; CHECK-NEXT: s_swappc_b64 s[30:31], s[4:5]
|
||
|
; CHECK-NEXT: s_endpgm
|
||
|
call void @use_module()
|
||
|
store i16 1, ptr addrspace(3) @module_variable
|
||
|
|
||
|
store i16 2, ptr addrspace(3) @kernel_normal
|
||
|
|
||
|
call void @use_extern_normal()
|
||
|
ret void
|
||
|
}
|
||
|
|
||
|
define amdgpu_kernel void @module_0_kernel_overalign_indirect_extern_normal(i32 %idx) {
|
||
|
; CHECK-LABEL: module_0_kernel_overalign_indirect_extern_normal:
|
||
|
; CHECK: ; %bb.0:
|
||
|
; CHECK-NEXT: s_add_u32 s6, s6, s9
|
||
|
; CHECK-NEXT: s_mov_b32 s32, 0
|
||
|
; CHECK-NEXT: s_addc_u32 s7, s7, 0
|
||
|
; CHECK-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s6
|
||
|
; CHECK-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s7
|
||
|
; CHECK-NEXT: s_add_u32 s0, s0, s9
|
||
|
; CHECK-NEXT: s_addc_u32 s1, s1, 0
|
||
|
; CHECK-NEXT: s_add_u32 s8, s4, 8
|
||
|
; CHECK-NEXT: s_addc_u32 s9, s5, 0
|
||
|
; CHECK-NEXT: s_getpc_b64 s[4:5]
|
||
|
; CHECK-NEXT: s_add_u32 s4, s4, use_extern_normal@gotpcrel32@lo+4
|
||
|
; CHECK-NEXT: s_addc_u32 s5, s5, use_extern_normal@gotpcrel32@hi+12
|
||
|
; CHECK-NEXT: v_mov_b32_e32 v0, 0
|
||
|
; CHECK-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0
|
||
|
; CHECK-NEXT: v_mov_b32_e32 v1, 2
|
||
|
; CHECK-NEXT: s_mov_b32 s15, 2
|
||
|
; CHECK-NEXT: ds_write_b16 v0, v1
|
||
|
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
|
||
|
; CHECK-NEXT: s_swappc_b64 s[30:31], s[4:5]
|
||
|
; CHECK-NEXT: s_endpgm
|
||
|
store i16 2, ptr addrspace(3) @kernel_overalign
|
||
|
|
||
|
call void @use_extern_normal()
|
||
|
ret void
|
||
|
}
|
||
|
|
||
|
define amdgpu_kernel void @module_1_kernel_overalign_indirect_extern_normal(i32 %idx) {
|
||
|
; CHECK-LABEL: module_1_kernel_overalign_indirect_extern_normal:
|
||
|
; CHECK: ; %bb.0:
|
||
|
; CHECK-NEXT: s_add_u32 s6, s6, s9
|
||
|
; CHECK-NEXT: s_mov_b32 s32, 0
|
||
|
; CHECK-NEXT: s_addc_u32 s7, s7, 0
|
||
|
; CHECK-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s6
|
||
|
; CHECK-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s7
|
||
|
; CHECK-NEXT: s_add_u32 s0, s0, s9
|
||
|
; CHECK-NEXT: s_addc_u32 s1, s1, 0
|
||
|
; CHECK-NEXT: s_add_u32 s8, s4, 8
|
||
|
; CHECK-NEXT: s_addc_u32 s9, s5, 0
|
||
|
; CHECK-NEXT: s_getpc_b64 s[4:5]
|
||
|
; CHECK-NEXT: s_add_u32 s4, s4, use_module@gotpcrel32@lo+4
|
||
|
; CHECK-NEXT: s_addc_u32 s5, s5, use_module@gotpcrel32@hi+12
|
||
|
; CHECK-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0
|
||
|
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
|
||
|
; CHECK-NEXT: s_swappc_b64 s[30:31], s[4:5]
|
||
|
; CHECK-NEXT: s_getpc_b64 s[4:5]
|
||
|
; CHECK-NEXT: s_add_u32 s4, s4, use_extern_normal@gotpcrel32@lo+4
|
||
|
; CHECK-NEXT: s_addc_u32 s5, s5, use_extern_normal@gotpcrel32@hi+12
|
||
|
; CHECK-NEXT: v_mov_b32_e32 v0, 0
|
||
|
; CHECK-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0
|
||
|
; CHECK-NEXT: v_mov_b32_e32 v1, 1
|
||
|
; CHECK-NEXT: v_mov_b32_e32 v2, 2
|
||
|
; CHECK-NEXT: s_mov_b32 s15, 6
|
||
|
; CHECK-NEXT: ds_write_b16 v0, v1
|
||
|
; CHECK-NEXT: ds_write_b16 v0, v2 offset:4
|
||
|
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
|
||
|
; CHECK-NEXT: s_swappc_b64 s[30:31], s[4:5]
|
||
|
; CHECK-NEXT: s_endpgm
|
||
|
call void @use_module()
|
||
|
store i16 1, ptr addrspace(3) @module_variable
|
||
|
|
||
|
store i16 2, ptr addrspace(3) @kernel_overalign
|
||
|
|
||
|
call void @use_extern_normal()
|
||
|
ret void
|
||
|
}
|
||
|
|
||
|
define amdgpu_kernel void @module_0_kernel_normal_indirect_extern_overalign(i32 %idx) {
|
||
|
; CHECK-LABEL: module_0_kernel_normal_indirect_extern_overalign:
|
||
|
; CHECK: ; %bb.0:
|
||
|
; CHECK-NEXT: s_add_u32 s6, s6, s9
|
||
|
; CHECK-NEXT: s_mov_b32 s32, 0
|
||
|
; CHECK-NEXT: s_addc_u32 s7, s7, 0
|
||
|
; CHECK-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s6
|
||
|
; CHECK-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s7
|
||
|
; CHECK-NEXT: s_add_u32 s0, s0, s9
|
||
|
; CHECK-NEXT: s_addc_u32 s1, s1, 0
|
||
|
; CHECK-NEXT: s_add_u32 s8, s4, 8
|
||
|
; CHECK-NEXT: s_addc_u32 s9, s5, 0
|
||
|
; CHECK-NEXT: s_getpc_b64 s[4:5]
|
||
|
; CHECK-NEXT: s_add_u32 s4, s4, use_extern_overalign@gotpcrel32@lo+4
|
||
|
; CHECK-NEXT: s_addc_u32 s5, s5, use_extern_overalign@gotpcrel32@hi+12
|
||
|
; CHECK-NEXT: v_mov_b32_e32 v0, 0
|
||
|
; CHECK-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0
|
||
|
; CHECK-NEXT: v_mov_b32_e32 v1, 2
|
||
|
; CHECK-NEXT: s_mov_b32 s15, 1
|
||
|
; CHECK-NEXT: ds_write_b16 v0, v1
|
||
|
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
|
||
|
; CHECK-NEXT: s_swappc_b64 s[30:31], s[4:5]
|
||
|
; CHECK-NEXT: s_endpgm
|
||
|
store i16 2, ptr addrspace(3) @kernel_normal
|
||
|
|
||
|
call void @use_extern_overalign()
|
||
|
ret void
|
||
|
}
|
||
|
|
||
|
define amdgpu_kernel void @module_1_kernel_normal_indirect_extern_overalign(i32 %idx) {
|
||
|
; CHECK-LABEL: module_1_kernel_normal_indirect_extern_overalign:
|
||
|
; CHECK: ; %bb.0:
|
||
|
; CHECK-NEXT: s_add_u32 s6, s6, s9
|
||
|
; CHECK-NEXT: s_mov_b32 s32, 0
|
||
|
; CHECK-NEXT: s_addc_u32 s7, s7, 0
|
||
|
; CHECK-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s6
|
||
|
; CHECK-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s7
|
||
|
; CHECK-NEXT: s_add_u32 s0, s0, s9
|
||
|
; CHECK-NEXT: s_addc_u32 s1, s1, 0
|
||
|
; CHECK-NEXT: s_add_u32 s8, s4, 8
|
||
|
; CHECK-NEXT: s_addc_u32 s9, s5, 0
|
||
|
; CHECK-NEXT: s_getpc_b64 s[4:5]
|
||
|
; CHECK-NEXT: s_add_u32 s4, s4, use_module@gotpcrel32@lo+4
|
||
|
; CHECK-NEXT: s_addc_u32 s5, s5, use_module@gotpcrel32@hi+12
|
||
|
; CHECK-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0
|
||
|
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
|
||
|
; CHECK-NEXT: s_swappc_b64 s[30:31], s[4:5]
|
||
|
; CHECK-NEXT: s_getpc_b64 s[4:5]
|
||
|
; CHECK-NEXT: s_add_u32 s4, s4, use_extern_overalign@gotpcrel32@lo+4
|
||
|
; CHECK-NEXT: s_addc_u32 s5, s5, use_extern_overalign@gotpcrel32@hi+12
|
||
|
; CHECK-NEXT: v_mov_b32_e32 v0, 0
|
||
|
; CHECK-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0
|
||
|
; CHECK-NEXT: v_mov_b32_e32 v1, 1
|
||
|
; CHECK-NEXT: v_mov_b32_e32 v2, 2
|
||
|
; CHECK-NEXT: s_mov_b32 s15, 5
|
||
|
; CHECK-NEXT: ds_write_b16 v0, v1
|
||
|
; CHECK-NEXT: ds_write_b16 v0, v2 offset:2
|
||
|
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
|
||
|
; CHECK-NEXT: s_swappc_b64 s[30:31], s[4:5]
|
||
|
; CHECK-NEXT: s_endpgm
|
||
|
call void @use_module()
|
||
|
store i16 1, ptr addrspace(3) @module_variable
|
||
|
|
||
|
store i16 2, ptr addrspace(3) @kernel_normal
|
||
|
|
||
|
call void @use_extern_overalign()
|
||
|
ret void
|
||
|
}
|
||
|
|
||
|
define amdgpu_kernel void @module_0_kernel_overalign_indirect_extern_overalign(i32 %idx) {
|
||
|
; CHECK-LABEL: module_0_kernel_overalign_indirect_extern_overalign:
|
||
|
; CHECK: ; %bb.0:
|
||
|
; CHECK-NEXT: s_add_u32 s6, s6, s9
|
||
|
; CHECK-NEXT: s_mov_b32 s32, 0
|
||
|
; CHECK-NEXT: s_addc_u32 s7, s7, 0
|
||
|
; CHECK-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s6
|
||
|
; CHECK-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s7
|
||
|
; CHECK-NEXT: s_add_u32 s0, s0, s9
|
||
|
; CHECK-NEXT: s_addc_u32 s1, s1, 0
|
||
|
; CHECK-NEXT: s_add_u32 s8, s4, 8
|
||
|
; CHECK-NEXT: s_addc_u32 s9, s5, 0
|
||
|
; CHECK-NEXT: s_getpc_b64 s[4:5]
|
||
|
; CHECK-NEXT: s_add_u32 s4, s4, use_extern_overalign@gotpcrel32@lo+4
|
||
|
; CHECK-NEXT: s_addc_u32 s5, s5, use_extern_overalign@gotpcrel32@hi+12
|
||
|
; CHECK-NEXT: v_mov_b32_e32 v0, 0
|
||
|
; CHECK-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0
|
||
|
; CHECK-NEXT: v_mov_b32_e32 v1, 2
|
||
|
; CHECK-NEXT: s_mov_b32 s15, 3
|
||
|
; CHECK-NEXT: ds_write_b16 v0, v1
|
||
|
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
|
||
|
; CHECK-NEXT: s_swappc_b64 s[30:31], s[4:5]
|
||
|
; CHECK-NEXT: s_endpgm
|
||
|
store i16 2, ptr addrspace(3) @kernel_overalign
|
||
|
|
||
|
call void @use_extern_overalign()
|
||
|
ret void
|
||
|
}
|
||
|
|
||
|
define amdgpu_kernel void @module_1_kernel_overalign_indirect_extern_overalign(i32 %idx) {
|
||
|
; CHECK-LABEL: module_1_kernel_overalign_indirect_extern_overalign:
|
||
|
; CHECK: ; %bb.0:
|
||
|
; CHECK-NEXT: s_add_u32 s6, s6, s9
|
||
|
; CHECK-NEXT: s_mov_b32 s32, 0
|
||
|
; CHECK-NEXT: s_addc_u32 s7, s7, 0
|
||
|
; CHECK-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s6
|
||
|
; CHECK-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s7
|
||
|
; CHECK-NEXT: s_add_u32 s0, s0, s9
|
||
|
; CHECK-NEXT: s_addc_u32 s1, s1, 0
|
||
|
; CHECK-NEXT: s_add_u32 s8, s4, 8
|
||
|
; CHECK-NEXT: s_addc_u32 s9, s5, 0
|
||
|
; CHECK-NEXT: s_getpc_b64 s[4:5]
|
||
|
; CHECK-NEXT: s_add_u32 s4, s4, use_module@gotpcrel32@lo+4
|
||
|
; CHECK-NEXT: s_addc_u32 s5, s5, use_module@gotpcrel32@hi+12
|
||
|
; CHECK-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0
|
||
|
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
|
||
|
; CHECK-NEXT: s_swappc_b64 s[30:31], s[4:5]
|
||
|
; CHECK-NEXT: s_getpc_b64 s[4:5]
|
||
|
; CHECK-NEXT: s_add_u32 s4, s4, use_extern_overalign@gotpcrel32@lo+4
|
||
|
; CHECK-NEXT: s_addc_u32 s5, s5, use_extern_overalign@gotpcrel32@hi+12
|
||
|
; CHECK-NEXT: v_mov_b32_e32 v0, 0
|
||
|
; CHECK-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0
|
||
|
; CHECK-NEXT: v_mov_b32_e32 v1, 1
|
||
|
; CHECK-NEXT: v_mov_b32_e32 v2, 2
|
||
|
; CHECK-NEXT: s_mov_b32 s15, 7
|
||
|
; CHECK-NEXT: ds_write_b16 v0, v1
|
||
|
; CHECK-NEXT: ds_write_b16 v0, v2 offset:4
|
||
|
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
|
||
|
; CHECK-NEXT: s_swappc_b64 s[30:31], s[4:5]
|
||
|
; CHECK-NEXT: s_endpgm
|
||
|
call void @use_module()
|
||
|
store i16 1, ptr addrspace(3) @module_variable
|
||
|
|
||
|
store i16 2, ptr addrspace(3) @kernel_overalign
|
||
|
|
||
|
call void @use_extern_overalign()
|
||
|
ret void
|
||
|
}
|
||
|
|
||
|
|
||
|
attributes #0 = { noinline }
|
||
|
|
||
|
!llvm.module.flags = !{!0}
|
||
|
!0 = !{i32 1, !"amdgpu_code_object_version", i32 500}
|