; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -start-before=amdgpu-unify-divergent-exit-nodes --verify-machineinstrs < %s | FileCheck -enable-var-scope --check-prefixes=GCN,GCN-SDAG %s
; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1200 -start-before=amdgpu-unify-divergent-exit-nodes --verify-machineinstrs < %s | FileCheck -enable-var-scope --check-prefixes=GCN,GCN-GISEL %s

; Exercises how an fneg applied to llvm.minimum.f32 / llvm.maximum.f32 is
; selected on gfx1200, with and without -global-isel.

; --------------------------------------------------------------------------------
; fminimum tests
; --------------------------------------------------------------------------------

; fneg(minimum(a, b)): the expected output is one v_maximum_f32 whose two
; sources both carry a neg modifier, and that result is what gets stored.
; GCN-LABEL: {{^}}v_fneg_minimum_f32:
; GCN: global_load_b32 [[LHS:v[0-9]+]]
; GCN: global_load_b32 [[RHS:v[0-9]+]]
; GCN: v_maximum_f32 [[NEG:v[0-9]+]], -[[LHS]], -[[RHS]]
; GCN: global_store_b32 v[{{[0-9:]+}}], [[NEG]]
define void @v_fneg_minimum_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) {
  ; Element index = workitem id, sign-extended to 64 bits.
  %lane = call i32 @llvm.amdgcn.workitem.id.x()
  %lane.idx = sext i32 %lane to i64
  %a.addr = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %lane.idx
  %b.addr = getelementptr inbounds float, ptr addrspace(1) %b.ptr, i64 %lane.idx
  %out.addr = getelementptr inbounds float, ptr addrspace(1) %out, i64 %lane.idx
  ; Both inputs are read with volatile loads, in a-then-b order.
  %lhs = load volatile float, ptr addrspace(1) %a.addr
  %rhs = load volatile float, ptr addrspace(1) %b.addr
  %minimum = call float @llvm.minimum.f32(float %lhs, float %rhs)
  %negated = fneg float %minimum
  store float %negated, ptr addrspace(1) %out.addr
  ret void
}

; fneg(minimum(a, a)): the same loaded value feeds both operands; the expected
; output is a v_maximum_f32 with that register negated in both source slots.
; GCN-LABEL: {{^}}v_fneg_self_minimum_f32:
; GCN: global_load_b32 [[SRC:v[0-9]+]]
; GCN: v_maximum_f32 [[NEG:v[0-9]+]], -[[SRC]], -[[SRC]]
; GCN: global_store_b32 v[{{[0-9:]+}}], [[NEG]]
define void @v_fneg_self_minimum_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) {
  ; Element index = workitem id, sign-extended to 64 bits.
  %lane = call i32 @llvm.amdgcn.workitem.id.x()
  %lane.idx = sext i32 %lane to i64
  %a.addr = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %lane.idx
  %out.addr = getelementptr inbounds float, ptr addrspace(1) %out, i64 %lane.idx
  %src = load volatile float, ptr addrspace(1) %a.addr
  ; Same SSA value is passed as both arguments of the minimum.
  %minimum = call float @llvm.minimum.f32(float %src, float %src)
  %negated = fneg float %minimum
  store float %negated, ptr addrspace(1) %out.addr
  ret void
}

; fneg(minimum(a, 4.0)): the expected output is v_maximum_f32 with the loaded
; value negated and the constant flipped to -4.0.
; GCN-LABEL: {{^}}v_fneg_posk_minimum_f32:
; GCN: global_load_b32 [[SRC:v[0-9]+]]
; GCN: v_maximum_f32 [[NEG:v[0-9]+]], -[[SRC]], -4.0
; GCN: global_store_b32 v[{{[0-9:]+}}], [[NEG]]
define void @v_fneg_posk_minimum_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) {
  ; Element index = workitem id, sign-extended to 64 bits.
  %lane = call i32 @llvm.amdgcn.workitem.id.x()
  %lane.idx = sext i32 %lane to i64
  %a.addr = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %lane.idx
  %out.addr = getelementptr inbounds float, ptr addrspace(1) %out, i64 %lane.idx
  %src = load volatile float, ptr addrspace(1) %a.addr
  ; Positive constant operand.
  %minimum = call float @llvm.minimum.f32(float %src, float 4.0)
  %negated = fneg float %minimum
  store float %negated, ptr addrspace(1) %out.addr
  ret void
}

; fneg(minimum(a, -4.0)): the expected output is v_maximum_f32 with the loaded
; value negated and the constant flipped to 4.0.
; GCN-LABEL: {{^}}v_fneg_negk_minimum_f32:
; GCN: global_load_b32 [[SRC:v[0-9]+]]
; GCN: v_maximum_f32 [[NEG:v[0-9]+]], -[[SRC]], 4.0
; GCN: global_store_b32 v[{{[0-9:]+}}], [[NEG]]
define void @v_fneg_negk_minimum_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) {
  ; Element index = workitem id, sign-extended to 64 bits.
  %lane = call i32 @llvm.amdgcn.workitem.id.x()
  %lane.idx = sext i32 %lane to i64
  %a.addr = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %lane.idx
  %out.addr = getelementptr inbounds float, ptr addrspace(1) %out, i64 %lane.idx
  %src = load volatile float, ptr addrspace(1) %a.addr
  ; Negative constant operand.
  %minimum = call float @llvm.minimum.f32(float %src, float -4.0)
  %negated = fneg float %minimum
  store float %negated, ptr addrspace(1) %out.addr
  ret void
}

; fneg(minimum(a, 0.0)): unlike the non-zero constant cases, the expected
; output keeps a plain v_minimum_f32 with un-negated sources.
; NOTE(review): the checks below match only the minimum and the final store;
; the instruction that applies the negation is not matched. Presumably the
; fold is skipped because the negated constant would be -0.0 - TODO confirm,
; and consider matching the sign flip explicitly.
; GCN-LABEL: {{^}}v_fneg_0_minimum_f32:
; GCN: global_load_b32 [[SRC:v[0-9]+]]
; GCN: v_minimum_f32 [[OUTV:v[0-9]+]], [[SRC]], 0
; GCN: global_store_b32 v[{{[0-9:]+}}], [[OUTV]]
define void @v_fneg_0_minimum_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) {
  ; Element index = workitem id, sign-extended to 64 bits.
  %lane = call i32 @llvm.amdgcn.workitem.id.x()
  %lane.idx = sext i32 %lane to i64
  %a.addr = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %lane.idx
  %out.addr = getelementptr inbounds float, ptr addrspace(1) %out, i64 %lane.idx
  %src = load volatile float, ptr addrspace(1) %a.addr
  ; Positive-zero constant operand.
  %minimum = call float @llvm.minimum.f32(float %src, float 0.0)
  %negated = fneg float %minimum
  store float %negated, ptr addrspace(1) %out.addr
  ret void
}

; fneg(minimum(a, 0.0)) feeding an fmul: the expected output keeps a plain
; v_minimum_f32 and applies the negation as a neg modifier on the multiply's
; first source.
; GCN-LABEL: {{^}}v_fneg_0_minimum_foldable_use_f32:
; GCN: global_load_b32 [[LHS:v[0-9]+]]
; GCN: global_load_b32 [[RHS:v[0-9]+]]
; GCN: v_minimum_f32 [[MINV:v[0-9]+]], [[LHS]], 0
; GCN: v_mul_f32_e64 [[PROD:v[0-9]+]], -[[MINV]], [[RHS]]
; GCN: global_store_b32 v[{{[0-9:]+}}], [[PROD]]
define void @v_fneg_0_minimum_foldable_use_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) {
  ; Element index = workitem id, sign-extended to 64 bits.
  %lane = call i32 @llvm.amdgcn.workitem.id.x()
  %lane.idx = sext i32 %lane to i64
  %a.addr = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %lane.idx
  %b.addr = getelementptr inbounds float, ptr addrspace(1) %b.ptr, i64 %lane.idx
  %out.addr = getelementptr inbounds float, ptr addrspace(1) %out, i64 %lane.idx
  ; Both inputs are read with volatile loads, in a-then-b order.
  %lhs = load volatile float, ptr addrspace(1) %a.addr
  %rhs = load volatile float, ptr addrspace(1) %b.addr
  %minimum = call float @llvm.minimum.f32(float %lhs, float 0.0)
  %negated = fneg float %minimum
  ; The only user of the negated value is this multiply.
  %product = fmul float %negated, %rhs
  store float %product, ptr addrspace(1) %out.addr
  ret void
}

; minimum(a, b) with two users: an fneg and an fmul by 4.0. The expected output
; still forms a single v_maximum_f32 of the negated inputs (MAX0); the multiply
; is then expressed on MAX0 with the sign moved either into the constant
; (-4.0, SelectionDAG) or into a neg source modifier (GlobalISel).
; GCN-LABEL: {{^}}v_fneg_minimum_multi_use_minimum_f32:
; GCN: global_load_b32 [[A:v[0-9]+]]
; GCN: global_load_b32 [[B:v[0-9]+]]
; GCN: v_maximum_f32 [[MAX0:v[0-9]+]], -[[A]], -[[B]]
; GCN-SDAG: v_mul_f32_e32 [[MUL1:v[0-9]+]], -4.0, [[MAX0]]
; GCN-GISEL: v_mul_f32_e64 [[MUL1:v[0-9]+]], -[[MAX0]], 4.0
; GCN: global_store_b32 v[{{[0-9:]+}}], [[MAX0]]
; GCN: global_store_b32 v[{{[0-9:]+}}], [[MUL1]]
define void @v_fneg_minimum_multi_use_minimum_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, ptr addrspace(1) %b.ptr, i64 %tid.ext
  ; No per-lane output address is formed: both results are written straight to
  ; %out below (the previously computed, never-used %out.gep was dropped).
  %a = load volatile float, ptr addrspace(1) %a.gep
  %b = load volatile float, ptr addrspace(1) %b.gep
  %min = call float @llvm.minimum.f32(float %a, float %b)
  %fneg = fneg float %min
  ; Second, non-negated use of the minimum.
  %use1 = fmul float %min, 4.0
  ; Volatile stores: both writes to the same address must be emitted, in order.
  store volatile float %fneg, ptr addrspace(1) %out
  store volatile float %use1, ptr addrspace(1) %out
  ret void
}

; --------------------------------------------------------------------------------
; fmaximum tests
; --------------------------------------------------------------------------------

; fneg(maximum(a, b)): the expected output is one v_minimum_f32 whose two
; sources both carry a neg modifier, and that result is what gets stored.
; GCN-LABEL: {{^}}v_fneg_maximum_f32:
; GCN: global_load_b32 [[A:v[0-9]+]]
; GCN: global_load_b32 [[B:v[0-9]+]]
; GCN: v_minimum_f32 [[RESULT:v[0-9]+]], -[[A]], -[[B]]
; GCN: global_store_b32 v[{{[0-9:]+}}], [[RESULT]]
define void @v_fneg_maximum_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) {
  ; Element index = workitem id, sign-extended to 64 bits.
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, ptr addrspace(1) %b.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
  ; Both inputs are read with volatile loads, in a-then-b order.
  %a = load volatile float, ptr addrspace(1) %a.gep
  %b = load volatile float, ptr addrspace(1) %b.gep
  ; Named %max (not %min) because it holds the result of llvm.maximum.
  %max = call float @llvm.maximum.f32(float %a, float %b)
  %fneg = fneg float %max
  store float %fneg, ptr addrspace(1) %out.gep
  ret void
}

; fneg(maximum(a, a)): the same loaded value feeds both operands; the expected
; output is a v_minimum_f32 with that register negated in both source slots.
; GCN-LABEL: {{^}}v_fneg_self_maximum_f32:
; GCN: global_load_b32 [[A:v[0-9]+]]
; GCN: v_minimum_f32 [[RESULT:v[0-9]+]], -[[A]], -[[A]]
; GCN: global_store_b32 v[{{[0-9:]+}}], [[RESULT]]
define void @v_fneg_self_maximum_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) {
  ; Element index = workitem id, sign-extended to 64 bits.
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
  %a = load volatile float, ptr addrspace(1) %a.gep
  ; Named %max / %max.fneg because the value comes from llvm.maximum.
  %max = call float @llvm.maximum.f32(float %a, float %a)
  %max.fneg = fneg float %max
  store float %max.fneg, ptr addrspace(1) %out.gep
  ret void
}

; fneg(maximum(a, 4.0)): the expected output is v_minimum_f32 with the loaded
; value negated and the constant flipped to -4.0.
; GCN-LABEL: {{^}}v_fneg_posk_maximum_f32:
; GCN: global_load_b32 [[A:v[0-9]+]]
; GCN: v_minimum_f32 [[RESULT:v[0-9]+]], -[[A]], -4.0
; GCN: global_store_b32 v[{{[0-9:]+}}], [[RESULT]]
define void @v_fneg_posk_maximum_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) {
  ; Element index = workitem id, sign-extended to 64 bits.
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
  %a = load volatile float, ptr addrspace(1) %a.gep
  ; Named %max (not %min) because it holds the result of llvm.maximum.
  %max = call float @llvm.maximum.f32(float %a, float 4.0)
  %fneg = fneg float %max
  store float %fneg, ptr addrspace(1) %out.gep
  ret void
}

; fneg(maximum(a, -4.0)): the expected output is v_minimum_f32 with the loaded
; value negated and the constant flipped to 4.0.
; GCN-LABEL: {{^}}v_fneg_negk_maximum_f32:
; GCN: global_load_b32 [[A:v[0-9]+]]
; GCN: v_minimum_f32 [[RESULT:v[0-9]+]], -[[A]], 4.0
; GCN: global_store_b32 v[{{[0-9:]+}}], [[RESULT]]
define void @v_fneg_negk_maximum_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) {
  ; Element index = workitem id, sign-extended to 64 bits.
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
  %a = load volatile float, ptr addrspace(1) %a.gep
  ; Named %max (not %min) because it holds the result of llvm.maximum.
  %max = call float @llvm.maximum.f32(float %a, float -4.0)
  %fneg = fneg float %max
  store float %fneg, ptr addrspace(1) %out.gep
  ret void
}

; fneg(maximum(a, 0.0)): unlike the non-zero constant cases, the expected
; output keeps a plain v_maximum_f32 with un-negated sources.
; NOTE(review): the checks below match only the maximum and the final store;
; the instruction that applies the negation is not matched. Presumably the
; fold is skipped because the negated constant would be -0.0 - TODO confirm,
; and consider matching the sign flip explicitly.
; GCN-LABEL: {{^}}v_fneg_0_maximum_f32:
; GCN: global_load_b32 [[SRC:v[0-9]+]]
; GCN: v_maximum_f32 [[OUTV:v[0-9]+]], [[SRC]], 0
; GCN: global_store_b32 v[{{[0-9:]+}}], [[OUTV]]
define void @v_fneg_0_maximum_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) {
  ; Element index = workitem id, sign-extended to 64 bits.
  %lane = call i32 @llvm.amdgcn.workitem.id.x()
  %lane.idx = sext i32 %lane to i64
  %a.addr = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %lane.idx
  %out.addr = getelementptr inbounds float, ptr addrspace(1) %out, i64 %lane.idx
  %src = load volatile float, ptr addrspace(1) %a.addr
  ; Positive-zero constant operand.
  %maximum = call float @llvm.maximum.f32(float %src, float 0.0)
  %negated = fneg float %maximum
  store float %negated, ptr addrspace(1) %out.addr
  ret void
}

; fneg(maximum(a, 0.0)) feeding an fmul: the expected output keeps a plain
; v_maximum_f32 and applies the negation as a neg modifier on the multiply's
; first source.
; GCN-LABEL: {{^}}v_fneg_0_maximum_foldable_use_f32:
; GCN: global_load_b32 [[LHS:v[0-9]+]]
; GCN: global_load_b32 [[RHS:v[0-9]+]]
; GCN: v_maximum_f32 [[MAXV:v[0-9]+]], [[LHS]], 0
; GCN: v_mul_f32_e64 [[PROD:v[0-9]+]], -[[MAXV]], [[RHS]]
; GCN: global_store_b32 v[{{[0-9:]+}}], [[PROD]]
define void @v_fneg_0_maximum_foldable_use_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) {
  ; Element index = workitem id, sign-extended to 64 bits.
  %lane = call i32 @llvm.amdgcn.workitem.id.x()
  %lane.idx = sext i32 %lane to i64
  %a.addr = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %lane.idx
  %b.addr = getelementptr inbounds float, ptr addrspace(1) %b.ptr, i64 %lane.idx
  %out.addr = getelementptr inbounds float, ptr addrspace(1) %out, i64 %lane.idx
  ; Both inputs are read with volatile loads, in a-then-b order.
  %lhs = load volatile float, ptr addrspace(1) %a.addr
  %rhs = load volatile float, ptr addrspace(1) %b.addr
  %maximum = call float @llvm.maximum.f32(float %lhs, float 0.0)
  %negated = fneg float %maximum
  ; The only user of the negated value is this multiply.
  %product = fmul float %negated, %rhs
  store float %product, ptr addrspace(1) %out.addr
  ret void
}

; maximum(a, b) with two users: an fneg and an fmul by 4.0. The expected output
; still forms a single v_minimum_f32 of the negated inputs (MIN0); the multiply
; is then expressed on MIN0 with the sign moved either into the constant
; (-4.0, SelectionDAG) or into a neg source modifier (GlobalISel).
; GCN-LABEL: {{^}}v_fneg_maximum_multi_use_maximum_f32:
; GCN: global_load_b32 [[A:v[0-9]+]]
; GCN: global_load_b32 [[B:v[0-9]+]]
; GCN: v_minimum_f32 [[MIN0:v[0-9]+]], -[[A]], -[[B]]
; GCN-SDAG: v_mul_f32_e32 [[MUL1:v[0-9]+]], -4.0, [[MIN0]]
; GCN-GISEL: v_mul_f32_e64 [[MUL1:v[0-9]+]], -[[MIN0]], 4.0
; GCN: global_store_b32 v[{{[0-9:]+}}], [[MIN0]]
; GCN: global_store_b32 v[{{[0-9:]+}}], [[MUL1]]
define void @v_fneg_maximum_multi_use_maximum_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, ptr addrspace(1) %b.ptr, i64 %tid.ext
  ; No per-lane output address is formed: both results are written straight to
  ; %out below (the previously computed, never-used %out.gep was dropped).
  %a = load volatile float, ptr addrspace(1) %a.gep
  %b = load volatile float, ptr addrspace(1) %b.gep
  ; Named %max (not %min) because it holds the result of llvm.maximum.
  %max = call float @llvm.maximum.f32(float %a, float %b)
  %fneg = fneg float %max
  ; Second, non-negated use of the maximum.
  %use1 = fmul float %max, 4.0
  ; Volatile stores: both writes to the same address must be emitted, in order.
  store volatile float %fneg, ptr addrspace(1) %out
  store volatile float %use1, ptr addrspace(1) %out
  ret void
}

; Intrinsics called by the test functions in this file.
; Floating-point min/max under test:
declare float @llvm.minimum.f32(float, float)
declare float @llvm.maximum.f32(float, float)
; Source of the per-workitem index used to address the input/output buffers:
declare i32 @llvm.amdgcn.workitem.id.x()