165 lines
8.3 KiB
Text
165 lines
8.3 KiB
Text
// Verify the behavior of the denormal-fp-math attributes when linking bitcode
// built the way rocm-device-libs should be built. The library bitcode should be
// compiled with denormal-fp-math-f32=dynamic, and the dynamic mode should be
// replaced with the denormal mode of the final TU.
|
|
|
|
// Build the fake device library in the way rocm-device-libs should be built.
|
|
//
|
|
// RUN: %clang_cc1 -x cl -triple amdgcn-amd-amdhsa -fdenormal-fp-math-f32=dynamic \
|
|
// RUN: -mcode-object-version=none -emit-llvm-bc \
|
|
// RUN: %S/Inputs/ocml-sample.cl -o %t.dynamic.f32.bc
|
|
//
|
|
// RUN: %clang_cc1 -x cl -triple amdgcn-amd-amdhsa -fdenormal-fp-math=dynamic \
|
|
// RUN: -mcode-object-version=none -emit-llvm-bc \
|
|
// RUN: %S/Inputs/ocml-sample.cl -o %t.dynamic.full.bc
|
|
|
|
|
|
|
|
// Check the default behavior with no denormal-fp-math arguments.
|
|
// RUN: %clang_cc1 -x hip -triple amdgcn-amd-amdhsa -target-cpu gfx803 -fcuda-is-device \
|
|
// RUN: -mlink-builtin-bitcode %t.dynamic.f32.bc \
|
|
// RUN: -emit-llvm %s -o - | FileCheck -implicit-check-not=denormal-fp-math %s --check-prefixes=CHECK,INTERNALIZE
|
|
|
|
|
|
// Check an explicit full ieee request
|
|
// RUN: %clang_cc1 -x hip -triple amdgcn-amd-amdhsa -target-cpu gfx803 -fcuda-is-device \
|
|
// RUN: -fdenormal-fp-math=ieee \
|
|
// RUN: -mlink-builtin-bitcode %t.dynamic.f32.bc \
|
|
// RUN: -emit-llvm %s -o - | FileCheck -implicit-check-not=denormal-fp-math %s --check-prefixes=CHECK,INTERNALIZE
|
|
|
|
|
|
// Check explicit f32-only flushing request
|
|
// RUN: %clang_cc1 -x hip -triple amdgcn-amd-amdhsa -target-cpu gfx803 \
|
|
// RUN: -fcuda-is-device -fdenormal-fp-math-f32=preserve-sign \
|
|
// RUN: -mlink-builtin-bitcode %t.dynamic.f32.bc -emit-llvm %s -o - \
|
|
// RUN: | FileCheck -implicit-check-not=denormal-fp-math --enable-var-scope %s --check-prefixes=CHECK,INTERNALIZE,IEEEF64-PSZF32
|
|
|
|
|
|
// Check explicit flush all request. Only the f32 component of the library is
|
|
// dynamic, so the linked functions should use IEEE as the base mode and the new
|
|
// functions preserve-sign.
|
|
// RUN: %clang_cc1 -x hip -triple amdgcn-amd-amdhsa -target-cpu gfx803 \
|
|
// RUN: -fcuda-is-device -fdenormal-fp-math=preserve-sign \
|
|
// RUN: -mlink-builtin-bitcode %t.dynamic.f32.bc -emit-llvm %s -o - \
|
|
// RUN: | FileCheck -implicit-check-not=denormal-fp-math --enable-var-scope %s --check-prefixes=CHECK,INTERNALIZE,PSZ
|
|
|
|
|
|
// Check an explicit request to flush f32 only, with ieee for everything else.
|
|
// RUN: %clang_cc1 -x hip -triple amdgcn-amd-amdhsa -target-cpu gfx803 \
|
|
// RUN: -fcuda-is-device -fdenormal-fp-math=ieee -fdenormal-fp-math-f32=preserve-sign \
|
|
// RUN: -mlink-builtin-bitcode %t.dynamic.f32.bc -emit-llvm %s -o - \
|
|
// RUN: | FileCheck -implicit-check-not=denormal-fp-math --enable-var-scope %s --check-prefixes=CHECK,INTERNALIZE,IEEEF64-PSZF32
|
|
|
|
|
|
// Check inverse of normal usage. Requesting IEEE f32, with flushed f16/f64
|
|
// RUN: %clang_cc1 -x hip -triple amdgcn-amd-amdhsa -target-cpu gfx803 \
|
|
// RUN: -fcuda-is-device -fdenormal-fp-math=preserve-sign -fdenormal-fp-math-f32=ieee \
|
|
// RUN: -mlink-builtin-bitcode %t.dynamic.f32.bc -emit-llvm %s -o - \
|
|
// RUN: | FileCheck -implicit-check-not=denormal-fp-math --enable-var-scope %s --check-prefixes=CHECK,INTERNALIZE,IEEEF32-PSZF64-DYNF32
|
|
|
|
|
|
// Check backwards from the normal usage where both library components can be
|
|
// overridden.
|
|
// RUN: %clang_cc1 -x hip -triple amdgcn-amd-amdhsa -target-cpu gfx803 \
|
|
// RUN: -fcuda-is-device -fdenormal-fp-math=preserve-sign -fdenormal-fp-math-f32=ieee \
|
|
// RUN: -mlink-builtin-bitcode %t.dynamic.full.bc -emit-llvm %s -o - \
|
|
// RUN: | FileCheck -implicit-check-not=denormal-fp-math --enable-var-scope %s --check-prefixes=CHECK,INTERNALIZE,IEEEF32-PSZF64-DYNFULL
|
|
|
|
|
|
|
|
// Check the case where no internalization is performed
|
|
// RUN: %clang_cc1 -x hip -triple amdgcn-amd-amdhsa -target-cpu gfx803 \
|
|
// RUN: -fcuda-is-device -fdenormal-fp-math=preserve-sign -fdenormal-fp-math-f32=ieee \
|
|
// RUN: -mlink-bitcode-file %t.dynamic.full.bc -emit-llvm %s -o - \
|
|
// RUN: | FileCheck -implicit-check-not=denormal-fp-math --enable-var-scope %s --check-prefixes=CHECK,NOINTERNALIZE,NOINTERNALIZE-IEEEF32-PSZF64-DYNFULL
|
|
|
|
|
|
|
|
#define __device__ __attribute__((device))
|
|
#define __global__ __attribute__((global))
|
|
|
|
typedef _Float16 half;
|
|
|
|
extern "C" {
|
|
__device__ half do_f16_stuff(half a, half b, half c);
|
|
__device__ float do_f32_stuff(float a, float b, float c);
|
|
|
|
// Currently all library functions are internalized. Check a weak function in
|
|
// case we ever choose to not internalize these. In that case, the safest thing
|
|
// to do would likely be to preserve the dynamic denormal-fp-math.
|
|
__attribute__((weak)) __device__ float weak_do_f32_stuff(float a, float b, float c);
|
|
__device__ double do_f64_stuff(double a, double b, double c);
|
|
|
|
|
|
// CHECK: kernel_f16({{.*}}) #[[$KERNELATTR:[0-9]+]]
|
|
// Kernel that calls the half-precision library function. Only element 0 of
// each buffer is touched; the float operands are implicitly converted to half
// at the call, and the half result is converted back to float on the store.
__global__ void kernel_f16(float* out, float* a, float* b, float* c) {
  const int idx = 0;
  const half result = do_f16_stuff(a[idx], b[idx], c[idx]);
  out[idx] = result;
}
|
|
|
|
// CHECK: kernel_f32({{.*}}) #[[$KERNELATTR]]
|
|
// Kernel that calls both the regular and the weak single-precision library
// functions. Only element 0 of each buffer is touched. The regular result is
// stored first, then the weak result is accumulated onto it; the inputs are
// re-read for the second call, after the first store.
__global__ void kernel_f32(float* out, float* a, float* b, float* c) {
  const int idx = 0;
  const float base = do_f32_stuff(a[idx], b[idx], c[idx]);
  out[idx] = base;
  const float weak_term = weak_do_f32_stuff(a[idx], b[idx], c[idx]);
  out[idx] += weak_term;
}
|
|
|
|
// CHECK: kernel_f64({{.*}}) #[[$KERNELATTR]]
|
|
// Kernel that calls the double-precision library function. Only element 0 of
// each buffer is touched.
__global__ void kernel_f64(double* out, double* a, double* b, double* c) {
  const int idx = 0;
  const double result = do_f64_stuff(a[idx], b[idx], c[idx]);
  out[idx] = result;
}
|
|
}
|
|
|
|
// INTERNALIZE: define internal {{(noundef )?}}half @do_f16_stuff({{.*}}) #[[$FUNCATTR:[0-9]+]]
|
|
// INTERNALIZE: define internal {{(noundef )?}}float @do_f32_stuff({{.*}}) #[[$FUNCATTR]]
|
|
// INTERNALIZE: define internal {{(noundef )?}}double @do_f64_stuff({{.*}}) #[[$FUNCATTR]]
|
|
// INTERNALIZE: define internal {{(noundef )?}}float @weak_do_f32_stuff({{.*}}) #[[$WEAK_FUNCATTR:[0-9]+]]
|
|
|
|
|
|
// NOINTERNALIZE: define dso_local {{(noundef )?}}half @do_f16_stuff({{.*}}) #[[$FUNCATTR:[0-9]+]]
|
|
// NOINTERNALIZE: define dso_local {{(noundef )?}}float @do_f32_stuff({{.*}}) #[[$FUNCATTR]]
|
|
// NOINTERNALIZE: define dso_local {{(noundef )?}}double @do_f64_stuff({{.*}}) #[[$FUNCATTR]]
|
|
// NOINTERNALIZE: define weak {{(noundef )?}}float @weak_do_f32_stuff({{.*}}) #[[$WEAK_FUNCATTR:[0-9]+]]
|
|
|
|
|
|
|
|
// We should not be littering call sites with the attribute.
// Everything should use the default ieee with no explicit attribute.
|
|
|
|
// FIXME: Should check-not "denormal-fp-math" within the denormal-fp-math-f32
|
|
// lines.
|
|
|
|
// Default mode relies on the implicit check-not for the denormal-fp-math.
|
|
|
|
// PSZ: #[[$KERNELATTR]] = { {{.*}} "denormal-fp-math"="preserve-sign,preserve-sign"
|
|
// PSZ-SAME: "target-cpu"="gfx803"
|
|
// PSZ: #[[$FUNCATTR]] = { {{.*}} "denormal-fp-math-f32"="preserve-sign,preserve-sign"
|
|
// PSZ-SAME: "target-cpu"="gfx803"
|
|
// PSZ: #[[$WEAK_FUNCATTR]] = { {{.*}} "denormal-fp-math-f32"="preserve-sign,preserve-sign"
|
|
// PSZ-SAME: "target-cpu"="gfx803"
|
|
|
|
// FIXME: Should check-not "denormal-fp-math" within the line
|
|
// IEEEF64-PSZF32: #[[$KERNELATTR]] = { {{.*}} "denormal-fp-math-f32"="preserve-sign,preserve-sign"
|
|
// IEEEF64-PSZF32-SAME: "target-cpu"="gfx803"
|
|
// IEEEF64-PSZF32: #[[$FUNCATTR]] = { {{.*}} "denormal-fp-math-f32"="preserve-sign,preserve-sign"
|
|
// IEEEF64-PSZF32-SAME: "target-cpu"="gfx803"
|
|
// IEEEF64-PSZF32: #[[$WEAK_FUNCATTR]] = { {{.*}} "denormal-fp-math-f32"="preserve-sign,preserve-sign"
|
|
// IEEEF64-PSZF32-SAME: "target-cpu"="gfx803"
|
|
|
|
// IEEEF32-PSZF64-DYNF32: #[[$KERNELATTR]] = { {{.*}} "denormal-fp-math"="preserve-sign,preserve-sign" "denormal-fp-math-f32"="ieee,ieee" {{.*}} "target-cpu"="gfx803" {{.*}} }
|
|
// The library function attribute groups have no explicit lines under this
// prefix; presumably they are left to the implicit check-not.
|
|
|
|
|
|
// IEEEF32-PSZF64-DYNFULL: #[[$KERNELATTR]] = { {{.*}} "denormal-fp-math"="preserve-sign,preserve-sign" "denormal-fp-math-f32"="ieee,ieee"
|
|
// IEEEF32-PSZF64-DYNFULL-SAME: "target-cpu"="gfx803"
|
|
// IEEEF32-PSZF64-DYNFULL: #[[$FUNCATTR]] = { {{.*}} "denormal-fp-math"="preserve-sign,preserve-sign" "denormal-fp-math-f32"="ieee,ieee"
|
|
// IEEEF32-PSZF64-DYNFULL-SAME: "target-cpu"="gfx803"
|
|
// IEEEF32-PSZF64-DYNFULL: #[[$WEAK_FUNCATTR]] = { {{.*}} "denormal-fp-math"="preserve-sign,preserve-sign" "denormal-fp-math-f32"="ieee,ieee"
|
|
// IEEEF32-PSZF64-DYNFULL-SAME: "target-cpu"="gfx803"
|
|
|
|
// -mlink-bitcode-file doesn't internalize or propagate attributes.
|
|
// NOINTERNALIZE-IEEEF32-PSZF64-DYNFULL: #[[$KERNELATTR]] = { {{.*}} "denormal-fp-math"="preserve-sign,preserve-sign" "denormal-fp-math-f32"="ieee,ieee" {{.*}} "target-cpu"="gfx803" {{.*}} }
|
|
// NOINTERNALIZE-IEEEF32-PSZF64-DYNFULL: #[[$FUNCATTR]] = { {{.*}} "denormal-fp-math"="dynamic,dynamic" {{.*}} }
|
|
// NOINTERNALIZE-IEEEF32-PSZF64-DYNFULL: #[[$WEAK_FUNCATTR]] = { {{.*}} "denormal-fp-math"="dynamic,dynamic" {{.*}} }
|