// Verify the behavior of the denormal-fp-mode attributes in the way that // rocm-device-libs should be built with. The bitcode should be compiled with // denormal-fp-math-f32=dynamic, and should be replaced with the denormal mode // of the final TU. // Build the fake device library in the way rocm-device-libs should be built. // // RUN: %clang_cc1 -x cl -triple amdgcn-amd-amdhsa -fdenormal-fp-math-f32=dynamic \ // RUN: -mcode-object-version=none -emit-llvm-bc \ // RUN: %S/Inputs/ocml-sample.cl -o %t.dynamic.f32.bc // // RUN: %clang_cc1 -x cl -triple amdgcn-amd-amdhsa -fdenormal-fp-math=dynamic \ // RUN: -mcode-object-version=none -emit-llvm-bc \ // RUN: %S/Inputs/ocml-sample.cl -o %t.dynamic.full.bc // Check the default behavior with no denormal-fp-math arguments. // RUN: %clang_cc1 -x hip -triple amdgcn-amd-amdhsa -target-cpu gfx803 -fcuda-is-device \ // RUN: -mlink-builtin-bitcode %t.dynamic.f32.bc \ // RUN: -emit-llvm %s -o - | FileCheck -implicit-check-not=denormal-fp-math %s --check-prefixes=CHECK,INTERNALIZE // Check an explicit full ieee request // RUN: %clang_cc1 -x hip -triple amdgcn-amd-amdhsa -target-cpu gfx803 -fcuda-is-device \ // RUN: -fdenormal-fp-math=ieee \ // RUN: -mlink-builtin-bitcode %t.dynamic.f32.bc \ // RUN: -emit-llvm %s -o - | FileCheck -implicit-check-not=denormal-fp-math %s --check-prefixes=CHECK,INTERNALIZE // Check explicit f32-only flushing request // RUN: %clang_cc1 -x hip -triple amdgcn-amd-amdhsa -target-cpu gfx803 \ // RUN: -fcuda-is-device -fdenormal-fp-math-f32=preserve-sign \ // RUN: -mlink-builtin-bitcode %t.dynamic.f32.bc -emit-llvm %s -o - \ // RUN: | FileCheck -implicit-check-not=denormal-fp-math --enable-var-scope %s --check-prefixes=CHECK,INTERNALIZE,IEEEF64-PSZF32 // Check explicit flush all request. Only the f32 component of the library is // dynamic, so the linked functions should use IEEE as the base mode and the new // functions preserve-sign. // RUN: %clang_cc1 -x hip -triple amdgcn-amd-amdhsa -target-cpu gfx803 \ // RUN: -fcuda-is-device -fdenormal-fp-math=preserve-sign \ // RUN: -mlink-builtin-bitcode %t.dynamic.f32.bc -emit-llvm %s -o - \ // RUN: | FileCheck -implicit-check-not=denormal-fp-math --enable-var-scope %s --check-prefixes=CHECK,INTERNALIZE,PSZ // Check explicit f32-only, ieee-other flushing request // RUN: %clang_cc1 -x hip -triple amdgcn-amd-amdhsa -target-cpu gfx803 \ // RUN: -fcuda-is-device -fdenormal-fp-math=ieee -fdenormal-fp-math-f32=preserve-sign \ // RUN: -mlink-builtin-bitcode %t.dynamic.f32.bc -emit-llvm %s -o - \ // RUN: | FileCheck -implicit-check-not=denormal-fp-math --enable-var-scope %s --check-prefixes=CHECK,INTERNALIZE,IEEEF64-PSZF32 // Check inverse of normal usage. Requesting IEEE f32, with flushed f16/f64 // RUN: %clang_cc1 -x hip -triple amdgcn-amd-amdhsa -target-cpu gfx803 \ // RUN: -fcuda-is-device -fdenormal-fp-math=preserve-sign -fdenormal-fp-math-f32=ieee \ // RUN: -mlink-builtin-bitcode %t.dynamic.f32.bc -emit-llvm %s -o - \ // RUN: | FileCheck -implicit-check-not=denormal-fp-math --enable-var-scope %s --check-prefixes=CHECK,INTERNALIZE,IEEEF32-PSZF64-DYNF32 // Check backwards from the normal usage where both library components can be // overridden. // RUN: %clang_cc1 -x hip -triple amdgcn-amd-amdhsa -target-cpu gfx803 \ // RUN: -fcuda-is-device -fdenormal-fp-math=preserve-sign -fdenormal-fp-math-f32=ieee \ // RUN: -mlink-builtin-bitcode %t.dynamic.full.bc -emit-llvm %s -o - \ // RUN: | FileCheck -implicit-check-not=denormal-fp-math --enable-var-scope %s --check-prefixes=CHECK,INTERNALIZE,IEEEF32-PSZF64-DYNFULL // Check the case where no internalization is performed // RUN: %clang_cc1 -x hip -triple amdgcn-amd-amdhsa -target-cpu gfx803 \ // RUN: -fcuda-is-device -fdenormal-fp-math=preserve-sign -fdenormal-fp-math-f32=ieee \ // RUN: -mlink-bitcode-file %t.dynamic.full.bc -emit-llvm %s -o - \ // RUN: | FileCheck -implicit-check-not=denormal-fp-math --enable-var-scope %s --check-prefixes=CHECK,NOINTERNALIZE,NOINTERNALIZE-IEEEF32-PSZF64-DYNFULL #define __device__ __attribute__((device)) #define __global__ __attribute__((global)) typedef _Float16 half; extern "C" { __device__ half do_f16_stuff(half a, half b, half c); __device__ float do_f32_stuff(float a, float b, float c); // Currently all library functions are internalized. Check a weak function in // case we ever choose to not internalize these. In that case, the safest thing // to do would likely be to preserve the dynamic denormal-fp-math. __attribute__((weak)) __device__ float weak_do_f32_stuff(float a, float b, float c); __device__ double do_f64_stuff(double a, double b, double c); // CHECK: kernel_f16({{.*}}) #[[$KERNELATTR:[0-9]+]] __global__ void kernel_f16(float* out, float* a, float* b, float* c) { int id = 0; out[id] = do_f16_stuff(a[id], b[id], c[id]); } // CHECK: kernel_f32({{.*}}) #[[$KERNELATTR]] __global__ void kernel_f32(float* out, float* a, float* b, float* c) { int id = 0; out[id] = do_f32_stuff(a[id], b[id], c[id]); out[id] += weak_do_f32_stuff(a[id], b[id], c[id]); } // CHECK: kernel_f64({{.*}}) #[[$KERNELATTR]] __global__ void kernel_f64(double* out, double* a, double* b, double* c) { int id = 0; out[id] = do_f64_stuff(a[id], b[id], c[id]); } } // INTERNALIZE: define internal {{(noundef )?}}half @do_f16_stuff({{.*}}) #[[$FUNCATTR:[0-9]+]] // INTERNALIZE: define internal {{(noundef )?}}float @do_f32_stuff({{.*}}) #[[$FUNCATTR]] // INTERNALIZE: define internal {{(noundef )?}}double @do_f64_stuff({{.*}}) #[[$FUNCATTR]] // INTERNALIZE: define internal {{(noundef )?}}float @weak_do_f32_stuff({{.*}}) #[[$WEAK_FUNCATTR:[0-9]+]] // NOINTERNALIZE: define dso_local {{(noundef )?}}half @do_f16_stuff({{.*}}) #[[$FUNCATTR:[0-9]+]] // NOINTERNALIZE: define dso_local {{(noundef )?}}float @do_f32_stuff({{.*}}) #[[$FUNCATTR]] // NOINTERNALIZE: define dso_local {{(noundef )?}}double @do_f64_stuff({{.*}}) #[[$FUNCATTR]] // NOINTERNALIZE: define weak {{(noundef )?}}float @weak_do_f32_stuff({{.*}}) #[[$WEAK_FUNCATTR:[0-9]+]] // We should not be littering call sites with the attribute // Everything should use the default ieee with no explicit attribute // FIXME: Should check-not "denormal-fp-math" within the denormal-fp-math-f32 // lines. // Default mode relies on the implicit check-not for the denormal-fp-math. // PSZ: #[[$KERNELATTR]] = { {{.*}} "denormal-fp-math"="preserve-sign,preserve-sign" // PSZ-SAME: "target-cpu"="gfx803" // PSZ: #[[$FUNCATTR]] = { {{.*}} "denormal-fp-math-f32"="preserve-sign,preserve-sign" // PSZ-SAME: "target-cpu"="gfx803" // PSZ: #[[$WEAK_FUNCATTR]] = { {{.*}} "denormal-fp-math-f32"="preserve-sign,preserve-sign" // PSZ-SAME: "target-cpu"="gfx803" // FIXME: Should check-not "denormal-fp-math" within the line // IEEEF64-PSZF32: #[[$KERNELATTR]] = { {{.*}} "denormal-fp-math-f32"="preserve-sign,preserve-sign" // IEEEF64-PSZF32-SAME: "target-cpu"="gfx803" // IEEEF64-PSZF32: #[[$FUNCATTR]] = { {{.*}} "denormal-fp-math-f32"="preserve-sign,preserve-sign" // IEEEF64-PSZF32-SAME: "target-cpu"="gfx803" // IEEEF64-PSZF32: #[[$WEAK_FUNCATTR]] = { {{.*}} "denormal-fp-math-f32"="preserve-sign,preserve-sign" // IEEEF64-PSZF32-SAME: "target-cpu"="gfx803" // IEEEF32-PSZF64-DYNF32: #[[$KERNELATTR]] = { {{.*}} "denormal-fp-math"="preserve-sign,preserve-sign" "denormal-fp-math-f32"="ieee,ieee" {{.*}} "target-cpu"="gfx803" {{.*}} } // implicit check-not // implicit check-not // IEEEF32-PSZF64-DYNFULL: #[[$KERNELATTR]] = { {{.*}} "denormal-fp-math"="preserve-sign,preserve-sign" "denormal-fp-math-f32"="ieee,ieee" // IEEEF32-PSZF64-DYNFULL-SAME: "target-cpu"="gfx803" // IEEEF32-PSZF64-DYNFULL: #[[$FUNCATTR]] = { {{.*}} "denormal-fp-math"="preserve-sign,preserve-sign" "denormal-fp-math-f32"="ieee,ieee" // IEEEF32-PSZF64-DYNFULL-SAME: "target-cpu"="gfx803" // IEEEF32-PSZF64-DYNFULL: #[[$WEAK_FUNCATTR]] = { {{.*}} "denormal-fp-math"="preserve-sign,preserve-sign" "denormal-fp-math-f32"="ieee,ieee" // IEEEF32-PSZF64-DYNFULL-SAME: "target-cpu"="gfx803" // -mlink-bitcode-file doesn't internalize or propagate attributes. // NOINTERNALIZE-IEEEF32-PSZF64-DYNFULL: #[[$KERNELATTR]] = { {{.*}} "denormal-fp-math"="preserve-sign,preserve-sign" "denormal-fp-math-f32"="ieee,ieee" {{.*}} "target-cpu"="gfx803" {{.*}} } // NOINTERNALIZE-IEEEF32-PSZF64-DYNFULL: #[[$FUNCATTR]] = { {{.*}} "denormal-fp-math"="dynamic,dynamic" {{.*}} } // NOINTERNALIZE-IEEEF32-PSZF64-DYNFULL: #[[$WEAK_FUNCATTR]] = { {{.*}} "denormal-fp-math"="dynamic,dynamic" {{.*}} }