// required for __clang_cuda_runtime_wrapper.h tests
// NOTE(review): this looks like a minimal stand-in header for those tests;
// confirm against the test setup that includes this file.

// Process this header at most once per translation unit.
#pragma once

// Define __forceinline__ as an empty macro: every use of the token expands to
// nothing, so declarations annotated with it compile as if it were absent.
#define __forceinline__