// RUN: %clang_cc1 %s -triple spir -verify -pedantic -Wconversion -Werror -fsyntax-only -cl-std=CL -fdeclare-opencl-builtins -DNO_HEADER
// RUN: %clang_cc1 %s -triple spir -verify -pedantic -Wconversion -Werror -fsyntax-only -cl-std=CL -fdeclare-opencl-builtins -finclude-default-header
// RUN: %clang_cc1 %s -triple spir -verify -pedantic -Wconversion -Werror -fsyntax-only -cl-std=CL1.2 -fdeclare-opencl-builtins -DNO_HEADER -cl-ext=-cl_intel_subgroups
// RUN: %clang_cc1 %s -triple spir -verify -pedantic -Wconversion -Werror -fsyntax-only -cl-std=CL1.2 -fdeclare-opencl-builtins -finclude-default-header -cl-ext=-cl_intel_subgroups
// RUN: %clang_cc1 %s -triple spir -verify -pedantic -Wconversion -Werror -fsyntax-only -cl-std=CL2.0 -fdeclare-opencl-builtins -DNO_HEADER
// RUN: %clang_cc1 %s -triple spir -verify -pedantic -Wconversion -Werror -fsyntax-only -cl-std=CL2.0 -fdeclare-opencl-builtins -finclude-default-header
// RUN: %clang_cc1 %s -triple spir -verify -pedantic -Wconversion -Werror -fsyntax-only -cl-std=CL3.0 -fdeclare-opencl-builtins -finclude-default-header
// RUN: %clang_cc1 %s -triple spir -verify -pedantic -Wconversion -Werror -fsyntax-only -cl-std=CLC++ -fdeclare-opencl-builtins -DNO_HEADER
// RUN: %clang_cc1 %s -triple spir -verify -pedantic -Wconversion -Werror -fsyntax-only -cl-std=CLC++ -fdeclare-opencl-builtins -finclude-default-header
// RUN: %clang_cc1 %s -triple spir -verify -pedantic -Wconversion -Werror -fsyntax-only -cl-std=CLC++2021 -fdeclare-opencl-builtins -finclude-default-header
// RUN: %clang_cc1 %s -triple spir -verify -pedantic -Wconversion -Werror -fsyntax-only -cl-std=CL2.0 -fdeclare-opencl-builtins -finclude-default-header -cl-ext=-cl_khr_fp64 -DNO_FP64
// RUN: %clang_cc1 %s -triple spir -verify -pedantic -Wconversion -Werror -fsyntax-only -cl-std=CL3.0 -fdeclare-opencl-builtins -finclude-default-header -DNO_ATOMSCOPE

// Test the -fdeclare-opencl-builtins option.
// This is not a completeness test, so it should not test for all builtins
// defined by OpenCL. Instead this test should cover different functional
// aspects of the TableGen builtin function machinery.

#pragma OPENCL EXTENSION cl_khr_fp16 : enable
#if __OPENCL_C_VERSION__ < CL_VERSION_1_2
#pragma OPENCL EXTENSION cl_khr_fp64 : enable
#endif
#if __OPENCL_C_VERSION__ <= CL_VERSION_1_2
#pragma OPENCL EXTENSION cl_khr_3d_image_writes : enable
#endif

// First, test that Clang gracefully handles missing types.
// This function is placed before the cl_mem_fence_flags typedef below, so the
// type is not yet known when barrier() is called. The diagnostics on barrier()
// itself use a "0+" count (they may or may not appear); the missing-typedef
// error must appear.
// NOTE: the verify directives inside the body use line offsets relative to the
// barrier(0) call; do not insert lines between the call and the directives.
#ifdef NO_HEADER
void test_without_header(void) {
  barrier(0);
  // expected-note@-1 0+{{candidate function not viable}}
  // expected-error@-2 0+{{argument type 'void' is incomplete}}
  // expected-error@-3 0+{{no matching function for call to 'barrier'}}
  // expected-error@* {{typedef type cl_mem_fence_flags not found; include the base header with -finclude-default-header}}
}
#endif

// Provide typedefs when invoking clang without -finclude-default-header.
#ifdef NO_HEADER
typedef unsigned char uchar;
typedef unsigned int uint;
typedef unsigned long ulong;
typedef unsigned short ushort;
typedef __SIZE_TYPE__ size_t;
typedef __PTRDIFF_TYPE__ ptrdiff_t;
typedef __INTPTR_TYPE__ intptr_t;
typedef __UINTPTR_TYPE__ uintptr_t;
typedef char char2 __attribute__((ext_vector_type(2)));
typedef char char4 __attribute__((ext_vector_type(4)));
typedef uchar uchar4 __attribute__((ext_vector_type(4)));
typedef uchar uchar16 __attribute__((ext_vector_type(16)));
typedef float float4 __attribute__((ext_vector_type(4)));
typedef float float16 __attribute__((ext_vector_type(16)));
typedef half half4 __attribute__((ext_vector_type(4)));
typedef int int2 __attribute__((ext_vector_type(2)));
typedef int int4 __attribute__((ext_vector_type(4)));
typedef uint uint2 __attribute__((ext_vector_type(2)));
typedef uint uint4 __attribute__((ext_vector_type(4)));
typedef long long2 __attribute__((ext_vector_type(2)));
typedef long long8 __attribute__((ext_vector_type(8)));
typedef ulong ulong4 __attribute__((ext_vector_type(4)));
typedef short short16 __attribute__((ext_vector_type(16)));
typedef ushort ushort3 __attribute__((ext_vector_type(3)));

typedef int clk_profiling_info;
#define CLK_PROFILING_COMMAND_EXEC_TIME 0x1
typedef uint cl_mem_fence_flags;
#define CLK_GLOBAL_MEM_FENCE 0x02

typedef struct {int a;} ndrange_t;

// Enable extensions that are enabled in opencl-c-base.h.
#if (defined(__OPENCL_CPP_VERSION__) || __OPENCL_C_VERSION__ >= 200)
#define __opencl_c_device_enqueue 1
#define __opencl_c_generic_address_space 1
#define cl_khr_subgroup_extended_types 1
#define cl_khr_subgroup_ballot 1
#define cl_khr_subgroup_non_uniform_arithmetic 1
#define cl_khr_subgroup_clustered_reduce 1
#define __opencl_c_read_write_images 1
#define __opencl_subgroup_builtins 1
#endif
#if (__OPENCL_CPP_VERSION__ == 100 || __OPENCL_C_VERSION__ == 200)
#define __opencl_c_atomic_order_seq_cst 1
#define __opencl_c_atomic_scope_device 1
#endif
#define __opencl_c_named_address_space_builtins 1
#endif // NO_HEADER

// Check that builtins taking pointer arguments resolve: prefetch() on a
// global const int4 pointer, and atom_add()/atom_cmpxchg() on volatile global
// int / unsigned int pointers obtained by casting a volatile global void *.
kernel void test_pointers(volatile global void *global_p, global const int4 *a) {
  int i;
  unsigned int ui;

  prefetch(a, 2);

  atom_add((volatile __global int *)global_p, i);
  atom_cmpxchg((volatile __global unsigned int *)global_p, ui, ui);
}

// Only test enum arguments when the base header is included, because we need
// the enum declarations.
#if !defined(NO_HEADER) && (defined(__OPENCL_CPP_VERSION__) || __OPENCL_C_VERSION__ >= 200)
// Pass memory_order / memory_scope enumerators (declared by the base header)
// to atomic builtins.
kernel void test_enum_args(volatile global atomic_int *global_p, global int *expected) {
  int desired;
  atomic_work_item_fence(CLK_GLOBAL_MEM_FENCE, memory_order_acq_rel, memory_scope_device);
  atomic_compare_exchange_strong_explicit(global_p, expected, desired,
                                          memory_order_acq_rel,
                                          memory_order_relaxed,
                                          memory_scope_work_group);
}
#endif

#if defined(__OPENCL_CPP_VERSION__) || __OPENCL_C_VERSION__ >= 200
// Call builtins whose signatures involve typedef'd types: clk_event_t with a
// clk_profiling_info constant, atomic_flag, and ndrange_t built from a size_t
// array.
void test_typedef_args(clk_event_t evt, volatile atomic_flag *flg, global unsigned long long *values) {
  capture_event_profiling_info(evt, CLK_PROFILING_COMMAND_EXEC_TIME, values);

  atomic_flag_clear(flg);
  bool result = atomic_flag_test_and_set(flg);

  size_t ws[2] = {2, 8};
  ndrange_t r = ndrange_2D(ws);
}

// Check that atomic_fetch_ functions can be called with (u)intptr_t arguments,
// despite OpenCLBuiltins.td not providing explicit overloads for those types.
void test_atomic_fetch(volatile __generic atomic_int *a_int,
                       volatile __generic atomic_intptr_t *a_intptr,
                       volatile __generic atomic_uintptr_t *a_uintptr) {
  int i;
  intptr_t ip;
  uintptr_t uip;
  ptrdiff_t ptrdiff;

  i = atomic_fetch_add(a_int, i);
  // The add overloads on (u)intptr_t atomics take a ptrdiff_t operand here.
  ip = atomic_fetch_add(a_intptr, ptrdiff);
  uip = atomic_fetch_add(a_uintptr, ptrdiff);

  ip = atomic_fetch_or(a_intptr, ip);
  uip = atomic_fetch_or(a_uintptr, uip);
}
#endif

#if !defined(NO_HEADER) && !defined(NO_FP64) && __OPENCL_C_VERSION__ >= 200
// Check that the atomic_fetch_ functions added by the cl_ext_float_atomics
// extension can be called on atomic_float / atomic_double objects in the
// generic, local and global address spaces, using the implicit, order-only
// and order+scope forms.
void test_atomic_fetch_with_address_space(volatile __generic atomic_float *a_float,
                                          volatile __generic atomic_double *a_double,
                                          volatile __local atomic_float *a_float_local,
                                          volatile __local atomic_double *a_double_local,
                                          volatile __global atomic_float *a_float_global,
                                          volatile __global atomic_double *a_double_global) {
  float f1, resf1;
  double d1, resd1;
  resf1 = atomic_fetch_min(a_float, f1);
  resf1 = atomic_fetch_max_explicit(a_float_local, f1, memory_order_seq_cst);
  resf1 = atomic_fetch_add_explicit(a_float_global, f1, memory_order_seq_cst, memory_scope_work_group);

  resd1 = atomic_fetch_min(a_double, d1);
  resd1 = atomic_fetch_max_explicit(a_double_local, d1, memory_order_seq_cst);
  resd1 = atomic_fetch_add_explicit(a_double_global, d1, memory_order_seq_cst, memory_scope_work_group);
}
#endif // !defined(NO_HEADER) && !defined(NO_FP64) && __OPENCL_C_VERSION__ >= 200

#if !defined(NO_HEADER) && __OPENCL_C_VERSION__ == 200 && defined(__opencl_c_generic_address_space)

// Test that overloads that use atomic_double are not available when the fp64
// extension is disabled. Test this by counting the number of notes about
// candidate functions.
// NOTE: the note directives use offsets (-4 / -7) relative to the atomic_init
// call; keep the line layout of this body unchanged.
void test_atomic_double_reporting(volatile __generic atomic_int *a) {
  atomic_init(a, a);
  // expected-error@-1{{no matching function for call to 'atomic_init'}}
#if defined(NO_FP64)
  // Expecting 5 candidates: int, uint, long, ulong, float
  // expected-note@-4 5 {{candidate function not viable: no known conversion}}
#else
  // Expecting 6 candidates: int, uint, long, ulong, float, double
  // expected-note@-7 6 {{candidate function not viable: no known conversion}}
#endif
}

#endif

#if defined(NO_ATOMSCOPE) && __OPENCL_C_VERSION__ >= 300
// Disable the feature by undefining the feature macro.
#undef __opencl_c_atomic_scope_device

// Test that only the overload with explicit order and scope arguments is
// available when the __opencl_c_atomic_scope_device feature is disabled.
void test_atomics_without_scope_device(volatile __generic atomic_int *a_int) {
  int d;

  atomic_exchange(a_int, d);
  // expected-error@-1{{use of undeclared identifier 'atomic_exchange'}}

  atomic_exchange_explicit(a_int, d, memory_order_seq_cst);
  // expected-error@-1{{no matching function for call to 'atomic_exchange_explicit'}}

  atomic_exchange_explicit(a_int, d, memory_order_seq_cst, memory_scope_work_group);
}
#endif

// Test old atomic overloaded with generic address space in C++ for OpenCL.
#if __OPENCL_C_VERSION__ >= 200
// atomic_add() on a __generic pointer: rejected (with four non-viable
// candidates) unless compiling as C++.
void test_legacy_atomics_cpp(__generic volatile unsigned int *a) {
  atomic_add(a, 1);
#if !defined(__cplusplus)
  // expected-error@-2{{no matching function for call to 'atomic_add'}}
  // expected-note@-3 4 {{candidate function not viable}}
#endif
}
#endif

// Exercise convert_* builtins. With NO_FP64 the double conversion must be
// undeclared; otherwise a double -> float conversion is checked instead.
kernel void basic_conversion(void) {
  float f;
  char2 c2;
  long2 l2;
  float4 f4;
  int4 i4;

#ifdef NO_FP64
  (void)convert_double_rtp(f);
  // expected-error@-1{{use of undeclared identifier 'convert_double_rtp'}}
#else
  double d;
  f = convert_float(d);
#endif
  l2 = convert_long2_rtz(c2);
  i4 = convert_int4_sat(f4);
}

// Negative test: convert_float_sat must not be declared. C++ for OpenCL
// additionally reports a typo-correction suggestion with a note.
kernel void basic_conversion_neg(void) {
  int i;
  float f;

  f = convert_float_sat(i);
#if !defined(__OPENCL_CPP_VERSION__)
  // expected-error@-2{{use of undeclared identifier 'convert_float_sat'}}
#else
  // expected-error@-4{{use of undeclared identifier 'convert_float_sat'; did you mean 'convert_float'?}}
  // expected-note@-5{{'convert_float' declared here}}
#endif
}

// Exercise max() and abs() on char scalars and char4/uchar4 vectors,
// including the mixed vector/scalar form max(c4, c).
char4 test_int(char c, char4 c4) {
  char m = max(c, c);
  char4 m4 = max(c4, c4);
  uchar4 abs1 = abs(c4);
  uchar4 abs2 = abs(abs1);
  return max(c4, c);
}

// Call shuffle() with a float4 input and a uint4 mask.
kernel void basic_vector_misc(float4 a) {
  float4 res;
  uint4 mask = (uint4)(1, 2, 3, 4);

  res = shuffle(a, mask);
}

// Read from a read_only image2d_t. The sampler-less read_imagef/read_imageh
// calls must fail to match for OpenCL C versions before 1.2.
kernel void basic_image_readonly(read_only image2d_t image_read_only_image2d) {
  int2 i2;
  sampler_t sampler;
  half4 res;
  float4 resf;

  resf = read_imagef(image_read_only_image2d, i2);
  res = read_imageh(image_read_only_image2d, i2);
#if __OPENCL_C_VERSION__ < CL_VERSION_1_2 && !defined(__OPENCL_CPP_VERSION__)
  // expected-error@-3{{no matching function for call to 'read_imagef'}}
  // expected-error@-3{{no matching function for call to 'read_imageh'}}
#endif
  res = read_imageh(image_read_only_image2d, sampler, i2);

  int imgWidth = get_image_width(image_read_only_image2d);
}

#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0
// Write to and query a read_write image3d_t.
kernel void basic_image_readwrite(read_write image3d_t image_read_write_image3d) {
  half4 h4;
  int4 i4;

  write_imageh(image_read_write_image3d, i4, h4);

  int imgDepth = get_image_depth(image_read_write_image3d);
}
#endif // __OPENCL_C_VERSION__ >= CL_VERSION_2_0

// Write to write_only images. The four-argument write_imagef call on the 3D
// image must fail to match for OpenCL C 1.2 and earlier.
kernel void basic_image_writeonly(write_only image1d_buffer_t image_write_only_image1d_buffer, write_only image3d_t image3dwo) {
  half4 h4;
  float4 f4;
  int i;

  write_imagef(image_write_only_image1d_buffer, i, f4);
  write_imageh(image_write_only_image1d_buffer, i, h4);

  int4 i4;
  write_imagef(image3dwo, i4, i, f4);
#if __OPENCL_C_VERSION__ <= CL_VERSION_1_2 && !defined(__OPENCL_CPP_VERSION__)
  // expected-error@-2{{no matching function for call to 'write_imagef'}}
#endif
}

// get_sub_group_size() must be undeclared for OpenCL C 1.2 and earlier.
kernel void basic_subgroup(global uint *out) {
  out[0] = get_sub_group_size();
#if __OPENCL_C_VERSION__ <= CL_VERSION_1_2 && !defined(__OPENCL_CPP_VERSION__)
  // expected-error@-2{{use of undeclared identifier 'get_sub_group_size'}}
#endif

// Only test when the base header is included, because we need the enum declarations.
#if !defined(NO_HEADER) && (defined(__OPENCL_CPP_VERSION__) || __OPENCL_C_VERSION__ >= 200)
  sub_group_barrier(CLK_GLOBAL_MEM_FENCE, memory_scope_device);
#endif
}

// Extended subgroup builtins must be undeclared before OpenCL C 2.0.
// NOTE: each directive below uses offset -5 to reach one of the four
// statements above the #if; keep the four calls and four directives contiguous.
kernel void extended_subgroup(global uint4 *out, global int *scalar, global char2 *c2) {
  out[0] = get_sub_group_eq_mask();
  scalar[0] = sub_group_non_uniform_scan_inclusive_or(3);
  scalar[1] = sub_group_clustered_reduce_logical_xor(2, 4);
  *c2 = sub_group_broadcast(*c2, 2);
#if __OPENCL_C_VERSION__ < CL_VERSION_2_0 && !defined(__OPENCL_CPP_VERSION__)
  // expected-error@-5{{use of undeclared identifier 'get_sub_group_eq_mask'}}
  // expected-error@-5{{use of undeclared identifier 'sub_group_non_uniform_scan_inclusive_or'}}
  // expected-error@-5{{use of undeclared identifier 'sub_group_clustered_reduce_logical_xor'}}
  // expected-error@-5{{use of undeclared identifier 'sub_group_broadcast'}}
#endif
}

// Call vloadN with pointers in the constant, global, local and private
// address spaces, plus the generic address space from OpenCL C 2.0 onwards.
kernel void basic_vector_data(void) {
#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0
  generic void *generic_p;
#endif
  constant void *constant_p;
  local void *local_p;
  global void *global_p;
  private void *private_p;
  size_t s;

  ulong4 ul4;
  short16 s16;
#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0
  ushort3 us3;
  uchar16 uc16;
#endif
  long8 l8;
  uint2 ui2;
  float16 f16;

  ul4 = vload4(s, (const __constant ulong *) constant_p);
  s16 = vload16(s, (const __constant short *) constant_p);

#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0
  us3 = vload3(s, (const __generic ushort *) generic_p);
  uc16 = vload16(s, (const __generic uchar *) generic_p);
#endif

  l8 = vload8(s, (const __global long *) global_p);
  ui2 = vload2(s, (const __local uint *) local_p);
  f16 = vload16(s, (const __private float *) private_p);
}

// barrier() must resolve in every configuration; get_enqueued_local_size()
// must be undeclared before OpenCL C 2.0.
kernel void basic_work_item(void) {
  uint ui;

  barrier(CLK_GLOBAL_MEM_FENCE);

  get_enqueued_local_size(ui);
#if !defined(__OPENCL_CPP_VERSION__) && __OPENCL_C_VERSION__ < CL_VERSION_2_0
  // expected-error@-2{{use of undeclared identifier 'get_enqueued_local_size'}}
#endif
}

#ifdef NO_FP64
// With fp64 disabled, a failed isnan() call must list only float and half
// candidates.
void test_extension_types(char2 c2) {
  // We should see 6 candidates for float and half types, and none for double types.
  int i = isnan(c2);
  // expected-error@-1{{no matching function for call to 'isnan'}}
  // expected-note@-2 6 {{candidate function not viable: no known conversion from '__private char2' (vector of 2 'char' values) to 'float}}
  // expected-note@-3 6 {{candidate function not viable: no known conversion from '__private char2' (vector of 2 'char' values) to 'half}}
}
#endif