//===--- cuda/dynamic_cuda/cuda.h --------------------------------- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// The parts of the cuda api that are presently in use by the openmp cuda plugin
//
//===----------------------------------------------------------------------===//

// Include guard: opens here and is closed by the #endif at the end of the
// file.
#ifndef DYNAMIC_CUDA_CUDA_H_INCLUDED
#define DYNAMIC_CUDA_CUDA_H_INCLUDED

#include <cstddef> // size_t
#include <cstdint> // uintptr_t

// Basic handle types used by the declarations in this header.
//
// CUdevice is a plain integer and CUdeviceptr is an integer wide enough to
// hold a pointer value. The remaining handles are pointers to struct types
// that are only forward-declared here, so they are opaque to users of this
// header.
typedef int CUdevice;
typedef uintptr_t CUdeviceptr;
typedef struct CUmod_st *CUmodule;
typedef struct CUctx_st *CUcontext;
typedef struct CUfunc_st *CUfunction;
typedef struct CUstream_st *CUstream;
typedef struct CUevent_st *CUevent;

// Sentinel CUdevice value (-2 cast to CUdevice).
// NOTE: the original text defined this macro twice with identical bodies; a
// single definition is equivalent.
#define CU_DEVICE_INVALID ((CUdevice)-2)

// Handle type taken by cuMemCreate / cuMemMap / cuMemRelease below; it is an
// unsigned 64-bit-or-wider integer, not a pointer.
typedef unsigned long long CUmemGenericAllocationHandle_v1;
typedef CUmemGenericAllocationHandle_v1 CUmemGenericAllocationHandle;

// Selector passed as the `option` argument of cuMemGetAllocationGranularity
// (declared later in this header).
typedef enum CUmemAllocationGranularity_flags_enum {
  CU_MEM_ALLOC_GRANULARITY_MINIMUM = 0x0,
  CU_MEM_ALLOC_GRANULARITY_RECOMMENDED = 0x1
} CUmemAllocationGranularity_flags;

// Values stored in CUmemAccessDesc::flags.
//
// READWRITE (0x3) has both low bits set, so it is a bitwise superset of READ
// (0x1). PROT_MAX is 0x7FFFFFFF, the largest positive value of a signed
// 32-bit integer.
typedef enum CUmemAccess_flags_enum {
  CU_MEM_ACCESS_FLAGS_PROT_NONE = 0x0,
  CU_MEM_ACCESS_FLAGS_PROT_READ = 0x1,
  CU_MEM_ACCESS_FLAGS_PROT_READWRITE = 0x3,
  CU_MEM_ACCESS_FLAGS_PROT_MAX = 0x7FFFFFFF
} CUmemAccess_flags;

// Discriminator stored in CUmemLocation::type.
//
// Only INVALID and DEVICE are declared in this header. TYPE_MAX is
// 0x7FFFFFFF, the largest positive value of a signed 32-bit integer.
typedef enum CUmemLocationType_enum {
  CU_MEM_LOCATION_TYPE_INVALID = 0x0,
  CU_MEM_LOCATION_TYPE_DEVICE = 0x1,
  CU_MEM_LOCATION_TYPE_MAX = 0x7FFFFFFF
} CUmemLocationType;

typedef struct CUmemLocation_st {
|
|
CUmemLocationType type;
|
|
int id;
|
|
} CUmemLocation_v1;
|
|
typedef CUmemLocation_v1 CUmemLocation;
|
|
|
|
typedef struct CUmemAccessDesc_st {
|
|
CUmemLocation location;
|
|
CUmemAccess_flags flags;
|
|
} CUmemAccessDesc_v1;
|
|
|
|
typedef CUmemAccessDesc_v1 CUmemAccessDesc;
|
|
|
|
// Values stored in CUmemAllocationProp::type.
//
// Only INVALID and PINNED are declared in this header. TYPE_MAX is
// 0x7FFFFFFF, the largest positive value of a signed 32-bit integer.
typedef enum CUmemAllocationType_enum {
  CU_MEM_ALLOCATION_TYPE_INVALID = 0x0,
  CU_MEM_ALLOCATION_TYPE_PINNED = 0x1,
  CU_MEM_ALLOCATION_TYPE_MAX = 0x7FFFFFFF
} CUmemAllocationType;

// Values stored in CUmemAllocationProp::requestedHandleTypes.
//
// The three non-zero kinds are distinct single bits (0x1, 0x2, 0x4).
// TYPE_MAX is 0x7FFFFFFF, the largest positive value of a signed 32-bit
// integer.
typedef enum CUmemAllocationHandleType_enum {
  CU_MEM_HANDLE_TYPE_NONE = 0x0,
  CU_MEM_HANDLE_TYPE_POSIX_FILE_DESCRIPTOR = 0x1,
  CU_MEM_HANDLE_TYPE_WIN32 = 0x2,
  CU_MEM_HANDLE_TYPE_WIN32_KMT = 0x4,
  CU_MEM_HANDLE_TYPE_MAX = 0x7FFFFFFF
} CUmemAllocationHandleType;

typedef struct CUmemAllocationProp_st {
|
|
CUmemAllocationType type;
|
|
CUmemAllocationHandleType requestedHandleTypes;
|
|
CUmemLocation location;
|
|
|
|
void *win32HandleMetaData;
|
|
struct {
|
|
unsigned char compressionType;
|
|
unsigned char gpuDirectRDMACapable;
|
|
unsigned short usage;
|
|
unsigned char reserved[4];
|
|
} allocFlags;
|
|
} CUmemAllocationProp_v1;
|
|
typedef CUmemAllocationProp_v1 CUmemAllocationProp;
|
|
|
|
// Result code returned by every function declared in this header.
//
// Only the codes listed here are declared; CUDA_SUCCESS is zero and all
// other listed codes are non-zero.
typedef enum cudaError_enum {
  CUDA_SUCCESS = 0,
  CUDA_ERROR_INVALID_VALUE = 1,
  CUDA_ERROR_NO_DEVICE = 100,
  CUDA_ERROR_INVALID_HANDLE = 400,
  CUDA_ERROR_NOT_READY = 600,
  CUDA_ERROR_TOO_MANY_PEERS = 711,
} CUresult;

// Stream flag values.
// NOTE(review): presumably intended for the `unsigned` argument of
// cuStreamCreate, which is declared with a plain integer type -- confirm.
typedef enum CUstream_flags_enum {
  CU_STREAM_DEFAULT = 0x0,
  CU_STREAM_NON_BLOCKING = 0x1,
} CUstream_flags;

// Limit selector taken by cuCtxGetLimit / cuCtxSetLimit (declared later in
// this header).
//
// CU_LIMIT_MAX has no explicit initializer, so it takes the value after the
// last explicit enumerator (0x7).
typedef enum CUlimit_enum {
  CU_LIMIT_STACK_SIZE = 0x0,
  CU_LIMIT_PRINTF_FIFO_SIZE = 0x1,
  CU_LIMIT_MALLOC_HEAP_SIZE = 0x2,
  CU_LIMIT_DEV_RUNTIME_SYNC_DEPTH = 0x3,
  CU_LIMIT_DEV_RUNTIME_PENDING_LAUNCH_COUNT = 0x4,
  CU_LIMIT_MAX_L2_FETCH_GRANULARITY = 0x5,
  CU_LIMIT_PERSISTING_L2_CACHE_SIZE = 0x6,
  CU_LIMIT_MAX
} CUlimit;

// Attribute selector taken by cuDeviceGetAttribute (declared later in this
// header).
//
// Several enumerators deliberately share a value with the one(s) before
// them (8, 12, 27-29 and 102), i.e. they are aliases of the same attribute
// number. CU_DEVICE_ATTRIBUTE_MAX has no explicit initializer, so it takes
// the value after the last explicit enumerator (120).
typedef enum CUdevice_attribute_enum {
  CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_BLOCK = 1,
  CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_X = 2,
  CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Y = 3,
  CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Z = 4,
  CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_X = 5,
  CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Y = 6,
  CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Z = 7,
  CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK = 8,
  CU_DEVICE_ATTRIBUTE_SHARED_MEMORY_PER_BLOCK = 8, // alias of the previous
  CU_DEVICE_ATTRIBUTE_TOTAL_CONSTANT_MEMORY = 9,
  CU_DEVICE_ATTRIBUTE_WARP_SIZE = 10,
  CU_DEVICE_ATTRIBUTE_MAX_PITCH = 11,
  CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_BLOCK = 12,
  CU_DEVICE_ATTRIBUTE_REGISTERS_PER_BLOCK = 12, // alias of the previous
  CU_DEVICE_ATTRIBUTE_CLOCK_RATE = 13,
  CU_DEVICE_ATTRIBUTE_TEXTURE_ALIGNMENT = 14,
  CU_DEVICE_ATTRIBUTE_GPU_OVERLAP = 15,
  CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT = 16,
  CU_DEVICE_ATTRIBUTE_KERNEL_EXEC_TIMEOUT = 17,
  CU_DEVICE_ATTRIBUTE_INTEGRATED = 18,
  CU_DEVICE_ATTRIBUTE_CAN_MAP_HOST_MEMORY = 19,
  CU_DEVICE_ATTRIBUTE_COMPUTE_MODE = 20,
  CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_WIDTH = 21,
  CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_WIDTH = 22,
  CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_HEIGHT = 23,
  CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_WIDTH = 24,
  CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_HEIGHT = 25,
  CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_DEPTH = 26,
  CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_WIDTH = 27,
  CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_HEIGHT = 28,
  CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_LAYERS = 29,
  // The three *_ARRAY_* names below alias the *_LAYERED_* values 27-29.
  CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_ARRAY_WIDTH = 27,
  CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_ARRAY_HEIGHT = 28,
  CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_ARRAY_NUMSLICES = 29,
  CU_DEVICE_ATTRIBUTE_SURFACE_ALIGNMENT = 30,
  CU_DEVICE_ATTRIBUTE_CONCURRENT_KERNELS = 31,
  CU_DEVICE_ATTRIBUTE_ECC_ENABLED = 32,
  CU_DEVICE_ATTRIBUTE_PCI_BUS_ID = 33,
  CU_DEVICE_ATTRIBUTE_PCI_DEVICE_ID = 34,
  CU_DEVICE_ATTRIBUTE_TCC_DRIVER = 35,
  CU_DEVICE_ATTRIBUTE_MEMORY_CLOCK_RATE = 36,
  CU_DEVICE_ATTRIBUTE_GLOBAL_MEMORY_BUS_WIDTH = 37,
  CU_DEVICE_ATTRIBUTE_L2_CACHE_SIZE = 38,
  CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_MULTIPROCESSOR = 39,
  CU_DEVICE_ATTRIBUTE_ASYNC_ENGINE_COUNT = 40,
  CU_DEVICE_ATTRIBUTE_UNIFIED_ADDRESSING = 41,
  CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LAYERED_WIDTH = 42,
  CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LAYERED_LAYERS = 43,
  CU_DEVICE_ATTRIBUTE_CAN_TEX2D_GATHER = 44,
  CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_GATHER_WIDTH = 45,
  CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_GATHER_HEIGHT = 46,
  CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_WIDTH_ALTERNATE = 47,
  CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_HEIGHT_ALTERNATE = 48,
  CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_DEPTH_ALTERNATE = 49,
  CU_DEVICE_ATTRIBUTE_PCI_DOMAIN_ID = 50,
  CU_DEVICE_ATTRIBUTE_TEXTURE_PITCH_ALIGNMENT = 51,
  CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURECUBEMAP_WIDTH = 52,
  CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURECUBEMAP_LAYERED_WIDTH = 53,
  CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURECUBEMAP_LAYERED_LAYERS = 54,
  CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE1D_WIDTH = 55,
  CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_WIDTH = 56,
  CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_HEIGHT = 57,
  CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE3D_WIDTH = 58,
  CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE3D_HEIGHT = 59,
  CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE3D_DEPTH = 60,
  CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE1D_LAYERED_WIDTH = 61,
  CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE1D_LAYERED_LAYERS = 62,
  CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_LAYERED_WIDTH = 63,
  CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_LAYERED_HEIGHT = 64,
  CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_LAYERED_LAYERS = 65,
  CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACECUBEMAP_WIDTH = 66,
  CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACECUBEMAP_LAYERED_WIDTH = 67,
  CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACECUBEMAP_LAYERED_LAYERS = 68,
  CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LINEAR_WIDTH = 69,
  CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_WIDTH = 70,
  CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_HEIGHT = 71,
  CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_PITCH = 72,
  CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_MIPMAPPED_WIDTH = 73,
  CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_MIPMAPPED_HEIGHT = 74,
  CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR = 75,
  CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR = 76,
  CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_MIPMAPPED_WIDTH = 77,
  CU_DEVICE_ATTRIBUTE_STREAM_PRIORITIES_SUPPORTED = 78,
  CU_DEVICE_ATTRIBUTE_GLOBAL_L1_CACHE_SUPPORTED = 79,
  CU_DEVICE_ATTRIBUTE_LOCAL_L1_CACHE_SUPPORTED = 80,
  CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_MULTIPROCESSOR = 81,
  CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_MULTIPROCESSOR = 82,
  CU_DEVICE_ATTRIBUTE_MANAGED_MEMORY = 83,
  CU_DEVICE_ATTRIBUTE_MULTI_GPU_BOARD = 84,
  CU_DEVICE_ATTRIBUTE_MULTI_GPU_BOARD_GROUP_ID = 85,
  CU_DEVICE_ATTRIBUTE_HOST_NATIVE_ATOMIC_SUPPORTED = 86,
  CU_DEVICE_ATTRIBUTE_SINGLE_TO_DOUBLE_PRECISION_PERF_RATIO = 87,
  CU_DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS = 88,
  CU_DEVICE_ATTRIBUTE_CONCURRENT_MANAGED_ACCESS = 89,
  CU_DEVICE_ATTRIBUTE_COMPUTE_PREEMPTION_SUPPORTED = 90,
  CU_DEVICE_ATTRIBUTE_CAN_USE_HOST_POINTER_FOR_REGISTERED_MEM = 91,
  CU_DEVICE_ATTRIBUTE_CAN_USE_STREAM_MEM_OPS = 92,
  CU_DEVICE_ATTRIBUTE_CAN_USE_64_BIT_STREAM_MEM_OPS = 93,
  CU_DEVICE_ATTRIBUTE_CAN_USE_STREAM_WAIT_VALUE_NOR = 94,
  CU_DEVICE_ATTRIBUTE_COOPERATIVE_LAUNCH = 95,
  CU_DEVICE_ATTRIBUTE_COOPERATIVE_MULTI_DEVICE_LAUNCH = 96,
  CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK_OPTIN = 97,
  CU_DEVICE_ATTRIBUTE_CAN_FLUSH_REMOTE_WRITES = 98,
  CU_DEVICE_ATTRIBUTE_HOST_REGISTER_SUPPORTED = 99,
  CU_DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS_USES_HOST_PAGE_TABLES = 100,
  CU_DEVICE_ATTRIBUTE_DIRECT_MANAGED_MEM_ACCESS_FROM_HOST = 101,
  CU_DEVICE_ATTRIBUTE_VIRTUAL_ADDRESS_MANAGEMENT_SUPPORTED = 102,
  CU_DEVICE_ATTRIBUTE_VIRTUAL_MEMORY_MANAGEMENT_SUPPORTED = 102, // alias
  CU_DEVICE_ATTRIBUTE_HANDLE_TYPE_POSIX_FILE_DESCRIPTOR_SUPPORTED = 103,
  CU_DEVICE_ATTRIBUTE_HANDLE_TYPE_WIN32_HANDLE_SUPPORTED = 104,
  CU_DEVICE_ATTRIBUTE_HANDLE_TYPE_WIN32_KMT_HANDLE_SUPPORTED = 105,
  CU_DEVICE_ATTRIBUTE_MAX_BLOCKS_PER_MULTIPROCESSOR = 106,
  CU_DEVICE_ATTRIBUTE_GENERIC_COMPRESSION_SUPPORTED = 107,
  CU_DEVICE_ATTRIBUTE_MAX_PERSISTING_L2_CACHE_SIZE = 108,
  CU_DEVICE_ATTRIBUTE_MAX_ACCESS_POLICY_WINDOW_SIZE = 109,
  CU_DEVICE_ATTRIBUTE_GPU_DIRECT_RDMA_WITH_CUDA_VMM_SUPPORTED = 110,
  CU_DEVICE_ATTRIBUTE_RESERVED_SHARED_MEMORY_PER_BLOCK = 111,
  CU_DEVICE_ATTRIBUTE_SPARSE_CUDA_ARRAY_SUPPORTED = 112,
  CU_DEVICE_ATTRIBUTE_READ_ONLY_HOST_REGISTER_SUPPORTED = 113,
  CU_DEVICE_ATTRIBUTE_TIMELINE_SEMAPHORE_INTEROP_SUPPORTED = 114,
  CU_DEVICE_ATTRIBUTE_MEMORY_POOLS_SUPPORTED = 115,
  CU_DEVICE_ATTRIBUTE_GPU_DIRECT_RDMA_SUPPORTED = 116,
  CU_DEVICE_ATTRIBUTE_GPU_DIRECT_RDMA_FLUSH_WRITES_OPTIONS = 117,
  CU_DEVICE_ATTRIBUTE_GPU_DIRECT_RDMA_WRITES_ORDERING = 118,
  CU_DEVICE_ATTRIBUTE_MEMPOOL_SUPPORTED_HANDLE_TYPES = 119,
  CU_DEVICE_ATTRIBUTE_MAX,
} CUdevice_attribute;

// Attribute selector taken by cuFuncGetAttribute (declared later in this
// header). Only one attribute is declared here.
typedef enum CUfunction_attribute_enum {
  CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK = 0,
} CUfunction_attribute;

// Context flag values.
//
// CU_CTX_SCHED_MASK (0x07) covers the low three bits, which include the
// CU_CTX_SCHED_BLOCKING_SYNC bit (0x04).
typedef enum CUctx_flags_enum {
  CU_CTX_SCHED_BLOCKING_SYNC = 0x04,
  CU_CTX_SCHED_MASK = 0x07,
} CUctx_flags;

// Memory-attach flag values; each is a distinct single bit.
// NOTE(review): presumably intended for the `unsigned int` argument of
// cuMemAllocManaged, which is declared with a plain integer type -- confirm.
typedef enum CUmemAttach_flags_enum {
  CU_MEM_ATTACH_GLOBAL = 0x1,
  CU_MEM_ATTACH_HOST = 0x2,
  CU_MEM_ATTACH_SINGLE = 0x4,
} CUmemAttach_flags;

// Compute-mode values. Note that the value 1 is not declared in this header;
// the listed values are 0, 2 and 3.
typedef enum CUcomputeMode_enum {
  CU_COMPUTEMODE_DEFAULT = 0,
  CU_COMPUTEMODE_PROHIBITED = 2,
  CU_COMPUTEMODE_EXCLUSIVE_PROCESS = 3,
} CUcompute_mode;

// Event flag values; the non-zero ones are distinct single bits.
// NOTE(review): presumably intended for the `unsigned int` argument of
// cuEventCreate, which is declared with a plain integer type -- confirm.
typedef enum CUevent_flags_enum {
  CU_EVENT_DEFAULT = 0x0,
  CU_EVENT_BLOCKING_SYNC = 0x1,
  CU_EVENT_DISABLE_TIMING = 0x2,
  CU_EVENT_INTERPROCESS = 0x4
} CUevent_flags;

// Device / function queries. Prototypes only: this header declares the
// functions but does not define them. Each returns a CUresult and writes its
// result through the leading pointer argument.
CUresult cuCtxGetDevice(CUdevice *);
CUresult cuDeviceGet(CUdevice *, int);
CUresult cuDeviceGetAttribute(int *, CUdevice_attribute, CUdevice);
CUresult cuDeviceGetCount(int *);
CUresult cuFuncGetAttribute(int *, CUfunction_attribute, CUfunction);

// Device info
|
|
CUresult cuDeviceGetName(char *, int, CUdevice);
|
|
CUresult cuDeviceTotalMem(size_t *, CUdevice);
|
|
CUresult cuDriverGetVersion(int *);
|
|
|
|
// Error-string lookup, initialisation and kernel launch (prototypes only).
CUresult cuGetErrorString(CUresult, const char **);
CUresult cuInit(unsigned);
// Takes a function handle, seven unsigned values, a stream and two `void **`
// arrays.
// NOTE(review): in the CUDA driver API the unsigned values are grid x/y/z,
// block x/y/z and dynamic shared-memory bytes, in that order -- confirm
// against the driver API reference before relying on it.
CUresult cuLaunchKernel(CUfunction, unsigned, unsigned, unsigned, unsigned,
                        unsigned, unsigned, unsigned, CUstream, void **,
                        void **);

// Memory allocation (prototypes only). Each writes the allocation through
// its first pointer argument and takes a size_t byte count.
CUresult cuMemAlloc(CUdeviceptr *, size_t);
CUresult cuMemAllocHost(void **, size_t);
CUresult cuMemAllocManaged(CUdeviceptr *, size_t, unsigned int);
CUresult cuMemAllocAsync(CUdeviceptr *, size_t, CUstream);

// Memory copies (prototypes only). The *Async variants take an additional
// CUstream argument. The host-source variants take `const void *`, the
// host-destination variants take `void *`.
CUresult cuMemcpyDtoDAsync(CUdeviceptr, CUdeviceptr, size_t, CUstream);
CUresult cuMemcpyDtoH(void *, CUdeviceptr, size_t);
CUresult cuMemcpyDtoHAsync(void *, CUdeviceptr, size_t, CUstream);
CUresult cuMemcpyHtoD(CUdeviceptr, const void *, size_t);
CUresult cuMemcpyHtoDAsync(CUdeviceptr, const void *, size_t, CUstream);

// Memory release (prototypes only); argument types parallel the
// cuMemAlloc / cuMemAllocHost / cuMemAllocAsync declarations.
CUresult cuMemFree(CUdeviceptr);
CUresult cuMemFreeHost(void *);
CUresult cuMemFreeAsync(CUdeviceptr, CUstream);

// Module symbol lookup by C-string name (prototypes only). Results are
// written through the leading pointer argument(s).
CUresult cuModuleGetFunction(CUfunction *, CUmodule, const char *);
CUresult cuModuleGetGlobal(CUdeviceptr *, size_t *, CUmodule, const char *);

// Module, stream and context management (prototypes only).
CUresult cuModuleUnload(CUmodule);
CUresult cuStreamCreate(CUstream *, unsigned);
CUresult cuStreamDestroy(CUstream);
CUresult cuStreamSynchronize(CUstream);
CUresult cuStreamQuery(CUstream);
CUresult cuCtxSetCurrent(CUcontext);
CUresult cuDevicePrimaryCtxRelease(CUdevice);
CUresult cuDevicePrimaryCtxGetState(CUdevice, unsigned *, int *);
CUresult cuDevicePrimaryCtxSetFlags(CUdevice, unsigned);
CUresult cuDevicePrimaryCtxRetain(CUcontext *, CUdevice);
// NOTE(review): the option-array argument is declared here as `void *`;
// kept exactly as in the original declaration.
CUresult cuModuleLoadDataEx(CUmodule *, const void *, unsigned, void *,
                            void **);

// Peer-to-peer access and copy (prototypes only).
CUresult cuDeviceCanAccessPeer(int *, CUdevice, CUdevice);
CUresult cuCtxEnablePeerAccess(CUcontext, unsigned);
CUresult cuMemcpyPeerAsync(CUdeviceptr, CUcontext, CUdeviceptr, CUcontext,
                           size_t, CUstream);

// Context limit get/set, selected by a CUlimit enumerator (prototypes only).
// Note the argument order differs: the getter takes the output pointer
// first, the setter takes the selector first.
CUresult cuCtxGetLimit(size_t *, CUlimit);
CUresult cuCtxSetLimit(CUlimit, size_t);

// Event management and stream/event dependency (prototypes only).
CUresult cuEventCreate(CUevent *, unsigned int);
CUresult cuEventRecord(CUevent, CUstream);
CUresult cuStreamWaitEvent(CUstream, CUevent, unsigned int);
CUresult cuEventSynchronize(CUevent);
CUresult cuEventDestroy(CUevent);

// Address-range reservation, allocation-handle and mapping functions, plus
// cuMemGetInfo (prototypes only). These use the CUmemGenericAllocationHandle,
// CUmemAllocationProp, CUmemAccessDesc and CUmemAllocationGranularity_flags
// types declared earlier in this header.
CUresult cuMemUnmap(CUdeviceptr ptr, size_t size);
CUresult cuMemRelease(CUmemGenericAllocationHandle handle);
CUresult cuMemAddressFree(CUdeviceptr ptr, size_t size);
CUresult cuMemGetInfo(size_t *free, size_t *total);
CUresult cuMemAddressReserve(CUdeviceptr *ptr, size_t size, size_t alignment,
                             CUdeviceptr addr, unsigned long long flags);
CUresult cuMemMap(CUdeviceptr ptr, size_t size, size_t offset,
                  CUmemGenericAllocationHandle handle,
                  unsigned long long flags);
CUresult cuMemCreate(CUmemGenericAllocationHandle *handle, size_t size,
                     const CUmemAllocationProp *prop, unsigned long long flags);
// `desc` points to `count` descriptors.
CUresult cuMemSetAccess(CUdeviceptr ptr, size_t size,
                        const CUmemAccessDesc *desc, size_t count);
CUresult cuMemGetAllocationGranularity(size_t *granularity,
                                       const CUmemAllocationProp *prop,
                                       CUmemAllocationGranularity_flags option);

#endif // DYNAMIC_CUDA_CUDA_H_INCLUDED