237 lines
11 KiB
C++
237 lines
11 KiB
C++
//===-- Shared/PluginAPI.h - Target independent plugin API ------*- C++ -*-===//
|
|
//
|
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
// See https://llvm.org/LICENSE.txt for license information.
|
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
//
|
|
// This file defines an interface between target independent OpenMP offload
|
|
// runtime library libomptarget and target dependent plugin.
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
#ifndef OMPTARGET_SHARED_PLUGIN_API_H
|
|
#define OMPTARGET_SHARED_PLUGIN_API_H
|
|
|
|
#include <cstddef>
|
|
#include <cstdint>
|
|
|
|
#include "Shared/APITypes.h"
|
|
|
|
extern "C" {
|
|
|
|
// First method called on the plugin
|
|
int32_t __tgt_rtl_init_plugin();
|
|
|
|
// Return the number of available devices of the type supported by the
|
|
// target RTL.
|
|
int32_t __tgt_rtl_number_of_devices(void);
|
|
|
|
// Return an integer different from zero if the provided device image can be
|
|
// supported by the runtime. The functionality is similar to comparing the
|
|
// result of __tgt__rtl__load__binary to NULL. However, this is meant to be a
|
|
// lightweight query to determine if the RTL is suitable for an image without
|
|
// having to load the library, which can be expensive.
|
|
int32_t __tgt_rtl_is_valid_binary(__tgt_device_image *Image);
|
|
|
|
// Return an integer other than zero if the data can be exchaned from SrcDevId
|
|
// to DstDevId. If it is data exchangable, the device plugin should provide
|
|
// function to move data from source device to destination device directly.
|
|
int32_t __tgt_rtl_is_data_exchangable(int32_t SrcDevId, int32_t DstDevId);
|
|
|
|
// Return an integer other than zero if the plugin can handle images which do
|
|
// not contain target regions and global variables (but can contain other
|
|
// functions)
|
|
int32_t __tgt_rtl_supports_empty_images();
|
|
|
|
// Initialize the requires flags for the device.
|
|
int64_t __tgt_rtl_init_requires(int64_t RequiresFlags);
|
|
|
|
// Initialize the specified device. In case of success return 0; otherwise
|
|
// return an error code.
|
|
int32_t __tgt_rtl_init_device(int32_t ID);
|
|
|
|
// Pass an executable image section described by image to the specified
|
|
// device and prepare an address table of target entities. In case of error,
|
|
// return NULL. Otherwise, return a pointer to the built address table.
|
|
// Individual entries in the table may also be NULL, when the corresponding
|
|
// offload region is not supported on the target device.
|
|
int32_t __tgt_rtl_load_binary(int32_t ID, __tgt_device_image *Image,
|
|
__tgt_device_binary *Binary);
|
|
|
|
// Look up the device address of the named symbol in the given binary. Returns
|
|
// non-zero on failure.
|
|
int32_t __tgt_rtl_get_global(__tgt_device_binary Binary, uint64_t Size,
|
|
const char *Name, void **DevicePtr);
|
|
|
|
// Look up the device address of the named kernel in the given binary. Returns
|
|
// non-zero on failure.
|
|
int32_t __tgt_rtl_get_function(__tgt_device_binary Binary, const char *Name,
|
|
void **DevicePtr);
|
|
|
|
// Allocate data on the particular target device, of the specified size.
|
|
// HostPtr is a address of the host data the allocated target data
|
|
// will be associated with (HostPtr may be NULL if it is not known at
|
|
// allocation time, like for example it would be for target data that
|
|
// is allocated by omp_target_alloc() API). Return address of the
|
|
// allocated data on the target that will be used by libomptarget.so to
|
|
// initialize the target data mapping structures. These addresses are
|
|
// used to generate a table of target variables to pass to
|
|
// __tgt_rtl_run_region(). The __tgt_rtl_data_alloc() returns NULL in
|
|
// case an error occurred on the target device. Kind dictates what allocator
|
|
// to use (e.g. shared, host, device).
|
|
void *__tgt_rtl_data_alloc(int32_t ID, int64_t Size, void *HostPtr,
|
|
int32_t Kind);
|
|
|
|
// Pass the data content to the target device using the target address. In case
|
|
// of success, return zero. Otherwise, return an error code.
|
|
int32_t __tgt_rtl_data_submit(int32_t ID, void *TargetPtr, void *HostPtr,
|
|
int64_t Size);
|
|
|
|
int32_t __tgt_rtl_data_submit_async(int32_t ID, void *TargetPtr, void *HostPtr,
|
|
int64_t Size, __tgt_async_info *AsyncInfo);
|
|
|
|
// Retrieve the data content from the target device using its address. In case
|
|
// of success, return zero. Otherwise, return an error code.
|
|
int32_t __tgt_rtl_data_retrieve(int32_t ID, void *HostPtr, void *TargetPtr,
|
|
int64_t Size);
|
|
|
|
// Asynchronous version of __tgt_rtl_data_retrieve
|
|
int32_t __tgt_rtl_data_retrieve_async(int32_t ID, void *HostPtr,
|
|
void *TargetPtr, int64_t Size,
|
|
__tgt_async_info *AsyncInfo);
|
|
|
|
// Copy the data content from one target device to another target device using
|
|
// its address. This operation does not need to copy data back to host and then
|
|
// from host to another device. In case of success, return zero. Otherwise,
|
|
// return an error code.
|
|
int32_t __tgt_rtl_data_exchange(int32_t SrcID, void *SrcPtr, int32_t DstID,
|
|
void *DstPtr, int64_t Size);
|
|
|
|
// Asynchronous version of __tgt_rtl_data_exchange
|
|
int32_t __tgt_rtl_data_exchange_async(int32_t SrcID, void *SrcPtr,
|
|
int32_t DesID, void *DstPtr, int64_t Size,
|
|
__tgt_async_info *AsyncInfo);
|
|
|
|
// De-allocate the data referenced by target ptr on the device. In case of
|
|
// success, return zero. Otherwise, return an error code. Kind dictates what
|
|
// allocator to use (e.g. shared, host, device).
|
|
int32_t __tgt_rtl_data_delete(int32_t ID, void *TargetPtr, int32_t Kind);
|
|
|
|
// Transfer control to the offloaded entry Entry on the target device.
|
|
// Args and Offsets are arrays of NumArgs size of target addresses and
|
|
// offsets. An offset should be added to the target address before passing it
|
|
// to the outlined function on device side. If AsyncInfo is nullptr, it is
|
|
// synchronous; otherwise it is asynchronous. However, AsyncInfo may be
|
|
// ignored on some platforms, like x86_64. In that case, it is synchronous. In
|
|
// case of success, return zero. Otherwise, return an error code.
|
|
int32_t __tgt_rtl_run_target_region(int32_t ID, void *Entry, void **Args,
|
|
ptrdiff_t *Offsets, int32_t NumArgs);
|
|
|
|
// Asynchronous version of __tgt_rtl_run_target_region
|
|
int32_t __tgt_rtl_run_target_region_async(int32_t ID, void *Entry, void **Args,
|
|
ptrdiff_t *Offsets, int32_t NumArgs,
|
|
__tgt_async_info *AsyncInfo);
|
|
|
|
// Similar to __tgt_rtl_run_target_region, but additionally specify the
|
|
// number of teams to be created and a number of threads in each team. If
|
|
// AsyncInfo is nullptr, it is synchronous; otherwise it is asynchronous.
|
|
// However, AsyncInfo may be ignored on some platforms, like x86_64. In that
|
|
// case, it is synchronous.
|
|
int32_t __tgt_rtl_run_target_team_region(int32_t ID, void *Entry, void **Args,
|
|
ptrdiff_t *Offsets, int32_t NumArgs,
|
|
int32_t NumTeams, int32_t ThreadLimit,
|
|
uint64_t LoopTripcount);
|
|
|
|
// Asynchronous version of __tgt_rtl_run_target_team_region
|
|
int32_t __tgt_rtl_run_target_team_region_async(
|
|
int32_t ID, void *Entry, void **Args, ptrdiff_t *Offsets, int32_t NumArgs,
|
|
int32_t NumTeams, int32_t ThreadLimit, uint64_t LoopTripcount,
|
|
__tgt_async_info *AsyncInfo);
|
|
|
|
// Device synchronization. In case of success, return zero. Otherwise, return an
|
|
// error code.
|
|
int32_t __tgt_rtl_synchronize(int32_t ID, __tgt_async_info *AsyncInfo);
|
|
|
|
// Queries for the completion of asynchronous operations. Instead of blocking
|
|
// the calling thread as __tgt_rtl_synchronize, the progress of the operations
|
|
// stored in AsyncInfo->Queue is queried in a non-blocking manner, partially
|
|
// advancing their execution. If all operations are completed, AsyncInfo->Queue
|
|
// is set to nullptr. If there are still pending operations, AsyncInfo->Queue is
|
|
// kept as a valid queue. In any case of success (i.e., successful query
|
|
// with/without completing all operations), return zero. Otherwise, return an
|
|
// error code.
|
|
int32_t __tgt_rtl_query_async(int32_t ID, __tgt_async_info *AsyncInfo);
|
|
|
|
// Set plugin's internal information flag externally.
|
|
void __tgt_rtl_set_info_flag(uint32_t);
|
|
|
|
// Print the device information
|
|
void __tgt_rtl_print_device_info(int32_t ID);
|
|
|
|
// Event related interfaces. It is expected to use the interfaces in the
|
|
// following way:
|
|
// 1) Create an event on the target device (__tgt_rtl_create_event).
|
|
// 2) Record the event based on the status of \p AsyncInfo->Queue at the moment
|
|
// of function call to __tgt_rtl_record_event. An event becomes "meaningful"
|
|
// once it is recorded, such that others can depend on it.
|
|
// 3) Call __tgt_rtl_wait_event to set dependence on the event. Whether the
|
|
// operation is blocking or non-blocking depends on the target. It is expected
|
|
// to be non-blocking, just set dependence and return.
|
|
// 4) Call __tgt_rtl_sync_event to sync the event. It is expected to block the
|
|
// thread calling the function.
|
|
// 5) Destroy the event (__tgt_rtl_destroy_event).
|
|
// {
|
|
int32_t __tgt_rtl_create_event(int32_t ID, void **Event);
|
|
|
|
int32_t __tgt_rtl_record_event(int32_t ID, void *Event,
|
|
__tgt_async_info *AsyncInfo);
|
|
|
|
int32_t __tgt_rtl_wait_event(int32_t ID, void *Event,
|
|
__tgt_async_info *AsyncInfo);
|
|
|
|
int32_t __tgt_rtl_sync_event(int32_t ID, void *Event);
|
|
|
|
int32_t __tgt_rtl_destroy_event(int32_t ID, void *Event);
|
|
// }
|
|
|
|
int32_t __tgt_rtl_init_async_info(int32_t ID, __tgt_async_info **AsyncInfoPtr);
|
|
int32_t __tgt_rtl_init_device_info(int32_t ID, __tgt_device_info *DeviceInfoPtr,
|
|
const char **ErrStr);
|
|
|
|
// lock/pin host memory
|
|
int32_t __tgt_rtl_data_lock(int32_t ID, void *HstPtr, int64_t Size,
|
|
void **LockedPtr);
|
|
|
|
// unlock/unpin host memory
|
|
int32_t __tgt_rtl_data_unlock(int32_t ID, void *HstPtr);
|
|
|
|
// Notify the plugin about a new mapping starting at the host address \p HstPtr
|
|
// and \p Size bytes. The plugin may lock/pin that buffer to achieve optimal
|
|
// memory transfers involving that buffer.
|
|
int32_t __tgt_rtl_data_notify_mapped(int32_t ID, void *HstPtr, int64_t Size);
|
|
|
|
// Notify the plugin about an existing mapping being unmapped, starting at the
|
|
// host address \p HstPtr and \p Size bytes.
|
|
int32_t __tgt_rtl_data_notify_unmapped(int32_t ID, void *HstPtr);
|
|
|
|
// Set the global device identifier offset, such that the plugin may determine a
|
|
// unique device number.
|
|
int32_t __tgt_rtl_set_device_offset(int32_t DeviceIdOffset);
|
|
|
|
int32_t __tgt_rtl_launch_kernel(int32_t DeviceId, void *TgtEntryPtr,
|
|
void **TgtArgs, ptrdiff_t *TgtOffsets,
|
|
KernelArgsTy *KernelArgs,
|
|
__tgt_async_info *AsyncInfoPtr);
|
|
|
|
int32_t __tgt_rtl_initialize_record_replay(int32_t DeviceId, int64_t MemorySize,
|
|
void *VAddr, bool isRecord,
|
|
bool SaveOutput,
|
|
uint64_t &ReqPtrArgOffset);
|
|
|
|
// Returns true if the device \p DeviceId suggests to use auto zero-copy.
|
|
int32_t __tgt_rtl_use_auto_zero_copy(int32_t DeviceId);
|
|
}
|
|
|
|
#endif // OMPTARGET_SHARED_PLUGIN_API_H
|