226 lines
6.8 KiB
C++
226 lines
6.8 KiB
C++
//===-- Generic device loader interface -----------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
|
|
|
|
#ifndef LLVM_LIBC_UTILS_GPU_LOADER_LOADER_H
|
|
#define LLVM_LIBC_UTILS_GPU_LOADER_LOADER_H
|
|
|
|
#include "utils/gpu/server/rpc_server.h"
|
|
|
|
#include "include/llvm-libc-types/test_rpc_opcodes_t.h"
|
|
|
|
#include <cstddef>
|
|
#include <cstdint>
|
|
#include <cstdio>
|
|
#include <cstdlib>
|
|
#include <cstring>
|
|
|
|
/// Generic launch parameters for configuring the number of blocks / threads.
///
/// Plain aggregate of six 32-bit counts; it can be brace-initialized in
/// declaration order (threads x/y/z first, then blocks x/y/z).
struct LaunchParameters {
  uint32_t num_threads_x; ///< Thread count along the x dimension.
  uint32_t num_threads_y; ///< Thread count along the y dimension.
  uint32_t num_threads_z; ///< Thread count along the z dimension.
  uint32_t num_blocks_x;  ///< Block count along the x dimension.
  uint32_t num_blocks_y;  ///< Block count along the y dimension.
  uint32_t num_blocks_z;  ///< Block count along the z dimension.
};
|
|
|
|
/// The arguments to the '_begin' kernel.
///
/// The member order and types form the layout handed to the kernel, so they
/// must not be rearranged.
struct begin_args_t {
  int argc;   ///< Number of entries in the argument vector.
  void *argv; ///< Argument vector (NOTE(review): presumably a device-visible
              ///< copy of the host argv -- confirm against the loaders).
  void *envp; ///< Environment vector (NOTE(review): presumably a
              ///< device-visible copy of the host envp -- confirm).
};
|
|
|
|
/// The arguments to the '_start' kernel.
///
/// The member order and types form the layout handed to the kernel, so they
/// must not be rearranged.
struct start_args_t {
  int argc;   ///< Number of entries in the argument vector.
  void *argv; ///< Argument vector (NOTE(review): presumably a device-visible
              ///< copy of the host argv -- confirm against the loaders).
  void *envp; ///< Environment vector (NOTE(review): presumably a
              ///< device-visible copy of the host envp -- confirm).
  void *ret;  ///< Result slot (NOTE(review): presumably where the kernel
              ///< stores the value returned by `main` -- confirm).
};
|
|
|
|
/// The arguments to the '_end' kernel.
struct end_args_t {
  int argc; ///< Single integer argument passed to the kernel.
            ///< NOTE(review): the name suggests an argument count, but its
            ///< actual meaning for '_end' is not visible here -- confirm.
};
|
|
|
|
/// Generic interface to load the \p image and launch execution of the _start
|
|
/// kernel on the target device. Copies \p argc and \p argv to the device.
|
|
/// Returns the final value of the `main` function on the device.
|
|
int load(int argc, char **argv, char **evnp, void *image, size_t size,
|
|
const LaunchParameters ¶ms);
|
|
|
|
/// Return \p val rounded "upwards" to the next multiple of \p align.
///
/// \p align is converted to \p V before use, and a value that is already a
/// multiple of \p align is returned unchanged. \p align must be non-zero
/// because it is used as a divisor.
template <typename V, typename A> inline V align_up(V val, A align) {
  return ((val + V(align) - 1) / V(align)) * V(align);
}
|
|
|
|
/// Copy the system's argument vector to GPU memory allocated using \p alloc.
///
/// A single buffer is requested from \p alloc and laid out as:
///   [ argc + 1 pointers ][ string 0 '\0' string 1 '\0' ... ]
/// Pointer `i` refers to the copy of `argv[i]` inside that same buffer and the
/// pointer at index \p argc is null. The buffer is filled from the host with
/// `memcpy`, so the memory returned by \p alloc must be writable from the host.
///
/// \param argc  Number of strings to copy from \p argv.
/// \param argv  Source strings; entries [0, argc) must be null-terminated.
/// \param alloc Callable invoked exactly once as `alloc(size_in_bytes)`; its
///              result is used as a `void *` to the destination buffer.
/// \returns The start of the buffer (the pointer array), or nullptr if
///          \p alloc returned a null pointer.
template <typename Allocator>
void *copy_argument_vector(int argc, char **argv, Allocator alloc) {
  const size_t argv_size = sizeof(char *) * (argc + 1);
  size_t str_size = 0;
  for (int i = 0; i < argc; ++i)
    str_size += std::strlen(argv[i]) + 1;

  // We allocate enough space for a null terminated array and all the strings.
  void *dev_argv = alloc(argv_size + str_size);
  if (!dev_argv)
    return nullptr;

  // Store the strings linearly in the same memory buffer, right after the
  // pointer array.
  void **dev_ptrs = static_cast<void **>(dev_argv);
  char *dev_str = static_cast<char *>(dev_argv) + argv_size;
  for (int i = 0; i < argc; ++i) {
    const size_t size = std::strlen(argv[i]) + 1;
    std::memcpy(dev_str, argv[i], size);
    dev_ptrs[i] = dev_str;
    dev_str += size;
  }

  // Ensure the vector is null terminated. The terminator lives in pointer slot
  // `argc`; indexing with the byte count `argv_size` would write far past the
  // pointer array and leave slot `argc` uninitialized.
  dev_ptrs[argc] = nullptr;
  return dev_argv;
}
|
|
|
|
/// Copy the system's environment to GPU memory allocated using \p alloc.
///
/// Counts the entries of \p envp up to (not including) its terminating null
/// pointer and forwards that count, \p envp and \p alloc to
/// `copy_argument_vector`, whose result is returned unchanged.
///
/// \param envp  Null-terminated array of environment strings. A null \p envp
///              is treated as an empty environment.
/// \param alloc Allocator forwarded to `copy_argument_vector`.
template <typename Allocator>
void *copy_environment(char **envp, Allocator alloc) {
  int envc = 0;
  // Guard against a missing environment; with a count of zero the callee never
  // dereferences `envp`.
  if (envp)
    while (envp[envc] != nullptr)
      ++envc;

  return copy_argument_vector(envc, envp, alloc);
}
|
|
|
|
/// Print \p msg followed by a newline to stderr and terminate the process with
/// the EXIT_FAILURE status. This function does not return.
[[noreturn]] inline void handle_error(const char *msg) {
  std::fprintf(stderr, "%s\n", msg);
  std::exit(EXIT_FAILURE);
}
|
|
|
|
/// Report a failure of the RPC server and terminate the process.
///
/// The status value itself is ignored; a fixed message is forwarded to the
/// string overload of `handle_error`. The message already ends in a newline
/// and the string overload appends another one, so the output is followed by
/// an empty line.
inline void handle_error(rpc_status_t) {
  handle_error("Failure in the RPC server\n");
}
|
|
|
|
template <uint32_t lane_size>
|
|
inline void register_rpc_callbacks(uint32_t device_id) {
|
|
static_assert(lane_size == 32 || lane_size == 64, "Invalid Lane size");
|
|
// Register the ping test for the `libc` tests.
|
|
rpc_register_callback(
|
|
device_id, static_cast<rpc_opcode_t>(RPC_TEST_INCREMENT),
|
|
[](rpc_port_t port, void *data) {
|
|
rpc_recv_and_send(
|
|
port,
|
|
[](rpc_buffer_t *buffer, void *data) {
|
|
reinterpret_cast<uint64_t *>(buffer->data)[0] += 1;
|
|
},
|
|
data);
|
|
},
|
|
nullptr);
|
|
|
|
// Register the interface test callbacks.
|
|
rpc_register_callback(
|
|
device_id, static_cast<rpc_opcode_t>(RPC_TEST_INTERFACE),
|
|
[](rpc_port_t port, void *data) {
|
|
uint64_t cnt = 0;
|
|
bool end_with_recv;
|
|
rpc_recv(
|
|
port,
|
|
[](rpc_buffer_t *buffer, void *data) {
|
|
*reinterpret_cast<bool *>(data) = buffer->data[0];
|
|
},
|
|
&end_with_recv);
|
|
rpc_recv(
|
|
port,
|
|
[](rpc_buffer_t *buffer, void *data) {
|
|
*reinterpret_cast<uint64_t *>(data) = buffer->data[0];
|
|
},
|
|
&cnt);
|
|
rpc_send(
|
|
port,
|
|
[](rpc_buffer_t *buffer, void *data) {
|
|
uint64_t &cnt = *reinterpret_cast<uint64_t *>(data);
|
|
buffer->data[0] = cnt = cnt + 1;
|
|
},
|
|
&cnt);
|
|
rpc_recv(
|
|
port,
|
|
[](rpc_buffer_t *buffer, void *data) {
|
|
*reinterpret_cast<uint64_t *>(data) = buffer->data[0];
|
|
},
|
|
&cnt);
|
|
rpc_send(
|
|
port,
|
|
[](rpc_buffer_t *buffer, void *data) {
|
|
uint64_t &cnt = *reinterpret_cast<uint64_t *>(data);
|
|
buffer->data[0] = cnt = cnt + 1;
|
|
},
|
|
&cnt);
|
|
rpc_recv(
|
|
port,
|
|
[](rpc_buffer_t *buffer, void *data) {
|
|
*reinterpret_cast<uint64_t *>(data) = buffer->data[0];
|
|
},
|
|
&cnt);
|
|
rpc_recv(
|
|
port,
|
|
[](rpc_buffer_t *buffer, void *data) {
|
|
*reinterpret_cast<uint64_t *>(data) = buffer->data[0];
|
|
},
|
|
&cnt);
|
|
rpc_send(
|
|
port,
|
|
[](rpc_buffer_t *buffer, void *data) {
|
|
uint64_t &cnt = *reinterpret_cast<uint64_t *>(data);
|
|
buffer->data[0] = cnt = cnt + 1;
|
|
},
|
|
&cnt);
|
|
rpc_send(
|
|
port,
|
|
[](rpc_buffer_t *buffer, void *data) {
|
|
uint64_t &cnt = *reinterpret_cast<uint64_t *>(data);
|
|
buffer->data[0] = cnt = cnt + 1;
|
|
},
|
|
&cnt);
|
|
if (end_with_recv)
|
|
rpc_recv(
|
|
port,
|
|
[](rpc_buffer_t *buffer, void *data) {
|
|
*reinterpret_cast<uint64_t *>(data) = buffer->data[0];
|
|
},
|
|
&cnt);
|
|
else
|
|
rpc_send(
|
|
port,
|
|
[](rpc_buffer_t *buffer, void *data) {
|
|
uint64_t &cnt = *reinterpret_cast<uint64_t *>(data);
|
|
buffer->data[0] = cnt = cnt + 1;
|
|
},
|
|
&cnt);
|
|
},
|
|
nullptr);
|
|
|
|
// Register the stream test handler.
|
|
rpc_register_callback(
|
|
device_id, static_cast<rpc_opcode_t>(RPC_TEST_STREAM),
|
|
[](rpc_port_t port, void *data) {
|
|
uint64_t sizes[lane_size] = {0};
|
|
void *dst[lane_size] = {nullptr};
|
|
rpc_recv_n(
|
|
port, dst, sizes,
|
|
[](uint64_t size, void *) -> void * { return new char[size]; },
|
|
nullptr);
|
|
rpc_send_n(port, dst, sizes);
|
|
for (uint64_t i = 0; i < lane_size; ++i) {
|
|
if (dst[i])
|
|
delete[] reinterpret_cast<uint8_t *>(dst[i]);
|
|
}
|
|
},
|
|
nullptr);
|
|
}
|
|
|
|
#endif
|