//===- llvm-omp-kernel-replay.cpp - Replay OpenMP offload kernel ----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This is a command line utility to replay the execution of recorded OpenMP
// offload kernels.
//
//===----------------------------------------------------------------------===//

#include "omptarget.h"

#include "Shared/PluginAPI.h"

#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/ErrorOr.h"
#include "llvm/Support/JSON.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/raw_ostream.h"

#include <cstddef>
#include <cstdint>
#include <cstdlib>
#include <memory>
#include <string>
#include <utility>
#include <vector>

using namespace llvm;
|
|
|
|
cl::OptionCategory ReplayOptions("llvm-omp-kernel-replay Options");
|
|
|
|
// InputFilename - The filename to read the json description of the kernel.
|
|
static cl::opt<std::string> InputFilename(cl::Positional,
|
|
cl::desc("<input kernel json file>"),
|
|
cl::Required);
|
|
|
|
static cl::opt<bool> VerifyOpt(
|
|
"verify",
|
|
cl::desc(
|
|
"Verify device memory post execution against the original output."),
|
|
cl::init(false), cl::cat(ReplayOptions));
|
|
|
|
static cl::opt<bool> SaveOutputOpt(
|
|
"save-output",
|
|
cl::desc("Save the device memory output of the replayed kernel execution."),
|
|
cl::init(false), cl::cat(ReplayOptions));
|
|
|
|
static cl::opt<unsigned> NumTeamsOpt("num-teams",
|
|
cl::desc("Set the number of teams."),
|
|
cl::init(0), cl::cat(ReplayOptions));
|
|
|
|
static cl::opt<unsigned> NumThreadsOpt("num-threads",
|
|
cl::desc("Set the number of threads."),
|
|
cl::init(0), cl::cat(ReplayOptions));
|
|
|
|
static cl::opt<int32_t> DeviceIdOpt("device-id", cl::desc("Set the device id."),
|
|
cl::init(-1), cl::cat(ReplayOptions));
|
|
|
|
int main(int argc, char **argv) {
|
|
cl::HideUnrelatedOptions(ReplayOptions);
|
|
cl::ParseCommandLineOptions(argc, argv, "llvm-omp-kernel-replay\n");
|
|
|
|
ErrorOr<std::unique_ptr<MemoryBuffer>> KernelInfoMB =
|
|
MemoryBuffer::getFile(InputFilename, /*isText=*/true,
|
|
/*RequiresNullTerminator=*/true);
|
|
if (!KernelInfoMB)
|
|
report_fatal_error("Error reading the kernel info json file");
|
|
Expected<json::Value> JsonKernelInfo =
|
|
json::parse(KernelInfoMB.get()->getBuffer());
|
|
if (auto Err = JsonKernelInfo.takeError())
|
|
report_fatal_error("Cannot parse the kernel info json file");
|
|
|
|
auto NumTeamsJson =
|
|
JsonKernelInfo->getAsObject()->getInteger("NumTeamsClause");
|
|
unsigned NumTeams = (NumTeamsOpt > 0 ? NumTeamsOpt : NumTeamsJson.value());
|
|
auto NumThreadsJson =
|
|
JsonKernelInfo->getAsObject()->getInteger("ThreadLimitClause");
|
|
unsigned NumThreads =
|
|
(NumThreadsOpt > 0 ? NumThreadsOpt : NumThreadsJson.value());
|
|
// TODO: Print a warning if number of teams/threads is explicitly set in the
|
|
// kernel info but overriden through command line options.
|
|
auto LoopTripCount =
|
|
JsonKernelInfo->getAsObject()->getInteger("LoopTripCount");
|
|
auto KernelFunc = JsonKernelInfo->getAsObject()->getString("Name");
|
|
|
|
SmallVector<void *> TgtArgs;
|
|
SmallVector<ptrdiff_t> TgtArgOffsets;
|
|
auto NumArgs = JsonKernelInfo->getAsObject()->getInteger("NumArgs");
|
|
auto *TgtArgsArray = JsonKernelInfo->getAsObject()->getArray("ArgPtrs");
|
|
for (auto It : *TgtArgsArray)
|
|
TgtArgs.push_back(reinterpret_cast<void *>(It.getAsInteger().value()));
|
|
auto *TgtArgOffsetsArray =
|
|
JsonKernelInfo->getAsObject()->getArray("ArgOffsets");
|
|
for (auto It : *TgtArgOffsetsArray)
|
|
TgtArgOffsets.push_back(static_cast<ptrdiff_t>(It.getAsInteger().value()));
|
|
|
|
void *BAllocStart = reinterpret_cast<void *>(
|
|
JsonKernelInfo->getAsObject()->getInteger("BumpAllocVAStart").value());
|
|
|
|
__tgt_offload_entry KernelEntry = {nullptr, nullptr, 0, 0, 0};
|
|
std::string KernelEntryName = KernelFunc.value().str();
|
|
KernelEntry.name = const_cast<char *>(KernelEntryName.c_str());
|
|
// Anything non-zero works to uniquely identify the kernel.
|
|
KernelEntry.addr = (void *)0x1;
|
|
|
|
ErrorOr<std::unique_ptr<MemoryBuffer>> ImageMB =
|
|
MemoryBuffer::getFile(KernelEntryName + ".image", /*isText=*/false,
|
|
/*RequiresNullTerminator=*/false);
|
|
if (!ImageMB)
|
|
report_fatal_error("Error reading the kernel image.");
|
|
|
|
__tgt_device_image DeviceImage;
|
|
DeviceImage.ImageStart = const_cast<char *>(ImageMB.get()->getBufferStart());
|
|
DeviceImage.ImageEnd = const_cast<char *>(ImageMB.get()->getBufferEnd());
|
|
DeviceImage.EntriesBegin = &KernelEntry;
|
|
DeviceImage.EntriesEnd = &KernelEntry + 1;
|
|
|
|
__tgt_bin_desc Desc;
|
|
Desc.NumDeviceImages = 1;
|
|
Desc.HostEntriesBegin = &KernelEntry;
|
|
Desc.HostEntriesEnd = &KernelEntry + 1;
|
|
Desc.DeviceImages = &DeviceImage;
|
|
|
|
auto DeviceMemorySizeJson =
|
|
JsonKernelInfo->getAsObject()->getInteger("DeviceMemorySize");
|
|
// Set device memory size to the ceiling of GB granularity.
|
|
uint64_t DeviceMemorySize = std::ceil(DeviceMemorySizeJson.value());
|
|
|
|
auto DeviceIdJson = JsonKernelInfo->getAsObject()->getInteger("DeviceId");
|
|
// TODO: Print warning if the user overrides the device id in the json file.
|
|
int32_t DeviceId = (DeviceIdOpt > -1 ? DeviceIdOpt : DeviceIdJson.value());
|
|
|
|
// TODO: do we need requires?
|
|
//__tgt_register_requires(/*Flags=*/1);
|
|
|
|
__tgt_register_lib(&Desc);
|
|
|
|
uint64_t ReqPtrArgOffset = 0;
|
|
int Rc = __tgt_activate_record_replay(DeviceId, DeviceMemorySize, BAllocStart,
|
|
false, VerifyOpt, ReqPtrArgOffset);
|
|
|
|
if (Rc != OMP_TGT_SUCCESS) {
|
|
report_fatal_error("Cannot activate record replay\n");
|
|
}
|
|
|
|
ErrorOr<std::unique_ptr<MemoryBuffer>> DeviceMemoryMB =
|
|
MemoryBuffer::getFile(KernelEntryName + ".memory", /*isText=*/false,
|
|
/*RequiresNullTerminator=*/false);
|
|
|
|
if (!DeviceMemoryMB)
|
|
report_fatal_error("Error reading the kernel input device memory.");
|
|
|
|
// On AMD for currently unknown reasons we cannot copy memory mapped data to
|
|
// device. This is a work-around.
|
|
uint8_t *recored_data = new uint8_t[DeviceMemoryMB.get()->getBufferSize()];
|
|
std::memcpy(recored_data,
|
|
const_cast<char *>(DeviceMemoryMB.get()->getBuffer().data()),
|
|
DeviceMemoryMB.get()->getBufferSize());
|
|
|
|
// If necessary, adjust pointer arguments.
|
|
if (ReqPtrArgOffset) {
|
|
for (auto *&Arg : TgtArgs) {
|
|
auto ArgInt = uintptr_t(Arg);
|
|
// Try to find pointer arguments.
|
|
if (ArgInt < uintptr_t(BAllocStart) ||
|
|
ArgInt >= uintptr_t(BAllocStart) + DeviceMemorySize)
|
|
continue;
|
|
Arg = reinterpret_cast<void *>(ArgInt - ReqPtrArgOffset);
|
|
}
|
|
}
|
|
|
|
__tgt_target_kernel_replay(
|
|
/*Loc=*/nullptr, DeviceId, KernelEntry.addr, (char *)recored_data,
|
|
DeviceMemoryMB.get()->getBufferSize(), TgtArgs.data(),
|
|
TgtArgOffsets.data(), NumArgs.value(), NumTeams, NumThreads,
|
|
LoopTripCount.value());
|
|
|
|
if (VerifyOpt) {
|
|
ErrorOr<std::unique_ptr<MemoryBuffer>> OriginalOutputMB =
|
|
MemoryBuffer::getFile(KernelEntryName + ".original.output",
|
|
/*isText=*/false,
|
|
/*RequiresNullTerminator=*/false);
|
|
if (!OriginalOutputMB)
|
|
report_fatal_error("Error reading the kernel original output file, make "
|
|
"sure LIBOMPTARGET_SAVE_OUTPUT is set when recording");
|
|
ErrorOr<std::unique_ptr<MemoryBuffer>> ReplayOutputMB =
|
|
MemoryBuffer::getFile(KernelEntryName + ".replay.output",
|
|
/*isText=*/false,
|
|
/*RequiresNullTerminator=*/false);
|
|
if (!ReplayOutputMB)
|
|
report_fatal_error("Error reading the kernel replay output file");
|
|
|
|
StringRef OriginalOutput = OriginalOutputMB.get()->getBuffer();
|
|
StringRef ReplayOutput = ReplayOutputMB.get()->getBuffer();
|
|
if (OriginalOutput == ReplayOutput)
|
|
outs() << "[llvm-omp-kernel-replay] Replay device memory verified!\n";
|
|
else
|
|
outs() << "[llvm-omp-kernel-replay] Replay device memory failed to "
|
|
"verify!\n";
|
|
}
|
|
|
|
delete[] recored_data;
|
|
|
|
return 0;
|
|
}
|