318 lines
10 KiB
C
318 lines
10 KiB
C
|
//===----RTLs/amdgpu/utils/UtilitiesRTL.h ------------------------- C++ -*-===//
|
||
|
//
|
||
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||
|
// See https://llvm.org/LICENSE.txt for license information.
|
||
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||
|
//
|
||
|
//===----------------------------------------------------------------------===//
|
||
|
//
|
||
|
// RTL Utilities for AMDGPU plugins
|
||
|
//
|
||
|
//===----------------------------------------------------------------------===//
|
||
|
|
||
|
#include <cstdint>
|
||
|
|
||
|
#include "Shared/Debug.h"
|
||
|
#include "Utils/ELF.h"
|
||
|
|
||
|
#include "omptarget.h"
|
||
|
|
||
|
#include "llvm/ADT/StringMap.h"
|
||
|
#include "llvm/ADT/StringRef.h"
|
||
|
#include "llvm/Support/Error.h"
|
||
|
|
||
|
#include "llvm/BinaryFormat/AMDGPUMetadataVerifier.h"
|
||
|
#include "llvm/BinaryFormat/ELF.h"
|
||
|
#include "llvm/BinaryFormat/MsgPackDocument.h"
|
||
|
#include "llvm/Support/MemoryBufferRef.h"
|
||
|
#include "llvm/Support/YAMLTraits.h"
|
||
|
|
||
|
using namespace llvm::ELF;
|
||
|
|
||
|
namespace llvm {
|
||
|
namespace omp {
|
||
|
namespace target {
|
||
|
namespace plugin {
|
||
|
namespace utils {
|
||
|
|
||
|
/// The implicit arguments of COV5 AMDGPU kernels.
///
/// The `Unused*` arrays are padding: they name nothing themselves and only
/// exist to keep the named fields at their byte offsets and to bring the
/// struct to 256 bytes in total. The offsets noted below follow from the
/// member sizes and natural alignment.
struct AMDGPUImplicitArgsTy {
  uint32_t BlockCountX;    // Byte offset 0.
  uint32_t BlockCountY;    // Byte offset 4.
  uint32_t BlockCountZ;    // Byte offset 8.
  uint16_t GroupSizeX;     // Byte offset 12.
  uint16_t GroupSizeY;     // Byte offset 14.
  uint16_t GroupSizeZ;     // Byte offset 16.
  uint8_t Unused0[46];     // 46 bytes of padding (offsets 18-63).
  uint16_t GridDims;       // Byte offset 64.
  uint8_t Unused1[54];     // 54 bytes of padding (offsets 66-119).
  uint32_t DynamicLdsSize; // Byte offset 120.
  uint8_t Unused2[132];    // 132 bytes of padding (offsets 124-255).
};

// Catch accidental layout changes (e.g. a resized padding array) at compile
// time instead of at kernel launch.
static_assert(sizeof(AMDGPUImplicitArgsTy) == 256,
              "Unexpected size of the COV5 implicit arguments");
|
||
|
|
||
|
/// Placeholder for the implicit arguments of COV4 kernels. No individual
/// argument is named; the struct only supplies the 56-byte size of the block.
struct AMDGPUImplicitArgsTyCOV4 {
  uint8_t Unused[56];
};
|
||
|
|
||
|
inline uint32_t getImplicitArgsSize(uint16_t Version) {
|
||
|
return Version < ELF::ELFABIVERSION_AMDGPU_HSA_V5
|
||
|
? sizeof(AMDGPUImplicitArgsTyCOV4)
|
||
|
: sizeof(AMDGPUImplicitArgsTy);
|
||
|
}
|
||
|
|
||
|
/// Check if an image is compatible with current system's environment. The
|
||
|
/// system environment is given as a 'target-id' which has the form:
|
||
|
///
|
||
|
/// <target-id> := <processor> ( ":" <target-feature> ( "+" | "-" ) )*
|
||
|
///
|
||
|
/// If a feature is not specific as '+' or '-' it is assumed to be in an 'any'
|
||
|
/// and is compatible with either '+' or '-'. The HSA runtime returns this
|
||
|
/// information using the target-id, while we use the ELF header to determine
|
||
|
/// these features.
|
||
|
inline bool isImageCompatibleWithEnv(StringRef ImageArch, uint32_t ImageFlags,
|
||
|
StringRef EnvTargetID) {
|
||
|
StringRef EnvArch = EnvTargetID.split(":").first;
|
||
|
|
||
|
// Trivial check if the base processors match.
|
||
|
if (EnvArch != ImageArch)
|
||
|
return false;
|
||
|
|
||
|
// Check if the image is requesting xnack on or off.
|
||
|
switch (ImageFlags & EF_AMDGPU_FEATURE_XNACK_V4) {
|
||
|
case EF_AMDGPU_FEATURE_XNACK_OFF_V4:
|
||
|
// The image is 'xnack-' so the environment must be 'xnack-'.
|
||
|
if (!EnvTargetID.contains("xnack-"))
|
||
|
return false;
|
||
|
break;
|
||
|
case EF_AMDGPU_FEATURE_XNACK_ON_V4:
|
||
|
// The image is 'xnack+' so the environment must be 'xnack+'.
|
||
|
if (!EnvTargetID.contains("xnack+"))
|
||
|
return false;
|
||
|
break;
|
||
|
case EF_AMDGPU_FEATURE_XNACK_UNSUPPORTED_V4:
|
||
|
case EF_AMDGPU_FEATURE_XNACK_ANY_V4:
|
||
|
default:
|
||
|
break;
|
||
|
}
|
||
|
|
||
|
// Check if the image is requesting sramecc on or off.
|
||
|
switch (ImageFlags & EF_AMDGPU_FEATURE_SRAMECC_V4) {
|
||
|
case EF_AMDGPU_FEATURE_SRAMECC_OFF_V4:
|
||
|
// The image is 'sramecc-' so the environment must be 'sramecc-'.
|
||
|
if (!EnvTargetID.contains("sramecc-"))
|
||
|
return false;
|
||
|
break;
|
||
|
case EF_AMDGPU_FEATURE_SRAMECC_ON_V4:
|
||
|
// The image is 'sramecc+' so the environment must be 'sramecc+'.
|
||
|
if (!EnvTargetID.contains("sramecc+"))
|
||
|
return false;
|
||
|
break;
|
||
|
case EF_AMDGPU_FEATURE_SRAMECC_UNSUPPORTED_V4:
|
||
|
case EF_AMDGPU_FEATURE_SRAMECC_ANY_V4:
|
||
|
break;
|
||
|
}
|
||
|
|
||
|
return true;
|
||
|
}
|
||
|
|
||
|
/// Per-kernel information gathered from the AMDGPU metadata of an image.
///
/// Every member defaults to zero so that a field whose metadata key is absent
/// (or that is never filled in by the metadata reader) has a well-defined
/// value instead of indeterminate stack contents.
struct KernelMetaDataTy {
  uint64_t KernelObject = 0;
  // Filled from the ".group_segment_fixed_size" metadata key.
  uint32_t GroupSegmentList = 0;
  // Filled from the ".private_segment_fixed_size" metadata key.
  uint32_t PrivateSegmentSize = 0;
  uint32_t SGPRCount = 0;      // ".sgpr_count"
  uint32_t VGPRCount = 0;      // ".vgpr_count"
  uint32_t SGPRSpillCount = 0; // ".sgpr_spill_count"
  uint32_t VGPRSpillCount = 0; // ".vgpr_spill_count"
  uint32_t KernelSegmentSize = 0;
  uint32_t ExplicitArgumentCount = 0;
  uint32_t ImplicitArgumentCount = 0;
  uint32_t RequestedWorkgroupSize[3] = {0, 0, 0}; // ".reqd_workgroup_size"
  uint32_t WorkgroupSizeHint[3] = {0, 0, 0};      // ".workgroup_size_hint"
  // Filled from ".wavefront_size". The member name (missing 't') is kept as
  // is because it is part of the struct's public interface.
  uint32_t WavefronSize = 0;
  uint32_t MaxFlatWorkgroupSize = 0; // ".max_flat_workgroup_size"
};
|
||
|
namespace {
|
||
|
|
||
|
/// Reads the AMDGPU specific per-kernel-metadata from an image.
|
||
|
class KernelInfoReader {
|
||
|
public:
|
||
|
KernelInfoReader(StringMap<KernelMetaDataTy> &KIM) : KernelInfoMap(KIM) {}
|
||
|
|
||
|
/// Process ELF note to read AMDGPU metadata from respective information
|
||
|
/// fields.
|
||
|
Error processNote(const object::ELF64LE::Note &Note, size_t Align) {
|
||
|
if (Note.getName() != "AMDGPU")
|
||
|
return Error::success(); // We are not interested in other things
|
||
|
|
||
|
assert(Note.getType() == ELF::NT_AMDGPU_METADATA &&
|
||
|
"Parse AMDGPU MetaData");
|
||
|
auto Desc = Note.getDesc(Align);
|
||
|
StringRef MsgPackString =
|
||
|
StringRef(reinterpret_cast<const char *>(Desc.data()), Desc.size());
|
||
|
msgpack::Document MsgPackDoc;
|
||
|
if (!MsgPackDoc.readFromBlob(MsgPackString, /*Multi=*/false))
|
||
|
return Error::success();
|
||
|
|
||
|
AMDGPU::HSAMD::V3::MetadataVerifier Verifier(true);
|
||
|
if (!Verifier.verify(MsgPackDoc.getRoot()))
|
||
|
return Error::success();
|
||
|
|
||
|
auto RootMap = MsgPackDoc.getRoot().getMap(true);
|
||
|
|
||
|
if (auto Err = iterateAMDKernels(RootMap))
|
||
|
return Err;
|
||
|
|
||
|
return Error::success();
|
||
|
}
|
||
|
|
||
|
private:
|
||
|
/// Extracts the relevant information via simple string look-up in the msgpack
|
||
|
/// document elements.
|
||
|
Error extractKernelData(msgpack::MapDocNode::MapTy::value_type V,
|
||
|
std::string &KernelName,
|
||
|
KernelMetaDataTy &KernelData) {
|
||
|
if (!V.first.isString())
|
||
|
return Error::success();
|
||
|
|
||
|
const auto IsKey = [](const msgpack::DocNode &DK, StringRef SK) {
|
||
|
return DK.getString() == SK;
|
||
|
};
|
||
|
|
||
|
const auto GetSequenceOfThreeInts = [](msgpack::DocNode &DN,
|
||
|
uint32_t *Vals) {
|
||
|
assert(DN.isArray() && "MsgPack DocNode is an array node");
|
||
|
auto DNA = DN.getArray();
|
||
|
assert(DNA.size() == 3 && "ArrayNode has at most three elements");
|
||
|
|
||
|
int I = 0;
|
||
|
for (auto DNABegin = DNA.begin(), DNAEnd = DNA.end(); DNABegin != DNAEnd;
|
||
|
++DNABegin) {
|
||
|
Vals[I++] = DNABegin->getUInt();
|
||
|
}
|
||
|
};
|
||
|
|
||
|
if (IsKey(V.first, ".name")) {
|
||
|
KernelName = V.second.toString();
|
||
|
} else if (IsKey(V.first, ".sgpr_count")) {
|
||
|
KernelData.SGPRCount = V.second.getUInt();
|
||
|
} else if (IsKey(V.first, ".sgpr_spill_count")) {
|
||
|
KernelData.SGPRSpillCount = V.second.getUInt();
|
||
|
} else if (IsKey(V.first, ".vgpr_count")) {
|
||
|
KernelData.VGPRCount = V.second.getUInt();
|
||
|
} else if (IsKey(V.first, ".vgpr_spill_count")) {
|
||
|
KernelData.VGPRSpillCount = V.second.getUInt();
|
||
|
} else if (IsKey(V.first, ".private_segment_fixed_size")) {
|
||
|
KernelData.PrivateSegmentSize = V.second.getUInt();
|
||
|
} else if (IsKey(V.first, ".group_segment_fixed_size")) {
|
||
|
KernelData.GroupSegmentList = V.second.getUInt();
|
||
|
} else if (IsKey(V.first, ".reqd_workgroup_size")) {
|
||
|
GetSequenceOfThreeInts(V.second, KernelData.RequestedWorkgroupSize);
|
||
|
} else if (IsKey(V.first, ".workgroup_size_hint")) {
|
||
|
GetSequenceOfThreeInts(V.second, KernelData.WorkgroupSizeHint);
|
||
|
} else if (IsKey(V.first, ".wavefront_size")) {
|
||
|
KernelData.WavefronSize = V.second.getUInt();
|
||
|
} else if (IsKey(V.first, ".max_flat_workgroup_size")) {
|
||
|
KernelData.MaxFlatWorkgroupSize = V.second.getUInt();
|
||
|
}
|
||
|
|
||
|
return Error::success();
|
||
|
}
|
||
|
|
||
|
/// Get the "amdhsa.kernels" element from the msgpack Document
|
||
|
Expected<msgpack::ArrayDocNode> getAMDKernelsArray(msgpack::MapDocNode &MDN) {
|
||
|
auto Res = MDN.find("amdhsa.kernels");
|
||
|
if (Res == MDN.end())
|
||
|
return createStringError(inconvertibleErrorCode(),
|
||
|
"Could not find amdhsa.kernels key");
|
||
|
|
||
|
auto Pair = *Res;
|
||
|
assert(Pair.second.isArray() &&
|
||
|
"AMDGPU kernel entries are arrays of entries");
|
||
|
|
||
|
return Pair.second.getArray();
|
||
|
}
|
||
|
|
||
|
/// Iterate all entries for one "amdhsa.kernels" entry. Each entry is a
|
||
|
/// MapDocNode that either maps a string to a single value (most of them) or
|
||
|
/// to another array of things. Currently, we only handle the case that maps
|
||
|
/// to scalar value.
|
||
|
Error generateKernelInfo(msgpack::ArrayDocNode::ArrayTy::iterator It) {
|
||
|
KernelMetaDataTy KernelData;
|
||
|
std::string KernelName;
|
||
|
auto Entry = (*It).getMap();
|
||
|
for (auto MI = Entry.begin(), E = Entry.end(); MI != E; ++MI)
|
||
|
if (auto Err = extractKernelData(*MI, KernelName, KernelData))
|
||
|
return Err;
|
||
|
|
||
|
KernelInfoMap.insert({KernelName, KernelData});
|
||
|
return Error::success();
|
||
|
}
|
||
|
|
||
|
/// Go over the list of AMD kernels in the "amdhsa.kernels" entry
|
||
|
Error iterateAMDKernels(msgpack::MapDocNode &MDN) {
|
||
|
auto KernelsOrErr = getAMDKernelsArray(MDN);
|
||
|
if (auto Err = KernelsOrErr.takeError())
|
||
|
return Err;
|
||
|
|
||
|
auto KernelsArr = *KernelsOrErr;
|
||
|
for (auto It = KernelsArr.begin(), E = KernelsArr.end(); It != E; ++It) {
|
||
|
if (!It->isMap())
|
||
|
continue; // we expect <key,value> pairs
|
||
|
|
||
|
// Obtain the value for the different entries. Each array entry is a
|
||
|
// MapDocNode
|
||
|
if (auto Err = generateKernelInfo(It))
|
||
|
return Err;
|
||
|
}
|
||
|
return Error::success();
|
||
|
}
|
||
|
|
||
|
// Kernel names are the keys
|
||
|
StringMap<KernelMetaDataTy> &KernelInfoMap;
|
||
|
};
|
||
|
} // namespace
|
||
|
|
||
|
/// Reads the AMDGPU specific metadata from the ELF file and propagates the
|
||
|
/// KernelInfoMap
|
||
|
inline Error
|
||
|
readAMDGPUMetaDataFromImage(MemoryBufferRef MemBuffer,
|
||
|
StringMap<KernelMetaDataTy> &KernelInfoMap,
|
||
|
uint16_t &ELFABIVersion) {
|
||
|
Error Err = Error::success(); // Used later as out-parameter
|
||
|
|
||
|
auto ELFOrError = object::ELF64LEFile::create(MemBuffer.getBuffer());
|
||
|
if (auto Err = ELFOrError.takeError())
|
||
|
return Err;
|
||
|
|
||
|
const object::ELF64LEFile ELFObj = ELFOrError.get();
|
||
|
ArrayRef<object::ELF64LE::Shdr> Sections = cantFail(ELFObj.sections());
|
||
|
KernelInfoReader Reader(KernelInfoMap);
|
||
|
|
||
|
// Read the code object version from ELF image header
|
||
|
auto Header = ELFObj.getHeader();
|
||
|
ELFABIVersion = (uint8_t)(Header.e_ident[ELF::EI_ABIVERSION]);
|
||
|
DP("ELFABIVERSION Version: %u\n", ELFABIVersion);
|
||
|
|
||
|
for (const auto &S : Sections) {
|
||
|
if (S.sh_type != ELF::SHT_NOTE)
|
||
|
continue;
|
||
|
|
||
|
for (const auto N : ELFObj.notes(S, Err)) {
|
||
|
if (Err)
|
||
|
return Err;
|
||
|
// Fills the KernelInfoTabel entries in the reader
|
||
|
if ((Err = Reader.processNote(N, S.sh_addralign)))
|
||
|
return Err;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
return Error::success();
|
||
|
}
|
||
|
|
||
|
} // namespace utils
|
||
|
} // namespace plugin
|
||
|
} // namespace target
|
||
|
} // namespace omp
|
||
|
} // namespace llvm
|