//===- SerializeToBlob.cpp - MLIR GPU lowering pass -----------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements a base class for a pass to serialize a gpu module
// into a binary blob that can be executed on a GPU. The binary blob is added
// as a string attribute to the gpu module.
//
//===----------------------------------------------------------------------===//

#include "mlir/Dialect/GPU/IR/GPUDialect.h"
#include "mlir/Dialect/GPU/Transforms/Passes.h"
#include "mlir/Dialect/LLVMIR/LLVMDialect.h"
#include "mlir/ExecutionEngine/OptUtils.h"
#include "mlir/Pass/Pass.h"
#include "mlir/Target/LLVMIR/Dialect/GPU/GPUToLLVMIRTranslation.h"
#include "mlir/Target/LLVMIR/Dialect/LLVMIR/LLVMToLLVMIRTranslation.h"
#include "mlir/Target/LLVMIR/Export.h"
#include "llvm/IR/LegacyPassManager.h"
#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/TargetSelect.h"
#include "llvm/Target/TargetMachine.h"

#include <optional>
#include <string>

#define DEBUG_TYPE "serialize-to-blob"

using namespace mlir;
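
// Default name of the attribute under which the serialized blob is attached
// to the GPU module.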
std::string gpu::getDefaultGpuBinaryAnnotation() { return "gpu.binary"; }

gpu::SerializeToBlobPass::SerializeToBlobPass(TypeID passID)
    : OperationPass<gpu::GPUModuleOp>(passID) {}

gpu::SerializeToBlobPass::SerializeToBlobPass(const SerializeToBlobPass &other)
    : OperationPass<gpu::GPUModuleOp>(other) {}
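
// Lowers `llvmModule` to target assembly (ISA): runs the configured LLVM
// optimizations first and then the target machine's codegen passes. Returns
// std::nullopt on failure.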
std::optional<std::string>
gpu::SerializeToBlobPass::translateToISA(llvm::Module &llvmModule,
                                         llvm::TargetMachine &targetMachine) {
  llvmModule.setDataLayout(targetMachine.createDataLayout());

  if (failed(optimizeLlvm(llvmModule, targetMachine)))
    return std::nullopt;

  std::string targetISA;
  llvm::raw_string_ostream stream(targetISA);

  { // Scope pstream so its destructor flushes the buffered ISA into `stream`.
    llvm::buffer_ostream pstream(stream);
    llvm::legacy::PassManager codegenPasses;

    if (targetMachine.addPassesToEmitFile(codegenPasses, pstream, nullptr,
                                          llvm::CodeGenFileType::AssemblyFile))
      return std::nullopt;

    codegenPasses.run(llvmModule);
  }
  return stream.str();
}
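
// Serializes the GPU module: translate it to LLVM IR, lower that to target
// ISA, serialize the ISA into a binary blob, and attach the blob to the
// module under the `gpuBinaryAnnotation` attribute name.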
void gpu::SerializeToBlobPass::runOnOperation() {
  // Lower the module to an LLVM IR module using a separate context to enable
  // multi-threaded processing.
  llvm::LLVMContext llvmContext;
  std::unique_ptr<llvm::Module> llvmModule = translateToLLVMIR(llvmContext);
  if (!llvmModule)
    return signalPassFailure();

  // Lower the LLVM IR module to target ISA.
  std::unique_ptr<llvm::TargetMachine> targetMachine = createTargetMachine();
  if (!targetMachine)
    return signalPassFailure();

  std::optional<std::string> maybeTargetISA =
      translateToISA(*llvmModule, *targetMachine);

  if (!maybeTargetISA.has_value())
    return signalPassFailure();

  std::string targetISA = std::move(*maybeTargetISA);

  LLVM_DEBUG({
    llvm::dbgs() << "ISA for module: " << getOperation().getNameAttr() << "\n";
    llvm::dbgs() << targetISA << "\n";
    llvm::dbgs().flush();
  });

  // Serialize the target ISA.
  std::unique_ptr<std::vector<char>> blob = serializeISA(targetISA);
  if (!blob)
    return signalPassFailure();

  // Add the blob as a module attribute.
  auto attr =
      StringAttr::get(&getContext(), StringRef(blob->data(), blob->size()));
  getOperation()->setAttr(gpuBinaryAnnotation, attr);
}
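
// Validates the requested optimization level and runs the standard LLVM
// optimization pipeline at that level on `llvmModule`.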
LogicalResult
gpu::SerializeToBlobPass::optimizeLlvm(llvm::Module &llvmModule,
                                       llvm::TargetMachine &targetMachine) {
  int optLevel = this->optLevel.getValue();
  if (optLevel < 0 || optLevel > 3)
    return getOperation().emitError()
           << "invalid optimization level " << optLevel;

  targetMachine.setOptLevel(static_cast<llvm::CodeGenOptLevel>(optLevel));

  auto transformer =
      makeOptimizingTransformer(optLevel, /*sizeLevel=*/0, &targetMachine);
  auto error = transformer(&llvmModule);
  if (error) {
    InFlightDiagnostic mlirError = getOperation()->emitError();
    llvm::handleAllErrors(
        std::move(error), [&mlirError](const llvm::ErrorInfoBase &ei) {
          mlirError << "could not optimize LLVM IR: " << ei.message();
        });
    return mlirError;
  }
  return success();
}
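
// Looks up the LLVM target for the configured `triple` and creates a
// TargetMachine for the requested `chip` and `features`. Emits an error and
// returns null on failure.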
std::unique_ptr<llvm::TargetMachine>
gpu::SerializeToBlobPass::createTargetMachine() {
  Location loc = getOperation().getLoc();
  std::string error;
  const llvm::Target *target =
      llvm::TargetRegistry::lookupTarget(triple, error);
  if (!target) {
    emitError(loc, Twine("failed to lookup target: ") + error);
    return {};
  }
  llvm::TargetMachine *machine =
      target->createTargetMachine(triple, chip, features, {}, {});
  if (!machine) {
    emitError(loc, "failed to create target machine");
    return {};
  }

  return std::unique_ptr<llvm::TargetMachine>{machine};
}
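
// Default lowering of the GPU module to LLVM IR via the registered dialect
// translation interfaces.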
std::unique_ptr<llvm::Module>
gpu::SerializeToBlobPass::translateToLLVMIR(llvm::LLVMContext &llvmContext) {
  return translateModuleToLLVMIR(getOperation(), llvmContext,
                                 "LLVMDialectModule");
}