//===- SerializeToBlob.cpp - MLIR GPU lowering pass -----------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // // This file implements a base class for a pass to serialize a gpu module // into a binary blob that can be executed on a GPU. The binary blob is added // as a string attribute to the gpu module. // //===----------------------------------------------------------------------===// #include "mlir/Dialect/GPU/IR/GPUDialect.h" #include "mlir/Dialect/GPU/Transforms/Passes.h" #include "mlir/Dialect/LLVMIR/LLVMDialect.h" #include "mlir/ExecutionEngine/OptUtils.h" #include "mlir/Pass/Pass.h" #include "mlir/Target/LLVMIR/Dialect/GPU/GPUToLLVMIRTranslation.h" #include "mlir/Target/LLVMIR/Dialect/LLVMIR/LLVMToLLVMIRTranslation.h" #include "mlir/Target/LLVMIR/Export.h" #include "llvm/IR/LegacyPassManager.h" #include "llvm/MC/TargetRegistry.h" #include "llvm/Support/TargetSelect.h" #include "llvm/Target/TargetMachine.h" #include #include #define DEBUG_TYPE "serialize-to-blob" using namespace mlir; std::string gpu::getDefaultGpuBinaryAnnotation() { return "gpu.binary"; } gpu::SerializeToBlobPass::SerializeToBlobPass(TypeID passID) : OperationPass(passID) {} gpu::SerializeToBlobPass::SerializeToBlobPass(const SerializeToBlobPass &other) : OperationPass(other) {} std::optional gpu::SerializeToBlobPass::translateToISA(llvm::Module &llvmModule, llvm::TargetMachine &targetMachine) { llvmModule.setDataLayout(targetMachine.createDataLayout()); if (failed(optimizeLlvm(llvmModule, targetMachine))) return std::nullopt; std::string targetISA; llvm::raw_string_ostream stream(targetISA); { // Drop pstream after this to prevent the ISA from being stuck buffering llvm::buffer_ostream pstream(stream); llvm::legacy::PassManager codegenPasses; if (targetMachine.addPassesToEmitFile(codegenPasses, pstream, nullptr, llvm::CodeGenFileType::AssemblyFile)) return std::nullopt; codegenPasses.run(llvmModule); } return stream.str(); } void gpu::SerializeToBlobPass::runOnOperation() { // Lower the module to an LLVM IR module using a separate context to enable // multi-threaded processing. llvm::LLVMContext llvmContext; std::unique_ptr llvmModule = translateToLLVMIR(llvmContext); if (!llvmModule) return signalPassFailure(); // Lower the LLVM IR module to target ISA. std::unique_ptr targetMachine = createTargetMachine(); if (!targetMachine) return signalPassFailure(); std::optional maybeTargetISA = translateToISA(*llvmModule, *targetMachine); if (!maybeTargetISA.has_value()) return signalPassFailure(); std::string targetISA = std::move(*maybeTargetISA); LLVM_DEBUG({ llvm::dbgs() << "ISA for module: " << getOperation().getNameAttr() << "\n"; llvm::dbgs() << targetISA << "\n"; llvm::dbgs().flush(); }); // Serialize the target ISA. std::unique_ptr> blob = serializeISA(targetISA); if (!blob) return signalPassFailure(); // Add the blob as module attribute. auto attr = StringAttr::get(&getContext(), StringRef(blob->data(), blob->size())); getOperation()->setAttr(gpuBinaryAnnotation, attr); } LogicalResult gpu::SerializeToBlobPass::optimizeLlvm(llvm::Module &llvmModule, llvm::TargetMachine &targetMachine) { int optLevel = this->optLevel.getValue(); if (optLevel < 0 || optLevel > 3) return getOperation().emitError() << "invalid optimization level " << optLevel; targetMachine.setOptLevel(static_cast(optLevel)); auto transformer = makeOptimizingTransformer(optLevel, /*sizeLevel=*/0, &targetMachine); auto error = transformer(&llvmModule); if (error) { InFlightDiagnostic mlirError = getOperation()->emitError(); llvm::handleAllErrors( std::move(error), [&mlirError](const llvm::ErrorInfoBase &ei) { mlirError << "could not optimize LLVM IR: " << ei.message(); }); return mlirError; } return success(); } std::unique_ptr gpu::SerializeToBlobPass::createTargetMachine() { Location loc = getOperation().getLoc(); std::string error; const llvm::Target *target = llvm::TargetRegistry::lookupTarget(triple, error); if (!target) { emitError(loc, Twine("failed to lookup target: ") + error); return {}; } llvm::TargetMachine *machine = target->createTargetMachine(triple, chip, features, {}, {}); if (!machine) { emitError(loc, "failed to create target machine"); return {}; } return std::unique_ptr{machine}; } std::unique_ptr gpu::SerializeToBlobPass::translateToLLVMIR(llvm::LLVMContext &llvmContext) { return translateModuleToLLVMIR(getOperation(), llvmContext, "LLVMDialectModule"); }