//===- SerializeToBlob.cpp - MLIR GPU lowering pass -----------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements a base class for a pass to serialize a gpu module
// into a binary blob that can be executed on a GPU. The binary blob is added
// as a string attribute to the gpu module.
//
//===----------------------------------------------------------------------===//

#include "mlir/Dialect/GPU/IR/GPUDialect.h"
#include "mlir/Dialect/GPU/Transforms/Passes.h"
#include "mlir/Dialect/LLVMIR/LLVMDialect.h"
#include "mlir/ExecutionEngine/OptUtils.h"
#include "mlir/Pass/Pass.h"
#include "mlir/Target/LLVMIR/Dialect/GPU/GPUToLLVMIRTranslation.h"
#include "mlir/Target/LLVMIR/Dialect/LLVMIR/LLVMToLLVMIRTranslation.h"
#include "mlir/Target/LLVMIR/Export.h"
#include "llvm/IR/LegacyPassManager.h"
#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/TargetSelect.h"
#include "llvm/Target/TargetMachine.h"

#include <optional>
#include <string>

#define DEBUG_TYPE "serialize-to-blob"

using namespace mlir;
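
// Default name of the attribute under which the serialized blob is attached
// to the GPU module.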
std::string gpu::getDefaultGpuBinaryAnnotation() { return "gpu.binary"; }

gpu::SerializeToBlobPass::SerializeToBlobPass(TypeID passID)
    : OperationPass<gpu::GPUModuleOp>(passID) {}

gpu::SerializeToBlobPass::SerializeToBlobPass(const SerializeToBlobPass &other)
    : OperationPass<gpu::GPUModuleOp>(other) {}
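
// Lowers `llvmModule` to target assembly (ISA): runs the configured LLVM
// optimizations first and then the target machine's codegen passes. Returns
// std::nullopt on failure.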
std::optional<std::string>
gpu::SerializeToBlobPass::translateToISA(llvm::Module &llvmModule,
                                         llvm::TargetMachine &targetMachine) {
  llvmModule.setDataLayout(targetMachine.createDataLayout());

  if (failed(optimizeLlvm(llvmModule, targetMachine)))
    return std::nullopt;

  std::string targetISA;
  llvm::raw_string_ostream stream(targetISA);

  { // Scope pstream so its destructor flushes the buffered ISA into `stream`.
    llvm::buffer_ostream pstream(stream);
    llvm::legacy::PassManager codegenPasses;

    if (targetMachine.addPassesToEmitFile(codegenPasses, pstream, nullptr,
                                          llvm::CodeGenFileType::AssemblyFile))
      return std::nullopt;

    codegenPasses.run(llvmModule);
  }
  return stream.str();
}
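
// Serializes the GPU module: translate it to LLVM IR, lower that to target
// ISA, serialize the ISA into a binary blob, and attach the blob to the
// module under the `gpuBinaryAnnotation` attribute name.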
void gpu::SerializeToBlobPass::runOnOperation() {
  // Lower the module to an LLVM IR module using a separate context to enable
  // multi-threaded processing.
  llvm::LLVMContext llvmContext;
  std::unique_ptr<llvm::Module> llvmModule = translateToLLVMIR(llvmContext);
  if (!llvmModule)
    return signalPassFailure();

  // Lower the LLVM IR module to target ISA.
  std::unique_ptr<llvm::TargetMachine> targetMachine = createTargetMachine();
  if (!targetMachine)
    return signalPassFailure();

  std::optional<std::string> maybeTargetISA =
      translateToISA(*llvmModule, *targetMachine);

  if (!maybeTargetISA.has_value())
    return signalPassFailure();

  std::string targetISA = std::move(*maybeTargetISA);

  LLVM_DEBUG({
    llvm::dbgs() << "ISA for module: " << getOperation().getNameAttr() << "\n";
    llvm::dbgs() << targetISA << "\n";
    llvm::dbgs().flush();
  });

  // Serialize the target ISA.
  std::unique_ptr<std::vector<char>> blob = serializeISA(targetISA);
  if (!blob)
    return signalPassFailure();

  // Add the blob as a module attribute.
  auto attr =
      StringAttr::get(&getContext(), StringRef(blob->data(), blob->size()));
  getOperation()->setAttr(gpuBinaryAnnotation, attr);
}
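
// Validates the requested optimization level and runs the standard LLVM
// optimization pipeline at that level on `llvmModule`.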
LogicalResult
gpu::SerializeToBlobPass::optimizeLlvm(llvm::Module &llvmModule,
                                       llvm::TargetMachine &targetMachine) {
  int optLevel = this->optLevel.getValue();
  if (optLevel < 0 || optLevel > 3)
    return getOperation().emitError()
           << "invalid optimization level " << optLevel;

  targetMachine.setOptLevel(static_cast<llvm::CodeGenOptLevel>(optLevel));

  auto transformer =
      makeOptimizingTransformer(optLevel, /*sizeLevel=*/0, &targetMachine);
  auto error = transformer(&llvmModule);
  if (error) {
    InFlightDiagnostic mlirError = getOperation()->emitError();
    llvm::handleAllErrors(
        std::move(error), [&mlirError](const llvm::ErrorInfoBase &ei) {
          mlirError << "could not optimize LLVM IR: " << ei.message();
        });
    return mlirError;
  }
  return success();
}
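
// Looks up the LLVM target for the configured `triple` and creates a
// TargetMachine for the requested `chip` and `features`. Emits an error and
// returns null on failure.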
std::unique_ptr<llvm::TargetMachine>
gpu::SerializeToBlobPass::createTargetMachine() {
  Location loc = getOperation().getLoc();
  std::string error;
  const llvm::Target *target =
      llvm::TargetRegistry::lookupTarget(triple, error);
  if (!target) {
    emitError(loc, Twine("failed to lookup target: ") + error);
    return {};
  }
  llvm::TargetMachine *machine =
      target->createTargetMachine(triple, chip, features, {}, {});
  if (!machine) {
    emitError(loc, "failed to create target machine");
    return {};
  }

  return std::unique_ptr<llvm::TargetMachine>{machine};
}
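
// Default lowering of the GPU module to LLVM IR via the registered dialect
// translation interfaces.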
std::unique_ptr<llvm::Module>
gpu::SerializeToBlobPass::translateToLLVMIR(llvm::LLVMContext &llvmContext) {
  return translateModuleToLLVMIR(getOperation(), llvmContext,
                                 "LLVMDialectModule");
}