bolt/deps/llvm-18.1.8/mlir/lib/Dialect/GPU/Transforms/SerializeToCubin.cpp

//===- SerializeToCubin.cpp - Convert GPU kernel to CUBIN blob ------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements a pass that serializes a GPU module into a CUBIN blob
// and adds that blob as a string attribute of the module.
//
//===----------------------------------------------------------------------===//
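//
// Usage sketch (illustrative only, not part of the pass itself): since
// SerializeToBlobPass runs on gpu.module ops, this pass is typically added as
// a nested pass on a module-level PassManager via the factory declared in
// GPU/Transforms/Passes.h. Here `ctx` and the option values are placeholders:
//
//   mlir::PassManager pm(ctx);
//   pm.addNestedPass<mlir::gpu::GPUModuleOp>(
//       mlir::createGpuSerializeToCubinPass(
//           /*triple=*/"nvptx64-nvidia-cuda", /*arch=*/"sm_35",
//           /*features=*/"+ptx60", /*optLevel=*/2, /*dumpPtx=*/false));
//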
#include "mlir/Dialect/GPU/Transforms/Passes.h"
#include "mlir/Dialect/LLVMIR/NVVMDialect.h"
#include "llvm/Support/Debug.h"
#if MLIR_GPU_TO_CUBIN_PASS_ENABLE
#include "mlir/Pass/Pass.h"
#include "mlir/Target/LLVMIR/Dialect/NVVM/NVVMToLLVMIRTranslation.h"
#include "mlir/Target/LLVMIR/Export.h"
#include "llvm/Support/TargetSelect.h"
#include "llvm/Support/Threading.h"
#include <cuda.h>

using namespace mlir;

static void emitCudaError(const llvm::Twine &expr, const char *buffer,
                          CUresult result, Location loc) {
  const char *error = nullptr;
  cuGetErrorString(result, &error);
  emitError(loc,
            expr.concat(error ? " failed with error code " + llvm::Twine{error}
                              : llvm::Twine(" failed with unknown error "))
                .concat("[")
                .concat(buffer)
                .concat("]"));
}
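
// Wraps a CUDA driver API call: on failure it reports the error together with
// the contents of `jitErrorBuffer` (both `jitErrorBuffer` and `loc` must be in
// scope at the expansion site) and makes the enclosing function return an
// empty value. The do/while(false) wrapper lets the macro be used like an
// ordinary statement.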
#define RETURN_ON_CUDA_ERROR(expr)                                            \
  do {                                                                        \
    if (auto status = (expr)) {                                               \
      emitCudaError(#expr, jitErrorBuffer, status, loc);                      \
      return {};                                                              \
    }                                                                         \
  } while (false)

namespace {
class SerializeToCubinPass
    : public PassWrapper<SerializeToCubinPass, gpu::SerializeToBlobPass> {
  static llvm::once_flag initializeBackendOnce;

public:
  MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(SerializeToCubinPass)

  SerializeToCubinPass(StringRef triple = "nvptx64-nvidia-cuda",
                       StringRef chip = "sm_35", StringRef features = "+ptx60",
                       int optLevel = 2, bool dumpPtx = false);

  StringRef getArgument() const override { return "gpu-to-cubin"; }
  StringRef getDescription() const override {
    return "Lower GPU kernel function to CUBIN binary annotations";
  }

private:
  // Serializes PTX to CUBIN.
  std::unique_ptr<std::vector<char>>
  serializeISA(const std::string &isa) override;
};
} // namespace

// Sets the 'option' to 'value' unless it already has a value.
static void maybeSetOption(Pass::Option<std::string> &option, StringRef value) {
  if (!option.hasValue())
    option = value.str();
}

llvm::once_flag SerializeToCubinPass::initializeBackendOnce;

SerializeToCubinPass::SerializeToCubinPass(StringRef triple, StringRef chip,
                                           StringRef features, int optLevel,
                                           bool dumpPtx) {
  // No matter how this pass is constructed, ensure that the NVPTX backend
  // is initialized exactly once.
  llvm::call_once(initializeBackendOnce, []() {
    // Initialize LLVM NVPTX backend.
#if LLVM_HAS_NVPTX_TARGET
    LLVMInitializeNVPTXTarget();
    LLVMInitializeNVPTXTargetInfo();
    LLVMInitializeNVPTXTargetMC();
    LLVMInitializeNVPTXAsmPrinter();
#endif
  });

  maybeSetOption(this->triple, triple);
  maybeSetOption(this->chip, chip);
  maybeSetOption(this->features, features);
  this->dumpPtx = dumpPtx;
  if (this->optLevel.getNumOccurrences() == 0)
    this->optLevel.setValue(optLevel);
}
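
// Compiles the given PTX to a CUBIN by driving the CUDA driver JIT linker:
// initialize the driver, acquire the device's primary context, create a link
// with an error-log buffer, add the PTX as input, complete the link, copy the
// resulting image out, and release all driver resources. Any driver failure is
// reported via RETURN_ON_CUDA_ERROR and yields an empty (null) result.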
std::unique_ptr<std::vector<char>>
SerializeToCubinPass::serializeISA(const std::string &isa) {
  Location loc = getOperation().getLoc();
  char jitErrorBuffer[4096] = {0};

  RETURN_ON_CUDA_ERROR(cuInit(0));

  // Linking requires a device context.
  CUdevice device;
  RETURN_ON_CUDA_ERROR(cuDeviceGet(&device, 0));
  CUcontext context;
  // Use the primary context.
  RETURN_ON_CUDA_ERROR(cuDevicePrimaryCtxRetain(&context, device));
  // Push the primary context so that the next CUDA operations
  // actually use it.
  RETURN_ON_CUDA_ERROR(cuCtxPushCurrent(context));

  CUlinkState linkState;

  CUjit_option jitOptions[] = {CU_JIT_ERROR_LOG_BUFFER,
                               CU_JIT_ERROR_LOG_BUFFER_SIZE_BYTES};
  void *jitOptionsVals[] = {jitErrorBuffer,
                            reinterpret_cast<void *>(sizeof(jitErrorBuffer))};

  RETURN_ON_CUDA_ERROR(cuLinkCreate(2,              /* number of jit options */
                                    jitOptions,     /* jit options */
                                    jitOptionsVals, /* jit option values */
                                    &linkState));

  auto kernelName = getOperation().getName().str();
  if (dumpPtx) {
    llvm::dbgs() << " Kernel Name : [" << kernelName << "]\n";
    llvm::dbgs() << isa << "\n";
  }
  RETURN_ON_CUDA_ERROR(cuLinkAddData(
      linkState, CUjitInputType::CU_JIT_INPUT_PTX,
      const_cast<void *>(static_cast<const void *>(isa.c_str())), isa.length(),
      kernelName.c_str(), 0, /* number of jit options */
      nullptr,               /* jit options */
      nullptr                /* jit option values */
      ));

  void *cubinData;
  size_t cubinSize;
  RETURN_ON_CUDA_ERROR(cuLinkComplete(linkState, &cubinData, &cubinSize));

  char *cubinAsChar = static_cast<char *>(cubinData);
  auto result =
      std::make_unique<std::vector<char>>(cubinAsChar, cubinAsChar + cubinSize);

  // This will also destroy the cubin data.
  RETURN_ON_CUDA_ERROR(cuLinkDestroy(linkState));
  // Pop and release the primary context.
  CUcontext poppedContext;
  RETURN_ON_CUDA_ERROR(cuCtxPopCurrent(&poppedContext));
  RETURN_ON_CUDA_ERROR(cuDevicePrimaryCtxRelease(device));

  return result;
}

// Register pass to serialize GPU kernel functions to a CUBIN binary annotation.
void mlir::registerGpuSerializeToCubinPass() {
  PassRegistration<SerializeToCubinPass> registerSerializeToCubin(
      [] { return std::make_unique<SerializeToCubinPass>(); });
}
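
// For reference (an illustrative invocation, assuming an mlir-opt-style tool
// that has this registration linked in and a CUDA driver available at run
// time): once registered, the pass is exposed under its "gpu-to-cubin"
// argument and anchors on gpu.module ops, so it can be run roughly as
//
//   mlir-opt --pass-pipeline='builtin.module(gpu.module(gpu-to-cubin))' in.mlir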

std::unique_ptr<Pass> mlir::createGpuSerializeToCubinPass(StringRef triple,
                                                          StringRef arch,
                                                          StringRef features,
                                                          int optLevel,
                                                          bool dumpPtx) {
  return std::make_unique<SerializeToCubinPass>(triple, arch, features,
                                                optLevel, dumpPtx);
}

#else // MLIR_GPU_TO_CUBIN_PASS_ENABLE
void mlir::registerGpuSerializeToCubinPass() {}
#endif // MLIR_GPU_TO_CUBIN_PASS_ENABLE