bolt/deps/llvm-18.1.8/mlir/lib/Dialect/Bufferization/Transforms/OwnershipBasedBufferDeallocation.cpp
2025-02-14 19:21:04 +01:00

1093 lines
45 KiB
C++

//===- OwnershipBasedBufferDeallocation.cpp - impl. for buffer dealloc. ---===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements logic for computing correct `bufferization.dealloc`
// positions. Furthermore, buffer deallocation also adds required new clone
// operations to ensure that memrefs returned by functions never alias an
// argument.
//
// TODO:
// The current implementation does not support explicit-control-flow loops and
// the resulting code will be invalid with respect to program semantics.
// However, structured control-flow loops are fully supported.
//
//===----------------------------------------------------------------------===//
#include "mlir/Dialect/Bufferization/IR/BufferDeallocationOpInterface.h"
#include "mlir/Dialect/Bufferization/IR/Bufferization.h"
#include "mlir/Dialect/Bufferization/Transforms/Passes.h"
#include "mlir/Dialect/ControlFlow/IR/ControlFlowOps.h"
#include "mlir/Dialect/Func/IR/FuncOps.h"
#include "mlir/Dialect/MemRef/IR/MemRef.h"
#include "mlir/Dialect/SCF/IR/SCF.h"
#include "mlir/IR/Iterators.h"
#include "mlir/Interfaces/ControlFlowInterfaces.h"
namespace mlir {
namespace bufferization {
#define GEN_PASS_DEF_OWNERSHIPBASEDBUFFERDEALLOCATION
#include "mlir/Dialect/Bufferization/Transforms/Passes.h.inc"
} // namespace bufferization
} // namespace mlir
using namespace mlir;
using namespace mlir::bufferization;
//===----------------------------------------------------------------------===//
// Helpers
//===----------------------------------------------------------------------===//
static Value buildBoolValue(OpBuilder &builder, Location loc, bool value) {
return builder.create<arith::ConstantOp>(loc, builder.getBoolAttr(value));
}
static bool isMemref(Value v) { return v.getType().isa<BaseMemRefType>(); }
/// Return "true" if the given op is guaranteed to have neither "Allocate" nor
/// "Free" side effects.
static bool hasNeitherAllocateNorFreeSideEffect(Operation *op) {
if (isa<MemoryEffectOpInterface>(op))
return hasEffect<MemoryEffects::Allocate>(op) ||
hasEffect<MemoryEffects::Free>(op);
// If the op does not implement the MemoryEffectOpInterface but has has
// recursive memory effects, then this op in isolation (without its body) does
// not have any side effects. All the ops inside the regions of this op will
// be processed separately.
return op->hasTrait<OpTrait::HasRecursiveMemoryEffects>();
}
/// Return "true" if the given op has buffer semantics. I.e., it has buffer
/// operands, buffer results and/or buffer region entry block arguments.
static bool hasBufferSemantics(Operation *op) {
if (llvm::any_of(op->getOperands(), isMemref) ||
llvm::any_of(op->getResults(), isMemref))
return true;
for (Region &region : op->getRegions())
if (!region.empty())
if (llvm::any_of(region.front().getArguments(), isMemref))
return true;
return false;
}
//===----------------------------------------------------------------------===//
// Backedges analysis
//===----------------------------------------------------------------------===//
namespace {
/// A straight-forward program analysis which detects loop backedges induced by
/// explicit control flow.
class Backedges {
public:
using BlockSetT = SmallPtrSet<Block *, 16>;
using BackedgeSetT = llvm::DenseSet<std::pair<Block *, Block *>>;
public:
/// Constructs a new backedges analysis using the op provided.
Backedges(Operation *op) { recurse(op); }
/// Returns the number of backedges formed by explicit control flow.
size_t size() const { return edgeSet.size(); }
/// Returns the start iterator to loop over all backedges.
BackedgeSetT::const_iterator begin() const { return edgeSet.begin(); }
/// Returns the end iterator to loop over all backedges.
BackedgeSetT::const_iterator end() const { return edgeSet.end(); }
private:
/// Enters the current block and inserts a backedge into the `edgeSet` if we
/// have already visited the current block. The inserted edge links the given
/// `predecessor` with the `current` block.
bool enter(Block &current, Block *predecessor) {
bool inserted = visited.insert(&current).second;
if (!inserted)
edgeSet.insert(std::make_pair(predecessor, &current));
return inserted;
}
/// Leaves the current block.
void exit(Block &current) { visited.erase(&current); }
/// Recurses into the given operation while taking all attached regions into
/// account.
void recurse(Operation *op) {
Block *current = op->getBlock();
// If the current op implements the `BranchOpInterface`, there can be
// cycles in the scope of all successor blocks.
if (isa<BranchOpInterface>(op)) {
for (Block *succ : current->getSuccessors())
recurse(*succ, current);
}
// Recurse into all distinct regions and check for explicit control-flow
// loops.
for (Region &region : op->getRegions()) {
if (!region.empty())
recurse(region.front(), current);
}
}
/// Recurses into explicit control-flow structures that are given by
/// the successor relation defined on the block level.
void recurse(Block &block, Block *predecessor) {
// Try to enter the current block. If this is not possible, we are
// currently processing this block and can safely return here.
if (!enter(block, predecessor))
return;
// Recurse into all operations and successor blocks.
for (Operation &op : block.getOperations())
recurse(&op);
// Leave the current block.
exit(block);
}
/// Stores all blocks that are currently visited and on the processing stack.
BlockSetT visited;
/// Stores all backedges in the format (source, target).
BackedgeSetT edgeSet;
};
} // namespace
//===----------------------------------------------------------------------===//
// BufferDeallocation
//===----------------------------------------------------------------------===//
namespace {
/// The buffer deallocation transformation which ensures that all allocs in the
/// program have a corresponding de-allocation.
class BufferDeallocation {
public:
BufferDeallocation(Operation *op, bool privateFuncDynamicOwnership)
: state(op) {
options.privateFuncDynamicOwnership = privateFuncDynamicOwnership;
}
/// Performs the actual placement/creation of all dealloc operations.
LogicalResult deallocate(FunctionOpInterface op);
private:
/// The base case for the recursive template below.
template <typename... T>
typename std::enable_if<sizeof...(T) == 0, FailureOr<Operation *>>::type
handleOp(Operation *op) {
return op;
}
/// Applies all the handlers of the interfaces in the template list
/// implemented by 'op'. In particular, if an operation implements more than
/// one of the interfaces in the template list, all the associated handlers
/// will be applied to the operation in the same order as the template list
/// specifies. If a handler reports a failure or removes the operation without
/// replacement (indicated by returning 'nullptr'), no further handlers are
/// applied and the return value is propagated to the caller of 'handleOp'.
///
/// The interface handlers job is to update the deallocation state, most
/// importantly the ownership map and list of memrefs to potentially be
/// deallocated per block, but also to insert `bufferization.dealloc`
/// operations where needed. Obviously, no MemRefs that may be used at a later
/// point in the control-flow may be deallocated and the ownership map has to
/// be updated to reflect potential ownership changes caused by the dealloc
/// operation (e.g., if two interfaces on the same op insert a dealloc
/// operation each, the second one should query the ownership map and use them
/// as deallocation condition such that MemRefs already deallocated in the
/// first dealloc operation are not deallocated a second time (double-free)).
/// Note that currently only the interfaces on terminators may insert dealloc
/// operations and it is verified as a precondition that a terminator op must
/// implement exactly one of the interfaces handling dealloc insertion.
///
/// The return value of the 'handleInterface' functions should be a
/// FailureOr<Operation *> indicating whether there was a failure or otherwise
/// returning the operation itself or a replacement operation.
///
/// Note: The difference compared to `TypeSwitch` is that all
/// matching cases are applied instead of just the first match.
template <typename InterfaceT, typename... InterfacesU>
FailureOr<Operation *> handleOp(Operation *op) {
Operation *next = op;
if (auto concreteOp = dyn_cast<InterfaceT>(op)) {
FailureOr<Operation *> result = handleInterface(concreteOp);
if (failed(result))
return failure();
next = *result;
}
if (!next)
return FailureOr<Operation *>(nullptr);
return handleOp<InterfacesU...>(next);
}
/// Apply all supported interface handlers to the given op.
FailureOr<Operation *> handleAllInterfaces(Operation *op) {
if (auto deallocOpInterface = dyn_cast<BufferDeallocationOpInterface>(op))
return deallocOpInterface.process(state, options);
if (failed(verifyOperationPreconditions(op)))
return failure();
return handleOp<MemoryEffectOpInterface, RegionBranchOpInterface,
CallOpInterface, BranchOpInterface,
RegionBranchTerminatorOpInterface>(op);
}
/// Make sure that for each forwarded MemRef value, an ownership indicator
/// `i1` value is forwarded as well such that the successor block knows
/// whether the MemRef has to be deallocated.
///
/// Example:
/// ```
/// ^bb1:
/// <more ops...>
/// cf.br ^bb2(<forward-to-bb2>)
/// ```
/// becomes
/// ```
/// // let (m, c) = getMemrefsAndConditionsToDeallocate(bb1)
/// // let r = getMemrefsToRetain(bb1, bb2, <forward-to-bb2>)
/// ^bb1:
/// <more ops...>
/// o = bufferization.dealloc m if c retain r
/// // replace ownership(r) with o element-wise
/// cf.br ^bb2(<forward-to-bb2>, o)
/// ```
FailureOr<Operation *> handleInterface(BranchOpInterface op);
/// Add an ownership indicator for every forwarding MemRef operand and result.
/// Nested regions never take ownership of MemRefs owned by a parent region
/// (neither via forwarding operand nor when captured implicitly when the
/// region is not isolated from above). Ownerships will only be passed to peer
/// regions (when an operation has multiple regions, such as scf.while), or to
/// parent regions.
/// Note that the block arguments in the nested region are currently handled
/// centrally in the 'dealloc' function, but better interface support could
/// allow us to do this here for the nested region specifically to reduce the
/// amount of assumptions we make on the structure of ops implementing this
/// interface.
///
/// Example:
/// ```
/// %ret = scf.for %i = %c0 to %c10 step %c1 iter_args(%m = %memref) {
/// <more ops...>
/// scf.yield %m : memref<2xi32>, i1
/// }
/// ```
/// becomes
/// ```
/// %ret:2 = scf.for %i = %c0 to %c10 step %c1
/// iter_args(%m = %memref, %own = %false) {
/// <more ops...>
/// // Note that the scf.yield is handled by the
/// // RegionBranchTerminatorOpInterface (not this handler)
/// // let o = getMemrefWithUniqueOwnership(%own)
/// scf.yield %m, o : memref<2xi32>, i1
/// }
/// ```
FailureOr<Operation *> handleInterface(RegionBranchOpInterface op);
/// If the private-function-dynamic-ownership pass option is enabled and the
/// called function is private, additional arguments and results are added for
/// each MemRef argument/result to pass the dynamic ownership indicator along.
/// Otherwise, updates the ownership map and list of memrefs to be deallocated
/// according to the function boundary ABI, i.e., assume ownership of all
/// returned MemRefs.
///
/// Example (assume `private-function-dynamic-ownership` is enabled):
/// ```
/// func.func @f(%arg0: memref<2xi32>) -> memref<2xi32> {...}
/// func.func private @g(%arg0: memref<2xi32>) -> memref<2xi32> {...}
///
/// %ret_f = func.call @f(%memref) : (memref<2xi32>) -> memref<2xi32>
/// %ret_g = func.call @g(%memref) : (memref<2xi32>) -> memref<2xi32>
/// ```
/// becomes
/// ```
/// func.func @f(%arg0: memref<2xi32>) -> memref<2xi32> {...}
/// func.func private @g(%arg0: memref<2xi32>) -> memref<2xi32> {...}
///
/// %ret_f = func.call @f(%memref) : (memref<2xi32>) -> memref<2xi32>
/// // set ownership(%ret_f) := true
/// // remember to deallocate %ret_f
///
/// // (new_memref, own) = getmemrefWithUniqueOwnership(%memref)
/// %ret_g:2 = func.call @g(new_memref, own) :
/// (memref<2xi32>, i1) -> (memref<2xi32>, i1)
/// // set ownership(%ret_g#0) := %ret_g#1
/// // remember to deallocate %ret_g
/// ```
FailureOr<Operation *> handleInterface(CallOpInterface op);
/// Takes care of allocation and free side-effects. It collects allocated
/// MemRefs that we have to add to manually deallocate, but also removes
/// values again that are already deallocated before the end of the block. It
/// also updates the ownership map accordingly.
///
/// Example:
/// ```
/// %alloc = memref.alloc()
/// %alloca = memref.alloca()
/// ```
/// becomes
/// ```
/// %alloc = memref.alloc()
/// %alloca = memref.alloca()
/// // set ownership(alloc) := true
/// // set ownership(alloca) := false
/// // remember to deallocate %alloc
/// ```
FailureOr<Operation *> handleInterface(MemoryEffectOpInterface op);
/// Takes care that the function boundary ABI is adhered to if the parent
/// operation implements FunctionOpInterface, inserting a
/// `bufferization.clone` if necessary, and inserts the
/// `bufferization.dealloc` operation according to the ops operands.
///
/// Example:
/// ```
/// ^bb1:
/// <more ops...>
/// func.return <return-vals>
/// ```
/// becomes
/// ```
/// // let (m, c) = getMemrefsAndConditionsToDeallocate(bb1)
/// // let r = getMemrefsToRetain(bb1, nullptr, <return-vals>)
/// ^bb1:
/// <more ops...>
/// o = bufferization.dealloc m if c retain r
/// func.return <return-vals>
/// (if !isFunctionWithoutDynamicOwnership: append o)
/// ```
FailureOr<Operation *> handleInterface(RegionBranchTerminatorOpInterface op);
/// Construct a new operation which is exactly the same as the passed 'op'
/// except that the OpResults list is appended by new results of the passed
/// 'types'.
/// TODO: ideally, this would be implemented using an OpInterface because it
/// is used to append function results, loop iter_args, etc. and thus makes
/// some assumptions that the variadic list of those is at the end of the
/// OpResults range.
Operation *appendOpResults(Operation *op, ArrayRef<Type> types);
/// A convenience template for the generic 'appendOpResults' function above to
/// avoid manual casting of the result.
template <typename OpTy>
OpTy appendOpResults(OpTy op, ArrayRef<Type> types) {
return cast<OpTy>(appendOpResults(op.getOperation(), types));
}
/// Performs deallocation of a single basic block. This is a private function
/// because some internal data structures have to be set up beforehand and
/// this function has to be called on blocks in a region in dominance order.
LogicalResult deallocate(Block *block);
/// After all relevant interfaces of an operation have been processed by the
/// 'handleInterface' functions, this function sets the ownership of operation
/// results that have not been set yet by the 'handleInterface' functions. It
/// generally assumes that each result can alias with every operand of the
/// operation, if there are MemRef typed results but no MemRef operands it
/// assigns 'false' as ownership. This happens, e.g., for the
/// memref.get_global operation. It would also be possible to query some alias
/// analysis to get more precise ownerships, however, the analysis would have
/// to be updated according to the IR modifications this pass performs (e.g.,
/// re-building operations to have more result values, inserting clone
/// operations, etc.).
void populateRemainingOwnerships(Operation *op);
/// Given an SSA value of MemRef type, returns the same of a new SSA value
/// which has 'Unique' ownership where the ownership indicator is guaranteed
/// to be always 'true'.
Value materializeMemrefWithGuaranteedOwnership(OpBuilder &builder,
Value memref, Block *block);
/// Returns whether the given operation implements FunctionOpInterface, has
/// private visibility, and the private-function-dynamic-ownership pass option
/// is enabled.
bool isFunctionWithoutDynamicOwnership(Operation *op);
/// Given an SSA value of MemRef type, this function queries the
/// BufferDeallocationOpInterface of the defining operation of 'memref' for a
/// materialized ownership indicator for 'memref'. If the op does not
/// implement the interface or if the block for which the materialized value
/// is requested does not match the block in which 'memref' is defined, the
/// default implementation in
/// `DeallocationState::getMemrefWithUniqueOwnership` is queried instead.
std::pair<Value, Value>
materializeUniqueOwnership(OpBuilder &builder, Value memref, Block *block);
/// Checks all the preconditions for operations implementing the
/// FunctionOpInterface that have to hold for the deallocation to be
/// applicable:
/// (1) Checks that there are not explicit control flow loops.
static LogicalResult verifyFunctionPreconditions(FunctionOpInterface op);
/// Checks all the preconditions for operations inside the region of
/// operations implementing the FunctionOpInterface that have to hold for the
/// deallocation to be applicable:
/// (1) Checks if all operations that have at least one attached region
/// implement the RegionBranchOpInterface. This is not required in edge cases,
/// where we have a single attached region and the parent operation has no
/// results.
/// (2) Checks that no deallocations already exist. Especially deallocations
/// in nested regions are not properly supported yet since this requires
/// ownership of the memref to be transferred to the nested region, which does
/// not happen by default. This constrained can be lifted in the future.
/// (3) Checks that terminators with more than one successor except
/// `cf.cond_br` are not present and that either BranchOpInterface or
/// RegionBranchTerminatorOpInterface is implemented.
static LogicalResult verifyOperationPreconditions(Operation *op);
/// When the 'private-function-dynamic-ownership' pass option is enabled,
/// additional `i1` arguments and return values are added for each MemRef
/// value in the function signature. This function takes care of updating the
/// `function_type` attribute of the function according to the actually
/// returned values from the terminators.
static LogicalResult updateFunctionSignature(FunctionOpInterface op);
private:
/// Collects all analysis state and including liveness, caches, ownerships of
/// already processed values and operations, and the MemRefs that have to be
/// deallocated at the end of each block.
DeallocationState state;
/// Collects all pass options in a single place.
DeallocationOptions options;
};
} // namespace
//===----------------------------------------------------------------------===//
// BufferDeallocation Implementation
//===----------------------------------------------------------------------===//
std::pair<Value, Value>
BufferDeallocation::materializeUniqueOwnership(OpBuilder &builder, Value memref,
Block *block) {
// The interface can only materialize ownership indicators in the same block
// as the defining op.
if (memref.getParentBlock() != block)
return state.getMemrefWithUniqueOwnership(builder, memref, block);
Operation *owner = memref.getDefiningOp();
if (!owner)
owner = memref.getParentBlock()->getParentOp();
// If the op implements the interface, query it for a materialized ownership
// value.
if (auto deallocOpInterface = dyn_cast<BufferDeallocationOpInterface>(owner))
return deallocOpInterface.materializeUniqueOwnershipForMemref(
state, options, builder, memref);
// Otherwise use the default implementation.
return state.getMemrefWithUniqueOwnership(builder, memref, block);
}
LogicalResult
BufferDeallocation::verifyFunctionPreconditions(FunctionOpInterface op) {
// (1) Ensure that there are supported loops only (no explicit control flow
// loops).
Backedges backedges(op);
if (backedges.size()) {
op->emitError("Only structured control-flow loops are supported.");
return failure();
}
return success();
}
LogicalResult BufferDeallocation::verifyOperationPreconditions(Operation *op) {
// (1) The pass does not work properly when deallocations are already present.
// Alternatively, we could also remove all deallocations as a pre-pass.
if (isa<DeallocOp>(op))
return op->emitError(
"No deallocation operations must be present when running this pass!");
// (2) Memory side effects of unregistered ops are unknown. In particular, we
// do not know whether an unregistered op allocates memory or not.
// - Ops with recursive memory effects are allowed. All nested ops in the
// regions of `op` will be analyzed separately.
// - Call ops are allowed even though they typically do not implement the
// MemoryEffectOpInterface. They usually do not have side effects apart
// from the callee, which will be analyzed separately. (This is similar to
// "recursive memory effects".)
if (!isa<MemoryEffectOpInterface>(op) &&
!op->hasTrait<OpTrait::HasRecursiveMemoryEffects>() &&
!isa<CallOpInterface>(op))
return op->emitError(
"ops with unknown memory side effects are not supported");
// We do not care about ops that do not operate on buffers and have no
// Allocate/Free side effect.
if (!hasBufferSemantics(op) && hasNeitherAllocateNorFreeSideEffect(op))
return success();
// (3) Check that the control flow structures are supported.
auto regions = op->getRegions();
// Check that if the operation has at
// least one region it implements the RegionBranchOpInterface. If there
// is an operation that does not fulfill this condition, we cannot apply
// the deallocation steps. Furthermore, we accept cases, where we have a
// region that returns no results, since, in that case, the intra-region
// control flow does not affect the transformation.
size_t size = regions.size();
if (((size == 1 && !op->getResults().empty()) || size > 1) &&
!dyn_cast<RegionBranchOpInterface>(op)) {
return op->emitError("All operations with attached regions need to "
"implement the RegionBranchOpInterface.");
}
// (3) Check that terminators with more than one successor except `cf.cond_br`
// are not present and that either BranchOpInterface or
// RegionBranchTerminatorOpInterface is implemented.
if (op->hasTrait<OpTrait::NoTerminator>())
return op->emitError("NoTerminator trait is not supported");
if (op->hasTrait<OpTrait::IsTerminator>()) {
// Either one of those interfaces has to be implemented on terminators, but
// not both.
if (!isa<BranchOpInterface, RegionBranchTerminatorOpInterface>(op) ||
(isa<BranchOpInterface>(op) &&
isa<RegionBranchTerminatorOpInterface>(op)))
return op->emitError(
"Terminators must implement either BranchOpInterface or "
"RegionBranchTerminatorOpInterface (but not both)!");
// We only support terminators with 0 or 1 successors for now and
// special-case the conditional branch op.
if (op->getSuccessors().size() > 1)
return op->emitError("Terminators with more than one successor "
"are not supported!");
}
return success();
}
LogicalResult
BufferDeallocation::updateFunctionSignature(FunctionOpInterface op) {
SmallVector<TypeRange> returnOperandTypes(llvm::map_range(
op.getFunctionBody().getOps<RegionBranchTerminatorOpInterface>(),
[](RegionBranchTerminatorOpInterface op) {
return op.getSuccessorOperands(RegionBranchPoint::parent()).getTypes();
}));
if (!llvm::all_equal(returnOperandTypes))
return op->emitError(
"there are multiple return operations with different operand types");
TypeRange resultTypes = op.getResultTypes();
// Check if we found a return operation because that doesn't necessarily
// always have to be the case, e.g., consider a function with one block that
// has a cf.br at the end branching to itself again (i.e., an infinite loop).
// In that case we don't want to crash but just not update the return types.
if (!returnOperandTypes.empty())
resultTypes = returnOperandTypes[0];
// TODO: it would be nice if the FunctionOpInterface had a method to not only
// get the function type but also set it.
op->setAttr(
"function_type",
TypeAttr::get(FunctionType::get(
op->getContext(), op.getFunctionBody().front().getArgumentTypes(),
resultTypes)));
return success();
}
LogicalResult BufferDeallocation::deallocate(FunctionOpInterface op) {
// Stop and emit a proper error message if we don't support the input IR.
if (failed(verifyFunctionPreconditions(op)))
return failure();
// Process the function block by block.
auto result = op->walk<WalkOrder::PostOrder, ForwardDominanceIterator<>>(
[&](Block *block) {
if (failed(deallocate(block)))
return WalkResult::interrupt();
return WalkResult::advance();
});
if (result.wasInterrupted())
return failure();
// Update the function signature if the function is private, dynamic ownership
// is enabled, and the function has memrefs as arguments or results.
return updateFunctionSignature(op);
}
LogicalResult BufferDeallocation::deallocate(Block *block) {
OpBuilder builder = OpBuilder::atBlockBegin(block);
// Compute liveness transfers of ownership to this block.
SmallVector<Value> liveMemrefs;
state.getLiveMemrefsIn(block, liveMemrefs);
for (auto li : liveMemrefs) {
// Ownership of implicitly captured memrefs from other regions is never
// taken, but ownership of memrefs in the same region (but different block)
// is taken.
if (li.getParentRegion() == block->getParent()) {
state.updateOwnership(li, state.getOwnership(li, li.getParentBlock()),
block);
state.addMemrefToDeallocate(li, block);
continue;
}
if (li.getParentRegion()->isProperAncestor(block->getParent())) {
Value falseVal = buildBoolValue(builder, li.getLoc(), false);
state.updateOwnership(li, falseVal, block);
}
}
for (unsigned i = 0, e = block->getNumArguments(); i < e; ++i) {
BlockArgument arg = block->getArgument(i);
if (!isMemref(arg))
continue;
// Adhere to function boundary ABI: no ownership of function argument
// MemRefs is taken.
if (isFunctionWithoutDynamicOwnership(block->getParentOp()) &&
block->isEntryBlock()) {
Value newArg = buildBoolValue(builder, arg.getLoc(), false);
state.updateOwnership(arg, newArg);
state.addMemrefToDeallocate(arg, block);
continue;
}
// Pass MemRef ownerships along via `i1` values.
Value newArg = block->addArgument(builder.getI1Type(), arg.getLoc());
state.updateOwnership(arg, newArg);
state.addMemrefToDeallocate(arg, block);
}
// For each operation in the block, handle the interfaces that affect aliasing
// and ownership of memrefs.
for (Operation &op : llvm::make_early_inc_range(*block)) {
FailureOr<Operation *> result = handleAllInterfaces(&op);
if (failed(result))
return failure();
if (!*result)
continue;
populateRemainingOwnerships(*result);
}
// TODO: if block has no terminator, handle dealloc insertion here.
return success();
}
Operation *BufferDeallocation::appendOpResults(Operation *op,
ArrayRef<Type> types) {
SmallVector<Type> newTypes(op->getResultTypes());
newTypes.append(types.begin(), types.end());
auto *newOp = Operation::create(op->getLoc(), op->getName(), newTypes,
op->getOperands(), op->getAttrDictionary(),
op->getPropertiesStorage(),
op->getSuccessors(), op->getNumRegions());
for (auto [oldRegion, newRegion] :
llvm::zip(op->getRegions(), newOp->getRegions()))
newRegion.takeBody(oldRegion);
OpBuilder(op).insert(newOp);
op->replaceAllUsesWith(newOp->getResults().take_front(op->getNumResults()));
op->erase();
return newOp;
}
FailureOr<Operation *>
BufferDeallocation::handleInterface(RegionBranchOpInterface op) {
OpBuilder builder = OpBuilder::atBlockBegin(op->getBlock());
// TODO: the RegionBranchOpInterface does not provide all the necessary
// methods to perform this transformation without additional assumptions on
// the structure. In particular, that
// * additional values to be passed to the next region can be added to the end
// of the operand list, the end of the block argument list, and the end of
// the result value list. However, it seems to be the general guideline for
// operations implementing this interface to follow this structure.
// * and that the block arguments and result values match the forwarded
// operands one-to-one (i.e., that there are no other values appended to the
// front).
// These assumptions are satisfied by the `scf.if`, `scf.for`, and `scf.while`
// operations.
SmallVector<RegionSuccessor> regions;
op.getSuccessorRegions(RegionBranchPoint::parent(), regions);
assert(!regions.empty() && "Must have at least one successor region");
SmallVector<Value> entryOperands(
op.getEntrySuccessorOperands(regions.front()));
unsigned numMemrefOperands = llvm::count_if(entryOperands, isMemref);
// No ownership is acquired for any MemRefs that are passed to the region from
// the outside.
Value falseVal = buildBoolValue(builder, op.getLoc(), false);
op->insertOperands(op->getNumOperands(),
SmallVector<Value>(numMemrefOperands, falseVal));
int counter = op->getNumResults();
unsigned numMemrefResults = llvm::count_if(op->getResults(), isMemref);
SmallVector<Type> ownershipResults(numMemrefResults, builder.getI1Type());
RegionBranchOpInterface newOp = appendOpResults(op, ownershipResults);
for (auto result : llvm::make_filter_range(newOp->getResults(), isMemref)) {
state.updateOwnership(result, newOp->getResult(counter++));
state.addMemrefToDeallocate(result, newOp->getBlock());
}
return newOp.getOperation();
}
Value BufferDeallocation::materializeMemrefWithGuaranteedOwnership(
OpBuilder &builder, Value memref, Block *block) {
// First, make sure we at least have 'Unique' ownership already.
std::pair<Value, Value> newMemrefAndOnwership =
materializeUniqueOwnership(builder, memref, block);
Value newMemref = newMemrefAndOnwership.first;
Value condition = newMemrefAndOnwership.second;
// Avoid inserting additional IR if ownership is already guaranteed. In
// particular, this is already the case when we had 'Unknown' ownership
// initially and a clone was inserted to get to 'Unique' ownership.
if (matchPattern(condition, m_One()))
return newMemref;
// Insert a runtime check and only clone if we still don't have ownership at
// runtime.
Value maybeClone =
builder
.create<scf::IfOp>(
memref.getLoc(), condition,
[&](OpBuilder &builder, Location loc) {
builder.create<scf::YieldOp>(loc, newMemref);
},
[&](OpBuilder &builder, Location loc) {
Value clone =
builder.create<bufferization::CloneOp>(loc, newMemref);
builder.create<scf::YieldOp>(loc, clone);
})
.getResult(0);
Value trueVal = buildBoolValue(builder, memref.getLoc(), true);
state.updateOwnership(maybeClone, trueVal);
state.addMemrefToDeallocate(maybeClone, maybeClone.getParentBlock());
return maybeClone;
}
FailureOr<Operation *>
BufferDeallocation::handleInterface(BranchOpInterface op) {
if (op->getNumSuccessors() > 1)
return op->emitError("BranchOpInterface operations with multiple "
"successors are not supported yet");
if (op->getNumSuccessors() != 1)
return emitError(op.getLoc(),
"only BranchOpInterface operations with exactly "
"one successor are supported yet");
if (op.getSuccessorOperands(0).getProducedOperandCount() > 0)
return op.emitError("produced operands are not supported");
// Collect the values to deallocate and retain and use them to create the
// dealloc operation.
Block *block = op->getBlock();
OpBuilder builder(op);
SmallVector<Value> memrefs, conditions, toRetain;
if (failed(state.getMemrefsAndConditionsToDeallocate(
builder, op.getLoc(), block, memrefs, conditions)))
return failure();
OperandRange forwardedOperands =
op.getSuccessorOperands(0).getForwardedOperands();
state.getMemrefsToRetain(block, op->getSuccessor(0), forwardedOperands,
toRetain);
auto deallocOp = builder.create<bufferization::DeallocOp>(
op.getLoc(), memrefs, conditions, toRetain);
// We want to replace the current ownership of the retained values with the
// result values of the dealloc operation as they are always unique.
state.resetOwnerships(deallocOp.getRetained(), block);
for (auto [retained, ownership] :
llvm::zip(deallocOp.getRetained(), deallocOp.getUpdatedConditions())) {
state.updateOwnership(retained, ownership, block);
}
unsigned numAdditionalReturns = llvm::count_if(forwardedOperands, isMemref);
SmallVector<Value> newOperands(forwardedOperands);
auto additionalConditions =
deallocOp.getUpdatedConditions().take_front(numAdditionalReturns);
newOperands.append(additionalConditions.begin(), additionalConditions.end());
op.getSuccessorOperands(0).getMutableForwardedOperands().assign(newOperands);
return op.getOperation();
}
FailureOr<Operation *> BufferDeallocation::handleInterface(CallOpInterface op) {
OpBuilder builder(op);
// Lookup the function operation and check if it has private visibility. If
// the function is referenced by SSA value instead of a Symbol, it's assumed
// to be always private.
Operation *funcOp = op.resolveCallable(state.getSymbolTable());
bool isPrivate = true;
if (auto symbol = dyn_cast<SymbolOpInterface>(funcOp))
isPrivate = symbol.isPrivate() && !symbol.isDeclaration();
// If the private-function-dynamic-ownership option is enabled and we are
// calling a private function, we need to add an additional `i1`
// argument/result for each MemRef argument/result to dynamically pass the
// current ownership indicator rather than adhering to the function boundary
// ABI.
if (options.privateFuncDynamicOwnership && isPrivate) {
SmallVector<Value> newOperands, ownershipIndicatorsToAdd;
for (Value operand : op.getArgOperands()) {
if (!isMemref(operand)) {
newOperands.push_back(operand);
continue;
}
auto [memref, condition] =
materializeUniqueOwnership(builder, operand, op->getBlock());
newOperands.push_back(memref);
ownershipIndicatorsToAdd.push_back(condition);
}
newOperands.append(ownershipIndicatorsToAdd.begin(),
ownershipIndicatorsToAdd.end());
op.getArgOperandsMutable().assign(newOperands);
unsigned numMemrefs = llvm::count_if(op->getResults(), isMemref);
SmallVector<Type> ownershipTypesToAppend(numMemrefs, builder.getI1Type());
unsigned ownershipCounter = op->getNumResults();
op = appendOpResults(op, ownershipTypesToAppend);
for (auto result : llvm::make_filter_range(op->getResults(), isMemref)) {
state.updateOwnership(result, op->getResult(ownershipCounter++));
state.addMemrefToDeallocate(result, result.getParentBlock());
}
return op.getOperation();
}
// According to the function boundary ABI we are guaranteed to get ownership
// of all MemRefs returned by the function. Thus we set ownership to constant
// 'true' and remember to deallocate it.
Value trueVal = buildBoolValue(builder, op.getLoc(), true);
for (auto result : llvm::make_filter_range(op->getResults(), isMemref)) {
state.updateOwnership(result, trueVal);
state.addMemrefToDeallocate(result, result.getParentBlock());
}
return op.getOperation();
}
FailureOr<Operation *>
BufferDeallocation::handleInterface(MemoryEffectOpInterface op) {
auto *block = op->getBlock();
OpBuilder builder = OpBuilder::atBlockBegin(block);
for (auto operand : llvm::make_filter_range(op->getOperands(), isMemref)) {
if (op.getEffectOnValue<MemoryEffects::Free>(operand).has_value()) {
// The bufferization.manual_deallocation attribute can be attached to ops
// with an allocation and/or deallocation side effect. It indicates that
// the op is under a "manual deallocation" scheme. Deallocation ops are
// usually forbidden in the input IR (not supported by the buffer
// deallocation pass). However, if they are under manual deallocation,
// they can be safely ignored by the buffer deallocation pass.
if (!op->hasAttr(BufferizationDialect::kManualDeallocation))
return op->emitError(
"memory free side-effect on MemRef value not supported!");
// Buffers that were allocated under "manual deallocation" may be
// manually deallocated. We insert a runtime assertion to cover certain
// cases of invalid IR where an automatically managed buffer allocation
// is manually deallocated. This is not a bulletproof check!
OpBuilder::InsertionGuard g(builder);
builder.setInsertionPoint(op);
Ownership ownership = state.getOwnership(operand, block);
if (ownership.isUnique()) {
Value ownershipInverted = builder.create<arith::XOrIOp>(
op.getLoc(), ownership.getIndicator(),
buildBoolValue(builder, op.getLoc(), true));
builder.create<cf::AssertOp>(
op.getLoc(), ownershipInverted,
"expected that the block does not have ownership");
}
}
}
for (auto res : llvm::make_filter_range(op->getResults(), isMemref)) {
auto allocEffect = op.getEffectOnValue<MemoryEffects::Allocate>(res);
if (allocEffect.has_value()) {
if (isa<SideEffects::AutomaticAllocationScopeResource>(
allocEffect->getResource())) {
// Make sure that the ownership of auto-managed allocations is set to
// false. This is important for operations that have at least one memref
// typed operand. E.g., consider an operation like `bufferization.clone`
// that lowers to a `memref.alloca + memref.copy` instead of a
// `memref.alloc`. If we wouldn't set the ownership of the result here,
// the default ownership population in `populateRemainingOwnerships`
// would assume aliasing with the MemRef operand.
state.resetOwnerships(res, block);
state.updateOwnership(res, buildBoolValue(builder, op.getLoc(), false));
continue;
}
if (op->hasAttr(BufferizationDialect::kManualDeallocation)) {
// This allocation will be deallocated manually. Assign an ownership of
// "false", so that it will never be deallocated by the buffer
// deallocation pass.
state.resetOwnerships(res, block);
state.updateOwnership(res, buildBoolValue(builder, op.getLoc(), false));
continue;
}
state.updateOwnership(res, buildBoolValue(builder, op.getLoc(), true));
state.addMemrefToDeallocate(res, block);
}
}
return op.getOperation();
}
FailureOr<Operation *>
BufferDeallocation::handleInterface(RegionBranchTerminatorOpInterface op) {
OpBuilder builder(op);
// If this is a return operation of a function that is not private or the
// dynamic function boundary ownership is disabled, we need to return memref
// values for which we have guaranteed ownership to pass on to adhere to the
// function boundary ABI.
bool funcWithoutDynamicOwnership =
isFunctionWithoutDynamicOwnership(op->getParentOp());
if (funcWithoutDynamicOwnership) {
for (OpOperand &val : op->getOpOperands()) {
if (!isMemref(val.get()))
continue;
val.set(materializeMemrefWithGuaranteedOwnership(builder, val.get(),
op->getBlock()));
}
}
// TODO: getSuccessorRegions is not implemented by all operations we care
// about, but we would need to check how many successors there are and under
// which condition they are taken, etc.
MutableOperandRange operands =
op.getMutableSuccessorOperands(RegionBranchPoint::parent());
SmallVector<Value> updatedOwnerships;
auto result = deallocation_impl::insertDeallocOpForReturnLike(
state, op, OperandRange(operands), updatedOwnerships);
if (failed(result) || !*result)
return result;
// Add an additional operand for every MemRef for the ownership indicator.
if (!funcWithoutDynamicOwnership) {
SmallVector<Value> newOperands{OperandRange(operands)};
newOperands.append(updatedOwnerships.begin(), updatedOwnerships.end());
operands.assign(newOperands);
}
return op.getOperation();
}
bool BufferDeallocation::isFunctionWithoutDynamicOwnership(Operation *op) {
auto funcOp = dyn_cast<FunctionOpInterface>(op);
return funcOp && (!options.privateFuncDynamicOwnership ||
!funcOp.isPrivate() || funcOp.isExternal());
}
void BufferDeallocation::populateRemainingOwnerships(Operation *op) {
for (auto res : op->getResults()) {
if (!isMemref(res))
continue;
if (!state.getOwnership(res, op->getBlock()).isUninitialized())
continue;
// The op does not allocate memory, otherwise, it would have been assigned
// an ownership during `handleInterface`. Assume the result may alias with
// any memref operand and thus combine all their ownerships.
for (auto operand : op->getOperands()) {
if (!isMemref(operand))
continue;
state.updateOwnership(
res, state.getOwnership(operand, operand.getParentBlock()),
op->getBlock());
}
// If the ownership value is still uninitialized (e.g., because the op has
// no memref operands), assume that no ownership is taken. E.g., this is the
// case for "memref.get_global".
//
// Note: This can lead to memory leaks if memory side effects are not
// properly specified on the op.
if (state.getOwnership(res, op->getBlock()).isUninitialized()) {
OpBuilder builder(op);
state.updateOwnership(res, buildBoolValue(builder, op->getLoc(), false));
}
}
}
//===----------------------------------------------------------------------===//
// OwnershipBasedBufferDeallocationPass
//===----------------------------------------------------------------------===//
namespace {
/// The actual buffer deallocation pass that inserts and moves dealloc nodes
/// into the right positions. Furthermore, it inserts additional clones if
/// necessary. It uses the algorithm described at the top of the file.
struct OwnershipBasedBufferDeallocationPass
: public bufferization::impl::OwnershipBasedBufferDeallocationBase<
OwnershipBasedBufferDeallocationPass> {
OwnershipBasedBufferDeallocationPass() = default;
OwnershipBasedBufferDeallocationPass(bool privateFuncDynamicOwnership)
: OwnershipBasedBufferDeallocationPass() {
this->privateFuncDynamicOwnership.setValue(privateFuncDynamicOwnership);
}
void runOnOperation() override {
auto status = getOperation()->walk([&](func::FuncOp func) {
if (func.isExternal())
return WalkResult::skip();
if (failed(deallocateBuffersOwnershipBased(func,
privateFuncDynamicOwnership)))
return WalkResult::interrupt();
return WalkResult::advance();
});
if (status.wasInterrupted())
signalPassFailure();
}
};
} // namespace
//===----------------------------------------------------------------------===//
// Implement bufferization API
//===----------------------------------------------------------------------===//
LogicalResult bufferization::deallocateBuffersOwnershipBased(
FunctionOpInterface op, bool privateFuncDynamicOwnership) {
// Gather all required allocation nodes and prepare the deallocation phase.
BufferDeallocation deallocation(op, privateFuncDynamicOwnership);
// Place all required temporary clone and dealloc nodes.
return deallocation.deallocate(op);
}
//===----------------------------------------------------------------------===//
// OwnershipBasedBufferDeallocationPass construction
//===----------------------------------------------------------------------===//
std::unique_ptr<Pass>
mlir::bufferization::createOwnershipBasedBufferDeallocationPass(
bool privateFuncDynamicOwnership) {
return std::make_unique<OwnershipBasedBufferDeallocationPass>(
privateFuncDynamicOwnership);
}