//===- LowerHLFIROrderedAssignments.cpp - Lower HLFIR ordered assignments -===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
// This file defines a pass to lower HLFIR ordered assignments.
// Ordered assignments are all the operations with the
// OrderedAssignmentTreeOpInterface, which implement user defined assignments,
// assignments to vector subscripted entities, and assignments inside forall
// and where constructs.
// The pass lowers these operations to regular hlfir.assign, loops and, if
// needed, introduces temporary storage to fulfill Fortran semantics.
//
// For each rewrite, an analysis builds an evaluation schedule, and then the
// new code is generated by following the evaluation schedule.
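//
// For instance (an illustrative example, not part of the original header):
//
//   forall (i=1:n) x(i) = x(n+1-i)
//
// must behave as if every right-hand side x(n+1-i) were evaluated before any
// assignment to x(i) is performed, which requires saving the right-hand side
// values in a temporary when the scheduler cannot prove the absence of
// overlap.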
//===----------------------------------------------------------------------===//

#include "ScheduleOrderedAssignments.h"
#include "flang/Optimizer/Builder/FIRBuilder.h"
#include "flang/Optimizer/Builder/HLFIRTools.h"
#include "flang/Optimizer/Builder/TemporaryStorage.h"
#include "flang/Optimizer/Builder/Todo.h"
#include "flang/Optimizer/Dialect/Support/FIRContext.h"
#include "flang/Optimizer/HLFIR/Passes.h"
#include "mlir/IR/Dominance.h"
#include "mlir/IR/IRMapping.h"
#include "mlir/Transforms/DialectConversion.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/TypeSwitch.h"
#include "llvm/Support/Debug.h"

namespace hlfir {
#define GEN_PASS_DEF_LOWERHLFIRORDEREDASSIGNMENTS
#include "flang/Optimizer/HLFIR/Passes.h.inc"
} // namespace hlfir

#define DEBUG_TYPE "flang-ordered-assignment"

// Test option to run only the scheduling part (operations are erased without
// codegen). Its only goal is to allow printing and testing the scheduling
// debug info.
static llvm::cl::opt<bool> dbgScheduleOnly(
    "flang-dbg-order-assignment-schedule-only",
    llvm::cl::desc("Only run ordered assignment scheduling with no codegen"),
    llvm::cl::init(false));

namespace {

/// Structure that represents a masked expression being lowered. Masked
/// expressions are any expressions inside an hlfir.where. As described in
/// Fortran 2018 section 10.2.3.2, the evaluation of the elemental parts of
/// such expressions must be masked, while the evaluation of the non-elemental
/// parts must not be masked. This structure analyzes the region evaluating
/// the expression and allows splitting the generation of the non-elemental
/// part from the elemental part.
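///
/// For example (illustrative, not from the original comment), in:
///
///   where (mask) x = sum(y) + z
///
/// the transformational SUM(y) is a non-elemental part that is evaluated
/// unmasked, once, before the WHERE loops, while the elemental addition with
/// z is only evaluated for the elements where mask is true.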
struct MaskedArrayExpr {
  MaskedArrayExpr(mlir::Location loc, mlir::Region &region);

  /// Generate the non-elemental part. Must be called outside of the
  /// loops created for the WHERE construct.
  void generateNoneElementalPart(fir::FirOpBuilder &builder,
                                 mlir::IRMapping &mapper);

  /// Methods below can only be called once generateNoneElementalPart has been
  /// called.

  /// Return the shape of the expression.
  mlir::Value generateShape(fir::FirOpBuilder &builder,
                            mlir::IRMapping &mapper);
  /// Return the value of an element of this expression given the current
  /// where loop indices.
  mlir::Value generateElementalParts(fir::FirOpBuilder &builder,
                                     mlir::ValueRange oneBasedIndices,
                                     mlir::IRMapping &mapper);
  /// Generate the cleanup for the non-elemental parts, if any. This must be
  /// called after the loops created for the WHERE construct.
  void generateNoneElementalCleanupIfAny(fir::FirOpBuilder &builder,
                                         mlir::IRMapping &mapper);

  mlir::Location loc;
  mlir::Region &region;
  /// Was generateNoneElementalPart called?
  bool noneElementalPartWasGenerated = false;
  /// Set of operations that form the elemental parts of the
  /// expression evaluation. These are the hlfir.elemental and
  /// hlfir.elemental_addr that form the elemental tree producing
  /// the expression value. hlfir.elemental that produce values
  /// used inside transformational operations are not part of this set.
  llvm::SmallSet<mlir::Operation *, 4> elementalParts{};
};
} // namespace

namespace {
/// Structure that visits an ordered assignment tree and generates code for
/// it according to a schedule.
class OrderedAssignmentRewriter {
public:
  OrderedAssignmentRewriter(fir::FirOpBuilder &builder,
                            hlfir::OrderedAssignmentTreeOpInterface root)
      : builder{builder}, root{root} {}

  /// Generate code for the current run of the schedule.
  void lowerRun(hlfir::Run &run) {
    currentRun = &run;
    walk(root);
    currentRun = nullptr;
    assert(constructStack.empty() && "must exit constructs after a run");
    mapper.clear();
    savedInCurrentRunBeforeUse.clear();
  }

  /// After all runs have been lowered, clean up all the temporary storage
  /// that was created (do not call final routines).
  void cleanupSavedEntities() {
    for (auto &temp : savedEntities)
      temp.second.destroy(root.getLoc(), builder);
  }

  /// Lowered value for an expression, and the original hlfir.yield if any
  /// clean-up needs to be cloned after usage.
  using ValueAndCleanUp = std::pair<mlir::Value, std::optional<hlfir::YieldOp>>;

private:
  /// Walk the part of an ordered assignment tree node that needs
  /// to be evaluated in the current run.
  void walk(hlfir::OrderedAssignmentTreeOpInterface node);

  /// Generate code when entering a given ordered assignment node.
  void pre(hlfir::ForallOp forallOp);
  void pre(hlfir::ForallIndexOp);
  void pre(hlfir::ForallMaskOp);
  void pre(hlfir::WhereOp whereOp);
  void pre(hlfir::ElseWhereOp elseWhereOp);
  void pre(hlfir::RegionAssignOp);

  /// Generate code when leaving a given ordered assignment node.
  void post(hlfir::ForallOp);
  void post(hlfir::ForallMaskOp);
  void post(hlfir::WhereOp);
  void post(hlfir::ElseWhereOp);
  /// Enter (and maybe create) the fir.if else block of an ElseWhereOp,
  /// but do not generate the elsewhere mask or the new fir.if.
  void enterElsewhere(hlfir::ElseWhereOp);

  /// Are there any leaf regions in the node that must be saved in the current
  /// run?
  bool mustSaveRegionIn(
      hlfir::OrderedAssignmentTreeOpInterface node,
      llvm::SmallVectorImpl<hlfir::SaveEntity> &saveEntities) const;
  /// Should this node be evaluated in the current run? Saving a region in a
  /// node does not imply the node needs to be evaluated.
  bool
  isRequiredInCurrentRun(hlfir::OrderedAssignmentTreeOpInterface node) const;

  /// Generate a scalar value yielded by an ordered assignment tree region.
  /// If the value was not saved in a previous run, this clones the region
  /// code, except the final yield, at the current execution point.
  /// If the value was saved in a previous run, this fetches the saved value
  /// from the temporary storage and returns it.
  /// Inside Forall, the value will be hoisted outside of the forall loops if
  /// it does not depend on the forall indices.
  /// An optional type can be provided to get a value of a specific type
  /// (the cast will be hoisted if the computation is hoisted).
  mlir::Value generateYieldedScalarValue(
      mlir::Region &region,
      std::optional<mlir::Type> castToType = std::nullopt);

  /// Generate an entity yielded by an ordered assignment tree region, and
  /// optionally return the (uncloned) yield if there is any clean-up that
  /// should be done after using the entity. Like generateYieldedScalarValue,
  /// this will return the saved value if the region was saved in a previous
  /// run.
  ValueAndCleanUp
  generateYieldedEntity(mlir::Region &region,
                        std::optional<mlir::Type> castToType = std::nullopt);

  struct LhsValueAndCleanUp {
    mlir::Value lhs;
    std::optional<hlfir::YieldOp> elementalCleanup;
    mlir::Region *nonElementalCleanup = nullptr;
    std::optional<hlfir::LoopNest> vectorSubscriptLoopNest;
    std::optional<mlir::Value> vectorSubscriptShape;
  };

  /// Generate the left-hand side. If the left-hand side is vector
  /// subscripted (hlfir.elemental_addr), this will create a loop nest
  /// (unless it was already created by a WHERE mask) and return the
  /// element address.
  LhsValueAndCleanUp
  generateYieldedLHS(mlir::Location loc, mlir::Region &lhsRegion,
                     std::optional<hlfir::Entity> loweredRhs = std::nullopt);

  /// If \p maybeYield is present and has a clean-up, generate the clean-up
  /// at the current insertion point (by cloning).
  void generateCleanupIfAny(std::optional<hlfir::YieldOp> maybeYield);
  void generateCleanupIfAny(mlir::Region *cleanupRegion);

  /// Generate a masked entity. This can only be called when whereLoopNest was
  /// set (when an hlfir.where is being visited).
  /// This method returns the scalar element (that may have been previously
  /// saved) for the current indices inside the where loop.
  mlir::Value generateMaskedEntity(mlir::Location loc, mlir::Region &region) {
    MaskedArrayExpr maskedExpr(loc, region);
    return generateMaskedEntity(maskedExpr);
  }
  mlir::Value generateMaskedEntity(MaskedArrayExpr &maskedExpr);

  /// Create a fir.if at the current position inside the where loop nest
  /// given the element value of a mask.
  void generateMaskIfOp(mlir::Value cdt);

  /// Save a value for subsequent runs.
  void generateSaveEntity(hlfir::SaveEntity savedEntity,
                          bool willUseSavedEntityInSameRun);
  void saveLeftHandSide(hlfir::SaveEntity savedEntity,
                        hlfir::RegionAssignOp regionAssignOp);

  /// Get a value if it was saved in this run or a previous run. Returns
  /// nullopt if it has not been saved.
  std::optional<ValueAndCleanUp> getIfSaved(mlir::Region &region);

  /// Generate code before the loop nest for the current run, if any.
  void doBeforeLoopNest(const std::function<void()> &callback) {
    if (constructStack.empty()) {
      callback();
      return;
    }
    auto insertionPoint = builder.saveInsertionPoint();
    builder.setInsertionPoint(constructStack[0]);
    callback();
    builder.restoreInsertionPoint(insertionPoint);
  }

  /// Can the current loop nest iteration number be computed? For simplicity,
  /// this is true if and only if all the bounds and steps of the fir.do_loop
  /// nest dominate the outer loop. The argument is filled with the current
  /// loop nest on success.
  bool currentLoopNestIterationNumberCanBeComputed(
      llvm::SmallVectorImpl<fir::DoLoopOp> &loopNest);

  template <typename T>
  fir::factory::TemporaryStorage *insertSavedEntity(mlir::Region &region,
                                                    T &&temp) {
    auto inserted =
        savedEntities.insert(std::make_pair(&region, std::forward<T>(temp)));
    assert(inserted.second && "temp must have been emplaced");
    return &inserted.first->second;
  }

  fir::FirOpBuilder &builder;

  /// Map containing the mapping between the original ordered assignment tree
  /// operations and the operations that have been cloned in the current run.
  /// It is reset between two runs.
  mlir::IRMapping mapper;
  /// Dominance info is used to determine if inner loop bounds are all
  /// computed before the outer loop for the current loop nest. It does not
  /// need to be reset between runs.
  mlir::DominanceInfo dominanceInfo;
  /// Construct stack in the current run. This allows setting back the
  /// insertion point correctly when leaving a node that requires a
  /// fir.do_loop or fir.if operation.
  llvm::SmallVector<mlir::Operation *> constructStack;
  /// Current where loop nest, if any.
  std::optional<hlfir::LoopNest> whereLoopNest;

  /// Map of temporary storage to keep track of saved entities once the run
  /// that saves them has been lowered. It is kept in-between runs.
  /// llvm::MapVector is used to guarantee deterministic order
  /// of iterating through savedEntities (e.g. for generating
  /// destruction code for the temporary storages).
  llvm::MapVector<mlir::Region *, fir::factory::TemporaryStorage> savedEntities;
  /// Map holding the values that were saved in the current run and that also
  /// need to be used (because their construct will be visited). It is reset
  /// after each run. It avoids having to store and fetch in the temporary
  /// during the same run, which would require the temporary to have different
  /// fetching and storing counters.
  llvm::DenseMap<mlir::Region *, ValueAndCleanUp> savedInCurrentRunBeforeUse;

  /// Root of the ordered assignment tree being lowered.
  hlfir::OrderedAssignmentTreeOpInterface root;
  /// Pointer to the current run of the schedule being lowered.
  hlfir::Run *currentRun = nullptr;

  /// When allocating temporary storage inlined, indicate whether the storage
  /// should be heap or stack allocated. Temporaries allocated with the
  /// runtime are heap allocated by the runtime.
  bool allocateOnHeap = true;
};
} // namespace

void OrderedAssignmentRewriter::walk(
    hlfir::OrderedAssignmentTreeOpInterface node) {
  bool mustVisit =
      isRequiredInCurrentRun(node) || mlir::isa<hlfir::ForallIndexOp>(node);
  llvm::SmallVector<hlfir::SaveEntity> saveEntities;
  mlir::Operation *nodeOp = node.getOperation();
  if (mustSaveRegionIn(node, saveEntities)) {
    mlir::IRRewriter::InsertPoint insertionPoint;
    if (auto elseWhereOp = mlir::dyn_cast<hlfir::ElseWhereOp>(nodeOp)) {
      // An ElseWhere mask to save must be evaluated inside the fir.if else
      // of the previous where/elsewhere (its evaluation must be
      // masked by the "pending control mask").
      insertionPoint = builder.saveInsertionPoint();
      enterElsewhere(elseWhereOp);
    }
    for (hlfir::SaveEntity saveEntity : saveEntities)
      generateSaveEntity(saveEntity, mustVisit);
    if (insertionPoint.isSet())
      builder.restoreInsertionPoint(insertionPoint);
  }
  if (mustVisit) {
    llvm::TypeSwitch<mlir::Operation *, void>(nodeOp)
        .Case<hlfir::ForallOp, hlfir::ForallIndexOp, hlfir::ForallMaskOp,
              hlfir::RegionAssignOp, hlfir::WhereOp, hlfir::ElseWhereOp>(
            [&](auto concreteOp) { pre(concreteOp); })
        .Default([](auto) {});
    if (auto *body = node.getSubTreeRegion()) {
      for (mlir::Operation &op : body->getOps())
        if (auto subNode =
                mlir::dyn_cast<hlfir::OrderedAssignmentTreeOpInterface>(op))
          walk(subNode);
      llvm::TypeSwitch<mlir::Operation *, void>(nodeOp)
          .Case<hlfir::ForallOp, hlfir::ForallMaskOp, hlfir::WhereOp,
                hlfir::ElseWhereOp>([&](auto concreteOp) { post(concreteOp); })
          .Default([](auto) {});
    }
  }
}
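
// Illustrative note (not from the original file): for
//   forall (i=1:n) x(i) = f(i)
// whose schedule contains two runs (save the RHS values, then assign), walk
// visits the hlfir.forall node in both runs. In the first run it re-creates
// the loop, evaluates f(i), and pushes the value onto temporary storage; in
// the second run it re-creates the loop, fetches the saved values, and
// generates the hlfir.assign.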

void OrderedAssignmentRewriter::pre(hlfir::ForallOp forallOp) {
  /// Create a fir.do_loop given the hlfir.forall control values.
  mlir::Type idxTy = builder.getIndexType();
  mlir::Location loc = forallOp.getLoc();
  mlir::Value lb = generateYieldedScalarValue(forallOp.getLbRegion(), idxTy);
  mlir::Value ub = generateYieldedScalarValue(forallOp.getUbRegion(), idxTy);
  mlir::Value step;
  if (forallOp.getStepRegion().empty()) {
    auto insertionPoint = builder.saveInsertionPoint();
    if (!constructStack.empty())
      builder.setInsertionPoint(constructStack[0]);
    step = builder.createIntegerConstant(loc, idxTy, 1);
    if (!constructStack.empty())
      builder.restoreInsertionPoint(insertionPoint);
  } else {
    step = generateYieldedScalarValue(forallOp.getStepRegion(), idxTy);
  }
  auto doLoop = builder.create<fir::DoLoopOp>(loc, lb, ub, step);
  builder.setInsertionPointToStart(doLoop.getBody());
  mlir::Value oldIndex = forallOp.getForallIndexValue();
  mlir::Value newIndex =
      builder.createConvert(loc, oldIndex.getType(), doLoop.getInductionVar());
  mapper.map(oldIndex, newIndex);
  constructStack.push_back(doLoop);
}
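
// Schematically (an illustrative sketch, not generated verbatim), an
// hlfir.forall node with lower/upper bound regions becomes:
//
//   %lb = ... cloned lower bound region ...
//   %ub = ... cloned upper bound region ...
//   %c1 = arith.constant 1 : index
//   fir.do_loop %i = %lb to %ub step %c1 {
//     // body lowered with the forall index mapped to %i
//   }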

void OrderedAssignmentRewriter::post(hlfir::ForallOp) {
  assert(!constructStack.empty() && "must contain a loop");
  builder.setInsertionPointAfter(constructStack.pop_back_val());
}

void OrderedAssignmentRewriter::pre(hlfir::ForallIndexOp forallIndexOp) {
  mlir::Location loc = forallIndexOp.getLoc();
  mlir::Type intTy = fir::unwrapRefType(forallIndexOp.getType());
  mlir::Value indexVar =
      builder.createTemporary(loc, intTy, forallIndexOp.getName());
  mlir::Value newVal = mapper.lookupOrDefault(forallIndexOp.getIndex());
  builder.createStoreWithConvert(loc, newVal, indexVar);
  mapper.map(forallIndexOp, indexVar);
}

void OrderedAssignmentRewriter::pre(hlfir::ForallMaskOp forallMaskOp) {
  mlir::Location loc = forallMaskOp.getLoc();
  mlir::Value mask = generateYieldedScalarValue(forallMaskOp.getMaskRegion(),
                                                builder.getI1Type());
  auto ifOp = builder.create<fir::IfOp>(loc, std::nullopt, mask,
                                        /*withElseRegion=*/false);
  builder.setInsertionPointToStart(&ifOp.getThenRegion().front());
  constructStack.push_back(ifOp);
}

void OrderedAssignmentRewriter::post(hlfir::ForallMaskOp forallMaskOp) {
  assert(!constructStack.empty() && "must contain an ifop");
  builder.setInsertionPointAfter(constructStack.pop_back_val());
}

/// Convert an entity to the type of a given mold.
/// This is intended to help with cases where the hlfir entity is a value
/// while it must be used as a variable or vice-versa. These mismatches may
/// occur between the type of user defined assignment block arguments and the
/// actual argument that was lowered for them. The actual may be an in-memory
/// copy while the block argument expects an hlfir.expr.
static hlfir::Entity
convertToMoldType(mlir::Location loc, fir::FirOpBuilder &builder,
                  hlfir::Entity input, hlfir::Entity mold,
                  llvm::SmallVectorImpl<hlfir::CleanupFunction> &cleanups) {
  if (input.getType() == mold.getType())
    return input;
  fir::FirOpBuilder *b = &builder;
  if (input.isVariable() && mold.isValue()) {
    if (fir::isa_trivial(mold.getType())) {
      // fir.ref<T> to T.
      mlir::Value load = builder.create<fir::LoadOp>(loc, input);
      return hlfir::Entity{builder.createConvert(loc, mold.getType(), load)};
    }
    // fir.ref<T> to hlfir.expr<T>.
    mlir::Value asExpr = builder.create<hlfir::AsExprOp>(loc, input);
    if (asExpr.getType() != mold.getType())
      TODO(loc, "hlfir.expr conversion");
    cleanups.emplace_back([=]() { b->create<hlfir::DestroyOp>(loc, asExpr); });
    return hlfir::Entity{asExpr};
  }
  if (input.isValue() && mold.isVariable()) {
    // T to fir.ref<T>, or hlfir.expr<T> to fir.ref<T>.
    hlfir::AssociateOp associate = hlfir::genAssociateExpr(
        loc, builder, input, mold.getFortranElementType(), ".tmp.val2ref");
    cleanups.emplace_back(
        [=]() { b->create<hlfir::EndAssociateOp>(loc, associate); });
    return hlfir::Entity{associate.getBase()};
  }
  // Variable to variable mismatch (e.g., fir.heap<T> vs fir.ref<T>), or value
  // to value mismatch (e.g. i1 vs fir.logical<4>).
  if (mlir::isa<fir::BaseBoxType>(mold.getType()) &&
      !mlir::isa<fir::BaseBoxType>(input.getType())) {
    // An entity may have been saved without a descriptor while the original
    // value had a descriptor (e.g., it was not contiguous).
    auto emboxed = hlfir::convertToBox(loc, builder, input, mold.getType());
    assert(!emboxed.second && "temp should already be in memory");
    input = hlfir::Entity{fir::getBase(emboxed.first)};
  }
  return hlfir::Entity{builder.createConvert(loc, mold.getType(), input)};
}

void OrderedAssignmentRewriter::pre(hlfir::RegionAssignOp regionAssignOp) {
  mlir::Location loc = regionAssignOp.getLoc();
  auto [rhsValue, oldRhsYield] =
      generateYieldedEntity(regionAssignOp.getRhsRegion());
  hlfir::Entity rhsEntity{rhsValue};
  LhsValueAndCleanUp loweredLhs =
      generateYieldedLHS(loc, regionAssignOp.getLhsRegion(), rhsEntity);
  hlfir::Entity lhsEntity{loweredLhs.lhs};
  if (loweredLhs.vectorSubscriptLoopNest)
    rhsEntity = hlfir::getElementAt(
        loc, builder, rhsEntity,
        loweredLhs.vectorSubscriptLoopNest->oneBasedIndices);
  if (!regionAssignOp.getUserDefinedAssignment().empty()) {
    hlfir::Entity userAssignLhs{regionAssignOp.getUserAssignmentLhs()};
    hlfir::Entity userAssignRhs{regionAssignOp.getUserAssignmentRhs()};
    std::optional<hlfir::LoopNest> elementalLoopNest;
    if (lhsEntity.isArray() && userAssignLhs.isScalar()) {
      // Elemental assignment with array argument (the RHS cannot be an array
      // if the LHS is not).
      mlir::Value shape = hlfir::genShape(loc, builder, lhsEntity);
      elementalLoopNest = hlfir::genLoopNest(loc, builder, shape);
      builder.setInsertionPointToStart(elementalLoopNest->innerLoop.getBody());
      lhsEntity = hlfir::getElementAt(loc, builder, lhsEntity,
                                      elementalLoopNest->oneBasedIndices);
      rhsEntity = hlfir::getElementAt(loc, builder, rhsEntity,
                                      elementalLoopNest->oneBasedIndices);
    }

    llvm::SmallVector<hlfir::CleanupFunction, 2> argConversionCleanups;
    lhsEntity = convertToMoldType(loc, builder, lhsEntity, userAssignLhs,
                                  argConversionCleanups);
    rhsEntity = convertToMoldType(loc, builder, rhsEntity, userAssignRhs,
                                  argConversionCleanups);
    mapper.map(userAssignLhs, lhsEntity);
    mapper.map(userAssignRhs, rhsEntity);
    for (auto &op :
         regionAssignOp.getUserDefinedAssignment().front().without_terminator())
      (void)builder.clone(op, mapper);
    for (auto &cleanupConversion : argConversionCleanups)
      cleanupConversion();
    if (elementalLoopNest)
      builder.setInsertionPointAfter(elementalLoopNest->outerLoop);
  } else {
    // TODO: preserve allocatable assignment aspects for forall once
    // they are conveyed in hlfir.region_assign.
    builder.create<hlfir::AssignOp>(loc, rhsEntity, lhsEntity);
  }
  generateCleanupIfAny(loweredLhs.elementalCleanup);
  if (loweredLhs.vectorSubscriptLoopNest)
    builder.setInsertionPointAfter(
        loweredLhs.vectorSubscriptLoopNest->outerLoop);
  generateCleanupIfAny(oldRhsYield);
  generateCleanupIfAny(loweredLhs.nonElementalCleanup);
}

void OrderedAssignmentRewriter::generateMaskIfOp(mlir::Value cdt) {
  mlir::Location loc = cdt.getLoc();
  cdt = hlfir::loadTrivialScalar(loc, builder, hlfir::Entity{cdt});
  cdt = builder.createConvert(loc, builder.getI1Type(), cdt);
  auto ifOp = builder.create<fir::IfOp>(cdt.getLoc(), std::nullopt, cdt,
                                        /*withElseRegion=*/false);
  constructStack.push_back(ifOp.getOperation());
  builder.setInsertionPointToStart(&ifOp.getThenRegion().front());
}

void OrderedAssignmentRewriter::pre(hlfir::WhereOp whereOp) {
  mlir::Location loc = whereOp.getLoc();
  if (!whereLoopNest) {
    // This is the top-level WHERE. Start a loop nest iterating on the shape
    // of the where mask.
    if (auto maybeSaved = getIfSaved(whereOp.getMaskRegion())) {
      // Use the saved value to get the shape and condition element.
      hlfir::Entity savedMask{maybeSaved->first};
      mlir::Value shape = hlfir::genShape(loc, builder, savedMask);
      whereLoopNest = hlfir::genLoopNest(loc, builder, shape);
      constructStack.push_back(whereLoopNest->outerLoop.getOperation());
      builder.setInsertionPointToStart(whereLoopNest->innerLoop.getBody());
      mlir::Value cdt = hlfir::getElementAt(loc, builder, savedMask,
                                            whereLoopNest->oneBasedIndices);
      generateMaskIfOp(cdt);
      if (maybeSaved->second) {
        // If this is the same run as the one that saved the value, the
        // clean-up was left over to be done now.
        auto insertionPoint = builder.saveInsertionPoint();
        builder.setInsertionPointAfter(whereLoopNest->outerLoop);
        generateCleanupIfAny(maybeSaved->second);
        builder.restoreInsertionPoint(insertionPoint);
      }
      return;
    }
    // The mask was not evaluated yet or can be safely re-evaluated.
    MaskedArrayExpr mask(loc, whereOp.getMaskRegion());
    mask.generateNoneElementalPart(builder, mapper);
    mlir::Value shape = mask.generateShape(builder, mapper);
    whereLoopNest = hlfir::genLoopNest(loc, builder, shape);
    constructStack.push_back(whereLoopNest->outerLoop.getOperation());
    builder.setInsertionPointToStart(whereLoopNest->innerLoop.getBody());
    mlir::Value cdt = generateMaskedEntity(mask);
    generateMaskIfOp(cdt);
    return;
  }
  // Where loops have already been created by a parent WHERE.
  // Generate a fir.if with the value of the current element of the mask
  // inside the loops. The case where the mask was saved is handled in the
  // generateYieldedScalarValue call.
  mlir::Value cdt = generateYieldedScalarValue(whereOp.getMaskRegion());
  generateMaskIfOp(cdt);
}

void OrderedAssignmentRewriter::post(hlfir::WhereOp whereOp) {
  assert(!constructStack.empty() && "must contain a fir.if");
  builder.setInsertionPointAfter(constructStack.pop_back_val());
  // If all where/elsewhere fir.if have been popped, this is the outer
  // whereOp, and the where loop must be exited.
  assert(!constructStack.empty() && "must contain a fir.do_loop or fir.if");
  if (mlir::isa<fir::DoLoopOp>(constructStack.back())) {
    builder.setInsertionPointAfter(constructStack.pop_back_val());
    whereLoopNest.reset();
  }
}
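
// Schematically (an illustrative sketch), a
//   where (m1) ... elsewhere (m2) ... end where
// construct is lowered to:
//
//   fir.do_loop ... {            // loops over the shape of m1
//     fir.if <m1 element> {
//       // where assignments
//     } else {
//       fir.if <m2 element> {
//         // elsewhere assignments
//       }
//     }
//   }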

void OrderedAssignmentRewriter::enterElsewhere(hlfir::ElseWhereOp elseWhereOp) {
  // Create an "else" region for the current where/elsewhere fir.if.
  auto ifOp = mlir::dyn_cast<fir::IfOp>(constructStack.back());
  assert(ifOp && "must be an if");
  if (ifOp.getElseRegion().empty()) {
    mlir::Location loc = elseWhereOp.getLoc();
    builder.createBlock(&ifOp.getElseRegion());
    auto end = builder.create<fir::ResultOp>(loc);
    builder.setInsertionPoint(end);
  } else {
    builder.setInsertionPoint(&ifOp.getElseRegion().back().back());
  }
}

void OrderedAssignmentRewriter::pre(hlfir::ElseWhereOp elseWhereOp) {
  enterElsewhere(elseWhereOp);
  if (elseWhereOp.getMaskRegion().empty())
    return;
  // Create a new nested fir.if with the elsewhere mask, if any.
  mlir::Value cdt = generateYieldedScalarValue(elseWhereOp.getMaskRegion());
  generateMaskIfOp(cdt);
}

void OrderedAssignmentRewriter::post(hlfir::ElseWhereOp elseWhereOp) {
  // Exit the ifOp that was created for the elseWhereOp mask, if any.
  if (elseWhereOp.getMaskRegion().empty())
    return;
  assert(!constructStack.empty() && "must contain a fir.if");
  builder.setInsertionPointAfter(constructStack.pop_back_val());
}

/// Is this value a Forall index?
/// Forall indices are block arguments of the hlfir.forall body, or the
/// result of hlfir.forall_index.
static bool isForallIndex(mlir::Value value) {
  if (auto blockArg = mlir::dyn_cast<mlir::BlockArgument>(value)) {
    if (mlir::Block *block = blockArg.getOwner())
      return block->isEntryBlock() &&
             mlir::isa_and_nonnull<hlfir::ForallOp>(block->getParentOp());
    return false;
  }
  return value.getDefiningOp<hlfir::ForallIndexOp>();
}

static OrderedAssignmentRewriter::ValueAndCleanUp
castIfNeeded(mlir::Location loc, fir::FirOpBuilder &builder,
             OrderedAssignmentRewriter::ValueAndCleanUp valueAndCleanUp,
             std::optional<mlir::Type> castToType) {
  if (!castToType.has_value())
    return valueAndCleanUp;
  mlir::Value cast =
      builder.createConvert(loc, *castToType, valueAndCleanUp.first);
  return {cast, valueAndCleanUp.second};
}

std::optional<OrderedAssignmentRewriter::ValueAndCleanUp>
OrderedAssignmentRewriter::getIfSaved(mlir::Region &region) {
  mlir::Location loc = region.getParentOp()->getLoc();
  // If the region was saved in the same run, use the value that was
  // evaluated instead of fetching the temp, and do any clean-ups that were
  // delayed. This is done to avoid requiring the temporary stack to have
  // different fetching and storing counters, and also because it produces
  // slightly better code.
  if (auto savedInSameRun = savedInCurrentRunBeforeUse.find(&region);
      savedInSameRun != savedInCurrentRunBeforeUse.end())
    return savedInSameRun->second;
  // If the region was saved in a previous run, fetch the saved value.
  if (auto temp = savedEntities.find(&region); temp != savedEntities.end()) {
    doBeforeLoopNest([&]() { temp->second.resetFetchPosition(loc, builder); });
    return ValueAndCleanUp{temp->second.fetch(loc, builder), std::nullopt};
  }
  return std::nullopt;
}

OrderedAssignmentRewriter::ValueAndCleanUp
OrderedAssignmentRewriter::generateYieldedEntity(
    mlir::Region &region, std::optional<mlir::Type> castToType) {
  mlir::Location loc = region.getParentOp()->getLoc();
  if (auto maybeValueAndCleanUp = getIfSaved(region))
    return castIfNeeded(loc, builder, *maybeValueAndCleanUp, castToType);
  // Otherwise, evaluate the region now.

  // A masked expression must not evaluate the elemental parts that are
  // masked; they have custom code generation.
  if (whereLoopNest.has_value()) {
    mlir::Value maskedValue = generateMaskedEntity(loc, region);
    return castIfNeeded(loc, builder, {maskedValue, std::nullopt}, castToType);
  }

  assert(region.hasOneBlock() && "region must contain one block");
  auto oldYield = mlir::dyn_cast_or_null<hlfir::YieldOp>(
      region.back().getOperations().back());
  assert(oldYield && "region computing entities must end with a YieldOp");
  mlir::Block::OpListType &ops = region.back().getOperations();

  // Inside Forall, scalars that do not depend on forall indices can be
  // hoisted here because their evaluation is required to only call pure
  // procedures, and if they depend on a variable previously assigned to in a
  // forall assignment, this assignment must have been scheduled in a
  // previous run. Hoisting of scalars is done here to help create simple
  // temporary storage if needed. Inner forall bounds can often be hoisted,
  // and this allows computing the total number of iterations to create
  // temporary storages.
  bool hoistComputation = false;
  if (fir::isa_trivial(oldYield.getEntity().getType()) &&
      !constructStack.empty()) {
    hoistComputation = true;
    for (mlir::Operation &op : ops)
      if (llvm::any_of(op.getOperands(), [](mlir::Value value) {
            return isForallIndex(value);
          })) {
        hoistComputation = false;
        break;
      }
  }
  auto insertionPoint = builder.saveInsertionPoint();
  if (hoistComputation)
    builder.setInsertionPoint(constructStack[0]);

  // Clone all operations except the final hlfir.yield.
  assert(!ops.empty() && "yield block cannot be empty");
  auto end = ops.end();
  for (auto opIt = ops.begin(); std::next(opIt) != end; ++opIt)
    (void)builder.clone(*opIt, mapper);
  // Get the value for the yielded entity. It may be the result of an
  // operation that was cloned, or it may be the same as the previous value
  // if the yield operand was created before the ordered assignment tree.
  mlir::Value newEntity = mapper.lookupOrDefault(oldYield.getEntity());
  if (castToType.has_value())
    newEntity =
        builder.createConvert(newEntity.getLoc(), *castToType, newEntity);

  if (hoistComputation) {
    // Hoisted trivial scalars clean-up can be done right away: the value is
    // in registers.
    generateCleanupIfAny(oldYield);
    builder.restoreInsertionPoint(insertionPoint);
    return {newEntity, std::nullopt};
  }
  if (oldYield.getCleanup().empty())
    return {newEntity, std::nullopt};
  return {newEntity, oldYield};
}
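
// Illustrative note (not from the original file): in
//   forall (i=1:n)
//     forall (j=1:m)
//       a(i, j) = b(j, i)
// the inner bound m does not depend on i, so its evaluation is hoisted above
// the outer loop. This is what makes the total iteration count n*m
// computable when a temporary of pre-computed size must be created.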

mlir::Value OrderedAssignmentRewriter::generateYieldedScalarValue(
    mlir::Region &region, std::optional<mlir::Type> castToType) {
  mlir::Location loc = region.getParentOp()->getLoc();
  auto [value, maybeYield] = generateYieldedEntity(region, castToType);
  value = hlfir::loadTrivialScalar(loc, builder, hlfir::Entity{value});
  assert(fir::isa_trivial(value.getType()) && "not a trivial scalar value");
  generateCleanupIfAny(maybeYield);
  return value;
}

OrderedAssignmentRewriter::LhsValueAndCleanUp
OrderedAssignmentRewriter::generateYieldedLHS(
    mlir::Location loc, mlir::Region &lhsRegion,
    std::optional<hlfir::Entity> loweredRhs) {
  LhsValueAndCleanUp loweredLhs;
  hlfir::ElementalAddrOp elementalAddrLhs =
      mlir::dyn_cast<hlfir::ElementalAddrOp>(lhsRegion.back().back());
  if (auto temp = savedEntities.find(&lhsRegion); temp != savedEntities.end()) {
    // The LHS address was computed and saved in a previous run. Fetch it.
    doBeforeLoopNest([&]() { temp->second.resetFetchPosition(loc, builder); });
    if (elementalAddrLhs && !whereLoopNest) {
      // Vector subscripted designator addresses are saved element by element.
      // If no "elemental" loops have been created yet, the shape of the RHS,
      // if it is an array, can be used, or the shape of the vector
      // subscripted designator must be retrieved, to generate the
      // "elemental" loop nest.
      if (loweredRhs && loweredRhs->isArray()) {
        // The RHS shape can be used to create the elemental loops and avoid
        // saving the LHS shape.
        loweredLhs.vectorSubscriptShape =
            hlfir::genShape(loc, builder, *loweredRhs);
      } else {
        // If the shape cannot be retrieved from the RHS, it must have been
        // saved. Get it from the temporary.
        auto &vectorTmp =
            temp->second.cast<fir::factory::AnyVectorSubscriptStack>();
        loweredLhs.vectorSubscriptShape = vectorTmp.fetchShape(loc, builder);
      }
      loweredLhs.vectorSubscriptLoopNest = hlfir::genLoopNest(
          loc, builder, loweredLhs.vectorSubscriptShape.value());
      builder.setInsertionPointToStart(
          loweredLhs.vectorSubscriptLoopNest->innerLoop.getBody());
    }
    loweredLhs.lhs = temp->second.fetch(loc, builder);
    return loweredLhs;
  }
  // The LHS has not yet been evaluated and saved. Evaluate it now.
  if (elementalAddrLhs && !whereLoopNest) {
    // This is a vector subscripted entity. The address of elements must
    // be returned. If no "elemental" loops have been created for a WHERE,
    // create them now based on the vector subscripted designator shape.
    for (auto &op : lhsRegion.front().without_terminator())
      (void)builder.clone(op, mapper);
    loweredLhs.vectorSubscriptShape =
        mapper.lookupOrDefault(elementalAddrLhs.getShape());
    loweredLhs.vectorSubscriptLoopNest =
        hlfir::genLoopNest(loc, builder, *loweredLhs.vectorSubscriptShape,
                           !elementalAddrLhs.isOrdered());
    builder.setInsertionPointToStart(
        loweredLhs.vectorSubscriptLoopNest->innerLoop.getBody());
    mapper.map(elementalAddrLhs.getIndices(),
               loweredLhs.vectorSubscriptLoopNest->oneBasedIndices);
    for (auto &op : elementalAddrLhs.getBody().front().without_terminator())
      (void)builder.clone(op, mapper);
    loweredLhs.elementalCleanup = elementalAddrLhs.getYieldOp();
    loweredLhs.lhs =
        mapper.lookupOrDefault(loweredLhs.elementalCleanup->getEntity());
  } else {
    // This is a designator without vector subscripts. Generate it as
    // is done for other entities.
    auto [lhs, yield] = generateYieldedEntity(lhsRegion);
    loweredLhs.lhs = lhs;
    if (yield && !yield->getCleanup().empty())
      loweredLhs.nonElementalCleanup = &yield->getCleanup();
  }
  return loweredLhs;
}
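
// For example (illustrative), for the vector subscripted LHS in
//   x(v) = y
// the designator is lowered as an hlfir.elemental_addr: a loop nest is
// created over the shape of v, and each element address x(v(j)) is yielded
// inside it. Inside a WHERE, the already-created where loops are reused
// instead of new "elemental" loops.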

mlir::Value
OrderedAssignmentRewriter::generateMaskedEntity(MaskedArrayExpr &maskedExpr) {
  assert(whereLoopNest.has_value() && "must be inside WHERE loop nest");
  auto insertionPoint = builder.saveInsertionPoint();
  if (!maskedExpr.noneElementalPartWasGenerated) {
    // Generate the non-elemental part before the where loops (but inside the
    // current forall loops, if any).
    builder.setInsertionPoint(whereLoopNest->outerLoop);
    maskedExpr.generateNoneElementalPart(builder, mapper);
  }
  // Generate the non-elemental part cleanup after the where loops.
  builder.setInsertionPointAfter(whereLoopNest->outerLoop);
  maskedExpr.generateNoneElementalCleanupIfAny(builder, mapper);
  // Generate the value of the current element for the masked expression
  // at the current insertion point (inside the where loops, and any fir.if
  // generated for previous masks).
  builder.restoreInsertionPoint(insertionPoint);
  return maskedExpr.generateElementalParts(
      builder, whereLoopNest->oneBasedIndices, mapper);
}

void OrderedAssignmentRewriter::generateCleanupIfAny(
    std::optional<hlfir::YieldOp> maybeYield) {
  if (maybeYield.has_value())
    generateCleanupIfAny(&maybeYield->getCleanup());
}
void OrderedAssignmentRewriter::generateCleanupIfAny(
    mlir::Region *cleanupRegion) {
  if (cleanupRegion && !cleanupRegion->empty()) {
    assert(cleanupRegion->hasOneBlock() && "region must contain one block");
    for (auto &op : cleanupRegion->back().without_terminator())
      builder.clone(op, mapper);
  }
}

bool OrderedAssignmentRewriter::mustSaveRegionIn(
    hlfir::OrderedAssignmentTreeOpInterface node,
    llvm::SmallVectorImpl<hlfir::SaveEntity> &saveEntities) const {
  for (auto &action : currentRun->actions)
    if (hlfir::SaveEntity *savedEntity =
            std::get_if<hlfir::SaveEntity>(&action))
      if (node.getOperation() == savedEntity->yieldRegion->getParentOp())
        saveEntities.push_back(*savedEntity);
  return !saveEntities.empty();
}

bool OrderedAssignmentRewriter::isRequiredInCurrentRun(
    hlfir::OrderedAssignmentTreeOpInterface node) const {
  // hlfir.forall_index operations do not contain saved regions/assignments,
  // but if their hlfir.forall parent was required, they are
  // required (the forall indices need to be mapped).
  if (mlir::isa<hlfir::ForallIndexOp>(node))
    return true;
  for (auto &action : currentRun->actions)
    if (hlfir::SaveEntity *savedEntity =
            std::get_if<hlfir::SaveEntity>(&action)) {
      // A SaveEntity action does not require evaluating the node that
      // contains it, but it requires evaluating all the parents of the node
      // that contains it. For instance, saving a bound of hlfir.forall B
      // does not require creating the loops for B, but it requires creating
      // the loops for any forall parent A of the forall B.
      if (node->isProperAncestor(savedEntity->yieldRegion->getParentOp()))
        return true;
    } else {
      auto assign = std::get<hlfir::RegionAssignOp>(action);
      if (node->isAncestor(assign.getOperation()))
        return true;
    }
  return false;
}

/// Is the apply using all the elemental indices in order?
static bool isInOrderApply(hlfir::ApplyOp apply,
                           hlfir::ElementalOpInterface elemental) {
  mlir::Region::BlockArgListType elementalIndices = elemental.getIndices();
  if (elementalIndices.size() != apply.getIndices().size())
    return false;
  for (auto [elementalIdx, applyIdx] :
       llvm::zip(elementalIndices, apply.getIndices()))
    if (elementalIdx != applyIdx)
      return false;
  return true;
}
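
// For example (illustrative), the hlfir.elemental created for TRANSPOSE(y)
// applies the indices of its operand as (j, i): such an apply is not "in
// order" with respect to an enclosing elemental iterating with (i, j), so an
// ordered operand elemental must not be inlined through it.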

/// Gather the tree of hlfir::ElementalOpInterface use-def, if any, starting
/// from \p elemental, which may be a nullptr.
static void
gatherElementalTree(hlfir::ElementalOpInterface elemental,
                    llvm::SmallPtrSetImpl<mlir::Operation *> &elementalOps,
                    bool isOutOfOrder) {
  if (elemental) {
    // Only inline an applied elemental that must be executed in order if the
    // applying indices are in order. An hlfir::Elemental may have been
    // created for a transformational like transpose, and Fortran 2018
    // standard section 10.2.3.2, point 10 implies that impure elemental
    // sub-expression evaluations should not be masked if they are the
    // arguments of transformational expressions.
    if (isOutOfOrder && elemental.isOrdered())
      return;
    elementalOps.insert(elemental.getOperation());
    for (mlir::Operation &op : elemental.getElementalRegion().getOps())
      if (auto apply = mlir::dyn_cast<hlfir::ApplyOp>(op)) {
        bool isUnorderedApply =
            isOutOfOrder || !isInOrderApply(apply, elemental);
        auto maybeElemental =
            mlir::dyn_cast_or_null<hlfir::ElementalOpInterface>(
                apply.getExpr().getDefiningOp());
        gatherElementalTree(maybeElemental, elementalOps, isUnorderedApply);
      }
  }
}

MaskedArrayExpr::MaskedArrayExpr(mlir::Location loc, mlir::Region &region)
    : loc{loc}, region{region} {
  mlir::Operation &terminator = region.back().back();
  if (auto elementalAddr =
          mlir::dyn_cast<hlfir::ElementalOpInterface>(terminator)) {
    // Vector subscripted designator (hlfir.elemental_addr terminator).
    gatherElementalTree(elementalAddr, elementalParts, /*isOutOfOrder=*/false);
    return;
  }
  // Otherwise, check if this is an elemental expression.
  mlir::Value entity = mlir::cast<hlfir::YieldOp>(terminator).getEntity();
  auto maybeElemental = mlir::dyn_cast_or_null<hlfir::ElementalOpInterface>(
      entity.getDefiningOp());
  gatherElementalTree(maybeElemental, elementalParts, /*isOutOfOrder=*/false);
}

void MaskedArrayExpr::generateNoneElementalPart(fir::FirOpBuilder &builder,
                                                mlir::IRMapping &mapper) {
  assert(!noneElementalPartWasGenerated &&
         "non-elemental parts already generated");
  // Clone all operations, except the elemental ones and the final yield.
  mlir::Block::OpListType &ops = region.back().getOperations();
  assert(!ops.empty() && "yield block cannot be empty");
  auto end = ops.end();
  for (auto opIt = ops.begin(); std::next(opIt) != end; ++opIt)
    if (!elementalParts.contains(&*opIt))
      (void)builder.clone(*opIt, mapper);
  noneElementalPartWasGenerated = true;
}

mlir::Value MaskedArrayExpr::generateShape(fir::FirOpBuilder &builder,
                                           mlir::IRMapping &mapper) {
  assert(noneElementalPartWasGenerated &&
         "non-elemental part must have been generated");
  mlir::Operation &terminator = region.back().back();
  // If the operation that produced the yielded entity is elemental, it was
  // not cloned, but it holds a shape argument that was cloned. Return the
  // cloned shape.
  if (auto elementalAddrOp = mlir::dyn_cast<hlfir::ElementalAddrOp>(terminator))
    return mapper.lookupOrDefault(elementalAddrOp.getShape());
  mlir::Value entity = mlir::cast<hlfir::YieldOp>(terminator).getEntity();
  if (auto elemental = entity.getDefiningOp<hlfir::ElementalOp>())
    return mapper.lookupOrDefault(elemental.getShape());
  // Otherwise, the whole entity was cloned, and the shape can be generated
  // from it.
  hlfir::Entity clonedEntity{mapper.lookupOrDefault(entity)};
  return hlfir::genShape(loc, builder, hlfir::Entity{clonedEntity});
}

mlir::Value
MaskedArrayExpr::generateElementalParts(fir::FirOpBuilder &builder,
                                        mlir::ValueRange oneBasedIndices,
                                        mlir::IRMapping &mapper) {
  assert(noneElementalPartWasGenerated &&
         "non-elemental part must have been generated");
  mlir::Operation &terminator = region.back().back();
  hlfir::ElementalOpInterface elemental =
      mlir::dyn_cast<hlfir::ElementalAddrOp>(terminator);
  if (!elemental) {
    // If the terminator is not an hlfir.elemental_addr, check whether the
    // yielded entity was produced by an hlfir.elemental.
    mlir::Value entity = mlir::cast<hlfir::YieldOp>(terminator).getEntity();
    elemental = entity.getDefiningOp<hlfir::ElementalOp>();
    if (!elemental) {
      // The yielded entity was not produced by an elemental operation;
      // get its clone in the non-elemental part evaluation and address it.
      hlfir::Entity clonedEntity{mapper.lookupOrDefault(entity)};
      return hlfir::getElementAt(loc, builder, clonedEntity, oneBasedIndices);
    }
  }

  auto mustRecursivelyInline =
      [&](hlfir::ElementalOp appliedElemental) -> bool {
    return elementalParts.contains(appliedElemental.getOperation());
  };
  return inlineElementalOp(loc, builder, elemental, oneBasedIndices, mapper,
                           mustRecursivelyInline);
}

void MaskedArrayExpr::generateNoneElementalCleanupIfAny(
    fir::FirOpBuilder &builder, mlir::IRMapping &mapper) {
  mlir::Operation &terminator = region.back().back();
  mlir::Region *cleanupRegion = nullptr;
  if (auto elementalAddr = mlir::dyn_cast<hlfir::ElementalAddrOp>(terminator)) {
    cleanupRegion = &elementalAddr.getCleanup();
  } else {
    auto yieldOp = mlir::cast<hlfir::YieldOp>(terminator);
    cleanupRegion = &yieldOp.getCleanup();
  }
  if (cleanupRegion->empty())
    return;
  for (mlir::Operation &op : cleanupRegion->front().without_terminator()) {
    if (auto destroy = mlir::dyn_cast<hlfir::DestroyOp>(op))
      if (elementalParts.contains(destroy.getExpr().getDefiningOp()))
        continue;
    (void)builder.clone(op, mapper);
  }
}

static hlfir::RegionAssignOp
getAssignIfLeftHandSideRegion(mlir::Region &region) {
  auto assign = mlir::dyn_cast<hlfir::RegionAssignOp>(region.getParentOp());
  if (assign && (&assign.getLhsRegion() == &region))
    return assign;
  return nullptr;
}

bool OrderedAssignmentRewriter::currentLoopNestIterationNumberCanBeComputed(
    llvm::SmallVectorImpl<fir::DoLoopOp> &loopNest) {
  if (constructStack.empty())
    return true;
  mlir::Operation *outerLoop = constructStack[0];
  mlir::Operation *currentConstruct = constructStack.back();
  // Loop through the loops until the outer construct is met, and test if the
  // loop operands dominate the outer construct.
  while (currentConstruct) {
    if (auto doLoop = mlir::dyn_cast<fir::DoLoopOp>(currentConstruct)) {
      if (llvm::any_of(doLoop->getOperands(), [&](mlir::Value value) {
            return !dominanceInfo.properlyDominates(value, outerLoop);
          })) {
        return false;
      }
      loopNest.push_back(doLoop);
    }
    if (currentConstruct == outerLoop)
      currentConstruct = nullptr;
    else
      currentConstruct = currentConstruct->getParentOp();
  }
  return true;
}

static mlir::Value
computeLoopNestIterationNumber(mlir::Location loc, fir::FirOpBuilder &builder,
                               llvm::ArrayRef<fir::DoLoopOp> loopNest) {
  mlir::Value loopExtent;
  for (fir::DoLoopOp doLoop : loopNest) {
    mlir::Value extent = builder.genExtentFromTriplet(
        loc, doLoop.getLowerBound(), doLoop.getUpperBound(), doLoop.getStep(),
        builder.getIndexType());
    if (!loopExtent)
      loopExtent = extent;
    else
      loopExtent = builder.create<mlir::arith::MulIOp>(loc, loopExtent, extent);
  }
  assert(loopExtent && "loopNest must not be empty");
  return loopExtent;
}
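
// For example (illustrative), for "do i = 1, 10, 2" nested inside
// "do j = 1, 5", the iteration number computed from the triplets is
//   max((10 - 1 + 2) / 2, 0) * max((5 - 1 + 1) / 1, 0) = 5 * 5 = 25.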

/// Return a name for temporary storage that indicates in which context
/// the temporary storage was created.
static llvm::StringRef
getTempName(hlfir::OrderedAssignmentTreeOpInterface root) {
  if (mlir::isa<hlfir::ForallOp>(root.getOperation()))
    return ".tmp.forall";
  if (mlir::isa<hlfir::WhereOp>(root.getOperation()))
    return ".tmp.where";
  return ".tmp.assign";
}

void OrderedAssignmentRewriter::generateSaveEntity(
    hlfir::SaveEntity savedEntity, bool willUseSavedEntityInSameRun) {
  mlir::Region &region = *savedEntity.yieldRegion;

  if (hlfir::RegionAssignOp regionAssignOp =
          getAssignIfLeftHandSideRegion(region)) {
    // Need to save the address, not the values.
    assert(!willUseSavedEntityInSameRun &&
           "lhs cannot be used in the loop nest where it is saved");
    return saveLeftHandSide(savedEntity, regionAssignOp);
  }

  mlir::Location loc = region.getParentOp()->getLoc();
  // Evaluate the region inside the loop nest (if any).
  auto [clonedValue, oldYield] = generateYieldedEntity(region);
  hlfir::Entity entity{clonedValue};
  entity = hlfir::loadTrivialScalar(loc, builder, entity);
  mlir::Type entityType = entity.getType();

  llvm::StringRef tempName = getTempName(root);
  fir::factory::TemporaryStorage *temp = nullptr;
  if (constructStack.empty()) {
    // The value is evaluated outside of any loops (this may be the first
    // MASK of a WHERE construct, or an LHS/RHS temp of hlfir.region_assign
    // outside of WHERE/FORALL).
    temp = insertSavedEntity(
        region, fir::factory::SimpleCopy(loc, builder, entity, tempName));
  } else {
    // A temporary must be created for values computed inside loops.
    // Create the temporary storage outside of the loop nest given the entity
    // type (and the loop context).
    llvm::SmallVector<fir::DoLoopOp> loopNest;
    bool loopShapeCanBePreComputed =
        currentLoopNestIterationNumberCanBeComputed(loopNest);
    doBeforeLoopNest([&] {
      /// For simple scalars inside loops whose total iteration number can be
      /// pre-computed, create a rank-1 array outside of the loops. It will
      /// be assigned/fetched inside the loops like a normal Fortran array
      /// given the iteration count.
      if (loopShapeCanBePreComputed && fir::isa_trivial(entityType)) {
        mlir::Value loopExtent =
            computeLoopNestIterationNumber(loc, builder, loopNest);
        auto sequenceType =
            builder.getVarLenSeqTy(entityType).cast<fir::SequenceType>();
        temp = insertSavedEntity(region,
                                 fir::factory::HomogeneousScalarStack{
                                     loc, builder, sequenceType, loopExtent,
                                     /*lenParams=*/{}, allocateOnHeap,
                                     /*stackThroughLoops=*/true, tempName});

      } else {
        // If the number of iterations is not known, or if the values at each
        // iteration may have different shapes, type parameters, or dynamic
        // types, use the runtime to create and manage a stack-like
        // temporary.
        temp = insertSavedEntity(
            region, fir::factory::AnyValueStack{loc, builder, entityType});
      }
    });
    // Inside the loop nest (and any fir.if if there are active masks), copy
    // the value to the temp and do any clean-ups for the value.
    temp->pushValue(loc, builder, entity);
  }

  // Delay the clean-up if the entity will be used in the same run (i.e., if
  // the parent construct will be visited and needs to be lowered). When
  // possible, this is not done for hlfir.expr because this use would prevent
  // the hlfir.expr storage from being moved when creating the temporary in
  // bufferization, and that would lead to an extra copy.
  if (willUseSavedEntityInSameRun &&
      (!temp->canBeFetchedAfterPush() ||
       !mlir::isa<hlfir::ExprType>(entity.getType()))) {
    auto inserted =
        savedInCurrentRunBeforeUse.try_emplace(&region, entity, oldYield);
    assert(inserted.second && "entity must have been emplaced");
    (void)inserted;
  } else {
    if (constructStack.empty() &&
        mlir::isa<hlfir::RegionAssignOp>(region.getParentOp())) {
      // Here the clean-up code is inserted after the original
      // RegionAssignOp, so that the assignment code happens
      // before the cleanup. We do this only for standalone
      // operations, because the clean-up is handled specially
      // during lowering of the parent constructs, if any
      // (e.g. see generateNoneElementalCleanupIfAny for
      // WhereOp).
      auto insertionPoint = builder.saveInsertionPoint();
      builder.setInsertionPointAfter(region.getParentOp());
      generateCleanupIfAny(oldYield);
      builder.restoreInsertionPoint(insertionPoint);
    } else {
      generateCleanupIfAny(oldYield);
    }
  }
}
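
// Illustrative note (not from the original file): in
//   forall (i=1:n) x(i) = x(n+1-i)
// the RHS x(n+1-i) is a trivial scalar evaluated inside a forall loop whose
// iteration count n is computable, so a run that saves it can use a rank-1
// HomogeneousScalarStack of extent n; the run performing the assignments
// then fetches the values back in loop order.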

static bool rhsIsArray(hlfir::RegionAssignOp regionAssignOp) {
  auto yieldOp = mlir::dyn_cast<hlfir::YieldOp>(
      regionAssignOp.getRhsRegion().back().back());
  return yieldOp && hlfir::Entity{yieldOp.getEntity()}.isArray();
}

void OrderedAssignmentRewriter::saveLeftHandSide(
    hlfir::SaveEntity savedEntity, hlfir::RegionAssignOp regionAssignOp) {
  mlir::Region &region = *savedEntity.yieldRegion;
  mlir::Location loc = region.getParentOp()->getLoc();
  LhsValueAndCleanUp loweredLhs = generateYieldedLHS(loc, region);
  fir::factory::TemporaryStorage *temp = nullptr;
  if (loweredLhs.vectorSubscriptLoopNest)
    constructStack.push_back(loweredLhs.vectorSubscriptLoopNest->outerLoop);
  if (loweredLhs.vectorSubscriptLoopNest && !rhsIsArray(regionAssignOp)) {
    // This is a vector subscripted entity for which the shape must also be
    // saved on top of the element addresses (e.g. the shape may change in
    // each forall iteration and is needed to create the elemental loops).
    mlir::Value shape = loweredLhs.vectorSubscriptShape.value();
    int rank = mlir::cast<fir::ShapeType>(shape.getType()).getRank();
    const bool shapeIsInvariant =
        constructStack.empty() ||
        dominanceInfo.properlyDominates(shape, constructStack[0]);
    doBeforeLoopNest([&] {
      // Outside of any forall/where/elemental loops, create a temporary that
      // will both be able to save the vector subscripted designator shape(s)
      // and element addresses.
      temp =
          insertSavedEntity(region, fir::factory::AnyVectorSubscriptStack{
                                        loc, builder, loweredLhs.lhs.getType(),
                                        shapeIsInvariant, rank});
    });
    // Save the shape before the elemental loop nest created by the vector
    // subscripted LHS.
    auto &vectorTmp = temp->cast<fir::factory::AnyVectorSubscriptStack>();
    auto insertionPoint = builder.saveInsertionPoint();
    builder.setInsertionPoint(loweredLhs.vectorSubscriptLoopNest->outerLoop);
    vectorTmp.pushShape(loc, builder, shape);
    builder.restoreInsertionPoint(insertionPoint);
  } else {
    // Otherwise, only save the LHS address.
    // If the LHS address dominates the constructs, its SSA value can
    // simply be tracked and there is no need to save the address in memory.
    // Otherwise, the addresses are stored at each iteration in memory with
    // a descriptor stack.
    if (constructStack.empty() ||
        dominanceInfo.properlyDominates(loweredLhs.lhs, constructStack[0]))
      doBeforeLoopNest([&] {
        temp = insertSavedEntity(region, fir::factory::SSARegister{});
      });
    else
      doBeforeLoopNest([&] {
        temp = insertSavedEntity(
            region, fir::factory::AnyVariableStack{loc, builder,
                                                   loweredLhs.lhs.getType()});
      });
  }
  temp->pushValue(loc, builder, loweredLhs.lhs);
  generateCleanupIfAny(loweredLhs.elementalCleanup);
  if (loweredLhs.vectorSubscriptLoopNest) {
    constructStack.pop_back();
    builder.setInsertionPointAfter(
        loweredLhs.vectorSubscriptLoopNest->outerLoop);
  }
}

/// Lower an ordered assignment tree to fir.do_loop and hlfir.assign given
/// a schedule.
static void lower(hlfir::OrderedAssignmentTreeOpInterface root,
                  mlir::PatternRewriter &rewriter, hlfir::Schedule &schedule) {
  auto module = root->getParentOfType<mlir::ModuleOp>();
  fir::FirOpBuilder builder(rewriter, module);
  OrderedAssignmentRewriter assignmentRewriter(builder, root);
  for (auto &run : schedule)
    assignmentRewriter.lowerRun(run);
  assignmentRewriter.cleanupSavedEntities();
}

/// Shared rewrite entry point for all the ordered assignment tree root
/// operations. It calls the scheduler and then applies the schedule.
static mlir::LogicalResult rewrite(hlfir::OrderedAssignmentTreeOpInterface root,
                                   bool tryFusingAssignments,
                                   mlir::PatternRewriter &rewriter) {
  hlfir::Schedule schedule =
      hlfir::buildEvaluationSchedule(root, tryFusingAssignments);

  LLVM_DEBUG(
      /// Debug option to print the scheduling debug info without doing
      /// any code generation. The operations are simply erased to avoid
      /// failing and calling the rewrite patterns on nested operations.
      /// The only purpose of this is to help testing scheduling without
      /// having to test generated code.
      if (dbgScheduleOnly) {
        rewriter.eraseOp(root);
        return mlir::success();
      });
  lower(root, rewriter, schedule);
  rewriter.eraseOp(root);
  return mlir::success();
}

namespace {

class ForallOpConversion : public mlir::OpRewritePattern<hlfir::ForallOp> {
public:
  explicit ForallOpConversion(mlir::MLIRContext *ctx, bool tryFusingAssignments)
      : OpRewritePattern{ctx}, tryFusingAssignments{tryFusingAssignments} {}

  mlir::LogicalResult
  matchAndRewrite(hlfir::ForallOp forallOp,
                  mlir::PatternRewriter &rewriter) const override {
    auto root = mlir::cast<hlfir::OrderedAssignmentTreeOpInterface>(
        forallOp.getOperation());
    if (mlir::failed(::rewrite(root, tryFusingAssignments, rewriter)))
      TODO(forallOp.getLoc(), "FORALL construct or statement in HLFIR");
    return mlir::success();
  }
  const bool tryFusingAssignments;
};

class WhereOpConversion : public mlir::OpRewritePattern<hlfir::WhereOp> {
public:
  explicit WhereOpConversion(mlir::MLIRContext *ctx, bool tryFusingAssignments)
      : OpRewritePattern{ctx}, tryFusingAssignments{tryFusingAssignments} {}

  mlir::LogicalResult
  matchAndRewrite(hlfir::WhereOp whereOp,
                  mlir::PatternRewriter &rewriter) const override {
    auto root = mlir::cast<hlfir::OrderedAssignmentTreeOpInterface>(
        whereOp.getOperation());
    return ::rewrite(root, tryFusingAssignments, rewriter);
  }
  const bool tryFusingAssignments;
};

class RegionAssignConversion
    : public mlir::OpRewritePattern<hlfir::RegionAssignOp> {
public:
  explicit RegionAssignConversion(mlir::MLIRContext *ctx)
      : OpRewritePattern{ctx} {}

  mlir::LogicalResult
  matchAndRewrite(hlfir::RegionAssignOp regionAssignOp,
                  mlir::PatternRewriter &rewriter) const override {
    auto root = mlir::cast<hlfir::OrderedAssignmentTreeOpInterface>(
        regionAssignOp.getOperation());
    return ::rewrite(root, /*tryFusingAssignments=*/false, rewriter);
  }
};

class LowerHLFIROrderedAssignments
    : public hlfir::impl::LowerHLFIROrderedAssignmentsBase<
          LowerHLFIROrderedAssignments> {
public:
  void runOnOperation() override {
    // This pass runs on a ModuleOp because it may generate FuncOp
    // declarations for runtime calls. It could otherwise be a FuncOp pass.
    auto module = this->getOperation();
    auto *context = &getContext();
    mlir::RewritePatternSet patterns(context);
    // Patterns are only defined for the OrderedAssignmentTreeOpInterface
    // operations that can be the root of ordered assignments. The other
    // operations will be taken care of while rewriting these trees (they
    // cannot exist outside of these operations given their verifiers/traits).
    patterns.insert<ForallOpConversion, WhereOpConversion>(
        context, this->tryFusingAssignments.getValue());
    patterns.insert<RegionAssignConversion>(context);
    mlir::ConversionTarget target(*context);
    target.markUnknownOpDynamicallyLegal([](mlir::Operation *op) {
      return !mlir::isa<hlfir::OrderedAssignmentTreeOpInterface>(op);
    });
    if (mlir::failed(mlir::applyPartialConversion(module, target,
                                                  std::move(patterns)))) {
      mlir::emitError(mlir::UnknownLoc::get(context),
                      "failure in HLFIR ordered assignments lowering pass");
      signalPassFailure();
    }
  }
};
} // namespace

std::unique_ptr<mlir::Pass> hlfir::createLowerHLFIROrderedAssignmentsPass() {
  return std::make_unique<LowerHLFIROrderedAssignments>();
}