//===--- PredictableMemOpt.cpp - Perform predictable memory optzns --------===//
//
// This source file is part of the Swift.org open source project
//
// Copyright (c) 2014 - 2017 Apple Inc. and the Swift project authors
// Licensed under Apache License v2.0 with Runtime Library Exception
//
// See https://swift.org/LICENSE.txt for license information
// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
//
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "predictable-memopt"
#include "PMOMemoryUseCollector.h"
#include "swift/Basic/Assertions.h"
#include "swift/Basic/BlotMapVector.h"
#include "swift/Basic/BlotSetVector.h"
#include "swift/Basic/FrozenMultiMap.h"
#include "swift/Basic/STLExtras.h"
#include "swift/SIL/BasicBlockBits.h"
#include "swift/SIL/BasicBlockUtils.h"
#include "swift/SIL/LinearLifetimeChecker.h"
#include "swift/SIL/OSSACompleteLifetime.h"
#include "swift/SIL/OwnershipUtils.h"
#include "swift/SIL/SILBuilder.h"
#include "swift/SILOptimizer/PassManager/Passes.h"
#include "swift/SILOptimizer/PassManager/Transforms.h"
#include "swift/SILOptimizer/Utils/CFGOptUtils.h"
#include "swift/SILOptimizer/Utils/InstOptUtils.h"
#include "swift/SILOptimizer/Utils/OwnershipOptUtils.h"
#include "swift/SILOptimizer/Utils/SILSSAUpdater.h"
#include "swift/SILOptimizer/Utils/ValueLifetime.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
using namespace swift;
static llvm::cl::opt<bool> EnableAggressiveExpansionBlocking(
"enable-aggressive-expansion-blocking", llvm::cl::init(false));
STATISTIC(NumLoadTakePromoted, "Number of load takes promoted");
STATISTIC(NumDestroyAddrPromoted, "Number of destroy_addrs promoted");
STATISTIC(NumAllocRemoved, "Number of allocations completely removed");
namespace {
/// The following utilities handle two fundamentally different optimizations:
/// - load-copy removal preserves allocation
/// - dead allocation removal replaces the allocation
///
/// Ownership handling is completely different in these cases. The utilities are
/// not well-factored to reflect this, so we have a mode flag.
enum class OptimizationMode {
PreserveAlloc,
ReplaceAlloc
};
} // end namespace
//===----------------------------------------------------------------------===//
// Subelement Analysis
//===----------------------------------------------------------------------===//
namespace {
// The type of a particular memory access and its corresponding subelement index
// range relative to the allocation being optimized.
struct LoadInfo {
SILType loadType;
unsigned firstElt;
unsigned numElts;
IntRange<unsigned> range() const {
return IntRange<unsigned>(firstElt, firstElt + numElts);
}
};
} // namespace
// We can only analyze components of structs whose storage is fully accessible
// from Swift.
static StructDecl *
getFullyReferenceableStruct(SILType Ty) {
auto SD = Ty.getStructOrBoundGenericStruct();
if (!SD || SD->hasUnreferenceableStorage())
return nullptr;
return SD;
}
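/// Return the number of leaf subelements in a type: tuples and
/// fully-referenceable structs are flattened recursively; any other type
/// counts as a single subelement.
///
/// Illustrative example (not from a specific test): a value of type
/// $(Builtin.Int64, (Builtin.Int64, Builtin.Int64)) has 3 subelements, since
/// builtin types are leaves and nested tuples are flattened.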
static unsigned getNumSubElements(SILType T, SILFunction &F,
TypeExpansionContext context) {
if (auto TT = T.getAs<TupleType>()) {
unsigned NumElements = 0;
for (auto index : indices(TT.getElementTypes()))
NumElements +=
getNumSubElements(T.getTupleElementType(index), F, context);
return NumElements;
}
if (auto *SD = getFullyReferenceableStruct(T)) {
unsigned NumElements = 0;
for (auto *D : SD->getStoredProperties())
NumElements +=
getNumSubElements(T.getFieldType(D, F.getModule(), context), F,
context);
// Returning NumElements == 0 implies that "empty" values can be
// materialized without ownership. This is only valid for trivial types.
if (NumElements > 0 || T.isTrivial(F))
return NumElements;
}
// If this isn't a tuple or struct, it is a single element.
return 1;
}
/// getAccessPathRoot - Given an address, dive through any tuple/struct element
/// addresses to get the underlying value.
static SILValue getAccessPathRoot(SILValue pointer) {
while (true) {
if (auto *TEAI = dyn_cast<TupleElementAddrInst>(pointer)) {
pointer = TEAI->getOperand();
continue;
}
if (auto *SEAI = dyn_cast<StructElementAddrInst>(pointer)) {
pointer = SEAI->getOperand();
continue;
}
if (auto *BAI = dyn_cast<BeginAccessInst>(pointer)) {
pointer = BAI->getSource();
continue;
}
return pointer;
}
}
/// Compute the subelement number indicated by the specified pointer (which is
/// derived from the root by a series of tuple/struct element addresses) by
/// treating the type as a linearized namespace with sequential elements. For
/// example, given:
///
/// root = alloc { a: { c: i64, d: i64 }, b: (i64, i64) }
/// tmp1 = struct_element_addr root, 1
/// tmp2 = tuple_element_addr tmp1, 0
///
/// This will return a subelement number of 2.
///
/// If this pointer is to within an existential projection, it returns ~0U.
static unsigned computeSubelement(SILValue Pointer,
SingleValueInstruction *RootInst) {
unsigned SubElementNumber = 0;
auto &F = *RootInst->getFunction();
while (1) {
// If we got to the root, we're done.
if (RootInst == Pointer)
return SubElementNumber;
if (auto *PBI = dyn_cast<ProjectBoxInst>(Pointer)) {
Pointer = PBI->getOperand();
continue;
}
if (auto *BAI = dyn_cast<BeginAccessInst>(Pointer)) {
Pointer = BAI->getSource();
continue;
}
if (auto *TEAI = dyn_cast<TupleElementAddrInst>(Pointer)) {
SILType TT = TEAI->getOperand()->getType();
// Keep track of what subelement is being referenced.
for (unsigned i = 0, e = TEAI->getFieldIndex(); i != e; ++i) {
SubElementNumber +=
getNumSubElements(TT.getTupleElementType(i), F,
TypeExpansionContext(*RootInst->getFunction()));
}
Pointer = TEAI->getOperand();
continue;
}
if (auto *SEAI = dyn_cast<StructElementAddrInst>(Pointer)) {
SILType ST = SEAI->getOperand()->getType();
// Keep track of what subelement is being referenced.
StructDecl *SD = SEAI->getStructDecl();
for (auto *D : SD->getStoredProperties()) {
if (D == SEAI->getField()) break;
auto context = TypeExpansionContext(*RootInst->getFunction());
SubElementNumber +=
getNumSubElements(ST.getFieldType(D, F.getModule(), context), F,
context);
}
Pointer = SEAI->getOperand();
continue;
}
if (auto *MD = dyn_cast<MarkDependenceInst>(Pointer)) {
Pointer = MD->getValue();
continue;
}
// This fails when we visit unchecked_take_enum_data_addr. We should just
// add support for enums.
assert(isa<InitExistentialAddrInst>(Pointer) &&
"Unknown access path instruction");
// Cannot promote loads and stores from within an existential projection.
return ~0U;
}
}
//===----------------------------------------------------------------------===//
// Available Value
//===----------------------------------------------------------------------===//
namespace {
class AvailableValueAggregator;
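/// A value that is known to be stored into (part of) the memory being
/// analyzed.
///
/// Conceptually this is a triple: the stored SILValue, the leaf subelement of
/// that value which this entry represents, and the set of insertion points
/// (typically stores) at which the value became available. As an illustrative
/// example, after `store %x to [init] %addr`, each leaf subelement of %x is
/// tracked as an AvailableValue whose insertion point is that store.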
struct AvailableValue {
friend class AvailableValueAggregator;
SILValue Value;
unsigned SubElementNumber;
/// If this gets too expensive in terms of copying, we can use an arena and a
/// FrozenPtrSet like we do in ARC.
llvm::SmallSetVector<SILInstruction *, 1> InsertionPoints;
/// Just for updating.
SmallVectorImpl<PMOMemoryUse> *Uses;
public:
AvailableValue() = default;
/// Main initializer for available values.
///
/// *NOTE* We assume that all available values start with a singular insertion
/// point and insertion points are added by merging.
AvailableValue(SILValue Value, unsigned SubElementNumber,
SILInstruction *InsertPoint)
: Value(Value), SubElementNumber(SubElementNumber), InsertionPoints() {
InsertionPoints.insert(InsertPoint);
}
/// Deleted copy constructor. This is a move only type.
AvailableValue(const AvailableValue &) = delete;
/// Deleted copy operator. This is a move only type.
AvailableValue &operator=(const AvailableValue &) = delete;
/// Move constructor.
AvailableValue(AvailableValue &&Other)
: Value(nullptr), SubElementNumber(~0), InsertionPoints() {
std::swap(Value, Other.Value);
std::swap(SubElementNumber, Other.SubElementNumber);
std::swap(InsertionPoints, Other.InsertionPoints);
}
/// Move operator.
AvailableValue &operator=(AvailableValue &&Other) {
std::swap(Value, Other.Value);
std::swap(SubElementNumber, Other.SubElementNumber);
std::swap(InsertionPoints, Other.InsertionPoints);
return *this;
}
operator bool() const { return bool(Value); }
bool operator==(const AvailableValue &Other) const {
return Value == Other.Value && SubElementNumber == Other.SubElementNumber;
}
bool operator!=(const AvailableValue &Other) const {
return !(*this == Other);
}
SILValue getValue() const { return Value; }
SILType getType() const { return Value->getType(); }
unsigned getSubElementNumber() const { return SubElementNumber; }
ArrayRef<SILInstruction *> getInsertionPoints() const {
return InsertionPoints.getArrayRef();
}
void mergeInsertionPoints(const AvailableValue &Other) & {
assert(Value == Other.Value && SubElementNumber == Other.SubElementNumber);
InsertionPoints.set_union(Other.InsertionPoints);
}
void addInsertionPoint(SILInstruction *i) & { InsertionPoints.insert(i); }
AvailableValue emitStructExtract(SILBuilder &B, SILLocation Loc, VarDecl *D,
unsigned SubElementNumber) const {
SILValue NewValue = B.emitStructExtract(Loc, Value, D);
return {NewValue, SubElementNumber, InsertionPoints};
}
AvailableValue emitTupleExtract(SILBuilder &B, SILLocation Loc,
unsigned EltNo,
unsigned SubElementNumber) const {
SILValue NewValue = B.emitTupleExtract(Loc, Value, EltNo);
return {NewValue, SubElementNumber, InsertionPoints};
}
AvailableValue emitBeginBorrow(SILBuilder &b, SILLocation loc) const {
// If we do not have ownership or already are guaranteed, just return a copy
// of our state.
if (Value->getOwnershipKind().isCompatibleWith(OwnershipKind::Guaranteed)) {
return {Value, SubElementNumber, InsertionPoints};
}
// Otherwise, return a new value that borrows Value.
return {b.createBeginBorrow(loc, Value), SubElementNumber, InsertionPoints};
}
void dump() const LLVM_ATTRIBUTE_USED;
void print(llvm::raw_ostream &os) const;
private:
/// Private constructor.
AvailableValue(SILValue Value, unsigned SubElementNumber,
const decltype(InsertionPoints) &InsertPoints)
: Value(Value), SubElementNumber(SubElementNumber),
InsertionPoints(InsertPoints) {}
};
} // end anonymous namespace
void AvailableValue::dump() const { print(llvm::dbgs()); }
void AvailableValue::print(llvm::raw_ostream &os) const {
os << "Available Value Dump. Value: ";
if (getValue()) {
os << getValue();
} else {
os << "NoValue;\n";
}
os << "SubElementNumber: " << getSubElementNumber() << "\n";
os << "Insertion Points:\n";
for (auto *I : getInsertionPoints()) {
os << *I;
}
}
namespace llvm {
llvm::raw_ostream &operator<<(llvm::raw_ostream &os, const AvailableValue &V) {
V.print(os);
return os;
}
} // end llvm namespace
//===----------------------------------------------------------------------===//
// Subelement Extraction
//===----------------------------------------------------------------------===//
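/// Return true if every leaf subelement of \p loadTy, starting at \p firstElt,
/// is provided by the same underlying value with consecutive subelement
/// numbers 0, 1, 2, ... In that case the aggregate can be reused wholesale
/// instead of being rebuilt element by element.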
static bool isFullyAvailable(SILType loadTy, unsigned firstElt,
ArrayRef<AvailableValue> AvailableValues) {
if (firstElt >= AvailableValues.size()) { // #Elements may be zero.
return false;
}
auto &firstVal = AvailableValues[firstElt];
// Make sure that the first element is available and is the correct type.
if (!firstVal || firstVal.getType() != loadTy)
return false;
auto &function = *firstVal.getValue()->getFunction();
return llvm::all_of(
range(getNumSubElements(loadTy, function, TypeExpansionContext(function))),
[&](unsigned index) -> bool {
auto &val = AvailableValues[firstElt + index];
return val.getValue() == firstVal.getValue() &&
val.getSubElementNumber() == index;
});
}
/// Given an aggregate value and an access path, non-destructively extract the
/// value indicated by the path.
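///
/// Illustrative example (schematic SIL, not from a test case): extracting
/// subelement 1 of an owned available value %v : $(AnyObject, AnyObject)
/// emits roughly
///
///   %b = begin_borrow %v
///   %e = tuple_extract %b, 1
///   %c = copy_value %e
///   end_borrow %b
///
/// and returns %c, leaving %v itself intact.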
static SILValue nonDestructivelyExtractSubElement(const AvailableValue &Val,
SILBuilder &B,
SILLocation Loc) {
SILType ValTy = Val.getType();
unsigned SubElementNumber = Val.SubElementNumber;
// Extract tuple elements.
if (auto TT = ValTy.getAs<TupleType>()) {
for (unsigned EltNo : indices(TT.getElementTypes())) {
// Keep track of what subelement is being referenced.
SILType EltTy = ValTy.getTupleElementType(EltNo);
unsigned NumSubElt = getNumSubElements(
EltTy, B.getFunction(), TypeExpansionContext(B.getFunction()));
if (SubElementNumber < NumSubElt) {
auto BorrowedVal = Val.emitBeginBorrow(B, Loc);
auto NewVal =
BorrowedVal.emitTupleExtract(B, Loc, EltNo, SubElementNumber);
SILValue result = nonDestructivelyExtractSubElement(NewVal, B, Loc);
// If our original value wasn't guaranteed and we did actually perform a
// borrow as a result, insert the end_borrow.
if (BorrowedVal.getValue() != Val.getValue())
B.createEndBorrow(Loc, BorrowedVal.getValue());
return result;
}
SubElementNumber -= NumSubElt;
}
llvm_unreachable("Didn't find field");
}
// Extract struct elements.
if (auto *SD = getFullyReferenceableStruct(ValTy)) {
for (auto *D : SD->getStoredProperties()) {
auto fieldType = ValTy.getFieldType(
D, B.getModule(), TypeExpansionContext(B.getFunction()));
unsigned NumSubElt = getNumSubElements(
fieldType, B.getFunction(), TypeExpansionContext(B.getFunction()));
if (SubElementNumber < NumSubElt) {
auto BorrowedVal = Val.emitBeginBorrow(B, Loc);
auto NewVal =
BorrowedVal.emitStructExtract(B, Loc, D, SubElementNumber);
SILValue result = nonDestructivelyExtractSubElement(NewVal, B, Loc);
// If our original value wasn't guaranteed and we did actually perform a
// borrow as a result, insert the end_borrow.
if (BorrowedVal.getValue() != Val.getValue())
B.createEndBorrow(Loc, BorrowedVal.getValue());
return result;
}
SubElementNumber -= NumSubElt;
}
llvm_unreachable("Didn't find field");
}
// Otherwise, we're down to a scalar. If we have ownership enabled,
// we return a copy. Otherwise, there we can ignore ownership
// issues. This is ok since in [ossa] we are going to eliminate a
// load [copy] or a load [trivial], while in non-[ossa] SIL we will
// be replacing unqualified loads.
assert(SubElementNumber == 0 && "Miscalculation indexing subelements");
return B.emitCopyValueOperation(Loc, Val.getValue());
}
//===----------------------------------------------------------------------===//
// Ownership Fixup
//===----------------------------------------------------------------------===//
namespace {
/// For OptimizationMode::PreserveAlloc. Track inserted copies and casts for
/// ownership fixup.
struct AvailableValueFixup {
/// Keep track of all instructions that we have added. Once we are done
/// promoting a value, we need to make sure that if we need to balance any
/// copies (to avoid leaks), we do so. This is not used if we are performing a
/// take.
SmallVector<SILInstruction *, 16> insertedInsts;
AvailableValueFixup() = default;
AvailableValueFixup(const AvailableValueFixup &) = delete;
AvailableValueFixup &operator=(const AvailableValueFixup &) = delete;
~AvailableValueFixup() {
assert(insertedInsts.empty() && "need to fix ownership");
}
};
} // end namespace
namespace {
/// For available value data flow with OptimizationMode::PreserveAlloc: track only
/// inserted copies and casts for ownership fixup after dataflow completes. Phis
/// are not allowed.
struct AvailableValueDataflowFixup: AvailableValueFixup {
/// Given a single available value, get an owned copy if it is possible to
/// create one without introducing a phi. Return the copy. Otherwise, return
/// an invalid SILValue.
SILValue getSingleOwnedValue(const AvailableValue &availableVal);
// Verify ownership of promoted instructions in asserts builds or when
// -sil-verify-all is set.
//
// Clears insertedInsts.
void verifyOwnership(DeadEndBlocks &deBlocks);
// Deletes all insertedInsts without fixing ownership.
// Clears insertedInsts.
void deleteInsertedInsts(InstructionDeleter &deleter);
};
} // end namespace
// In OptimizationMode::PreserveAlloc, get a copy of this single available
// value. This requires a copy at the allocation's insertion point (store).
//
// Assumption: this copy will be consumed at a cast-like instruction
// (mark_dependence), which caused dataflow to invalidate the currently
// available value. Therefore, since phis are not allowed, the consumption
// cannot be in an inner loop relative to the copy. Unlike getMergedOwnedValue,
// there is no need for a second copy.
//
// Note: the caller must add the consuming cast to this->insertedInsts.
SILValue AvailableValueDataflowFixup::getSingleOwnedValue(
const AvailableValue &availableVal) {
SILValue value = availableVal.getValue();
// If there is no need for copies, then we don't care about multiple insertion
// points.
if (value->getType().isTrivial(*value->getFunction())) {
return value;
}
ArrayRef<SILInstruction *> insertPts = availableVal.getInsertionPoints();
if (insertPts.size() > 1)
return SILValue();
return SILBuilderWithScope(insertPts[0], &insertedInsts)
.emitCopyValueOperation(insertPts[0]->getLoc(), value);
}
void AvailableValueDataflowFixup::verifyOwnership(DeadEndBlocks &deBlocks) {
for (auto *inst : insertedInsts) {
if (inst->isDeleted())
continue;
for (auto result : inst->getResults()) {
result.verifyOwnership(&deBlocks);
}
}
insertedInsts.clear();
}
void AvailableValueDataflowFixup::
deleteInsertedInsts(InstructionDeleter &deleter) {
for (auto *inst : insertedInsts) {
if (inst->isDeleted())
continue;
deleter.forceDeleteWithUsers(inst);
}
insertedInsts.clear();
}
// In OptimizationMode::PreserveAlloc: insert copies and phis for aggregate
// values.
struct AvailableValueAggregationFixup: AvailableValueFixup {
DeadEndBlocks &deadEndBlocks;
/// The list of phi nodes inserted by the SSA updater.
SmallVector<SILPhiArgument *, 16> insertedPhiNodes;
AvailableValueAggregationFixup(DeadEndBlocks &deadEndBlocks)
: deadEndBlocks(deadEndBlocks) {}
/// For a single 'availableVal', insert copies at each insertion point. Merge
/// all copies, creating phis if needed. Return the final copy. Otherwise,
/// return an invalid SILValue.
SILValue mergeCopies(const AvailableValue &availableVal,
SILType loadTy,
SILInstruction *availableAtInst,
bool isFullyAvailable);
private:
/// As a result of us using the SSA updater, insert hand off copy/destroys at
/// each phi and make sure that intermediate phis do not leak by inserting
/// destroys along paths that go through the intermediate phi that do not also
/// go through the load.
void addHandOffCopyDestroysForPhis(SILInstruction *load, SILValue newVal);
};
// For OptimizationMode::PreserveAlloc, insert copies at the available value's
// insertion points to provide an owned value. Merge the copies into phis.
// Create another copy before 'availableAtInst' in case it consumes the owned
// value within an inner loop. Add the copies to insertedInsts and phis to
// insertedPhiNodes.
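//
// Illustrative example (schematic, not from a test case): with two insertion
// points in different predecessors of the block containing 'availableAtInst',
//
//   bb1: store %x to [init] %a      bb2: store %x to [init] %a
//        %c1 = copy_value %x             %c2 = copy_value %x
//        br bb3(%c1)                     br bb3(%c2)
//   bb3(%phi : @owned $T):
//        %c3 = copy_value %phi     // extra copy at the point of availability
//
// The phi comes from the SSA updater and is recorded in insertedPhiNodes; the
// copies are recorded in insertedInsts so ownership can be balanced later.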
SILValue AvailableValueAggregationFixup::mergeCopies(
const AvailableValue &availableVal, SILType loadTy,
SILInstruction *availableAtInst,
bool isFullyAvailable) {
auto emitValue = [&](SILInstruction *insertPt) {
if (isFullyAvailable) {
SILValue fullVal = availableVal.getValue();
if (fullVal->use_empty()
&& fullVal->getOwnershipKind() == OwnershipKind::Owned) {
// This value was inserted during data flow to propagate ownership. It
// does not need to be copied.
return fullVal;
}
return SILBuilderWithScope(insertPt, &insertedInsts)
.emitCopyValueOperation(insertPt->getLoc(), fullVal);
}
SILBuilderWithScope builder(insertPt, &insertedInsts);
SILValue eltVal = nonDestructivelyExtractSubElement(availableVal, builder,
insertPt->getLoc());
assert(eltVal->getType() == loadTy && "Subelement types mismatch");
assert(eltVal->getOwnershipKind().isCompatibleWith(OwnershipKind::Owned));
return eltVal;
};
ArrayRef<SILInstruction *> insertPts = availableVal.getInsertionPoints();
if (insertPts.size() == 1) {
SILValue eltVal = emitValue(insertPts[0]);
if (isFullyAvailable)
return eltVal;
return SILBuilderWithScope(availableAtInst, &insertedInsts)
.emitCopyValueOperation(availableAtInst->getLoc(), eltVal);
}
// If we have multiple insertion points, put copies at each point and use the
// SSA updater to get a value. The reason why this is safe is that we can only
// have multiple insertion points if we are storing exactly the same value
// implying that we can just copy firstVal at each insertion point.
SILSSAUpdater updater(&insertedPhiNodes);
SILFunction *function = availableAtInst->getFunction();
assert(function->hasOwnership() && "requires OSSA");
updater.initialize(function, loadTy, OwnershipKind::Owned);
std::optional<SILValue> singularValue;
for (auto *insertPt : insertPts) {
SILValue eltVal = emitValue(insertPt);
if (!singularValue.has_value()) {
singularValue = eltVal;
} else if (*singularValue != eltVal) {
singularValue = SILValue();
}
// And then put the value into the SSA updater.
updater.addAvailableValue(insertPt->getParent(), eltVal);
}
// If we are only tracking a singular value, we do not need to construct
// SSA. Just return that value.
if (auto val = singularValue.value_or(SILValue())) {
// Non-trivial values will always be copied above at each insertion
// point, and will, therefore, have multiple incoming values.
assert(val->getType().isTrivial(*function) &&
"Should never reach this code path if we are in ossa and have a "
"non-trivial value");
return val;
}
// Finally, grab the value from the SSA updater.
SILValue result =
updater.getValueInMiddleOfBlock(availableAtInst->getParent());
assert(result->getOwnershipKind().isCompatibleWith(OwnershipKind::Owned));
assert(result->getType() == loadTy && "Subelement types mismatch");
// Insert another copy at the point of availability in case it is inside a
// loop.
return SILBuilderWithScope(availableAtInst, &insertedInsts)
.emitCopyValueOperation(availableAtInst->getLoc(), result);
}
//===----------------------------------------------------------------------===//
// Available Value Aggregation
//===----------------------------------------------------------------------===//
static bool anyMissing(unsigned StartSubElt, unsigned NumSubElts,
ArrayRef<AvailableValue> &Values) {
while (NumSubElts) {
if (!Values[StartSubElt])
return true;
++StartSubElt;
--NumSubElts;
}
return false;
}
namespace {
enum class AvailableValueExpectedOwnership {
Take,
Borrow,
Copy,
};
/// A class that aggregates available values, loading them if they are not
/// available.
class AvailableValueAggregator {
SILModule &M;
SILBuilderWithScope B;
SILLocation Loc;
ArrayRef<AvailableValue> AvailableValueList;
SmallVectorImpl<PMOMemoryUse> &Uses;
AvailableValueExpectedOwnership expectedOwnership;
AvailableValueAggregationFixup ownershipFixup;
public:
AvailableValueAggregator(SILInstruction *Inst,
ArrayRef<AvailableValue> AvailableValueList,
SmallVectorImpl<PMOMemoryUse> &Uses,
DeadEndBlocks &deadEndBlocks,
AvailableValueExpectedOwnership expectedOwnership)
: M(Inst->getModule()), B(Inst), Loc(Inst->getLoc()),
AvailableValueList(AvailableValueList), Uses(Uses),
expectedOwnership(expectedOwnership), ownershipFixup(deadEndBlocks)
{}
// This is intended to be passed by reference only once constructed.
AvailableValueAggregator(const AvailableValueAggregator &) = delete;
AvailableValueAggregator(AvailableValueAggregator &&) = delete;
AvailableValueAggregator &
operator=(const AvailableValueAggregator &) = delete;
AvailableValueAggregator &operator=(AvailableValueAggregator &&) = delete;
SILValue aggregateValues(SILType LoadTy, SILValue Address, unsigned FirstElt,
bool isTopLevel = true);
bool canTake(SILType loadTy, unsigned firstElt) const;
void print(llvm::raw_ostream &os) const;
void dump() const LLVM_ATTRIBUTE_USED;
bool isTake() const {
return expectedOwnership == AvailableValueExpectedOwnership::Take;
}
bool isBorrow() const {
return expectedOwnership == AvailableValueExpectedOwnership::Borrow;
}
bool isCopy() const {
return expectedOwnership == AvailableValueExpectedOwnership::Copy;
}
private:
SILValue aggregateFullyAvailableValue(SILType loadTy, unsigned firstElt);
SILValue aggregateTupleSubElts(TupleType *tt, SILType loadTy,
SILValue address, unsigned firstElt);
SILValue aggregateStructSubElts(StructDecl *sd, SILType loadTy,
SILValue address, unsigned firstElt);
};
} // end anonymous namespace
void AvailableValueAggregator::dump() const { print(llvm::dbgs()); }
void AvailableValueAggregator::print(llvm::raw_ostream &os) const {
os << "Available Value List, N = " << AvailableValueList.size()
<< ". Elts:\n";
for (auto &V : AvailableValueList) {
os << V;
}
}
// We can only take if we never have to split a larger value to promote this
// address.
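//
// For example (illustrative): if the entire aggregate was stored as a single
// value but only one field of it is being promoted, taking that field would
// require splitting the stored value's ownership, so canTake() returns false
// and the copying path is used instead.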
bool AvailableValueAggregator::canTake(SILType loadTy,
unsigned firstElt) const {
// If we are trivially fully available, just return true.
if (isFullyAvailable(loadTy, firstElt, AvailableValueList))
return true;
// Otherwise see if we are an aggregate with fully available leaf types.
if (TupleType *tt = loadTy.getAs<TupleType>()) {
return llvm::all_of(indices(tt->getElements()), [&](unsigned eltNo) {
SILType eltTy = loadTy.getTupleElementType(eltNo);
unsigned numSubElt =
getNumSubElements(eltTy, B.getFunction(),
TypeExpansionContext(B.getFunction()));
bool success = canTake(eltTy, firstElt);
firstElt += numSubElt;
return success;
});
}
if (auto *sd = getFullyReferenceableStruct(loadTy)) {
return llvm::all_of(sd->getStoredProperties(), [&](VarDecl *decl) -> bool {
auto context = TypeExpansionContext(B.getFunction());
SILType eltTy = loadTy.getFieldType(decl, M, context);
unsigned numSubElt = getNumSubElements(eltTy, B.getFunction(), context);
bool success = canTake(eltTy, firstElt);
firstElt += numSubElt;
return success;
});
}
// Otherwise, fail. The value is not fully available at its leaves. We cannot
// perform a take.
return false;
}
/// Given a bunch of primitive subelement values, build out the right aggregate
/// type (LoadTy) by emitting tuple and struct instructions as necessary.
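///
/// Illustrative example (schematic, not from a test case): materializing a
/// value of type $(A, B) from two available subelements %a and %b conceptually
/// emits
///
///   %t = tuple (%a, %b)
///
/// with copies/borrows inserted as needed to satisfy ownership, while any
/// subelement that is not available is instead loaded from the corresponding
/// element address of the original allocation.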
SILValue AvailableValueAggregator::aggregateValues(SILType LoadTy,
SILValue Address,
unsigned FirstElt,
bool isTopLevel) {
// If we are performing a take, make sure that we have available values for
// /all/ of our values. Otherwise, bail.
if (isTopLevel && isTake() && !canTake(LoadTy, FirstElt)) {
return SILValue();
}
// Check to see if the requested value is fully available, as an aggregate.
// This is a super-common case for single-element structs, but is also a
// general answer for arbitrary structs and tuples as well.
if (SILValue Result = aggregateFullyAvailableValue(LoadTy, FirstElt)) {
return Result;
}
// If we have a tuple type, then aggregate the tuple's elements into a full
// tuple value.
if (TupleType *tupleType = LoadTy.getAs<TupleType>()) {
SILValue result =
aggregateTupleSubElts(tupleType, LoadTy, Address, FirstElt);
if (isTopLevel && result->getOwnershipKind() == OwnershipKind::Guaranteed) {
SILValue borrowedResult = result;
SILBuilderWithScope builder(&*B.getInsertionPoint(),
&ownershipFixup.insertedInsts);
result = builder.emitCopyValueOperation(Loc, borrowedResult);
SmallVector<BorrowedValue, 4> introducers;
bool foundIntroducers =
getAllBorrowIntroducingValues(borrowedResult, introducers);
(void)foundIntroducers;
assert(foundIntroducers);
for (auto value : introducers) {
builder.emitEndBorrowOperation(Loc, value.value);
}
}
return result;
}
// If we have a struct type, then aggregate the struct's elements into a full
// struct value.
if (auto *structDecl = getFullyReferenceableStruct(LoadTy)) {
SILValue result =
aggregateStructSubElts(structDecl, LoadTy, Address, FirstElt);
if (isTopLevel && result->getOwnershipKind() == OwnershipKind::Guaranteed) {
SILValue borrowedResult = result;
SILBuilderWithScope builder(&*B.getInsertionPoint(),
&ownershipFixup.insertedInsts);
result = builder.emitCopyValueOperation(Loc, borrowedResult);
SmallVector<BorrowedValue, 4> introducers;
bool foundIntroducers =
getAllBorrowIntroducingValues(borrowedResult, introducers);
(void)foundIntroducers;
assert(foundIntroducers);
for (auto value : introducers) {
builder.emitEndBorrowOperation(Loc, value.value);
}
}
return result;
}
// Otherwise, we have a non-aggregate primitive. Load or extract the value.
//
// NOTE: We should never call this when taking since when taking we know that
// our underlying value is always fully available.
assert(!isTake());
// If the value is not available, load the value and update our use list.
auto &val = AvailableValueList[FirstElt];
if (!val) {
LoadInst *load = ([&]() {
SILBuilderWithScope builder(&*B.getInsertionPoint(),
&ownershipFixup.insertedInsts);
return builder.createTrivialLoadOr(Loc, Address,
LoadOwnershipQualifier::Copy);
}());
Uses.emplace_back(load, PMOUseKind::Load);
return load;
}
return ownershipFixup.mergeCopies(val, LoadTy, &*B.getInsertionPoint(),
/*isFullyAvailable*/false);
}
// See if this value is fully available. In such a case, return it as an
// aggregate. This is a super-common case for single-element structs, but is
// also a general answer for arbitrary structs and tuples as well.
SILValue
AvailableValueAggregator::aggregateFullyAvailableValue(SILType loadTy,
unsigned firstElt) {
// Check if our underlying type is fully available. If it isn't, bail.
if (!isFullyAvailable(loadTy, firstElt, AvailableValueList))
return SILValue();
// Ok, grab our first value. (Note: any will actually do.)
auto &firstVal = AvailableValueList[firstElt];
// Ok, we know that all of our available values are all parts of the same
// value.
assert(B.hasOwnership() && "requires OSSA");
if (isTake())
return firstVal.getValue();
// Otherwise, we need to put in a copy. This is because we only propagate
// along +1 values and we are eliminating a load [copy].
return
ownershipFixup.mergeCopies(firstVal, loadTy, &*B.getInsertionPoint(),
/*isFullyAvailable*/true);
}
SILValue AvailableValueAggregator::aggregateTupleSubElts(TupleType *TT,
SILType LoadTy,
SILValue Address,
unsigned FirstElt) {
SmallVector<SILValue, 4> ResultElts;
for (unsigned EltNo : indices(TT->getElements())) {
SILType EltTy = LoadTy.getTupleElementType(EltNo);
unsigned NumSubElt =
getNumSubElements(EltTy, B.getFunction(),
TypeExpansionContext(B.getFunction()));
// If we are missing any of the available values in this struct element,
// compute an address to load from.
SILValue EltAddr;
if (anyMissing(FirstElt, NumSubElt, AvailableValueList)) {
assert(!isTake() && "When taking, values should never be missing?!");
EltAddr =
B.createTupleElementAddr(Loc, Address, EltNo, EltTy.getAddressType());
}
ResultElts.push_back(
aggregateValues(EltTy, EltAddr, FirstElt, /*isTopLevel*/ false));
FirstElt += NumSubElt;
}
// If we are going to use this to promote a borrowed value, insert borrow
// operations. Eventually I am going to do this for everything, but this
// should make it easier to bring up.
if (!isTake()) {
for (unsigned i : indices(ResultElts)) {
ResultElts[i] = B.emitBeginBorrowOperation(Loc, ResultElts[i]);
}
}
return B.createTuple(Loc, LoadTy, ResultElts);
}
SILValue AvailableValueAggregator::aggregateStructSubElts(StructDecl *sd,
SILType loadTy,
SILValue address,
unsigned firstElt) {
SmallVector<SILValue, 4> resultElts;
for (auto *decl : sd->getStoredProperties()) {
auto context = TypeExpansionContext(B.getFunction());
SILType eltTy = loadTy.getFieldType(decl, M, context);
unsigned numSubElt = getNumSubElements(eltTy, B.getFunction(), context);
// If we are missing any of the available values in this struct element,
// compute an address to load from.
SILValue eltAddr;
if (anyMissing(firstElt, numSubElt, AvailableValueList)) {
assert(!isTake() && "When taking, values should never be missing?!");
eltAddr =
B.createStructElementAddr(Loc, address, decl, eltTy.getAddressType());
}
resultElts.push_back(
aggregateValues(eltTy, eltAddr, firstElt, /*isTopLevel*/ false));
firstElt += numSubElt;
}
if (!isTake()) {
for (unsigned i : indices(resultElts)) {
resultElts[i] = B.emitBeginBorrowOperation(Loc, resultElts[i]);
}
}
return B.createStruct(Loc, loadTy, resultElts);
}
//===----------------------------------------------------------------------===//
// Available Value Dataflow
//===----------------------------------------------------------------------===//
namespace {
/// Given a piece of memory, the memory's uses, and its destroys, perform a single
/// round of semi-optimistic backwards dataflow for each use. The result is the
/// set of available values that reach the specific use of the field in the
/// allocated object.
///
/// The general form of the algorithm is that in our constructor, we analyze our
/// uses and determine available values. Then users call computeAvailableValues
/// which looks backwards up the control flow graph for available values that we
/// can use.
///
/// NOTE: The reason why we say that the algorithm is semi-optimistic is that we
/// assume that all incoming elements into a loop header will be the same. If we
/// find a conflict, we record it and fail.
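///
/// Illustrative example (schematic SIL, not from a test case):
///
///   bb1:                              bb2:
///     store %x to [init] %a             store %x to [init] %a
///     br bb3                            br bb3
///   bb3:
///     %v = load [copy] %a             // %x is available via both stores
///
/// Both predecessors provide the same value, so the load can be promoted to a
/// copy of %x, with both stores recorded as insertion points. If the two
/// predecessors stored different values, the element would be recorded as
/// conflicting and promotion of that element would fail.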
class AvailableValueDataflowContext {
/// The base memory we are performing dataflow upon.
AllocationInst *TheMemory;
/// The number of sub elements of our memory.
unsigned NumMemorySubElements;
/// The set of uses that we are tracking. This is only here so we can update
/// when exploding copy_addr and mark_dependence. It would be great if we did
/// not have to store this.
SmallVectorImpl<PMOMemoryUse> &Uses;
InstructionDeleter &deleter;
/// The set of blocks with local definitions.
///
/// We use this to determine if we should visit a block or look at a block's
/// predecessors during dataflow for an available value.
BasicBlockFlag HasLocalDefinition;
/// The set of blocks that have definitions which specifically "kill" the
/// given value. If a block is in this set, there must be an instruction in
/// LoadTakeUses whose parent is the block. This is just used to speed up
/// computation.
///
/// NOTE: These are not considered escapes.
BasicBlockFlag HasLocalKill;
/// This is a set of load takes that we are tracking. HasLocalKill is the set
/// of parent blocks of these instructions.
llvm::SmallPtrSet<SILInstruction *, 8> LoadTakeUses;
/// This is a map of uses that are not loads (i.e., they are Stores,
/// InOutUses, and Escapes), to their entry in Uses.
llvm::SmallDenseMap<SILInstruction *, unsigned, 16> NonLoadUses;
AvailableValueDataflowFixup ownershipFixup;
/// Does this value escape anywhere in the function. We use this very
/// conservatively.
bool HasAnyEscape = false;
/// When promoting a load [copy], the original allocation must be
/// preserved. This introduces extra copies.
OptimizationMode optimizationMode;
public:
AvailableValueDataflowContext(AllocationInst *TheMemory,
unsigned NumMemorySubElements,
OptimizationMode mode,
SmallVectorImpl<PMOMemoryUse> &Uses,
InstructionDeleter &deleter,
DeadEndBlocks &deBlocks);
// Find available values for the subelements of 'SrcAddr'.
// Return the SILType of the object in 'SrcAddr' and the index of the first
// subelement in that object.
// If not all subelements are available, return nullopt.
//
// Available value analysis can create dead owned casts. OSSA is invalid until
// the ownership chain is completed by replacing the uses of the loaded value
// and calling fixupOwnership().
std::optional<LoadInfo>
computeAvailableValues(SILValue SrcAddr, SILInstruction *Inst,
SmallVectorImpl<AvailableValue> &AvailableValues);
/// Return true if the box has escaped at the specified instruction. We are not
/// allowed to do load promotion in an escape region.
bool hasEscapedAt(SILInstruction *I);
/// Explode a copy_addr, updating the Uses at the same time.
void explodeCopyAddr(CopyAddrInst *CAI);
void verifyOwnership(DeadEndBlocks &deBlocks) {
ownershipFixup.verifyOwnership(deBlocks);
}
void deleteInsertedInsts(InstructionDeleter &deleter) {
ownershipFixup.deleteInsertedInsts(deleter);
}
private:
SILModule &getModule() const { return TheMemory->getModule(); }
SILFunction &getFunction() const { return *TheMemory->getFunction(); }
void updateAvailableValues(
SILInstruction *Inst,
SmallBitVector &RequiredElts,
SmallVectorImpl<AvailableValue> &Result,
llvm::SmallDenseMap<SILBasicBlock *, SmallBitVector, 32> &VisitedBlocks,
SmallBitVector &ConflictingValues);
/// Try to compute available values for "TheMemory" at the instruction \p
/// StartingFrom. We only compute the values for set bits in \p
/// RequiredElts. We return the available values in \p Result. If any available
/// values were found, return true. Otherwise, return false.
///
/// In OptimizationMode::PreserveAlloc, this may insert casts and copies to
/// propagate owned values.
bool computeAvailableElementValues(SILInstruction *StartingFrom,
LoadInfo loadInfo,
SmallBitVector &RequiredElts,
SmallVectorImpl<AvailableValue> &Result);
void computeAvailableValuesFrom(
SILBasicBlock::iterator StartingFrom, SILBasicBlock *BB,
SmallBitVector &RequiredElts,
SmallVectorImpl<AvailableValue> &Result,
llvm::SmallDenseMap<SILBasicBlock *, SmallBitVector, 32>
&VisitedBlocks,
SmallBitVector &ConflictingValues);
/// Promote a mark_dependence, updating the available values.
void updateMarkDependenceValues(
MarkDependenceInst *md,
SmallBitVector &RequiredElts,
SmallVectorImpl<AvailableValue> &Result,
llvm::SmallDenseMap<SILBasicBlock *, SmallBitVector, 32> &VisitedBlocks,
SmallBitVector &ConflictingValues);
SILValue createAvailableMarkDependence(MarkDependenceInst *md,
AvailableValue &availableVal);
};
} // end anonymous namespace
AvailableValueDataflowContext::AvailableValueDataflowContext(
AllocationInst *InputTheMemory, unsigned NumMemorySubElements,
OptimizationMode mode, SmallVectorImpl<PMOMemoryUse> &InputUses,
InstructionDeleter &deleter, DeadEndBlocks &deBlocks)
: TheMemory(InputTheMemory), NumMemorySubElements(NumMemorySubElements),
Uses(InputUses), deleter(deleter),
HasLocalDefinition(InputTheMemory->getFunction()),
HasLocalKill(InputTheMemory->getFunction()), optimizationMode(mode) {
// The first step of processing an element is to collect information about the
// element into data structures we use later.
for (unsigned ui : indices(Uses)) {
auto &Use = Uses[ui];
assert(Use.Inst && "No instruction identified?");
// If we have a load...
if (Use.Kind == PMOUseKind::Load) {
// Skip load borrow use and open_existential_addr.
if (isa<LoadBorrowInst>(Use.Inst) ||
isa<OpenExistentialAddrInst>(Use.Inst))
continue;
// If this is not a load [take], continue. Otherwise, stash the load [take].
if (auto *LI = dyn_cast<LoadInst>(Use.Inst)) {
if (LI->getOwnershipQualifier() == LoadOwnershipQualifier::Take) {
LoadTakeUses.insert(LI);
HasLocalKill.set(LI->getParent());
}
continue;
}
// If we have a copy_addr as our load, it means we are processing a source
// of the value. If the copy_addr is taking from the source, we need to
// treat it like a load take use.
if (auto *CAI = dyn_cast<CopyAddrInst>(Use.Inst)) {
if (CAI->isTakeOfSrc() == IsTake) {
LoadTakeUses.insert(CAI);
HasLocalKill.set(CAI->getParent());
}
continue;
}
// mark_dependence of the dependent value is equivalent to take-init.
if (isa<MarkDependenceInst>(Use.Inst)) {
HasLocalKill.set(Use.Inst->getParent());
continue;
}
llvm_unreachable("Unhandled SILInstructionKind for PMOUseKind::Load?!");
}
if (Use.Kind == PMOUseKind::DependenceBase) {
// An address used as a dependence base does not affect load promotion.
continue;
}
// Keep track of all the uses that aren't loads.
NonLoadUses[Use.Inst] = ui;
HasLocalDefinition.set(Use.Inst->getParent());
if (Use.Kind == PMOUseKind::Escape) {
// Determine which blocks the value can escape from. We aren't allowed to
// promote loads in blocks reachable from an escape point.
HasAnyEscape = true;
}
}
// It isn't really a use, but we account for the alloc_box/mark_uninitialized
// as a use so that we see it in our dataflow walks.
NonLoadUses[TheMemory] = ~0U;
HasLocalDefinition.set(TheMemory->getParent());
}
std::optional<LoadInfo>
AvailableValueDataflowContext::computeAvailableValues(
SILValue SrcAddr, SILInstruction *Inst,
SmallVectorImpl<AvailableValue> &AvailableValues) {
// If the box has escaped at this instruction, we can't safely promote the
// load.
if (hasEscapedAt(Inst))
return std::nullopt;
SILType LoadTy = SrcAddr->getType().getObjectType();
// If this is a load/copy_addr from a struct field that we want to promote,
// compute the access path down to the field so we can determine precise
// def/use behavior.
unsigned FirstElt = computeSubelement(SrcAddr, TheMemory);
// If this is a load from within an enum projection, we can't promote it since
// we don't track subelements in a type that could be changing.
if (FirstElt == ~0U)
return std::nullopt;
unsigned NumLoadSubElements = getNumSubElements(
LoadTy, getFunction(), TypeExpansionContext(getFunction()));
LoadInfo loadInfo = {LoadTy, FirstElt, NumLoadSubElements};
AvailableValues.resize(NumMemorySubElements);
// If no bits are demanded, we trivially succeed. This can happen when there
// is a load of an empty struct.
if (NumLoadSubElements == 0)
return loadInfo;
// Set up the bitvector of elements being demanded by the load.
SmallBitVector RequiredElts(NumMemorySubElements);
RequiredElts.set(*loadInfo.range().begin(), *loadInfo.range().end());
// Find out if we have any available values.
if (!computeAvailableElementValues(Inst, loadInfo, RequiredElts,
AvailableValues)) {
return std::nullopt;
}
return loadInfo;
}
// This function takes the current (potentially uninitialized) available values
// for theMemory and, for the subset of AvailableValues corresponding to
// \p address, does one of the following:
//
// 1. If uninitialized, optionally initialize the available value with a new
// SILValue. It is optional since in certain cases (for instance when
// invalidating) one just wants to skip empty available values.
//
// 2. Given an initialized value, either add the given instruction as an
// insertion point or state that we have a conflict.
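//
// For example (illustrative): when visiting `store %x to [init] %field_addr`,
// the default function seeds each required leaf of %x as an available value
// whose insertion point is the store; when visiting a load [take] of the same
// address, the default function returns nullopt and isSafeFunc returns false,
// so the affected leaves are marked as conflicting (invalidated) instead.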
static inline void updateAvailableValuesHelper(
SingleValueInstruction *theMemory, SILInstruction *inst, SILValue address,
SmallBitVector &requiredElts, SmallVectorImpl<AvailableValue> &result,
SmallBitVector &conflictingValues,
function_ref<std::optional<AvailableValue>(unsigned)> defaultFunc,
function_ref<bool(AvailableValue &, unsigned)> isSafeFunc) {
unsigned startSubElt = computeSubelement(address, theMemory);
// TODO: Is this needed now?
assert(startSubElt != ~0U && "Store within enum projection not handled");
auto &f = *theMemory->getFunction();
for (unsigned i : range(getNumSubElements(
address->getType().getObjectType(), f,
TypeExpansionContext(f)))) {
// If this element is not required, don't fill it in.
if (!requiredElts[startSubElt + i])
continue;
// At this point we know that we will either mark the value as conflicting
// or give it a value.
requiredElts[startSubElt + i] = false;
// First see if we have an entry at all.
auto &entry = result[startSubElt + i];
// If we don't...
if (!entry) {
// and we are told to initialize it, do so.
if (auto defaultValue = defaultFunc(i)) {
entry = std::move(defaultValue.value());
} else {
// Otherwise, mark this as a conflicting value. There is some available
// value here, we just do not know what it is at this point. This
// ensures that if we visit a kill where we do not have an entry yet, we
// properly invalidate our state.
conflictingValues[startSubElt + i] = true;
}
continue;
}
// Check if our caller thinks that the value currently in entry is
// compatible with \p inst. If not, mark the values as conflicting and
// continue.
if (!isSafeFunc(entry, i)) {
conflictingValues[startSubElt + i] = true;
continue;
}
// Otherwise, we found another insertion point for our available
// value. Today this will always be a Store.
entry.addInsertionPoint(inst);
}
}
void AvailableValueDataflowContext::updateAvailableValues(
SILInstruction *Inst, SmallBitVector &RequiredElts,
SmallVectorImpl<AvailableValue> &Result,
llvm::SmallDenseMap<SILBasicBlock *, SmallBitVector, 32> &VisitedBlocks,
SmallBitVector &ConflictingValues) {
// If we are visiting a load [take], it invalidates the underlying available
// values.
//
// NOTE: Since we are always looking back from the instruction to promote,
// when we attempt to promote the load [take] itself, we will never hit this
// code.
if (auto *LI = dyn_cast<LoadInst>(Inst)) {
// First see if this is a load inst that we are tracking.
if (LoadTakeUses.count(LI)) {
updateAvailableValuesHelper(
TheMemory, LI, LI->getOperand(), RequiredElts, Result,
ConflictingValues,
/*default*/
[](unsigned) -> std::optional<AvailableValue> {
// We never initialize values. We only
// want to invalidate.
return std::nullopt;
},
/*isSafe*/
[](AvailableValue &, unsigned) -> bool {
// Always assume values conflict.
return false;
});
return;
}
}
// Handle store.
if (auto *SI = dyn_cast<StoreInst>(Inst)) {
updateAvailableValuesHelper(
TheMemory, SI, SI->getDest(), RequiredElts, Result, ConflictingValues,
/*default*/
[&](unsigned ResultOffset) -> std::optional<AvailableValue> {
std::optional<AvailableValue> Result;
Result.emplace(SI->getSrc(), ResultOffset, SI);
return Result;
},
/*isSafe*/
[&](AvailableValue &Entry, unsigned ResultOffset) -> bool {
// TODO: This is /really/, /really/, conservative. This basically
// means that if we do not have an identical store, we will not
// promote.
return Entry.getValue() == SI->getSrc() &&
Entry.getSubElementNumber() == ResultOffset;
});
return;
}
// If we got here from an apply, we must either be initializing the element
// via an @out parameter or we are trying to model an invalidating load of the
// value (e.g. indirect_in, indirect_inout).
// If we get here with a copy_addr, we must either be storing into the element
// or tracking some sort of take of the src. First check if we are taking (in
// which case, we just track invalidation of src) and continue. Otherwise we
// must be storing into the copy_addr so see which loaded subelements are
// being used, and if so, explode the copy_addr to its individual pieces.
if (auto *CAI = dyn_cast<CopyAddrInst>(Inst)) {
// If we have a load take use, we must be tracking a store of CAI.
if (LoadTakeUses.count(CAI)) {
updateAvailableValuesHelper(
TheMemory, CAI, CAI->getSrc(), RequiredElts, Result,
ConflictingValues,
/*default*/
[](unsigned) -> std::optional<AvailableValue> {
// We never give values default initialized
// values. We only want to invalidate.
return std::nullopt;
},
/*isSafe*/
[](AvailableValue &, unsigned) -> bool {
// Always assume values conflict.
return false;
});
return;
}
unsigned StartSubElt = computeSubelement(CAI->getDest(), TheMemory);
assert(StartSubElt != ~0U && "Store within enum projection not handled");
SILType ValTy = CAI->getDest()->getType();
bool AnyRequired = false;
for (unsigned i : range(getNumSubElements(
ValTy, getFunction(),
TypeExpansionContext(getFunction())))) {
// If this element is not required, don't fill it in.
AnyRequired = RequiredElts[StartSubElt+i];
if (AnyRequired) break;
}
// If this is a copy addr that doesn't intersect the loaded subelements,
// just continue with an unmodified load mask.
if (!AnyRequired)
return;
// If the copyaddr is of a non-loadable type, we can't promote it. Just
// consider it to be a clobber.
if (CAI->getSrc()->getType().isLoadable(*CAI->getFunction())) {
// Otherwise, some part of the copy_addr's value is demanded by a load, so
// we need to explode it to its component pieces. This only expands one
// level of the copyaddr.
explodeCopyAddr(CAI);
// The copy_addr doesn't provide any values, but we've arranged for our
// iterators to visit the newly generated instructions, which do.
return;
}
}
if (auto *MD = dyn_cast<MarkDependenceInst>(Inst)) {
unsigned StartSubElt = computeSubelement(MD->getValue(), TheMemory);
assert(StartSubElt != ~0U && "Store within enum projection not handled");
SILType ValTy = MD->getValue()->getType();
// Check if this mark_dependence provides any required values before
// potentially bailing out (because it is address-only).
bool AnyRequired = false;
for (unsigned i : range(getNumSubElements(
ValTy, getFunction(), TypeExpansionContext(getFunction())))) {
// If this element is not required, don't fill it in.
AnyRequired = RequiredElts[StartSubElt+i];
if (AnyRequired) break;
}
// If this is a dependence that doesn't intersect the loaded subelements,
// just continue with an unmodified load mask.
if (!AnyRequired)
return;
// If the mark_dependence is loadable, promote it.
if (MD->getValue()->getType().isLoadable(*MD->getFunction())) {
updateMarkDependenceValues(MD, RequiredElts, Result, VisitedBlocks,
ConflictingValues);
return;
}
}
// TODO: inout applies should only clobber the pieces passed in.
// Otherwise, this is some unknown instruction; conservatively assume that all
// values are clobbered.
RequiredElts.clear();
ConflictingValues = SmallBitVector(Result.size(), true);
return;
}
bool AvailableValueDataflowContext::computeAvailableElementValues(
SILInstruction *StartingFrom, LoadInfo loadInfo,
SmallBitVector &RequiredElts, SmallVectorImpl<AvailableValue> &Result) {
llvm::SmallDenseMap<SILBasicBlock*, SmallBitVector, 32> VisitedBlocks;
SmallBitVector ConflictingValues(Result.size());
computeAvailableValuesFrom(StartingFrom->getIterator(),
StartingFrom->getParent(), RequiredElts, Result,
VisitedBlocks, ConflictingValues);
// If there are no values available at this load point, then we fail to
// promote this load and there is nothing to do.
SmallBitVector AvailableValueIsPresent(NumMemorySubElements);
for (unsigned i : loadInfo.range()) {
AvailableValueIsPresent[i] = Result[i].getValue();
}
// If we do not have any values available, bail.
if (AvailableValueIsPresent.none())
return false;
// Otherwise, if we have any conflicting values, explicitly mask them out of
// the result, so we don't pick one arbitrary available value.
if (ConflictingValues.none()) {
return true;
}
// At this point, we know that we have /some/ conflicting values and some
// available values.
if (AvailableValueIsPresent.reset(ConflictingValues).none())
return false;
// Otherwise, mask out the available values and return true. We have at least
// 1 available value.
int NextIter = ConflictingValues.find_first();
while (NextIter != -1) {
assert(NextIter >= 0 && "Int can not be represented?!");
unsigned Iter = NextIter;
Result[Iter] = {};
NextIter = ConflictingValues.find_next(Iter);
}
return true;
}
void AvailableValueDataflowContext::computeAvailableValuesFrom(
SILBasicBlock::iterator StartingFrom, SILBasicBlock *BB,
SmallBitVector &RequiredElts, SmallVectorImpl<AvailableValue> &Result,
llvm::SmallDenseMap<SILBasicBlock *, SmallBitVector, 32>
&VisitedBlocks,
SmallBitVector &ConflictingValues) {
assert(!RequiredElts.none() && "Scanning with a goal of finding nothing?");
// If there is a potential modification in the current block, scan the block
// to see if the store, escape, or load [take] is before or after the load. If
// it is before, check to see if it produces the value we are looking for.
bool shouldCheckBlock =
HasLocalDefinition.get(BB) || HasLocalKill.get(BB);
if (shouldCheckBlock) {
for (SILBasicBlock::iterator BBI = StartingFrom; BBI != BB->begin();) {
SILInstruction *TheInst = &*std::prev(BBI);
// If this instruction is unrelated to the element, ignore it.
if (!NonLoadUses.count(TheInst) && !LoadTakeUses.count(TheInst)) {
--BBI;
continue;
}
// Given an interesting instruction, incorporate it into the set of
// results, and filter down the list of demanded subelements that we still
// need.
updateAvailableValues(TheInst, RequiredElts, Result, VisitedBlocks,
ConflictingValues);
// If this satisfied all of the demanded values, we're done.
if (RequiredElts.none())
return;
// Otherwise, keep scanning the block. If the instruction we were looking
// at just got exploded, don't skip the next instruction.
if (&*std::prev(BBI) == TheInst)
--BBI;
}
}
// Otherwise, we need to scan up the CFG looking for available values.
for (auto PI = BB->pred_begin(), E = BB->pred_end(); PI != E; ++PI) {
SILBasicBlock *PredBB = *PI;
// If the predecessor block has already been visited (potentially due to a
// cycle in the CFG), don't revisit it. We can do this safely because we
// are optimistically assuming that all incoming elements in a cycle will be
// the same. If we ever detect a conflicting element, we record it and do
// not look at the result.
auto Entry = VisitedBlocks.insert({PredBB, RequiredElts});
if (!Entry.second) {
// If we are revisiting a block and asking for different required elements,
// then anything that doesn't agree is in conflict.
const auto &PrevRequired = Entry.first->second;
if (PrevRequired != RequiredElts) {
ConflictingValues |= (PrevRequired ^ RequiredElts);
RequiredElts &= ~ConflictingValues;
if (RequiredElts.none())
return;
}
continue;
}
// Make sure to pass in the same set of required elements for each pred.
SmallBitVector Elts = RequiredElts;
computeAvailableValuesFrom(PredBB->end(), PredBB, Elts, Result,
VisitedBlocks, ConflictingValues);
// If we have any conflicting values, don't bother searching for them.
RequiredElts &= ~ConflictingValues;
if (RequiredElts.none())
return;
}
}
/// Explode a copy_addr instruction of a loadable type into lower level
/// operations like loads, stores, retains, releases, retain_value, etc.
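///
/// For example (schematic SIL, assuming a loadable type and a non-take,
/// assigning copy_addr):
///
///   copy_addr %src to %dst : $*T
///
/// becomes roughly
///
///   %v = load [copy] %src : $*T
///   store %v to [assign] %dst : $*T
///
/// with the Uses list updated to track the new load and store in place of the
/// copy_addr.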
void AvailableValueDataflowContext::explodeCopyAddr(CopyAddrInst *CAI) {
LLVM_DEBUG(llvm::dbgs() << " -- Exploding copy_addr: " << *CAI << "\n");
SILType ValTy = CAI->getDest()->getType().getObjectType();
SILFunction *F = CAI->getFunction();
auto &TL = F->getTypeLowering(ValTy);
// Keep track of the new instructions emitted.
SmallVector<SILInstruction *, 4> NewInsts;
SILBuilder B(CAI, &NewInsts);
B.setCurrentDebugScope(CAI->getDebugScope());
// Use type lowering to lower the copyaddr into a load sequence + store
// sequence appropriate for the type.
SILValue StoredValue =
TL.emitLoadOfCopy(B, CAI->getLoc(), CAI->getSrc(), CAI->isTakeOfSrc());
TL.emitStoreOfCopy(B, CAI->getLoc(), StoredValue, CAI->getDest(),
CAI->isInitializationOfDest());
// Update our internal state for this being gone.
NonLoadUses.erase(CAI);
LoadTakeUses.erase(CAI);
// NOTE: We do not need to update HasLocalKill since the copy_addr
// and the loads/stores will have the same parent block.
// Remove the copy_addr from Uses. A single copy_addr can appear multiple
// times if the source and dest are to elements within a single aggregate, but
// we only want to pick up the CopyAddrKind from the store.
PMOMemoryUse LoadUse, StoreUse;
for (auto &Use : Uses) {
if (Use.Inst != CAI)
continue;
if (Use.Kind == PMOUseKind::Load) {
assert(LoadUse.isInvalid());
LoadUse = Use;
} else {
assert(StoreUse.isInvalid());
StoreUse = Use;
}
Use.Inst = nullptr;
// Keep scanning in case the copy_addr appears multiple times.
}
assert((LoadUse.isValid() || StoreUse.isValid()) &&
"we should have a load or a store, possibly both");
assert(StoreUse.isInvalid() || StoreUse.Kind == Assign ||
StoreUse.Kind == Initialization || StoreUse.Kind == InitOrAssign);
// Now that we've emitted a bunch of instructions, including a load and store
// but also including other stuff, update the internal state of
// LifetimeChecker to reflect them.
// Update the instructions that touch the memory. NewInst can grow as this
// iterates, so we can't use a foreach loop.
for (auto *NewInst : NewInsts) {
switch (NewInst->getKind()) {
default:
NewInst->dump();
llvm_unreachable("Unknown instruction generated by copy_addr lowering");
case SILInstructionKind::StoreInst:
// If it is a store to the memory object (as opposed to a store to
// something else), track it as an access.
if (StoreUse.isValid()) {
StoreUse.Inst = NewInst;
// If our store use by the copy_addr is an assign, then we know that
// before we store the new value, we loaded the old value implying that
// our store is technically initializing memory when it occurs. So
// change the kind to Initialization.
if (StoreUse.Kind == Assign)
StoreUse.Kind = Initialization;
NonLoadUses[NewInst] = Uses.size();
Uses.push_back(StoreUse);
}
continue;
case SILInstructionKind::LoadInst:
// If it is a load from the memory object (as opposed to a load from
// something else), track it as an access. We need to explicitly check to
// see if the load accesses "TheMemory" because it could either be a load
// for the copy_addr source, or it could be a load corresponding to the
// "assign" operation on the destination of the copyaddr.
if (LoadUse.isValid() &&
getAccessPathRoot(NewInst->getOperand(0)) == TheMemory) {
if (auto *LI = dyn_cast<LoadInst>(NewInst)) {
if (LI->getOwnershipQualifier() == LoadOwnershipQualifier::Take) {
LoadTakeUses.insert(LI);
HasLocalKill.set(LI->getParent());
}
}
LoadUse.Inst = NewInst;
Uses.push_back(LoadUse);
}
continue;
case SILInstructionKind::RetainValueInst:
case SILInstructionKind::StrongRetainInst:
case SILInstructionKind::StrongReleaseInst:
case SILInstructionKind::ReleaseValueInst: // Destroy overwritten value
// These are ignored.
continue;
}
}
// Next, remove the copy_addr itself.
deleter.forceDelete(CAI);
}
/// Promote a mark_dependence whose dependent value is an address of loadable
/// type by recreating the mark_dependence on each required available value,
/// so that the promoted values carry the same dependence on the base.
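///
/// For example, given (schematically):
///
///   store %v to [init] %addr
///   %md = mark_dependence %addr on %base
///
/// the available value for %addr is %v, and the promoted form for that
/// subelement is a new 'mark_dependence %v on %base' (created by
/// createAvailableMarkDependence below).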
void AvailableValueDataflowContext::updateMarkDependenceValues(
MarkDependenceInst *md,
SmallBitVector &RequiredElts,
SmallVectorImpl<AvailableValue> &Result,
llvm::SmallDenseMap<SILBasicBlock *, SmallBitVector, 32> &VisitedBlocks,
SmallBitVector &ConflictingValues) {
// Recursively compute all currently required available values up to the
// mark_dependence, regardless of whether they are required for the
// mark_dependence.
computeAvailableValuesFrom(md->getIterator(), md->getParent(), RequiredElts,
Result, VisitedBlocks, ConflictingValues);
unsigned firstMDElt = computeSubelement(md->getValue(), TheMemory);
// If the address is an enum projection, we can't promote it, since we don't
// track subelements within a type whose dynamic case could be changing.
if (firstMDElt == ~0U) {
RequiredElts.clear();
ConflictingValues = SmallBitVector(Result.size(), true);
return;
}
SILType valueTy = md->getValue()->getType().getObjectType();
unsigned numMDSubElements = getNumSubElements(
valueTy, getFunction(), TypeExpansionContext(getFunction()));
// Update each required subelement of the mark_dependence value.
for (unsigned subIdx = firstMDElt; subIdx < firstMDElt + numMDSubElements;
++subIdx) {
// If the available value has a conflict, then AvailableValue still holds
// a value that was available on a different path. It cannot be used.
if (ConflictingValues[subIdx]) {
Result[subIdx] = {};
continue;
}
// If no value is available for this subelement, or it was never required,
// then no promotion happens.
if (auto &availableVal = Result[subIdx]) {
auto newMD = createAvailableMarkDependence(md, availableVal);
// Update the available value. This may invalidate the Result.
Result[subIdx] = AvailableValue(newMD, subIdx, md);
}
}
}
// Find or create a mark_dependence that is a promotion of 'md' for 'value'.
// Return nullptr if the mark_dependence cannot be promoted.
// This does not currently allow phi creation.
SILValue AvailableValueDataflowContext::
createAvailableMarkDependence(MarkDependenceInst *md,
AvailableValue &availableVal) {
SILValue value = availableVal.getValue();
// If the allocation is replaced, no copies are created for promoted values
// and any promoted mark_dependence must be reused.
if (optimizationMode == OptimizationMode::ReplaceAlloc) {
auto mdIter = md->getIterator();
auto instBegin = md->getParentBlock()->begin();
while (mdIter != instBegin) {
--mdIter;
auto *newMD = dyn_cast<MarkDependenceInst>(&*mdIter);
if (!newMD)
break;
if (newMD->getValue() == value
&& newMD->getBase() == md->getBase()
&& newMD->dependenceKind() == md->dependenceKind()) {
return newMD;
}
}
} else {
// With OptimizationMode::PreserveAlloc, always create a new copy for each
// promoted value. This creates separate ownership, which is needed for each
// promoted load, and prevents reusing the mark_dependence later (via the
// code above) if the allocation is eliminated.
value = ownershipFixup.getSingleOwnedValue(availableVal);
if (!value)
return SILValue();
}
LLVM_DEBUG(llvm::dbgs() << " -- Promoting mark_dependence: " << *md
<< " source: " << value << "\n");
auto *newMD = SILBuilderWithScope(md)
.createMarkDependence(md->getLoc(), value, md->getBase(),
md->dependenceKind());
ownershipFixup.insertedInsts.push_back(newMD);
return newMD;
}
bool AvailableValueDataflowContext::hasEscapedAt(SILInstruction *I) {
// Return true if the box has escaped at the specified instruction. We are
// not allowed to do load promotion in an escape region.
// FIXME: This is not an aggressive implementation. :)
// TODO: At some point, we should special case closures that just *read* from
// the escaped value (by looking at the body of the closure). They should not
// prevent load promotion, and will allow promoting values like X in regions
// dominated by "... && X != 0".
return HasAnyEscape;
}
static SILType getMemoryType(AllocationInst *memory) {
// Compute the type of the memory object.
if (auto *abi = dyn_cast<AllocBoxInst>(memory)) {
assert(abi->getBoxType()->getLayout()->getFields().size() == 1 &&
"optimizing multi-field boxes not implemented");
return getSILBoxFieldType(TypeExpansionContext(*abi->getFunction()),
abi->getBoxType(), abi->getModule().Types, 0);
}
assert(isa<AllocStackInst>(memory));
return cast<AllocStackInst>(memory)->getElementType();
}
//===----------------------------------------------------------------------===//
// Optimize dead allocation:
// Fully promote each access
//===----------------------------------------------------------------------===//
namespace {
class PromotableInstructions {
// All promotable instructions share a vector of available values.
SmallVectorImpl<AvailableValue> &allAvailableValues;
SmallVector<SILInstruction *> promotableInsts;
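// availableValueOffsets[i] is the half-open range [start, end) of entries in
// allAvailableValues that belong to promotableInsts[i].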
SmallVector<std::pair<unsigned, unsigned>, 8> availableValueOffsets;
public:
PromotableInstructions(SmallVectorImpl<AvailableValue> &allAvailableValues)
: allAvailableValues(allAvailableValues) {}
unsigned size() const { return promotableInsts.size(); }
void push(SILInstruction *instruction) {
promotableInsts.push_back(instruction);
}
// Available values must be initialized in the same order that the
// instructions are pushed. Return the instruction's index.
unsigned
initializeAvailableValues(SILInstruction *instruction,
SmallVectorImpl<AvailableValue> &&availableValues) {
unsigned nextInstIdx = availableValueOffsets.size();
assert(instruction == promotableInsts[nextInstIdx]);
unsigned startOffset = allAvailableValues.size();
unsigned endOffset = startOffset + availableValues.size();
availableValueOffsets.push_back({startOffset, endOffset});
std::move(availableValues.begin(), availableValues.end(),
std::back_inserter(allAvailableValues));
return nextInstIdx;
}
ArrayRef<SILInstruction *> instructions() const { return promotableInsts; }
ArrayRef<AvailableValue> availableValues(unsigned index) {
return mutableAvailableValues(index);
}
MutableArrayRef<AvailableValue> mutableAvailableValues(unsigned index) {
unsigned startOffset, endOffset;
std::tie(startOffset, endOffset) = availableValueOffsets[index];
return {allAvailableValues.begin() + startOffset,
allAvailableValues.begin() + endOffset};
}
#ifndef NDEBUG
void verify() {
for (unsigned i : range(promotableInsts.size())) {
promotableInsts[i]->verifyOperandOwnership();
assert(!availableValues(i).empty()
&& "Value without any available values?!");
}
}
#endif
};
} // end anonymous namespace
namespace {
struct Promotions {
SmallVector<AvailableValue, 32> allAvailableValues;
PromotableInstructions loadTakes;
PromotableInstructions destroys;
PromotableInstructions markDepBases;
Promotions()
: loadTakes(allAvailableValues), destroys(allAvailableValues),
markDepBases(allAvailableValues) {}
#ifndef NDEBUG
void verify() {
loadTakes.verify();
destroys.verify();
markDepBases.verify();
}
#endif
};
} // end anonymous namespace
namespace {
/// This performs load promotion and deletes synthesized allocations if all
/// loads can be removed.
class OptimizeDeadAlloc {
SILModule &Module;
/// This is either an alloc_box or alloc_stack instruction.
AllocationInst *TheMemory;
/// This is the SILType of the memory object.
SILType MemoryType;
/// The number of primitive subelements across all elements of this memory
/// value.
unsigned NumMemorySubElements;
SmallVectorImpl<PMOMemoryUse> &Uses;
SmallVectorImpl<SILInstruction *> &Releases;
DeadEndBlocks &deadEndBlocks;
InstructionDeleter &deleter;
DominanceInfo *domInfo;
/// A structure that we use to compute our available values.
AvailableValueDataflowContext DataflowContext;
Promotions promotions;
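/// Values whose lifetimes may be left incomplete once the allocation and the
/// stores into it are deleted; their OSSA lifetimes are completed after the
/// allocation is removed.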
SmallBlotSetVector<SILValue, 32> valuesNeedingLifetimeCompletion;
public:
SILFunction *getFunction() const { return TheMemory->getFunction(); }
bool isTrivial() const { return MemoryType.isTrivial(getFunction()); }
OptimizeDeadAlloc(AllocationInst *memory,
SmallVectorImpl<PMOMemoryUse> &uses,
SmallVectorImpl<SILInstruction *> &releases,
DeadEndBlocks &deadEndBlocks, InstructionDeleter &deleter,
DominanceInfo *domInfo)
: Module(memory->getModule()), TheMemory(memory),
MemoryType(getMemoryType(memory)),
NumMemorySubElements(getNumSubElements(
MemoryType, *memory->getFunction(),
TypeExpansionContext(*memory->getFunction()))),
Uses(uses), Releases(releases), deadEndBlocks(deadEndBlocks),
deleter(deleter), domInfo(domInfo),
DataflowContext(TheMemory, NumMemorySubElements,
OptimizationMode::ReplaceAlloc, uses, deleter,
deadEndBlocks) {}
/// If the allocation is an autogenerated allocation that is only stored to
/// (after load promotion) then remove it completely.
bool tryToRemoveDeadAllocation();
private:
SILInstruction *collectUsesForPromotion();
/// Return true if a mark_dependence can be promoted. If so, this initializes
/// the available values in promotions.
bool canPromoteMarkDepBase(MarkDependenceInst *md);
/// Return true if a load [take] or destroy_addr can be promoted. If so, this
/// initializes the available values in promotions.
bool canPromoteTake(SILInstruction *i,
PromotableInstructions &promotableInsts);
SILValue promoteMarkDepBase(MarkDependenceInst *md,
ArrayRef<AvailableValue> availableValues);
void promoteMarkDepAddrBase(MarkDependenceAddrInst *md,
ArrayRef<AvailableValue> availableValues);
/// Promote a load take cleaning up everything except for RAUWing the
/// instruction with the aggregated result. The routine returns the new
/// aggregated result to the caller and expects the caller to eventually RAUW
/// \p inst with the return value. The reason why we do this is to allow for
/// the caller to work around invalidation issues by not deleting the load
/// [take] until after all load [take] have been cleaned up.
///
/// \returns the value that the caller will RAUW with \p inst.
SILValue promoteLoadTake(LoadInst *inst,
ArrayRef<AvailableValue> availableValues);
void promoteDestroyAddr(DestroyAddrInst *dai,
ArrayRef<AvailableValue> availableValues);
bool canRemoveDeadAllocation();
void removeDeadAllocation();
};
} // end anonymous namespace
// We don't want to remove allocations that are required for useful debug
// information at -O0. As such, we only remove allocations if:
//
// 1. They are in a transparent function.
// 2. They are in a normal function, but didn't come from a VarDecl, or came
// from one that was autogenerated or inlined from a transparent function.
static bool isRemovableAutogeneratedAllocation(AllocationInst *TheMemory) {
SILLocation loc = TheMemory->getLoc();
return TheMemory->getFunction()->isTransparent() ||
!loc.getAsASTNode<VarDecl>() || loc.isAutoGenerated() ||
loc.is<MandatoryInlinedLocation>();
}
bool OptimizeDeadAlloc::tryToRemoveDeadAllocation() {
if (!canRemoveDeadAllocation()) {
DataflowContext.deleteInsertedInsts(deleter);
return false;
}
removeDeadAllocation();
// Once the entire allocation is promoted, none of the instructions promoted
// during dataflow should need ownership fixup.
return true;
}
bool OptimizeDeadAlloc::canRemoveDeadAllocation() {
assert(TheMemory->getFunction()->hasOwnership() &&
"Can only eliminate dead allocations with ownership enabled");
assert((isa<AllocBoxInst>(TheMemory) || isa<AllocStackInst>(TheMemory)) &&
"Unhandled allocation case");
if (!isRemovableAutogeneratedAllocation(TheMemory))
return false;
// Check the uses list to see if there are any non-store uses left over after
// load promotion and other things PMO does.
if (auto *badUser = collectUsesForPromotion()) {
LLVM_DEBUG(llvm::dbgs() << "*** Failed to remove autogenerated alloc: "
"kept alive by: "
<< *badUser);
return false;
}
for (auto *md : promotions.markDepBases.instructions()) {
if (!canPromoteMarkDepBase(cast<MarkDependenceInst>(md)))
return false;
}
if (isTrivial()) {
return true;
}
for (auto *load : promotions.loadTakes.instructions()) {
if (!canPromoteTake(load, promotions.loadTakes))
return false;
}
for (auto *destroy : promotions.destroys.instructions()) {
if (!canPromoteTake(destroy, promotions.destroys))
return false;
}
// Gather up all found available values before promoting anything so we can
// fix up lifetimes later if we need to.
for (auto pmoMemUse : Uses) {
if (pmoMemUse.Inst && pmoMemUse.Kind == PMOUseKind::Initialization) {
if (isa<MarkDependenceInst>(pmoMemUse.Inst)) {
// mark_dependence of the dependent value is considered both a load and
// an init use. They can simply be deleted when the allocation is dead.
continue;
}
// Today if we promote, this is always a store, since we would have
// blown up the copy_addr otherwise. Given that, always make sure we
// clean up the src as appropriate after we optimize.
auto *si = dyn_cast<StoreInst>(pmoMemUse.Inst);
if (!si)
return false;
auto src = si->getSrc();
// Bail if src has any uses that are forwarding unowned uses. This
// allows us to know that we never have to deal with forwarding unowned
// instructions like br. These are corner cases that complicate the
// logic below.
for (auto *use : src->getUses()) {
if (use->getOperandOwnership() == OperandOwnership::ForwardingUnowned)
return false;
}
// FIXME: Lifetime completion on Boundary::Liveness requires that 'src'
// has no escaping uses. Check escaping uses here and either bailout or
// request completion on Boundary::Availability.
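// Record 'src' for lifetime completion: the store that consumes it is
// deleted together with the allocation, which may leave 'src' without a
// lifetime-ending use.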
valuesNeedingLifetimeCompletion.insert(src);
}
}
return true;
}
// Collect all uses that require promotion before this allocation can be
// eliminated. Returns nullptr on success. Upon failure, returns the first
// instruction corresponding to a use that cannot be promoted.
//
// Populates the promotable instruction lists in 'promotions'.
SILInstruction *OptimizeDeadAlloc::collectUsesForPromotion() {
for (auto &u : Uses) {
// Ignore removed instructions.
if (u.Inst == nullptr)
continue;
switch (u.Kind) {
case PMOUseKind::Assign:
// Until we can promote the value being destroyed by the assign, we cannot
// remove allocations with such assigns.
return u.Inst;
case PMOUseKind::InitOrAssign:
continue; // These don't prevent removal.
case PMOUseKind::Load:
// For now only handle takes from alloc_stack.
//
// TODO: It should be implementable, but it has not been needed yet.
if (auto *li = dyn_cast<LoadInst>(u.Inst)) {
if (li->getOwnershipQualifier() == LoadOwnershipQualifier::Take) {
promotions.loadTakes.push(li);
continue;
}
}
if (isa<MarkDependenceInst>(u.Inst)) {
// A mark_dependence source use does not prevent removal. The use
// collector already looks through them to find other uses.
continue;
}
return u.Inst;
case PMOUseKind::DependenceBase:
promotions.markDepBases.push(u.Inst);
continue;
case PMOUseKind::Initialization:
if (!isa<ApplyInst>(u.Inst) &&
// A copy_addr that is not a take affects the retain count
// of the source.
(!isa<CopyAddrInst>(u.Inst)
|| cast<CopyAddrInst>(u.Inst)->isTakeOfSrc())) {
continue;
}
LLVM_FALLTHROUGH;
case PMOUseKind::IndirectIn:
case PMOUseKind::InOutUse:
case PMOUseKind::Escape:
return u.Inst; // These do prevent removal.
}
}
// Ignore destroys of trivial values. They are destroy_value instructions
// that only destroy the dead box itself.
if (!isTrivial()) {
// Non-trivial allocations require ownership cleanup. We only promote
// alloc_stack in that case--all releases must be destroy_addr.
for (auto *release : Releases) {
// We stash all of the destroy_addr that we see.
if (auto *dai = dyn_cast_or_null<DestroyAddrInst>(release)) {
promotions.destroys.push(dai);
continue;
}
return release;
}
}
return nullptr;
}
bool OptimizeDeadAlloc::canPromoteMarkDepBase(MarkDependenceInst *md) {
SILValue srcAddr = md->getBase();
SmallVector<AvailableValue, 8> availableValues;
auto loadInfo =
DataflowContext.computeAvailableValues(srcAddr, md, availableValues);
if (!loadInfo.has_value())
return false;
unsigned index = promotions.markDepBases.initializeAvailableValues(
md, std::move(availableValues));
SILType baseTy = loadInfo->loadType;
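// If the base is the box itself rather than its projected field address,
// check full availability against the boxed field type instead of the box
// type.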
if (auto *abi = dyn_cast<AllocBoxInst>(TheMemory)) {
if (baseTy == abi->getType()) {
baseTy = MemoryType.getObjectType();
}
}
return isFullyAvailable(baseTy, loadInfo->firstElt,
promotions.markDepBases.availableValues(index));
}
/// Return true if we can promote the given destroy.
bool OptimizeDeadAlloc::canPromoteTake(
SILInstruction *inst, PromotableInstructions &promotableInsts) {
SILValue address = inst->getOperand(0);
// We cannot promote destroys of address-only types, because we can't expose
// the load.
if (address->getType().isAddressOnly(*inst->getFunction()))
return false;
SmallVector<AvailableValue, 8> availableValues;
auto loadInfo = DataflowContext.computeAvailableValues(address, inst,
availableValues);
if (!loadInfo.has_value())
return false;
// Now check that we can perform a take upon our available values. This
// implies today that our value is fully available. If the value is not fully
// available, we would need to split stores to promote this destroy_addr. We
// do not support that yet.
AvailableValueAggregator agg(inst, availableValues, Uses, deadEndBlocks,
AvailableValueExpectedOwnership::Take);
if (!agg.canTake(loadInfo->loadType, loadInfo->firstElt))
return false;
// As a final check, make sure that we have an available value for each
// subelement; if not, bail.
for (const auto &av : availableValues)
if (!av.Value)
return false;
// OK, we can promote this take. Move the temporary list's contents into the
// final available values list.
promotableInsts.initializeAvailableValues(inst, std::move(availableValues));
return true;
}
void OptimizeDeadAlloc::removeDeadAllocation() {
for (auto idxVal : llvm::enumerate(promotions.markDepBases.instructions())) {
auto vals = promotions.markDepBases.availableValues(idxVal.index());
if (auto *mdi = dyn_cast<MarkDependenceInst>(idxVal.value())) {
promoteMarkDepBase(mdi, vals);
continue;
}
auto *mda = cast<MarkDependenceAddrInst>(idxVal.value());
promoteMarkDepAddrBase(mda, vals);
}
// If our memory is trivially typed, we can just remove it without needing to
// consider if the stored value needs to be destroyed. So at this point,
// delete the memory!
if (isTrivial()) {
LLVM_DEBUG(llvm::dbgs() << "*** Removing autogenerated trivial allocation: "
<< *TheMemory);
// If it is safe to remove, do it. Recursively remove all instructions
// hanging off the allocation instruction, then return success. Let the
// caller remove the allocation itself to avoid iterator invalidation.
deleter.forceDeleteWithUsers(TheMemory);
return;
}
// Since a load [take] may itself be an available value for a destroy_addr or
// another load [take], we promote the destroy_addrs first and then handle the
// load [take]s with extra rigor to account for that possibility.
for (auto idxVal : llvm::enumerate(promotions.destroys.instructions())) {
auto *dai = cast<DestroyAddrInst>(idxVal.value());
auto vals = promotions.destroys.availableValues(idxVal.index());
promoteDestroyAddr(dai, vals);
// We do not need to unset the corresponding entries in Releases, since the
// entire allocation is deleted below.
}
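// Map each promoted load [take] to its replacement value; the actual RAUW and
// deletion are deferred until every load [take] has been promoted (see the
// example below).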
llvm::SmallMapVector<LoadInst *, SILValue, 32> loadsToDelete;
for (auto idxVal : llvm::enumerate(promotions.loadTakes.instructions())) {
for (auto &availableVal :
promotions.loadTakes.mutableAvailableValues(idxVal.index())) {
auto *availableLoad = dyn_cast<LoadInst>(availableVal.Value);
if (!availableLoad)
continue;
auto iter = loadsToDelete.find(availableLoad);
if (iter == loadsToDelete.end())
continue;
SILValue newValue = iter->second;
assert(newValue && "We should neer store a nil SILValue into this map");
availableVal.Value = newValue;
}
auto *loadTake = cast<LoadInst>(idxVal.value());
auto vals = promotions.loadTakes.availableValues(idxVal.index());
SILValue result = promoteLoadTake(loadTake, vals);
assert(result);
// We must not RAUW and delete the load [take] yet, since a load [take] that
// we are promoting could itself be an available value for another load
// [take]. Consider the following SIL:
//
// %mem = alloc_stack
// store %arg to [init] %mem
// %0 = load [take] %mem
// store %0 to [init] %mem
// %1 = load [take] %mem
// destroy_value %1
// dealloc_stack %mem
//
// In such a case, %0 is an available value for %1, so we record the
// replacement value for %0 in loadsToDelete and only perform the RAUW and
// deletion after all load [take]s have been promoted.
auto insertIter = loadsToDelete.insert({loadTake, result});
valuesNeedingLifetimeCompletion.erase(loadTake);
(void)insertIter;
assert(insertIter.second && "loadTakeState doesn't have unique loads?!");
}
// Now that we have promoted all of our load [take], perform the actual
// RAUW/removal.
for (auto p : loadsToDelete) {
LoadInst *li = p.first;
SILValue newValue = p.second;
li->replaceAllUsesWith(newValue);
deleter.forceDelete(li);
}
LLVM_DEBUG(llvm::dbgs() << "*** Removing autogenerated non-trivial alloc: "
<< *TheMemory);
// If it is safe to remove, do it. Recursively remove all instructions
// hanging off the allocation instruction, then return success.
deleter.forceDeleteWithUsers(TheMemory);
DataflowContext.verifyOwnership(deadEndBlocks);
// Now look at all of our available values and complete any of their
// post-dominating consuming use sets. This can happen if we have an enum that
// is known to be dynamically .none along a path. This is dynamically correct,
// but cannot be represented in OSSA, so we insert these destroys along said
// path.
OSSACompleteLifetime completion(TheMemory->getFunction(), domInfo,
deadEndBlocks);
while (!valuesNeedingLifetimeCompletion.empty()) {
auto optV = valuesNeedingLifetimeCompletion.pop_back_val();
if (!optV)
continue;
SILValue v = *optV;
// Lexical enums can have incomplete lifetimes in non payload paths that
// don't end in unreachable. Force their lifetime to end immediately after
// the last use instead.
auto boundary = v->getType().isOrHasEnum()
? OSSACompleteLifetime::Boundary::Liveness
: OSSACompleteLifetime::Boundary::Availability;
LLVM_DEBUG(llvm::dbgs() << "Completing lifetime of: ");
LLVM_DEBUG(v->dump());
completion.completeOSSALifetime(v, boundary);
}
}
SILValue OptimizeDeadAlloc::promoteMarkDepBase(
MarkDependenceInst *md, ArrayRef<AvailableValue> availableValues) {
LLVM_DEBUG(llvm::dbgs() << " *** Promoting mark_dependence base: " << *md);
SILBuilderWithScope B(md);
SILValue dependentValue = md->getValue();
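// Chain one mark_dependence per available base value, e.g.:
//
//   %d0 = mark_dependence %value on %base0
//   %d1 = mark_dependence %d0 on %base1
//
// and replace the original mark_dependence with the end of the chain.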
for (auto &availableValue : availableValues) {
dependentValue =
B.createMarkDependence(md->getLoc(), dependentValue,
availableValue.getValue(), md->dependenceKind());
}
LLVM_DEBUG(llvm::dbgs() << " To value: " << dependentValue);
md->replaceAllUsesWith(dependentValue);
deleter.deleteIfDead(md);
return dependentValue;
}
void OptimizeDeadAlloc::promoteMarkDepAddrBase(
MarkDependenceAddrInst *md, ArrayRef<AvailableValue> availableValues) {
SILValue dependentAddress = md->getAddress();
LLVM_DEBUG(llvm::dbgs() << " *** Promoting mark_dependence_addr base: "
<< *md
<< " To address: " << dependentAddress);
SILBuilderWithScope B(md);
for (auto &availableValue : availableValues) {
B.createMarkDependenceAddr(md->getLoc(), dependentAddress,
availableValue.getValue(),
md->dependenceKind());
}
deleter.deleteIfDead(md);
}
SILValue
OptimizeDeadAlloc::promoteLoadTake(LoadInst *li,
ArrayRef<AvailableValue> availableValues) {
assert(li->getOwnershipQualifier() == LoadOwnershipQualifier::Take &&
"load [copy], load [trivial], load should be handled by "
"promoteLoadCopy");
SILValue address = li->getOperand();
SILType loadTy = address->getType().getObjectType();
// Compute the access path down to the field so we can determine precise
// def/use behavior.
unsigned firstElt = computeSubelement(address, TheMemory);
// Aggregate together all of the subelements into something that has the same
// type as the load did, and emit smaller loads for any subelements that were
// not available.
AvailableValueAggregator agg(li, availableValues, Uses, deadEndBlocks,
AvailableValueExpectedOwnership::Take);
SILValue newVal = agg.aggregateValues(loadTy, address, firstElt);
assert(newVal);
++NumLoadTakePromoted;
LLVM_DEBUG(llvm::dbgs() << " *** Promoting load_take: " << *li);
LLVM_DEBUG(llvm::dbgs() << " To value: " << *newVal);
// Our parent RAUWs with newVal/erases li.
return newVal;
}
// DestroyAddr is a composed operation merging load [take] + destroy_value. If
// the implicit load's value is available, explode it.
//
// NOTE: We only do this if we have a fully available value.
//
// Note that we handle the general case of a destroy_addr of a piece of the
// memory object, not just destroy_addrs of the entire thing.
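//
// For example, if the destroyed address is fully available as value %v
// (schematically: 'store %v to [init] %addr; ...; destroy_addr %addr'), the
// destroy_addr is replaced by 'destroy_value %v'.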
void OptimizeDeadAlloc::promoteDestroyAddr(
DestroyAddrInst *dai, ArrayRef<AvailableValue> availableValues) {
SILValue address = dai->getOperand();
SILType loadTy = address->getType().getObjectType();
// Compute the access path down to the field so we can determine precise
// def/use behavior.
unsigned firstElt = computeSubelement(address, TheMemory);
// Aggregate together all of the subelements into something that has the same
// type as the load did, and emit smaller loads for any subelements that were
// not available.
AvailableValueAggregator agg(dai, availableValues, Uses, deadEndBlocks,
AvailableValueExpectedOwnership::Take);
SILValue newVal = agg.aggregateValues(loadTy, address, firstElt);
++NumDestroyAddrPromoted;
LLVM_DEBUG(llvm::dbgs() << " *** Promoting destroy_addr: " << *dai);
LLVM_DEBUG(llvm::dbgs() << " To value: " << *newVal);
SILBuilderWithScope(dai).emitDestroyValueOperation(dai->getLoc(), newVal);
deleter.forceDelete(dai);
}
//===----------------------------------------------------------------------===//
// Top Level Entrypoints
//===----------------------------------------------------------------------===//
static AllocationInst *getOptimizableAllocation(SILInstruction *i) {
if (!isa<AllocBoxInst>(i) && !isa<AllocStackInst>(i)) {
return nullptr;
}
auto *alloc = cast<AllocationInst>(i);
// If our aggregate has unreferenceable storage, we can't optimize. Return
// nullptr.
if (getMemoryType(alloc).aggregateHasUnreferenceableStorage())
return nullptr;
// Do not perform this on move-only values, since we introduce copies to
// promote things.
if (getMemoryType(alloc).isMoveOnly())
return nullptr;
// Don't promote large types.
auto &mod = alloc->getFunction()->getModule();
if (EnableAggressiveExpansionBlocking &&
mod.getOptions().UseAggressiveReg2MemForCodeSize &&
!shouldExpand(mod, alloc->getType().getObjectType()))
return nullptr;
// Otherwise we are good to go. Let's try to optimize this memory!
return alloc;
}
bool swift::eliminateDeadAllocations(SILFunction *fn, DominanceInfo *domInfo) {
if (!fn->hasOwnership()) {
return false;
}
bool changed = false;
DeadEndBlocks deadEndBlocks(fn);
for (auto &bb : *fn) {
InstructionDeleter deleter;
for (SILInstruction &inst : bb.deletableInstructions()) {
// First see if inst is an allocation that we can optimize. If not, skip it.
AllocationInst *alloc = getOptimizableAllocation(&inst);
if (!alloc) {
continue;
}
LLVM_DEBUG(llvm::dbgs()
<< "*** PMO Dead Allocation Elimination looking at: "
<< *alloc);
PMOMemoryObjectInfo memInfo(alloc);
// Set up the datastructure used to collect the uses of the allocation.
SmallVector<PMOMemoryUse, 16> uses;
SmallVector<SILInstruction *, 4> destroys;
// Walk the use list of the pointer, collecting its uses. If we are not able
// to optimize, skip this value. *NOTE* We may still scalarize values
// inside the value.
if (!collectPMOElementUsesAndDestroysFrom(memInfo, uses, destroys)) {
continue;
}
OptimizeDeadAlloc optimizeDeadAlloc(alloc, uses, destroys, deadEndBlocks,
deleter, domInfo);
if (optimizeDeadAlloc.tryToRemoveDeadAllocation()) {
deleter.cleanupDeadInstructions();
++NumAllocRemoved;
changed = true;
}
}
}
return changed;
}
namespace {
class PredictableDeadAllocationElimination : public SILFunctionTransform {
void run() override {
auto *func = getFunction();
if (!func->hasOwnership())
return;
LLVM_DEBUG(llvm::dbgs() << "Looking at: " << func->getName() << "\n");
auto *da = getAnalysis<DominanceAnalysis>();
// If the function was deserialized as canonical SIL, just bail. (Ownership
// was already checked above.)
if (func->wasDeserializedCanonical())
return;
if (eliminateDeadAllocations(func, da->get(func)))
invalidateAnalysis(SILAnalysis::InvalidationKind::FunctionBody);
}
};
} // end anonymous namespace
SILTransform *swift::createPredictableDeadAllocationElimination() {
return new PredictableDeadAllocationElimination();
}