//===--- PredictableMemOpt.cpp - Perform predictable memory optzns --------===//
//
// This source file is part of the Swift.org open source project
//
// Copyright (c) 2014 - 2017 Apple Inc. and the Swift project authors
// Licensed under Apache License v2.0 with Runtime Library Exception
//
// See https://swift.org/LICENSE.txt for license information
// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
//
//===----------------------------------------------------------------------===//

#define DEBUG_TYPE "predictable-memopt"

#include "PMOMemoryUseCollector.h"
#include "swift/Basic/Assertions.h"
#include "swift/Basic/BlotMapVector.h"
#include "swift/Basic/BlotSetVector.h"
#include "swift/Basic/FrozenMultiMap.h"
#include "swift/Basic/STLExtras.h"
#include "swift/SIL/BasicBlockBits.h"
#include "swift/SIL/BasicBlockUtils.h"
#include "swift/SIL/LinearLifetimeChecker.h"
#include "swift/SIL/OSSACompleteLifetime.h"
#include "swift/SIL/OwnershipUtils.h"
#include "swift/SIL/SILBuilder.h"
#include "swift/SILOptimizer/PassManager/Passes.h"
#include "swift/SILOptimizer/PassManager/Transforms.h"
#include "swift/SILOptimizer/Utils/CFGOptUtils.h"
#include "swift/SILOptimizer/Utils/InstOptUtils.h"
#include "swift/SILOptimizer/Utils/OwnershipOptUtils.h"
#include "swift/SILOptimizer/Utils/SILSSAUpdater.h"
#include "swift/SILOptimizer/Utils/ValueLifetime.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/Statistic.h"
// Needed for llvm::cl::opt below.
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"

using namespace swift;

static llvm::cl::opt<bool> EnableAggressiveExpansionBlocking(
    "enable-aggressive-expansion-blocking", llvm::cl::init(false));

STATISTIC(NumLoadTakePromoted, "Number of load takes promoted");
STATISTIC(NumDestroyAddrPromoted, "Number of destroy_addrs promoted");
STATISTIC(NumAllocRemoved, "Number of allocations completely removed");

namespace {

/// The following utilities handle two fundamentally different optimizations:
/// - load-copy removal preserves the allocation
/// - dead allocation removal replaces the allocation
///
/// Ownership handling is completely different in these cases. The utilities
/// are not well-factored to reflect this, so we have a mode flag.
enum class OptimizationMode {
  PreserveAlloc,
  ReplaceAlloc
};

} // end namespace

//===----------------------------------------------------------------------===//
// Subelement Analysis
//===----------------------------------------------------------------------===//

namespace {

// The type of a particular memory access and its corresponding subelement
// index range relative to the allocation being optimized.
struct LoadInfo {
  SILType loadType;
  unsigned firstElt;
  unsigned numElts;

  IntRange<unsigned> range() const {
    return IntRange<unsigned>(firstElt, firstElt + numElts);
  }
};

} // namespace

// We can only analyze components of structs whose storage is fully accessible
// from Swift.
static StructDecl *getFullyReferenceableStruct(SILType Ty) {
  auto SD = Ty.getStructOrBoundGenericStruct();
  if (!SD || SD->hasUnreferenceableStorage())
    return nullptr;
  return SD;
}

static unsigned getNumSubElements(SILType T, SILFunction &F,
                                  TypeExpansionContext context) {
  if (auto TT = T.getAs<TupleType>()) {
    unsigned NumElements = 0;
    for (auto index : indices(TT.getElementTypes()))
      NumElements +=
          getNumSubElements(T.getTupleElementType(index), F, context);
    return NumElements;
  }

  if (auto *SD = getFullyReferenceableStruct(T)) {
    unsigned NumElements = 0;
    for (auto *D : SD->getStoredProperties())
      NumElements +=
          getNumSubElements(T.getFieldType(D, F.getModule(), context), F,
                            context);

    // Returning NumElements == 0 implies that "empty" values can be
    // materialized without ownership. This is only valid for trivial types.
    if (NumElements > 0 || T.isTrivial(F))
      return NumElements;
  }

  // If this isn't a tuple or struct, it is a single element.
  return 1;
}
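
// For illustration (hypothetical type, not from this file): a value of type
// (Int, (Float, Float)) flattens to three subelements, numbered 0, 1, and 2
// in declaration order. A fully referenceable struct counts the same way over
// its stored properties; any other type counts as a single subelement.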

/// getAccessPathRoot - Given an address, dive through any tuple/struct element
/// addresses to get the underlying value.
static SILValue getAccessPathRoot(SILValue pointer) {
  while (true) {
    if (auto *TEAI = dyn_cast<TupleElementAddrInst>(pointer)) {
      pointer = TEAI->getOperand();
      continue;
    }

    if (auto *SEAI = dyn_cast<StructElementAddrInst>(pointer)) {
      pointer = SEAI->getOperand();
      continue;
    }

    if (auto *BAI = dyn_cast<BeginAccessInst>(pointer)) {
      pointer = BAI->getSource();
      continue;
    }

    return pointer;
  }
}
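
// For example (illustrative SIL), given:
//
//   %a = begin_access [read] [static] %root
//   %f = struct_element_addr %a, #S.f
//
// getAccessPathRoot(%f) walks back through both projections and returns
// %root.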

/// Compute the subelement number indicated by the specified pointer (which is
/// derived from the root by a series of tuple/struct element addresses) by
/// treating the type as a linearized namespace with sequential elements. For
/// example, given:
///
///   root = alloc { a: { c: i64, d: i64 }, b: (i64, i64) }
///   tmp1 = struct_element_addr root, 1
///   tmp2 = tuple_element_addr tmp1, 0
///
/// This will return a subelement number of 2.
///
/// If this pointer points into an existential projection, it returns ~0U.
static unsigned computeSubelement(SILValue Pointer,
                                  SingleValueInstruction *RootInst) {
  unsigned SubElementNumber = 0;
  auto &F = *RootInst->getFunction();

  while (1) {
    // If we got to the root, we're done.
    if (RootInst == Pointer)
      return SubElementNumber;

    if (auto *PBI = dyn_cast<ProjectBoxInst>(Pointer)) {
      Pointer = PBI->getOperand();
      continue;
    }

    if (auto *BAI = dyn_cast<BeginAccessInst>(Pointer)) {
      Pointer = BAI->getSource();
      continue;
    }

    if (auto *TEAI = dyn_cast<TupleElementAddrInst>(Pointer)) {
      SILType TT = TEAI->getOperand()->getType();

      // Keep track of what subelement is being referenced.
      for (unsigned i = 0, e = TEAI->getFieldIndex(); i != e; ++i) {
        SubElementNumber +=
            getNumSubElements(TT.getTupleElementType(i), F,
                              TypeExpansionContext(*RootInst->getFunction()));
      }
      Pointer = TEAI->getOperand();
      continue;
    }

    if (auto *SEAI = dyn_cast<StructElementAddrInst>(Pointer)) {
      SILType ST = SEAI->getOperand()->getType();

      // Keep track of what subelement is being referenced.
      StructDecl *SD = SEAI->getStructDecl();
      for (auto *D : SD->getStoredProperties()) {
        if (D == SEAI->getField())
          break;
        auto context = TypeExpansionContext(*RootInst->getFunction());
        SubElementNumber +=
            getNumSubElements(ST.getFieldType(D, F.getModule(), context), F,
                              context);
      }

      Pointer = SEAI->getOperand();
      continue;
    }

    if (auto *MD = dyn_cast<MarkDependenceInst>(Pointer)) {
      Pointer = MD->getValue();
      continue;
    }

    // This fails when we visit unchecked_take_enum_data_addr. We should just
    // add support for enums.
    assert(isa<InitExistentialAddrInst>(Pointer) &&
           "Unknown access path instruction");
    // Cannot promote loads and stores from within an existential projection.
    return ~0U;
  }
}

//===----------------------------------------------------------------------===//
// Available Value
//===----------------------------------------------------------------------===//

namespace {

class AvailableValueAggregator;

struct AvailableValue {
  friend class AvailableValueAggregator;

  SILValue Value;
  unsigned SubElementNumber;

  /// If this gets too expensive in terms of copying, we can use an arena and
  /// a FrozenPtrSet like we do in ARC.
  llvm::SmallSetVector<SILInstruction *, 1> InsertionPoints;

  /// Just for updating.
  SmallVectorImpl<PMOMemoryUse> *Uses;

public:
  AvailableValue() = default;

  /// Main initializer for available values.
  ///
  /// *NOTE* We assume that all available values start with a singular
  /// insertion point and insertion points are added by merging.
  AvailableValue(SILValue Value, unsigned SubElementNumber,
                 SILInstruction *InsertPoint)
      : Value(Value), SubElementNumber(SubElementNumber), InsertionPoints() {
    InsertionPoints.insert(InsertPoint);
  }

  /// Deleted copy constructor. This is a move only type.
  AvailableValue(const AvailableValue &) = delete;

  /// Deleted copy operator. This is a move only type.
  AvailableValue &operator=(const AvailableValue &) = delete;

  /// Move constructor.
  AvailableValue(AvailableValue &&Other)
      : Value(nullptr), SubElementNumber(~0), InsertionPoints() {
    std::swap(Value, Other.Value);
    std::swap(SubElementNumber, Other.SubElementNumber);
    std::swap(InsertionPoints, Other.InsertionPoints);
  }

  /// Move operator.
  AvailableValue &operator=(AvailableValue &&Other) {
    std::swap(Value, Other.Value);
    std::swap(SubElementNumber, Other.SubElementNumber);
    std::swap(InsertionPoints, Other.InsertionPoints);
    return *this;
  }

  operator bool() const { return bool(Value); }

  bool operator==(const AvailableValue &Other) const {
    return Value == Other.Value && SubElementNumber == Other.SubElementNumber;
  }

  bool operator!=(const AvailableValue &Other) const {
    return !(*this == Other);
  }

  SILValue getValue() const { return Value; }
  SILType getType() const { return Value->getType(); }
  unsigned getSubElementNumber() const { return SubElementNumber; }
  ArrayRef<SILInstruction *> getInsertionPoints() const {
    return InsertionPoints.getArrayRef();
  }

  void mergeInsertionPoints(const AvailableValue &Other) & {
    assert(Value == Other.Value && SubElementNumber == Other.SubElementNumber);
    InsertionPoints.set_union(Other.InsertionPoints);
  }

  void addInsertionPoint(SILInstruction *i) & { InsertionPoints.insert(i); }

  AvailableValue emitStructExtract(SILBuilder &B, SILLocation Loc, VarDecl *D,
                                   unsigned SubElementNumber) const {
    SILValue NewValue = B.emitStructExtract(Loc, Value, D);
    return {NewValue, SubElementNumber, InsertionPoints};
  }

  AvailableValue emitTupleExtract(SILBuilder &B, SILLocation Loc,
                                  unsigned EltNo,
                                  unsigned SubElementNumber) const {
    SILValue NewValue = B.emitTupleExtract(Loc, Value, EltNo);
    return {NewValue, SubElementNumber, InsertionPoints};
  }

  AvailableValue emitBeginBorrow(SILBuilder &b, SILLocation loc) const {
    // If we do not have ownership or already are guaranteed, just return a
    // copy of our state.
    if (Value->getOwnershipKind().isCompatibleWith(
            OwnershipKind::Guaranteed)) {
      return {Value, SubElementNumber, InsertionPoints};
    }

    // Otherwise, return newValue.
    return {b.createBeginBorrow(loc, Value), SubElementNumber,
            InsertionPoints};
  }

  void dump() const LLVM_ATTRIBUTE_USED;
  void print(llvm::raw_ostream &os) const;

private:
  /// Private constructor.
  AvailableValue(SILValue Value, unsigned SubElementNumber,
                 const decltype(InsertionPoints) &InsertPoints)
      : Value(Value), SubElementNumber(SubElementNumber),
        InsertionPoints(InsertPoints) {}
};

} // end anonymous namespace

void AvailableValue::dump() const { print(llvm::dbgs()); }

void AvailableValue::print(llvm::raw_ostream &os) const {
  os << "Available Value Dump. Value: ";
  if (getValue()) {
    os << getValue();
  } else {
    os << "NoValue;\n";
  }
  os << "SubElementNumber: " << getSubElementNumber() << "\n";
  os << "Insertion Points:\n";
  for (auto *I : getInsertionPoints()) {
    os << *I;
  }
}

namespace llvm {

llvm::raw_ostream &operator<<(llvm::raw_ostream &os, const AvailableValue &V) {
  V.print(os);
  return os;
}

} // end llvm namespace

//===----------------------------------------------------------------------===//
// Subelement Extraction
//===----------------------------------------------------------------------===//

static bool isFullyAvailable(SILType loadTy, unsigned firstElt,
                             ArrayRef<AvailableValue> AvailableValues) {
  if (firstElt >= AvailableValues.size()) { // #Elements may be zero.
    return false;
  }

  auto &firstVal = AvailableValues[firstElt];

  // Make sure that the first element is available and is the correct type.
  if (!firstVal || firstVal.getType() != loadTy)
    return false;

  auto &function = *firstVal.getValue()->getFunction();
  return llvm::all_of(
      range(getNumSubElements(loadTy, function,
                              TypeExpansionContext(function))),
      [&](unsigned index) -> bool {
        auto &val = AvailableValues[firstElt + index];
        return val.getValue() == firstVal.getValue() &&
               val.getSubElementNumber() == index;
      });
}
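
// Sketch with illustrative indices: a load of type (Int, Int) starting at
// firstElt = 2 is fully available only if AvailableValues[2] and
// AvailableValues[3] both refer to the same SILValue of type (Int, Int), with
// subelement numbers 0 and 1 respectively. Any mismatch in value, type, or
// numbering means the load must be reassembled piecewise instead.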

/// Given an aggregate value and an access path, non-destructively extract the
/// value indicated by the path.
static SILValue nonDestructivelyExtractSubElement(const AvailableValue &Val,
                                                  SILBuilder &B,
                                                  SILLocation Loc) {
  SILType ValTy = Val.getType();
  unsigned SubElementNumber = Val.SubElementNumber;

  // Extract tuple elements.
  if (auto TT = ValTy.getAs<TupleType>()) {
    for (unsigned EltNo : indices(TT.getElementTypes())) {
      // Keep track of what subelement is being referenced.
      SILType EltTy = ValTy.getTupleElementType(EltNo);
      unsigned NumSubElt = getNumSubElements(
          EltTy, B.getFunction(), TypeExpansionContext(B.getFunction()));
      if (SubElementNumber < NumSubElt) {
        auto BorrowedVal = Val.emitBeginBorrow(B, Loc);
        auto NewVal =
            BorrowedVal.emitTupleExtract(B, Loc, EltNo, SubElementNumber);
        SILValue result = nonDestructivelyExtractSubElement(NewVal, B, Loc);
        // If our original value wasn't guaranteed and we did actually perform
        // a borrow as a result, insert the end_borrow.
        if (BorrowedVal.getValue() != Val.getValue())
          B.createEndBorrow(Loc, BorrowedVal.getValue());
        return result;
      }

      SubElementNumber -= NumSubElt;
    }

    llvm_unreachable("Didn't find field");
  }

  // Extract struct elements.
  if (auto *SD = getFullyReferenceableStruct(ValTy)) {
    for (auto *D : SD->getStoredProperties()) {
      auto fieldType = ValTy.getFieldType(
          D, B.getModule(), TypeExpansionContext(B.getFunction()));
      unsigned NumSubElt = getNumSubElements(
          fieldType, B.getFunction(), TypeExpansionContext(B.getFunction()));

      if (SubElementNumber < NumSubElt) {
        auto BorrowedVal = Val.emitBeginBorrow(B, Loc);
        auto NewVal =
            BorrowedVal.emitStructExtract(B, Loc, D, SubElementNumber);
        SILValue result = nonDestructivelyExtractSubElement(NewVal, B, Loc);
        // If our original value wasn't guaranteed and we did actually perform
        // a borrow as a result, insert the end_borrow.
        if (BorrowedVal.getValue() != Val.getValue())
          B.createEndBorrow(Loc, BorrowedVal.getValue());
        return result;
      }

      SubElementNumber -= NumSubElt;
    }

    llvm_unreachable("Didn't find field");
  }

  // Otherwise, we're down to a scalar. If we have ownership enabled, we
  // return a copy. Otherwise, we can ignore ownership issues. This is ok
  // since in [ossa] we are going to eliminate a load [copy] or a
  // load [trivial], while in non-[ossa] SIL we will be replacing unqualified
  // loads.
  assert(SubElementNumber == 0 && "Miscalculation indexing subelements");
  return B.emitCopyValueOperation(Loc, Val.getValue());
}
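
// For example (illustrative SIL, assuming OSSA): extracting subelement 1 from
// an owned available value %v of type (A, B) emits roughly
//
//   %b = begin_borrow %v
//   %e = tuple_extract %b, 1
//   %c = copy_value %e
//   end_borrow %b
//
// and returns %c, leaving %v itself untouched.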

//===----------------------------------------------------------------------===//
// Ownership Fixup
//===----------------------------------------------------------------------===//

namespace {

/// For OptimizationMode::PreserveAlloc. Track inserted copies and casts for
/// ownership fixup.
struct AvailableValueFixup {
  /// Keep track of all instructions that we have added. Once we are done
  /// promoting a value, we need to make sure that if we need to balance any
  /// copies (to avoid leaks), we do so. This is not used if we are performing
  /// a take.
  SmallVector<SILInstruction *, 16> insertedInsts;

  AvailableValueFixup() = default;
  AvailableValueFixup(const AvailableValueFixup &) = delete;
  AvailableValueFixup &operator=(const AvailableValueFixup &) = delete;

  ~AvailableValueFixup() {
    assert(insertedInsts.empty() && "need to fix ownership");
  }
};

} // end namespace

namespace {

/// For available value data flow with OptimizationMode::PreserveAlloc: track
/// only inserted copies and casts for ownership fixup after dataflow
/// completes. Phis are not allowed.
struct AvailableValueDataflowFixup : AvailableValueFixup {
  /// Given a single available value, get an owned copy if it is possible to
  /// create one without introducing a phi. Return the copy. Otherwise, return
  /// an invalid SILValue.
  SILValue getSingleOwnedValue(const AvailableValue &availableVal);

  // Verify ownership of promoted instructions in asserts builds or when
  // -sil-verify-all is set.
  //
  // Clears insertedInsts.
  void verifyOwnership(DeadEndBlocks &deBlocks);

  // Deletes all insertedInsts without fixing ownership.
  // Clears insertedInsts.
  void deleteInsertedInsts(InstructionDeleter &deleter);
};

} // end namespace

// In OptimizationMode::PreserveAlloc, get a copy of this single available
// value. This requires a copy at the allocation's insertion point (store).
//
// Assumption: this copy will be consumed at a cast-like instruction
// (mark_dependence), which caused dataflow to invalidate the currently
// available value. Therefore, since phis are not allowed, the consumption
// cannot be in an inner loop relative to the copy. Unlike getMergedOwnedValue,
// there is no need for a second copy.
//
// Note: the caller must add the consuming cast to this->insertedInsts.
SILValue AvailableValueDataflowFixup::getSingleOwnedValue(
    const AvailableValue &availableVal) {
  SILValue value = availableVal.getValue();
  // If there is no need for copies, then we don't care about multiple
  // insertion points.
  if (value->getType().isTrivial(*value->getFunction())) {
    return value;
  }
  ArrayRef<SILInstruction *> insertPts = availableVal.getInsertionPoints();
  if (insertPts.size() > 1)
    return SILValue();

  return SILBuilderWithScope(insertPts[0], &insertedInsts)
      .emitCopyValueOperation(insertPts[0]->getLoc(), value);
}
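
// Sketch of the behavior: for an available value produced by a single
// `store %v to [init] %addr`, this emits `%c = copy_value %v` at that store
// and returns %c. With two distinct insertion points it returns an invalid
// SILValue instead, since merging the copies would require a phi.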

void AvailableValueDataflowFixup::verifyOwnership(DeadEndBlocks &deBlocks) {
  for (auto *inst : insertedInsts) {
    if (inst->isDeleted())
      continue;

    for (auto result : inst->getResults()) {
      result.verifyOwnership(&deBlocks);
    }
  }
  insertedInsts.clear();
}

void AvailableValueDataflowFixup::deleteInsertedInsts(
    InstructionDeleter &deleter) {
  for (auto *inst : insertedInsts) {
    if (inst->isDeleted())
      continue;

    deleter.forceDeleteWithUsers(inst);
  }
  insertedInsts.clear();
}

// In OptimizationMode::PreserveAlloc: insert copies and phis for aggregate
// values.
struct AvailableValueAggregationFixup : AvailableValueFixup {
  DeadEndBlocks &deadEndBlocks;

  /// The list of phi nodes inserted by the SSA updater.
  SmallVector<SILPhiArgument *, 16> insertedPhiNodes;

  AvailableValueAggregationFixup(DeadEndBlocks &deadEndBlocks)
      : deadEndBlocks(deadEndBlocks) {}

  /// For a single 'availableVal', insert copies at each insertion point.
  /// Merge all copies, creating phis if needed. Return the final copy.
  /// Otherwise, return an invalid SILValue.
  SILValue mergeCopies(const AvailableValue &availableVal, SILType loadTy,
                       SILInstruction *availableAtInst, bool isFullyAvailable);

private:
  /// As a result of us using the SSA updater, insert hand off copy/destroys
  /// at each phi and make sure that intermediate phis do not leak by
  /// inserting destroys along paths that go through the intermediate phi but
  /// do not also go through the final merged value.
  void addHandOffCopyDestroysForPhis(SILInstruction *load, SILValue newVal);
};

// For OptimizationMode::PreserveAlloc, insert copies at the available value's
// insertion points to provide an owned value. Merge the copies into phis.
// Create another copy before 'availableAtInst' in case it consumes the owned
// value within an inner loop. Add the copies to insertedInsts and phis to
// insertedPhiNodes.
SILValue AvailableValueAggregationFixup::mergeCopies(
    const AvailableValue &availableVal, SILType loadTy,
    SILInstruction *availableAtInst, bool isFullyAvailable) {
  auto emitValue = [&](SILInstruction *insertPt) {
    if (isFullyAvailable) {
      SILValue fullVal = availableVal.getValue();
      if (fullVal->use_empty() &&
          fullVal->getOwnershipKind() == OwnershipKind::Owned) {
        // This value was inserted during data flow to propagate ownership. It
        // does not need to be copied.
        return fullVal;
      }
      return SILBuilderWithScope(insertPt, &insertedInsts)
          .emitCopyValueOperation(insertPt->getLoc(), fullVal);
    }
    SILBuilderWithScope builder(insertPt, &insertedInsts);
    SILValue eltVal = nonDestructivelyExtractSubElement(availableVal, builder,
                                                        insertPt->getLoc());
    assert(eltVal->getType() == loadTy && "Subelement types mismatch");
    assert(eltVal->getOwnershipKind().isCompatibleWith(OwnershipKind::Owned));
    return eltVal;
  };

  ArrayRef<SILInstruction *> insertPts = availableVal.getInsertionPoints();
  if (insertPts.size() == 1) {
    SILValue eltVal = emitValue(insertPts[0]);
    if (isFullyAvailable)
      return eltVal;

    return SILBuilderWithScope(availableAtInst, &insertedInsts)
        .emitCopyValueOperation(availableAtInst->getLoc(), eltVal);
  }
  // If we have multiple insertion points, put copies at each point and use
  // the SSA updater to get a value. The reason why this is safe is that we
  // can only have multiple insertion points if we are storing exactly the
  // same value, implying that we can just copy firstVal at each insertion
  // point.
  SILSSAUpdater updater(&insertedPhiNodes);
  SILFunction *function = availableAtInst->getFunction();
  assert(function->hasOwnership() && "requires OSSA");
  updater.initialize(function, loadTy, OwnershipKind::Owned);

  std::optional<SILValue> singularValue;
  for (auto *insertPt : insertPts) {
    SILValue eltVal = emitValue(insertPt);

    if (!singularValue.has_value()) {
      singularValue = eltVal;
    } else if (*singularValue != eltVal) {
      singularValue = SILValue();
    }
    // And then put the value into the SSA updater.
    updater.addAvailableValue(insertPt->getParent(), eltVal);
  }

  // If we are only tracking a singular value, we do not need to construct
  // SSA. Just return that value.
  if (auto val = singularValue.value_or(SILValue())) {
    // Non-trivial values will always be copied above at each insertion
    // point, and will, therefore, have multiple incoming values.
    assert(val->getType().isTrivial(*function) &&
           "Should never reach this code path if we are in ossa and have a "
           "non-trivial value");
    return val;
  }

  // Finally, grab the value from the SSA updater.
  SILValue result =
      updater.getValueInMiddleOfBlock(availableAtInst->getParent());
  assert(result->getOwnershipKind().isCompatibleWith(OwnershipKind::Owned));
  assert(result->getType() == loadTy && "Subelement types mismatch");
  // Insert another copy at the point of availability in case it is inside a
  // loop.
  return SILBuilderWithScope(availableAtInst, &insertedInsts)
      .emitCopyValueOperation(availableAtInst->getLoc(), result);
}
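
// Sketch with two insertion points (illustrative SIL): if the same value %v
// is stored in two predecessors, a copy is emitted at each store and the SSA
// updater merges them through a block argument:
//
//   bb1: %c1 = copy_value %v        bb2: %c2 = copy_value %v
//        br bb3(%c1)                     br bb3(%c2)
//   bb3(%phi : @owned $T):
//     %r = copy_value %phi   // extra copy guards against reuse in a loop
//
// The final copy before 'availableAtInst' is what callers receive.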

//===----------------------------------------------------------------------===//
// Available Value Aggregation
//===----------------------------------------------------------------------===//

static bool anyMissing(unsigned StartSubElt, unsigned NumSubElts,
                       ArrayRef<AvailableValue> &Values) {
  while (NumSubElts) {
    if (!Values[StartSubElt])
      return true;
    ++StartSubElt;
    --NumSubElts;
  }
  return false;
}

namespace {

enum class AvailableValueExpectedOwnership {
  Take,
  Borrow,
  Copy,
};

/// A class that aggregates available values, loading them if they are not
/// available.
class AvailableValueAggregator {
  SILModule &M;
  SILBuilderWithScope B;
  SILLocation Loc;
  ArrayRef<AvailableValue> AvailableValueList;
  SmallVectorImpl<PMOMemoryUse> &Uses;
  AvailableValueExpectedOwnership expectedOwnership;

  AvailableValueAggregationFixup ownershipFixup;

public:
  AvailableValueAggregator(SILInstruction *Inst,
                           ArrayRef<AvailableValue> AvailableValueList,
                           SmallVectorImpl<PMOMemoryUse> &Uses,
                           DeadEndBlocks &deadEndBlocks,
                           AvailableValueExpectedOwnership expectedOwnership)
      : M(Inst->getModule()), B(Inst), Loc(Inst->getLoc()),
        AvailableValueList(AvailableValueList), Uses(Uses),
        expectedOwnership(expectedOwnership), ownershipFixup(deadEndBlocks) {}

  // This is intended to be passed by reference only once constructed.
  AvailableValueAggregator(const AvailableValueAggregator &) = delete;
  AvailableValueAggregator(AvailableValueAggregator &&) = delete;
  AvailableValueAggregator &
  operator=(const AvailableValueAggregator &) = delete;
  AvailableValueAggregator &operator=(AvailableValueAggregator &&) = delete;

  SILValue aggregateValues(SILType LoadTy, SILValue Address, unsigned FirstElt,
                           bool isTopLevel = true);
  bool canTake(SILType loadTy, unsigned firstElt) const;

  void print(llvm::raw_ostream &os) const;
  void dump() const LLVM_ATTRIBUTE_USED;

  bool isTake() const {
    return expectedOwnership == AvailableValueExpectedOwnership::Take;
  }

  bool isBorrow() const {
    return expectedOwnership == AvailableValueExpectedOwnership::Borrow;
  }

  bool isCopy() const {
    return expectedOwnership == AvailableValueExpectedOwnership::Copy;
  }

private:
  SILValue aggregateFullyAvailableValue(SILType loadTy, unsigned firstElt);
  SILValue aggregateTupleSubElts(TupleType *tt, SILType loadTy,
                                 SILValue address, unsigned firstElt);
  SILValue aggregateStructSubElts(StructDecl *sd, SILType loadTy,
                                  SILValue address, unsigned firstElt);
};

} // end anonymous namespace

void AvailableValueAggregator::dump() const { print(llvm::dbgs()); }

void AvailableValueAggregator::print(llvm::raw_ostream &os) const {
  os << "Available Value List, N = " << AvailableValueList.size()
     << ". Elts:\n";
  for (auto &V : AvailableValueList) {
    os << V;
  }
}

// We can only take if we never have to split a larger value to promote this
// address.
bool AvailableValueAggregator::canTake(SILType loadTy,
                                       unsigned firstElt) const {
  // If we are trivially fully available, just return true.
  if (isFullyAvailable(loadTy, firstElt, AvailableValueList))
    return true;

  // Otherwise see if we are an aggregate with fully available leaf types.
  if (TupleType *tt = loadTy.getAs<TupleType>()) {
    return llvm::all_of(indices(tt->getElements()), [&](unsigned eltNo) {
      SILType eltTy = loadTy.getTupleElementType(eltNo);
      unsigned numSubElt = getNumSubElements(
          eltTy, B.getFunction(), TypeExpansionContext(B.getFunction()));
      bool success = canTake(eltTy, firstElt);
      firstElt += numSubElt;
      return success;
    });
  }

  if (auto *sd = getFullyReferenceableStruct(loadTy)) {
    return llvm::all_of(sd->getStoredProperties(), [&](VarDecl *decl) -> bool {
      auto context = TypeExpansionContext(B.getFunction());
      SILType eltTy = loadTy.getFieldType(decl, M, context);
      unsigned numSubElt = getNumSubElements(eltTy, B.getFunction(), context);
      bool success = canTake(eltTy, firstElt);
      firstElt += numSubElt;
      return success;
    });
  }

  // Otherwise, fail. The value is not fully available at its leaves. We
  // cannot perform a take.
  return false;
}
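
// Example of the failure case (hypothetical types): suppose a whole struct S
// was stored as one value, and we are promoting a `load [take]` of just S.a.
// The available value at S.a's subelement is S itself (subelement 0 of a
// larger value), so the leaf is not fully available and taking would require
// splitting S; canTake correctly returns false here.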

/// Given a bunch of primitive subelement values, build out the right
/// aggregate type (LoadTy) by emitting tuple and struct instructions as
/// necessary.
SILValue AvailableValueAggregator::aggregateValues(SILType LoadTy,
                                                   SILValue Address,
                                                   unsigned FirstElt,
                                                   bool isTopLevel) {
  // If we are performing a take, make sure that we have available values for
  // /all/ of our values. Otherwise, bail.
  if (isTopLevel && isTake() && !canTake(LoadTy, FirstElt)) {
    return SILValue();
  }

  // Check to see if the requested value is fully available, as an aggregate.
  // This is a super-common case for single-element structs, but is also a
  // general answer for arbitrary structs and tuples as well.
  if (SILValue Result = aggregateFullyAvailableValue(LoadTy, FirstElt)) {
    return Result;
  }

  // If we have a tuple type, then aggregate the tuple's elements into a full
  // tuple value.
  if (TupleType *tupleType = LoadTy.getAs<TupleType>()) {
    SILValue result =
        aggregateTupleSubElts(tupleType, LoadTy, Address, FirstElt);
    if (isTopLevel &&
        result->getOwnershipKind() == OwnershipKind::Guaranteed) {
      SILValue borrowedResult = result;
      SILBuilderWithScope builder(&*B.getInsertionPoint(),
                                  &ownershipFixup.insertedInsts);
      result = builder.emitCopyValueOperation(Loc, borrowedResult);
      SmallVector<BorrowedValue, 4> introducers;
      bool foundIntroducers =
          getAllBorrowIntroducingValues(borrowedResult, introducers);
      (void)foundIntroducers;
      assert(foundIntroducers);
      for (auto value : introducers) {
        builder.emitEndBorrowOperation(Loc, value.value);
      }
    }
    return result;
  }

  // If we have a struct type, then aggregate the struct's elements into a
  // full struct value.
  if (auto *structDecl = getFullyReferenceableStruct(LoadTy)) {
    SILValue result =
        aggregateStructSubElts(structDecl, LoadTy, Address, FirstElt);
    if (isTopLevel &&
        result->getOwnershipKind() == OwnershipKind::Guaranteed) {
      SILValue borrowedResult = result;
      SILBuilderWithScope builder(&*B.getInsertionPoint(),
                                  &ownershipFixup.insertedInsts);
      result = builder.emitCopyValueOperation(Loc, borrowedResult);
      SmallVector<BorrowedValue, 4> introducers;
      bool foundIntroducers =
          getAllBorrowIntroducingValues(borrowedResult, introducers);
      (void)foundIntroducers;
      assert(foundIntroducers);
      for (auto value : introducers) {
        builder.emitEndBorrowOperation(Loc, value.value);
      }
    }
    return result;
  }

  // Otherwise, we have a non-aggregate primitive. Load or extract the value.
  //
  // NOTE: We should never call this when taking since when taking we know
  // that our underlying value is always fully available.
  assert(!isTake());
  // If the value is not available, load the value and update our use list.
  auto &val = AvailableValueList[FirstElt];
  if (!val) {
    LoadInst *load = ([&]() {
      SILBuilderWithScope builder(&*B.getInsertionPoint(),
                                  &ownershipFixup.insertedInsts);
      return builder.createTrivialLoadOr(Loc, Address,
                                         LoadOwnershipQualifier::Copy);
    }());
    Uses.emplace_back(load, PMOUseKind::Load);
    return load;
  }
  return ownershipFixup.mergeCopies(val, LoadTy, &*B.getInsertionPoint(),
                                    /*isFullyAvailable*/ false);
}

// See if this value is fully available. In such a case, return it as an
// aggregate. This is a super-common case for single-element structs, but is
// also a general answer for arbitrary structs and tuples as well.
SILValue
AvailableValueAggregator::aggregateFullyAvailableValue(SILType loadTy,
                                                       unsigned firstElt) {
  // Check if our underlying type is fully available. If it isn't, bail.
  if (!isFullyAvailable(loadTy, firstElt, AvailableValueList))
    return SILValue();

  // Ok, grab our first value (note: any will actually do).
  auto &firstVal = AvailableValueList[firstElt];

  // Ok, we know that all of our available values are all parts of the same
  // value.
  assert(B.hasOwnership() && "requires OSSA");
  if (isTake())
    return firstVal.getValue();

  // Otherwise, we need to put in a copy. This is b/c we only propagate along
  // +1 values and we are eliminating a load [copy].
  return ownershipFixup.mergeCopies(firstVal, loadTy, &*B.getInsertionPoint(),
                                    /*isFullyAvailable*/ true);
}

SILValue AvailableValueAggregator::aggregateTupleSubElts(TupleType *TT,
                                                         SILType LoadTy,
                                                         SILValue Address,
                                                         unsigned FirstElt) {
  SmallVector<SILValue, 4> ResultElts;

  for (unsigned EltNo : indices(TT->getElements())) {
    SILType EltTy = LoadTy.getTupleElementType(EltNo);
    unsigned NumSubElt = getNumSubElements(
        EltTy, B.getFunction(), TypeExpansionContext(B.getFunction()));

    // If we are missing any of the available values in this tuple element,
    // compute an address to load from.
    SILValue EltAddr;
    if (anyMissing(FirstElt, NumSubElt, AvailableValueList)) {
      assert(!isTake() && "When taking, values should never be missing?!");
      EltAddr =
          B.createTupleElementAddr(Loc, Address, EltNo, EltTy.getAddressType());
    }

    ResultElts.push_back(
        aggregateValues(EltTy, EltAddr, FirstElt, /*isTopLevel*/ false));
    FirstElt += NumSubElt;
  }

  // If we are going to use this to promote a borrowed value, insert borrow
  // operations. Eventually I am going to do this for everything, but this
  // should make it easier to bring up.
  if (!isTake()) {
    for (unsigned i : indices(ResultElts)) {
      ResultElts[i] = B.emitBeginBorrowOperation(Loc, ResultElts[i]);
    }
  }

  return B.createTuple(Loc, LoadTy, ResultElts);
}

SILValue AvailableValueAggregator::aggregateStructSubElts(StructDecl *sd,
                                                          SILType loadTy,
                                                          SILValue address,
                                                          unsigned firstElt) {
  SmallVector<SILValue, 4> resultElts;

  for (auto *decl : sd->getStoredProperties()) {
    auto context = TypeExpansionContext(B.getFunction());
    SILType eltTy = loadTy.getFieldType(decl, M, context);
    unsigned numSubElt = getNumSubElements(eltTy, B.getFunction(), context);

    // If we are missing any of the available values in this struct element,
    // compute an address to load from.
    SILValue eltAddr;
    if (anyMissing(firstElt, numSubElt, AvailableValueList)) {
      assert(!isTake() && "When taking, values should never be missing?!");
      eltAddr =
          B.createStructElementAddr(Loc, address, decl, eltTy.getAddressType());
    }

    resultElts.push_back(
        aggregateValues(eltTy, eltAddr, firstElt, /*isTopLevel*/ false));
    firstElt += numSubElt;
  }

  if (!isTake()) {
    for (unsigned i : indices(resultElts)) {
      resultElts[i] = B.emitBeginBorrowOperation(Loc, resultElts[i]);
    }
  }

  return B.createStruct(Loc, loadTy, resultElts);
}

//===----------------------------------------------------------------------===//
// Available Value Dataflow
//===----------------------------------------------------------------------===//

namespace {

/// Given a piece of memory, the memory's uses, and destroys, perform a single
/// round of semi-optimistic backwards dataflow for each use. The result is
/// the set of available values that reach the specific use of the field in
/// the allocated object.
///
/// The general form of the algorithm is that in our constructor, we analyze
/// our uses and determine available values. Then users call
/// computeAvailableValues which looks backwards up the control flow graph for
/// available values that we can use.
///
/// NOTE: The reason why we say that the algorithm is semi-optimistic is that
/// we assume that all incoming elements into a loop header will be the same.
/// If we find a conflict, we record it and fail.
class AvailableValueDataflowContext {
  /// The base memory we are performing dataflow upon.
  AllocationInst *TheMemory;

  /// The number of subelements of our memory.
  unsigned NumMemorySubElements;

  /// The set of uses that we are tracking. This is only here so we can update
  /// it when exploding copy_addr and mark_dependence. It would be great if we
  /// did not have to store this.
  SmallVectorImpl<PMOMemoryUse> &Uses;

  InstructionDeleter &deleter;

  /// The set of blocks with local definitions.
  ///
  /// We use this to determine if we should visit a block or look at a block's
  /// predecessors during dataflow for an available value.
  BasicBlockFlag HasLocalDefinition;

  /// The set of blocks that have definitions which specifically "kill" the
  /// given value. If a block is in this set, there must be an instruction in
  /// LoadTakeUses whose parent is the block. This is just used to speed up
  /// computation.
  ///
  /// NOTE: These are not considered escapes.
  BasicBlockFlag HasLocalKill;

  /// This is a set of load takes that we are tracking. HasLocalKill is the
  /// set of parent blocks of these instructions.
  llvm::SmallPtrSet<SILInstruction *, 8> LoadTakeUses;

  /// This is a map of uses that are not loads (i.e., they are Stores,
  /// InOutUses, and Escapes), to their entry in Uses.
  llvm::SmallDenseMap<SILInstruction *, unsigned, 16> NonLoadUses;

  AvailableValueDataflowFixup ownershipFixup;

  /// Does this value escape anywhere in the function? We use this very
  /// conservatively.
  bool HasAnyEscape = false;

  /// When promoting load [copy], the original allocation must be
  /// preserved. This introduces extra copies.
  OptimizationMode optimizationMode;

public:
  AvailableValueDataflowContext(AllocationInst *TheMemory,
                                unsigned NumMemorySubElements,
                                OptimizationMode mode,
                                SmallVectorImpl<PMOMemoryUse> &Uses,
                                InstructionDeleter &deleter,
                                DeadEndBlocks &deBlocks);

  // Find available values for subelements of 'SrcAddr'.
  // Return the SILType of the object in 'SrcAddr' and the index of the first
  // subelement in that object.
  // If not all subelements are available, return nullopt.
  //
  // Available value analysis can create dead owned casts. OSSA is invalid
  // until the ownership chain is completed by replacing the uses of the
  // loaded value and calling fixupOwnership().
  std::optional<LoadInfo>
  computeAvailableValues(SILValue SrcAddr, SILInstruction *Inst,
                         SmallVectorImpl<AvailableValue> &AvailableValues);

  /// Return true if the box has escaped at the specified instruction. We are
  /// not allowed to do load promotion in an escape region.
  bool hasEscapedAt(SILInstruction *I);

  /// Explode a copy_addr, updating the Uses at the same time.
  void explodeCopyAddr(CopyAddrInst *CAI);

  void verifyOwnership(DeadEndBlocks &deBlocks) {
    ownershipFixup.verifyOwnership(deBlocks);
  }

  void deleteInsertedInsts(InstructionDeleter &deleter) {
    ownershipFixup.deleteInsertedInsts(deleter);
  }

private:
  SILModule &getModule() const { return TheMemory->getModule(); }

  SILFunction &getFunction() const { return *TheMemory->getFunction(); }

  void updateAvailableValues(
      SILInstruction *Inst, SmallBitVector &RequiredElts,
      SmallVectorImpl<AvailableValue> &Result,
      llvm::SmallDenseMap<SILBasicBlock *, SmallBitVector, 32> &VisitedBlocks,
      SmallBitVector &ConflictingValues);

  /// Try to compute available values for "TheMemory" at the instruction \p
  /// StartingFrom. We only compute the values for set bits in \p
  /// RequiredElts. We return the available values in \p Result. If any
  /// available values were found, return true. Otherwise, return false.
  ///
  /// In OptimizationMode::PreserveAlloc, this may insert casts and copies to
  /// propagate owned values.
  bool computeAvailableElementValues(SILInstruction *StartingFrom,
                                     LoadInfo loadInfo,
                                     SmallBitVector &RequiredElts,
                                     SmallVectorImpl<AvailableValue> &Result);

  void computeAvailableValuesFrom(
      SILBasicBlock::iterator StartingFrom, SILBasicBlock *BB,
      SmallBitVector &RequiredElts, SmallVectorImpl<AvailableValue> &Result,
      llvm::SmallDenseMap<SILBasicBlock *, SmallBitVector, 32> &VisitedBlocks,
      SmallBitVector &ConflictingValues);

  /// Promote a mark_dependence, updating the available values.
  void updateMarkDependenceValues(
      MarkDependenceInst *md, SmallBitVector &RequiredElts,
      SmallVectorImpl<AvailableValue> &Result,
      llvm::SmallDenseMap<SILBasicBlock *, SmallBitVector, 32> &VisitedBlocks,
      SmallBitVector &ConflictingValues);

  SILValue createAvailableMarkDependence(MarkDependenceInst *md,
                                         AvailableValue &availableVal);
};

} // end anonymous namespace

AvailableValueDataflowContext::AvailableValueDataflowContext(
    AllocationInst *InputTheMemory, unsigned NumMemorySubElements,
    OptimizationMode mode, SmallVectorImpl<PMOMemoryUse> &InputUses,
    InstructionDeleter &deleter, DeadEndBlocks &deBlocks)
    : TheMemory(InputTheMemory), NumMemorySubElements(NumMemorySubElements),
      Uses(InputUses), deleter(deleter),
      HasLocalDefinition(InputTheMemory->getFunction()),
      HasLocalKill(InputTheMemory->getFunction()), optimizationMode(mode) {
  // The first step of processing an element is to collect information about
  // the element into data structures we use later.
  for (unsigned ui : indices(Uses)) {
    auto &Use = Uses[ui];
    assert(Use.Inst && "No instruction identified?");

    // If we have a load...
    if (Use.Kind == PMOUseKind::Load) {
      // Skip load borrow use and open_existential_addr.
      if (isa<LoadBorrowInst>(Use.Inst) ||
          isa<OpenExistentialAddrInst>(Use.Inst))
        continue;

      // If it is not a load [take], continue. Otherwise, stash the
      // load [take].
      if (auto *LI = dyn_cast<LoadInst>(Use.Inst)) {
        if (LI->getOwnershipQualifier() == LoadOwnershipQualifier::Take) {
          LoadTakeUses.insert(LI);
          HasLocalKill.set(LI->getParent());
        }
        continue;
      }

      // If we have a copy_addr as our load, it means we are processing a
      // source of the value. If the copy_addr is taking from the source, we
      // need to treat it like a load take use.
      if (auto *CAI = dyn_cast<CopyAddrInst>(Use.Inst)) {
        if (CAI->isTakeOfSrc() == IsTake) {
          LoadTakeUses.insert(CAI);
          HasLocalKill.set(CAI->getParent());
        }
        continue;
      }

      // mark_dependence of the dependent value is equivalent to take-init.
      if (isa<MarkDependenceInst>(Use.Inst)) {
        HasLocalKill.set(Use.Inst->getParent());
        continue;
      }
      llvm_unreachable("Unhandled SILInstructionKind for PMOUseKind::Load?!");
    }
    if (Use.Kind == PMOUseKind::DependenceBase) {
      // An address used as a dependence base does not affect load promotion.
      continue;
    }
    // Keep track of all the uses that aren't loads.
    NonLoadUses[Use.Inst] = ui;
    HasLocalDefinition.set(Use.Inst->getParent());

    if (Use.Kind == PMOUseKind::Escape) {
      // Determine which blocks the value can escape from. We aren't allowed
      // to promote loads in blocks reachable from an escape point.
      HasAnyEscape = true;
    }
  }

  // TheMemory isn't really a use, but we account for the
  // alloc_box/mark_uninitialized as a use so we see it in our dataflow walks.
  NonLoadUses[TheMemory] = ~0U;
  HasLocalDefinition.set(TheMemory->getParent());
}

std::optional<LoadInfo>
AvailableValueDataflowContext::computeAvailableValues(
    SILValue SrcAddr, SILInstruction *Inst,
    SmallVectorImpl<AvailableValue> &AvailableValues) {
  // If the box has escaped at this instruction, we can't safely promote the
  // load.
  if (hasEscapedAt(Inst))
    return std::nullopt;

  SILType LoadTy = SrcAddr->getType().getObjectType();

  // If this is a load/copy_addr from a struct field that we want to promote,
  // compute the access path down to the field so we can determine precise
  // def/use behavior.
  unsigned FirstElt = computeSubelement(SrcAddr, TheMemory);

  // If this is a load from within an enum projection, we can't promote it
  // since we don't track subelements in a type that could be changing.
  if (FirstElt == ~0U)
    return std::nullopt;

  unsigned NumLoadSubElements = getNumSubElements(
      LoadTy, getFunction(), TypeExpansionContext(getFunction()));

  LoadInfo loadInfo = {LoadTy, FirstElt, NumLoadSubElements};

  AvailableValues.resize(NumMemorySubElements);

  // If no bits are demanded, we trivially succeed. This can happen when there
  // is a load of an empty struct.
  if (NumLoadSubElements == 0)
    return loadInfo;

  // Set up the bitvector of elements being demanded by the load.
  SmallBitVector RequiredElts(NumMemorySubElements);
  RequiredElts.set(*loadInfo.range().begin(), *loadInfo.range().end());

  // Find out if we have any available values.
  if (!computeAvailableElementValues(Inst, loadInfo, RequiredElts,
                                     AvailableValues)) {
    return std::nullopt;
  }
  return loadInfo;
}
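
// Sketch with illustrative numbers: for an allocation with 4 subelements and
// a load covering subelements [1, 3), RequiredElts starts with bits 1 and 2
// set. The backwards walk then clears bits as stores providing those
// subelements are found; any required bit that survives with no provider
// causes promotion of this load to fail.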

// This function takes in the current (potentially uninitialized) available
// values for theMemory, and for the subset of AvailableValues corresponding
// to \p address either:
//
// 1. If uninitialized, optionally initialize the available value with a new
//    SILValue. It is optional since in certain cases (for instance, when
//    invalidating) one just wants to skip empty available values.
//
// 2. Given an initialized value, either add the given instruction as an
//    insertion point or state that we have a conflict.
static inline void updateAvailableValuesHelper(
    SingleValueInstruction *theMemory, SILInstruction *inst, SILValue address,
    SmallBitVector &requiredElts, SmallVectorImpl<AvailableValue> &result,
    SmallBitVector &conflictingValues,
    function_ref<std::optional<AvailableValue>(unsigned)> defaultFunc,
    function_ref<bool(AvailableValue &, unsigned)> isSafeFunc) {
  unsigned startSubElt = computeSubelement(address, theMemory);

  // TODO: Is this needed now?
  assert(startSubElt != ~0U && "Store within enum projection not handled");
  auto &f = *theMemory->getFunction();
  for (unsigned i : range(getNumSubElements(address->getType().getObjectType(),
                                            f, TypeExpansionContext(f)))) {
    // If this element is not required, don't fill it in.
    if (!requiredElts[startSubElt + i])
      continue;

    // At this point we know that we will either mark the value as conflicting
    // or give it a value.
    requiredElts[startSubElt + i] = false;

    // First see if we have an entry at all.
    auto &entry = result[startSubElt + i];

    // If we don't...
    if (!entry) {
      // and we are told to initialize it, do so.
      if (auto defaultValue = defaultFunc(i)) {
        entry = std::move(defaultValue.value());
      } else {
        // Otherwise, mark this as a conflicting value. There is some
        // available value here, we just do not know what it is at this
        // point. This ensures that if we visit a kill where we do not have an
        // entry yet, we properly invalidate our state.
        conflictingValues[startSubElt + i] = true;
      }
      continue;
    }

    // Check if our caller thinks that the value currently in entry is
    // compatible with \p inst. If not, mark the values as conflicting and
    // continue.
    if (!isSafeFunc(entry, i)) {
      conflictingValues[startSubElt + i] = true;
      continue;
    }

    // Otherwise, we found another insertion point for our available
    // value. Today this will always be a Store.
    entry.addInsertionPoint(inst);
  }
}
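
// Example of the conflict rule (hypothetical SIL): if subelement 2 is first
// seen via `store %a`, and a second path reaches the same subelement via
// `store %b` with %b != %a, isSafeFunc rejects the merge and bit 2 of
// conflictingValues is set, so dataflow will not arbitrarily pick %a or %b.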

void AvailableValueDataflowContext::updateAvailableValues(
    SILInstruction *Inst, SmallBitVector &RequiredElts,
    SmallVectorImpl<AvailableValue> &Result,
    llvm::SmallDenseMap<SILBasicBlock *, SmallBitVector, 32> &VisitedBlocks,
    SmallBitVector &ConflictingValues) {
  // If we are visiting a load [take], it invalidates the underlying available
  // values.
  //
  // NOTE: Since we are always looking back from the instruction to promote,
  // when we attempt to promote the load [take] itself, we will never hit this
  // code, since we only visit instructions that precede it.
  if (auto *LI = dyn_cast<LoadInst>(Inst)) {
    // First see if this is a load inst that we are tracking.
    if (LoadTakeUses.count(LI)) {
      updateAvailableValuesHelper(
          TheMemory, LI, LI->getOperand(), RequiredElts, Result,
          ConflictingValues,
          /*default*/
          [](unsigned) -> std::optional<AvailableValue> {
            // We never initialize values. We only want to invalidate.
            return std::nullopt;
          },
          /*isSafe*/
          [](AvailableValue &, unsigned) -> bool {
            // Always assume values conflict.
            return false;
          });
      return;
    }
  }

  // Handle store.
  if (auto *SI = dyn_cast<StoreInst>(Inst)) {
    updateAvailableValuesHelper(
        TheMemory, SI, SI->getDest(), RequiredElts, Result, ConflictingValues,
        /*default*/
        [&](unsigned ResultOffset) -> std::optional<AvailableValue> {
          std::optional<AvailableValue> Result;
          Result.emplace(SI->getSrc(), ResultOffset, SI);
          return Result;
        },
        /*isSafe*/
        [&](AvailableValue &Entry, unsigned ResultOffset) -> bool {
          // TODO: This is /really/, /really/, conservative. This basically
          // means that if we do not have an identical store, we will not
          // promote.
          return Entry.getValue() == SI->getSrc() &&
                 Entry.getSubElementNumber() == ResultOffset;
        });
    return;
  }

  // If we got here from an apply, we must either be initializing the element
  // via an @out parameter or we are trying to model an invalidating load of
  // the value (e.g., indirect_in, indirect_inout).

  // If we get here with a copy_addr, we must either be storing into the
  // element or tracking some sort of take of the src. First check if we are
  // taking (in which case, we just track invalidation of src) and continue.
  // Otherwise we must be storing into the copy_addr, so see which loaded
  // subelements are being used, and if so, explode the copy_addr to its
  // individual pieces.
  if (auto *CAI = dyn_cast<CopyAddrInst>(Inst)) {
    // If we have a load take use, we must be tracking a take of CAI's src.
    if (LoadTakeUses.count(CAI)) {
      updateAvailableValuesHelper(
          TheMemory, CAI, CAI->getSrc(), RequiredElts, Result,
          ConflictingValues,
          /*default*/
          [](unsigned) -> std::optional<AvailableValue> {
            // We never give values default initialized values. We only want
            // to invalidate.
            return std::nullopt;
          },
          /*isSafe*/
          [](AvailableValue &, unsigned) -> bool {
            // Always assume values conflict.
            return false;
          });
      return;
    }

    unsigned StartSubElt = computeSubelement(CAI->getDest(), TheMemory);
    assert(StartSubElt != ~0U && "Store within enum projection not handled");
    SILType ValTy = CAI->getDest()->getType();

    bool AnyRequired = false;
    for (unsigned i : range(getNumSubElements(
             ValTy, getFunction(), TypeExpansionContext(getFunction())))) {
      // If this element is not required, don't fill it in.
      AnyRequired = RequiredElts[StartSubElt + i];
      if (AnyRequired)
        break;
    }

    // If this is a copy_addr that doesn't intersect the loaded subelements,
    // just continue with an unmodified load mask.
    if (!AnyRequired)
      return;

    // If the copy_addr is of a non-loadable type, we can't promote it. Just
    // consider it to be a clobber.
    if (CAI->getSrc()->getType().isLoadable(*CAI->getFunction())) {
      // Otherwise, some part of the copy_addr's value is demanded by a load,
      // so we need to explode it to its component pieces. This only expands
      // one level of the copy_addr.
      explodeCopyAddr(CAI);

      // The copy_addr doesn't provide any values, but we've arranged for our
      // iterators to visit the newly generated instructions, which do.
      return;
    }
  }

  if (auto *MD = dyn_cast<MarkDependenceInst>(Inst)) {
    unsigned StartSubElt = computeSubelement(MD->getValue(), TheMemory);
    assert(StartSubElt != ~0U && "Store within enum projection not handled");
    SILType ValTy = MD->getValue()->getType();

    // Check if this mark_dependence provides any required values before
    // potentially bailing out (because it is address-only).
    bool AnyRequired = false;
    for (unsigned i : range(getNumSubElements(
             ValTy, getFunction(), TypeExpansionContext(getFunction())))) {
      // If this element is not required, don't fill it in.
      AnyRequired = RequiredElts[StartSubElt + i];
      if (AnyRequired)
        break;
    }

    // If this is a dependence that doesn't intersect the loaded subelements,
    // just continue with an unmodified load mask.
    if (!AnyRequired)
      return;

    // If the mark_dependence is loadable, promote it.
    if (MD->getValue()->getType().isLoadable(*MD->getFunction())) {
      updateMarkDependenceValues(MD, RequiredElts, Result, VisitedBlocks,
                                 ConflictingValues);
      return;
    }
  }

  // TODO: inout applies should only clobber pieces passed in.

  // Otherwise, this is some unknown instruction; conservatively assume that
  // all values are clobbered.
  RequiredElts.clear();
  ConflictingValues = SmallBitVector(Result.size(), true);
  return;
}

bool AvailableValueDataflowContext::computeAvailableElementValues(
    SILInstruction *StartingFrom, LoadInfo loadInfo,
    SmallBitVector &RequiredElts, SmallVectorImpl<AvailableValue> &Result) {
  llvm::SmallDenseMap<SILBasicBlock *, SmallBitVector, 32> VisitedBlocks;
  SmallBitVector ConflictingValues(Result.size());

  computeAvailableValuesFrom(StartingFrom->getIterator(),
                             StartingFrom->getParent(), RequiredElts, Result,
                             VisitedBlocks, ConflictingValues);
  // If there are no values available at this load point, then we fail to
  // promote this load and there is nothing to do.
  SmallBitVector AvailableValueIsPresent(NumMemorySubElements);

  for (unsigned i : loadInfo.range()) {
    AvailableValueIsPresent[i] = Result[i].getValue();
  }

  // If we do not have any values available, bail.
  if (AvailableValueIsPresent.none())
    return false;

  // Otherwise, if we have any conflicting values, explicitly mask them out of
  // the result, so we don't pick one arbitrary available value.
  if (ConflictingValues.none()) {
    return true;
  }

  // At this point, we know that we have /some/ conflicting values and some
  // available values.
  if (AvailableValueIsPresent.reset(ConflictingValues).none())
    return false;

  // Otherwise, mask out the conflicting values and return true. We have at
  // least one available value.
  int NextIter = ConflictingValues.find_first();
  while (NextIter != -1) {
    assert(NextIter >= 0 && "Int can not be represented?!");
    unsigned Iter = NextIter;
    Result[Iter] = {};
    NextIter = ConflictingValues.find_next(Iter);
  }

  return true;
}

void AvailableValueDataflowContext::computeAvailableValuesFrom(
    SILBasicBlock::iterator StartingFrom, SILBasicBlock *BB,
    SmallBitVector &RequiredElts, SmallVectorImpl<AvailableValue> &Result,
    llvm::SmallDenseMap<SILBasicBlock *, SmallBitVector, 32> &VisitedBlocks,
    SmallBitVector &ConflictingValues) {
  assert(!RequiredElts.none() && "Scanning with a goal of finding nothing?");

  // If there is a potential modification in the current block, scan the block
  // to see if the store, escape, or load [take] is before or after the load.
  // If it is before, check to see if it produces the value we are looking
  // for.
  bool shouldCheckBlock =
      HasLocalDefinition.get(BB) || HasLocalKill.get(BB);
  if (shouldCheckBlock) {
    for (SILBasicBlock::iterator BBI = StartingFrom; BBI != BB->begin();) {
      SILInstruction *TheInst = &*std::prev(BBI);
      // If this instruction is unrelated to the element, ignore it.
      if (!NonLoadUses.count(TheInst) && !LoadTakeUses.count(TheInst)) {
        --BBI;
        continue;
      }

      // Given an interesting instruction, incorporate it into the set of
      // results, and filter down the list of demanded subelements that we
      // still need.
      updateAvailableValues(TheInst, RequiredElts, Result, VisitedBlocks,
                            ConflictingValues);

      // If this satisfied all of the demanded values, we're done.
      if (RequiredElts.none())
        return;

      // Otherwise, keep scanning the block. If the instruction we were
      // looking at just got exploded, don't skip the next instruction.
      if (&*std::prev(BBI) == TheInst)
        --BBI;
    }
  }

  // Otherwise, we need to scan up the CFG looking for available values.
  for (auto PI = BB->pred_begin(), E = BB->pred_end(); PI != E; ++PI) {
    SILBasicBlock *PredBB = *PI;

    // If the predecessor block has already been visited (potentially due to a
    // cycle in the CFG), don't revisit it. We can do this safely because we
    // are optimistically assuming that all incoming elements in a cycle will
    // be the same. If we ever detect a conflicting element, we record it and
    // do not look at the result.
    auto Entry = VisitedBlocks.insert({PredBB, RequiredElts});
    if (!Entry.second) {
      // If we are revisiting a block and asking for different required
      // elements, then anything that doesn't agree is in conflict.
      const auto &PrevRequired = Entry.first->second;
      if (PrevRequired != RequiredElts) {
        ConflictingValues |= (PrevRequired ^ RequiredElts);

        RequiredElts &= ~ConflictingValues;
        if (RequiredElts.none())
          return;
      }
      continue;
    }

    // Make sure to pass in the same set of required elements for each pred.
    SmallBitVector Elts = RequiredElts;
    computeAvailableValuesFrom(PredBB->end(), PredBB, Elts, Result,
                               VisitedBlocks, ConflictingValues);

    // If we have any conflicting values, don't bother searching for them.
    RequiredElts &= ~ConflictingValues;
    if (RequiredElts.none())
      return;
  }
}
|
|
|
|
/// Explode a copy_addr instruction of a loadable type into lower level
/// operations like loads, stores, retains, releases, retain_value, etc.
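///
/// For example (illustrative SIL; the exact ownership qualifiers depend on
/// the lowering of the copied type and on the copy_addr's flags):
///
///   copy_addr %src to [init] %dest : $*T
///
/// becomes roughly:
///
///   %v = load [copy] %src : $*T
///   store %v to [init] %dest : $*T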
void AvailableValueDataflowContext::explodeCopyAddr(CopyAddrInst *CAI) {
  LLVM_DEBUG(llvm::dbgs() << " -- Exploding copy_addr: " << *CAI << "\n");

  SILType ValTy = CAI->getDest()->getType().getObjectType();

  SILFunction *F = CAI->getFunction();
  auto &TL = F->getTypeLowering(ValTy);

  // Keep track of the new instructions emitted.
  SmallVector<SILInstruction *, 4> NewInsts;
  SILBuilder B(CAI, &NewInsts);
  B.setCurrentDebugScope(CAI->getDebugScope());

  // Use type lowering to lower the copy_addr into a load sequence + store
  // sequence appropriate for the type.
  SILValue StoredValue =
      TL.emitLoadOfCopy(B, CAI->getLoc(), CAI->getSrc(), CAI->isTakeOfSrc());

  TL.emitStoreOfCopy(B, CAI->getLoc(), StoredValue, CAI->getDest(),
                     CAI->isInitializationOfDest());

  // Update our internal state for this being gone.
  NonLoadUses.erase(CAI);
  LoadTakeUses.erase(CAI);
  // NOTE: We do not need to update HasLocalKill since the copy_addr
  // and the loads/stores will have the same parent block.

  // Remove the copy_addr from Uses. A single copy_addr can appear multiple
  // times if the source and dest are to elements within a single aggregate,
  // but we only want to pick up the CopyAddrKind from the store.
  PMOMemoryUse LoadUse, StoreUse;
  for (auto &Use : Uses) {
    if (Use.Inst != CAI)
      continue;

    if (Use.Kind == PMOUseKind::Load) {
      assert(LoadUse.isInvalid());
      LoadUse = Use;
    } else {
      assert(StoreUse.isInvalid());
      StoreUse = Use;
    }

    Use.Inst = nullptr;

    // Keep scanning in case the copy_addr appears multiple times.
  }

  assert((LoadUse.isValid() || StoreUse.isValid()) &&
         "we should have a load or a store, possibly both");
  assert(StoreUse.isInvalid() || StoreUse.Kind == Assign ||
         StoreUse.Kind == Initialization || StoreUse.Kind == InitOrAssign);

  // Now that we've emitted a bunch of instructions, including a load and store
  // but also including other stuff, update the internal state of
  // LifetimeChecker to reflect them.

  // Update the instructions that touch the memory. NewInsts can grow as this
  // iterates, so we can't use a foreach loop.
  for (auto *NewInst : NewInsts) {
    switch (NewInst->getKind()) {
    default:
      NewInst->dump();
      llvm_unreachable("Unknown instruction generated by copy_addr lowering");

    case SILInstructionKind::StoreInst:
      // If it is a store to the memory object (as opposed to a store to
      // something else), track it as an access.
      if (StoreUse.isValid()) {
        StoreUse.Inst = NewInst;
        // If our store use by the copy_addr is an assign, then we know that
        // before we store the new value, we loaded the old value, implying
        // that our store is technically initializing memory when it occurs.
        // So change the kind to Initialization.
        if (StoreUse.Kind == Assign)
          StoreUse.Kind = Initialization;
        NonLoadUses[NewInst] = Uses.size();
        Uses.push_back(StoreUse);
      }
      continue;

    case SILInstructionKind::LoadInst:
      // If it is a load from the memory object (as opposed to a load from
      // something else), track it as an access. We need to explicitly check
      // to see if the load accesses "TheMemory" because it could either be a
      // load for the copy_addr source, or it could be a load corresponding to
      // the "assign" operation on the destination of the copy_addr.
      if (LoadUse.isValid() &&
          getAccessPathRoot(NewInst->getOperand(0)) == TheMemory) {
        if (auto *LI = dyn_cast<LoadInst>(NewInst)) {
          if (LI->getOwnershipQualifier() == LoadOwnershipQualifier::Take) {
            LoadTakeUses.insert(LI);
            HasLocalKill.set(LI->getParent());
          }
        }
        LoadUse.Inst = NewInst;
        Uses.push_back(LoadUse);
      }
      continue;

    case SILInstructionKind::RetainValueInst:
    case SILInstructionKind::StrongRetainInst:
    case SILInstructionKind::StrongReleaseInst:
    case SILInstructionKind::ReleaseValueInst: // Destroy overwritten value
      // These are ignored.
      continue;
    }
  }

  // Next, remove the copy_addr itself.
  deleter.forceDelete(CAI);
}

/// Promote a mark_dependence instruction of a loadable type by rewriting each
/// required available subelement of its dependent value to a promoted
/// mark_dependence on the same base.
void AvailableValueDataflowContext::updateMarkDependenceValues(
    MarkDependenceInst *md,
    SmallBitVector &RequiredElts,
    SmallVectorImpl<AvailableValue> &Result,
    llvm::SmallDenseMap<SILBasicBlock *, SmallBitVector, 32> &VisitedBlocks,
    SmallBitVector &ConflictingValues) {

  // Recursively compute all currently required available values up to the
  // mark_dependence, regardless of whether they are required for the
  // mark_dependence.
  computeAvailableValuesFrom(md->getIterator(), md->getParent(), RequiredElts,
                             Result, VisitedBlocks, ConflictingValues);

  unsigned firstMDElt = computeSubelement(md->getValue(), TheMemory);
  // If the address is an enum projection, we can't promote it since we don't
  // track subelements in a type that could be changing.
  if (firstMDElt == ~0U) {
    RequiredElts.clear();
    ConflictingValues = SmallBitVector(Result.size(), true);
    return;
  }
  SILType valueTy = md->getValue()->getType().getObjectType();
  unsigned numMDSubElements = getNumSubElements(
      valueTy, getFunction(), TypeExpansionContext(getFunction()));

  // Update each required subelement of the mark_dependence value.
  for (unsigned subIdx = firstMDElt; subIdx < firstMDElt + numMDSubElements;
       ++subIdx) {
    // If the available value has a conflict, then AvailableValue still holds
    // a value that was available on a different path. It cannot be used.
    if (ConflictingValues[subIdx]) {
      Result[subIdx] = {};
      continue;
    }
    // If no value is available for this subelement, or it was never required,
    // then no promotion happens.
    if (auto &availableVal = Result[subIdx]) {
      auto newMD = createAvailableMarkDependence(md, availableVal);
      // Update the available value. This may invalidate the Result.
      Result[subIdx] = AvailableValue(newMD, subIdx, md);
    }
  }
}

// Find or create a mark_dependence that is a promotion of 'md' for 'value'.
// Return nullptr if the mark_dependence cannot be promoted.
// This does not currently allow phi creation.
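//
// For example (illustrative SIL): if the available value for the dependent
// address is %v, then
//
//   %md = mark_dependence %addr : $*T on %base
//
// is rewritten to depend directly on the promoted value:
//
//   %newMD = mark_dependence %v : $T on %base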
SILValue AvailableValueDataflowContext::
createAvailableMarkDependence(MarkDependenceInst *md,
                              AvailableValue &availableVal) {
  SILValue value = availableVal.getValue();
  // If the allocation is replaced, no copies are created for promoted values
  // and any promoted mark_dependence must be reused.
  if (optimizationMode == OptimizationMode::ReplaceAlloc) {
    auto mdIter = md->getIterator();
    auto instBegin = md->getParentBlock()->begin();
    while (mdIter != instBegin) {
      --mdIter;
      auto *newMD = dyn_cast<MarkDependenceInst>(&*mdIter);
      if (!newMD)
        break;

      if (newMD->getValue() == value
          && newMD->getBase() == md->getBase()
          && newMD->dependenceKind() == md->dependenceKind()) {
        return newMD;
      }
    }
  } else {
    // With OptimizationMode::PreserveAlloc, always create a new copy for each
    // promoted value. This creates separate ownership, which is needed for
    // each promoted load, and prevents reusing the mark_dependence later (via
    // the code above) if the allocation is eliminated.
    value = ownershipFixup.getSingleOwnedValue(availableVal);
    if (!value)
      return SILValue();
  }
  LLVM_DEBUG(llvm::dbgs() << " -- Promoting mark_dependence: " << *md
                          << " source: " << value << "\n");
  auto *newMD = SILBuilderWithScope(md)
                    .createMarkDependence(md->getLoc(), value, md->getBase(),
                                          md->dependenceKind());
  ownershipFixup.insertedInsts.push_back(newMD);
  return newMD;
}

bool AvailableValueDataflowContext::hasEscapedAt(SILInstruction *I) {
  // Return true if the box has escaped at the specified instruction. We are
  // not allowed to do load promotion in an escape region.

  // FIXME: This is not an aggressive implementation. :)

  // TODO: At some point, we should special case closures that just *read* from
  // the escaped value (by looking at the body of the closure). They should not
  // prevent load promotion, and will allow promoting values like X in regions
  // dominated by "... && X != 0".
  return HasAnyEscape;
}

static SILType getMemoryType(AllocationInst *memory) {
  // Compute the type of the memory object.
  if (auto *abi = dyn_cast<AllocBoxInst>(memory)) {
    assert(abi->getBoxType()->getLayout()->getFields().size() == 1 &&
           "optimizing multi-field boxes not implemented");
    return getSILBoxFieldType(TypeExpansionContext(*abi->getFunction()),
                              abi->getBoxType(), abi->getModule().Types, 0);
  }

  assert(isa<AllocStackInst>(memory));
  return cast<AllocStackInst>(memory)->getElementType();
}

//===----------------------------------------------------------------------===//
// Optimize dead allocation:
// Fully promote each access
//===----------------------------------------------------------------------===//

namespace {

class PromotableInstructions {
  // All promotable instructions share a vector of available values.
  SmallVectorImpl<AvailableValue> &allAvailableValues;

  SmallVector<SILInstruction *> promotableInsts;
  SmallVector<std::pair<unsigned, unsigned>, 8> availableValueOffsets;
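
  // Note: 'promotableInsts' and 'availableValueOffsets' are parallel arrays
  // indexed by push order; availableValueOffsets[i] is the half-open slice
  // [start, end) of 'allAvailableValues' belonging to promotableInsts[i].
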
public:
  PromotableInstructions(SmallVectorImpl<AvailableValue> &allAvailableValues)
      : allAvailableValues(allAvailableValues) {}

  unsigned size() const { return promotableInsts.size(); }

  void push(SILInstruction *instruction) {
    promotableInsts.push_back(instruction);
  }

  // Available values must be initialized in the same order that the
  // instructions are pushed. Return the instruction's index.
  unsigned
  initializeAvailableValues(SILInstruction *instruction,
                            SmallVectorImpl<AvailableValue> &&availableValues) {
    unsigned nextInstIdx = availableValueOffsets.size();
    assert(instruction == promotableInsts[nextInstIdx]);

    unsigned startOffset = allAvailableValues.size();
    unsigned endOffset = startOffset + availableValues.size();
    availableValueOffsets.push_back({startOffset, endOffset});
    std::move(availableValues.begin(), availableValues.end(),
              std::back_inserter(allAvailableValues));
    return nextInstIdx;
  }

  ArrayRef<SILInstruction *> instructions() const { return promotableInsts; }

  ArrayRef<AvailableValue> availableValues(unsigned index) {
    return mutableAvailableValues(index);
  }

  MutableArrayRef<AvailableValue> mutableAvailableValues(unsigned index) {
    unsigned startOffset, endOffset;
    std::tie(startOffset, endOffset) = availableValueOffsets[index];
    return {allAvailableValues.begin() + startOffset,
            allAvailableValues.begin() + endOffset};
  }

#ifndef NDEBUG
  void verify() {
    for (unsigned i : range(promotableInsts.size())) {
      promotableInsts[i]->verifyOperandOwnership();
      assert(!availableValues(i).empty()
             && "Value without any available values?!");
    }
  }
#endif
};

} // end anonymous namespace

namespace {

struct Promotions {
  SmallVector<AvailableValue, 32> allAvailableValues;
  PromotableInstructions loadTakes;
  PromotableInstructions destroys;
  PromotableInstructions markDepBases;

  Promotions()
      : loadTakes(allAvailableValues), destroys(allAvailableValues),
        markDepBases(allAvailableValues) {}

#ifndef NDEBUG
  void verify() {
    loadTakes.verify();
    destroys.verify();
    markDepBases.verify();
  }
#endif
};

} // end anonymous namespace

namespace {

/// This performs load promotion and deletes synthesized allocations if all
/// loads can be removed.
class OptimizeDeadAlloc {

  SILModule &Module;

  /// This is either an alloc_box or alloc_stack instruction.
  AllocationInst *TheMemory;

  /// This is the SILType of the memory object.
  SILType MemoryType;

  /// The number of primitive subelements across all elements of this memory
  /// value.
  unsigned NumMemorySubElements;

  SmallVectorImpl<PMOMemoryUse> &Uses;
  SmallVectorImpl<SILInstruction *> &Releases;

  DeadEndBlocks &deadEndBlocks;

  InstructionDeleter &deleter;

  DominanceInfo *domInfo;

  /// A structure that we use to compute our available values.
  AvailableValueDataflowContext DataflowContext;

  Promotions promotions;

  SmallBlotSetVector<SILValue, 32> valuesNeedingLifetimeCompletion;

public:
  SILFunction *getFunction() const { return TheMemory->getFunction(); }

  bool isTrivial() const { return MemoryType.isTrivial(getFunction()); }

  OptimizeDeadAlloc(AllocationInst *memory,
                    SmallVectorImpl<PMOMemoryUse> &uses,
                    SmallVectorImpl<SILInstruction *> &releases,
                    DeadEndBlocks &deadEndBlocks, InstructionDeleter &deleter,
                    DominanceInfo *domInfo)
      : Module(memory->getModule()), TheMemory(memory),
        MemoryType(getMemoryType(memory)),
        NumMemorySubElements(getNumSubElements(
            MemoryType, *memory->getFunction(),
            TypeExpansionContext(*memory->getFunction()))),
        Uses(uses), Releases(releases), deadEndBlocks(deadEndBlocks),
        deleter(deleter), domInfo(domInfo),
        DataflowContext(TheMemory, NumMemorySubElements,
                        OptimizationMode::ReplaceAlloc, uses, deleter,
                        deadEndBlocks) {}

  /// If the allocation is an autogenerated allocation that is only stored to
  /// (after load promotion) then remove it completely.
  bool tryToRemoveDeadAllocation();

private:
  SILInstruction *collectUsesForPromotion();

  /// Return true if a mark_dependence can be promoted. If so, this initializes
  /// the available values in promotions.
  bool canPromoteMarkDepBase(MarkDependenceInst *md);

  /// Return true if a load [take] or destroy_addr can be promoted. If so, this
  /// initializes the available values in promotions.
  bool canPromoteTake(SILInstruction *i,
                      PromotableInstructions &promotableInsts);

  SILValue promoteMarkDepBase(MarkDependenceInst *md,
                              ArrayRef<AvailableValue> availableValues);

  void promoteMarkDepAddrBase(MarkDependenceAddrInst *md,
                              ArrayRef<AvailableValue> availableValues);

  /// Promote a load [take], cleaning up everything except for RAUWing the
  /// instruction with the aggregated result. The routine returns the new
  /// aggregated result to the caller and expects the caller to eventually RAUW
  /// \p inst with the return value. The reason why we do this is to allow for
  /// the caller to work around invalidation issues by not deleting the load
  /// [take] until after all load [take]s have been cleaned up.
  ///
  /// \returns the value that the caller will RAUW with \p inst.
  SILValue promoteLoadTake(LoadInst *inst,
                           ArrayRef<AvailableValue> availableValues);
  void promoteDestroyAddr(DestroyAddrInst *dai,
                          ArrayRef<AvailableValue> availableValues);

  bool canRemoveDeadAllocation();

  void removeDeadAllocation();
};

} // end anonymous namespace

// We don't want to remove allocations that are required for useful debug
// information at -O0. As such, we only remove allocations if:
//
// 1. They are in a transparent function.
// 2. They are in a normal function, but didn't come from a VarDecl, or came
//    from one that was autogenerated or inlined from a transparent function.
static bool isRemovableAutogeneratedAllocation(AllocationInst *TheMemory) {
  SILLocation loc = TheMemory->getLoc();
  return TheMemory->getFunction()->isTransparent() ||
         !loc.getAsASTNode<VarDecl>() || loc.isAutoGenerated() ||
         loc.is<MandatoryInlinedLocation>();
}

bool OptimizeDeadAlloc::tryToRemoveDeadAllocation() {
  if (!canRemoveDeadAllocation()) {
    DataflowContext.deleteInsertedInsts(deleter);
    return false;
  }
  removeDeadAllocation();
  // Once the entire allocation is promoted, none of the instructions promoted
  // during dataflow should need ownership fixup.
  return true;
}

bool OptimizeDeadAlloc::canRemoveDeadAllocation() {
  assert(TheMemory->getFunction()->hasOwnership() &&
         "Can only eliminate dead allocations with ownership enabled");
  assert((isa<AllocBoxInst>(TheMemory) || isa<AllocStackInst>(TheMemory)) &&
         "Unhandled allocation case");

  if (!isRemovableAutogeneratedAllocation(TheMemory))
    return false;

  // Check the uses list to see if there are any non-store uses left over after
  // load promotion and other things PMO does.
  if (auto *badUser = collectUsesForPromotion()) {
    LLVM_DEBUG(llvm::dbgs() << "*** Failed to remove autogenerated alloc: "
                               "kept alive by: "
                            << *badUser);
    return false;
  }

  for (auto *md : promotions.markDepBases.instructions()) {
    if (!canPromoteMarkDepBase(cast<MarkDependenceInst>(md)))
      return false;
  }
  if (isTrivial()) {
    return true;
  }
  for (auto *load : promotions.loadTakes.instructions()) {
    if (!canPromoteTake(load, promotions.loadTakes))
      return false;
  }
  for (auto *destroy : promotions.destroys.instructions()) {
    if (!canPromoteTake(destroy, promotions.destroys))
      return false;
  }
  // Gather up all found available values before promoting anything so we can
  // fix up lifetimes later if we need to.
  for (auto pmoMemUse : Uses) {
    if (pmoMemUse.Inst && pmoMemUse.Kind == PMOUseKind::Initialization) {
      if (isa<MarkDependenceInst>(pmoMemUse.Inst)) {
        // A mark_dependence of the dependent value is considered both a load
        // and an init use. They can simply be deleted when the allocation is
        // dead.
        continue;
      }
      // Today if we promote, this is always a store, since we would have
      // blown up the copy_addr otherwise. Given that, always make sure we
      // clean up the src as appropriate after we optimize.
      auto *si = dyn_cast<StoreInst>(pmoMemUse.Inst);
      if (!si)
        return false;
      auto src = si->getSrc();

      // Bail if src has any uses that are forwarding unowned uses. This
      // allows us to know that we never have to deal with forwarding unowned
      // instructions like br. These are corner cases that complicate the
      // logic below.
      for (auto *use : src->getUses()) {
        if (use->getOperandOwnership() == OperandOwnership::ForwardingUnowned)
          return false;
      }
      // FIXME: Lifetime completion on Boundary::Liveness requires that 'src'
      // has no escaping uses. Check escaping uses here and either bail out or
      // request completion on Boundary::Availability.
      valuesNeedingLifetimeCompletion.insert(src);
    }
  }
  return true;
}

// Collect all uses that require promotion before this allocation can be
// eliminated. Returns nullptr on success. Upon failure, return the first
// instruction corresponding to a use that cannot be promoted.
//
// Populates the promotable instruction lists in 'promotions'.
SILInstruction *OptimizeDeadAlloc::collectUsesForPromotion() {
  for (auto &u : Uses) {
    // Ignore removed instructions.
    if (u.Inst == nullptr)
      continue;

    switch (u.Kind) {
    case PMOUseKind::Assign:
      // Until we can promote the value being destroyed by the assign, we
      // cannot remove allocations with such assigns.
      return u.Inst;
    case PMOUseKind::InitOrAssign:
      continue; // These don't prevent removal.
    case PMOUseKind::Load:
      // For now only handle takes from alloc_stack.
      //
      // TODO: It should be implementable, but it has not been needed yet.
      if (auto *li = dyn_cast<LoadInst>(u.Inst)) {
        if (li->getOwnershipQualifier() == LoadOwnershipQualifier::Take) {
          promotions.loadTakes.push(li);
          continue;
        }
      }
      if (isa<MarkDependenceInst>(u.Inst)) {
        // A mark_dependence source use does not prevent removal. The use
        // collector already looks through them to find other uses.
        continue;
      }
      return u.Inst;
    case PMOUseKind::DependenceBase:
      promotions.markDepBases.push(u.Inst);
      continue;
    case PMOUseKind::Initialization:
      if (!isa<ApplyInst>(u.Inst) &&
          // A copy_addr that is not a take affects the retain count
          // of the source.
          (!isa<CopyAddrInst>(u.Inst)
           || cast<CopyAddrInst>(u.Inst)->isTakeOfSrc())) {
        continue;
      }
      LLVM_FALLTHROUGH;
    case PMOUseKind::IndirectIn:
    case PMOUseKind::InOutUse:
    case PMOUseKind::Escape:
      return u.Inst; // These do prevent removal.
    }
  }
  // Ignore destroys of trivial values. They are destroy_value instructions
  // that only destroy the dead box itself.
  if (!isTrivial()) {
    // Non-trivial allocations require ownership cleanup. We only promote
    // alloc_stack in that case--all releases must be destroy_addr.
    for (auto *release : Releases) {
      // We stash all of the destroy_addr that we see.
      if (auto *dai = dyn_cast_or_null<DestroyAddrInst>(release)) {
        promotions.destroys.push(dai);
        continue;
      }
      return release;
    }
  }
  return nullptr;
}

bool OptimizeDeadAlloc::canPromoteMarkDepBase(MarkDependenceInst *md) {
  SILValue srcAddr = md->getBase();
  SmallVector<AvailableValue, 8> availableValues;
  auto loadInfo =
      DataflowContext.computeAvailableValues(srcAddr, md, availableValues);
  if (!loadInfo.has_value())
    return false;

  unsigned index = promotions.markDepBases.initializeAvailableValues(
      md, std::move(availableValues));

  SILType baseTy = loadInfo->loadType;
  if (auto *abi = dyn_cast<AllocBoxInst>(TheMemory)) {
    if (baseTy == abi->getType()) {
      baseTy = MemoryType.getObjectType();
    }
  }
  return isFullyAvailable(baseTy, loadInfo->firstElt,
                          promotions.markDepBases.availableValues(index));
}

/// Return true if we can promote the given take (a load [take] or
/// destroy_addr).
bool OptimizeDeadAlloc::canPromoteTake(
    SILInstruction *inst, PromotableInstructions &promotableInsts) {
  SILValue address = inst->getOperand(0);

  // We cannot promote destroys of address-only types, because we can't expose
  // the load.
  if (address->getType().isAddressOnly(*inst->getFunction()))
    return false;

  SmallVector<AvailableValue, 8> availableValues;
  auto loadInfo = DataflowContext.computeAvailableValues(address, inst,
                                                         availableValues);
  if (!loadInfo.has_value())
    return false;

  // Now check that we can perform a take upon our available values. This
  // implies today that our value is fully available. If the value is not
  // fully available, we would need to split stores to promote this
  // destroy_addr. We do not support that yet.
  AvailableValueAggregator agg(inst, availableValues, Uses, deadEndBlocks,
                               AvailableValueExpectedOwnership::Take);
  if (!agg.canTake(loadInfo->loadType, loadInfo->firstElt))
    return false;

  // As a final check, make sure that we have an available value for each
  // subelement; if not, bail.
  for (const auto &av : availableValues)
    if (!av.Value)
      return false;

  // Ok, we can promote this take. Move the temporary list's contents into the
  // final AvailableValues list.
  promotableInsts.initializeAvailableValues(inst, std::move(availableValues));

  return true;
}

void OptimizeDeadAlloc::removeDeadAllocation() {
  for (auto idxVal : llvm::enumerate(promotions.markDepBases.instructions())) {
    auto vals = promotions.markDepBases.availableValues(idxVal.index());
    if (auto *mdi = dyn_cast<MarkDependenceInst>(idxVal.value())) {
      promoteMarkDepBase(mdi, vals);
      continue;
    }
    auto *mda = cast<MarkDependenceAddrInst>(idxVal.value());
    promoteMarkDepAddrBase(mda, vals);
  }

  // If our memory is trivially typed, we can just remove it without needing to
  // consider if the stored value needs to be destroyed. So at this point,
  // delete the memory!
  if (isTrivial()) {
    LLVM_DEBUG(llvm::dbgs() << "*** Removing autogenerated trivial allocation: "
                            << *TheMemory);

    // If it is safe to remove, do it. Recursively remove all instructions
    // hanging off the allocation instruction, then return success. Let the
    // caller remove the allocation itself to avoid iterator invalidation.
    deleter.forceDeleteWithUsers(TheMemory);
    return;
  }

  // Since our load [take]s may be available values for our
  // destroy_addr/load [take]s, we promote the destroy_addrs first and then
  // handle the load [take]s with extra rigour later to handle that
  // possibility.
  for (auto idxVal : llvm::enumerate(promotions.destroys.instructions())) {
    auto *dai = cast<DestroyAddrInst>(idxVal.value());
    auto vals = promotions.destroys.availableValues(idxVal.index());
    promoteDestroyAddr(dai, vals);
    // We do not need to unset releases, since we are going to exit here.
  }

  llvm::SmallMapVector<LoadInst *, SILValue, 32> loadsToDelete;
  for (auto idxVal : llvm::enumerate(promotions.loadTakes.instructions())) {
    for (auto &availableVal :
         promotions.loadTakes.mutableAvailableValues(idxVal.index())) {
      auto *availableLoad = dyn_cast<LoadInst>(availableVal.Value);
      if (!availableLoad)
        continue;

      auto iter = loadsToDelete.find(availableLoad);
      if (iter == loadsToDelete.end())
        continue;

      SILValue newValue = iter->second;
      assert(newValue && "We should never store a nil SILValue into this map");
      availableVal.Value = newValue;
    }

    auto *loadTake = cast<LoadInst>(idxVal.value());
    auto vals = promotions.loadTakes.availableValues(idxVal.index());
    SILValue result = promoteLoadTake(loadTake, vals);
    assert(result);

    // We cannot erase the load [take] here, since a load [take] that we are
    // promoting could be an available value for another load [take]. Consider
    // the following SIL:
    //
    //   %mem = alloc_stack
    //   store %arg to [init] %mem
    //   %0 = load [take] %mem
    //   store %0 to [init] %mem
    //   %1 = load [take] %mem
    //   destroy_value %1
    //   dealloc_stack %mem
    //
    // In such a case, we are going to delete %0 here, but %0 is an available
    // value for %1, so we stash the replacement in 'loadsToDelete' and defer
    // the RAUW/erase until all load [take] promotions are complete.
    auto insertIter = loadsToDelete.insert({loadTake, result});
    valuesNeedingLifetimeCompletion.erase(loadTake);
    (void)insertIter;
    assert(insertIter.second && "loadTakeState doesn't have unique loads?!");
  }

  // Now that we have promoted all of our load [take], perform the actual
  // RAUW/removal.
  for (auto p : loadsToDelete) {
    LoadInst *li = p.first;
    SILValue newValue = p.second;
    li->replaceAllUsesWith(newValue);
    deleter.forceDelete(li);
  }

  LLVM_DEBUG(llvm::dbgs() << "*** Removing autogenerated non-trivial alloc: "
                          << *TheMemory);

  // If it is safe to remove, do it. Recursively remove all instructions
  // hanging off the allocation instruction, then return success.
  deleter.forceDeleteWithUsers(TheMemory);
  DataflowContext.verifyOwnership(deadEndBlocks);

  // Now look at all of our available values and complete any of their
  // post-dominating consuming use sets. This can happen if we have an enum
  // that is known dynamically none along a path. This is dynamically correct,
  // but cannot be represented in OSSA so we insert these destroys along said
  // path.
  OSSACompleteLifetime completion(TheMemory->getFunction(), domInfo,
                                  deadEndBlocks);

  while (!valuesNeedingLifetimeCompletion.empty()) {
    auto optV = valuesNeedingLifetimeCompletion.pop_back_val();
    if (!optV)
      continue;
    SILValue v = *optV;
    // Lexical enums can have incomplete lifetimes in non-payload paths that
    // don't end in unreachable. Force their lifetime to end immediately after
    // the last use instead.
    auto boundary = v->getType().isOrHasEnum()
                        ? OSSACompleteLifetime::Boundary::Liveness
                        : OSSACompleteLifetime::Boundary::Availability;
    LLVM_DEBUG(llvm::dbgs() << "Completing lifetime of: ");
    LLVM_DEBUG(v->dump());
    completion.completeOSSALifetime(v, boundary);
  }
}

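/// Rewrite a promotable mark_dependence on the allocation's address: chain a
/// new mark_dependence on each available base value onto the dependent value,
/// RAUW the original instruction with the resulting value, and delete it if
/// it is now dead.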
SILValue OptimizeDeadAlloc::promoteMarkDepBase(
    MarkDependenceInst *md, ArrayRef<AvailableValue> availableValues) {
  LLVM_DEBUG(llvm::dbgs() << " *** Promoting mark_dependence base: " << *md);
  SILBuilderWithScope B(md);
  SILValue dependentValue = md->getValue();
  for (auto &availableValue : availableValues) {
    dependentValue =
        B.createMarkDependence(md->getLoc(), dependentValue,
                               availableValue.getValue(), md->dependenceKind());
  }
  LLVM_DEBUG(llvm::dbgs() << " To value: " << dependentValue);
  md->replaceAllUsesWith(dependentValue);
  deleter.deleteIfDead(md);
  return dependentValue;
}

void OptimizeDeadAlloc::promoteMarkDepAddrBase(
    MarkDependenceAddrInst *md, ArrayRef<AvailableValue> availableValues) {
  SILValue dependentAddress = md->getAddress();
  LLVM_DEBUG(llvm::dbgs() << " *** Promoting mark_dependence_addr base: "
                          << *md
                          << " To address: " << dependentAddress);
  SILBuilderWithScope B(md);
  for (auto &availableValue : availableValues) {
    B.createMarkDependenceAddr(md->getLoc(), dependentAddress,
                               availableValue.getValue(),
                               md->dependenceKind());
  }
  deleter.deleteIfDead(md);
}

SILValue
OptimizeDeadAlloc::promoteLoadTake(LoadInst *li,
                                   ArrayRef<AvailableValue> availableValues) {
  assert(li->getOwnershipQualifier() == LoadOwnershipQualifier::Take &&
         "load [copy], load [trivial], load should be handled by "
         "promoteLoadCopy");
  SILValue address = li->getOperand();
  SILType loadTy = address->getType().getObjectType();

  // Compute the access path down to the field so we can determine precise
  // def/use behavior.
  unsigned firstElt = computeSubelement(address, TheMemory);

  // Aggregate together all of the subelements into something that has the
  // same type as the load did, and emit smaller loads for any subelements
  // that were not available.
  AvailableValueAggregator agg(li, availableValues, Uses, deadEndBlocks,
                               AvailableValueExpectedOwnership::Take);
  SILValue newVal = agg.aggregateValues(loadTy, address, firstElt);
  assert(newVal);

  ++NumLoadTakePromoted;

  LLVM_DEBUG(llvm::dbgs() << " *** Promoting load_take: " << *li);
  LLVM_DEBUG(llvm::dbgs() << " To value: " << *newVal);

  // Our parent RAUWs with newVal/erases li.
  return newVal;
}

// DestroyAddr is a composed operation merging load [take] + destroy_value. If
// the implicit load's value is available, explode it.
//
// NOTE: We only do this if we have a fully available value.
//
// Note that we handle the general case of a destroy_addr of a piece of the
// memory object, not just destroy_addrs of the entire thing.
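//
// For example (illustrative SIL, assuming the stored value %v is fully
// available at the destroy point):
//
//   store %v to [init] %mem : $*T
//   destroy_addr %mem : $*T
//
// has its destroy rewritten to consume the promoted value directly:
//
//   destroy_value %v : $T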
void OptimizeDeadAlloc::promoteDestroyAddr(
    DestroyAddrInst *dai, ArrayRef<AvailableValue> availableValues) {
  SILValue address = dai->getOperand();
  SILType loadTy = address->getType().getObjectType();

  // Compute the access path down to the field so we can determine precise
  // def/use behavior.
  unsigned firstElt = computeSubelement(address, TheMemory);

  // Aggregate together all of the subelements into something that has the
  // same type as the load did, and emit smaller loads for any subelements
  // that were not available.
  AvailableValueAggregator agg(dai, availableValues, Uses, deadEndBlocks,
                               AvailableValueExpectedOwnership::Take);
  SILValue newVal = agg.aggregateValues(loadTy, address, firstElt);

  ++NumDestroyAddrPromoted;

  LLVM_DEBUG(llvm::dbgs() << " *** Promoting destroy_addr: " << *dai);
  LLVM_DEBUG(llvm::dbgs() << " To value: " << *newVal);

  SILBuilderWithScope(dai).emitDestroyValueOperation(dai->getLoc(), newVal);
  deleter.forceDelete(dai);
}

//===----------------------------------------------------------------------===//
// Top Level Entrypoints
//===----------------------------------------------------------------------===//

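/// Return \p i as an AllocationInst if it is an alloc_box or alloc_stack that
/// this pass is willing to optimize; otherwise return nullptr.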
static AllocationInst *getOptimizableAllocation(SILInstruction *i) {
  if (!isa<AllocBoxInst>(i) && !isa<AllocStackInst>(i)) {
    return nullptr;
  }

  auto *alloc = cast<AllocationInst>(i);

  // If our aggregate has unreferenceable storage, we can't optimize. Return
  // nullptr.
  if (getMemoryType(alloc).aggregateHasUnreferenceableStorage())
    return nullptr;

  // Do not perform this on move only values since we introduce copies to
  // promote things.
  if (getMemoryType(alloc).isMoveOnly())
    return nullptr;

  // Don't promote large types.
  auto &mod = alloc->getFunction()->getModule();
  if (EnableAggressiveExpansionBlocking &&
      mod.getOptions().UseAggressiveReg2MemForCodeSize &&
      !shouldExpand(mod, alloc->getType().getObjectType()))
    return nullptr;

  // Otherwise we are good to go. Let's try to optimize this memory!
  return alloc;
}

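/// Walk every block of \p fn and try to completely eliminate each optimizable
/// allocation. Returns true if any allocation was removed.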
bool swift::eliminateDeadAllocations(SILFunction *fn, DominanceInfo *domInfo) {
  if (!fn->hasOwnership()) {
    return false;
  }
  bool changed = false;
  DeadEndBlocks deadEndBlocks(fn);

  for (auto &bb : *fn) {
    InstructionDeleter deleter;
    for (SILInstruction &inst : bb.deletableInstructions()) {
      // First see if inst is an allocation that we can optimize. If not, skip
      // it.
      AllocationInst *alloc = getOptimizableAllocation(&inst);
      if (!alloc) {
        continue;
      }

      LLVM_DEBUG(llvm::dbgs()
                 << "*** PMO Dead Allocation Elimination looking at: "
                 << *alloc);
      PMOMemoryObjectInfo memInfo(alloc);

      // Set up the data structures used to collect the uses of the allocation.
      SmallVector<PMOMemoryUse, 16> uses;
      SmallVector<SILInstruction *, 4> destroys;

      // Walk the use list of the pointer, collecting them. If we are not able
      // to optimize, skip this value. *NOTE* We may still scalarize values
      // inside the value.
      if (!collectPMOElementUsesAndDestroysFrom(memInfo, uses, destroys)) {
        continue;
      }
      OptimizeDeadAlloc optimizeDeadAlloc(alloc, uses, destroys, deadEndBlocks,
                                          deleter, domInfo);
      if (optimizeDeadAlloc.tryToRemoveDeadAllocation()) {
        deleter.cleanupDeadInstructions();
        ++NumAllocRemoved;
        changed = true;
      }
    }
  }
  return changed;
}

namespace {

class PredictableDeadAllocationElimination : public SILFunctionTransform {
  void run() override {
    auto *func = getFunction();
    if (!func->hasOwnership())
      return;

    LLVM_DEBUG(llvm::dbgs() << "Looking at: " << func->getName() << "\n");
    auto *da = getAnalysis<DominanceAnalysis>();
    // If we are already canonical or do not have ownership, just bail.
    if (func->wasDeserializedCanonical() || !func->hasOwnership())
      return;
    if (eliminateDeadAllocations(func, da->get(func)))
      invalidateAnalysis(SILAnalysis::InvalidationKind::FunctionBody);
  }
};

} // end anonymous namespace

SILTransform *swift::createPredictableDeadAllocationElimination() {
  return new PredictableDeadAllocationElimination();
}