//===--- PredictableMemOpt.cpp - Perform predictable memory optzns --------===//
//
// This source file is part of the Swift.org open source project
//
// Copyright (c) 2014 - 2017 Apple Inc. and the Swift project authors
// Licensed under Apache License v2.0 with Runtime Library Exception
//
// See https://swift.org/LICENSE.txt for license information
// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
//
//===----------------------------------------------------------------------===//

#define DEBUG_TYPE "predictable-memopt"

#include "PMOMemoryUseCollector.h"
#include "swift/Basic/Assertions.h"
#include "swift/Basic/BlotMapVector.h"
#include "swift/Basic/BlotSetVector.h"
#include "swift/Basic/FrozenMultiMap.h"
#include "swift/Basic/STLExtras.h"
#include "swift/SIL/BasicBlockBits.h"
#include "swift/SIL/BasicBlockUtils.h"
#include "swift/SIL/LinearLifetimeChecker.h"
#include "swift/SIL/OSSACompleteLifetime.h"
#include "swift/SIL/OwnershipUtils.h"
#include "swift/SIL/SILBuilder.h"
#include "swift/SILOptimizer/PassManager/Passes.h"
#include "swift/SILOptimizer/PassManager/Transforms.h"
#include "swift/SILOptimizer/Utils/CFGOptUtils.h"
#include "swift/SILOptimizer/Utils/InstOptUtils.h"
#include "swift/SILOptimizer/Utils/OwnershipOptUtils.h"
#include "swift/SILOptimizer/Utils/SILSSAUpdater.h"
#include "swift/SILOptimizer/Utils/ValueLifetime.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"

using namespace swift;

static llvm::cl::opt<bool> EnableAggressiveExpansionBlocking(
    "enable-aggressive-expansion-blocking", llvm::cl::init(false));

STATISTIC(NumLoadTakePromoted, "Number of load takes promoted");
STATISTIC(NumDestroyAddrPromoted, "Number of destroy_addrs promoted");
STATISTIC(NumAllocRemoved, "Number of allocations completely removed");

namespace {

/// The following utilities handle two fundamentally different optimizations:
/// - load-copy removal preserves the allocation
/// - dead allocation removal replaces the allocation
///
/// Ownership handling is completely different in these cases. The utilities
/// are not well-factored to reflect this, so we have a mode flag.
enum class OptimizationMode { PreserveAlloc, ReplaceAlloc };

} // end namespace

//===----------------------------------------------------------------------===//
//                           Subelement Analysis
//===----------------------------------------------------------------------===//

namespace {

/// The type of a particular memory access and its corresponding subelement
/// index range relative to the allocation being optimized.
struct LoadInfo {
  SILType loadType;
  unsigned firstElt;
  unsigned numElts;

  IntRange<unsigned> range() const {
    return IntRange<unsigned>(firstElt, firstElt + numElts);
  }
};

} // namespace

// We can only analyze components of structs whose storage is fully accessible
// from Swift.
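// (For example, a struct imported from C whose fields could not all be
// imported, such as one containing bit-fields, has unreferenceable storage
// and is rejected below.)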
static StructDecl *getFullyReferenceableStruct(SILType Ty) {
  auto SD = Ty.getStructOrBoundGenericStruct();
  if (!SD || SD->hasUnreferenceableStorage())
    return nullptr;
  return SD;
}

static unsigned getNumSubElements(SILType T, SILFunction &F,
                                  TypeExpansionContext context) {
  if (auto TT = T.getAs<TupleType>()) {
    unsigned NumElements = 0;
    for (auto index : indices(TT.getElementTypes()))
      NumElements +=
          getNumSubElements(T.getTupleElementType(index), F, context);
    return NumElements;
  }

  if (auto *SD = getFullyReferenceableStruct(T)) {
    unsigned NumElements = 0;
    for (auto *D : SD->getStoredProperties())
      NumElements += getNumSubElements(
          T.getFieldType(D, F.getModule(), context), F, context);

    // Returning NumElements == 0 implies that "empty" values can be
    // materialized without ownership. This is only valid for trivial types.
    if (NumElements > 0 || T.isTrivial(F))
      return NumElements;
  }

  // If this isn't a tuple or struct, it is a single element.
  return 1;
}

/// getAccessPathRoot - Given an address, dive through any tuple/struct element
/// addresses to get the underlying value.
static SILValue getAccessPathRoot(SILValue pointer) {
  while (true) {
    if (auto *TEAI = dyn_cast<TupleElementAddrInst>(pointer)) {
      pointer = TEAI->getOperand();
      continue;
    }

    if (auto *SEAI = dyn_cast<StructElementAddrInst>(pointer)) {
      pointer = SEAI->getOperand();
      continue;
    }

    if (auto *BAI = dyn_cast<BeginAccessInst>(pointer)) {
      pointer = BAI->getSource();
      continue;
    }

    return pointer;
  }
}

/// Compute the subelement number indicated by the specified pointer (which is
/// derived from the root by a series of tuple/struct element addresses) by
/// treating the type as a linearized namespace with sequential elements. For
/// example, given:
///
///   root = alloc { a: { c: i64, d: i64 }, b: (i64, i64) }
///   tmp1 = struct_element_addr root, 1
///   tmp2 = tuple_element_addr tmp1, 0
///
/// This will return a subelement number of 2.
///
/// If this pointer is to within an existential projection, it returns ~0U.
static unsigned computeSubelement(SILValue Pointer,
                                  SingleValueInstruction *RootInst) {
  unsigned SubElementNumber = 0;
  auto &F = *RootInst->getFunction();

  while (1) {
    // If we got to the root, we're done.
    if (RootInst == Pointer)
      return SubElementNumber;

    if (auto *PBI = dyn_cast<ProjectBoxInst>(Pointer)) {
      Pointer = PBI->getOperand();
      continue;
    }

    if (auto *BAI = dyn_cast<BeginAccessInst>(Pointer)) {
      Pointer = BAI->getSource();
      continue;
    }

    if (auto *TEAI = dyn_cast<TupleElementAddrInst>(Pointer)) {
      SILType TT = TEAI->getOperand()->getType();

      // Keep track of what subelement is being referenced.
      for (unsigned i = 0, e = TEAI->getFieldIndex(); i != e; ++i) {
        SubElementNumber +=
            getNumSubElements(TT.getTupleElementType(i), F,
                              TypeExpansionContext(*RootInst->getFunction()));
      }
      Pointer = TEAI->getOperand();
      continue;
    }

    if (auto *SEAI = dyn_cast<StructElementAddrInst>(Pointer)) {
      SILType ST = SEAI->getOperand()->getType();

      // Keep track of what subelement is being referenced.
      StructDecl *SD = SEAI->getStructDecl();
      for (auto *D : SD->getStoredProperties()) {
        if (D == SEAI->getField())
          break;
        auto context = TypeExpansionContext(*RootInst->getFunction());
        SubElementNumber += getNumSubElements(
            ST.getFieldType(D, F.getModule(), context), F, context);
      }

      Pointer = SEAI->getOperand();
      continue;
    }

    if (auto *MD = dyn_cast<MarkDependenceInst>(Pointer)) {
      Pointer = MD->getValue();
      continue;
    }

    // This fails when we visit unchecked_take_enum_data_addr. We should just
    // add support for enums.
    assert(isa<InitExistentialAddrInst>(Pointer) &&
           "Unknown access path instruction");
    // Cannot promote loads and stores from within an existential projection.
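    // For illustration (hypothetical SIL; the names are made up): an access
    // path such as
    //   %ex  = init_existential_addr %root : $*P, $Concrete
    //   %fld = struct_element_addr %ex : $*Concrete, #Concrete.x
    // only reaches the allocation through an existential projection, so no
    // stable subelement index exists for %fld.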
    return ~0U;
  }
}

//===----------------------------------------------------------------------===//
//                              Available Value
//===----------------------------------------------------------------------===//

namespace {

class AvailableValueAggregator;

struct AvailableValue {
  friend class AvailableValueAggregator;

  SILValue Value;
  unsigned SubElementNumber;

  /// If this gets too expensive in terms of copying, we can use an arena and
  /// a FrozenPtrSet like we do in ARC.
  llvm::SmallSetVector<SILInstruction *, 1> InsertionPoints;

  /// Just for updating.
  SmallVectorImpl<PMOMemoryUse> *Uses;

public:
  AvailableValue() = default;

  /// Main initializer for available values.
  ///
  /// *NOTE* We assume that all available values start with a singular
  /// insertion point and insertion points are added by merging.
  AvailableValue(SILValue Value, unsigned SubElementNumber,
                 SILInstruction *InsertPoint)
      : Value(Value), SubElementNumber(SubElementNumber), InsertionPoints() {
    InsertionPoints.insert(InsertPoint);
  }

  /// Deleted copy constructor. This is a move only type.
  AvailableValue(const AvailableValue &) = delete;

  /// Deleted copy operator. This is a move only type.
  AvailableValue &operator=(const AvailableValue &) = delete;

  /// Move constructor.
  AvailableValue(AvailableValue &&Other)
      : Value(nullptr), SubElementNumber(~0), InsertionPoints() {
    std::swap(Value, Other.Value);
    std::swap(SubElementNumber, Other.SubElementNumber);
    std::swap(InsertionPoints, Other.InsertionPoints);
  }

  /// Move operator.
  AvailableValue &operator=(AvailableValue &&Other) {
    std::swap(Value, Other.Value);
    std::swap(SubElementNumber, Other.SubElementNumber);
    std::swap(InsertionPoints, Other.InsertionPoints);
    return *this;
  }

  operator bool() const { return bool(Value); }

  bool operator==(const AvailableValue &Other) const {
    return Value == Other.Value && SubElementNumber == Other.SubElementNumber;
  }

  bool operator!=(const AvailableValue &Other) const {
    return !(*this == Other);
  }

  SILValue getValue() const { return Value; }
  SILType getType() const { return Value->getType(); }
  unsigned getSubElementNumber() const { return SubElementNumber; }

  ArrayRef<SILInstruction *> getInsertionPoints() const {
    return InsertionPoints.getArrayRef();
  }

  void mergeInsertionPoints(const AvailableValue &Other) & {
    assert(Value == Other.Value && SubElementNumber == Other.SubElementNumber);
    InsertionPoints.set_union(Other.InsertionPoints);
  }

  void addInsertionPoint(SILInstruction *i) & { InsertionPoints.insert(i); }

  AvailableValue emitStructExtract(SILBuilder &B, SILLocation Loc, VarDecl *D,
                                   unsigned SubElementNumber) const {
    SILValue NewValue = B.emitStructExtract(Loc, Value, D);
    return {NewValue, SubElementNumber, InsertionPoints};
  }

  AvailableValue emitTupleExtract(SILBuilder &B, SILLocation Loc,
                                  unsigned EltNo,
                                  unsigned SubElementNumber) const {
    SILValue NewValue = B.emitTupleExtract(Loc, Value, EltNo);
    return {NewValue, SubElementNumber, InsertionPoints};
  }

  AvailableValue emitBeginBorrow(SILBuilder &b, SILLocation loc) const {
    // If we do not have ownership or are already guaranteed, just return a
    // copy of our state.
    if (Value->getOwnershipKind().isCompatibleWith(
            OwnershipKind::Guaranteed)) {
      return {Value, SubElementNumber, InsertionPoints};
    }

    // Otherwise, return newValue.
    return {b.createBeginBorrow(loc, Value), SubElementNumber,
            InsertionPoints};
  }

  void dump() const LLVM_ATTRIBUTE_USED;
  void print(llvm::raw_ostream &os) const;

private:
  /// Private constructor.
  AvailableValue(SILValue Value, unsigned SubElementNumber,
                 const decltype(InsertionPoints) &InsertPoints)
      : Value(Value), SubElementNumber(SubElementNumber),
        InsertionPoints(InsertPoints) {}
};

} // end anonymous namespace

void AvailableValue::dump() const { print(llvm::dbgs()); }

void AvailableValue::print(llvm::raw_ostream &os) const {
  os << "Available Value Dump. Value: ";
  if (getValue()) {
    os << getValue();
  } else {
    os << "NoValue;\n";
  }
  os << "SubElementNumber: " << getSubElementNumber() << "\n";
  os << "Insertion Points:\n";
  for (auto *I : getInsertionPoints()) {
    os << *I;
  }
}

namespace llvm {

llvm::raw_ostream &operator<<(llvm::raw_ostream &os, const AvailableValue &V) {
  V.print(os);
  return os;
}

} // end llvm namespace

//===----------------------------------------------------------------------===//
//                           Subelement Extraction
//===----------------------------------------------------------------------===//

static bool isFullyAvailable(SILType loadTy, unsigned firstElt,
                             ArrayRef<AvailableValue> AvailableValues) {
  if (firstElt >= AvailableValues.size()) { // #Elements may be zero.
    return false;
  }

  auto &firstVal = AvailableValues[firstElt];

  // Make sure that the first element is available and is the correct type.
  if (!firstVal || firstVal.getType() != loadTy)
    return false;

  auto &function = *firstVal.getValue()->getFunction();
  return llvm::all_of(
      range(getNumSubElements(loadTy, function,
                              TypeExpansionContext(function))),
      [&](unsigned index) -> bool {
        auto &val = AvailableValues[firstElt + index];
        return val.getValue() == firstVal.getValue() &&
               val.getSubElementNumber() == index;
      });
}

/// Given an aggregate value and an access path, non-destructively extract the
/// value indicated by the path.
static SILValue nonDestructivelyExtractSubElement(const AvailableValue &Val,
                                                  SILBuilder &B,
                                                  SILLocation Loc) {
  SILType ValTy = Val.getType();
  unsigned SubElementNumber = Val.SubElementNumber;

  // Extract tuple elements.
  if (auto TT = ValTy.getAs<TupleType>()) {
    for (unsigned EltNo : indices(TT.getElementTypes())) {
      // Keep track of what subelement is being referenced.
      SILType EltTy = ValTy.getTupleElementType(EltNo);
      unsigned NumSubElt = getNumSubElements(
          EltTy, B.getFunction(), TypeExpansionContext(B.getFunction()));
      if (SubElementNumber < NumSubElt) {
        auto BorrowedVal = Val.emitBeginBorrow(B, Loc);
        auto NewVal =
            BorrowedVal.emitTupleExtract(B, Loc, EltNo, SubElementNumber);
        SILValue result = nonDestructivelyExtractSubElement(NewVal, B, Loc);
        // If our original value wasn't guaranteed and we did actually perform
        // a borrow as a result, insert the end_borrow.
        if (BorrowedVal.getValue() != Val.getValue())
          B.createEndBorrow(Loc, BorrowedVal.getValue());
        return result;
      }

      SubElementNumber -= NumSubElt;
    }

    llvm_unreachable("Didn't find field");
  }

  // Extract struct elements.
  if (auto *SD = getFullyReferenceableStruct(ValTy)) {
    for (auto *D : SD->getStoredProperties()) {
      auto fieldType = ValTy.getFieldType(
          D, B.getModule(), TypeExpansionContext(B.getFunction()));
      unsigned NumSubElt = getNumSubElements(
          fieldType, B.getFunction(), TypeExpansionContext(B.getFunction()));
      if (SubElementNumber < NumSubElt) {
        auto BorrowedVal = Val.emitBeginBorrow(B, Loc);
        auto NewVal =
            BorrowedVal.emitStructExtract(B, Loc, D, SubElementNumber);
        SILValue result = nonDestructivelyExtractSubElement(NewVal, B, Loc);
        // If our original value wasn't guaranteed and we did actually perform
        // a borrow as a result, insert the end_borrow.
        if (BorrowedVal.getValue() != Val.getValue())
          B.createEndBorrow(Loc, BorrowedVal.getValue());
        return result;
      }

      SubElementNumber -= NumSubElt;
    }

    llvm_unreachable("Didn't find field");
  }

  // Otherwise, we're down to a scalar. If we have ownership enabled, we
  // return a copy. Otherwise, we can ignore ownership issues. This is ok
  // since in [ossa] we are going to eliminate a load [copy] or a
  // load [trivial], while in non-[ossa] SIL we will be replacing unqualified
  // loads.
  assert(SubElementNumber == 0 && "Miscalculation indexing subelements");
  return B.emitCopyValueOperation(Loc, Val.getValue());
}

//===----------------------------------------------------------------------===//
//                              Ownership Fixup
//===----------------------------------------------------------------------===//

namespace {

/// For OptimizationMode::PreserveAlloc. Track inserted copies and casts for
/// ownership fixup.
struct AvailableValueFixup {
  /// Keep track of all instructions that we have added. Once we are done
  /// promoting a value, we need to make sure that if we need to balance any
  /// copies (to avoid leaks), we do so. This is not used if we are performing
  /// a take.
  SmallVector<SILInstruction *, 16> insertedInsts;

  AvailableValueFixup() = default;
  AvailableValueFixup(const AvailableValueFixup &) = delete;
  AvailableValueFixup &operator=(const AvailableValueFixup &) = delete;

  ~AvailableValueFixup() {
    assert(insertedInsts.empty() && "need to fix ownership");
  }
};

} // end namespace

namespace {

/// For available value dataflow with OptimizationMode::PreserveAlloc: track
/// only inserted copies and casts for ownership fixup after dataflow
/// completes. Phis are not allowed.
struct AvailableValueDataflowFixup : AvailableValueFixup {
  /// Given a single available value, get an owned copy if it is possible to
  /// create one without introducing a phi. Return the copy. Otherwise, return
  /// an invalid SILValue.
  SILValue getSingleOwnedValue(const AvailableValue &availableVal);

  // Verify ownership of promoted instructions in asserts builds or when
  // -sil-verify-all is set.
  //
  // Clears insertedInsts.
  void verifyOwnership(DeadEndBlocks &deBlocks);

  // Deletes all insertedInsts without fixing ownership.
  // Clears insertedInsts.
  void deleteInsertedInsts(InstructionDeleter &deleter);
};

} // end namespace

// In OptimizationMode::PreserveAlloc, get a copy of this single available
// value. This requires a copy at the allocation's insertion point (store).
//
// Assumption: this copy will be consumed at a cast-like instruction
// (mark_dependence), which caused dataflow to invalidate the currently
// available value. Therefore, since phis are not allowed, the consumption
// cannot be in an inner loop relative to the copy. Unlike getMergedOwnedValue,
// there is no need for a second copy.
//
// Note: the caller must add the consuming cast to this->insertedInsts.
SILValue AvailableValueDataflowFixup::getSingleOwnedValue(
    const AvailableValue &availableVal) {
  SILValue value = availableVal.getValue();
  // If there is no need for copies, then we don't care about multiple
  // insertion points.
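  // (A trivial value carries no ownership, so the stored value itself can be
  // returned directly without inserting a copy_value.)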
  if (value->getType().isTrivial(*value->getFunction())) {
    return value;
  }
  ArrayRef<SILInstruction *> insertPts = availableVal.getInsertionPoints();
  if (insertPts.size() > 1)
    return SILValue();

  return SILBuilderWithScope(insertPts[0], &insertedInsts)
      .emitCopyValueOperation(insertPts[0]->getLoc(), value);
}

void AvailableValueDataflowFixup::verifyOwnership(DeadEndBlocks &deBlocks) {
  for (auto *inst : insertedInsts) {
    if (inst->isDeleted())
      continue;
    for (auto result : inst->getResults()) {
      result.verifyOwnership(&deBlocks);
    }
  }
  insertedInsts.clear();
}

void AvailableValueDataflowFixup::deleteInsertedInsts(
    InstructionDeleter &deleter) {
  for (auto *inst : insertedInsts) {
    if (inst->isDeleted())
      continue;
    deleter.forceDeleteWithUsers(inst);
  }
  insertedInsts.clear();
}

// In OptimizationMode::PreserveAlloc: insert copies and phis for aggregate
// values.
struct AvailableValueAggregationFixup : AvailableValueFixup {
  DeadEndBlocks &deadEndBlocks;

  /// The list of phi nodes inserted by the SSA updater.
  SmallVector<SILPhiArgument *, 16> insertedPhiNodes;

  AvailableValueAggregationFixup(DeadEndBlocks &deadEndBlocks)
      : deadEndBlocks(deadEndBlocks) {}

  /// For a single 'availableVal', insert copies at each insertion point.
  /// Merge all copies, creating phis if needed. Return the final copy.
  /// Otherwise, return an invalid SILValue.
  SILValue mergeCopies(const AvailableValue &availableVal, SILType loadTy,
                       SILInstruction *availableAtInst, bool isFullyAvailable);

private:
  /// As a result of using the SSA updater, insert hand-off copy/destroys at
  /// each phi and make sure that intermediate phis do not leak by inserting
  /// destroys along paths that go through the intermediate phi but do not
  /// also go through the load.
  void addHandOffCopyDestroysForPhis(SILInstruction *load, SILValue newVal);
};

// For OptimizationMode::PreserveAlloc, insert copies at the available value's
// insertion points to provide an owned value. Merge the copies into phis.
// Create another copy before 'availableAtInst' in case it consumes the owned
// value within an inner loop. Add the copies to insertedInsts and phis to
// insertedPhiNodes.
SILValue AvailableValueAggregationFixup::mergeCopies(
    const AvailableValue &availableVal, SILType loadTy,
    SILInstruction *availableAtInst, bool isFullyAvailable) {
  auto emitValue = [&](SILInstruction *insertPt) {
    if (isFullyAvailable) {
      SILValue fullVal = availableVal.getValue();
      if (fullVal->use_empty() &&
          fullVal->getOwnershipKind() == OwnershipKind::Owned) {
        // This value was inserted during data flow to propagate ownership. It
        // does not need to be copied.
        return fullVal;
      }
      return SILBuilderWithScope(insertPt, &insertedInsts)
          .emitCopyValueOperation(insertPt->getLoc(), fullVal);
    }
    SILBuilderWithScope builder(insertPt, &insertedInsts);
    SILValue eltVal = nonDestructivelyExtractSubElement(availableVal, builder,
                                                        insertPt->getLoc());
    assert(eltVal->getType() == loadTy && "Subelement types mismatch");
    assert(eltVal->getOwnershipKind().isCompatibleWith(OwnershipKind::Owned));
    return eltVal;
  };

  ArrayRef<SILInstruction *> insertPts = availableVal.getInsertionPoints();
  if (insertPts.size() == 1) {
    SILValue eltVal = emitValue(insertPts[0]);
    if (isFullyAvailable)
      return eltVal;
    return SILBuilderWithScope(availableAtInst, &insertedInsts)
        .emitCopyValueOperation(availableAtInst->getLoc(), eltVal);
  }
  // If we have multiple insertion points, put copies at each point and use
  // the SSA updater to get a value. The reason why this is safe is that we
  // can only have multiple insertion points if we are storing exactly the
  // same value, implying that we can just copy firstVal at each insertion
  // point.
  SILSSAUpdater updater(&insertedPhiNodes);
  SILFunction *function = availableAtInst->getFunction();
  assert(function->hasOwnership() && "requires OSSA");
  updater.initialize(function, loadTy, OwnershipKind::Owned);

  std::optional<SILValue> singularValue;
  for (auto *insertPt : insertPts) {
    SILValue eltVal = emitValue(insertPt);

    if (!singularValue.has_value()) {
      singularValue = eltVal;
    } else if (*singularValue != eltVal) {
      singularValue = SILValue();
    }

    // And then put the value into the SSA updater.
    updater.addAvailableValue(insertPt->getParent(), eltVal);
  }

  // If we only are tracking a singular value, we do not need to construct
  // SSA. Just return that value.
  if (auto val = singularValue.value_or(SILValue())) {
    // Non-trivial values will always be copied above at each insertion
    // point, and will, therefore, have multiple incoming values.
    assert(val->getType().isTrivial(*function) &&
           "Should never reach this code path if we are in ossa and have a "
           "non-trivial value");
    return val;
  }

  // Finally, grab the value from the SSA updater.
  SILValue result =
      updater.getValueInMiddleOfBlock(availableAtInst->getParent());
  assert(result->getOwnershipKind().isCompatibleWith(OwnershipKind::Owned));
  assert(result->getType() == loadTy && "Subelement types mismatch");

  // Insert another copy at the point of availability in case it is inside a
  // loop.
  return SILBuilderWithScope(availableAtInst, &insertedInsts)
      .emitCopyValueOperation(availableAtInst->getLoc(), result);
}

//===----------------------------------------------------------------------===//
//                        Available Value Aggregation
//===----------------------------------------------------------------------===//

static bool anyMissing(unsigned StartSubElt, unsigned NumSubElts,
                       ArrayRef<AvailableValue> &Values) {
  while (NumSubElts) {
    if (!Values[StartSubElt])
      return true;
    ++StartSubElt;
    --NumSubElts;
  }
  return false;
}

namespace {

enum class AvailableValueExpectedOwnership {
  Take,
  Borrow,
  Copy,
};

/// A class that aggregates available values, loading them if they are not
/// available.
class AvailableValueAggregator {
  SILModule &M;
  SILBuilderWithScope B;
  SILLocation Loc;
  ArrayRef<AvailableValue> AvailableValueList;
  SmallVectorImpl<PMOMemoryUse> &Uses;
  AvailableValueExpectedOwnership expectedOwnership;
  AvailableValueAggregationFixup ownershipFixup;

public:
  AvailableValueAggregator(SILInstruction *Inst,
                           ArrayRef<AvailableValue> AvailableValueList,
                           SmallVectorImpl<PMOMemoryUse> &Uses,
                           DeadEndBlocks &deadEndBlocks,
                           AvailableValueExpectedOwnership expectedOwnership)
      : M(Inst->getModule()), B(Inst), Loc(Inst->getLoc()),
        AvailableValueList(AvailableValueList), Uses(Uses),
        expectedOwnership(expectedOwnership),
        ownershipFixup(deadEndBlocks) {}

  // This is intended to be passed by reference only once constructed.
AvailableValueAggregator(const AvailableValueAggregator &) = delete; AvailableValueAggregator(AvailableValueAggregator &&) = delete; AvailableValueAggregator & operator=(const AvailableValueAggregator &) = delete; AvailableValueAggregator &operator=(AvailableValueAggregator &&) = delete; SILValue aggregateValues(SILType LoadTy, SILValue Address, unsigned FirstElt, bool isTopLevel = true); bool canTake(SILType loadTy, unsigned firstElt) const; void print(llvm::raw_ostream &os) const; void dump() const LLVM_ATTRIBUTE_USED; bool isTake() const { return expectedOwnership == AvailableValueExpectedOwnership::Take; } bool isBorrow() const { return expectedOwnership == AvailableValueExpectedOwnership::Borrow; } bool isCopy() const { return expectedOwnership == AvailableValueExpectedOwnership::Copy; } private: SILValue aggregateFullyAvailableValue(SILType loadTy, unsigned firstElt); SILValue aggregateTupleSubElts(TupleType *tt, SILType loadTy, SILValue address, unsigned firstElt); SILValue aggregateStructSubElts(StructDecl *sd, SILType loadTy, SILValue address, unsigned firstElt); }; } // end anonymous namespace void AvailableValueAggregator::dump() const { print(llvm::dbgs()); } void AvailableValueAggregator::print(llvm::raw_ostream &os) const { os << "Available Value List, N = " << AvailableValueList.size() << ". Elts:\n"; for (auto &V : AvailableValueList) { os << V; } } // We can only take if we never have to split a larger value to promote this // address. bool AvailableValueAggregator::canTake(SILType loadTy, unsigned firstElt) const { // If we are trivially fully available, just return true. if (isFullyAvailable(loadTy, firstElt, AvailableValueList)) return true; // Otherwise see if we are an aggregate with fully available leaf types. if (TupleType *tt = loadTy.getAs()) { return llvm::all_of(indices(tt->getElements()), [&](unsigned eltNo) { SILType eltTy = loadTy.getTupleElementType(eltNo); unsigned numSubElt = getNumSubElements(eltTy, B.getFunction(), TypeExpansionContext(B.getFunction())); bool success = canTake(eltTy, firstElt); firstElt += numSubElt; return success; }); } if (auto *sd = getFullyReferenceableStruct(loadTy)) { return llvm::all_of(sd->getStoredProperties(), [&](VarDecl *decl) -> bool { auto context = TypeExpansionContext(B.getFunction()); SILType eltTy = loadTy.getFieldType(decl, M, context); unsigned numSubElt = getNumSubElements(eltTy, B.getFunction(), context); bool success = canTake(eltTy, firstElt); firstElt += numSubElt; return success; }); } // Otherwise, fail. The value is not fully available at its leafs. We can not // perform a take. return false; } /// Given a bunch of primitive subelement values, build out the right aggregate /// type (LoadTy) by emitting tuple and struct instructions as necessary. SILValue AvailableValueAggregator::aggregateValues(SILType LoadTy, SILValue Address, unsigned FirstElt, bool isTopLevel) { // If we are performing a take, make sure that we have available values for // /all/ of our values. Otherwise, bail. if (isTopLevel && isTake() && !canTake(LoadTy, FirstElt)) { return SILValue(); } // Check to see if the requested value is fully available, as an aggregate. // This is a super-common case for single-element structs, but is also a // general answer for arbitrary structs and tuples as well. if (SILValue Result = aggregateFullyAvailableValue(LoadTy, FirstElt)) { return Result; } // If we have a tuple type, then aggregate the tuple's elements into a full // tuple value. 
if (TupleType *tupleType = LoadTy.getAs()) { SILValue result = aggregateTupleSubElts(tupleType, LoadTy, Address, FirstElt); if (isTopLevel && result->getOwnershipKind() == OwnershipKind::Guaranteed) { SILValue borrowedResult = result; SILBuilderWithScope builder(&*B.getInsertionPoint(), &ownershipFixup.insertedInsts); result = builder.emitCopyValueOperation(Loc, borrowedResult); SmallVector introducers; bool foundIntroducers = getAllBorrowIntroducingValues(borrowedResult, introducers); (void)foundIntroducers; assert(foundIntroducers); for (auto value : introducers) { builder.emitEndBorrowOperation(Loc, value.value); } } return result; } // If we have a struct type, then aggregate the struct's elements into a full // struct value. if (auto *structDecl = getFullyReferenceableStruct(LoadTy)) { SILValue result = aggregateStructSubElts(structDecl, LoadTy, Address, FirstElt); if (isTopLevel && result->getOwnershipKind() == OwnershipKind::Guaranteed) { SILValue borrowedResult = result; SILBuilderWithScope builder(&*B.getInsertionPoint(), &ownershipFixup.insertedInsts); result = builder.emitCopyValueOperation(Loc, borrowedResult); SmallVector introducers; bool foundIntroducers = getAllBorrowIntroducingValues(borrowedResult, introducers); (void)foundIntroducers; assert(foundIntroducers); for (auto value : introducers) { builder.emitEndBorrowOperation(Loc, value.value); } } return result; } // Otherwise, we have a non-aggregate primitive. Load or extract the value. // // NOTE: We should never call this when taking since when taking we know that // our underlying value is always fully available. assert(!isTake()); // If the value is not available, load the value and update our use list. auto &val = AvailableValueList[FirstElt]; if (!val) { LoadInst *load = ([&]() { SILBuilderWithScope builder(&*B.getInsertionPoint(), &ownershipFixup.insertedInsts); return builder.createTrivialLoadOr(Loc, Address, LoadOwnershipQualifier::Copy); }()); Uses.emplace_back(load, PMOUseKind::Load); return load; } return ownershipFixup.mergeCopies(val, LoadTy, &*B.getInsertionPoint(), /*isFullyAvailable*/false); } // See if we have this value is fully available. In such a case, return it as an // aggregate. This is a super-common case for single-element structs, but is // also a general answer for arbitrary structs and tuples as well. SILValue AvailableValueAggregator::aggregateFullyAvailableValue(SILType loadTy, unsigned firstElt) { // Check if our underlying type is fully available. If it isn't, bail. if (!isFullyAvailable(loadTy, firstElt, AvailableValueList)) return SILValue(); // Ok, grab out first value. (note: any actually will do). auto &firstVal = AvailableValueList[firstElt]; // Ok, we know that all of our available values are all parts of the same // value. assert(B.hasOwnership() && "requires OSSA"); if (isTake()) return firstVal.getValue(); // Otherwise, we need to put in a copy. This is b/c we only propagate along +1 // values and we are eliminating a load [copy]. 
return ownershipFixup.mergeCopies(firstVal, loadTy, &*B.getInsertionPoint(), /*isFullyAvailable*/true); } SILValue AvailableValueAggregator::aggregateTupleSubElts(TupleType *TT, SILType LoadTy, SILValue Address, unsigned FirstElt) { SmallVector ResultElts; for (unsigned EltNo : indices(TT->getElements())) { SILType EltTy = LoadTy.getTupleElementType(EltNo); unsigned NumSubElt = getNumSubElements(EltTy, B.getFunction(), TypeExpansionContext(B.getFunction())); // If we are missing any of the available values in this struct element, // compute an address to load from. SILValue EltAddr; if (anyMissing(FirstElt, NumSubElt, AvailableValueList)) { assert(!isTake() && "When taking, values should never be missing?!"); EltAddr = B.createTupleElementAddr(Loc, Address, EltNo, EltTy.getAddressType()); } ResultElts.push_back( aggregateValues(EltTy, EltAddr, FirstElt, /*isTopLevel*/ false)); FirstElt += NumSubElt; } // If we are going to use this to promote a borrowed value, insert borrow // operations. Eventually I am going to do this for everything, but this // should make it easier to bring up. if (!isTake()) { for (unsigned i : indices(ResultElts)) { ResultElts[i] = B.emitBeginBorrowOperation(Loc, ResultElts[i]); } } return B.createTuple(Loc, LoadTy, ResultElts); } SILValue AvailableValueAggregator::aggregateStructSubElts(StructDecl *sd, SILType loadTy, SILValue address, unsigned firstElt) { SmallVector resultElts; for (auto *decl : sd->getStoredProperties()) { auto context = TypeExpansionContext(B.getFunction()); SILType eltTy = loadTy.getFieldType(decl, M, context); unsigned numSubElt = getNumSubElements(eltTy, B.getFunction(), context); // If we are missing any of the available values in this struct element, // compute an address to load from. SILValue eltAddr; if (anyMissing(firstElt, numSubElt, AvailableValueList)) { assert(!isTake() && "When taking, values should never be missing?!"); eltAddr = B.createStructElementAddr(Loc, address, decl, eltTy.getAddressType()); } resultElts.push_back( aggregateValues(eltTy, eltAddr, firstElt, /*isTopLevel*/ false)); firstElt += numSubElt; } if (!isTake()) { for (unsigned i : indices(resultElts)) { resultElts[i] = B.emitBeginBorrowOperation(Loc, resultElts[i]); } } return B.createStruct(Loc, loadTy, resultElts); } //===----------------------------------------------------------------------===// // Available Value Dataflow //===----------------------------------------------------------------------===// namespace { /// Given a piece of memory, the memory's uses, and destroys perform a single /// round of semi-optimistic backwards dataflow for each use. The result is the /// set of available values that reach the specific use of the field in the /// allocated object. /// /// The general form of the algorithm is that in our constructor, we analyze our /// uses and determine available values. Then users call computeAvailableValues /// which looks backwards up the control flow graph for available values that we /// can use. /// /// NOTE: The reason why we say that the algorithm is semi-optimistic is that we /// assume that all incoming elements into a loopheader will be the same. If we /// find a conflict, we record it and fail. class AvailableValueDataflowContext { /// The base memory we are performing dataflow upon. AllocationInst *TheMemory; /// The number of sub elements of our memory. unsigned NumMemorySubElements; /// The set of uses that we are tracking. This is only here so we can update /// when exploding copy_addr and mark_dependence. 
It would be great if we did /// not have to store this. SmallVectorImpl &Uses; InstructionDeleter &deleter; /// The set of blocks with local definitions. /// /// We use this to determine if we should visit a block or look at a block's /// predecessors during dataflow for an available value. BasicBlockFlag HasLocalDefinition; /// The set of blocks that have definitions which specifically "kill" the /// given value. If a block is in this set, there must be an instruction in /// LoadTakeUse whose parent is the block. This is just used to speed up /// computation. /// /// NOTE: These are not considered escapes. BasicBlockFlag HasLocalKill; /// This is a set of load takes that we are tracking. HasLocalKill is the set /// of parent blocks of these instructions. llvm::SmallPtrSet LoadTakeUses; /// This is a map of uses that are not loads (i.e., they are Stores, /// InOutUses, and Escapes), to their entry in Uses. llvm::SmallDenseMap NonLoadUses; AvailableValueDataflowFixup ownershipFixup; /// Does this value escape anywhere in the function. We use this very /// conservatively. bool HasAnyEscape = false; /// When promoting load [copy], the original allocation must be /// preserved. This introduced extra copies. OptimizationMode optimizationMode; public: AvailableValueDataflowContext(AllocationInst *TheMemory, unsigned NumMemorySubElements, OptimizationMode mode, SmallVectorImpl &Uses, InstructionDeleter &deleter, DeadEndBlocks &deBlocks); // Find an available for for subelements of 'SrcAddr'. // Return the SILType of the object in 'SrcAddr' and index of the first sub // element in that object. // If not all subelements are availab, return nullopt. // // Available value analysis can create dead owned casts. OSSA is invalid until // the ownership chain is complete by replacing the uses of the loaded value // and calling fixupOwnership(). std::optional computeAvailableValues(SILValue SrcAddr, SILInstruction *Inst, SmallVectorImpl &AvailableValues); /// Return true if the box has escaped at the specified instruction. We are /// not /// allowed to do load promotion in an escape region. bool hasEscapedAt(SILInstruction *I); /// Explode a copy_addr, updating the Uses at the same time. void explodeCopyAddr(CopyAddrInst *CAI); void verifyOwnership(DeadEndBlocks &deBlocks) { ownershipFixup.verifyOwnership(deBlocks); } void deleteInsertedInsts(InstructionDeleter &deleter) { ownershipFixup.deleteInsertedInsts(deleter); } private: SILModule &getModule() const { return TheMemory->getModule(); } SILFunction &getFunction() const { return *TheMemory->getFunction(); } void updateAvailableValues( SILInstruction *Inst, SmallBitVector &RequiredElts, SmallVectorImpl &Result, llvm::SmallDenseMap &VisitedBlocks, SmallBitVector &ConflictingValues); /// Try to compute available values for "TheMemory" at the instruction \p /// StartingFrom. We only compute the values for set bits in \p /// RequiredElts. We return the vailable values in \p Result. If any available /// values were found, return true. Otherwise, return false. /// /// In OptimizationMode::PreserveAlloc, this may insert casts and copies to /// propagate owned values. 
bool computeAvailableElementValues(SILInstruction *StartingFrom, LoadInfo loadInfo, SmallBitVector &RequiredElts, SmallVectorImpl &Result); void computeAvailableValuesFrom( SILBasicBlock::iterator StartingFrom, SILBasicBlock *BB, SmallBitVector &RequiredElts, SmallVectorImpl &Result, llvm::SmallDenseMap &VisitedBlocks, SmallBitVector &ConflictingValues); /// Promote a mark_dependence, updating the available values. void updateMarkDependenceValues( MarkDependenceInst *md, SmallBitVector &RequiredElts, SmallVectorImpl &Result, llvm::SmallDenseMap &VisitedBlocks, SmallBitVector &ConflictingValues); SILValue createAvailableMarkDependence(MarkDependenceInst *md, AvailableValue &availableVal); }; } // end anonymous namespace AvailableValueDataflowContext::AvailableValueDataflowContext( AllocationInst *InputTheMemory, unsigned NumMemorySubElements, OptimizationMode mode, SmallVectorImpl &InputUses, InstructionDeleter &deleter, DeadEndBlocks &deBlocks) : TheMemory(InputTheMemory), NumMemorySubElements(NumMemorySubElements), Uses(InputUses), deleter(deleter), HasLocalDefinition(InputTheMemory->getFunction()), HasLocalKill(InputTheMemory->getFunction()), optimizationMode(mode) { // The first step of processing an element is to collect information about the // element into data structures we use later. for (unsigned ui : indices(Uses)) { auto &Use = Uses[ui]; assert(Use.Inst && "No instruction identified?"); // If we have a load... if (Use.Kind == PMOUseKind::Load) { // Skip load borrow use and open_existential_addr. if (isa(Use.Inst) || isa(Use.Inst)) continue; // That is not a load take, continue. Otherwise, stash the load [take]. if (auto *LI = dyn_cast(Use.Inst)) { if (LI->getOwnershipQualifier() == LoadOwnershipQualifier::Take) { LoadTakeUses.insert(LI); HasLocalKill.set(LI->getParent()); } continue; } // If we have a copy_addr as our load, it means we are processing a source // of the value. If the copy_addr is taking from the source, we need to // treat it like a load take use. if (auto *CAI = dyn_cast(Use.Inst)) { if (CAI->isTakeOfSrc() == IsTake) { LoadTakeUses.insert(CAI); HasLocalKill.set(CAI->getParent()); } continue; } // mark_dependence of the dependent value is equivalent to take-init. if (isa(Use.Inst)) { HasLocalKill.set(Use.Inst->getParent()); continue; } llvm_unreachable("Unhandled SILInstructionKind for PMOUseKind::Load?!"); } if (Use.Kind == PMOUseKind::DependenceBase) { // An address used as a dependence base does not affect load promotion. continue; } // Keep track of all the uses that aren't loads. NonLoadUses[Use.Inst] = ui; HasLocalDefinition.set(Use.Inst->getParent()); if (Use.Kind == PMOUseKind::Escape) { // Determine which blocks the value can escape from. We aren't allowed to // promote loads in blocks reachable from an escape point. HasAnyEscape = true; } } // If isn't really a use, but we account for the alloc_box/mark_uninitialized // as a use so we see it in our dataflow walks. NonLoadUses[TheMemory] = ~0U; HasLocalDefinition.set(TheMemory->getParent()); } std::optional AvailableValueDataflowContext::computeAvailableValues( SILValue SrcAddr, SILInstruction *Inst, SmallVectorImpl &AvailableValues) { // If the box has escaped at this instruction, we can't safely promote the // load. if (hasEscapedAt(Inst)) return std::nullopt; SILType LoadTy = SrcAddr->getType().getObjectType(); // If this is a load/copy_addr from a struct field that we want to promote, // compute the access path down to the field so we can determine precise // def/use behavior. 
unsigned FirstElt = computeSubelement(SrcAddr, TheMemory); // If this is a load from within an enum projection, we can't promote it since // we don't track subelements in a type that could be changing. if (FirstElt == ~0U) return std::nullopt; unsigned NumLoadSubElements = getNumSubElements( LoadTy, getFunction(), TypeExpansionContext(getFunction())); LoadInfo loadInfo = {LoadTy, FirstElt, NumLoadSubElements}; AvailableValues.resize(NumMemorySubElements); // If no bits are demanded, we trivially succeed. This can happen when there // is a load of an empty struct. if (NumLoadSubElements == 0) return loadInfo; // Set up the bitvector of elements being demanded by the load. SmallBitVector RequiredElts(NumMemorySubElements); RequiredElts.set(*loadInfo.range().begin(), *loadInfo.range().end()); // Find out if we have any available values. if (!computeAvailableElementValues(Inst, loadInfo, RequiredElts, AvailableValues)) { return std::nullopt; } return loadInfo; } // This function takes in the current (potentially uninitialized) available // values for theMemory and for the subset of AvailableValues corresponding to // \p address either: // // 1. If uninitialized, optionally initialize the available value with a new // SILValue. It is optional since in certain cases, (for instance when // invalidating one just wants to skip empty available values). // // 2. Given an initialized value, either add the given instruction as an // insertion point or state that we have a conflict. static inline void updateAvailableValuesHelper( SingleValueInstruction *theMemory, SILInstruction *inst, SILValue address, SmallBitVector &requiredElts, SmallVectorImpl &result, SmallBitVector &conflictingValues, function_ref(unsigned)> defaultFunc, function_ref isSafeFunc) { unsigned startSubElt = computeSubelement(address, theMemory); // TODO: Is this needed now? assert(startSubElt != ~0U && "Store within enum projection not handled"); auto &f = *theMemory->getFunction(); for (unsigned i : range(getNumSubElements( address->getType().getObjectType(), f, TypeExpansionContext(f)))) { // If this element is not required, don't fill it in. if (!requiredElts[startSubElt + i]) continue; // At this point we know that we will either mark the value as conflicting // or give it a value. requiredElts[startSubElt + i] = false; // First see if we have an entry at all. auto &entry = result[startSubElt + i]; // If we don't... if (!entry) { // and we are told to initialize it, do so. if (auto defaultValue = defaultFunc(i)) { entry = std::move(defaultValue.value()); } else { // Otherwise, mark this as a conflicting value. There is some available // value here, we just do not know what it is at this point. This // ensures that if we visit a kill where we do not have an entry yet, we // properly invalidate our state. conflictingValues[startSubElt + i] = true; } continue; } // Check if our caller thinks that the value currently in entry is // compatible with \p inst. If not, mark the values as conflicting and // continue. if (!isSafeFunc(entry, i)) { conflictingValues[startSubElt + i] = true; continue; } // Otherwise, we found another insertion point for our available // value. Today this will always be a Store. entry.addInsertionPoint(inst); } } void AvailableValueDataflowContext::updateAvailableValues( SILInstruction *Inst, SmallBitVector &RequiredElts, SmallVectorImpl &Result, llvm::SmallDenseMap &VisitedBlocks, SmallBitVector &ConflictingValues) { // If we are visiting a load [take], it invalidates the underlying available // values. 
// // NOTE: Since we are always looking back from the instruction to promote, // when we attempt to promote the load [take] itself, we will never hit this // code since. if (auto *LI = dyn_cast(Inst)) { // First see if this is a load inst that we are tracking. if (LoadTakeUses.count(LI)) { updateAvailableValuesHelper( TheMemory, LI, LI->getOperand(), RequiredElts, Result, ConflictingValues, /*default*/ [](unsigned) -> std::optional { // We never initialize values. We only // want to invalidate. return std::nullopt; }, /*isSafe*/ [](AvailableValue &, unsigned) -> bool { // Always assume values conflict. return false; }); return; } } // Handle store. if (auto *SI = dyn_cast(Inst)) { updateAvailableValuesHelper( TheMemory, SI, SI->getDest(), RequiredElts, Result, ConflictingValues, /*default*/ [&](unsigned ResultOffset) -> std::optional { std::optional Result; Result.emplace(SI->getSrc(), ResultOffset, SI); return Result; }, /*isSafe*/ [&](AvailableValue &Entry, unsigned ResultOffset) -> bool { // TODO: This is /really/, /really/, conservative. This basically // means that if we do not have an identical store, we will not // promote. return Entry.getValue() == SI->getSrc() && Entry.getSubElementNumber() == ResultOffset; }); return; } // If we got here from an apply, we must either be initializing the element // via an @out parameter or we are trying to model an invalidating load of the // value (e.x.: indirect_in, indirect_inout). // If we get here with a copy_addr, we must either be storing into the element // or tracking some sort of take of the src. First check if we are taking (in // which case, we just track invalidation of src) and continue. Otherwise we // must be storing into the copy_addr so see which loaded subelements are // being used, and if so, explode the copy_addr to its individual pieces. if (auto *CAI = dyn_cast(Inst)) { // If we have a load take use, we must be tracking a store of CAI. if (LoadTakeUses.count(CAI)) { updateAvailableValuesHelper( TheMemory, CAI, CAI->getSrc(), RequiredElts, Result, ConflictingValues, /*default*/ [](unsigned) -> std::optional { // We never give values default initialized // values. We only want to invalidate. return std::nullopt; }, /*isSafe*/ [](AvailableValue &, unsigned) -> bool { // Always assume values conflict. return false; }); return; } unsigned StartSubElt = computeSubelement(CAI->getDest(), TheMemory); assert(StartSubElt != ~0U && "Store within enum projection not handled"); SILType ValTy = CAI->getDest()->getType(); bool AnyRequired = false; for (unsigned i : range(getNumSubElements( ValTy, getFunction(), TypeExpansionContext(getFunction())))) { // If this element is not required, don't fill it in. AnyRequired = RequiredElts[StartSubElt+i]; if (AnyRequired) break; } // If this is a copy addr that doesn't intersect the loaded subelements, // just continue with an unmodified load mask. if (!AnyRequired) return; // If the copyaddr is of a non-loadable type, we can't promote it. Just // consider it to be a clobber. if (CAI->getSrc()->getType().isLoadable(*CAI->getFunction())) { // Otherwise, some part of the copy_addr's value is demanded by a load, so // we need to explode it to its component pieces. This only expands one // level of the copyaddr. explodeCopyAddr(CAI); // The copy_addr doesn't provide any values, but we've arranged for our // iterators to visit the newly generated instructions, which do. 
return; } } if (auto *MD = dyn_cast(Inst)) { unsigned StartSubElt = computeSubelement(MD->getValue(), TheMemory); assert(StartSubElt != ~0U && "Store within enum projection not handled"); SILType ValTy = MD->getValue()->getType(); // Check if this mark_dependence provides any required values before // potentially bailing out (because it is address-only). bool AnyRequired = false; for (unsigned i : range(getNumSubElements( ValTy, getFunction(), TypeExpansionContext(getFunction())))) { // If this element is not required, don't fill it in. AnyRequired = RequiredElts[StartSubElt+i]; if (AnyRequired) break; } // If this is a dependence that doesn't intersect the loaded subelements, // just continue with an unmodified load mask. if (!AnyRequired) return; // If the mark_dependence is loadable, promote it. if (MD->getValue()->getType().isLoadable(*MD->getFunction())) { updateMarkDependenceValues(MD, RequiredElts, Result, VisitedBlocks, ConflictingValues); return; } } // TODO: inout apply's should only clobber pieces passed in. // Otherwise, this is some unknown instruction, conservatively assume that all // values are clobbered. RequiredElts.clear(); ConflictingValues = SmallBitVector(Result.size(), true); return; } bool AvailableValueDataflowContext::computeAvailableElementValues( SILInstruction *StartingFrom, LoadInfo loadInfo, SmallBitVector &RequiredElts, SmallVectorImpl &Result) { llvm::SmallDenseMap VisitedBlocks; SmallBitVector ConflictingValues(Result.size()); computeAvailableValuesFrom(StartingFrom->getIterator(), StartingFrom->getParent(), RequiredElts, Result, VisitedBlocks, ConflictingValues); // If there are no values available at this load point, then we fail to // promote this load and there is nothing to do. SmallBitVector AvailableValueIsPresent(NumMemorySubElements); for (unsigned i : loadInfo.range()) { AvailableValueIsPresent[i] = Result[i].getValue(); } // If we do not have any values available, bail. if (AvailableValueIsPresent.none()) return false; // Otherwise, if we have any conflicting values, explicitly mask them out of // the result, so we don't pick one arbitrary available value. if (ConflictingValues.none()) { return true; } // At this point, we know that we have /some/ conflicting values and some // available values. if (AvailableValueIsPresent.reset(ConflictingValues).none()) return false; // Otherwise, mask out the available values and return true. We have at least // 1 available value. int NextIter = ConflictingValues.find_first(); while (NextIter != -1) { assert(NextIter >= 0 && "Int can not be represented?!"); unsigned Iter = NextIter; Result[Iter] = {}; NextIter = ConflictingValues.find_next(Iter); } return true; } void AvailableValueDataflowContext::computeAvailableValuesFrom( SILBasicBlock::iterator StartingFrom, SILBasicBlock *BB, SmallBitVector &RequiredElts, SmallVectorImpl &Result, llvm::SmallDenseMap &VisitedBlocks, SmallBitVector &ConflictingValues) { assert(!RequiredElts.none() && "Scanning with a goal of finding nothing?"); // If there is a potential modification in the current block, scan the block // to see if the store, escape, or load [take] is before or after the load. If // it is before, check to see if it produces the value we are looking for. bool shouldCheckBlock = HasLocalDefinition.get(BB) || HasLocalKill.get(BB); if (shouldCheckBlock) { for (SILBasicBlock::iterator BBI = StartingFrom; BBI != BB->begin();) { SILInstruction *TheInst = &*std::prev(BBI); // If this instruction is unrelated to the element, ignore it. 
if (!NonLoadUses.count(TheInst) && !LoadTakeUses.count(TheInst)) { --BBI; continue; } // Given an interesting instruction, incorporate it into the set of // results, and filter down the list of demanded subelements that we still // need. updateAvailableValues(TheInst, RequiredElts, Result, VisitedBlocks, ConflictingValues); // If this satisfied all of the demanded values, we're done. if (RequiredElts.none()) return; // Otherwise, keep scanning the block. If the instruction we were looking // at just got exploded, don't skip the next instruction. if (&*std::prev(BBI) == TheInst) --BBI; } } // Otherwise, we need to scan up the CFG looking for available values. for (auto PI = BB->pred_begin(), E = BB->pred_end(); PI != E; ++PI) { SILBasicBlock *PredBB = *PI; // If the predecessor block has already been visited (potentially due to a // cycle in the CFG), don't revisit it. We can do this safely because we // are optimistically assuming that all incoming elements in a cycle will be // the same. If we ever detect a conflicting element, we record it and do // not look at the result. auto Entry = VisitedBlocks.insert({PredBB, RequiredElts}); if (!Entry.second) { // If we are revisiting a block and asking for different required elements // then anything that isn't agreeing is in conflict. const auto &PrevRequired = Entry.first->second; if (PrevRequired != RequiredElts) { ConflictingValues |= (PrevRequired ^ RequiredElts); RequiredElts &= ~ConflictingValues; if (RequiredElts.none()) return; } continue; } // Make sure to pass in the same set of required elements for each pred. SmallBitVector Elts = RequiredElts; computeAvailableValuesFrom(PredBB->end(), PredBB, Elts, Result, VisitedBlocks, ConflictingValues); // If we have any conflicting values, don't bother searching for them. RequiredElts &= ~ConflictingValues; if (RequiredElts.none()) return; } } /// Explode a copy_addr instruction of a loadable type into lower level /// operations like loads, stores, retains, releases, retain_value, etc. void AvailableValueDataflowContext::explodeCopyAddr(CopyAddrInst *CAI) { LLVM_DEBUG(llvm::dbgs() << " -- Exploding copy_addr: " << *CAI << "\n"); SILType ValTy = CAI->getDest()->getType().getObjectType(); SILFunction *F = CAI->getFunction(); auto &TL = F->getTypeLowering(ValTy); // Keep track of the new instructions emitted. SmallVector NewInsts; SILBuilder B(CAI, &NewInsts); B.setCurrentDebugScope(CAI->getDebugScope()); // Use type lowering to lower the copyaddr into a load sequence + store // sequence appropriate for the type. SILValue StoredValue = TL.emitLoadOfCopy(B, CAI->getLoc(), CAI->getSrc(), CAI->isTakeOfSrc()); TL.emitStoreOfCopy(B, CAI->getLoc(), StoredValue, CAI->getDest(), CAI->isInitializationOfDest()); // Update our internal state for this being gone. NonLoadUses.erase(CAI); LoadTakeUses.erase(CAI); // NOTE: We do not need to update HasLocalKill since the copy_addr // and the loads/stores will have the same parent block. // Remove the copy_addr from Uses. A single copy_addr can appear multiple // times if the source and dest are to elements within a single aggregate, but // we only want to pick up the CopyAddrKind from the store. PMOMemoryUse LoadUse, StoreUse; for (auto &Use : Uses) { if (Use.Inst != CAI) continue; if (Use.Kind == PMOUseKind::Load) { assert(LoadUse.isInvalid()); LoadUse = Use; } else { assert(StoreUse.isInvalid()); StoreUse = Use; } Use.Inst = nullptr; // Keep scanning in case the copy_addr appears multiple times. 
} assert((LoadUse.isValid() || StoreUse.isValid()) && "we should have a load or a store, possibly both"); assert(StoreUse.isInvalid() || StoreUse.Kind == Assign || StoreUse.Kind == Initialization || StoreUse.Kind == InitOrAssign); // Now that we've emitted a bunch of instructions, including a load and store // but also including other stuff, update the internal state of // LifetimeChecker to reflect them. // Update the instructions that touch the memory. NewInst can grow as this // iterates, so we can't use a foreach loop. for (auto *NewInst : NewInsts) { switch (NewInst->getKind()) { default: NewInst->dump(); llvm_unreachable("Unknown instruction generated by copy_addr lowering"); case SILInstructionKind::StoreInst: // If it is a store to the memory object (as oppose to a store to // something else), track it as an access. if (StoreUse.isValid()) { StoreUse.Inst = NewInst; // If our store use by the copy_addr is an assign, then we know that // before we store the new value, we loaded the old value implying that // our store is technically initializing memory when it occurs. So // change the kind to Initialization. if (StoreUse.Kind == Assign) StoreUse.Kind = Initialization; NonLoadUses[NewInst] = Uses.size(); Uses.push_back(StoreUse); } continue; case SILInstructionKind::LoadInst: // If it is a load from the memory object (as oppose to a load from // something else), track it as an access. We need to explicitly check to // see if the load accesses "TheMemory" because it could either be a load // for the copy_addr source, or it could be a load corresponding to the // "assign" operation on the destination of the copyaddr. if (LoadUse.isValid() && getAccessPathRoot(NewInst->getOperand(0)) == TheMemory) { if (auto *LI = dyn_cast(NewInst)) { if (LI->getOwnershipQualifier() == LoadOwnershipQualifier::Take) { LoadTakeUses.insert(LI); HasLocalKill.set(LI->getParent()); } } LoadUse.Inst = NewInst; Uses.push_back(LoadUse); } continue; case SILInstructionKind::RetainValueInst: case SILInstructionKind::StrongRetainInst: case SILInstructionKind::StrongReleaseInst: case SILInstructionKind::ReleaseValueInst: // Destroy overwritten value // These are ignored. continue; } } // Next, remove the copy_addr itself. deleter.forceDelete(CAI); } /// Promote a mark_dependence instruction of a loadable type into a /// mark_dependence void AvailableValueDataflowContext::updateMarkDependenceValues( MarkDependenceInst *md, SmallBitVector &RequiredElts, SmallVectorImpl &Result, llvm::SmallDenseMap &VisitedBlocks, SmallBitVector &ConflictingValues) { // Recursively compute all currently required available values up to the // mark_dependence, regardless of whether they are required for the // mark_dependence. computeAvailableValuesFrom(md->getIterator(), md->getParent(), RequiredElts, Result, VisitedBlocks, ConflictingValues); unsigned firstMDElt = computeSubelement(md->getValue(), TheMemory); // If address is an enum projection, we can't promote it since we don't track // subelements in a type that could be changing. if (firstMDElt == ~0U) { RequiredElts.clear(); ConflictingValues = SmallBitVector(Result.size(), true); return; } SILType valueTy = md->getValue()->getType().getObjectType(); unsigned numMDSubElements = getNumSubElements( valueTy, getFunction(), TypeExpansionContext(getFunction())); // Update each required subelement of the mark_dependence value. 
for (unsigned subIdx = firstMDElt; subIdx < firstMDElt + numMDSubElements; ++subIdx) { // If the available value has a conflict, then AvailableValue still holds // a value that was available on a different path. It cannot be used. if (ConflictingValues[subIdx]) { Result[subIdx] = {}; continue; } // If no value is available for this subelement, or it was never required, // then no promotion happens. if (auto &availableVal = Result[subIdx]) { auto newMD = createAvailableMarkDependence(md, availableVal); // Update the available value. This may invalidate the Result. Result[subIdx] = AvailableValue(newMD, subIdx, md); } } } // Find or create a mark_dependence that is a promotion of 'md' for 'value'. // Return nullptr if the mark_dependence cannot be promoted. // This does not currently allow phi creation. SILValue AvailableValueDataflowContext:: createAvailableMarkDependence(MarkDependenceInst *md, AvailableValue &availableVal) { SILValue value = availableVal.getValue(); // If the allocation is replaced, no copies are created for promoted values // and any promoted mark_dependence must be reused. if (optimizationMode == OptimizationMode::ReplaceAlloc) { auto mdIter = md->getIterator(); auto instBegin = md->getParentBlock()->begin(); while (mdIter != instBegin) { --mdIter; auto *newMD = dyn_cast(&*mdIter); if (!newMD) break; if (newMD->getValue() == value && newMD->getBase() == md->getBase() && newMD->dependenceKind() == md->dependenceKind()) { return newMD; } } } else { // With OptimizationMode::PreserveAlloc, always create a new copy for each // promoted value. This creates separate ownership, which is needed for each // promoted load, and prevents reusing the mark_dependence later (via the // code above) if the allocation is eliminated. value = ownershipFixup.getSingleOwnedValue(availableVal); if (!value) return SILValue(); } LLVM_DEBUG(llvm::dbgs() << " -- Promoting mark_dependence: " << *md << " source: " << value << "\n"); auto *newMD = SILBuilderWithScope(md) .createMarkDependence(md->getLoc(), value, md->getBase(), md->dependenceKind()); ownershipFixup.insertedInsts.push_back(newMD); return newMD; } bool AvailableValueDataflowContext::hasEscapedAt(SILInstruction *I) { // Return true if the box has escaped at the specified instruction. We are // not allowed to do load promotion in an escape region. // FIXME: This is not an aggressive implementation. :) // TODO: At some point, we should special case closures that just *read* from // the escaped value (by looking at the body of the closure). They should not // prevent load promotion, and will allow promoting values like X in regions // dominated by "... && X != 0". return HasAnyEscape; } static SILType getMemoryType(AllocationInst *memory) { // Compute the type of the memory object. if (auto *abi = dyn_cast(memory)) { assert(abi->getBoxType()->getLayout()->getFields().size() == 1 && "optimizing multi-field boxes not implemented"); return getSILBoxFieldType(TypeExpansionContext(*abi->getFunction()), abi->getBoxType(), abi->getModule().Types, 0); } assert(isa(memory)); return cast(memory)->getElementType(); } //===----------------------------------------------------------------------===// // Optimize dead allocation: // Fully promote each access //===----------------------------------------------------------------------===// namespace { class PromotableInstructions { // All promotable instructions share a vector of available values. 
  SmallVectorImpl<AvailableValue> &allAvailableValues;
  SmallVector<SILInstruction *> promotableInsts;
  SmallVector<std::pair<unsigned, unsigned>, 8> availableValueOffsets;

public:
  PromotableInstructions(SmallVectorImpl<AvailableValue> &allAvailableValues)
      : allAvailableValues(allAvailableValues) {}

  unsigned size() const { return promotableInsts.size(); }

  void push(SILInstruction *instruction) {
    promotableInsts.push_back(instruction);
  }

  // Available values must be initialized in the same order that the
  // instructions are pushed. Return the instruction's index.
  unsigned
  initializeAvailableValues(SILInstruction *instruction,
                            SmallVectorImpl<AvailableValue> &&availableValues) {
    unsigned nextInstIdx = availableValueOffsets.size();
    assert(instruction == promotableInsts[nextInstIdx]);
    unsigned startOffset = allAvailableValues.size();
    unsigned endOffset = startOffset + availableValues.size();
    availableValueOffsets.push_back({startOffset, endOffset});
    std::move(availableValues.begin(), availableValues.end(),
              std::back_inserter(allAvailableValues));
    return nextInstIdx;
  }

  ArrayRef<SILInstruction *> instructions() const { return promotableInsts; }

  ArrayRef<AvailableValue> availableValues(unsigned index) {
    return mutableAvailableValues(index);
  }

  MutableArrayRef<AvailableValue> mutableAvailableValues(unsigned index) {
    unsigned startOffset, endOffset;
    std::tie(startOffset, endOffset) = availableValueOffsets[index];
    return {allAvailableValues.begin() + startOffset,
            allAvailableValues.begin() + endOffset};
  }

#ifndef NDEBUG
  void verify() {
    for (unsigned i : range(promotableInsts.size())) {
      promotableInsts[i]->verifyOperandOwnership();
      assert(!availableValues(i).empty() &&
             "Value without any available values?!");
    }
  }
#endif
};

} // end anonymous namespace

namespace {

struct Promotions {
  SmallVector<AvailableValue> allAvailableValues;
  PromotableInstructions loadTakes;
  PromotableInstructions destroys;
  PromotableInstructions markDepBases;

  Promotions()
      : loadTakes(allAvailableValues), destroys(allAvailableValues),
        markDepBases(allAvailableValues) {}

#ifndef NDEBUG
  void verify() {
    loadTakes.verify();
    destroys.verify();
    markDepBases.verify();
  }
#endif
};

} // end anonymous namespace

namespace {

/// This performs load promotion and deletes synthesized allocations if all
/// loads can be removed.
class OptimizeDeadAlloc {
  SILModule &Module;

  /// This is either an alloc_box or alloc_stack instruction.
  AllocationInst *TheMemory;

  /// This is the SILType of the memory object.
  SILType MemoryType;

  /// The number of primitive subelements across all elements of this memory
  /// value.
  unsigned NumMemorySubElements;

  SmallVectorImpl<PMOMemoryUse> &Uses;
  SmallVectorImpl<SILInstruction *> &Releases;

  DeadEndBlocks &deadEndBlocks;

  InstructionDeleter &deleter;

  DominanceInfo *domInfo;

  /// A structure that we use to compute our available values.
  AvailableValueDataflowContext DataflowContext;

  Promotions promotions;

  SmallBlotSetVector<SILValue, 8> valuesNeedingLifetimeCompletion;

public:
  SILFunction *getFunction() const { return TheMemory->getFunction(); }

  bool isTrivial() const { return MemoryType.isTrivial(getFunction()); }

  OptimizeDeadAlloc(AllocationInst *memory, SmallVectorImpl<PMOMemoryUse> &uses,
                    SmallVectorImpl<SILInstruction *> &releases,
                    DeadEndBlocks &deadEndBlocks, InstructionDeleter &deleter,
                    DominanceInfo *domInfo)
      : Module(memory->getModule()), TheMemory(memory),
        MemoryType(getMemoryType(memory)),
        NumMemorySubElements(getNumSubElements(
            MemoryType, *memory->getFunction(),
            TypeExpansionContext(*memory->getFunction()))),
        Uses(uses), Releases(releases), deadEndBlocks(deadEndBlocks),
        deleter(deleter), domInfo(domInfo),
        DataflowContext(TheMemory, NumMemorySubElements,
                        OptimizationMode::ReplaceAlloc, uses, deleter,
                        deadEndBlocks) {}

  /// If the allocation is an autogenerated allocation that is only stored to
  /// (after load promotion) then remove it completely.
  bool tryToRemoveDeadAllocation();

private:
  SILInstruction *collectUsesForPromotion();

  /// Return true if a mark_dependence can be promoted. If so, this initializes
  /// the available values in promotions.
  bool canPromoteMarkDepBase(MarkDependenceInst *md);

  /// Return true if a load [take] or destroy_addr can be promoted. If so, this
  /// initializes the available values in promotions.
  bool canPromoteTake(SILInstruction *i,
                      PromotableInstructions &promotableInsts);

  SILValue promoteMarkDepBase(MarkDependenceInst *md,
                              ArrayRef<AvailableValue> availableValues);

  void promoteMarkDepAddrBase(MarkDependenceAddrInst *md,
                              ArrayRef<AvailableValue> availableValues);

  /// Promote a load [take], cleaning up everything except for RAUWing the
  /// instruction with the aggregated result. The routine returns the new
  /// aggregated result to the caller and expects the caller to eventually RAUW
  /// \p inst with the return value. The reason why we do this is to allow for
  /// the caller to work around invalidation issues by not deleting the load
  /// [take] until after all load [take]s have been cleaned up.
  ///
  /// \returns the value that the caller will RAUW with \p inst.
  SILValue promoteLoadTake(LoadInst *inst,
                           ArrayRef<AvailableValue> availableValues);

  void promoteDestroyAddr(DestroyAddrInst *dai,
                          ArrayRef<AvailableValue> availableValues);

  bool canRemoveDeadAllocation();

  void removeDeadAllocation();
};

} // end anonymous namespace

// We don't want to remove allocations that are required for useful debug
// information at -O0. As such, we only remove allocations if:
//
// 1. They are in a transparent function.
// 2. They are in a normal function, but didn't come from a VarDecl, or came
//    from one that was autogenerated or inlined from a transparent function.
static bool isRemovableAutogeneratedAllocation(AllocationInst *TheMemory) {
  SILLocation loc = TheMemory->getLoc();
  return TheMemory->getFunction()->isTransparent() ||
         !loc.getAsASTNode<VarDecl>() || loc.isAutoGenerated() ||
         loc.is<MandatoryInlinedLocation>();
}

bool OptimizeDeadAlloc::tryToRemoveDeadAllocation() {
  if (!canRemoveDeadAllocation()) {
    DataflowContext.deleteInsertedInsts(deleter);
    return false;
  }

  removeDeadAllocation();

  // Once the entire allocation is promoted, none of the instructions promoted
  // during dataflow should need ownership fixup.
  return true;
}

bool OptimizeDeadAlloc::canRemoveDeadAllocation() {
  assert(TheMemory->getFunction()->hasOwnership() &&
         "Can only eliminate dead allocations with ownership enabled");
  assert((isa<AllocBoxInst>(TheMemory) || isa<AllocStackInst>(TheMemory)) &&
         "Unhandled allocation case");

  if (!isRemovableAutogeneratedAllocation(TheMemory))
    return false;

  // Check the uses list to see if there are any non-store uses left over after
  // load promotion and other things PMO does.
  if (auto *badUser = collectUsesForPromotion()) {
    LLVM_DEBUG(llvm::dbgs() << "*** Failed to remove autogenerated alloc: "
                               "kept alive by: "
                            << *badUser);
    return false;
  }

  for (auto *md : promotions.markDepBases.instructions()) {
    if (!canPromoteMarkDepBase(cast<MarkDependenceInst>(md)))
      return false;
  }
  if (isTrivial()) {
    return true;
  }
  for (auto *load : promotions.loadTakes.instructions()) {
    if (!canPromoteTake(load, promotions.loadTakes))
      return false;
  }
  for (auto *destroy : promotions.destroys.instructions()) {
    if (!canPromoteTake(destroy, promotions.destroys))
      return false;
  }

  // Gather up all found available values before promoting anything so we can
  // fix up lifetimes later if we need to.
  for (auto pmoMemUse : Uses) {
    if (pmoMemUse.Inst && pmoMemUse.Kind == PMOUseKind::Initialization) {
      if (isa<MarkDependenceInst>(pmoMemUse.Inst)) {
        // A mark_dependence of the dependent value is considered both a load
        // and an init use. They can simply be deleted when the allocation is
        // dead.
        continue;
      }
      // Today, if we promote, this is always a store, since we would have
      // blown up the copy_addr otherwise. Given that, always make sure we
      // clean up the src as appropriate after we optimize.
      auto *si = dyn_cast<StoreInst>(pmoMemUse.Inst);
      if (!si)
        return false;
      auto src = si->getSrc();

      // Bail if src has any uses that are forwarding unowned uses. This
      // allows us to know that we never have to deal with forwarding unowned
      // instructions like br. These are corner cases that complicate the
      // logic below.
      for (auto *use : src->getUses()) {
        if (use->getOperandOwnership() == OperandOwnership::ForwardingUnowned)
          return false;
      }
      // FIXME: Lifetime completion on Boundary::Liveness requires that 'src'
      // has no escaping uses. Check escaping uses here and either bail out or
      // request completion on Boundary::Availability.
      valuesNeedingLifetimeCompletion.insert(src);
    }
  }
  return true;
}

// Collect all uses that require promotion before this allocation can be
// eliminated. Returns nullptr on success. Upon failure, returns the first
// instruction corresponding to a use that cannot be promoted.
//
// Populates 'promotions'.
SILInstruction *OptimizeDeadAlloc::collectUsesForPromotion() {
  for (auto &u : Uses) {
    // Ignore removed instructions.
    if (u.Inst == nullptr)
      continue;

    switch (u.Kind) {
    case PMOUseKind::Assign:
      // Until we can promote the value being destroyed by the assign, we
      // cannot remove deallocations with such assigns.
      return u.Inst;
    case PMOUseKind::InitOrAssign:
      continue; // These don't prevent removal.
    case PMOUseKind::Load:
      // For now only handle takes from alloc_stack.
      //
      // TODO: It should be implementable, but it has not been needed yet.
      if (auto *li = dyn_cast<LoadInst>(u.Inst)) {
        if (li->getOwnershipQualifier() == LoadOwnershipQualifier::Take) {
          promotions.loadTakes.push(li);
          continue;
        }
      }
      if (isa<MarkDependenceInst>(u.Inst)) {
        // A mark_dependence source use does not prevent removal. The use
        // collector already looks through them to find other uses.
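        //
        // For illustration only (hypothetical SIL): a use such as
        //   %d = mark_dependence %allocaddr on %base
        // only names the allocation's address as the dependent value; it does
        // not itself load from or store to the memory.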
        continue;
      }
      return u.Inst;
    case PMOUseKind::DependenceBase:
      promotions.markDepBases.push(u.Inst);
      continue;
    case PMOUseKind::Initialization:
      if (!isa<ApplyInst>(u.Inst) &&
          // A copy_addr that is not a take affects the retain count
          // of the source.
          (!isa<CopyAddrInst>(u.Inst) ||
           cast<CopyAddrInst>(u.Inst)->isTakeOfSrc())) {
        continue;
      }
      LLVM_FALLTHROUGH;
    case PMOUseKind::IndirectIn:
    case PMOUseKind::InOutUse:
    case PMOUseKind::Escape:
      return u.Inst; // These do prevent removal.
    }
  }

  // Ignore destroys of trivial values. They are destroy_value instructions
  // that only destroy the dead box itself.
  if (!isTrivial()) {
    // Non-trivial allocations require ownership cleanup. We only promote
    // alloc_stack in that case--all releases must be destroy_addr.
    for (auto *release : Releases) {
      // We stash all of the destroy_addr that we see.
      if (auto *dai = dyn_cast_or_null<DestroyAddrInst>(release)) {
        promotions.destroys.push(dai);
        continue;
      }
      return release;
    }
  }
  return nullptr;
}

bool OptimizeDeadAlloc::canPromoteMarkDepBase(MarkDependenceInst *md) {
  SILValue srcAddr = md->getBase();
  SmallVector<AvailableValue, 8> availableValues;
  auto loadInfo =
      DataflowContext.computeAvailableValues(srcAddr, md, availableValues);
  if (!loadInfo.has_value())
    return false;

  unsigned index = promotions.markDepBases.initializeAvailableValues(
      md, std::move(availableValues));

  SILType baseTy = loadInfo->loadType;
  if (auto *abi = dyn_cast<AllocBoxInst>(TheMemory)) {
    if (baseTy == abi->getType()) {
      baseTy = MemoryType.getObjectType();
    }
  }
  return isFullyAvailable(baseTy, loadInfo->firstElt,
                          promotions.markDepBases.availableValues(index));
}

/// Return true if we can promote the given take (a load [take] or a
/// destroy_addr).
bool OptimizeDeadAlloc::canPromoteTake(
    SILInstruction *inst, PromotableInstructions &promotableInsts) {
  SILValue address = inst->getOperand(0);

  // We cannot promote destroys of address-only types, because we can't expose
  // the load.
  if (address->getType().isAddressOnly(*inst->getFunction()))
    return false;

  SmallVector<AvailableValue, 8> availableValues;
  auto loadInfo =
      DataflowContext.computeAvailableValues(address, inst, availableValues);
  if (!loadInfo.has_value())
    return false;

  // Now check that we can perform a take upon our available values. This
  // implies today that our value is fully available. If the value is not fully
  // available, we would need to split stores to promote this destroy_addr. We
  // do not support that yet.
  AvailableValueAggregator agg(inst, availableValues, Uses, deadEndBlocks,
                               AvailableValueExpectedOwnership::Take);
  if (!agg.canTake(loadInfo->loadType, loadInfo->firstElt))
    return false;

  // As a final check, make sure that we have an available value for each
  // subelement; if not, bail.
  for (const auto &av : availableValues)
    if (!av.Value)
      return false;

  // Ok, we can promote this destroy_addr... move the temporary list's contents
  // into the final AvailableValues list.
  promotableInsts.initializeAvailableValues(inst, std::move(availableValues));
  return true;
}

void OptimizeDeadAlloc::removeDeadAllocation() {
  for (auto idxVal : llvm::enumerate(promotions.markDepBases.instructions())) {
    auto vals = promotions.markDepBases.availableValues(idxVal.index());
    if (auto *mdi = dyn_cast<MarkDependenceInst>(idxVal.value())) {
      promoteMarkDepBase(mdi, vals);
      continue;
    }
    auto *mda = cast<MarkDependenceAddrInst>(idxVal.value());
    promoteMarkDepAddrBase(mda, vals);
  }

  // If our memory is trivially typed, we can just remove it without needing to
  // consider if the stored value needs to be destroyed. So at this point,
  // delete the memory!
  if (isTrivial()) {
    LLVM_DEBUG(llvm::dbgs()
               << "*** Removing autogenerated trivial allocation: "
               << *TheMemory);
    // If it is safe to remove, do it.
    // Recursively remove all instructions hanging off the allocation
    // instruction, then return success. Let the caller remove the allocation
    // itself to avoid iterator invalidation.
    deleter.forceDeleteWithUsers(TheMemory);
    return;
  }

  // Since our load [take]s may be available values for our
  // destroy_addr/load [take]s, we promote the destroy_addrs first and then
  // handle load [take]s with extra rigour later to handle that possibility.
  for (auto idxVal : llvm::enumerate(promotions.destroys.instructions())) {
    auto *dai = cast<DestroyAddrInst>(idxVal.value());
    auto vals = promotions.destroys.availableValues(idxVal.index());
    promoteDestroyAddr(dai, vals);
    // We do not need to unset releases, since we are going to exit here.
  }

  llvm::SmallMapVector<LoadInst *, SILValue, 32> loadsToDelete;
  for (auto idxVal : llvm::enumerate(promotions.loadTakes.instructions())) {
    for (auto &availableVal :
         promotions.loadTakes.mutableAvailableValues(idxVal.index())) {
      auto *availableLoad = dyn_cast<LoadInst>(availableVal.Value);
      if (!availableLoad)
        continue;
      auto iter = loadsToDelete.find(availableLoad);
      if (iter == loadsToDelete.end())
        continue;
      SILValue newValue = iter->second;
      assert(newValue && "We should never store a nil SILValue into this map");
      availableVal.Value = newValue;
    }

    auto *loadTake = cast<LoadInst>(idxVal.value());
    auto vals = promotions.loadTakes.availableValues(idxVal.index());
    SILValue result = promoteLoadTake(loadTake, vals);
    assert(result);

    // We cannot erase the load [take] yet, since a load [take] that we are
    // promoting could itself be an available value for another load [take].
    // Consider the following SIL:
    //
    //   %mem = alloc_stack
    //   store %arg to [init] %mem
    //   %0 = load [take] %mem
    //   store %0 to [init] %mem
    //   %1 = load [take] %mem
    //   destroy_value %1
    //   dealloc_stack %mem
    //
    // In such a case, we are going to delete %0 here, but %0 is an available
    // value for %1, so we record the promoted result in loadsToDelete and only
    // RAUW/erase the loads once every load [take] has been promoted.
    auto insertIter = loadsToDelete.insert({loadTake, result});
    valuesNeedingLifetimeCompletion.erase(loadTake);
    (void)insertIter;
    assert(insertIter.second && "loadTakeState doesn't have unique loads?!");
  }

  // Now that we have promoted all of our load [take]s, perform the actual
  // RAUW/removal.
  for (auto p : loadsToDelete) {
    LoadInst *li = p.first;
    SILValue newValue = p.second;
    li->replaceAllUsesWith(newValue);
    deleter.forceDelete(li);
  }

  LLVM_DEBUG(llvm::dbgs() << "*** Removing autogenerated non-trivial alloc: "
                          << *TheMemory);

  // If it is safe to remove, do it. Recursively remove all instructions
  // hanging off the allocation instruction, then return success.
  deleter.forceDeleteWithUsers(TheMemory);

  DataflowContext.verifyOwnership(deadEndBlocks);

  // Now look at all of our available values and complete any of their
  // post-dominating consuming use sets. This can happen if we have an enum
  // that is known dynamically none along a path. This is dynamically correct,
  // but cannot be represented in OSSA, so we insert these destroys along said
  // path.
  OSSACompleteLifetime completion(TheMemory->getFunction(), domInfo,
                                  deadEndBlocks);
  while (!valuesNeedingLifetimeCompletion.empty()) {
    auto optV = valuesNeedingLifetimeCompletion.pop_back_val();
    if (!optV)
      continue;
    SILValue v = *optV;
    // Lexical enums can have incomplete lifetimes in non-payload paths that
    // don't end in unreachable. Force their lifetime to end immediately after
    // the last use instead.
    auto boundary = v->getType().isOrHasEnum()
                        ? OSSACompleteLifetime::Boundary::Liveness
                        : OSSACompleteLifetime::Boundary::Availability;
    LLVM_DEBUG(llvm::dbgs() << "Completing lifetime of: ");
    LLVM_DEBUG(v->dump());
    completion.completeOSSALifetime(v, boundary);
  }
}

SILValue OptimizeDeadAlloc::promoteMarkDepBase(
    MarkDependenceInst *md, ArrayRef<AvailableValue> availableValues) {
  LLVM_DEBUG(llvm::dbgs() << " *** Promoting mark_dependence base: " << *md);

  SILBuilderWithScope B(md);
  SILValue dependentValue = md->getValue();
  for (auto &availableValue : availableValues) {
    dependentValue =
        B.createMarkDependence(md->getLoc(), dependentValue,
                               availableValue.getValue(), md->dependenceKind());
  }
  LLVM_DEBUG(llvm::dbgs() << " To value: " << dependentValue);

  md->replaceAllUsesWith(dependentValue);
  deleter.deleteIfDead(md);
  return dependentValue;
}

void OptimizeDeadAlloc::promoteMarkDepAddrBase(
    MarkDependenceAddrInst *md, ArrayRef<AvailableValue> availableValues) {
  SILValue dependentAddress = md->getAddress();
  LLVM_DEBUG(llvm::dbgs() << " *** Promoting mark_dependence_addr base: " << *md
                          << " To address: " << dependentAddress);
  SILBuilderWithScope B(md);
  for (auto &availableValue : availableValues) {
    B.createMarkDependenceAddr(md->getLoc(), dependentAddress,
                               availableValue.getValue(),
                               md->dependenceKind());
  }
  deleter.deleteIfDead(md);
}

SILValue
OptimizeDeadAlloc::promoteLoadTake(LoadInst *li,
                                   ArrayRef<AvailableValue> availableValues) {
  assert(li->getOwnershipQualifier() == LoadOwnershipQualifier::Take &&
         "load [copy], load [trivial], load should be handled by "
         "promoteLoadCopy");

  SILValue address = li->getOperand();
  SILType loadTy = address->getType().getObjectType();

  // Compute the access path down to the field so we can determine precise
  // def/use behavior.
  unsigned firstElt = computeSubelement(address, TheMemory);

  // Aggregate together all of the subelements into something that has the same
  // type as the load did, and emit smaller loads for any subelements that were
  // not available.
  AvailableValueAggregator agg(li, availableValues, Uses, deadEndBlocks,
                               AvailableValueExpectedOwnership::Take);
  SILValue newVal = agg.aggregateValues(loadTy, address, firstElt);
  assert(newVal);

  ++NumLoadTakePromoted;

  LLVM_DEBUG(llvm::dbgs() << " *** Promoting load_take: " << *li);
  LLVM_DEBUG(llvm::dbgs() << " To value: " << *newVal);

  // Our parent RAUWs with newVal/erases li.
  return newVal;
}

// DestroyAddr is a composed operation merging load [take] + destroy_value. If
// the implicit load's value is available, explode it.
//
// NOTE: We only do this if we have a fully available value.
//
// Note that we handle the general case of a destroy_addr of a piece of the
// memory object, not just destroy_addrs of the entire thing.
void OptimizeDeadAlloc::promoteDestroyAddr(
    DestroyAddrInst *dai, ArrayRef<AvailableValue> availableValues) {
  SILValue address = dai->getOperand();
  SILType loadTy = address->getType().getObjectType();

  // Compute the access path down to the field so we can determine precise
  // def/use behavior.
  unsigned firstElt = computeSubelement(address, TheMemory);

  // Aggregate together all of the subelements into something that has the same
  // type as the load did, and emit smaller loads for any subelements that were
  // not available.
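  //
  // For illustration only (hypothetical SIL): given
  //
  //   store %v to [init] %stackaddr
  //   destroy_addr %stackaddr
  //
  // and a fully available %v, the destroy_addr below is rewritten to consume
  // the value directly as "destroy_value %v"; the store and the allocation
  // itself are deleted afterwards when the whole allocation is removed.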
  AvailableValueAggregator agg(dai, availableValues, Uses, deadEndBlocks,
                               AvailableValueExpectedOwnership::Take);
  SILValue newVal = agg.aggregateValues(loadTy, address, firstElt);

  ++NumDestroyAddrPromoted;

  LLVM_DEBUG(llvm::dbgs() << " *** Promoting destroy_addr: " << *dai);
  LLVM_DEBUG(llvm::dbgs() << " To value: " << *newVal);

  SILBuilderWithScope(dai).emitDestroyValueOperation(dai->getLoc(), newVal);
  deleter.forceDelete(dai);
}

//===----------------------------------------------------------------------===//
//                          Top Level Entrypoints
//===----------------------------------------------------------------------===//

static AllocationInst *getOptimizableAllocation(SILInstruction *i) {
  if (!isa<AllocBoxInst>(i) && !isa<AllocStackInst>(i)) {
    return nullptr;
  }

  auto *alloc = cast<AllocationInst>(i);

  // If our aggregate has unreferenceable storage, we can't optimize. Return
  // nullptr.
  if (getMemoryType(alloc).aggregateHasUnreferenceableStorage())
    return nullptr;

  // Do not perform this on move-only values, since we introduce copies to
  // promote things.
  if (getMemoryType(alloc).isMoveOnly())
    return nullptr;

  // Don't promote large types.
  auto &mod = alloc->getFunction()->getModule();
  if (EnableAggressiveExpansionBlocking &&
      mod.getOptions().UseAggressiveReg2MemForCodeSize &&
      !shouldExpand(mod, alloc->getType().getObjectType()))
    return nullptr;

  // Otherwise we are good to go. Let's try to optimize this memory!
  return alloc;
}

bool swift::eliminateDeadAllocations(SILFunction *fn, DominanceInfo *domInfo) {
  if (!fn->hasOwnership()) {
    return false;
  }

  bool changed = false;
  DeadEndBlocks deadEndBlocks(fn);

  for (auto &bb : *fn) {
    InstructionDeleter deleter;
    for (SILInstruction &inst : bb.deletableInstructions()) {
      // First see if inst is an allocation that we can optimize. If not, skip
      // it.
      AllocationInst *alloc = getOptimizableAllocation(&inst);
      if (!alloc) {
        continue;
      }

      LLVM_DEBUG(llvm::dbgs()
                 << "*** PMO Dead Allocation Elimination looking at: "
                 << *alloc);
      PMOMemoryObjectInfo memInfo(alloc);

      // Set up the data structures used to collect the uses of the allocation.
      SmallVector<PMOMemoryUse> uses;
      SmallVector<SILInstruction *> destroys;

      // Walk the use list of the pointer, collecting them. If we are not able
      // to optimize, skip this value. *NOTE* We may still scalarize values
      // inside the value.
      if (!collectPMOElementUsesAndDestroysFrom(memInfo, uses, destroys)) {
        continue;
      }

      OptimizeDeadAlloc optimizeDeadAlloc(alloc, uses, destroys, deadEndBlocks,
                                          deleter, domInfo);
      if (optimizeDeadAlloc.tryToRemoveDeadAllocation()) {
        deleter.cleanupDeadInstructions();
        ++NumAllocRemoved;
        changed = true;
      }
    }
  }

  return changed;
}

namespace {

class PredictableDeadAllocationElimination : public SILFunctionTransform {
  void run() override {
    auto *func = getFunction();
    if (!func->hasOwnership())
      return;
    LLVM_DEBUG(llvm::dbgs() << "Looking at: " << func->getName() << "\n");
    auto *da = getAnalysis<DominanceAnalysis>();
    // If we are already canonical or do not have ownership, just bail.
    if (func->wasDeserializedCanonical() || !func->hasOwnership())
      return;
    if (eliminateDeadAllocations(func, da->get(func)))
      invalidateAnalysis(SILAnalysis::InvalidationKind::FunctionBody);
  }
};

} // end anonymous namespace

SILTransform *swift::createPredictableDeadAllocationElimination() {
  return new PredictableDeadAllocationElimination();
}