//===--- PredictableMemOpt.cpp - Perform predictable memory optzns -------===//
//
// This source file is part of the Swift.org open source project
//
// Copyright (c) 2014 - 2017 Apple Inc. and the Swift project authors
// Licensed under Apache License v2.0 with Runtime Library Exception
//
// See https://swift.org/LICENSE.txt for license information
// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
//
//===----------------------------------------------------------------------===//

#define DEBUG_TYPE "predictable-memopt"

#include "PMOMemoryUseCollector.h"
#include "swift/SIL/BasicBlockUtils.h"
#include "swift/SIL/BranchPropagatedUser.h"
#include "swift/SIL/OwnershipUtils.h"
#include "swift/SIL/SILBuilder.h"
#include "swift/SILOptimizer/PassManager/Passes.h"
#include "swift/SILOptimizer/PassManager/Transforms.h"
#include "swift/SILOptimizer/Utils/Local.h"
#include "swift/SILOptimizer/Utils/SILSSAUpdater.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"

using namespace swift;

STATISTIC(NumLoadPromoted, "Number of loads promoted");
STATISTIC(NumDestroyAddrPromoted, "Number of destroy_addrs promoted");
STATISTIC(NumAllocRemoved, "Number of allocations completely removed");

//===----------------------------------------------------------------------===//
//                            Subelement Analysis
//===----------------------------------------------------------------------===//

// We can only analyze components of structs whose storage is fully accessible
// from Swift.
static StructDecl *getFullyReferenceableStruct(SILType Ty) {
  auto SD = Ty.getStructOrBoundGenericStruct();
  if (!SD || SD->hasUnreferenceableStorage())
    return nullptr;
  return SD;
}

static unsigned getNumSubElements(SILType T, SILModule &M) {
  if (auto TT = T.getAs<TupleType>()) {
    unsigned NumElements = 0;
    for (auto index : indices(TT.getElementTypes()))
      NumElements += getNumSubElements(T.getTupleElementType(index), M);
    return NumElements;
  }

  if (auto *SD = getFullyReferenceableStruct(T)) {
    unsigned NumElements = 0;
    for (auto *D : SD->getStoredProperties())
      NumElements += getNumSubElements(T.getFieldType(D, M), M);
    return NumElements;
  }

  // If this isn't a tuple or struct, it is a single element.
  return 1;
}
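// For illustration only: the subelement count flattens nested aggregates into
// a linear sequence of scalar leaves. Given a hypothetical type such as
//
//   struct Pair { var x: Int, y: Int }     // 2 subelements
//   (Int, Pair)                            // tuple with 3 subelements total
//
// getNumSubElements returns 1 for Int, 2 for Pair, and 3 for the tuple,
// matching the linear indices that computeSubelement (below) assigns to each
// leaf.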
/// getAccessPathRoot - Given an address, dive through any tuple/struct element
/// addresses to get the underlying value.
static SILValue getAccessPathRoot(SILValue pointer) {
  while (true) {
    if (auto *TEAI = dyn_cast<TupleElementAddrInst>(pointer)) {
      pointer = TEAI->getOperand();
      continue;
    }

    if (auto *SEAI = dyn_cast<StructElementAddrInst>(pointer)) {
      pointer = SEAI->getOperand();
      continue;
    }

    if (auto *BAI = dyn_cast<BeginAccessInst>(pointer)) {
      pointer = BAI->getSource();
      continue;
    }

    return pointer;
  }
}

/// Compute the subelement number indicated by the specified pointer (which is
/// derived from the root by a series of tuple/struct element addresses) by
/// treating the type as a linearized namespace with sequential elements. For
/// example, given:
///
///   root = alloc { a: { c: i64, d: i64 }, b: (i64, i64) }
///   tmp1 = struct_element_addr root, 1
///   tmp2 = tuple_element_addr tmp1, 0
///
/// This will return a subelement number of 2.
///
/// If this pointer is to within an existential projection, it returns ~0U.
static unsigned computeSubelement(SILValue Pointer,
                                  SingleValueInstruction *RootInst) {
  unsigned SubElementNumber = 0;
  SILModule &M = RootInst->getModule();

  while (1) {
    // If we got to the root, we're done.
    if (RootInst == Pointer)
      return SubElementNumber;

    if (auto *PBI = dyn_cast<ProjectBoxInst>(Pointer)) {
      Pointer = PBI->getOperand();
      continue;
    }

    if (auto *BAI = dyn_cast<BeginAccessInst>(Pointer)) {
      Pointer = BAI->getSource();
      continue;
    }

    if (auto *TEAI = dyn_cast<TupleElementAddrInst>(Pointer)) {
      SILType TT = TEAI->getOperand()->getType();

      // Keep track of what subelement is being referenced.
      for (unsigned i = 0, e = TEAI->getFieldNo(); i != e; ++i) {
        SubElementNumber += getNumSubElements(TT.getTupleElementType(i), M);
      }
      Pointer = TEAI->getOperand();
      continue;
    }

    if (auto *SEAI = dyn_cast<StructElementAddrInst>(Pointer)) {
      SILType ST = SEAI->getOperand()->getType();

      // Keep track of what subelement is being referenced.
      StructDecl *SD = SEAI->getStructDecl();
      for (auto *D : SD->getStoredProperties()) {
        if (D == SEAI->getField())
          break;
        SubElementNumber += getNumSubElements(ST.getFieldType(D, M), M);
      }

      Pointer = SEAI->getOperand();
      continue;
    }

    assert(isa<InitExistentialAddrInst>(Pointer) &&
           "Unknown access path instruction");
    // Cannot promote loads and stores from within an existential projection.
    return ~0U;
  }
}
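// Illustrative sketch of the ~0U case above: once an access path passes
// through an existential projection, e.g.
//
//   %1 = init_existential_addr %root : $*Any, $T
//   %2 = struct_element_addr %1 : $*T, #T.field
//
// the concrete layout behind the existential is not tracked, so the leaf
// cannot be assigned a stable linear index and callers must bail.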
//===----------------------------------------------------------------------===//
//                              Available Value
//===----------------------------------------------------------------------===//

namespace {

class AvailableValueAggregator;

struct AvailableValue {
  friend class AvailableValueAggregator;

  SILValue Value;
  unsigned SubElementNumber;

  /// If this gets too expensive in terms of copying, we can use an arena and a
  /// FrozenPtrSet like we do in ARC.
  SmallSetVector<StoreInst *, 1> InsertionPoints;

  /// Just for updating.
  SmallVectorImpl<PMOMemoryUse> *Uses;

public:
  AvailableValue() = default;

  /// Main initializer for available values.
  ///
  /// *NOTE* We assume that all available values start with a singular insertion
  /// point and insertion points are added by merging.
  AvailableValue(SILValue Value, unsigned SubElementNumber,
                 StoreInst *InsertPoint)
      : Value(Value), SubElementNumber(SubElementNumber), InsertionPoints() {
    InsertionPoints.insert(InsertPoint);
  }

  /// Deleted copy constructor. This is a move only type.
  AvailableValue(const AvailableValue &) = delete;

  /// Deleted copy operator. This is a move only type.
  AvailableValue &operator=(const AvailableValue &) = delete;

  /// Move constructor.
  AvailableValue(AvailableValue &&Other)
      : Value(nullptr), SubElementNumber(~0), InsertionPoints() {
    std::swap(Value, Other.Value);
    std::swap(SubElementNumber, Other.SubElementNumber);
    std::swap(InsertionPoints, Other.InsertionPoints);
  }

  /// Move operator.
  AvailableValue &operator=(AvailableValue &&Other) {
    std::swap(Value, Other.Value);
    std::swap(SubElementNumber, Other.SubElementNumber);
    std::swap(InsertionPoints, Other.InsertionPoints);
    return *this;
  }

  operator bool() const { return bool(Value); }

  bool operator==(const AvailableValue &Other) const {
    return Value == Other.Value && SubElementNumber == Other.SubElementNumber;
  }

  bool operator!=(const AvailableValue &Other) const {
    return !(*this == Other);
  }

  SILValue getValue() const { return Value; }
  SILType getType() const { return Value->getType(); }
  unsigned getSubElementNumber() const { return SubElementNumber; }

  ArrayRef<StoreInst *> getInsertionPoints() const {
    return InsertionPoints.getArrayRef();
  }

  void mergeInsertionPoints(const AvailableValue &Other) & {
    assert(Value == Other.Value && SubElementNumber == Other.SubElementNumber);
    InsertionPoints.set_union(Other.InsertionPoints);
  }

  void addInsertionPoint(StoreInst *I) & { InsertionPoints.insert(I); }

  AvailableValue emitStructExtract(SILBuilder &B, SILLocation Loc, VarDecl *D,
                                   unsigned SubElementNumber) const {
    SILValue NewValue = B.emitStructExtract(Loc, Value, D);
    return {NewValue, SubElementNumber, InsertionPoints};
  }

  AvailableValue emitTupleExtract(SILBuilder &B, SILLocation Loc,
                                  unsigned EltNo,
                                  unsigned SubElementNumber) const {
    SILValue NewValue = B.emitTupleExtract(Loc, Value, EltNo);
    return {NewValue, SubElementNumber, InsertionPoints};
  }

  AvailableValue emitBeginBorrow(SILBuilder &b, SILLocation loc) const {
    // If we do not have ownership or already are guaranteed, just return a
    // copy of our state.
    if (!b.hasOwnership() ||
        Value.getOwnershipKind().isCompatibleWith(
            ValueOwnershipKind::Guaranteed)) {
      return {Value, SubElementNumber, InsertionPoints};
    }

    // Otherwise, return newValue.
    return {b.createBeginBorrow(loc, Value), SubElementNumber,
            InsertionPoints};
  }

  void dump() const LLVM_ATTRIBUTE_USED;
  void print(llvm::raw_ostream &os) const;

private:
  /// Private constructor.
  AvailableValue(SILValue Value, unsigned SubElementNumber,
                 const decltype(InsertionPoints) &InsertPoints)
      : Value(Value), SubElementNumber(SubElementNumber),
        InsertionPoints(InsertPoints) {}
};

} // end anonymous namespace

void AvailableValue::dump() const { print(llvm::dbgs()); }

void AvailableValue::print(llvm::raw_ostream &os) const {
  os << "Available Value Dump. Value: ";
  if (getValue()) {
    os << getValue();
  } else {
    os << "NoValue;\n";
  }
  os << "SubElementNumber: " << getSubElementNumber() << "\n";
  os << "Insertion Points:\n";
  for (auto *I : getInsertionPoints()) {
    os << *I;
  }
}

namespace llvm {

llvm::raw_ostream &operator<<(llvm::raw_ostream &os, const AvailableValue &V) {
  V.print(os);
  return os;
}

} // end llvm namespace
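// A rough example of why an available value can carry multiple insertion
// points: if both sides of a diamond store the *same* SILValue,
//
//   bb1: store %v to %addr      bb2: store %v to %addr
//              \                     /
//                   bb3 (the load)
//
// the two AvailableValues merge into one whose InsertionPoints holds both
// stores, and copies (if needed) are later emitted at each store when
// promoting.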
//===----------------------------------------------------------------------===//
//                           Subelement Extraction
//===----------------------------------------------------------------------===//

/// Given an aggregate value and an access path, non-destructively extract the
/// value indicated by the path.
static SILValue nonDestructivelyExtractSubElement(const AvailableValue &Val,
                                                  SILBuilder &B,
                                                  SILLocation Loc) {
  SILType ValTy = Val.getType();
  unsigned SubElementNumber = Val.SubElementNumber;

  // Extract tuple elements.
  if (auto TT = ValTy.getAs<TupleType>()) {
    for (unsigned EltNo : indices(TT.getElementTypes())) {
      // Keep track of what subelement is being referenced.
      SILType EltTy = ValTy.getTupleElementType(EltNo);
      unsigned NumSubElt = getNumSubElements(EltTy, B.getModule());
      if (SubElementNumber < NumSubElt) {
        auto BorrowedVal = Val.emitBeginBorrow(B, Loc);
        auto NewVal =
            BorrowedVal.emitTupleExtract(B, Loc, EltNo, SubElementNumber);
        SILValue result = nonDestructivelyExtractSubElement(NewVal, B, Loc);
        // If our original value wasn't guaranteed and we did actually perform
        // a borrow as a result, insert the end_borrow.
        if (BorrowedVal.getValue() != Val.getValue())
          B.createEndBorrow(Loc, BorrowedVal.getValue());
        return result;
      }

      SubElementNumber -= NumSubElt;
    }

    llvm_unreachable("Didn't find field");
  }

  // Extract struct elements.
  if (auto *SD = getFullyReferenceableStruct(ValTy)) {
    for (auto *D : SD->getStoredProperties()) {
      auto fieldType = ValTy.getFieldType(D, B.getModule());
      unsigned NumSubElt = getNumSubElements(fieldType, B.getModule());

      if (SubElementNumber < NumSubElt) {
        auto BorrowedVal = Val.emitBeginBorrow(B, Loc);
        auto NewVal =
            BorrowedVal.emitStructExtract(B, Loc, D, SubElementNumber);
        SILValue result = nonDestructivelyExtractSubElement(NewVal, B, Loc);
        // If our original value wasn't guaranteed and we did actually perform
        // a borrow as a result, insert the end_borrow.
        if (BorrowedVal.getValue() != Val.getValue())
          B.createEndBorrow(Loc, BorrowedVal.getValue());
        return result;
      }

      SubElementNumber -= NumSubElt;
    }
    llvm_unreachable("Didn't find field");
  }

  // Otherwise, we're down to a scalar. If we have ownership enabled, we
  // return a copy. Otherwise, we can ignore ownership issues. This is ok
  // since in [ossa] we are going to eliminate a load [copy] or a
  // load [trivial], while in non-[ossa] SIL we will be replacing unqualified
  // loads.
  assert(SubElementNumber == 0 && "Miscalculation indexing subelements");
  if (!B.hasOwnership())
    return Val.getValue();
  return B.emitCopyValueOperation(Loc, Val.getValue());
}
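// Roughly, in [ossa] the non-destructive extraction of one leaf from an owned
// aggregate %agg looks like:
//
//   %borrow = begin_borrow %agg
//   %elt    = struct_extract %borrow, #S.field
//   %copy   = copy_value %elt
//   end_borrow %borrow
//
// so the original aggregate's ownership is untouched and the caller receives
// an independently owned scalar.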
//===----------------------------------------------------------------------===//
//                        Available Value Aggregation
//===----------------------------------------------------------------------===//

static bool anyMissing(unsigned StartSubElt, unsigned NumSubElts,
                       ArrayRef<AvailableValue> &Values) {
  while (NumSubElts) {
    if (!Values[StartSubElt])
      return true;
    ++StartSubElt;
    --NumSubElts;
  }
  return false;
}

namespace {

/// A class that aggregates available values, loading them if they are not
/// available.
class AvailableValueAggregator {
  SILModule &M;
  SILBuilderWithScope B;
  SILLocation Loc;
  MutableArrayRef<AvailableValue> AvailableValueList;
  SmallVectorImpl<PMOMemoryUse> &Uses;
  DeadEndBlocks &deadEndBlocks;
  bool isTake;

  /// Keep track of all instructions that we have added. Once we are done
  /// promoting a value, we need to make sure that if we need to balance any
  /// copies (to avoid leaks), we do so. This is not used if we are performing
  /// a take.
  SmallVector<SILInstruction *, 16> insertedInsts;

public:
  AvailableValueAggregator(SILInstruction *Inst,
                           MutableArrayRef<AvailableValue> AvailableValueList,
                           SmallVectorImpl<PMOMemoryUse> &Uses,
                           DeadEndBlocks &deadEndBlocks, bool isTake)
      : M(Inst->getModule()), B(Inst), Loc(Inst->getLoc()),
        AvailableValueList(AvailableValueList), Uses(Uses),
        deadEndBlocks(deadEndBlocks), isTake(isTake) {}

  // This is intended to be passed by reference only once constructed.
  AvailableValueAggregator(const AvailableValueAggregator &) = delete;
  AvailableValueAggregator(AvailableValueAggregator &&) = delete;
  AvailableValueAggregator &
  operator=(const AvailableValueAggregator &) = delete;
  AvailableValueAggregator &operator=(AvailableValueAggregator &&) = delete;

  SILValue aggregateValues(SILType LoadTy, SILValue Address, unsigned FirstElt,
                           bool isTopLevel = true);
  bool canTake(SILType loadTy, unsigned firstElt) const;

  /// If, as a result of copying values, we may have unconsumed destroys, find
  /// the appropriate location and place the values there. Only used when
  /// ownership is enabled.
  SingleValueInstruction *
  addMissingDestroysForCopiedValues(SingleValueInstruction *li,
                                    SILValue newVal);

  void print(llvm::raw_ostream &os) const;
  void dump() const LLVM_ATTRIBUTE_USED;

private:
  SILValue aggregateFullyAvailableValue(SILType loadTy, unsigned firstElt);
  SILValue aggregateTupleSubElts(TupleType *tt, SILType loadTy,
                                 SILValue address, unsigned firstElt);
  SILValue aggregateStructSubElts(StructDecl *sd, SILType loadTy,
                                  SILValue address, unsigned firstElt);
  SILValue handlePrimitiveValue(SILType loadTy, SILValue address,
                                unsigned firstElt);
  bool isFullyAvailable(SILType loadTy, unsigned firstElt) const;
};

} // end anonymous namespace

void AvailableValueAggregator::dump() const { print(llvm::dbgs()); }

void AvailableValueAggregator::print(llvm::raw_ostream &os) const {
  os << "Available Value List, N = " << AvailableValueList.size()
     << ". Elts:\n";
  for (auto &V : AvailableValueList) {
    os << V;
  }
}

bool AvailableValueAggregator::isFullyAvailable(SILType loadTy,
                                                unsigned firstElt) const {
  if (firstElt >= AvailableValueList.size()) { // #Elements may be zero.
    return false;
  }

  auto &firstVal = AvailableValueList[firstElt];

  // Make sure that the first element is available and is the correct type.
  if (!firstVal || firstVal.getType() != loadTy)
    return false;

  return llvm::all_of(range(getNumSubElements(loadTy, M)),
                      [&](unsigned index) -> bool {
                        auto &val = AvailableValueList[firstElt + index];
                        return val.getValue() == firstVal.getValue() &&
                               val.getSubElementNumber() == index;
                      });
}
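// As an informal example, a load of a 3-leaf aggregate starting at firstElt
// is "fully available" only when all three slots name the same source value
// with consecutive subelement numbers:
//
//   AvailableValueList[firstElt + 0] = (%v, 0)
//   AvailableValueList[firstElt + 1] = (%v, 1)
//   AvailableValueList[firstElt + 2] = (%v, 2)
//
// Any hole, differing value, or permuted index forces element-wise
// aggregation instead.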
// We can only take if we never have to split a larger value to promote this
// address.
bool AvailableValueAggregator::canTake(SILType loadTy,
                                       unsigned firstElt) const {
  // If we do not have ownership, we can always take since we do not need to
  // keep any ownership invariants up to date. In the future, we should be
  // able to chop up larger values before they are being stored.
  if (!B.hasOwnership())
    return true;

  // If we are trivially fully available, just return true.
  if (isFullyAvailable(loadTy, firstElt))
    return true;

  // Otherwise see if we are an aggregate with fully available leaf types.
  if (TupleType *tt = loadTy.getAs<TupleType>()) {
    return llvm::all_of(indices(tt->getElements()), [&](unsigned eltNo) {
      SILType eltTy = loadTy.getTupleElementType(eltNo);
      unsigned numSubElt = getNumSubElements(eltTy, M);
      bool success = canTake(eltTy, firstElt);
      firstElt += numSubElt;
      return success;
    });
  }

  if (auto *sd = getFullyReferenceableStruct(loadTy)) {
    return llvm::all_of(sd->getStoredProperties(), [&](VarDecl *decl) -> bool {
      SILType eltTy = loadTy.getFieldType(decl, M);
      unsigned numSubElt = getNumSubElements(eltTy, M);
      bool success = canTake(eltTy, firstElt);
      firstElt += numSubElt;
      return success;
    });
  }

  // Otherwise, fail. The value is not fully available at its leaves. We can
  // not perform a take.
  return false;
}

/// Given a bunch of primitive subelement values, build out the right aggregate
/// type (LoadTy) by emitting tuple and struct instructions as necessary.
SILValue AvailableValueAggregator::aggregateValues(SILType LoadTy,
                                                   SILValue Address,
                                                   unsigned FirstElt,
                                                   bool isTopLevel) {
  // If we are performing a take, make sure that we have available values for
  // /all/ of our values. Otherwise, bail.
  if (isTopLevel && isTake && !canTake(LoadTy, FirstElt)) {
    return SILValue();
  }

  // Check to see if the requested value is fully available, as an aggregate.
  // This is a super-common case for single-element structs, but is also a
  // general answer for arbitrary structs and tuples as well.
  if (SILValue Result = aggregateFullyAvailableValue(LoadTy, FirstElt))
    return Result;

  // If we have a tuple type, then aggregate the tuple's elements into a full
  // tuple value.
  if (TupleType *TT = LoadTy.getAs<TupleType>())
    return aggregateTupleSubElts(TT, LoadTy, Address, FirstElt);

  // If we have a struct type, then aggregate the struct's elements into a
  // full struct value.
  if (auto *SD = getFullyReferenceableStruct(LoadTy))
    return aggregateStructSubElts(SD, LoadTy, Address, FirstElt);

  // Otherwise, we have a non-aggregate primitive. Load or extract the value.
  return handlePrimitiveValue(LoadTy, Address, FirstElt);
}
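// A simplified sketch of what aggregation produces for a load of a tuple
// (Int, Int) where only leaf 0 is available from a prior store of %v:
//
//   %addr1 = tuple_element_addr %addr, 1
//   %elt1  = load %addr1            ; the missing leaf is re-loaded
//   %tup   = tuple (%v, %elt1)      ; replaces the original load
//
// The actual qualifiers and copies depend on whether the function is [ossa].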
// See if this value is fully available. In such a case, return it as an
// aggregate. This is a super-common case for single-element structs, but is
// also a general answer for arbitrary structs and tuples as well.
SILValue
AvailableValueAggregator::aggregateFullyAvailableValue(SILType loadTy,
                                                       unsigned firstElt) {
  // Check if our underlying type is fully available. If it isn't, bail.
  if (!isFullyAvailable(loadTy, firstElt))
    return SILValue();

  // Ok, grab our first value. (Note: any will actually do.)
  auto &firstVal = AvailableValueList[firstElt];

  // Ok, we know that all of our available values are all parts of the same
  // value. Without ownership, we can just return the underlying first value.
  if (!B.hasOwnership())
    return firstVal.getValue();

  // Otherwise, we need to put in a copy. This is because we only propagate
  // along +1 values and we are eliminating a load [copy].
  ArrayRef<StoreInst *> insertPts = firstVal.getInsertionPoints();
  if (insertPts.size() == 1) {
    // Use the scope and location of the store at the insertion point.
    SILBuilderWithScope builder(insertPts[0], &insertedInsts);
    SILLocation loc = insertPts[0]->getLoc();
    // If we have a take, just return the value.
    if (isTake)
      return firstVal.getValue();
    // Otherwise, return a copy of the value.
    return builder.emitCopyValueOperation(loc, firstVal.getValue());
  }

  // If we have multiple insertion points, put copies at each point and use
  // the SSA updater to get a value. The reason why this is safe is that we
  // can only have multiple insertion points if we are storing exactly the
  // same value, implying that we can just copy firstVal at each insertion
  // point.
  SILSSAUpdater updater;
  updater.Initialize(loadTy);

  Optional<SILValue> singularValue;
  for (auto *insertPt : insertPts) {
    // Use the scope and location of the store at the insertion point.
    SILBuilderWithScope builder(insertPt, &insertedInsts);
    SILLocation loc = insertPt->getLoc();
    SILValue eltVal = firstVal.getValue();

    // If we are not taking, copy the element value.
    if (!isTake) {
      eltVal = builder.emitCopyValueOperation(loc, eltVal);
    }

    if (!singularValue.hasValue()) {
      singularValue = eltVal;
    } else if (*singularValue != eltVal) {
      singularValue = SILValue();
    }

    // And then put the value into the SSA updater.
    updater.AddAvailableValue(insertPt->getParent(), eltVal);
  }

  // If we are only tracking a singular value, we do not need to construct
  // SSA. Just return that value.
  if (auto val = singularValue.getValueOr(SILValue()))
    return val;

  // Finally, grab the value from the SSA updater.
  SILValue result = updater.GetValueInMiddleOfBlock(B.getInsertionBB());
  assert(
      result.getOwnershipKind().isCompatibleWith(ValueOwnershipKind::Owned));
  return result;
}

SILValue AvailableValueAggregator::aggregateTupleSubElts(TupleType *TT,
                                                         SILType LoadTy,
                                                         SILValue Address,
                                                         unsigned FirstElt) {
  SmallVector<SILValue, 4> ResultElts;

  for (unsigned EltNo : indices(TT->getElements())) {
    SILType EltTy = LoadTy.getTupleElementType(EltNo);
    unsigned NumSubElt = getNumSubElements(EltTy, M);

    // If we are missing any of the available values in this struct element,
    // compute an address to load from.
    SILValue EltAddr;
    if (anyMissing(FirstElt, NumSubElt, AvailableValueList)) {
      assert(!isTake && "When taking, values should never be missing?!");
      EltAddr = B.createTupleElementAddr(Loc, Address, EltNo,
                                         EltTy.getAddressType());
    }

    ResultElts.push_back(
        aggregateValues(EltTy, EltAddr, FirstElt, /*isTopLevel*/ false));
    FirstElt += NumSubElt;
  }

  return B.createTuple(Loc, LoadTy, ResultElts);
}

SILValue AvailableValueAggregator::aggregateStructSubElts(StructDecl *sd,
                                                          SILType loadTy,
                                                          SILValue address,
                                                          unsigned firstElt) {
  SmallVector<SILValue, 4> resultElts;

  for (auto *decl : sd->getStoredProperties()) {
    SILType eltTy = loadTy.getFieldType(decl, M);
    unsigned numSubElt = getNumSubElements(eltTy, M);

    // If we are missing any of the available values in this struct element,
    // compute an address to load from.
    SILValue eltAddr;
    if (anyMissing(firstElt, numSubElt, AvailableValueList)) {
      assert(!isTake && "When taking, values should never be missing?!");
      eltAddr = B.createStructElementAddr(Loc, address, decl,
                                          eltTy.getAddressType());
    }

    resultElts.push_back(
        aggregateValues(eltTy, eltAddr, firstElt, /*isTopLevel*/ false));
    firstElt += numSubElt;
  }

  return B.createStruct(Loc, loadTy, resultElts);
}
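// Informally, the multi-insertion-point path above (and in
// handlePrimitiveValue below) amounts to placing one copy per store and
// letting the SSA updater synthesize any needed phi:
//
//   bb1: %c1 = copy_value %v      bb2: %c2 = copy_value %v
//              \                       /
//   bb3: %phi = argument (from %c1/%c2) ; used to replace the load
//
// When every insertion point yields the same value, the phi is skipped
// entirely.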
// We have looked through all of the aggregate values and finally found a
// "primitive value". If the value is available, use it (extracting if we need
// to), otherwise emit a load of the value with the appropriate qualifier.
SILValue AvailableValueAggregator::handlePrimitiveValue(SILType loadTy,
                                                        SILValue address,
                                                        unsigned firstElt) {
  auto &val = AvailableValueList[firstElt];

  // If the value is not available, load the value and update our use list.
  if (!val) {
    assert(!isTake && "Should only take fully available values?!");
    LoadInst *load = ([&]() {
      if (B.hasOwnership()) {
        return B.createTrivialLoadOr(Loc, address,
                                     LoadOwnershipQualifier::Copy);
      }
      return B.createLoad(Loc, address, LoadOwnershipQualifier::Unqualified);
    }());
    Uses.emplace_back(load, PMOUseKind::Load);
    return load;
  }

  // If we have 1 insertion point, just extract the value and return.
  //
  // This saves us from having to spend compile time in the SSA updater in
  // this case.
  ArrayRef<StoreInst *> insertPts = val.getInsertionPoints();
  if (insertPts.size() == 1) {
    // Use the scope and location of the store at the insertion point.
    SILBuilderWithScope builder(insertPts[0], &insertedInsts);
    SILLocation loc = insertPts[0]->getLoc();
    SILValue eltVal = nonDestructivelyExtractSubElement(val, builder, loc);
    assert(!builder.hasOwnership() ||
           eltVal.getOwnershipKind().isCompatibleWith(
               ValueOwnershipKind::Owned));
    assert(eltVal->getType() == loadTy && "Subelement types mismatch");
    return eltVal;
  }

  // If we have an available value, then we want to extract the subelement
  // from the borrowed aggregate before each insertion point.
  SILSSAUpdater updater;
  updater.Initialize(loadTy);

  Optional<SILValue> singularValue;
  for (auto *i : insertPts) {
    // Use the scope and location of the store at the insertion point.
    SILBuilderWithScope builder(i, &insertedInsts);
    SILLocation loc = i->getLoc();
    SILValue eltVal = nonDestructivelyExtractSubElement(val, builder, loc);
    assert(!builder.hasOwnership() ||
           eltVal.getOwnershipKind().isCompatibleWith(
               ValueOwnershipKind::Owned));

    if (!singularValue.hasValue()) {
      singularValue = eltVal;
    } else if (*singularValue != eltVal) {
      singularValue = SILValue();
    }

    updater.AddAvailableValue(i->getParent(), eltVal);
  }

  // If we are only tracking a singular value, we do not need to construct
  // SSA. Just return that value.
  if (auto val = singularValue.getValueOr(SILValue()))
    return val;

  // Finally, grab the value from the SSA updater.
  SILValue eltVal = updater.GetValueInMiddleOfBlock(B.getInsertionBB());
  assert(!B.hasOwnership() ||
         eltVal.getOwnershipKind().isCompatibleWith(
             ValueOwnershipKind::Owned));
  assert(eltVal->getType() == loadTy && "Subelement types mismatch");
  return eltVal;
}
SingleValueInstruction *
AvailableValueAggregator::addMissingDestroysForCopiedValues(
    SingleValueInstruction *svi, SILValue newVal) {
  // If ownership is not enabled... bail. We do not need to do this since we
  // do not need to insert an extra copy unless we have ownership, since
  // without ownership stores do not consume.
  if (!B.hasOwnership())
    return svi;

  SmallPtrSet<SILBasicBlock *, 8> visitedBlocks;
  SmallVector<SILBasicBlock *, 8> leakingBlocks;
  bool foundLoop = false;
  auto loc = RegularLocation::getAutoGeneratedLocation();
  while (!insertedInsts.empty()) {
    auto *cvi = dyn_cast<CopyValueInst>(insertedInsts.pop_back_val());
    if (!cvi)
      continue;

    // Clear our state.
    visitedBlocks.clear();
    leakingBlocks.clear();

    // The linear lifetime checker doesn't care if the passed in load is
    // actually a user of our copy_value. What we care about is that the load
    // is guaranteed to be in the block where we have reformed the tuple in a
    // consuming manner. This means if we add it as the consuming use of the
    // copy, we can find the leaking places if any exist.
    //
    // Then perform the linear lifetime check. If we succeed, continue. We
    // have no further work to do.
    auto errorKind = ownership::ErrorBehaviorKind::ReturnFalse;
    auto error =
        valueHasLinearLifetime(cvi, {svi}, {}, visitedBlocks, deadEndBlocks,
                               errorKind, &leakingBlocks);
    if (!error.getFoundError())
      continue;

    // Ok, we found some leaking blocks. Since we are using the linear
    // lifetime checker with memory, we do not have any guarantees that the
    // store is outside of a loop and a load is in a loop. In such a case, we
    // want to replace the load with a copy_value.
    foundLoop |= error.getFoundOverConsume();

    // Ok, we found some leaking blocks. Insert destroys at the beginning of
    // these blocks for our copy_value.
    for (auto *bb : leakingBlocks) {
      SILBuilderWithScope b(bb->begin());
      b.emitDestroyValueOperation(loc, cvi);
    }
  }

  // If we didn't find a loop, we are done, just return svi to get RAUWed.
  if (!foundLoop) {
    // If we had a load_borrow, we have created an extra copy that we are
    // going to borrow at the load point. This means we need to handle the
    // destroying of the value along paths reachable from the load_borrow.
    // Luckily that will exactly be after the end_borrows of the load_borrow.
    if (isa<LoadBorrowInst>(svi)) {
      for (auto *use : svi->getUses()) {
        if (auto *ebi = dyn_cast<EndBorrowInst>(use->getUser())) {
          auto next = std::next(ebi->getIterator());
          SILBuilderWithScope(next).emitDestroyValueOperation(ebi->getLoc(),
                                                              newVal);
        }
      }
    }
    return svi;
  }

  // If we found a loop, then we know that our leaking blocks are the exiting
  // blocks of the loop and the value has been lifetime extended over the
  // loop.
  if (isa<LoadInst>(svi)) {
    // If we have a load, we need to put in a copy so that the destroys within
    // the loop are properly balanced.
    newVal = SILBuilderWithScope(svi).emitCopyValueOperation(loc, newVal);
  } else {
    // If we have a load_borrow, we create a begin_borrow for the end_borrows
    // in the loop.
    assert(isa<LoadBorrowInst>(svi));
    newVal = SILBuilderWithScope(svi).createBeginBorrow(svi->getLoc(), newVal);
  }

  svi->replaceAllUsesWith(newVal);
  SILValue addr = svi->getOperand(0);
  svi->eraseFromParent();
  if (auto *addrI = addr->getDefiningInstruction())
    recursivelyDeleteTriviallyDeadInstructions(addrI);
  return nullptr;
}
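// A hedged illustration of the leak balancing above: if a store (and thus our
// inserted copy_value) sits inside a loop while the promoted load sits after
// it, the loop's exit blocks show up as "leaking" and each one receives a
// destroy_value for the copy:
//
//   bb_loop: %c = copy_value %v ... cond_br %t, bb_loop, bb_exit
//   bb_exit: destroy_value %c       ; inserted here
//
// The exact placement depends on what the linear lifetime checker reports.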
//===----------------------------------------------------------------------===//
//                          Available Value Dataflow
//===----------------------------------------------------------------------===//

namespace {

/// Given a piece of memory, the memory's uses, and destroys, perform a single
/// round of optimistic dataflow, switching to intersection when a back edge
/// is encountered.
class AvailableValueDataflowContext {
  /// The base memory we are performing dataflow upon.
  AllocationInst *TheMemory;

  /// The number of sub elements of our memory.
  unsigned NumMemorySubElements;

  /// The set of uses that we are tracking. This is only here so we can update
  /// when exploding copy_addr. It would be great if we did not have to store
  /// this.
  SmallVectorImpl<PMOMemoryUse> &Uses;

  /// The set of blocks with local definitions.
  ///
  /// We use this to determine if we should visit a block or look at a block's
  /// predecessors during dataflow.
  llvm::SmallPtrSet<SILBasicBlock *, 32> HasLocalDefinition;

  /// This is a map of uses that are not loads (i.e., they are Stores,
  /// InOutUses, and Escapes), to their entry in Uses.
  llvm::SmallDenseMap<SILInstruction *, unsigned, 16> NonLoadUses;

  /// Does this value escape anywhere in the function? We use this very
  /// conservatively.
  bool HasAnyEscape = false;

public:
  AvailableValueDataflowContext(AllocationInst *TheMemory,
                                unsigned NumMemorySubElements,
                                SmallVectorImpl<PMOMemoryUse> &Uses);

  /// Try to compute available values for "TheMemory" at the instruction \p
  /// StartingFrom. We only compute the values for set bits in \p
  /// RequiredElts. We return the available values in \p Result. If any
  /// available values were found, return true. Otherwise, return false.
  bool computeAvailableValues(SILInstruction *StartingFrom,
                              unsigned FirstEltOffset,
                              unsigned NumLoadSubElements,
                              SmallBitVector &RequiredElts,
                              SmallVectorImpl<AvailableValue> &Result);

  /// Return true if the box has escaped at the specified instruction. We are
  /// not allowed to do load promotion in an escape region.
  bool hasEscapedAt(SILInstruction *I);

  /// Explode a copy_addr, updating the Uses at the same time.
  void explodeCopyAddr(CopyAddrInst *CAI);

private:
  SILModule &getModule() const { return TheMemory->getModule(); }

  void updateAvailableValues(SILInstruction *Inst,
                             SmallBitVector &RequiredElts,
                             SmallVectorImpl<AvailableValue> &Result,
                             SmallBitVector &ConflictingValues);
  void computeAvailableValuesFrom(
      SILBasicBlock::iterator StartingFrom, SILBasicBlock *BB,
      SmallBitVector &RequiredElts, SmallVectorImpl<AvailableValue> &Result,
      llvm::SmallDenseMap<SILBasicBlock *, SmallBitVector, 32> &VisitedBlocks,
      SmallBitVector &ConflictingValues);
};

} // end anonymous namespace

AvailableValueDataflowContext::AvailableValueDataflowContext(
    AllocationInst *InputTheMemory, unsigned NumMemorySubElements,
    SmallVectorImpl<PMOMemoryUse> &InputUses)
    : TheMemory(InputTheMemory), NumMemorySubElements(NumMemorySubElements),
      Uses(InputUses) {
  // The first step of processing an element is to collect information about
  // the element into data structures we use later.
  for (unsigned ui : indices(Uses)) {
    auto &Use = Uses[ui];
    assert(Use.Inst && "No instruction identified?");

    // Keep track of all the uses that aren't loads.
    if (Use.Kind == PMOUseKind::Load)
      continue;

    NonLoadUses[Use.Inst] = ui;
    HasLocalDefinition.insert(Use.Inst->getParent());

    if (Use.Kind == PMOUseKind::Escape) {
      // Determine which blocks the value can escape from. We aren't allowed
      // to promote loads in blocks reachable from an escape point.
      HasAnyEscape = true;
    }
  }

  // It isn't really a use, but we account for the alloc_box/mark_uninitialized
  // as a use so we see it in our dataflow walks.
  NonLoadUses[TheMemory] = ~0U;
  HasLocalDefinition.insert(TheMemory->getParent());
}
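// Sketch of the optimistic strategy, for intuition: the first walk into a
// loop header assumes the same elements are required along the back edge. If
// a revisit ever asks for a *different* RequiredElts set, the disagreeing
// bits are recorded in ConflictingValues and masked out rather than
// re-iterated, so the walk stays a single round instead of a fixed-point
// loop.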
void AvailableValueDataflowContext::updateAvailableValues(
    SILInstruction *Inst, SmallBitVector &RequiredElts,
    SmallVectorImpl<AvailableValue> &Result,
    SmallBitVector &ConflictingValues) {
  // Handle store.
  if (auto *SI = dyn_cast<StoreInst>(Inst)) {
    unsigned StartSubElt = computeSubelement(SI->getDest(), TheMemory);
    assert(StartSubElt != ~0U && "Store within enum projection not handled");
    SILType ValTy = SI->getSrc()->getType();

    for (unsigned i : range(getNumSubElements(ValTy, getModule()))) {
      // If this element is not required, don't fill it in.
      if (!RequiredElts[StartSubElt + i])
        continue;

      // This element is now provided.
      RequiredElts[StartSubElt + i] = false;

      // If there is no result computed for this subelement, record it. If
      // there already is a result, check it for conflict. If there is no
      // conflict, then we're ok.
      auto &Entry = Result[StartSubElt + i];
      if (!Entry) {
        Entry = {SI->getSrc(), i, SI};
        continue;
      }

      // TODO: This is /really/, /really/, conservative. This basically means
      // that if we do not have an identical store, we will not promote.
      if (Entry.getValue() != SI->getSrc() ||
          Entry.getSubElementNumber() != i) {
        ConflictingValues[StartSubElt + i] = true;
        continue;
      }

      Entry.addInsertionPoint(SI);
    }

    return;
  }

  // If we get here with a copy_addr, it must be storing into the element.
  // Check to see if any loaded subelements are being used, and if so, explode
  // the copy_addr to its individual pieces.
  if (auto *CAI = dyn_cast<CopyAddrInst>(Inst)) {
    unsigned StartSubElt = computeSubelement(CAI->getDest(), TheMemory);
    assert(StartSubElt != ~0U && "Store within enum projection not handled");
    SILType ValTy = CAI->getDest()->getType();

    bool AnyRequired = false;
    for (unsigned i : range(getNumSubElements(ValTy, getModule()))) {
      // If this element is not required, don't fill it in.
      AnyRequired = RequiredElts[StartSubElt + i];
      if (AnyRequired)
        break;
    }

    // If this is a copy addr that doesn't intersect the loaded subelements,
    // just continue with an unmodified load mask.
    if (!AnyRequired)
      return;

    // If the copyaddr is of a non-loadable type, we can't promote it. Just
    // consider it to be a clobber.
    if (CAI->getSrc()->getType().isLoadable(*CAI->getFunction())) {
      // Otherwise, some part of the copy_addr's value is demanded by a load,
      // so we need to explode it to its component pieces. This only expands
      // one level of the copyaddr.
      explodeCopyAddr(CAI);

      // The copy_addr doesn't provide any values, but we've arranged for our
      // iterators to visit the newly generated instructions, which do.
      return;
    }
  }

  // TODO: inout apply's should only clobber pieces passed in.

  // Otherwise, this is some unknown instruction, conservatively assume that
  // all values are clobbered.
  RequiredElts.clear();
  ConflictingValues = SmallBitVector(Result.size(), true);
  return;
}

bool AvailableValueDataflowContext::computeAvailableValues(
    SILInstruction *StartingFrom, unsigned FirstEltOffset,
    unsigned NumLoadSubElements, SmallBitVector &RequiredElts,
    SmallVectorImpl<AvailableValue> &Result) {
  llvm::SmallDenseMap<SILBasicBlock *, SmallBitVector, 32> VisitedBlocks;
  SmallBitVector ConflictingValues(Result.size());

  computeAvailableValuesFrom(StartingFrom->getIterator(),
                             StartingFrom->getParent(), RequiredElts, Result,
                             VisitedBlocks, ConflictingValues);

  // If there are no values available at this load point, then we fail to
  // promote this load and there is nothing to do.
  SmallBitVector AvailableValueIsPresent(NumMemorySubElements);

  for (unsigned i :
       range(FirstEltOffset, FirstEltOffset + NumLoadSubElements)) {
    AvailableValueIsPresent[i] = Result[i].getValue();
  }

  // If we do not have any values available, bail.
  if (AvailableValueIsPresent.none())
    return false;

  // Otherwise, if we have any conflicting values, explicitly mask them out of
  // the result, so we don't pick one arbitrary available value.
  if (ConflictingValues.none()) {
    return true;
  }

  // At this point, we know that we have /some/ conflicting values and some
  // available values.
  if (AvailableValueIsPresent.reset(ConflictingValues).none())
    return false;

  // Otherwise, mask out the available values and return true. We have at
  // least 1 available value.
  int NextIter = ConflictingValues.find_first();
  while (NextIter != -1) {
    assert(NextIter >= 0 && "Int can not be represented?!");
    unsigned Iter = NextIter;
    Result[Iter] = {};
    NextIter = ConflictingValues.find_next(Iter);
  }

  return true;
}
void AvailableValueDataflowContext::computeAvailableValuesFrom(
    SILBasicBlock::iterator StartingFrom, SILBasicBlock *BB,
    SmallBitVector &RequiredElts, SmallVectorImpl<AvailableValue> &Result,
    llvm::SmallDenseMap<SILBasicBlock *, SmallBitVector, 32> &VisitedBlocks,
    SmallBitVector &ConflictingValues) {
  assert(!RequiredElts.none() && "Scanning with a goal of finding nothing?");

  // If there is a potential modification in the current block, scan the block
  // to see if the store or escape is before or after the load. If it is
  // before, check to see if it produces the value we are looking for.
  if (HasLocalDefinition.count(BB)) {
    for (SILBasicBlock::iterator BBI = StartingFrom; BBI != BB->begin();) {
      SILInstruction *TheInst = &*std::prev(BBI);

      // If this instruction is unrelated to the element, ignore it.
      if (!NonLoadUses.count(TheInst)) {
        --BBI;
        continue;
      }

      // Given an interesting instruction, incorporate it into the set of
      // results, and filter down the list of demanded subelements that we
      // still need.
      updateAvailableValues(TheInst, RequiredElts, Result, ConflictingValues);

      // If this satisfied all of the demanded values, we're done.
      if (RequiredElts.none())
        return;

      // Otherwise, keep scanning the block. If the instruction we were
      // looking at just got exploded, don't skip the next instruction.
      if (&*std::prev(BBI) == TheInst)
        --BBI;
    }
  }

  // Otherwise, we need to scan up the CFG looking for available values.
  for (auto PI = BB->pred_begin(), E = BB->pred_end(); PI != E; ++PI) {
    SILBasicBlock *PredBB = *PI;

    // If the predecessor block has already been visited (potentially due to a
    // cycle in the CFG), don't revisit it. We can do this safely because we
    // are optimistically assuming that all incoming elements in a cycle will
    // be the same. If we ever detect a conflicting element, we record it and
    // do not look at the result.
    auto Entry = VisitedBlocks.insert({PredBB, RequiredElts});
    if (!Entry.second) {
      // If we are revisiting a block and asking for different required
      // elements, then anything that isn't agreeing is in conflict.
      const auto &PrevRequired = Entry.first->second;
      if (PrevRequired != RequiredElts) {
        ConflictingValues |= (PrevRequired ^ RequiredElts);

        RequiredElts &= ~ConflictingValues;
        if (RequiredElts.none())
          return;
      }
      continue;
    }

    // Make sure to pass in the same set of required elements for each pred.
    SmallBitVector Elts = RequiredElts;
    computeAvailableValuesFrom(PredBB->end(), PredBB, Elts, Result,
                               VisitedBlocks, ConflictingValues);

    // If we have any conflicting values, don't bother searching for them.
    RequiredElts &= ~ConflictingValues;
    if (RequiredElts.none())
      return;
  }
}

/// Explode a copy_addr instruction of a loadable type into lower level
/// operations like loads, stores, retains, releases, retain_value, etc.
void AvailableValueDataflowContext::explodeCopyAddr(CopyAddrInst *CAI) {
  LLVM_DEBUG(llvm::dbgs() << "  -- Exploding copy_addr: " << *CAI << "\n");

  SILType ValTy = CAI->getDest()->getType().getObjectType();
  SILFunction *F = CAI->getFunction();
  auto &TL = F->getTypeLowering(ValTy);

  // Keep track of the new instructions emitted.
  SmallVector<SILInstruction *, 4> NewInsts;
  SILBuilder B(CAI, &NewInsts);
  B.setCurrentDebugScope(CAI->getDebugScope());

  // Use type lowering to lower the copyaddr into a load sequence + store
  // sequence appropriate for the type.
  SILValue StoredValue =
      TL.emitLoadOfCopy(B, CAI->getLoc(), CAI->getSrc(), CAI->isTakeOfSrc());

  TL.emitStoreOfCopy(B, CAI->getLoc(), StoredValue, CAI->getDest(),
                     CAI->isInitializationOfDest());

  // Update our internal state for this being gone.
  NonLoadUses.erase(CAI);

  // Remove the copy_addr from Uses. A single copy_addr can appear multiple
  // times if the source and dest are to elements within a single aggregate,
  // but we only want to pick up the CopyAddrKind from the store.
  PMOMemoryUse LoadUse, StoreUse;
  for (auto &Use : Uses) {
    if (Use.Inst != CAI)
      continue;

    if (Use.Kind == PMOUseKind::Load) {
      assert(LoadUse.isInvalid());
      LoadUse = Use;
    } else {
      assert(StoreUse.isInvalid());
      StoreUse = Use;
    }

    Use.Inst = nullptr;

    // Keep scanning in case the copy_addr appears multiple times.
  }

  assert((LoadUse.isValid() || StoreUse.isValid()) &&
         "we should have a load or a store, possibly both");
  assert(StoreUse.isInvalid() || StoreUse.Kind == Assign ||
         StoreUse.Kind == Initialization || StoreUse.Kind == InitOrAssign);

  // Now that we've emitted a bunch of instructions, including a load and
  // store but also including other stuff, update the internal state of
  // LifetimeChecker to reflect them.

  // Update the instructions that touch the memory. NewInsts can grow as this
  // iterates, so we can't use a foreach loop.
  for (auto *NewInst : NewInsts) {
    switch (NewInst->getKind()) {
    default:
      NewInst->dump();
      llvm_unreachable("Unknown instruction generated by copy_addr lowering");

    case SILInstructionKind::StoreInst:
      // If it is a store to the memory object (as opposed to a store to
      // something else), track it as an access.
      if (StoreUse.isValid()) {
        StoreUse.Inst = NewInst;
        // If our store use by the copy_addr is an assign, then we know that
        // before we store the new value, we loaded the old value implying
        // that our store is technically initializing memory when it occurs.
        // So change the kind to Initialization.
        if (StoreUse.Kind == Assign)
          StoreUse.Kind = Initialization;
        NonLoadUses[NewInst] = Uses.size();
        Uses.push_back(StoreUse);
      }
      continue;

    case SILInstructionKind::LoadInst:
      // If it is a load from the memory object (as opposed to a load from
      // something else), track it as an access. We need to explicitly check
      // to see if the load accesses "TheMemory" because it could either be a
      // load for the copy_addr source, or it could be a load corresponding to
      // the "assign" operation on the destination of the copyaddr.
      if (LoadUse.isValid() &&
          getAccessPathRoot(NewInst->getOperand(0)) == TheMemory) {
        LoadUse.Inst = NewInst;
        Uses.push_back(LoadUse);
      }
      continue;

    case SILInstructionKind::RetainValueInst:
    case SILInstructionKind::StrongRetainInst:
    case SILInstructionKind::StrongReleaseInst:
    case SILInstructionKind::ReleaseValueInst: // Destroy overwritten value
      // These are ignored.
      continue;
    }
  }

  // Next, remove the copy_addr itself.
  CAI->eraseFromParent();
}
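// For intuition, exploding a copy_addr of a loadable type turns, roughly,
//
//   copy_addr %src to %dst : $*T
//
// into the type-lowered pair
//
//   %v = load %src          ; plus a retain/copy if %src is not taken
//   store %v to %dst        ; plus a release of the old value on assign
//
// with the exact retain/release or [ossa] qualifiers chosen by TypeLowering.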
bool AvailableValueDataflowContext::hasEscapedAt(SILInstruction *I) {
  // Return true if the box has escaped at the specified instruction. We are
  // not allowed to do load promotion in an escape region.

  // FIXME: This is not an aggressive implementation. :)

  // TODO: At some point, we should special case closures that just *read*
  // from the escaped value (by looking at the body of the closure). They
  // should not prevent load promotion, and will allow promoting values like
  // X in regions dominated by "... && X != 0".
  return HasAnyEscape;
}

//===----------------------------------------------------------------------===//
//                          Allocation Optimization
//===----------------------------------------------------------------------===//

static SILType getMemoryType(AllocationInst *memory) {
  // Compute the type of the memory object.
  if (auto *abi = dyn_cast<AllocBoxInst>(memory)) {
    assert(abi->getBoxType()->getLayout()->getFields().size() == 1 &&
           "optimizing multi-field boxes not implemented");
    return abi->getBoxType()->getFieldType(abi->getModule(), 0);
  }

  assert(isa<AllocStackInst>(memory));
  return cast<AllocStackInst>(memory)->getElementType();
}
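// E.g., for `%box = alloc_box ${ var Int }` getMemoryType returns $Int (the
// single field of the box layout), and for `%mem = alloc_stack $T` it returns
// $T directly.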
namespace {

/// This performs load promotion and deletes synthesized allocations if all
/// loads can be removed.
class AllocOptimize {
  SILModule &Module;

  /// This is either an alloc_box or alloc_stack instruction.
  AllocationInst *TheMemory;

  /// This is the SILType of the memory object.
  SILType MemoryType;

  /// The number of primitive subelements across all elements of this memory
  /// value.
  unsigned NumMemorySubElements;

  SmallVectorImpl<PMOMemoryUse> &Uses;
  SmallVectorImpl<SILInstruction *> &Releases;

  DeadEndBlocks &deadEndBlocks;

  /// A structure that we use to compute our available values.
  AvailableValueDataflowContext DataflowContext;

public:
  AllocOptimize(AllocationInst *memory, SmallVectorImpl<PMOMemoryUse> &uses,
                SmallVectorImpl<SILInstruction *> &releases,
                DeadEndBlocks &deadEndBlocks)
      : Module(memory->getModule()), TheMemory(memory),
        MemoryType(getMemoryType(memory)),
        NumMemorySubElements(getNumSubElements(MemoryType, Module)),
        Uses(uses), Releases(releases), deadEndBlocks(deadEndBlocks),
        DataflowContext(TheMemory, NumMemorySubElements, uses) {}

  bool optimizeMemoryAccesses();
  bool tryToRemoveDeadAllocation();

private:
  bool promoteLoad(SILInstruction *Inst);
  void promoteDestroyAddr(DestroyAddrInst *dai,
                          MutableArrayRef<AvailableValue> values);
  bool canPromoteDestroyAddr(DestroyAddrInst *dai,
                             SmallVectorImpl<AvailableValue> &availableValues);
};

} // end anonymous namespace

/// If we are able to optimize \p Inst, return the source address that
/// instruction is loading from. If we can not optimize \p Inst, then just
/// return an empty SILValue.
static SILValue tryFindSrcAddrForLoad(SILInstruction *i) {
  // We can always promote a load_borrow.
  if (auto *lbi = dyn_cast<LoadBorrowInst>(i))
    return lbi->getOperand();

  // We only handle load [copy], load [trivial], load and copy_addr right
  // now. Notably we do not support load [take] when promoting loads.
  if (auto *li = dyn_cast<LoadInst>(i))
    if (li->getOwnershipQualifier() != LoadOwnershipQualifier::Take)
      return li->getOperand();

  // If this is a CopyAddr, verify that the element type is loadable. If not,
  // we can't explode to a load.
  auto *cai = dyn_cast<CopyAddrInst>(i);
  if (!cai || !cai->getSrc()->getType().isLoadable(*cai->getFunction()))
    return SILValue();
  return cai->getSrc();
}
/// At this point, we know that this element satisfies the definitive init
/// requirements, so we can try to promote loads to enable SSA-based dataflow
/// analysis. We know that accesses to this element only access this element,
/// cross element accesses have been scalarized.
///
/// This returns true if the load has been removed from the program.
bool AllocOptimize::promoteLoad(SILInstruction *Inst) {
  // Note that we intentionally don't support forwarding of weak pointers,
  // because the underlying value may be deallocated at any time. We would
  // have to prove that something in this function is holding the weak value
  // live across the promoted region and that isn't desired for a stable
  // diagnostics pass like this one.

  // First attempt to find a source addr for our "load" instruction. If we
  // fail to find a valid value, just return.
  SILValue SrcAddr = tryFindSrcAddrForLoad(Inst);
  if (!SrcAddr)
    return false;

  // If the box has escaped at this instruction, we can't safely promote the
  // load.
  if (DataflowContext.hasEscapedAt(Inst))
    return false;

  SILType LoadTy = SrcAddr->getType().getObjectType();

  // If this is a load/copy_addr from a struct field that we want to promote,
  // compute the access path down to the field so we can determine precise
  // def/use behavior.
  unsigned FirstElt = computeSubelement(SrcAddr, TheMemory);

  // If this is a load from within an enum projection, we can't promote it
  // since we don't track subelements in a type that could be changing.
  if (FirstElt == ~0U)
    return false;

  unsigned NumLoadSubElements = getNumSubElements(LoadTy, Module);

  // Set up the bitvector of elements being demanded by the load.
  SmallBitVector RequiredElts(NumMemorySubElements);
  RequiredElts.set(FirstElt, FirstElt + NumLoadSubElements);

  SmallVector<AvailableValue, 8> AvailableValues;
  AvailableValues.resize(NumMemorySubElements);

  // Find out if we have any available values. If no bits are demanded, we
  // trivially succeed. This can happen when there is a load of an empty
  // struct.
  if (NumLoadSubElements != 0 &&
      !DataflowContext.computeAvailableValues(
          Inst, FirstElt, NumLoadSubElements, RequiredElts, AvailableValues))
    return false;

  // Ok, we have some available values. If we have a copy_addr, explode it
  // now, exposing the load operation within it. Subsequent optimization
  // passes will see the load and propagate the available values into it.
  if (auto *CAI = dyn_cast<CopyAddrInst>(Inst)) {
    DataflowContext.explodeCopyAddr(CAI);

    // This is removing the copy_addr, but explodeCopyAddr takes care of
    // removing the instruction from Uses for us, so we return false.
    return false;
  }

  assert((isa<LoadBorrowInst>(Inst) || isa<LoadInst>(Inst)) &&
         "Unhandled instruction for this code path!");

  // Aggregate together all of the subelements into something that has the
  // same type as the load did, and emit smaller loads for any subelements
  // that were not available. We are "propagating" a +1 available value from
  // the store points.
  auto *load = dyn_cast<SingleValueInstruction>(Inst);
  AvailableValueAggregator agg(load, AvailableValues, Uses, deadEndBlocks,
                               false /*isTake*/);
  SILValue newVal = agg.aggregateValues(LoadTy, load->getOperand(0), FirstElt);

  LLVM_DEBUG(llvm::dbgs() << "  *** Promoting load: " << *load << "\n");
  LLVM_DEBUG(llvm::dbgs() << "      To value: " << *newVal << "\n");

  // If we inserted any copies, we created the copies at our stores. We know
  // that in our load block, we will reform the aggregate as appropriate at
  // the load implying that the value /must/ be fully consumed. If we promoted
  // a +0 value, we created dominating destroys along those paths. Thus any
  // leaking blocks that we may have can be found by performing a linear
  // lifetime check over all copies that we found using the load as the
  // "consuming uses" (just for the purposes of identifying the consuming
  // block).
  auto *oldLoad = agg.addMissingDestroysForCopiedValues(load, newVal);

  ++NumLoadPromoted;

  // If we are returned the load, eliminate it. Otherwise, it was already
  // handled for us... so return true.
  if (!oldLoad)
    return true;

  // If our load was a +0 value, borrow the value and then RAUW. We reuse the
  // end_borrows of our load_borrow.
  if (isa<LoadBorrowInst>(oldLoad)) {
    newVal = SILBuilderWithScope(oldLoad).createBeginBorrow(oldLoad->getLoc(),
                                                            newVal);
  }

  oldLoad->replaceAllUsesWith(newVal);
  SILValue addr = oldLoad->getOperand(0);
  oldLoad->eraseFromParent();
  if (auto *addrI = addr->getDefiningInstruction())
    recursivelyDeleteTriviallyDeadInstructions(addrI);
  return true;
}
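// Putting it together, the canonical promotion (here in [ossa], simplified)
// rewrites
//
//   store %v to [init] %addr
//   %x = load [copy] %addr
//
// into
//
//   %c = copy_value %v          ; at the store, the insertion point
//   store %v to [init] %addr
//   ...                         ; %c replaces %x; the load disappears
//
// leaving the store for later dead-allocation elimination to clean up.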
/// Return true if we can promote the given destroy.
bool AllocOptimize::canPromoteDestroyAddr(
    DestroyAddrInst *dai, SmallVectorImpl<AvailableValue> &availableValues) {
  SILValue address = dai->getOperand();

  // We cannot promote destroys of address-only types, because we can't expose
  // the load.
  SILType loadTy = address->getType().getObjectType();
  if (loadTy.isAddressOnly(*dai->getFunction()))
    return false;

  // If the box has escaped at this instruction, we can't safely promote the
  // load.
  if (DataflowContext.hasEscapedAt(dai))
    return false;

  // Compute the access path down to the field so we can determine precise
  // def/use behavior.
  unsigned firstElt = computeSubelement(address, TheMemory);
  assert(firstElt != ~0U && "destroy within enum projection is not valid");
  unsigned numLoadSubElements = getNumSubElements(loadTy, Module);

  // Find out if we have any available values. If no bits are demanded, we
  // trivially succeed. This can happen when there is a load of an empty
  // struct.
  if (numLoadSubElements == 0)
    return true;

  // Set up the bitvector of elements being demanded by the load.
  SmallBitVector requiredElts(NumMemorySubElements);
  requiredElts.set(firstElt, firstElt + numLoadSubElements);

  // Compute our available values. If we do not have any available values,
  // return false. We have nothing further to do.
  SmallVector<AvailableValue, 8> tmpList;
  tmpList.resize(NumMemorySubElements);
  if (!DataflowContext.computeAvailableValues(
          dai, firstElt, numLoadSubElements, requiredElts, tmpList))
    return false;

  // Now check that we can perform a take upon our available values. This
  // implies today that our value is fully available. If the value is not
  // fully available, we would need to split stores to promote this
  // destroy_addr. We do not support that yet.
  AvailableValueAggregator agg(dai, tmpList, Uses, deadEndBlocks,
                               true /*isTake*/);
  if (!agg.canTake(loadTy, firstElt))
    return false;

  // Ok, we can promote this destroy_addr... move the temporary lists contents
  // into the final AvailableValues list.
  std::move(tmpList.begin(), tmpList.end(),
            std::back_inserter(availableValues));

  return true;
}

// DestroyAddr is a composed operation merging load [take] + destroy_value. If
// the implicit load's value is available, explode it.
//
// NOTE: We only do this if we have a fully available value.
//
// Note that we handle the general case of a destroy_addr of a piece of the
// memory object, not just destroy_addrs of the entire thing.
void AllocOptimize::promoteDestroyAddr(
    DestroyAddrInst *dai, MutableArrayRef<AvailableValue> availableValues) {
  SILValue address = dai->getOperand();
  SILType loadTy = address->getType().getObjectType();

  // Compute the access path down to the field so we can determine precise
  // def/use behavior.
  unsigned firstElt = computeSubelement(address, TheMemory);

  // Aggregate together all of the subelements into something that has the
  // same type as the load did, and emit smaller loads for any subelements
  // that were not available.
  AvailableValueAggregator agg(dai, availableValues, Uses, deadEndBlocks,
                               true /*isTake*/);
  SILValue newVal = agg.aggregateValues(loadTy, address, firstElt);

  ++NumDestroyAddrPromoted;

  LLVM_DEBUG(llvm::dbgs() << "  *** Promoting destroy_addr: " << *dai << "\n");
  LLVM_DEBUG(llvm::dbgs() << "      To value: " << *newVal << "\n");

  SILBuilderWithScope(dai).emitDestroyValueOperation(dai->getLoc(), newVal);
  dai->eraseFromParent();
}
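// Conceptually this rewrites, under the fully-available precondition,
//
//   store %v to [init] %addr
//   destroy_addr %addr
//
// into
//
//   store %v to [init] %addr
//   destroy_value %v            ; the implicit load [take] became a take
//
// which unblocks removing the now store-only allocation entirely.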
namespace {

struct DestroyAddrPromotionState {
  ArrayRef<SILInstruction *> destroys;
  SmallVector<unsigned, 8> destroyAddrIndices;
  SmallVector<AvailableValue, 32> availableValueList;
  SmallVector<unsigned, 8> availableValueStartOffsets;

  DestroyAddrPromotionState(ArrayRef<SILInstruction *> destroys)
      : destroys(destroys) {}

  unsigned size() const { return destroyAddrIndices.size(); }

  void initializeForDestroyAddr(unsigned destroyAddrIndex) {
    availableValueStartOffsets.push_back(availableValueList.size());
    destroyAddrIndices.push_back(destroyAddrIndex);
  }

  std::pair<DestroyAddrInst *, MutableArrayRef<AvailableValue>>
  getData(unsigned index) {
    unsigned destroyAddrIndex = destroyAddrIndices[index];
    unsigned startOffset = availableValueStartOffsets[index];
    unsigned count;

    if ((availableValueStartOffsets.size() - 1) != index) {
      count = availableValueStartOffsets[index + 1] - startOffset;
    } else {
      count = availableValueList.size() - startOffset;
    }

    MutableArrayRef<AvailableValue> values(&availableValueList[startOffset],
                                           count);
    auto *dai = cast<DestroyAddrInst>(destroys[destroyAddrIndex]);
    return {dai, values};
  }
};

} // end anonymous namespace

/// If the allocation is an autogenerated allocation that is only stored to
/// (after load promotion) then remove it completely.
bool AllocOptimize::tryToRemoveDeadAllocation() {
  assert((isa<AllocBoxInst>(TheMemory) || isa<AllocStackInst>(TheMemory)) &&
         "Unhandled allocation case");

  auto *f = TheMemory->getFunction();

  // We don't want to remove allocations that are required for useful debug
  // information at -O0. As such, we only remove allocations if:
  //
  // 1. They are in a transparent function.
  // 2. They are in a normal function, but didn't come from a VarDecl, or came
  //    from one that was autogenerated or inlined from a transparent
  //    function.
  SILLocation loc = TheMemory->getLoc();
  if (!f->isTransparent() && loc.getAsASTNode<VarDecl>() &&
      !loc.isAutoGenerated() && !loc.is<MandatoryInlinedLocation>())
    return false;

  // Check the uses list to see if there are any non-store uses left over
  // after load promotion and other things PMO does.
  for (auto &u : Uses) {
    // Ignore removed instructions.
    if (u.Inst == nullptr)
      continue;

    switch (u.Kind) {
    case PMOUseKind::Assign:
      // Until we can promote the value being destroyed by the assign, we can
      // not remove deallocations with such assigns.
      return false;
    case PMOUseKind::InitOrAssign:
      break; // These don't prevent removal.
    case PMOUseKind::Initialization:
      if (!isa<ApplyInst>(u.Inst) &&
          // A copy_addr that is not a take affects the retain count of the
          // source.
          (!isa<CopyAddrInst>(u.Inst) ||
           cast<CopyAddrInst>(u.Inst)->isTakeOfSrc()))
        break;
      // FALL THROUGH.
      LLVM_FALLTHROUGH;
    case PMOUseKind::Load:
    case PMOUseKind::IndirectIn:
    case PMOUseKind::InOutUse:
    case PMOUseKind::Escape:
      LLVM_DEBUG(llvm::dbgs() << "*** Failed to remove autogenerated alloc: "
                                 "kept alive by: "
                              << *u.Inst);
      return false; // These do prevent removal.
    }
  }

  // If our memory is trivially typed, we can just remove it without needing
  // to consider if the stored value needs to be destroyed. So at this point,
  // delete the memory!
  if (MemoryType.isTrivial(*f)) {
    LLVM_DEBUG(llvm::dbgs() << "*** Removing autogenerated trivial allocation: "
                            << *TheMemory);

    // If it is safe to remove, do it. Recursively remove all instructions
    // hanging off the allocation instruction, then return success. Let the
    // caller remove the allocation itself to avoid iterator invalidation.
    eraseUsesOfInstruction(TheMemory);

    return true;
  }

  // Otherwise removing the deallocation will drop any releases. Check that
  // there is nothing preventing removal.
  DestroyAddrPromotionState state(Releases);

  for (auto p : llvm::enumerate(Releases)) {
    auto *r = p.value();
    if (r == nullptr)
      continue;

    // We stash all of the destroy_addr that we see.
    if (auto *dai = dyn_cast<DestroyAddrInst>(r)) {
      state.initializeForDestroyAddr(p.index() /*destroyAddrIndex*/);
      // Make sure we can actually promote this destroy addr. If we can not,
      // then we must bail. In order to not gather available values twice, we
      // gather the available values here that we will use to promote the
      // values.
      if (!canPromoteDestroyAddr(dai, state.availableValueList))
        return false;
      continue;
    }

    LLVM_DEBUG(llvm::dbgs()
               << "*** Failed to remove autogenerated non-trivial alloc: "
                  "kept alive by release: "
               << *r);
    return false;
  }

  // If we reached this point, we can promote all of our destroy_addr.
  for (unsigned i : range(state.size())) {
    DestroyAddrInst *dai;
    MutableArrayRef<AvailableValue> values;
    std::tie(dai, values) = state.getData(i);
    promoteDestroyAddr(dai, values);
    // We do not need to unset releases, since we are going to exit here.
  }

  LLVM_DEBUG(llvm::dbgs() << "*** Removing autogenerated non-trivial alloc: "
                          << *TheMemory);

  // If it is safe to remove, do it. Recursively remove all instructions
  // hanging off the allocation instruction, then return success. Let the
  // caller remove the allocation itself to avoid iterator invalidation.
  eraseUsesOfInstruction(TheMemory);
  return true;
}
bool AllocOptimize::optimizeMemoryAccesses() {
  bool changed = false;

  // If we've successfully checked all of the definitive initialization
  // requirements, try to promote loads. This can explode copy_addrs, so the
  // use list may change size.
  for (unsigned i = 0; i != Uses.size(); ++i) {
    auto &use = Uses[i];
    // Ignore entries for instructions that got expanded along the way.
    if (use.Inst && use.Kind == PMOUseKind::Load) {
      if (promoteLoad(use.Inst)) {
        Uses[i].Inst = nullptr; // remove entry if load got deleted.
        changed = true;
      }
    }
  }

  return changed;
}

//===----------------------------------------------------------------------===//
//                           Top Level Entrypoints
//===----------------------------------------------------------------------===//

static AllocationInst *getOptimizableAllocation(SILInstruction *i) {
  if (!isa<AllocBoxInst>(i) && !isa<AllocStackInst>(i)) {
    return nullptr;
  }

  auto *alloc = cast<AllocationInst>(i);

  // If our aggregate has unreferenceable storage, we can't optimize. Return
  // nullptr.
  if (getMemoryType(alloc).aggregateHasUnreferenceableStorage())
    return nullptr;

  // Otherwise we are good to go. Let's try to optimize this memory!
  return alloc;
}

static bool optimizeMemoryAccesses(SILFunction &fn) {
  bool changed = false;
  DeadEndBlocks deadEndBlocks(&fn);

  for (auto &bb : fn) {
    auto i = bb.begin(), e = bb.end();
    while (i != e) {
      // First see if i is an allocation that we can optimize. If not, skip
      // it.
      AllocationInst *alloc = getOptimizableAllocation(&*i);
      if (!alloc) {
        ++i;
        continue;
      }

      LLVM_DEBUG(llvm::dbgs()
                 << "*** PMO Optimize Memory Accesses looking at: " << *alloc
                 << "\n");
      PMOMemoryObjectInfo memInfo(alloc);

      // Set up the datastructure used to collect the uses of the allocation.
      SmallVector<PMOMemoryUse, 16> uses;
      SmallVector<SILInstruction *, 4> destroys;

      // Walk the use list of the pointer, collecting them. If we are not able
      // to optimize, skip this value. *NOTE* We may still scalarize values
      // inside the value.
      if (!collectPMOElementUsesFrom(memInfo, uses, destroys)) {
        ++i;
        continue;
      }

      AllocOptimize allocOptimize(alloc, uses, destroys, deadEndBlocks);
      changed |= allocOptimize.optimizeMemoryAccesses();

      // Move onto the next instruction. We know this is safe since we do not
      // eliminate allocations here.
      ++i;
    }
  }

  return changed;
}
static bool eliminateDeadAllocations(SILFunction &fn) {
  bool changed = false;
  DeadEndBlocks deadEndBlocks(&fn);

  for (auto &bb : fn) {
    auto i = bb.begin(), e = bb.end();
    while (i != e) {
      // First see if i is an allocation that we can optimize. If not, skip
      // it.
      AllocationInst *alloc = getOptimizableAllocation(&*i);
      if (!alloc) {
        ++i;
        continue;
      }

      LLVM_DEBUG(llvm::dbgs()
                 << "*** PMO Dead Allocation Elimination looking at: "
                 << *alloc << "\n");
      PMOMemoryObjectInfo memInfo(alloc);

      // Set up the datastructure used to collect the uses of the allocation.
      SmallVector<PMOMemoryUse, 16> uses;
      SmallVector<SILInstruction *, 4> destroys;

      // Walk the use list of the pointer, collecting them. If we are not able
      // to optimize, skip this value. *NOTE* We may still scalarize values
      // inside the value.
      if (!collectPMOElementUsesFrom(memInfo, uses, destroys)) {
        ++i;
        continue;
      }

      AllocOptimize allocOptimize(alloc, uses, destroys, deadEndBlocks);
      changed |= allocOptimize.tryToRemoveDeadAllocation();

      // Move onto the next instruction. We know this is safe since we do not
      // eliminate allocations here.
      ++i;
      if (alloc->use_empty()) {
        alloc->eraseFromParent();
        ++NumAllocRemoved;
        changed = true;
      }
    }
  }

  return changed;
}

namespace {

class PredictableMemoryAccessOptimizations : public SILFunctionTransform {
  /// The entry point to the transformation.
  ///
  /// FIXME: This pass should not need to rerun on deserialized
  /// functions. Nothing should have changed in the upstream pipeline after
  /// deserialization. However, rerunning does improve some benchmarks. This
  /// either indicates that this pass is missing some opportunities the first
  /// time, or has a pass order dependency on other early passes.
  void run() override {
    // TODO: Can we invalidate here just instructions?
    if (optimizeMemoryAccesses(*getFunction()))
      invalidateAnalysis(SILAnalysis::InvalidationKind::FunctionBody);
  }
};

class PredictableDeadAllocationElimination : public SILFunctionTransform {
  void run() override {
    if (eliminateDeadAllocations(*getFunction()))
      invalidateAnalysis(SILAnalysis::InvalidationKind::FunctionBody);
  }
};

} // end anonymous namespace

SILTransform *swift::createPredictableMemoryAccessOptimizations() {
  return new PredictableMemoryAccessOptimizations();
}

SILTransform *swift::createPredictableDeadAllocationElimination() {
  return new PredictableDeadAllocationElimination();
}