//===--- PredictableMemOpt.cpp - Perform predictable memory optzns --------===//
//
// This source file is part of the Swift.org open source project
//
// Copyright (c) 2014 - 2017 Apple Inc. and the Swift project authors
// Licensed under Apache License v2.0 with Runtime Library Exception
//
// See https://swift.org/LICENSE.txt for license information
// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
//
//===----------------------------------------------------------------------===//

#define DEBUG_TYPE "predictable-memopt"
#include "swift/SILOptimizer/PassManager/Passes.h"
#include "DIMemoryUseCollector.h"
#include "swift/SIL/SILBuilder.h"
#include "swift/SILOptimizer/Utils/Local.h"
#include "swift/SILOptimizer/PassManager/Transforms.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"

using namespace swift;

STATISTIC(NumLoadPromoted, "Number of loads promoted");
STATISTIC(NumDestroyAddrPromoted, "Number of destroy_addrs promoted");
STATISTIC(NumAllocRemoved, "Number of allocations completely removed");

//===----------------------------------------------------------------------===//
//                   Subelement Analysis Implementation
//===----------------------------------------------------------------------===//

// We can only analyze components of structs whose storage is fully accessible
// from Swift.
static StructDecl *
getFullyReferenceableStruct(SILType Ty) {
  auto SD = Ty.getStructOrBoundGenericStruct();
  if (!SD || SD->hasUnreferenceableStorage())
    return nullptr;
  return SD;
}

static unsigned getNumSubElements(SILType T, SILModule &M) {
  if (auto TT = T.getAs<TupleType>()) {
    unsigned NumElements = 0;
    for (auto index : indices(TT.getElementTypes()))
      NumElements += getNumSubElements(T.getTupleElementType(index), M);
    return NumElements;
  }

  if (auto *SD = getFullyReferenceableStruct(T)) {
    unsigned NumElements = 0;
    for (auto *D : SD->getStoredProperties())
      NumElements += getNumSubElements(T.getFieldType(D, M), M);
    return NumElements;
  }

  // If this isn't a tuple or struct, it is a single element.
  return 1;
}

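// For illustration (the struct below is invented for exposition, not taken
// from this file): a value of type
//   struct S { var a: (Int, Int); var b: Int }
// has getNumSubElements == 3.  The scalar leaves are linearized in
// declaration order -- a.0, a.1, b -- and numbered 0, 1, 2.  computeSubelement
// below maps an address projection to its index in exactly this numbering.
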
/// getAccessPathRoot - Given an address, dive through any tuple/struct element
/// addresses to get the underlying value.
static SILValue getAccessPathRoot(SILValue Pointer) {
  while (1) {
    if (auto *TEAI = dyn_cast<TupleElementAddrInst>(Pointer))
      Pointer = TEAI->getOperand();
    else if (auto *SEAI = dyn_cast<StructElementAddrInst>(Pointer))
      Pointer = SEAI->getOperand();
    else if (auto *BAI = dyn_cast<BeginAccessInst>(Pointer))
      Pointer = BAI->getSource();
    else
      return Pointer;
  }
}

/// Compute the subelement number indicated by the specified pointer (which is
/// derived from the root by a series of tuple/struct element addresses) by
/// treating the type as a linearized namespace with sequential elements.  For
/// example, given:
///
///   root = alloc { a: { c: i64, d: i64 }, b: (i64, i64) }
///   tmp1 = struct_element_addr root, 1
///   tmp2 = tuple_element_addr tmp1, 0
///
/// This will return a subelement number of 2.
///
/// If this pointer is to within an existential projection, it returns ~0U.
///
static unsigned computeSubelement(SILValue Pointer,
                                  SingleValueInstruction *RootInst) {
  unsigned SubEltNumber = 0;
  SILModule &M = RootInst->getModule();

  while (1) {
    // If we got to the root, we're done.
    if (RootInst == Pointer)
      return SubEltNumber;

    if (auto *PBI = dyn_cast<ProjectBoxInst>(Pointer)) {
      Pointer = PBI->getOperand();
      continue;
    }

    if (auto *BAI = dyn_cast<BeginAccessInst>(Pointer)) {
      Pointer = BAI->getSource();
      continue;
    }

    if (auto *TEAI = dyn_cast<TupleElementAddrInst>(Pointer)) {
      SILType TT = TEAI->getOperand()->getType();

      // Keep track of what subelement is being referenced.
      for (unsigned i = 0, e = TEAI->getFieldNo(); i != e; ++i) {
        SubEltNumber += getNumSubElements(TT.getTupleElementType(i), M);
      }
      Pointer = TEAI->getOperand();
      continue;
    }

    if (auto *SEAI = dyn_cast<StructElementAddrInst>(Pointer)) {
      SILType ST = SEAI->getOperand()->getType();

      // Keep track of what subelement is being referenced.
      StructDecl *SD = SEAI->getStructDecl();
      for (auto *D : SD->getStoredProperties()) {
        if (D == SEAI->getField()) break;
        SubEltNumber += getNumSubElements(ST.getFieldType(D, M), M);
      }

      Pointer = SEAI->getOperand();
      continue;
    }

    assert(isa<InitExistentialAddrInst>(Pointer) &&
           "Unknown access path instruction");
    // Cannot promote loads and stores from within an existential projection.
    return ~0U;
  }
}

/// Given an aggregate value and an access path, extract the value indicated by
/// the path.
static SILValue extractSubElement(SILValue Val, unsigned SubElementNumber,
                                  SILBuilder &B, SILLocation Loc) {
  SILType ValTy = Val->getType();

  // Extract tuple elements.
  if (auto TT = ValTy.getAs<TupleType>()) {
    for (unsigned EltNo : indices(TT.getElementTypes())) {
      // Keep track of what subelement is being referenced.
      SILType EltTy = ValTy.getTupleElementType(EltNo);
      unsigned NumSubElt = getNumSubElements(EltTy, B.getModule());
      if (SubElementNumber < NumSubElt) {
        Val = B.emitTupleExtract(Loc, Val, EltNo, EltTy);
        return extractSubElement(Val, SubElementNumber, B, Loc);
      }

      SubElementNumber -= NumSubElt;
    }

    llvm_unreachable("Didn't find field");
  }

  // Extract struct elements.
  if (auto *SD = getFullyReferenceableStruct(ValTy)) {
    for (auto *D : SD->getStoredProperties()) {
      auto fieldType = ValTy.getFieldType(D, B.getModule());
      unsigned NumSubElt = getNumSubElements(fieldType, B.getModule());

      if (SubElementNumber < NumSubElt) {
        Val = B.emitStructExtract(Loc, Val, D);
        return extractSubElement(Val, SubElementNumber, B, Loc);
      }

      SubElementNumber -= NumSubElt;
    }
    llvm_unreachable("Didn't find field");
  }

  // Otherwise, we're down to a scalar.
  assert(SubElementNumber == 0 && "Miscalculation indexing subelements");
  return Val;
}

//===----------------------------------------------------------------------===//
//                        Allocation Optimization
//===----------------------------------------------------------------------===//

namespace {

/// AllocOptimize - This performs load promotion and deletes synthesized
/// allocations if all loads can be removed.
class AllocOptimize {
  SILModule &Module;

  /// TheMemory - This is either an alloc_box or alloc_stack instruction.
  AllocationInst *TheMemory;

  /// This is the SILType of the memory object.
  SILType MemoryType;

  /// The number of primitive subelements across all elements of this memory
  /// value.
  unsigned NumMemorySubElements;

  SmallVectorImpl<DIMemoryUse> &Uses;
  SmallVectorImpl<SILInstruction *> &Releases;

  llvm::SmallPtrSet<SILBasicBlock *, 32> HasLocalDefinition;

  /// This is a map of uses that are not loads (i.e., they are Stores,
  /// InOutUses, and Escapes), to their entry in Uses.
  llvm::SmallDenseMap<SILInstruction *, unsigned, 16> NonLoadUses;

  /// Does this value escape anywhere in the function.
  bool HasAnyEscape = false;

public:
  AllocOptimize(AllocationInst *TheMemory, SmallVectorImpl<DIMemoryUse> &Uses,
                SmallVectorImpl<SILInstruction *> &Releases);

  bool doIt();

private:
  bool promoteLoad(SILInstruction *Inst);
  bool promoteDestroyAddr(DestroyAddrInst *DAI);

  // Load promotion.
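  //
  // Overview of the promotion machinery declared below: available values are
  // tracked per linearized subelement as (value, subelement-of-that-value)
  // pairs.  computeAvailableValues / computeAvailableValuesFrom walk
  // backwards from a load -- first within its block, then across
  // predecessors -- and call updateAvailableValues to fold each
  // store/assign/copy_addr into that table.  The file-static helper
  // aggregateAvailableValues then rebuilds a value of the loaded type from
  // whatever pieces are available, reloading the rest.
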
  bool hasEscapedAt(SILInstruction *I);
  void updateAvailableValues(
      SILInstruction *Inst, llvm::SmallBitVector &RequiredElts,
      SmallVectorImpl<std::pair<SILValue, unsigned>> &Result,
      llvm::SmallBitVector &ConflictingValues);
  void computeAvailableValues(
      SILInstruction *StartingFrom, llvm::SmallBitVector &RequiredElts,
      SmallVectorImpl<std::pair<SILValue, unsigned>> &Result);
  void computeAvailableValuesFrom(
      SILBasicBlock::iterator StartingFrom, SILBasicBlock *BB,
      llvm::SmallBitVector &RequiredElts,
      SmallVectorImpl<std::pair<SILValue, unsigned>> &Result,
      llvm::SmallDenseMap<SILBasicBlock *, llvm::SmallBitVector, 32>
          &VisitedBlocks,
      llvm::SmallBitVector &ConflictingValues);

  void explodeCopyAddr(CopyAddrInst *CAI);

  bool tryToRemoveDeadAllocation();
};

} // end anonymous namespace

AllocOptimize::AllocOptimize(AllocationInst *TheMemory,
                             SmallVectorImpl<DIMemoryUse> &Uses,
                             SmallVectorImpl<SILInstruction *> &Releases)
    : Module(TheMemory->getModule()), TheMemory(TheMemory), Uses(Uses),
      Releases(Releases) {

  // Compute the type of the memory object.
  if (auto *ABI = dyn_cast<AllocBoxInst>(TheMemory)) {
    assert(ABI->getBoxType()->getLayout()->getFields().size() == 1 &&
           "optimizing multi-field boxes not implemented");
    MemoryType = ABI->getBoxType()->getFieldType(ABI->getModule(), 0);
  } else {
    assert(isa<AllocStackInst>(TheMemory));
    MemoryType = cast<AllocStackInst>(TheMemory)->getElementType();
  }

  NumMemorySubElements = getNumSubElements(MemoryType, Module);

  // The first step of processing an element is to collect information about
  // the element into data structures we use later.
  for (unsigned ui = 0, e = Uses.size(); ui != e; ++ui) {
    auto &Use = Uses[ui];
    assert(Use.Inst && "No instruction identified?");

    // Keep track of all the uses that aren't loads.
    if (Use.Kind == DIUseKind::Load)
      continue;

    NonLoadUses[Use.Inst] = ui;
    HasLocalDefinition.insert(Use.Inst->getParent());

    if (Use.Kind == DIUseKind::Escape) {
      // Determine which blocks the value can escape from.  We aren't allowed
      // to promote loads in blocks reachable from an escape point.
      HasAnyEscape = true;
    }
  }

  // It isn't really a use, but we account for the alloc_box/mark_uninitialized
  // as a use so we see it in our dataflow walks.
  NonLoadUses[TheMemory] = ~0U;
  HasLocalDefinition.insert(TheMemory->getParent());
}

/// hasEscapedAt - Return true if the box has escaped at the specified
/// instruction.  We are not allowed to do load promotion in an escape region.
bool AllocOptimize::hasEscapedAt(SILInstruction *I) {
  // FIXME: This is not an aggressive implementation. :)

  // TODO: At some point, we should special case closures that just *read* from
  // the escaped value (by looking at the body of the closure).  They should
  // not prevent load promotion, and will allow promoting values like X in
  // regions dominated by "... && X != 0".
  return HasAnyEscape;
}

/// The specified instruction is a non-load access of the element being
/// promoted.  See if it provides a value or refines the demanded element mask
/// used for load promotion.
void AllocOptimize::
updateAvailableValues(SILInstruction *Inst, llvm::SmallBitVector &RequiredElts,
                      SmallVectorImpl<std::pair<SILValue, unsigned>> &Result,
                      llvm::SmallBitVector &ConflictingValues) {
  // Handle store and assign.
  if (isa<StoreInst>(Inst) || isa<AssignInst>(Inst)) {
    unsigned StartSubElt = computeSubelement(Inst->getOperand(1), TheMemory);
    assert(StartSubElt != ~0U && "Store within enum projection not handled");
    SILType ValTy = Inst->getOperand(0)->getType();

    for (unsigned i = 0, e = getNumSubElements(ValTy, Module); i != e; ++i) {
      // If this element is not required, don't fill it in.
      if (!RequiredElts[StartSubElt+i]) continue;

      // If there is no result computed for this subelement, record it.  If
      // there already is a result, check it for conflict.  If there is no
      // conflict, then we're ok.
      auto &Entry = Result[StartSubElt+i];
      if (Entry.first == SILValue())
        Entry = { Inst->getOperand(0), i };
      else if (Entry.first != Inst->getOperand(0) || Entry.second != i)
        ConflictingValues[StartSubElt+i] = true;

      // This element is now provided.
      RequiredElts[StartSubElt+i] = false;
    }

    return;
  }

  // If we get here with a copy_addr, it must be storing into the element.
  // Check to see if any loaded subelements are being used, and if so, explode
  // the copy_addr to its individual pieces.
  if (auto *CAI = dyn_cast<CopyAddrInst>(Inst)) {
    unsigned StartSubElt = computeSubelement(Inst->getOperand(1), TheMemory);
    assert(StartSubElt != ~0U && "Store within enum projection not handled");
    SILType ValTy = Inst->getOperand(1)->getType();

    bool AnyRequired = false;
    for (unsigned i = 0, e = getNumSubElements(ValTy, Module); i != e; ++i) {
      // If this element is not required, don't fill it in.
      AnyRequired = RequiredElts[StartSubElt+i];
      if (AnyRequired) break;
    }

    // If this is a copy_addr that doesn't intersect the loaded subelements,
    // just continue with an unmodified load mask.
    if (!AnyRequired)
      return;

    // If the copy_addr is of a non-loadable type, we can't promote it.  Just
    // consider it to be a clobber.
    if (CAI->getOperand(0)->getType().isLoadable(Module)) {
      // Otherwise, some part of the copy_addr's value is demanded by a load,
      // so we need to explode it to its component pieces.  This only expands
      // one level of the copy_addr.
      explodeCopyAddr(CAI);

      // The copy_addr doesn't provide any values, but we've arranged for our
      // iterators to visit the newly generated instructions, which do.
      return;
    }
  }

  // TODO: inout apply's should only clobber pieces passed in.

  // Otherwise, this is some unknown instruction, conservatively assume that
  // all values are clobbered.
  RequiredElts.clear();
  ConflictingValues = llvm::SmallBitVector(Result.size(), true);
  return;
}

/// Try to find available values of a set of subelements of the current value,
/// starting right before the specified instruction.
///
/// The bitvector indicates which subelements we're interested in, and result
/// captures the available value (plus an indicator of which subelement of that
/// value is needed).
///
void AllocOptimize::
computeAvailableValues(SILInstruction *StartingFrom,
                       llvm::SmallBitVector &RequiredElts,
                       SmallVectorImpl<std::pair<SILValue, unsigned>> &Result) {
  llvm::SmallDenseMap<SILBasicBlock *, llvm::SmallBitVector, 32> VisitedBlocks;
  llvm::SmallBitVector ConflictingValues(Result.size());

  computeAvailableValuesFrom(StartingFrom->getIterator(),
                             StartingFrom->getParent(), RequiredElts, Result,
                             VisitedBlocks, ConflictingValues);

  // If we have any conflicting values, explicitly mask them out of the result,
  // so we don't pick one arbitrary available value.
  if (!ConflictingValues.none())
    for (unsigned i = 0, e = Result.size(); i != e; ++i)
      if (ConflictingValues[i])
        Result[i] = { SILValue(), 0U };

  return;
}

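// A note on the backward walk implemented below: within a block, the scan
// stops as soon as every demanded subelement has been satisfied.  Across
// predecessors, the walk is optimistic about cycles: revisiting a block with
// a different demand mask means two paths disagree, so the disagreeing bits
// are marked conflicting and dropped from the demand set, which ultimately
// keeps the load from being promoted with an arbitrarily chosen value.
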
void AllocOptimize::
computeAvailableValuesFrom(SILBasicBlock::iterator StartingFrom,
                           SILBasicBlock *BB,
                           llvm::SmallBitVector &RequiredElts,
                           SmallVectorImpl<std::pair<SILValue, unsigned>>
                               &Result,
                           llvm::SmallDenseMap<SILBasicBlock *,
                                               llvm::SmallBitVector, 32>
                               &VisitedBlocks,
                           llvm::SmallBitVector &ConflictingValues) {
  assert(!RequiredElts.none() && "Scanning with a goal of finding nothing?");

  // If there is a potential modification in the current block, scan the block
  // to see if the store or escape is before or after the load.  If it is
  // before, check to see if it produces the value we are looking for.
  if (HasLocalDefinition.count(BB)) {
    for (SILBasicBlock::iterator BBI = StartingFrom; BBI != BB->begin();) {
      SILInstruction *TheInst = &*std::prev(BBI);

      // If this instruction is unrelated to the element, ignore it.
      if (!NonLoadUses.count(TheInst)) {
        --BBI;
        continue;
      }

      // Given an interesting instruction, incorporate it into the set of
      // results, and filter down the list of demanded subelements that we
      // still need.
      updateAvailableValues(TheInst, RequiredElts, Result, ConflictingValues);

      // If this satisfied all of the demanded values, we're done.
      if (RequiredElts.none())
        return;

      // Otherwise, keep scanning the block.  If the instruction we were
      // looking at just got exploded, don't skip the next instruction.
      if (&*std::prev(BBI) == TheInst)
        --BBI;
    }
  }

  // Otherwise, we need to scan up the CFG looking for available values.
  for (auto PI = BB->pred_begin(), E = BB->pred_end(); PI != E; ++PI) {
    SILBasicBlock *PredBB = *PI;

    // If the predecessor block has already been visited (potentially due to a
    // cycle in the CFG), don't revisit it.  We can do this safely because we
    // are optimistically assuming that all incoming elements in a cycle will
    // be the same.  If we ever detect a conflicting element, we record it and
    // do not look at the result.
    auto Entry = VisitedBlocks.insert({PredBB, RequiredElts});
    if (!Entry.second) {
      // If we are revisiting a block and asking for different required
      // elements, then anything that isn't agreeing is in conflict.
      const auto &PrevRequired = Entry.first->second;
      if (PrevRequired != RequiredElts) {
        ConflictingValues |= (PrevRequired ^ RequiredElts);

        RequiredElts &= ~ConflictingValues;
        if (RequiredElts.none())
          return;
      }
      continue;
    }

    // Make sure to pass in the same set of required elements for each pred.
    llvm::SmallBitVector Elts = RequiredElts;
    computeAvailableValuesFrom(PredBB->end(), PredBB, Elts, Result,
                               VisitedBlocks, ConflictingValues);

    // If we have any conflicting values, don't bother searching for them.
    RequiredElts &= ~ConflictingValues;
    if (RequiredElts.none())
      return;
  }
}

static bool anyMissing(unsigned StartSubElt, unsigned NumSubElts,
                       ArrayRef<std::pair<SILValue, unsigned>> &Values) {
  while (NumSubElts) {
    if (!Values[StartSubElt].first) return true;
    ++StartSubElt;
    --NumSubElts;
  }
  return false;
}

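// For illustration (the type and values here are invented for exposition):
// with a memory object of type ((Int, Int), Int), the linearized subelements
// are numbered 0, 1, 2.  A store of a whole (Int, Int) value %v into element
// 0 makes subelements 0 and 1 available as the pairs {%v, 0} and {%v, 1},
// while subelement 2 stays unavailable until something stores to it.  The
// helper below consumes this representation to rebuild a value of the
// requested type.
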
/// AggregateAvailableValues - Given a bunch of primitive subelement values,
/// build out the right aggregate type (LoadTy) by emitting tuple and struct
/// instructions as necessary.
static SILValue aggregateAvailableValues(
    SILInstruction *Inst, SILType LoadTy, SILValue Address,
    ArrayRef<std::pair<SILValue, unsigned>> AvailableValues,
    unsigned FirstElt) {
  assert(LoadTy.isObject());
  SILModule &M = Inst->getModule();

  // Check to see if the requested value is fully available, as an aggregate.
  // This is a super-common case for single-element structs, but is also a
  // general answer for arbitrary structs and tuples as well.
  if (FirstElt < AvailableValues.size()) {  // #Elements may be zero.
    SILValue FirstVal = AvailableValues[FirstElt].first;
    if (FirstVal && AvailableValues[FirstElt].second == 0 &&
        FirstVal->getType() == LoadTy) {
      // If the first element of this value is available, check any extra ones
      // before declaring success.
      bool AllMatch = true;
      for (unsigned i = 0, e = getNumSubElements(LoadTy, M); i != e; ++i)
        if (AvailableValues[FirstElt+i].first != FirstVal ||
            AvailableValues[FirstElt+i].second != i) {
          AllMatch = false;
          break;
        }

      if (AllMatch)
        return FirstVal;
    }
  }

  SILBuilderWithScope B(Inst);

  if (TupleType *TT = LoadTy.getAs<TupleType>()) {
    SmallVector<SILValue, 4> ResultElts;

    for (unsigned EltNo : indices(TT->getElements())) {
      SILType EltTy = LoadTy.getTupleElementType(EltNo);
      unsigned NumSubElt = getNumSubElements(EltTy, M);

      // If we are missing any of the available values in this tuple element,
      // compute an address to load from.
      SILValue EltAddr;
      if (anyMissing(FirstElt, NumSubElt, AvailableValues))
        EltAddr = B.createTupleElementAddr(Inst->getLoc(), Address, EltNo,
                                           EltTy.getAddressType());

      ResultElts.push_back(aggregateAvailableValues(Inst, EltTy, EltAddr,
                                                    AvailableValues, FirstElt));
      FirstElt += NumSubElt;
    }

    return B.createTuple(Inst->getLoc(), LoadTy, ResultElts);
  }

  // Extract struct elements from fully referenceable structs.
  if (auto *SD = getFullyReferenceableStruct(LoadTy)) {
    SmallVector<SILValue, 4> ResultElts;

    for (auto *FD : SD->getStoredProperties()) {
      SILType EltTy = LoadTy.getFieldType(FD, M);
      unsigned NumSubElt = getNumSubElements(EltTy, M);

      // If we are missing any of the available values in this struct element,
      // compute an address to load from.
      SILValue EltAddr;
      if (anyMissing(FirstElt, NumSubElt, AvailableValues))
        EltAddr = B.createStructElementAddr(Inst->getLoc(), Address, FD,
                                            EltTy.getAddressType());

      ResultElts.push_back(aggregateAvailableValues(Inst, EltTy, EltAddr,
                                                    AvailableValues, FirstElt));
      FirstElt += NumSubElt;
    }
    return B.createStruct(Inst->getLoc(), LoadTy, ResultElts);
  }

  // Otherwise, we have a simple primitive.  If the value is available, use it,
  // otherwise emit a load of the value.
  auto Val = AvailableValues[FirstElt];
  if (!Val.first)
    return B.createLoad(Inst->getLoc(), Address,
                        LoadOwnershipQualifier::Unqualified);

  SILValue EltVal = extractSubElement(Val.first, Val.second, B, Inst->getLoc());

  // It must be the same type as LoadTy if available.
  assert(EltVal->getType() == LoadTy && "Subelement types mismatch");
  return EltVal;
}

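// For illustration (assumed SIL, simplified for exposition): promoting
//
//   %a  = alloc_stack $(Int, Int)
//   %e0 = tuple_element_addr %a, 0
//   store %x to %e0
//   %v  = load %a
//
// with only subelement 0 available rebuilds the loaded value roughly as
//
//   %e1 = tuple_element_addr %a, 1
//   %y  = load %e1
//   %v  = tuple (%x, %y)
//
// i.e. available pieces are forwarded directly and the missing pieces are
// reloaded from their element addresses.
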
/// At this point, we know that this element satisfies the definitive init
/// requirements, so we can try to promote loads to enable SSA-based dataflow
/// analysis.  We know that accesses to this element only access this element,
/// cross element accesses have been scalarized.
///
/// This returns true if the load has been removed from the program.
///
bool AllocOptimize::promoteLoad(SILInstruction *Inst) {
  // Note that we intentionally don't support forwarding of weak pointers,
  // because the underlying value may be deallocated at any time.  We would
  // have to prove that something in this function is holding the weak value
  // live across the promoted region and that isn't desired for a stable
  // diagnostics pass like this one.

  // We only handle load and copy_addr right now.
  SILValue src;
  if (auto CAI = dyn_cast<CopyAddrInst>(Inst)) {
    // If this is a CopyAddr, verify that the element type is loadable.  If
    // not, we can't explode to a load.
    src = CAI->getSrc();
    if (!src->getType().isLoadable(Module))
      return false;
  } else if (auto load = dyn_cast<LoadInst>(Inst)) {
    src = load->getOperand();
  } else {
    return false;
  }

  // If the box has escaped at this instruction, we can't safely promote the
  // load.
  if (hasEscapedAt(Inst))
    return false;

  SILType LoadTy = src->getType().getObjectType();

  // If this is a load/copy_addr from a struct field that we want to promote,
  // compute the access path down to the field so we can determine precise
  // def/use behavior.
  unsigned FirstElt = computeSubelement(src, TheMemory);

  // If this is a load from within an enum projection, we can't promote it
  // since we don't track subelements in a type that could be changing.
  if (FirstElt == ~0U)
    return false;

  unsigned NumLoadSubElements = getNumSubElements(LoadTy, Module);

  // Set up the bitvector of elements being demanded by the load.
  llvm::SmallBitVector RequiredElts(NumMemorySubElements);
  RequiredElts.set(FirstElt, FirstElt+NumLoadSubElements);

  SmallVector<std::pair<SILValue, unsigned>, 8> AvailableValues;
  AvailableValues.resize(NumMemorySubElements);

  // Find out if we have any available values.  If no bits are demanded, we
  // trivially succeed.  This can happen when there is a load of an empty
  // struct.
  if (NumLoadSubElements != 0) {
    computeAvailableValues(Inst, RequiredElts, AvailableValues);

    // If there are no values available at this load point, then we fail to
    // promote this load and there is nothing to do.
    bool AnyAvailable = false;
    for (unsigned i = FirstElt, e = i+NumLoadSubElements; i != e; ++i)
      if (AvailableValues[i].first) {
        AnyAvailable = true;
        break;
      }

    if (!AnyAvailable)
      return false;
  }

  // Ok, we have some available values.  If we have a copy_addr, explode it
  // now, exposing the load operation within it.  Subsequent optimization
  // passes will see the load and propagate the available values into it.
  if (auto *CAI = dyn_cast<CopyAddrInst>(Inst)) {
    explodeCopyAddr(CAI);

    // This is removing the copy_addr, but explodeCopyAddr takes care of
    // removing the instruction from Uses for us, so we return false.
    return false;
  }

  // Aggregate together all of the subelements into something that has the same
  // type as the load did, and emit smaller loads for any subelements that were
  // not available.
  auto Load = cast<LoadInst>(Inst);
  auto NewVal = aggregateAvailableValues(Load, LoadTy, Load->getOperand(),
                                         AvailableValues, FirstElt);

  ++NumLoadPromoted;

  // Simply replace the load.
  DEBUG(llvm::dbgs() << " *** Promoting load: " << *Load << "\n");
  DEBUG(llvm::dbgs() << "     To value: " << *NewVal << "\n");

  Load->replaceAllUsesWith(NewVal);
  SILValue Addr = Load->getOperand();
  Load->eraseFromParent();
  if (auto *AddrI = Addr->getDefiningInstruction())
    recursivelyDeleteTriviallyDeadInstructions(AddrI);
  return true;
}

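// For illustration (assumed SIL, simplified for exposition): when the value
// stored into the allocation is fully available, a
//
//   destroy_addr %a
//
// can be rewritten as a destroy of the forwarded value itself (e.g. a
// release_value / destroy_value of %v), which is what promoteDestroyAddr
// below emits via emitDestroyValueOperation.
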
/// promoteDestroyAddr - DestroyAddr is a composed operation merging
/// load+strong_release.  If the implicit load's value is available, explode
/// it.
///
/// Note that we handle the general case of a destroy_addr of a piece of the
/// memory object, not just destroy_addrs of the entire thing.
///
bool AllocOptimize::promoteDestroyAddr(DestroyAddrInst *DAI) {
  SILValue Address = DAI->getOperand();

  // We cannot promote destroys of address-only types, because we can't expose
  // the load.
  SILType LoadTy = Address->getType().getObjectType();
  if (LoadTy.isAddressOnly(Module))
    return false;

  // If the box has escaped at this instruction, we can't safely promote the
  // load.
  if (hasEscapedAt(DAI))
    return false;

  // Compute the access path down to the field so we can determine precise
  // def/use behavior.
  unsigned FirstElt = computeSubelement(Address, TheMemory);
  assert(FirstElt != ~0U && "destroy within enum projection is not valid");
  unsigned NumLoadSubElements = getNumSubElements(LoadTy, Module);

  // Set up the bitvector of elements being demanded by the load.
  llvm::SmallBitVector RequiredElts(NumMemorySubElements);
  RequiredElts.set(FirstElt, FirstElt+NumLoadSubElements);

  SmallVector<std::pair<SILValue, unsigned>, 8> AvailableValues;
  AvailableValues.resize(NumMemorySubElements);

  // Find out if we have any available values.  If no bits are demanded, we
  // trivially succeed.  This can happen when there is a load of an empty
  // struct.
  if (NumLoadSubElements != 0) {
    computeAvailableValues(DAI, RequiredElts, AvailableValues);

    // If some value is not available at this load point, then we fail.
    for (unsigned i = FirstElt, e = FirstElt+NumLoadSubElements; i != e; ++i)
      if (!AvailableValues[i].first)
        return false;
  }

  // Aggregate together all of the subelements into something that has the same
  // type as the load did, and emit smaller loads for any subelements that were
  // not available.
  auto NewVal = aggregateAvailableValues(DAI, LoadTy, Address,
                                         AvailableValues, FirstElt);

  ++NumDestroyAddrPromoted;

  DEBUG(llvm::dbgs() << " *** Promoting destroy_addr: " << *DAI << "\n");
  DEBUG(llvm::dbgs() << "     To value: " << *NewVal << "\n");

  SILBuilderWithScope(DAI).emitDestroyValueOperation(DAI->getLoc(), NewVal);
  DAI->eraseFromParent();
  return true;
}

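// For illustration (assumed lowering, simplified for exposition): for a
// loadable type, exploding
//
//   copy_addr %src to %dst
//
// produces roughly
//
//   %v = load %src            // plus a retain/copy if this is not a take
//   store %v to %dst          // plus destroying the old value on an assign
//
// The exact sequence is whatever the type lowering's emitLoadOfCopy /
// emitStoreOfCopy helpers produce for the type and the copy_addr's
// [take]/[initialization] flags.
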
/// Explode a copy_addr instruction of a loadable type into lower level
/// operations like loads, stores, retains, releases, retain_value, etc.
void AllocOptimize::explodeCopyAddr(CopyAddrInst *CAI) {
  DEBUG(llvm::dbgs() << " -- Exploding copy_addr: " << *CAI << "\n");

  SILType ValTy = CAI->getDest()->getType().getObjectType();
  auto &TL = Module.getTypeLowering(ValTy);

  // Keep track of the new instructions emitted.
  SmallVector<SILInstruction *, 4> NewInsts;
  SILBuilder B(CAI, &NewInsts);
  B.setCurrentDebugScope(CAI->getDebugScope());

  // Use type lowering to lower the copy_addr into a load sequence + store
  // sequence appropriate for the type.
  SILValue StoredValue = TL.emitLoadOfCopy(B, CAI->getLoc(), CAI->getSrc(),
                                           CAI->isTakeOfSrc());

  TL.emitStoreOfCopy(B, CAI->getLoc(), StoredValue, CAI->getDest(),
                     CAI->isInitializationOfDest());

  // Update our internal state for this being gone.
  NonLoadUses.erase(CAI);

  // Remove the copy_addr from Uses.  A single copy_addr can appear multiple
  // times if the source and dest are to elements within a single aggregate,
  // but we only want to pick up the CopyAddrKind from the store.
  DIMemoryUse LoadUse, StoreUse;
  for (auto &Use : Uses) {
    if (Use.Inst != CAI)
      continue;

    if (Use.Kind == DIUseKind::Load) {
      assert(LoadUse.isInvalid());
      LoadUse = Use;
    } else {
      assert(StoreUse.isInvalid());
      StoreUse = Use;
    }

    Use.Inst = nullptr;

    // Keep scanning in case the copy_addr appears multiple times.
  }

  assert((LoadUse.isValid() || StoreUse.isValid()) &&
         "we should have a load or a store, possibly both");
  assert(StoreUse.isInvalid() || StoreUse.Kind == Assign ||
         StoreUse.Kind == PartialStore || StoreUse.Kind == Initialization);

  // Now that we've emitted a bunch of instructions, including a load and store
  // but also including other stuff, update the internal state of
  // LifetimeChecker to reflect them.

  // Update the instructions that touch the memory.
  for (auto *NewInst : NewInsts) {
    switch (NewInst->getKind()) {
    default:
      NewInst->dump();
      llvm_unreachable("Unknown instruction generated by copy_addr lowering");

    case SILInstructionKind::StoreInst:
      // If it is a store to the memory object (as opposed to a store to
      // something else), track it as an access.
      if (StoreUse.isValid()) {
        StoreUse.Inst = NewInst;
        NonLoadUses[NewInst] = Uses.size();
        Uses.push_back(StoreUse);
      }
      continue;

    case SILInstructionKind::LoadInst:
      // If it is a load from the memory object (as opposed to a load from
      // something else), track it as an access.  We need to explicitly check
      // to see if the load accesses "TheMemory" because it could either be a
      // load for the copy_addr source, or it could be a load corresponding to
      // the "assign" operation on the destination of the copy_addr.
      if (LoadUse.isValid() &&
          getAccessPathRoot(NewInst->getOperand(0)) == TheMemory) {
        LoadUse.Inst = NewInst;
        Uses.push_back(LoadUse);
      }
      continue;

    case SILInstructionKind::RetainValueInst:
    case SILInstructionKind::StrongRetainInst:
    case SILInstructionKind::StrongReleaseInst:
    case SILInstructionKind::UnownedRetainInst:
    case SILInstructionKind::UnownedReleaseInst:
    case SILInstructionKind::ReleaseValueInst:   // Destroy overwritten value
      // These are ignored.
      continue;
    }
  }

  // Next, remove the copy_addr itself.
  CAI->eraseFromParent();
}

/// tryToRemoveDeadAllocation - If the allocation is an autogenerated
/// allocation that is only stored to (after load promotion) then remove it
/// completely.
bool AllocOptimize::tryToRemoveDeadAllocation() {
  assert((isa<AllocBoxInst>(TheMemory) || isa<AllocStackInst>(TheMemory)) &&
         "Unhandled allocation case");

  // We don't want to remove allocations that are required for useful debug
  // information at -O0.  As such, we only remove allocations if:
  //
  // 1. They are in a transparent function.
  // 2. They are in a normal function, but didn't come from a VarDecl, or came
  //    from one that was autogenerated or inlined from a transparent function.
  SILLocation Loc = TheMemory->getLoc();
  if (!TheMemory->getFunction()->isTransparent() &&
      Loc.getAsASTNode<VarDecl>() && !Loc.isAutoGenerated() &&
      !Loc.is<MandatoryInlinedLocation>())
    return false;

  // Check the uses list to see if there are any non-store uses left over after
  // load promotion and other things DI does.
  for (auto &U : Uses) {
    // Ignore removed instructions.
    if (U.Inst == nullptr)
      continue;

    switch (U.Kind) {
    case DIUseKind::SelfInit:
    case DIUseKind::SuperInit:
      llvm_unreachable("Can't happen on allocations");
    case DIUseKind::Assign:
    case DIUseKind::PartialStore:
    case DIUseKind::InitOrAssign:
      break;    // These don't prevent removal.
    case DIUseKind::Initialization:
      if (!isa<ApplyInst>(U.Inst) &&
          // A copy_addr that is not a take affects the retain count
          // of the source.
          (!isa<CopyAddrInst>(U.Inst) ||
           cast<CopyAddrInst>(U.Inst)->isTakeOfSrc()))
        break;
      // FALL THROUGH.
      LLVM_FALLTHROUGH;
    case DIUseKind::Load:
    case DIUseKind::IndirectIn:
    case DIUseKind::InOutUse:
    case DIUseKind::Escape:
      DEBUG(llvm::dbgs() << "*** Failed to remove autogenerated alloc: "
                            "kept alive by: " << *U.Inst);
      return false;   // These do prevent removal.
    }
  }

  // If the memory object has non-trivial type, then removing the deallocation
  // will drop any releases.  Check that there is nothing preventing removal.
  if (!MemoryType.isTrivial(Module)) {
    for (auto *R : Releases) {
      if (R == nullptr || isa<DeallocStackInst>(R) || isa<DeallocBoxInst>(R))
        continue;

      DEBUG(llvm::dbgs() << "*** Failed to remove autogenerated alloc: "
                            "kept alive by release: " << *R);
      return false;
    }
  }

  DEBUG(llvm::dbgs() << "*** Removing autogenerated alloc_stack: "
                     << *TheMemory);

  // If it is safe to remove, do it.  Recursively remove all instructions
  // hanging off the allocation instruction, then return success.  Let the
  // caller remove the allocation itself to avoid iterator invalidation.
  eraseUsesOfInstruction(TheMemory);

  return true;
}

/// doIt - Optimize the memory object: promote loads and destroy_addrs, and
/// try to remove the allocation entirely.  Returns true if anything changed.
bool AllocOptimize::doIt() {
  bool Changed = false;

  // Don't try to optimize incomplete aggregates.
  if (MemoryType.aggregateHasUnreferenceableStorage())
    return false;

  // If we've successfully checked all of the definitive initialization
  // requirements, try to promote loads.  This can explode copy_addrs, so the
  // use list may change size.
  for (unsigned i = 0; i != Uses.size(); ++i) {
    auto &Use = Uses[i];
    // Ignore entries for instructions that got expanded along the way.
    if (Use.Inst && Use.Kind == DIUseKind::Load) {
      if (promoteLoad(Use.Inst)) {
        Uses[i].Inst = nullptr;  // remove entry if load got deleted.
        Changed = true;
      }
    }
  }

  // destroy_addr(p) is strong_release(load(p)), try to promote it too.
  for (unsigned i = 0; i != Releases.size(); ++i) {
    if (auto *DAI = dyn_cast_or_null<DestroyAddrInst>(Releases[i]))
      if (promoteDestroyAddr(DAI)) {
        // remove entry if destroy_addr got deleted.
        Releases[i] = nullptr;
        Changed = true;
      }
  }

  // If this is an allocation, try to remove it completely.
  Changed |= tryToRemoveDeadAllocation();

  return Changed;
}

static bool optimizeMemoryAllocations(SILFunction &Fn) {
  bool Changed = false;
  for (auto &BB : Fn) {
    auto I = BB.begin(), E = BB.end();
    while (I != E) {
      SILInstruction *Inst = &*I;
      if (!isa<AllocBoxInst>(Inst) && !isa<AllocStackInst>(Inst)) {
        ++I;
        continue;
      }
      auto Alloc = cast<AllocationInst>(Inst);

      DEBUG(llvm::dbgs() << "*** DI Optimize looking at: " << *Alloc << "\n");
      DIMemoryObjectInfo MemInfo(Alloc);

      // Set up the datastructure used to collect the uses of the allocation.
      SmallVector<DIMemoryUse, 16> Uses;
      SmallVector<SILInstruction *, 4> Releases;

      // Walk the use list of the pointer, collecting them.
      collectDIElementUsesFrom(MemInfo, Uses, Releases);

      Changed |= AllocOptimize(Alloc, Uses, Releases).doIt();

      // Carefully move iterator to avoid invalidation problems.
      ++I;
      if (Alloc->use_empty()) {
        Alloc->eraseFromParent();
        ++NumAllocRemoved;
        Changed = true;
      }
    }
  }
  return Changed;
}

namespace {

class PredictableMemoryOptimizations : public SILFunctionTransform {
  /// The entry point to the transformation.
  void run() override {
    if (optimizeMemoryAllocations(*getFunction()))
      invalidateAnalysis(SILAnalysis::InvalidationKind::FunctionBody);
  }
};

} // end anonymous namespace

SILTransform *swift::createPredictableMemoryOptimizations() {
  return new PredictableMemoryOptimizations();
}