//===--- StackNesting.cpp - Utility for stack nesting --------------------===// // // This source file is part of the Swift.org open source project // // Copyright (c) 2014 - 2017 Apple Inc. and the Swift project authors // Licensed under Apache License v2.0 with Runtime Library Exception // // See https://swift.org/LICENSE.txt for license information // See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors // //===----------------------------------------------------------------------===// #include "swift/SILOptimizer/Utils/StackNesting.h" #include "swift/Basic/Assertions.h" #include "swift/SIL/BasicBlockUtils.h" #include "swift/SIL/Dominance.h" #include "swift/SIL/SILBuilder.h" #include "swift/SIL/SILFunction.h" #include "swift/SIL/StackAllocation.h" #include "swift/SIL/Test.h" #include "llvm/Support/Debug.h" using namespace swift; /// Run the given function exactly once on each of the reachable blocks in /// a SIL function. Blocks will be visited in a post-order consistent with /// dominance, which is to say, after all dominating blocks but otherwise /// in an unspecified order. /// /// The function is passed a state value, which it can freely mutate. The /// initial value of the state will be the same as the value left in the /// state for an unspecified predecessor. For the entry block of the /// function, this is the initial state passed to runInDominanceOrder. /// /// Essentially, runInDominanceOrder finds an arbitrary simple path to /// the block and runs the callback function for each block in that path /// in order. As long as the callback: /// - only looks at instructions in the current block and the blocks /// it dominates, /// - has no dependencies outside of the state (which must have "value /// semantics"), and /// - can handle the arbitrariness of the choice of path, /// then the callback can act as if only the work done along the current /// path has happened and ignore the impact of arbitrary visitation order. /// /// This function assumes you don't change the CFG during its operation. template void runInDominanceOrder(SILFunction &F, State &&state, const Fn &fn) { // The set of blocks that have ever been enqueued onto the worklist. // (We actually skip the worklist a lot, but *abstractly* they're // enqueued, and everything but the entry block does get added to this // set.) BasicBlockSet visitedBlocks(&F); // The next basic block to operate on. We always operate on `state`. SILBasicBlock *curBB = F.getEntryBlock(); // We need to copy `state` whenever we enqueue a block onto the worklist. // We'll move-assign it back to `state` when we dequeue it. using StateValue = std::remove_reference_t; SmallVector> worklist; // Count the remaining edges to each dead-end region, and keep track // of a current merged state for each. DeadEndEdges deadEndEdges(&F); SmallVector> deadEndRegionStates; deadEndRegionStates.resize(deadEndEdges.getNumDeadEndRegions()); auto visitedDeadEndEdges = deadEndEdges.createVisitingSet(); while (true) { // Run the function on the current block, updating the current state. fn(curBB, state); // Enqueue the successors. SILBasicBlock *nextBB = nullptr; for (SILBasicBlock *succBB : curBB->getSuccessorBlocks()) { // If this edge visits a dead-end region, we may need to conservatively // merge the input state before visiting the region. if (auto deadEndRegionIndex = deadEndEdges.entersDeadEndRegion(curBB, succBB)) { auto &existingRegionState = deadEndRegionStates[*deadEndRegionIndex]; bool isLastEdgeToRegion = visitedDeadEndEdges.visitEdgeTo(succBB); // If this is not the last edge to the dead-end region, we can't // enqueue yet. Merge the current state into the existing state we're // tracking for the region. if (!isLastEdgeToRegion) { if (!existingRegionState) { existingRegionState.emplace(/*copied*/ state); } else { existingRegionState->merge(state); } continue; } // Otherwise, this is the last edge to the dead-end region. // The dead-end-edges counting means we know we haven't visited the // successor block yet (or anything else in its region), so we're // going to end up enqueuing it. But we do need to add it to // visitedBlocks so we don't re-enter it if its region is cyclic. auto inserted = visitedBlocks.insert(succBB); assert(inserted); (void) inserted; // If we have an existing state for the region, merge the current // state into it, then enqueue that state. if (existingRegionState) { // In theory we could merge into state and then just continue if // this is the current block's only reasonable successor, but // avoiding a few move-assignments wouldn't really justify the // more complicated bookkeeping that would require. existingRegionState->merge(state); worklist.emplace_back(succBB, std::move(*existingRegionState)); continue; } // Otherwise there was just a single edge into the dead-end region, // so there's no conservative state merge required, and we can just // fall into the code below. // If this edge doesn't visit a dead-end region, just check whether // we've already enqueued the successor block. If so, we can just // skip it. } else { // Insertion returns false if it the block was already in the set. // We could avoid this set operation by checking whether the block // has a unique predecessor edge, but BasicBlockSet is fast enough // that that's not worthwhile. if (!visitedBlocks.insert(succBB)) continue; } // If we haven't found a successor to visit yet, pick this one. if (!nextBB) { nextBB = succBB; // Otherwise, add it to the worklist, copying the current state. } else { worklist.emplace_back(succBB, /*copied*/ state); } } // If there's a viable direct successor, just continue along this // path, editing the current state in-place. if (nextBB) { curBB = nextBB; continue; } // Otherwise, if the worklist is empty, we're done. if (worklist.empty()) { return; } // Otherwise, pull the next item off the worklist and overwrite the // current state with the state we saved for it before. auto &nextItem = worklist.back(); curBB = nextItem.first; state = std::move(nextItem.second); worklist.pop_back(); } } /// Create a dealloc for a particular allocation. /// /// This is expected to work for all allocations that don't have /// properly-nested deallocations. It's fine to have a kind of allocation /// that you can't do this for, as long as as it's always explicitly /// deallocated on all paths. This pass doesn't change any allocations /// or deallocations that are properly nested already. /// /// Only allocations whose deallocations return true from canMoveDealloc /// need to support this. static void createDealloc(SILBuilder &B, SILLocation loc, SILInstruction *alloc) { auto allocation = alloc->getStackAllocation(); assert(allocation); switch (allocation->getKind()) { case StackAllocationKind::PartialApply: case StackAllocationKind::AllocStack: case StackAllocationKind::CalleeAllocatedBeginApply: B.createDeallocStack(loc, allocation->getValue()); return; case StackAllocationKind::AllocRefDynamic: case StackAllocationKind::AllocRef: B.createDeallocStackRef(loc, allocation->getValue()); return; case StackAllocationKind::AllocPack: B.createDeallocPack(loc, allocation->getValue()); return; case StackAllocationKind::BuiltinStackAlloc: case StackAllocationKind::BuiltinUnprotectedStackAlloc: { auto &ctx = alloc->getFunction()->getModule().getASTContext(); auto identifier = ctx.getIdentifier(getBuiltinName(BuiltinValueKind::StackDealloc)); B.createBuiltin(loc, identifier, SILType::getEmptyTupleType(ctx), SubstitutionMap(), {allocation->getValue()}); return; } case StackAllocationKind::BuiltinStartAsyncLet: case StackAllocationKind::BuiltinAddTaskLocalValue: case StackAllocationKind::BuiltinTaskAddPriorityEscalationHandler: case StackAllocationKind::BuiltinTaskAddCancellationHandler: llvm_unreachable("cannot insert this builtin; not safely reorderable"); return; case StackAllocationKind::AllocPackMetadata: B.createDeallocPackMetadata(loc, allocation->getValue()); return; } llvm_unreachable("unknown stack allocation"); } static bool isUnreorderableAllocation(StackAllocation allocation) { switch (allocation.getKind()) { // These are all simple allocations for which the deallocation can // always be sunk. case StackAllocationKind::PartialApply: case StackAllocationKind::AllocStack: case StackAllocationKind::AllocRefDynamic: case StackAllocationKind::AllocRef: case StackAllocationKind::AllocPack: case StackAllocationKind::AllocPackMetadata: case StackAllocationKind::BuiltinStackAlloc: case StackAllocationKind::BuiltinUnprotectedStackAlloc: return false; // end_apply is side-effectful in general, but the allocation here // is specifically the callee allocation, which *can* be reordered. case StackAllocationKind::CalleeAllocatedBeginApply: return false; // These deallocations cannot be reordered across because they are // associated with semantic effects. case StackAllocationKind::BuiltinStartAsyncLet: case StackAllocationKind::BuiltinAddTaskLocalValue: case StackAllocationKind::BuiltinTaskAddPriorityEscalationHandler: case StackAllocationKind::BuiltinTaskAddCancellationHandler: return true; } llvm_unreachable("unknown stack allocation"); } namespace { enum class AllocationStatus { /// No deallocation has been processed for this allocation. Allocated, /// No deallocation has been processed for this allocation, but it /// was live across a non-reorderable deallocation, so the allocation /// is going to end up being marked [non_nested]. AllocatedAndNonNested, /// A deallocation has been processed for this allocation, but the /// allocation was not at the top of the stack, so the deallocation /// has been deferred. Pending, /// A deallocation has been processed / emitted for this allocation. /// This state usually does not need to be represented explicitly because /// we normally remove the allocation from the stack whenever we emit its /// deallocation. However, non-reorderable deallocations mean we sometimes /// will have encountered or emitted deallocations while higher allocations /// are still outstanding. /// (This state is not described in the correctness proof.) Deallocated, /// The allocation cannot be deallocated because there was a stack /// mismatch on a branch into the current region (which is dead-end). /// Whether a deallocation has been processed for this allocation is /// no longer known. Undeallocatable, }; static bool isAllocatedOrUndeallocatable(AllocationStatus status) { return status == AllocationStatus::Allocated || status == AllocationStatus::AllocatedAndNonNested || status == AllocationStatus::Undeallocatable; } static StringRef getNameForStatus(AllocationStatus status) { switch (status) { case AllocationStatus::Allocated: return "allocated"; case AllocationStatus::AllocatedAndNonNested: return "allocated-non-nested"; case AllocationStatus::Pending: return "pending"; case AllocationStatus::Deallocated: return "deallocated"; case AllocationStatus::Undeallocatable: return "undeallocatable"; } llvm_unreachable("bad kind"); } class ActiveAllocation { llvm::PointerIntPair valueAndStatus; public: explicit ActiveAllocation(SILInstruction *value) : valueAndStatus(value, AllocationStatus::Allocated) {} SILInstruction *getValue() const { return valueAndStatus.getPointer(); } AllocationStatus getStatus() const { return valueAndStatus.getInt(); } void setPending() { assert(getStatus() == AllocationStatus::Allocated); valueAndStatus.setInt(AllocationStatus::Pending); } void setNonNested() { assert(getStatus() == AllocationStatus::Allocated); valueAndStatus.setInt(AllocationStatus::AllocatedAndNonNested); } void setDeallocated(bool expectPending) { assert(expectPending ? getStatus() == AllocationStatus::Pending : (getStatus() == AllocationStatus::Allocated || getStatus() == AllocationStatus::AllocatedAndNonNested)); valueAndStatus.setInt(AllocationStatus::Deallocated); } void setUndeallocatable() { valueAndStatus.setInt(AllocationStatus::Undeallocatable); } bool operator==(const ActiveAllocation &other) const { return valueAndStatus == other.valueAndStatus; } bool operator!=(const ActiveAllocation &other) const { return valueAndStatus != other.valueAndStatus; } }; using IndexForAllocationMap = llvm::DenseMap; struct State { /// The stack of active allocations and their statuses. SmallVector allocations; ActiveAllocation &getEntryForNonTop(SILInstruction *alloc, SILInstruction *dealloc, IndexForAllocationMap &indexForAllocation); /// Given that these are the end states of two blocks with edges into /// a dead-end region R, merge them. /// /// In the formal presentation of the algorithm, we decide whether PSS(R) /// is non-singleton and, if so, apply STATE_MERGE. We can do this /// iteratively by checking for pairwise equality and doing a pairwise /// CONSERVATIVE_MERGE if they're not. void merge(const State &other) { // If the states match exactly, there's nothing to do. if (ArrayRef(allocations) == ArrayRef(other.allocations)) { return; } // Otherwise, find the longest common prefix of allocations. We can start // from the end and stop as soon as we find a match because all states // that contain A have a common prefix of allocations; see // [allocation-index] in the proof. auto commonPrefixLength = std::min(allocations.size(), other.allocations.size()); while (commonPrefixLength > 0 && allocations[commonPrefixLength - 1].getValue() != other.allocations[commonPrefixLength - 1].getValue()) { commonPrefixLength--; } // Truncate to the common prefix length and set all entries to // undeallocatable. allocations.truncate(commonPrefixLength); for (auto &entry : allocations) { entry.setUndeallocatable(); } } #ifndef NDEBUG SWIFT_ATTRIBUTE_NORETURN void abortForUnknownAllocation(SILInstruction *alloc, SILInstruction *dealloc) { auto &out = llvm::errs(); out << "fatal error: StackNesting could not find record of " "allocation for deallocation:\n " << *dealloc << "Allocation might not be jointly post-dominated. " "Current stack:\n"; for (auto i : indices(allocations)) { out << "[" << i << "] "; auto status = allocations[i].getStatus(); if (status != AllocationStatus::Allocated) { out << "(" << getNameForStatus(status) << ") "; } out << *allocations[i].getValue() << "\n"; } out << "Complete function:\n"; alloc->getFunction()->dump(); abort(); } #endif }; } // end anonymous namespace ActiveAllocation & State::getEntryForNonTop(SILInstruction *alloc, SILInstruction *dealloc, IndexForAllocationMap &indexForAllocation) { auto stack = MutableArrayRef(allocations); assert(!stack.empty()); // By precondition, we know the top entry doesn't match the allocation, // so we can drop it. assert(stack.back().getValue() != alloc); stack = stack.drop_back(); // This is really just searching for the allocation in the stack. // All the complexity has to do with trying to avoid super-linear // behavior while also trying very hard to avoid actually filling // in indexForAllocation for simple cases. // It's very common for allocations to never be improperly nested, // so we don't want to eagerly add allocations to indexForAllocation // when we encounter them. This means we can't rely on it having // an entry for `alloc` now. // `alloc` is very likely to be close to the top of the stack. Just do // a short linear scan there first. This is quick, and it means we // can probably avoid ever filling in indexForAllocation. const size_t linearScanLimit = 8; auto linearScanEntries = stack.take_back(linearScanLimit); for (auto &entry : linearScanEntries) { if (entry.getValue() == alloc) { return entry; } } // Okay, so much for that, time for the hashtable. #ifndef NDEBUG if (stack.size() <= linearScanLimit) { abortForUnknownAllocation(alloc, dealloc); } #endif // We don't need to consider entries that we've already linearly scanned. stack = stack.drop_back(linearScanLimit); // Check if the entry's already in the hashtable. if (auto it = indexForAllocation.find(alloc); it != indexForAllocation.end()) { auto index = it->second; assert(stack[index].getValue() == alloc); return stack[index]; } // Fill in any missing entries in indexForAllocations. // // The invariant we maintain is that there may be allocations at the // top of the stack that aren't hashed, but once we reach a hashed // entry, everything beneath it is hashed. The first half of this // is necessary because we don't eagerly add allocations to the table, // but it's also what makes it okay that we skip the entries we // linearly scanned. The second half of this means that, if we start // adding entries from the top down, we can stop hashing once we find // that the entries we're adding are redundant. That's what keeps this // O(N). // // This caching works because allocations have a constant index as long // as they're in the stack; see [allocation-index] in the proof. // Look for the target allocation index in this loop rather than doing // a hash lookup at the end. std::optional foundIndexForAlloc; for (size_t onePast = stack.size(); onePast != 0; --onePast) { size_t entryIndex = onePast - 1; auto entryAlloc = stack[entryIndex].getValue(); // Remember this if it's the allocation we're looking for. if (entryAlloc == alloc) { foundIndexForAlloc = entryIndex; } // Add this entry to the hashtable. We can stop adding entries as soon as // we find something that already has an entry. auto insertResult = indexForAllocation.insert({entryAlloc, entryIndex}); if (!insertResult.second) { continue; } } #ifndef NDEBUG if (!foundIndexForAlloc) { abortForUnknownAllocation(alloc, dealloc); } #endif return stack[*foundIndexForAlloc]; } /// Pop and emit deallocations for any allocations on top of the /// active allocations stack that are pending deallocation. Also pop /// allocations in the deallocated-out-of-order state, but do not /// emit them. /// /// This operation is called whenever we pop an allocation; it /// restores the invariant that the top of the stack is never in a /// pending or deallocated-out-of-order state. static void emitPendingDeallocations(State &state, SILInstruction *insertAfterDealloc, bool &madeChanges) { // The builder we use for inserting deallocations. Initialized lazily // to insert after the initial dealloc. We have to reuse the same // builder so that, if we pop multiple deallocations, we order them // correctly w.r.t each other. std::optional builder; while (!state.allocations.empty()) { switch (state.allocations.back().getStatus()) { // Pop and emit a deallocation for an allocation with pending status. case AllocationStatus::Pending: { auto entry = state.allocations.pop_back_val(); SILInstruction *alloc = entry.getValue(); // Create a builder if necessary. if (!builder) { // We want to use the location of (and inherit debug scopes from) // the initial dealloc that we're inserting after. builder.emplace(/*insertion point*/ std::next(insertAfterDealloc->getIterator()), /*inherit scope from*/insertAfterDealloc); } createDealloc(*builder, insertAfterDealloc->getLoc(), alloc); madeChanges = true; continue; } // Just pop allocations that already have deallocated status. case AllocationStatus::Deallocated: { state.allocations.pop_back(); continue; } // Stop if we see an allocation with a different status. case AllocationStatus::Allocated: case AllocationStatus::AllocatedAndNonNested: case AllocationStatus::Undeallocatable: return; } llvm_unreachable("bad state"); } } /// We've found a deallocation for an allocation that is not the top of /// the stack and which cannot be reordered. Mark this allocation as /// already deallocated and process every non-deallocated allocation /// above it on the stack: immediately emit any pending deallocations, /// and flag outstanding allocations as [non_nested] and requiring /// immediate deallocation. /// /// We need to be able to mark allocations as [non_nested] if they cross /// the non-reorderable deallocation. That's just the unfortunate reality /// of non-reorderable allocations existing. Emitting pending deallocations /// immediately allows us to avoid marking allocations as [non_nested] when /// they're still properly nested w.r.t any non-reorderable deallocations(*). /// Requiring immediate deallocation for outstanding allocations is necessary /// to preserve consistency. /// /// (*) This is good both because it avoids potentially moving them to the /// heap and because it allows us to not have to support marking /// certain allocations as [non_nested] at all. Allocations that are /// emitted by SILGen and never by SIL passes don't end up improperly /// nested in the first place because SILGen uses lexical scopes that /// are naturally nested in source. /// /// The hand-wavey consistency argument here is that joint post-dominance /// on the allocation and the non-reorderable allocation forces two paths /// that share a common suffix to either agree about the state of the /// allocation at that point or not contain any deallocation of it. /// Deallocation cannot be delayed on one path in a way that "inserts" a /// deallocation on an overlapping path where it wouldn't occur if the /// latter were analyzed instead because that would require disagreement /// about the state of the allocation where the paths join, which can /// happen neither in terminating paths (because then the paths cannot /// agree that the allocation is deallocated at termination) nor in /// dead-end paths (because conservative merger will force analysis to /// agree that the allocation is undeallocatable). /// /// An alternative approach here that would achieve some of the same goals /// would be to recognize allocations that we're going to mark [non_nested] /// and just completely avoid changing their deallocations, probably by /// undoing the actions of the pass at the last minute for anything in the /// unnestedAllocations set. If we used a less online algorithm, we could /// also avoid unnecessary delays when these allocations cross others. static void handleOutOfOrderDeallocation(State &state, ActiveAllocation &allocEntry, SILInstruction *dealloc, SmallPtrSetImpl &unnestedAllocations) { // Flag that the target allocation is deallocated. allocEntry.setDeallocated(/*expect pending*/ false); // The builder we use for inserting deallocations. Initialized lazily // to insert prior to the out-of-order deallocation. std::optional builder; // Iterate from the top of the stack down to the paired allocation. We // can't pop anything yet because we might still see deallocations later // for the allocations that are still live, but we do need to transition // literally everything in the suffix. auto i = state.allocations.end(); while (true) { assert(i != state.allocations.begin() && "allocEntry not in stack!"); --i; if (i == &allocEntry) return; switch (i->getStatus()) { // Eagerly emit pending deallocations. We need to insert these in the // reverse of the order we encountered them, which is conveniently // exactly the order we see them as we pop them off the stack. case AllocationStatus::Pending: { if (!builder) { builder.emplace(/*insertion point and inherit scope from*/ dealloc); } createDealloc(*builder, dealloc->getLoc(), i->getValue()); i->setDeallocated(/*expect pending*/ true); continue; } // Mark outstanding allocations [non_nested]. case AllocationStatus::Allocated: { #ifndef NDEBUG auto sa = i->getValue()->getStackAllocation(); assert(sa && !isUnreorderableAllocation(*sa)); #endif // Add the allocation to the set that we'll mark [non_nested]. // We can't mark it immediately because our main iteration // ignores deallocations of [non_nested] allocations, and we could // get messed up if we do that "re-entrantly" during the loop. unnestedAllocations.insert(i->getValue()); // Change the state of the allocation so that we'll leave // any future deallocations alone. i->setNonNested(); continue; } // Ignore allocations that have already been emitted or that are already // [non_nested]. case AllocationStatus::Deallocated: case AllocationStatus::AllocatedAndNonNested: continue; // Undeallocatable allocations are a prefix, so since the paired // allocation is not undeallocatable and we haven't reached it yet, // we cannot see an undeallocatable allocation. case AllocationStatus::Undeallocatable: ABORT("cannot see an undeallocatable allocation"); } llvm_unreachable("bad kind"); } } /// The main entrypoint for clients. /// /// We use a straightforward, single-pass algorithm: /// /// enum AllocationStatus { /// case allocated /// case pendingDeallocation /// case undeallocatable /// } /// struct State { /// var stack = [(StackAllocationInst, AllocationStatus)]() /// } /// F.searchBlocksForJointPostDominance(initialState: State()) { /// (block, state) in /// for inst in block.instructions { /// if inst.isStackAllocation { /// state.stack.push((inst, .allocated)) /// } else if inst.isStackDeallocation { /// let allocation = inst.stackAllocation /// if state.stack.top == (A, .allocated) { /// _ = state.stack.pop() /// let builder = SILBuilder(insertAfter: I) /// while !state.stack.isEmpty && /// state.stack.top!.1 == .pendingDeallocation { /// builder.createStackDeallocation(for: state.stack.pop().0) /// } /// } else { /// I.remove() /// let curStatus = state.findStatus(A) /// assert(curStatus != .pendingDeallocation) /// if curStatus == .allocated) { /// state.setStatus(A, .pendingDeallocation) /// } /// } /// } /// } /// } /// /// The expectation is that searchBlocksForJointPostDominance performs /// a depth-first search, passing a state that reflects the current /// simple path from the entry block that is being explored. However, /// there's a twist to this from a standard DFS; see below. /// /// For the most part, the value in `state` at the end of processing /// a block is the initial value of `state` at the start of processing /// its successor as visited by the DFS, but this also has a twist; /// see below. /// /// The state consists of (1) an active stack of allocations which /// haven't yet been deallocated on this path and (2) an active /// status for each allocation. /// /// It has five invariants: /// /// 1. Allocations on the stack dominate all allocations above them. /// 2. Every allocation on the stack dominates the current point /// being considered. /// 3. The top of the stack never has pending status. /// 4. If a stack item has undeallocatable status, every item /// below it also has undeallocatable status. /// 5. If an allocation has pending status, there is an allocation /// above it on the stack which has allocated status. /// /// The twist about the search order and state relates to the /// non-coherence of SIL's joint post-dominance requirement for stack /// allocations. Specifically, SIL permits the current state of the /// stack to vary on different edges into a dead-end region; this just /// means it is not permitted to pop any of those deallocations from /// the stack. (More allocations can be pushed and popped, but the /// existing ones become untouchable.) StackNesting therefore permits /// its input to be non-coherent: whether an allocation has been /// deallocated is allowed to vary across the entries to a dead-end /// region. /// /// To handle this, the search delays considering edges into a dead-end /// region until it has seen the last such edge. Furthermore, the /// initial state of the destination block is set to a conservative /// merger of the final states of all of the predecessors: if two /// states differ in any way, the contents of the stack are pared /// back to the common prefix, and all allocations are placed in the /// undeallocatable state. /// /// A proof of correctness can be found in StackNestingProof.txt. StackNesting::Changes StackNesting::fixNesting(SILFunction *F) { bool madeChanges = false; // The set of allocations that we need to flag as unnested. // We can't do this live because we might run across more // deallocations of the allocation. SmallPtrSet unnestedAllocations; // The index in the allocation stack for each allocation. // This function never uses this directly; it's just a cache for // setAllocationAsPending. IndexForAllocationMap indexForAllocation; // Visit each block of the function in an order consistent with dominance. // The state represents the stack of active allocations; it starts // with an empty stack because so does the function. runInDominanceOrder(*F, State(), [&](SILBasicBlock *B, State &state) { // We can't use a foreach loop because we sometimes remove the // current instruction or add instructions (that we don't want to // visit) after it. Advancing the iterator immediately within the // loop is sufficient to protect against both. for (auto II = B->begin(), IE = B->end(); II != IE; ) { SILInstruction *I = &*II; ++II; // Invariant: the top of the stack is either allocated or // undeallocatable. assert(state.allocations.empty() || isAllocatedOrUndeallocatable(state.allocations.back().getStatus())); // Push allocations onto the current stack in the non-pending state. // // In the formal presentation, the state change is // state = STATE_PUSH(state, alloc) if (auto alloc = I->getStackAllocation()) { // Only handle nested stack allocations. if (I->isStackAllocationNested() == StackAllocationIsNested) state.allocations.emplace_back(I); continue; } // Ignore instructions other than allocations and deallocations. auto deallocation = I->getStackDeallocation(); if (!deallocation) { continue; } // Get the allocation for the deallocation. The allocation should be // in the state, and it should not have pending status; see // [deallocation-preconditions] in the proof. auto allocation = deallocation->getAllocation(); // Since we only processed nested allocations above, ignore deallocations // from non-nested allocations. if (allocation.isNested() == StackAllocationIsNotNested) continue; auto dealloc = deallocation->getInstruction(); auto alloc = allocation.getInstruction(); #ifndef NDEBUG if (state.allocations.empty()) { state.abortForUnknownAllocation(alloc, dealloc); } #endif // If the allocation is the top of the allocations stack: if (alloc == state.allocations.back().getValue()) { auto status = state.allocations.back().getStatus(); assert(isAllocatedOrUndeallocatable(status)); // If the allocation has allocated status (including non-nested), // leave the deallocation alone, pop the record of it off the stack, // and pop and emit any pending allocations beneath it. // // In the formal presentation, the state change is // state = STATE_POP(state, alloc) if (status == AllocationStatus::Allocated || status == AllocationStatus::AllocatedAndNonNested) { state.allocations.pop_back(); emitPendingDeallocations(state, /*after*/ dealloc, madeChanges); // Otherwise, it must have undeallocatable status; remove the // deallocation, but make no changes to the state. } else { assert(status == AllocationStatus::Undeallocatable); dealloc->eraseFromParent(); madeChanges = true; } // If the allocation is not the top of the allocations stack: } else { // Find its entry. auto &entry = state.getEntryForNonTop(alloc, dealloc, indexForAllocation); auto status = entry.getStatus(); assert(isAllocatedOrUndeallocatable(status)); // If the allocation has allocated status but its deallocation // cannot be reordered, mark the allocation as deallocated out // of order, then add all the allocations above it to the // allocated-out-of-order set. Do not remove the deallocation. // // FIXME: We really need to do this even for allocations with // undeallocatable status. The problem is that we'd need to collect // the *union* of the active allocations on entry to this dead-end // region in order to force the allocation to remain deallocatable // here, and we don't have that information easily at hand. if (status == AllocationStatus::Allocated && isUnreorderableAllocation(allocation)) { handleOutOfOrderDeallocation(state, entry, dealloc, unnestedAllocations); continue; } // If the allocation has allocated status, remove the deallocation // and update the allocation status to pending. // // In the formal presentation, the state change is // state = STATE_PEND(state, alloc) if (status == AllocationStatus::Allocated) { entry.setPending(); // If the allocation has allocated-and-non-nested status, leave the // deallocation alone. } else if (status == AllocationStatus::AllocatedAndNonNested) { entry.setDeallocated(/*allow pending*/ false); continue; // Otherwise, it has undeallocatable status; just remove the // deallocation but make no changes to the state. } else { assert(status == AllocationStatus::Undeallocatable); } // In any case, remove the deallocation. dealloc->eraseFromParent(); madeChanges = true; } } }); // Now that we're done traversing the CFG, record all of the allocations // as unnested that we need to. for (auto alloc : unnestedAllocations) { alloc->setStackAllocationIsNested(StackAllocationIsNotNested); madeChanges = true; } // We never make changes to the CFG. return (madeChanges ? Changes::Instructions : Changes::None); } namespace swift::test { static FunctionTest StackNestingTests("stack_nesting_fixup", [](auto &function, auto &arguments, auto &test) { StackNesting::fixNesting(&function); function.print(llvm::outs()); }); } // end namespace swift::test