//===--- AllocStackHoisting.cpp - Hoist alloc_stack instructions ----------===// // // This source file is part of the Swift.org open source project // // Copyright (c) 2014 - 2017 Apple Inc. and the Swift project authors // Licensed under Apache License v2.0 with Runtime Library Exception // // See https://swift.org/LICENSE.txt for license information // See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors // //===----------------------------------------------------------------------===// #define DEBUG_TYPE "alloc-stack-hoisting" #include "swift/AST/IRGenOptions.h" #include "swift/AST/SemanticAttrs.h" #include "swift/Basic/Assertions.h" #include "swift/IRGen/IRGenSILPasses.h" #include "swift/SIL/DebugUtils.h" #include "swift/SIL/Dominance.h" #include "swift/SIL/LoopInfo.h" #include "swift/SIL/SILArgument.h" #include "swift/SIL/SILBuilder.h" #include "swift/SIL/SILInstruction.h" #include "swift/SILOptimizer/Analysis/Analysis.h" #include "swift/SILOptimizer/Analysis/DominanceAnalysis.h" #include "swift/SILOptimizer/PassManager/Passes.h" #include "swift/SILOptimizer/PassManager/Transforms.h" #include "swift/SILOptimizer/Utils/CFGOptUtils.h" #include "IRGenModule.h" #include "NonFixedTypeInfo.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" using namespace swift; llvm::cl::opt SILUseStackSlotMerging( "sil-merge-stack-slots", llvm::cl::init(true), llvm::cl::desc("Merge generic alloc_stack instructions")); /// Hoist generic alloc_stack instructions to the entry basic block and merge /// alloc_stack instructions if their users span non-overlapping live-ranges. /// /// This helps avoid llvm.stacksave/stackrestore intrinsic calls during code /// generation. IRGen will only dynamic alloca instructions if the alloc_stack /// is in the entry block but will emit a dynamic alloca and /// llvm.stacksave/stackrestore for all other basic blocks. /// /// Merging alloc_stack instructions saves code size and stack size. /// An alloc_stack instructions is hoistable if it is of generic type and the /// type parameter is not dependent on an opened type. static bool isHoistable(AllocStackInst *Inst, irgen::IRGenModule &Mod) { auto SILTy = Inst->getType(); // We don't need to hoist types that have reference semantics no dynamic // alloca will be generated as they are fixed size. if (SILTy.hasReferenceSemantics()) return false; // Only hoist types that are dynamically sized (generics and resilient types). auto &TI = Mod.getTypeInfo(SILTy); if (TI.isFixedSize()) return false; // Don't hoist weakly imported (weakly linked) types. bool foundWeaklyImported = SILTy.getASTType().findIf([&Mod](CanType type) -> bool { if (auto nominal = type->getNominalOrBoundGenericNominal()) if (nominal->isWeakImported(Mod.getSwiftModule())) { return true; } return false; }); if (foundWeaklyImported) return false; // Don't hoist generics with opened archetypes. We would have to hoist the // open archetype instruction which might not be possible. return Inst->getTypeDependentOperands().empty(); } /// A partition of alloc_stack instructions. /// /// Initially, a partition contains alloc_stack instructions of one type. /// After merging non-overlapping alloc_stack live ranges, a partition contains /// a set of alloc_stack instructions that can be assigned a single stack /// location. namespace { using InstructionIndices = llvm::SmallDenseMap; class Partition { public: SmallVector Elts; Partition(AllocStackInst *A) : Elts(1, A) {} Partition() {} /// Assign a single alloc_stack instruction to all the alloc_stacks in the /// partition. /// /// This assumes that the live ranges of the alloc_stack instructions are /// non-overlapping. void assignStackLocation( SmallVectorImpl &FunctionExits, SmallVectorImpl &DebugValueToBreakBlocksAt); /// Returns true if any of the alloc_stack that we are merging were /// moved. Causes us to insert extra debug addr. /// /// TODO: In the future we want to do this for /all/ alloc_stack but that /// would require us moving /most of/ swift's IRGen emission to use /// llvm.dbg.addr instead of llvm.dbg.declare and that would require us to do /// statistics to make sure that we haven't hurt debuggability by making the /// change. bool hasMovedElt() const { return llvm::any_of(Elts, [](AllocStackInst *asi) { return asi->usesMoveableValueDebugInfo(); }); } }; } // end anonymous namespace /// Erases all dealloc_stack users of an alloc_stack static void eraseDeallocStacks(AllocStackInst *AllocStack) { // Delete dealloc_stacks. SmallVector DeallocStacksToDelete; for (auto *U : AllocStack->getUses()) { if (auto *DeallocStack = dyn_cast(U->getUser())) DeallocStacksToDelete.push_back(DeallocStack); } for (auto *D : DeallocStacksToDelete) D->eraseFromParent(); } /// Inserts a dealloc_stack at all the function exits. static void insertDeallocStackAtEndOf(SmallVectorImpl &FunctionExits, AllocStackInst *AllocStack) { // Insert dealloc_stack in the exit blocks. for (auto *Exit : FunctionExits) { SILBuilderWithScope Builder(Exit); Builder.createDeallocStack(CleanupLocation(AllocStack->getLoc()), AllocStack); } } /// Hack to workaround a clang LTO bug. LLVM_ATTRIBUTE_NOINLINE void moveAllocStackToBeginningOfBlock( AllocStackInst *AS, SILBasicBlock *BB, bool haveMovedElt, SmallVectorImpl &DebugValueToBreakBlocksAt) { // If we have var info, create the debug_value at the alloc_stack position and // invalidate the alloc_stack's var info. This transfers the debug info state // of the debug_value to the original position. if (haveMovedElt) { if (auto varInfo = AS->getVarInfo()) { // SILBuilderWithScope skips over meta instructions when picking a scope. SILBuilder Builder(AS, AS->getDebugScope()); auto *DVI = Builder.createDebugValue(AS->getLoc(), AS, *varInfo); DVI->setUsesMoveableValueDebugInfo(); DebugValueToBreakBlocksAt.push_back(DVI); AS->invalidateVarInfo(); AS->markUsesMoveableValueDebugInfo(); } } AS->moveFront(BB); } /// Assign a single alloc_stack instruction to all the alloc_stacks in the /// partition. void Partition::assignStackLocation( SmallVectorImpl &FunctionExits, SmallVectorImpl &DebugValueToBreakBlocksAt) { assert(!Elts.empty() && "Must have a least one location"); bool hasAtLeastOneMovedElt = hasMovedElt(); // The assigned location is the first alloc_stack in our partition. auto *AssignedLoc = Elts[0]; // Move this assigned location to the beginning of the entry block. auto *EntryBB = AssignedLoc->getFunction()->getEntryBlock(); moveAllocStackToBeginningOfBlock(AssignedLoc, EntryBB, hasAtLeastOneMovedElt, DebugValueToBreakBlocksAt); // Erase the dealloc_stacks. eraseDeallocStacks(AssignedLoc); // Insert a new dealloc_stack at the exit(s) of the function. insertDeallocStackAtEndOf(FunctionExits, AssignedLoc); // Rewrite all the other alloc_stacks in the partition to use the assigned // location. for (auto *AllocStack : Elts) { if (AssignedLoc == AllocStack) continue; eraseDeallocStacks(AllocStack); AllocStack->replaceAllUsesWith(AssignedLoc); if (auto VarInfo = AllocStack->getVarInfo()) { SILBuilder Builder(AllocStack, AllocStack->getDebugScope()); auto *DVI = Builder.createDebugValueAddr(AllocStack->getLoc(), AssignedLoc, *VarInfo); if (hasAtLeastOneMovedElt) { DVI->setUsesMoveableValueDebugInfo(); } DebugValueToBreakBlocksAt.push_back(DVI); } AllocStack->eraseFromParent(); } } /// Returns a single dealloc_stack user of the alloc_stack or nullptr otherwise. static SILInstruction *getSingleDeallocStack(AllocStackInst *ASI) { return ASI->getSingleDeallocStack(); } namespace { /// Compute liveness for the partition to allow for an interference check /// between two alloc_stack instructions. /// /// For now liveness is computed and this just performs a simple check /// whether two regions of alloc_stack instructions might overlap. class Liveness { public: Liveness(Partition &P) {} /// Check whether the live ranges of the two alloc_stack instructions /// might overlap. /// /// Currently this does not use a liveness analysis. Rather we check that for /// both alloc_stack we have: /// * a single dealloc_stack user /// * the dealloc_stack is in the same basic block /// If the alloc_stack instructions are in different basic blocks we know that /// the live-ranges can't overlap. /// If they are in the same basic block we scan the basic block to determine /// whether one dealloc_stack dominates the other alloc_stack. If this is the /// case the live ranges can't overlap. bool mayOverlap(AllocStackInst *A, AllocStackInst *B, const InstructionIndices &stackInstructionIndices) { assert(A != B); // Check that we have a single dealloc_stack user in the same block. auto *singleDeallocA = getSingleDeallocStack(A); if (singleDeallocA == nullptr || singleDeallocA->getParent() != A->getParent()) return true; auto *singleDeallocB = getSingleDeallocStack(B); if (singleDeallocB == nullptr || singleDeallocB->getParent() != B->getParent()) return true; // Different basic blocks. if (A->getParent() != B->getParent()) return false; // Within the same basic block we can use the consecutive instruction indices // to check for overlapping. if (stackInstructionIndices.lookup(A) > stackInstructionIndices.lookup(singleDeallocB)) return false; if (stackInstructionIndices.lookup(B) > stackInstructionIndices.lookup(singleDeallocA)) return false; return true; } }; } // end anonymous namespace namespace { /// Merge alloc_stack instructions. /// /// This merges alloc_stack instructions of one type by: /// * building partitions of alloc_stack instructions of one type /// * merging alloc_stack instructions in each partition into one alloc_stack /// if the live ranges spanned by the alloc_stack users are known not to /// overlap. class MergeStackSlots { /// Contains partitions of alloc_stack instructions by type. SmallVector PartitionByType; /// The function exits. SmallVectorImpl &FunctionExits; /// Consecutive indices for all `alloc_stack` and `dealloc_stack` /// instructions in the function. const InstructionIndices &stackInstructionIndices; /// If we are merging any alloc_stack that were moved, to work around a bug in /// SelectionDAG that sinks to llvm.dbg.addr, we need to break blocks right /// after each llvm.dbg.addr. /// /// TODO: Once we have /any/ FastISel/better SelectionDAG support, this can be /// removed. SmallVector DebugValueToBreakBlocksAt; public: MergeStackSlots(SmallVectorImpl &AllocStacks, SmallVectorImpl &FuncExits, const InstructionIndices &stackInstructionIndices); /// Merge alloc_stack instructions if possible and hoist them to the entry /// block. SILAnalysis::InvalidationKind mergeSlots(DominanceInfo *domToUpdate); }; } // end anonymous namespace MergeStackSlots::MergeStackSlots(SmallVectorImpl &AllocStacks, SmallVectorImpl &FuncExits, const InstructionIndices &stackInstructionIndices) : FunctionExits(FuncExits), stackInstructionIndices(stackInstructionIndices) { // Build initial partitions based on the type. llvm::DenseMap TypeToPartitionMap; for (auto *AS : AllocStacks) { auto Ty = AS->getType(); auto It = TypeToPartitionMap.find(Ty); if (It != TypeToPartitionMap.end()) { PartitionByType[It->second].Elts.push_back(AS); } else { PartitionByType.push_back(Partition(AS)); TypeToPartitionMap[Ty] = PartitionByType.size() - 1; } } } /// Merge alloc_stack instructions if possible and hoist them to the entry /// block. SILAnalysis::InvalidationKind MergeStackSlots::mergeSlots(DominanceInfo *DomToUpdate) { auto Result = SILAnalysis::InvalidationKind::Instructions; for (auto &PartitionOfOneType : PartitionByType) { Liveness Live(PartitionOfOneType); // Partitions that are know to contain non-overlapping alloc_stack // live-ranges. SmallVector DisjointPartitions(1, Partition()); // Look at all the alloc_stacks of one type. for (auto *CurAllocStack : PartitionOfOneType.Elts) { bool FoundAPartition = false; // Check if we can add it to an existing partition that we have show to be // non-interfering. for (auto &CandidateP : DisjointPartitions) { // If the candidate partition is empty (the very first time we look at an // alloc_stack) we can just add the alloc_stack. if (CandidateP.Elts.empty()) { CandidateP.Elts.push_back(CurAllocStack); FoundAPartition = true; break; } // Otherwise, we check interference of the current alloc_stack with the // candidate partition. bool InterferesWithCandidateP = false; for (auto *AllocStackInPartition : CandidateP.Elts) { if (Live.mayOverlap(AllocStackInPartition, CurAllocStack, stackInstructionIndices)) { InterferesWithCandidateP = true; break; } } // No interference add the current alloc_stack to the candidate // partition. if (!InterferesWithCandidateP) { CandidateP.Elts.push_back(CurAllocStack); FoundAPartition = true; break; } // Otherwise, we look at the next partition. } // If not partition was found add a new one. if (!FoundAPartition) { DisjointPartitions.push_back(Partition(CurAllocStack)); } } // Assign stack locations to disjoint partition hoisting alloc_stacks to the // entry block at the same time. for (auto &Par : DisjointPartitions) { Par.assignStackLocation(FunctionExits, DebugValueToBreakBlocksAt); } } // Now that we have finished merging slots/hoisting, break any blocks that we // need to. if (!DebugValueToBreakBlocksAt.empty()) { auto &Mod = DebugValueToBreakBlocksAt.front()->getModule(); SILBuilderContext Context(Mod); do { auto *Next = DebugValueToBreakBlocksAt.pop_back_val(); splitBasicBlockAndBranch(Context, Next->getNextInstruction(), DomToUpdate, nullptr); } while (!DebugValueToBreakBlocksAt.empty()); Result = SILAnalysis::InvalidationKind::BranchesAndInstructions; } return Result; } namespace { /// Hoist alloc_stack instructions to the entry block and merge them. class HoistAllocStack { /// The function to process. SILFunction *F; /// The current IRGenModule. irgen::IRGenModule &IRGenMod; SmallVector AllocStackToHoist; SmallVector FunctionExits; /// Consecutive indices for all `alloc_stack` and `dealloc_stack` /// instructions in the function. InstructionIndices stackInstructionIndices; std::optional InvalidationKind = std::nullopt; DominanceInfo *DomInfoToUpdate = nullptr; public: HoistAllocStack(SILFunction *F, irgen::IRGenModule &Mod) : F(F), IRGenMod(Mod) {} /// Try to hoist generic alloc_stack instructions to the entry block. Returns /// none if the function was not changed. Otherwise, returns the analysis /// invalidation kind to use if the function was changed. std::optional run(); void setDominanceToUpdate(DominanceInfo *DI) { DomInfoToUpdate = DI; } private: /// Collect generic alloc_stack instructions that can be moved to the entry /// block. void collectHoistableInstructions(); /// Move the hoistable alloc_stack instructions to the entry block. void hoist(); }; } // end anonymous namespace bool inhibitsAllocStackHoisting(SILInstruction *I) { if (auto *Apply = dyn_cast(I)) { return Apply->hasSemantics(semantics::AVAILABILITY_OSVERSION); } if (auto *bi = dyn_cast(I)) { return bi->getBuiltinInfo().ID == BuiltinValueKind::TargetOSVersionAtLeast || bi->getBuiltinInfo().ID == BuiltinValueKind::TargetVariantOSVersionAtLeast || bi->getBuiltinInfo().ID == BuiltinValueKind::TargetOSVersionOrVariantOSVersionAtLeast; } if (isa(I)) { return true; } return false; } /// Collect generic alloc_stack instructions in the current function can be /// hoisted. /// We can hoist generic alloc_stack instructions if they are not dependent on a /// another instruction that we would have to hoist. /// A generic alloc_stack could reference an opened archetype that was not /// opened in the entry block. void HoistAllocStack::collectHoistableInstructions() { int stackInstructionIndex = 0; for (auto &BB : *F) { for (auto &Inst : BB) { // Terminators that are function exits are our dealloc_stack // insertion points. if (auto *Term = dyn_cast(&Inst)) { if (Term->isFunctionExiting()) FunctionExits.push_back(Term); continue; } // Don't perform alloc_stack hoisting in functions containing // instructions that indicate hoisting may be unsafe (e.g. `if // #available(...)` or `if #_hasSymbol(...)`. if (inhibitsAllocStackHoisting(&Inst)) { AllocStackToHoist.clear(); return; } if (isa(&Inst)) stackInstructionIndices[&Inst] = stackInstructionIndex++; auto *ASI = dyn_cast(&Inst); if (!ASI) { continue; } stackInstructionIndices[ASI] = stackInstructionIndex++; if (isHoistable(ASI, IRGenMod)) { LLVM_DEBUG(llvm::dbgs() << "Hoisting " << Inst); AllocStackToHoist.push_back(ASI); } else { LLVM_DEBUG(llvm::dbgs() << "Not hoisting " << Inst); } } } } /// Hoist the alloc_stack instructions to the entry block and sink the /// dealloc_stack instructions to the function exists. void HoistAllocStack::hoist() { if (SILUseStackSlotMerging) { MergeStackSlots Merger(AllocStackToHoist, FunctionExits, stackInstructionIndices); InvalidationKind = Merger.mergeSlots(DomInfoToUpdate); return; } // Hoist alloc_stacks to the entry block and delete dealloc_stacks. auto *EntryBB = F->getEntryBlock(); for (auto *AllocStack : AllocStackToHoist) { // Insert at the beginning of the entry block. AllocStack->moveFront(EntryBB); // Delete dealloc_stacks. eraseDeallocStacks(AllocStack); InvalidationKind = SILAnalysis::InvalidationKind::Instructions; } // Insert dealloc_stack in the exit blocks. for (auto *AllocStack : AllocStackToHoist) { insertDeallocStackAtEndOf(FunctionExits, AllocStack); } } /// Try to hoist generic alloc_stack instructions to the entry block. /// Returns true if the function was changed. std::optional HoistAllocStack::run() { collectHoistableInstructions(); // Nothing to hoist? if (AllocStackToHoist.empty()) return {}; hoist(); return InvalidationKind; } namespace { class AllocStackHoisting : public SILFunctionTransform { void run() override { auto *F = getFunction(); auto *Mod = getIRGenModule(); assert(Mod && "This pass must be run as part of an IRGen pipeline"); HoistAllocStack Hoist(F, *Mod); // Update DomInfo when breaking. We don't use loop info right now this late, // so we don't need to do that. auto *DA = getAnalysis(); if (DA->hasFunctionInfo(F)) Hoist.setDominanceToUpdate(DA->get(F)); auto InvalidationKind = Hoist.run(); if (InvalidationKind) { AnalysisPreserver preserveDominance(DA); PM->invalidateAnalysis(F, *InvalidationKind); } } }; } // end anonymous namespace SILTransform *irgen::createAllocStackHoisting() { return new AllocStackHoisting(); }