//===--- ARCCodeMotion.cpp - SIL ARC Code Motion --------------------------===//
//
// This source file is part of the Swift.org open source project
//
// Copyright (c) 2014 - 2017 Apple Inc. and the Swift project authors
// Licensed under Apache License v2.0 with Runtime Library Exception
//
// See https://swift.org/LICENSE.txt for license information
// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
//
//===----------------------------------------------------------------------===//
///
/// \file
///
/// This pass moves retains down and releases up. This, hopefully, will help
/// ARC sequence opt to remove retain and release pairs without worrying too
/// much about control flow.
///
/// It uses an optimistic iterative data flow to compute where to insert the
/// retains and releases for every reference-counted root. It then removes all
/// the old retain and release instructions and creates the new ones.
///
/// This pass is more sophisticated than SILCodeMotion: because ARC
/// optimizations can be very beneficial, it uses an optimistic global data
/// flow to achieve optimality.
///
/// Proof of Correctness:
/// -------------------
///
/// 1. Retains are blocked by MayDecrements. It's straightforward to prove that
/// retain sinking is correct.
///
/// If a retain is sunk from Region A to Region B, that means there is no
/// blocking operation between where the retain was in Region A and where it is
/// sunk to in Region B. We only sink retains (we do not move any other
/// instructions), which themselves are NOT MayDecrement operations, and moving
/// retains can't turn a non-decrement instruction into a MayDecrement.
///
/// 2. Releases are blocked by MayInterfere. If a release is hoisted from
/// Region B to Region A, that means there is no blocking operation between
/// where the release was in Region B and where the release is hoisted to in
/// Region A.
///
/// The question is whether we can introduce such an operation while we hoist
/// other releases. The answer is NO, because if such releases exist, they
/// would be blocked by the old release (we remove the old releases and recreate
/// new ones at the end of the pass) and will not be able to be hoisted beyond
/// the old release.
///
/// This proof also hinges on the fact that if release A interferes with
/// release B then release B must interfere with release A, i.e. the 2
/// releases must have the symmetric property. Consider the 2 releases as 2
/// function calls, i.e. CallA (release A) and CallB (release B). If CallA
/// interferes with CallB, that means CallA must share some program state
/// (through read or write) with CallB. Then it is not possible for CallB
/// to not share any state with CallA. And if they do share state, then
/// it's not possible for CallB to block CallA and CallA not to block CallB.
///
/// TODO: Sinking retains can block releases from being hoisted, and hoisting
/// releases can block retains from being sunk. Investigate when to sink
/// retains and when to hoist releases and their ordering in the pass pipeline.
///
/// TODO: Consider doing retain hoisting and release sinking. This can help
/// to discover disjoint lifetimes and we can try to stitch them together.
///
/// TODO: There is a lot of code duplication between retain and release code
/// motion in the data flow part. Consider whether we can share it.
/// Essentially, we can implement the release code motion by inverting the
/// retain code motion, but this can also make the code less readable.
///
//===----------------------------------------------------------------------===//

#define DEBUG_TYPE "sil-rr-code-motion"
#include "swift/SIL/SILBuilder.h"
#include "swift/SILOptimizer/Analysis/AliasAnalysis.h"
#include "swift/SILOptimizer/Analysis/ARCAnalysis.h"
#include "swift/SILOptimizer/Analysis/EscapeAnalysis.h"
#include "swift/SILOptimizer/Analysis/PostOrderAnalysis.h"
#include "swift/SILOptimizer/Analysis/RCIdentityAnalysis.h"
#include "swift/SILOptimizer/PassManager/Passes.h"
#include "swift/SILOptimizer/PassManager/Transforms.h"
#include "swift/SILOptimizer/Utils/CFG.h"
#include "swift/SILOptimizer/Utils/Local.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"

using namespace swift;

STATISTIC(NumRetainsSunk, "Number of retains sunk");
STATISTIC(NumReleasesHoisted, "Number of releases hoisted");

// NOTE(review): template argument lists (e.g. on llvm::cl::opt below and on
// the LLVM containers further down) appear to have been lost in this copy of
// the file -- restore them from the upstream source before building.

/// Disable the ARC code motion pass entirely ("disable-arc-cm").
llvm::cl::opt DisableARCCodeMotion("disable-arc-cm", llvm::cl::init(false));

/// Disable optimization if we have to break critical edges in the function.
llvm::cl::opt DisableIfWithCriticalEdge("disable-with-critical-edge",
                                        llvm::cl::init(false));

//===----------------------------------------------------------------------===//
//                             Block State
//===----------------------------------------------------------------------===//

/// Per-basic-block data flow state. RetainBlockState and ReleaseBlockState
/// derive from it; every bit vector is indexed by the position of an RC root
/// in RCRootVault.
struct BlockState {
  /// A bit vector for which the ith bit represents the ith refcounted root in
  /// RCRootVault.
  ///
  /// NOTE: we could do the data flow with BBSetIn or BBSetOut, but that would
  /// require us to create a temporary copy to check whether the BBSet has
  /// changed after the genset and killset has been applied.
  llvm::SmallBitVector BBSetIn;

  /// A bit vector for which the ith bit represents the ith refcounted root in
  /// RCRootVault.
  llvm::SmallBitVector BBSetOut;

  /// A bit vector for which the ith bit represents the ith refcounted root in
  /// RCRootVault. If the bit is set, that means this basic block creates a
  /// retain which can be sunk or a release which can be hoisted.
  llvm::SmallBitVector BBGenSet;

  /// A bit vector for which the ith bit represents the ith refcounted root in
  /// RCRootVault. If this bit is set, that means this basic block stops retain
  /// or release of the refcounted root to be moved across.
  llvm::SmallBitVector BBKillSet;

  /// A bit vector for which the ith bit represents the ith refcounted root in
  /// RCRootVault. If this bit is set, that means this is potentially a retain
  /// or release that can be sunk or hoisted to this point. This is used to
  /// optimize the time for computing genset and killset.
  ///
  /// NOTE: this vector contains an approximation of whether there will be a
  /// retain or release to a certain point of a basic block.
  llvm::SmallBitVector BBMaxSet;
};

/// CodeMotionContext - This is the base class which retain code motion and
/// release code motion inherit from. It defines the interface of the code
/// motion procedure; run() drives the individual virtual steps.
class CodeMotionContext {
protected:
  /// Dataflow needs multiple iteration to converge. If this is false, then we
  /// do not need to generate the genset or killset, i.e. we can simply do 1
  /// pessimistic data flow iteration.
  bool MultiIteration;

  /// The allocator we are currently using.
  llvm::SpecificBumpPtrAllocator &BPA;

  /// Current function we are analyzing.
  SILFunction *F;

  /// Current post-order we are using.
  PostOrderFunctionInfo *PO;

  /// Current alias analysis we are using.
  AliasAnalysis *AA;

  /// Current rc-identity we are using.
  RCIdentityFunctionInfo *RCFI;

  /// All the unique refcount roots retained or released in the function.
  llvm::SetVector RCRootVault;

  /// Contains a map between RC roots to their index in the RCRootVault.
  /// used to facilitate fast RC roots to index lookup.
  llvm::DenseMap RCRootIndex;

  /// All the retains or releases originally in the function.
Eventually /// they will all be removed after all the new ones are generated. llvm::SmallPtrSet RCInstructions; /// All the places to place the new retains or releases after code motion. using InsertPointList = llvm::SmallVector; llvm::SmallDenseMap InsertPoints; /// These are the blocks that have an RC instruction to process or it blocks /// some RC instructions. If the basic block has neither, we do not need to /// process the block again in the last iteration. We populate this set when /// we compute the genset and killset. llvm::SmallPtrSet InterestBlocks; /// Return the rc-identity root of the SILValue. SILValue getRCRoot(SILValue R) { return RCFI->getRCIdentityRoot(R); } /// Return the rc-identity root of the RC instruction, i.e. /// retain or release. SILValue getRCRoot(SILInstruction *I) { assert(isRetainInstruction(I) || isReleaseInstruction(I) && "Extracting RC root from invalid instruction"); return getRCRoot(I->getOperand(0)); } public: /// Constructor. CodeMotionContext(llvm::SpecificBumpPtrAllocator &BPA, SILFunction *F, PostOrderFunctionInfo *PO, AliasAnalysis *AA, RCIdentityFunctionInfo *RCFI) : MultiIteration(true), BPA(BPA), F(F), PO(PO), AA(AA), RCFI(RCFI) {} /// virtual destructor. virtual ~CodeMotionContext() {} /// Run the data flow to move retains and releases. bool run(); /// Check whether we need to run an optimistic iteration data flow. /// or a pessimistic would suffice. virtual bool requireIteration() = 0; /// Initialize necessary things to run the iterative data flow. virtual void initializeCodeMotionDataFlow() = 0; /// Initialize the basic block maximum refcounted set. virtual void initializeCodeMotionBBMaxSet() = 0; /// Compute the genset and killset for every root in every basic block. virtual void computeCodeMotionGenKillSet() = 0; /// Run the iterative data flow to converge. virtual void convergeCodeMotionDataFlow() = 0; /// Use the data flow results, come up with places to insert the new inst. 
virtual void computeCodeMotionInsertPoints() = 0;

  /// Remove the old retains and create the new *moved* refcounted instructions
  virtual bool performCodeMotion() = 0;

  /// Merge the data flow states.
  virtual void mergeBBDataFlowStates(SILBasicBlock *BB) = 0;

  /// Compute the BBSetIn and BBSetOut for the current basic
  /// block with the generated gen and kill set.
  virtual bool processBBWithGenKillSet(SILBasicBlock *BB) = 0;

  /// Return true if the instruction blocks the Ptr to be moved further.
  virtual bool mayBlockCodeMotion(SILInstruction *II, SILValue Ptr) = 0;
};

/// Drive the whole code motion: initialize the data flow, converge it when a
/// multi-iteration data flow is required, compute the insertion points and
/// finally rewrite the instructions.
///
/// \returns the result of performCodeMotion().
bool CodeMotionContext::run() {
  // Initialize the data flow.
  initializeCodeMotionDataFlow();

  // Converge the BBSetOut with iterative data flow. The max set and the
  // gen/kill sets are only needed for the multi-iteration data flow.
  if (MultiIteration) {
    initializeCodeMotionBBMaxSet();
    computeCodeMotionGenKillSet();
    convergeCodeMotionDataFlow();
  }

  // Compute the insertion point where each RC root can be moved to.
  computeCodeMotionInsertPoints();

  // Finally, generate the new RC instructions and remove the old ones.
  return performCodeMotion();
}

//===----------------------------------------------------------------------===//
//                          Retain Code Motion
//===----------------------------------------------------------------------===//

/// Block state used by the (forward) retain sinking data flow.
class RetainBlockState : public BlockState {
public:
  /// Check whether the BBSetOut has changed. If it does, we need to rerun
  /// the data flow on this block's successors to reach fixed point.
  ///
  /// \returns true if BBSetOut was replaced by \p X, false if it was already
  /// equal to \p X.
  bool updateBBSetOut(llvm::SmallBitVector &X) {
    if (BBSetOut == X)
      return false;
    BBSetOut = X;
    return true;
  }

  /// constructor.
  ///
  /// \p size is the number of bits in every bit vector.
  RetainBlockState(bool IsEntry, unsigned size, bool MultiIteration) {
    // Iterative forward data flow.
    BBSetIn.resize(size, false);
    // Initialize to true if we are running optimistic data flow, i.e.
    // MultiIteration is true.
    BBSetOut.resize(size, MultiIteration);
    // The entry block always starts with an empty max set.
    BBMaxSet.resize(size, !IsEntry && MultiIteration);

    // Genset and Killset are initially empty.
BBGenSet.resize(size, false);
    BBKillSet.resize(size, false);
  }
};

/// RetainCodeMotionContext - Context to perform retain code motion.
class RetainCodeMotionContext : public CodeMotionContext {
  /// All the retain block state for all the basic blocks in the function.
  llvm::SmallDenseMap BlockStates;

  /// Return true if the instruction blocks the Ptr to be moved further.
  bool mayBlockCodeMotion(SILInstruction *II, SILValue Ptr) override {
    // NOTE: If more checks are to be added, place the most expensive in the
    // end, this function is called many times.
    //
    // These terminator instructions block.
    // NOTE(review): the isa<> template arguments are missing in this copy of
    // the file; presumably three different terminator kinds -- confirm
    // against upstream.
    if (isa(II) || isa(II) || isa(II))
      return true;
    // Identical RC root blocks code motion, we will be able to move this retain
    // further once we move the blocking retain.
    if (isRetainInstruction(II) && getRCRoot(II) == Ptr)
      return true;
    // Ref count checks do not have side effects, but are barriers for retains.
    if (mayCheckRefCount(II))
      return true;
    // mayDecrement reference count stops code motion.
    if (mayDecrementRefCount(II, Ptr, AA))
      return true;
    // This instruction does not block the retain code motion.
    return false;
  }

  /// Return the previous instruction if it happens to be a retain with the
  /// given RC root, nullptr otherwise. Also returns nullptr when \p I is the
  /// first instruction of its basic block.
  SILInstruction *getPrevReusableInst(SILInstruction *I, SILValue Root) {
    if (&*I->getParent()->begin() == I)
      return nullptr;
    auto Prev = &*std::prev(SILBasicBlock::iterator(I));
    if (isRetainInstruction(Prev) && getRCRoot(Prev) == Root)
      return Prev;
    return nullptr;
  }

public:
  /// Constructor.
  RetainCodeMotionContext(llvm::SpecificBumpPtrAllocator &BPA, SILFunction *F,
                          PostOrderFunctionInfo *PO, AliasAnalysis *AA,
                          RCIdentityFunctionInfo *RCFI)
      : CodeMotionContext(BPA, F, PO, AA, RCFI) {
    MultiIteration = requireIteration();
  }

  /// virtual destructor.
  ~RetainCodeMotionContext() override {}

  /// Return true if the multi-iteration (optimistic) data flow is required;
  /// the result is stored in MultiIteration by the constructor.
  bool requireIteration() override;

  /// Initialize necessary things to run the iterative data flow.
void initializeCodeMotionDataFlow() override;

  /// Initialize the basic block maximum refcounted set.
  void initializeCodeMotionBBMaxSet() override;

  /// Compute the genset and killset for every root in every basic block.
  void computeCodeMotionGenKillSet() override;

  /// Run the iterative data flow to converge.
  void convergeCodeMotionDataFlow() override;

  /// Use the data flow results, come up with places to insert the new inst.
  void computeCodeMotionInsertPoints() override;

  /// Remove the old retains and create the new *moved* refcounted instructions
  bool performCodeMotion() override;

  /// Compute the BBSetIn and BBSetOut for the current basic block with the
  /// generated gen and kill set.
  bool processBBWithGenKillSet(SILBasicBlock *BB) override;

  /// Merge the data flow states.
  void mergeBBDataFlowStates(SILBasicBlock *BB) override;
};

/// Return true if some block is visited, in reverse post order, before one of
/// its predecessors, i.e. a single forward pass is not enough.
bool RetainCodeMotionContext::requireIteration() {
  // If all basic blocks will have their predecessors processed if the basic
  // blocks in the functions are iterated in reverse post order. Then this
  // function can be processed in one iteration, i.e. no need to generate the
  // genset and killset.
  llvm::SmallPtrSet PBBs;
  for (SILBasicBlock *B : PO->getReversePostOrder()) {
    for (auto X : B->getPredecessorBlocks()) {
      // A predecessor that has not been visited yet: iteration is needed.
      if (!PBBs.count(X))
        return true;
    }
    PBBs.insert(B);
  }
  return false;
}

/// Collect every retain instruction and its RC root, then allocate one
/// RetainBlockState per basic block.
void RetainCodeMotionContext::initializeCodeMotionDataFlow() {
  // Find all the RC roots in the function.
  for (auto &BB : *F) {
    for (auto &II : BB) {
      if (!isRetainInstruction(&II))
        continue;
      RCInstructions.insert(&II);
      SILValue Root = getRCRoot(&II);
      // Each root gets exactly one index, assigned on first sight.
      if (RCRootIndex.find(Root) != RCRootIndex.end())
        continue;
      RCRootIndex[Root] = RCRootVault.size();
      RCRootVault.insert(Root);
    }
  }

  // Initialize all the data flow bit vector for all basic blocks.
for (auto &BB : *F) {
    BlockStates[&BB] = new (BPA.Allocate())
        RetainBlockState(&BB == &*F->begin(), RCRootVault.size(),
                         MultiIteration);
  }
}

/// Compute BBMaxSet for every block in reverse post order: the intersection
/// of the predecessors' BBMaxSets plus the roots retained in the block itself.
void RetainCodeMotionContext::initializeCodeMotionBBMaxSet() {
  for (SILBasicBlock *BB : PO->getReversePostOrder()) {
    // If basic block has no predecessor, start from an empty max set.
    BlockState *State = BlockStates[BB];
    if (BB->pred_empty()) {
      State->BBMaxSet.reset();
    } else {
      // Intersect in all predecessors' BBMaxSets.
      State->BBMaxSet.set();
      for (auto E = BB->pred_end(), I = BB->pred_begin(); I != E; ++I) {
        State->BBMaxSet &= BlockStates[*I]->BBMaxSet;
      }
    }

    // Process the instructions in the basic block to find what refcounted
    // roots are retained. If we know that an RC root can't be retained at a
    // basic block, then we know we do not need to consider it for the killset.
    // NOTE: this is a conservative approximation, because some retains may be
    // blocked before it reaches this block.
    for (auto &II : *BB) {
      if (!isRetainInstruction(&II))
        continue;
      State->BBMaxSet.set(RCRootIndex[getRCRoot(&II)]);
    }
  }
}

/// Compute BBGenSet and BBKillSet for every block and record the blocks that
/// either generate or block a retain in InterestBlocks.
void RetainCodeMotionContext::computeCodeMotionGenKillSet() {
  for (SILBasicBlock *BB : PO->getReversePostOrder()) {
    auto *State = BlockStates[BB];
    bool InterestBlock = false;
    for (auto &I : *BB) {
      // Check whether this instruction blocks any RC root code motion. Roots
      // that are not in BBMaxSet are skipped without querying
      // mayBlockCodeMotion.
      for (unsigned i = 0; i < RCRootVault.size(); ++i) {
        if (!State->BBMaxSet.test(i) || !mayBlockCodeMotion(&I, RCRootVault[i]))
          continue;
        // This is a blocking instruction for the rcroot.
        InterestBlock = true;
        State->BBKillSet.set(i);
        State->BBGenSet.reset(i);
      }
      // If this is a retain instruction, it also generates.
      if (isRetainInstruction(&I)) {
        unsigned idx = RCRootIndex[getRCRoot(&I)];
        State->BBGenSet.set(idx);
        // The retain blocks its own root, so the loop above must have set
        // the kill bit already.
        assert(State->BBKillSet.test(idx) && "Killset computed incorrectly");
        State->BBKillSet.reset(idx);
        InterestBlock = true;
      }
    }

    // Is this a block that is interesting to the last iteration of the data
    // flow.
if (!InterestBlock) continue; InterestBlocks.insert(BB); } } bool RetainCodeMotionContext::performCodeMotion() { bool Changed = false; // Create the new retain instructions. for (auto RC : RCRootVault) { auto Iter = InsertPoints.find(RC); if (Iter == InsertPoints.end()) continue; for (auto IP : Iter->second) { // we are about to insert a new retain instruction before the insertion // point. Check if the previous instruction is reusable, reuse it, do // not insert new instruction and delete old one. if (auto I = getPrevReusableInst(IP, Iter->first)) { RCInstructions.erase(I); continue; } createIncrementBefore(Iter->first, IP); Changed = true; } } // Remove the old retain instructions. for (auto R : RCInstructions) { ++NumRetainsSunk; recursivelyDeleteTriviallyDeadInstructions(R, true); } return Changed; } void RetainCodeMotionContext::mergeBBDataFlowStates(SILBasicBlock *BB) { BlockState *State = BlockStates[BB]; State->BBSetIn.reset(); // If basic block has no predecessor, simply reset and return. if (BB->pred_empty()) return; // Intersect in all predecessors' BBSetOuts. auto Iter = BB->pred_begin(); State->BBSetIn = BlockStates[*Iter]->BBSetOut; Iter = std::next(Iter); for (auto E = BB->pred_end(); Iter != E; ++Iter) { State->BBSetIn &= BlockStates[*Iter]->BBSetOut; } } bool RetainCodeMotionContext::processBBWithGenKillSet(SILBasicBlock *BB) { RetainBlockState *State = BlockStates[BB]; // Compute the BBSetOut at the end of the basic block. mergeBBDataFlowStates(BB); // Compute the BBSetIn at the beginning of the basic block. State->BBSetIn.reset(State->BBKillSet); State->BBSetIn |= State->BBGenSet; // If BBSetIn changes, then keep iterating until reached a fixed point. return State->updateBBSetOut(State->BBSetIn); } void RetainCodeMotionContext::convergeCodeMotionDataFlow() { // Process each basic block with the genset and killset. Every time the // BBSetOut of a basic block changes, the optimization is rerun on its // successors. 
llvm::SmallVector WorkList; llvm::SmallPtrSet HandledBBs; // Push into reverse post order so that we can pop from the back and get // post order. for (SILBasicBlock *B : PO->getReversePostOrder()) { WorkList.push_back(B); HandledBBs.insert(B); } while (!WorkList.empty()) { SILBasicBlock *BB = WorkList.pop_back_val(); HandledBBs.erase(BB); if (processBBWithGenKillSet(BB)) { for (auto &X : BB->getSuccessors()) { // We do not push basic block into the worklist if its already // in the worklist. if (HandledBBs.count(X)) continue; WorkList.push_back(X); } } } } void RetainCodeMotionContext::computeCodeMotionInsertPoints() { // The BBSetOuts have converged, run last iteration and figure out // insertion point for each refcounted root. for (SILBasicBlock *BB : PO->getReversePostOrder()) { mergeBBDataFlowStates(BB); RetainBlockState *S = BlockStates[BB]; // Compute insertion point generated by the edge value transition. // If there is a transition from 1 to 0, that means we have a partial // merge, which means the retain can NOT be sunk to the current block, // so place it at the end of the predecessors. for (unsigned i = 0; i < RCRootVault.size(); ++i) { if (S->BBSetIn[i]) continue; for (auto Pred : BB->getPredecessorBlocks()) { BlockState *PBB = BlockStates[Pred]; if (!PBB->BBSetOut[i]) continue; InsertPoints[RCRootVault[i]].push_back(Pred->getTerminator()); } } // Is this block interesting. If we are sure this block does not generate // retains nor does it block any retains (i.e. no insertion point will be // created), we can skip it, as the BBSetOut has been converged if this is // a multi-iteration function. if (MultiIteration && !InterestBlocks.count(BB)) continue; // Compute insertion point within the basic block. Process instructions in // the basic block in reverse post-order fashion. 
for (auto I = BB->begin(), E = BB->end(); I != E; ++I) {
      for (unsigned i = 0; i < RCRootVault.size(); ++i) {
        if (!S->BBSetIn[i] || !mayBlockCodeMotion(&*I, RCRootVault[i]))
          continue;
        // The retain of this root can not move past I: anchor it before I.
        S->BBSetIn.reset(i);
        InsertPoints[RCRootVault[i]].push_back(&*I);
      }

      // If this is a retain instruction, it also generates.
      if (isRetainInstruction(&*I)) {
        S->BBSetIn.set(RCRootIndex[getRCRoot(&*I)]);
      }
    }

    // Lastly update the BBSetOut, only necessary when we are running a single
    // iteration dataflow.
    if (!MultiIteration) {
      S->updateBBSetOut(S->BBSetIn);
    }
  }
}

//===----------------------------------------------------------------------===//
//                          Release Code Motion
//===----------------------------------------------------------------------===//

/// Block state used by the (backward) release hoisting data flow.
class ReleaseBlockState : public BlockState {
public:
  /// Check whether the BBSetIn has changed. If it does, we need to rerun
  /// the data flow on this block's predecessors to reach fixed point.
  ///
  /// \returns true if BBSetIn was replaced by \p X, false if it was already
  /// equal to \p X.
  bool updateBBSetIn(llvm::SmallBitVector &X) {
    if (BBSetIn == X)
      return false;
    BBSetIn = X;
    return true;
  }

  /// constructor.
  ///
  /// If \p InitOptimistic is true, the block in-bits are initialized to 1
  /// which enables optimistic data flow evaluation. \p size is the number of
  /// bits in every bit vector.
  ReleaseBlockState(bool InitOptimistic, unsigned size) {
    // backward data flow.
    // Initialize to true if we are running optimistic data flow for this
    // block, i.e. InitOptimistic is true.
    BBSetIn.resize(size, InitOptimistic);
    BBSetOut.resize(size, false);
    BBMaxSet.resize(size, InitOptimistic);

    // Genset and Killset are initially empty.
    BBGenSet.resize(size, false);
    BBKillSet.resize(size, false);
  }
};

/// ReleaseCodeMotionContext - Context to perform release code motion.
class ReleaseCodeMotionContext : public CodeMotionContext {
  /// All the release block state for all the basic blocks in the function.
  llvm::SmallDenseMap BlockStates;

  /// We are not moving epilogue releases.
  bool FreezeEpilogueReleases;

  /// The epilogue release matcher we are currently using.
ConsumedArgToEpilogueReleaseMatcher &ERM;

  /// Return true if the instruction blocks the Ptr to be moved further.
  bool mayBlockCodeMotion(SILInstruction *II, SILValue Ptr) override {
    // NOTE: If more checks are to be added, place the most expensive in the end.
    // This function is called many times.
    //
    // We can not move a release above the instruction that defines the
    // released value.
    if (II == Ptr->getDefiningInstruction())
      return true;
    // Identical RC root blocks code motion, we will be able to move this release
    // further once we move the blocking release.
    if (isReleaseInstruction(II) && getRCRoot(II) == Ptr)
      return true;
    // Stop at may interfere.
    if (mayHaveSymmetricInterference(II, Ptr, AA))
      return true;
    // This instruction does not block the release.
    return false;
  }

  /// Return the instruction immediately before \p I if it happens to be a
  /// release with the given RC root, nullptr otherwise. Also returns nullptr
  /// when \p I is the first instruction of its basic block.
  SILInstruction *getPrevReusableInst(SILInstruction *I, SILValue Root) {
    if (&*I->getParent()->begin() == I)
      return nullptr;
    auto Prev = &*std::prev(SILBasicBlock::iterator(I));
    if (isReleaseInstruction(Prev) && getRCRoot(Prev) == Root)
      return Prev;
    return nullptr;
  }

public:
  /// Constructor.
  ReleaseCodeMotionContext(llvm::SpecificBumpPtrAllocator &BPA, SILFunction *F,
                           PostOrderFunctionInfo *PO, AliasAnalysis *AA,
                           RCIdentityFunctionInfo *RCFI,
                           bool FreezeEpilogueReleases,
                           ConsumedArgToEpilogueReleaseMatcher &ERM)
      : CodeMotionContext(BPA, F, PO, AA, RCFI),
        FreezeEpilogueReleases(FreezeEpilogueReleases), ERM(ERM) {
    MultiIteration = requireIteration();
  }

  /// virtual destructor.
  ~ReleaseCodeMotionContext() override {}

  /// Return true if the multi-iteration (optimistic) data flow is required;
  /// the result is stored in MultiIteration by the constructor.
  bool requireIteration() override;

  /// Initialize necessary things to run the iterative data flow.
  void initializeCodeMotionDataFlow() override;

  /// Initialize the basic block maximum refcounted set.
  void initializeCodeMotionBBMaxSet() override;

  /// Compute the genset and killset for every root in every basic block.
void computeCodeMotionGenKillSet() override;

  /// Run the iterative data flow to converge.
  void convergeCodeMotionDataFlow() override;

  /// Use the data flow results, come up with places to insert the new inst.
  void computeCodeMotionInsertPoints() override;

  /// Remove the old releases and create the new *moved* refcounted
  /// instructions
  bool performCodeMotion() override;

  /// Compute the BBSetIn and BBSetOut for the current basic
  /// block with the generated gen and kill set.
  bool processBBWithGenKillSet(SILBasicBlock *BB) override;

  /// Merge the data flow states.
  void mergeBBDataFlowStates(SILBasicBlock *BB) override;
};

/// Return true if some block is visited, in post order, before one of its
/// successors, i.e. a single backward pass is not enough.
bool ReleaseCodeMotionContext::requireIteration() {
  // If all basic blocks will have their successors processed if the basic
  // blocks in the functions are iterated in post order. Then this function
  // can be processed in one iteration, i.e. no need to generate the genset
  // and killset.
  llvm::SmallPtrSet PBBs;
  for (SILBasicBlock *B : PO->getPostOrder()) {
    for (auto &X : B->getSuccessors()) {
      // A successor that has not been visited yet: iteration is needed.
      if (!PBBs.count(X))
        return true;
    }
    PBBs.insert(B);
  }
  return false;
}

/// Collect every movable release instruction and its RC root, decide which
/// blocks are initialized optimistically, then allocate one ReleaseBlockState
/// per basic block.
void ReleaseCodeMotionContext::initializeCodeMotionDataFlow() {
  // All blocks which are initialized with 1-bits. These are all blocks which
  // eventually reach the function exit (return, throw), excluding the
  // function exit blocks themselves.
  // Optimistic initialization enables moving releases across loops. On the
  // other hand, blocks, which never reach the function exit, e.g. infinite
  // loop blocks, must be excluded. Otherwise we would end up inserting
  // completely unrelated release instructions in such blocks.
  llvm::SmallPtrSet BlocksInitOptimistically;
  llvm::SmallVector Worklist;

  // Find all the RC roots in the function.
  for (auto &BB : *F) {
    for (auto &II : BB) {
      if (!isReleaseInstruction(&II))
        continue;
      // Do not try to enumerate if we are not hoisting epilogue releases.
if (FreezeEpilogueReleases && ERM.isEpilogueRelease(&II))
        continue;
      SILValue Root = getRCRoot(&II);
      RCInstructions.insert(&II);
      // Each root gets exactly one index, assigned on first sight.
      if (RCRootIndex.find(Root) != RCRootIndex.end())
        continue;
      RCRootIndex[Root] = RCRootVault.size();
      RCRootVault.insert(Root);
    }
    // Function-exiting blocks seed the backward reachability walk below.
    if (MultiIteration && BB.getTerminator()->isFunctionExiting())
      Worklist.push_back(&BB);
  }

  // Find all blocks from which there is a path to the function exit.
  // Note: the Worklist is empty if we are not in MultiIteration mode.
  while (!Worklist.empty()) {
    SILBasicBlock *BB = Worklist.pop_back_val();
    for (SILBasicBlock *Pred : BB->getPredecessorBlocks()) {
      // insert().second is true only the first time a block is seen, so each
      // block is queued at most once.
      if (BlocksInitOptimistically.insert(Pred).second)
        Worklist.push_back(Pred);
    }
  }

  // Initialize all the data flow bit vector for all basic blocks.
  for (auto &BB : *F) {
    BlockStates[&BB] = new (BPA.Allocate())
        ReleaseBlockState(BlocksInitOptimistically.count(&BB) != 0,
                          RCRootVault.size());
  }
}

/// Compute BBMaxSet for every block in post order: the intersection of the
/// successors' BBMaxSets plus the roots released in the block itself.
void ReleaseCodeMotionContext::initializeCodeMotionBBMaxSet() {
  for (SILBasicBlock *BB : PO->getPostOrder()) {
    // If basic block has no successor, start from an empty max set.
    BlockState *State = BlockStates[BB];
    if (BB->succ_empty()) {
      State->BBMaxSet.reset();
    } else {
      // Intersect in all successors' BBMaxSets.
      State->BBMaxSet.set();
      for (auto E = BB->succ_end(), I = BB->succ_begin(); I != E; ++I) {
        State->BBMaxSet &= BlockStates[*I]->BBMaxSet;
      }
    }

    // Process the instructions in the basic block to find what refcounted
    // roots are released. If we know that an RC root can't be released at a
    // basic block, then we know we do not need to consider it for the killset.
    // NOTE: this is a conservative approximation, because some releases may be
    // blocked before it reaches this block.
for (auto II = BB->rbegin(), IE = BB->rend(); II != IE; ++II) { if (!isReleaseInstruction(&*II)) continue; State->BBMaxSet.set(RCRootIndex[getRCRoot(&*II)]); } } } void ReleaseCodeMotionContext::computeCodeMotionGenKillSet() { for (SILBasicBlock *BB : PO->getPostOrder()) { auto *State = BlockStates[BB]; bool InterestBlock = false; for (auto I = BB->rbegin(), E = BB->rend(); I != E; ++I) { // Check whether this instruction blocks any RC root code motion. for (unsigned i = 0; i < RCRootVault.size(); ++i) { if (!State->BBMaxSet.test(i) || !mayBlockCodeMotion(&*I, RCRootVault[i])) continue; // This instruction blocks this RC root. InterestBlock = true; State->BBKillSet.set(i); State->BBGenSet.reset(i); } // If this is an epilogue release and we are freezing epilogue release // simply continue. if (FreezeEpilogueReleases && ERM.isEpilogueRelease(&*I)) continue; // If this is a release instruction, it also generates. if (isReleaseInstruction(&*I)) { unsigned idx = RCRootIndex[getRCRoot(&*I)]; State->BBGenSet.set(idx); assert(State->BBKillSet.test(idx) && "Killset computed incorrectly"); State->BBKillSet.reset(idx); InterestBlock = true; } } // Handle SILArgument, SILArgument can invalidate. for (unsigned i = 0; i < RCRootVault.size(); ++i) { auto *A = dyn_cast(RCRootVault[i]); if (!A || A->getParent() != BB) continue; InterestBlock = true; State->BBKillSet.set(i); State->BBGenSet.reset(i); } // Is this interesting to the last iteration of the data flow. if (!InterestBlock) continue; InterestBlocks.insert(BB); } } void ReleaseCodeMotionContext::mergeBBDataFlowStates(SILBasicBlock *BB) { BlockState *State = BlockStates[BB]; State->BBSetOut.reset(); // If basic block has no successor, simply reset and return. if (BB->succ_empty()) return; // Intersect in all successors' BBSetIn. 
auto Iter = BB->succ_begin(); State->BBSetOut = BlockStates[*Iter]->BBSetIn; Iter = std::next(Iter); for (auto E = BB->succ_end(); Iter != E; ++Iter) { State->BBSetOut &= BlockStates[*Iter]->BBSetIn; } } bool ReleaseCodeMotionContext::performCodeMotion() { bool Changed = false; SmallVector NewReleases; // Create the new releases at each anchor point. for (auto RC : RCRootVault) { auto Iter = InsertPoints.find(RC); if (Iter == InsertPoints.end()) continue; for (auto IP : Iter->second) { // we are about to insert a new release instruction before the insertion // point. Check if the successor instruction is reusable, reuse it, do // not insert new instruction and delete old one. if (auto I = getPrevReusableInst(IP, Iter->first)) { if (RCInstructions.erase(I)) NewReleases.push_back(I); continue; } if (SILInstruction *I = createDecrementBefore(Iter->first, IP).getPtrOrNull()) NewReleases.push_back(I); Changed = true; } } // Remove the old release instructions. for (auto R : RCInstructions) { ++NumReleasesHoisted; recursivelyDeleteTriviallyDeadInstructions(R, true); } // Eliminate pairs of retain-release if they are adjacent to each other and // retain/release the same RCRoot, e.g. // strong_retain %2 // strong_release %2 for (SILInstruction *ReleaseInst : NewReleases) { auto InstIter = ReleaseInst->getIterator(); if (InstIter == ReleaseInst->getParent()->begin()) continue; SILInstruction *PrevInst = &*std::prev(InstIter); if (isRetainInstruction(PrevInst) && getRCRoot(PrevInst) == getRCRoot(ReleaseInst)) { recursivelyDeleteTriviallyDeadInstructions(PrevInst, true); recursivelyDeleteTriviallyDeadInstructions(ReleaseInst, true); } } return Changed; } bool ReleaseCodeMotionContext::processBBWithGenKillSet(SILBasicBlock *BB) { ReleaseBlockState *State = BlockStates[BB]; // Compute the BBSetOut at the end of the basic block. mergeBBDataFlowStates(BB); // Compute the BBSetIn at the beginning of the basic block. 
State->BBSetOut.reset(State->BBKillSet); State->BBSetOut |= State->BBGenSet; // If BBSetIn changes, then keep iterating until reached a fixed point. return State->updateBBSetIn(State->BBSetOut); } void ReleaseCodeMotionContext::convergeCodeMotionDataFlow() { // Process each basic block with the gen and kill set. Every time the // BBSetIn of a basic block changes, the optimization is rerun on its // predecessors. llvm::SmallVector WorkList; llvm::SmallPtrSet HandledBBs; // Push into reverse post order so that we can pop from the back and get // post order. for (SILBasicBlock *B : PO->getPostOrder()) { WorkList.push_back(B); HandledBBs.insert(B); } while (!WorkList.empty()) { SILBasicBlock *BB = WorkList.pop_back_val(); HandledBBs.erase(BB); if (processBBWithGenKillSet(BB)) { for (auto X : BB->getPredecessorBlocks()) { // We do not push basic block into the worklist if its already // in the worklist. if (HandledBBs.count(X)) continue; WorkList.push_back(X); } } } } void ReleaseCodeMotionContext::computeCodeMotionInsertPoints() { // The BBSetIns have converged, run last iteration and figure out insertion // point for each RC root. for (SILBasicBlock *BB : PO->getPostOrder()) { // Intersect in the successor BBSetIns. mergeBBDataFlowStates(BB); ReleaseBlockState *S = BlockStates[BB]; // Compute insertion point generated by the edge value transition. // If there is a transition from 1 to 0, that means we have a partial // merge, which means the release can NOT be hoisted to the current block. // place it at the successors. for (unsigned i = 0; i < RCRootVault.size(); ++i) { if (S->BBSetOut[i]) continue; for (auto &Succ : BB->getSuccessors()) { BlockState *SBB = BlockStates[Succ]; if (!SBB->BBSetIn[i]) continue; InsertPoints[RCRootVault[i]].push_back(&*(*Succ).begin()); } } // Is this block interesting ? if (MultiIteration && !InterestBlocks.count(BB)) continue; // Compute insertion point generated by MayUse terminator inst. 
// If terminator instruction can block the RC root. We will have no // choice but to anchor the release instructions in the successor blocks. for (unsigned i = 0; i < RCRootVault.size(); ++i) { SILInstruction *Term = BB->getTerminator(); if (!S->BBSetOut[i] || !mayBlockCodeMotion(Term, RCRootVault[i])) continue; for (auto &Succ : BB->getSuccessors()) { BlockState *SBB = BlockStates[Succ]; if (!SBB->BBSetIn[i]) continue; InsertPoints[RCRootVault[i]].push_back(&*(*Succ).begin()); } S->BBSetOut.reset(i); } // Compute insertion point generated within the basic block. Process // instructions in post-order fashion. for (auto I = std::next(BB->rbegin()), E = BB->rend(); I != E; ++I) { for (unsigned i = 0; i < RCRootVault.size(); ++i) { if (!S->BBSetOut[i] || !mayBlockCodeMotion(&*I, RCRootVault[i])) continue; auto *InsertPt = &*std::next(SILBasicBlock::iterator(&*I)); InsertPoints[RCRootVault[i]].push_back(InsertPt); S->BBSetOut.reset(i); } // If we are freezing this epilogue release. Simply continue. if (FreezeEpilogueReleases && ERM.isEpilogueRelease(&*I)) continue; // This release generates. if (isReleaseInstruction(&*I)) { S->BBSetOut.set(RCRootIndex[getRCRoot(&*I)]); } } // Compute insertion point generated by SILArgument. SILArgument blocks if // it defines the released value. for (unsigned i = 0; i < RCRootVault.size(); ++i) { if (!S->BBSetOut[i]) continue; auto *A = dyn_cast(RCRootVault[i]); if (!A || A->getParent() != BB) continue; InsertPoints[RCRootVault[i]].push_back(&*BB->begin()); S->BBSetOut.reset(i); } // Lastly update the BBSetIn, only necessary when we are running a single // iteration dataflow. if (!MultiIteration) { S->updateBBSetIn(S->BBSetOut); } } } //===----------------------------------------------------------------------===// // Top Level Entry Point //===----------------------------------------------------------------------===// namespace { /// Code motion kind. 
enum CodeMotionKind : unsigned { Retain = 0, Release = 1}; class ARCCodeMotion : public SILFunctionTransform { /// Whether to hoist releases or sink retains. CodeMotionKind Kind; /// Freeze epilogue release or not. bool FreezeEpilogueReleases; public: /// Constructor. ARCCodeMotion(CodeMotionKind H, bool F) : Kind(H), FreezeEpilogueReleases(F) {} /// The entry point to the transformation. void run() override { // Code motion disabled. if (DisableARCCodeMotion) return; // Respect function no.optimize. SILFunction *F = getFunction(); if (!F->shouldOptimize()) return; // Return if there is critical edge and we are disabling critical edge // splitting. if (DisableIfWithCriticalEdge && hasCriticalEdges(*F, false)) return; DEBUG(llvm::dbgs() << "*** ARCCM on function: " << F->getName() << " ***\n"); PostOrderAnalysis *POA = PM->getAnalysis(); // Split all critical edges. // // TODO: maybe we can do this lazily or maybe we should disallow SIL passes // to create critical edges. bool EdgeChanged = splitAllCriticalEdges(*F, false, nullptr, nullptr); if (EdgeChanged) POA->invalidateFunction(F); auto *PO = POA->get(F); auto *AA = PM->getAnalysis(); auto *RCFI = PM->getAnalysis()->get(F); llvm::SpecificBumpPtrAllocator BPA; bool InstChanged = false; if (Kind == Release) { // TODO: we should consider Throw block as well, or better we should // abstract the Return block or Throw block away in the matcher. SILArgumentConvention Conv[] = {SILArgumentConvention::Direct_Owned}; ConsumedArgToEpilogueReleaseMatcher ERM(RCFI, F, Conv, ConsumedArgToEpilogueReleaseMatcher::ExitKind::Return); ReleaseCodeMotionContext RelCM(BPA, F, PO, AA, RCFI, FreezeEpilogueReleases, ERM); // Run release hoisting. InstChanged |= RelCM.run(); } else { RetainCodeMotionContext RetCM(BPA, F, PO, AA, RCFI); // Run retain sinking. InstChanged |= RetCM.run(); } if (EdgeChanged) { // We splitted critical edges. 
invalidateAnalysis(SILAnalysis::InvalidationKind::FunctionBody); return; } if (InstChanged) { // We moved instructions. invalidateAnalysis(SILAnalysis::InvalidationKind::Instructions); } } }; } // end anonymous namespace /// Sink Retains. SILTransform *swift::createRetainSinking() { return new ARCCodeMotion(CodeMotionKind::Retain, false); } /// Hoist releases, but not epilogue release. ASO relies on epilogue releases /// to prove knownsafety on enclosed releases. SILTransform *swift::createReleaseHoisting() { return new ARCCodeMotion(CodeMotionKind::Release, true); } /// Hoist all releases. SILTransform *swift::createLateReleaseHoisting() { return new ARCCodeMotion(CodeMotionKind::Release, false); }