//===--- MandatoryInlining.cpp - Perform inlining of "transparent" sites --===// // // This source file is part of the Swift.org open source project // // Copyright (c) 2014 - 2017 Apple Inc. and the Swift project authors // Licensed under Apache License v2.0 with Runtime Library Exception // // See https://swift.org/LICENSE.txt for license information // See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors // //===----------------------------------------------------------------------===// #define DEBUG_TYPE "mandatory-inlining" #include "swift/AST/DiagnosticEngine.h" #include "swift/AST/DiagnosticsSIL.h" #include "swift/Basic/BlotSetVector.h" #include "swift/SIL/BasicBlockUtils.h" #include "swift/SIL/BranchPropagatedUser.h" #include "swift/SIL/InstructionUtils.h" #include "swift/SIL/OwnershipUtils.h" #include "swift/SILOptimizer/PassManager/Passes.h" #include "swift/SILOptimizer/PassManager/Transforms.h" #include "swift/SILOptimizer/Utils/CFG.h" #include "swift/SILOptimizer/Utils/Devirtualize.h" #include "swift/SILOptimizer/Utils/Local.h" #include "swift/SILOptimizer/Utils/SILInliner.h" #include "swift/SILOptimizer/Utils/SILOptFunctionBuilder.h" #include "swift/SILOptimizer/Utils/StackNesting.h" #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/ImmutableSet.h" #include "llvm/ADT/Statistic.h" #include "llvm/Support/Debug.h" using namespace swift; using DenseFunctionSet = llvm::DenseSet; using ImmutableFunctionSet = llvm::ImmutableSet; STATISTIC(NumMandatoryInlines, "Number of function application sites inlined by the mandatory " "inlining pass"); template static void diagnose(ASTContext &Context, SourceLoc loc, Diag diag, U &&...args) { Context.Diags.diagnose(loc, diag, std::forward(args)...); } static SILValue stripCopies(SILValue v) { while (auto *cvi = dyn_cast(v)) { v = cvi->getOperand(); } return v; } /// If \p applySite is a terminator then pass the first instruction of each /// successor to fun. Otherwise, pass std::next(applySite). static void insertAfterApply(SILInstruction *applySite, llvm::function_ref &&fun) { auto *ti = dyn_cast(applySite); if (!ti) { return fun(std::next(applySite->getIterator())); } for (auto *succBlocks : ti->getSuccessorBlocks()) { fun(succBlocks->begin()); } } /// Fixup reference counts after inlining a function call (which is a no-op /// unless the function is a thick function). /// /// It is important to note that, we can not assume that the partial apply, the /// apply site, or the callee value are control dependent in any way. This /// requires us to need to be very careful. See inline comments. static void fixupReferenceCounts( PartialApplyInst *pai, SILInstruction *applySite, SILValue calleeValue, ArrayRef captureArgConventions, MutableArrayRef capturedArgs, bool isCalleeGuaranteed) { // We assume that we were passed a slice of our actual argument array. So we // can use this to copy if we need to. assert(captureArgConventions.size() == capturedArgs.size()); SmallPtrSet visitedBlocks; // FIXME: Can we cache this in between inlining invocations? DeadEndBlocks deadEndBlocks(pai->getFunction()); SmallVector leakingBlocks; auto errorBehavior = ownership::ErrorBehaviorKind::ReturnFalse; // Add a copy of each non-address type capture argument to lifetime extend the // captured argument over at least the inlined function and till the end of a // box if we have an address. This deals with the possibility of the closure // being destroyed by an earlier application and thus cause the captured // argument to be destroyed. auto loc = RegularLocation::getAutoGeneratedLocation(); for (unsigned i : indices(captureArgConventions)) { auto convention = captureArgConventions[i]; SILValue &v = capturedArgs[i]; if (v->getType().isAddress()) { // FIXME: What about indirectly owned parameters? The invocation of the // closure would perform an indirect copy which we should mimick here. assert(convention != ParameterConvention::Indirect_In && "Missing indirect copy"); continue; } auto *f = applySite->getFunction(); // See if we have a trivial value. In such a case, just continue. We do not // need to fix up anything. if (v->getType().isTrivial(*f)) continue; bool hasOwnership = f->hasOwnership(); switch (convention) { case ParameterConvention::Indirect_In: case ParameterConvention::Indirect_In_Constant: case ParameterConvention::Indirect_Inout: case ParameterConvention::Indirect_InoutAliasable: case ParameterConvention::Indirect_In_Guaranteed: llvm_unreachable("Should be handled above"); case ParameterConvention::Direct_Guaranteed: { // If we have a direct_guaranteed value, the value is being taken by the // partial_apply at +1, but we are going to invoke the value at +0. So we // need to copy/borrow the value before the pai and then // end_borrow/destroy_value at the apply site. SILValue copy = SILBuilderWithScope(pai).emitCopyValueOperation(loc, v); SILValue argument = copy; if (hasOwnership) { argument = SILBuilderWithScope(pai).createBeginBorrow(loc, argument); } visitedBlocks.clear(); // If we need to insert compensating destroys, do so. auto error = valueHasLinearLifetime(copy, {applySite}, {}, visitedBlocks, deadEndBlocks, errorBehavior, &leakingBlocks); if (error.getFoundLeak()) { while (!leakingBlocks.empty()) { auto *leakingBlock = leakingBlocks.pop_back_val(); auto loc = RegularLocation::getAutoGeneratedLocation(); SILBuilderWithScope builder(leakingBlock->begin()); if (hasOwnership) { builder.createEndBorrow(loc, argument); } builder.emitDestroyValueOperation(loc, copy); } } // If we found an over consume it means that our value is consumed within // the loop. That means our leak code will have lifetime extended the // value over the loop. So we should /not/ insert a destroy after the // apply site. In contrast, if we do not have an over consume, we must // have been compensating for uses in the top of a diamond and need to // insert a destroy after the apply since the leak will just cover the // other path. if (!error.getFoundOverConsume()) { insertAfterApply(applySite, [&](SILBasicBlock::iterator iter) { if (hasOwnership) { SILBuilderWithScope(iter).createEndBorrow(loc, argument); } SILBuilderWithScope(iter).emitDestroyValueOperation(loc, copy); }); } v = argument; break; } // TODO: Do we need to lifetime extend here? case ParameterConvention::Direct_Unowned: { v = SILBuilderWithScope(pai).emitCopyValueOperation(loc, v); visitedBlocks.clear(); // If we need to insert compensating destroys, do so. auto error = valueHasLinearLifetime(v, {applySite}, {}, visitedBlocks, deadEndBlocks, errorBehavior, &leakingBlocks); if (error.getFoundError()) { while (!leakingBlocks.empty()) { auto *leakingBlock = leakingBlocks.pop_back_val(); auto loc = RegularLocation::getAutoGeneratedLocation(); SILBuilderWithScope builder(leakingBlock->begin()); builder.emitDestroyValueOperation(loc, v); } } insertAfterApply(applySite, [&](SILBasicBlock::iterator iter) { SILBuilderWithScope(iter).emitDestroyValueOperation(loc, v); }); break; } // If we have an owned value, we insert a copy here for two reasons: // // 1. To balance the consuming argument. // 2. To lifetime extend the value over the call site in case our partial // apply has another use that would destroy our value first. case ParameterConvention::Direct_Owned: { v = SILBuilderWithScope(pai).emitCopyValueOperation(loc, v); visitedBlocks.clear(); // If we need to insert compensating destroys, do so. auto error = valueHasLinearLifetime(v, {applySite}, {}, visitedBlocks, deadEndBlocks, errorBehavior, &leakingBlocks); if (error.getFoundError()) { while (!leakingBlocks.empty()) { auto *leakingBlock = leakingBlocks.pop_back_val(); auto loc = RegularLocation::getAutoGeneratedLocation(); SILBuilderWithScope builder(leakingBlock->begin()); builder.emitDestroyValueOperation(loc, v); } } break; } } } // Destroy the callee as the apply would have done if our function is not // callee guaranteed. if (!isCalleeGuaranteed) { insertAfterApply(applySite, [&](SILBasicBlock::iterator iter) { SILBuilderWithScope(iter).emitDestroyValueOperation(loc, calleeValue); }); } } static SILValue cleanupLoadedCalleeValue(SILValue calleeValue, LoadInst *li) { auto *pbi = cast(li->getOperand()); auto *abi = cast(pbi->getOperand()); // The load instruction must have no more uses or a single destroy left to // erase it. if (li->getFunction()->hasOwnership()) { // TODO: What if we have multiple destroy_value? That should be ok as well. auto *dvi = li->getSingleUserOfType(); if (!dvi) return SILValue(); dvi->eraseFromParent(); } else if (!li->use_empty()) { return SILValue(); } li->eraseFromParent(); // Look through uses of the alloc box the load is loading from to find up to // one store and up to one strong release. PointerUnion destroy; destroy = nullptr; for (Operand *use : abi->getUses()) { auto *user = use->getUser(); if (destroy.isNull()) { if (auto *sri = dyn_cast(user)) { destroy = sri; continue; } if (auto *dvi = dyn_cast(user)) { destroy = dvi; continue; } } if (user == pbi) continue; return SILValue(); } StoreInst *si = nullptr; for (Operand *use : pbi->getUses()) { if (auto *useSI = dyn_cast_or_null(use->getUser())) { si = useSI; continue; } return SILValue(); } // If we found a store, record its source and erase it. if (si) { calleeValue = si->getSrc(); si->eraseFromParent(); } else { calleeValue = SILValue(); } // If we found a strong release, replace it with a strong release of the // source of the store and erase it. if (destroy) { if (calleeValue) { if (auto *sri = destroy.dyn_cast()) { SILBuilderWithScope(sri).emitStrongReleaseAndFold(sri->getLoc(), calleeValue); sri->eraseFromParent(); } else { auto *dvi = destroy.get(); SILBuilderWithScope(dvi).emitDestroyValueAndFold(dvi->getLoc(), calleeValue); dvi->eraseFromParent(); } } } assert(pbi->use_empty()); pbi->eraseFromParent(); assert(abi->use_empty()); abi->eraseFromParent(); return calleeValue; } /// Removes instructions that create the callee value if they are no /// longer necessary after inlining. static void cleanupCalleeValue(SILValue calleeValue) { // Handle the case where the callee of the apply is a load instruction. If we // fail to optimize, return. Otherwise, see if we can look through other // abstractions on our callee. if (auto *li = dyn_cast(calleeValue)) { calleeValue = cleanupLoadedCalleeValue(calleeValue, li); if (!calleeValue) { return; } } calleeValue = stripCopies(calleeValue); // Inline constructor auto calleeSource = ([&]() -> SILValue { // Handle partial_apply/thin_to_thick -> convert_function: // tryDeleteDeadClosure must run before deleting a ConvertFunction that uses // the PartialApplyInst or ThinToThickFunctionInst. tryDeleteDeadClosure // will delete any uses of the closure, including a // convert_escape_to_noescape conversion. if (auto *cfi = dyn_cast(calleeValue)) return stripCopies(cfi->getOperand()); if (auto *cvt = dyn_cast(calleeValue)) return stripCopies(cvt->getOperand()); return stripCopies(calleeValue); })(); if (auto *pai = dyn_cast(calleeSource)) { SILValue callee = pai->getCallee(); if (!tryDeleteDeadClosure(pai)) return; calleeValue = callee; } else if (auto *tttfi = dyn_cast(calleeSource)) { SILValue callee = tttfi->getCallee(); if (!tryDeleteDeadClosure(tttfi)) return; calleeValue = callee; } calleeValue = stripCopies(calleeValue); // Handle function_ref -> convert_function -> partial_apply/thin_to_thick. if (auto *cfi = dyn_cast(calleeValue)) { if (isInstructionTriviallyDead(cfi)) { recursivelyDeleteTriviallyDeadInstructions(cfi, true); return; } } if (auto *fri = dyn_cast(calleeValue)) { if (!fri->use_empty()) return; fri->eraseFromParent(); } } namespace { /// Cleanup dead closures after inlining. class ClosureCleanup { using DeadInstSet = SmallBlotSetVector; /// A helper class to update the set of dead instructions. /// /// Since this is called by the SILModule callback, the instruction may longer /// be well-formed. Do not visit its operands. However, it's position in the /// basic block is still valid. /// /// FIXME: Using the Module's callback mechanism for this is terrible. /// Instead, cleanupCalleeValue could be easily rewritten to use its own /// instruction deletion helper and pass a callback to tryDeleteDeadClosure /// and recursivelyDeleteTriviallyDeadInstructions. class DeleteUpdateHandler : public DeleteNotificationHandler { SILModule &Module; DeadInstSet &DeadInsts; public: DeleteUpdateHandler(SILModule &M, DeadInstSet &DeadInsts) : Module(M), DeadInsts(DeadInsts) { Module.registerDeleteNotificationHandler(this); } ~DeleteUpdateHandler() override { // Unregister the handler. Module.removeDeleteNotificationHandler(this); } // Handling of instruction removal notifications. bool needsNotifications() override { return true; } // Handle notifications about removals of instructions. void handleDeleteNotification(SILNode *node) override { auto deletedI = dyn_cast(node); if (!deletedI) return; DeadInsts.erase(deletedI); } }; SmallBlotSetVector deadFunctionVals; public: /// This regular instruction deletion callback checks for any function-type /// values that may be unused after deleting the given instruction. void recordDeadFunction(SILInstruction *deletedInst) { // If the deleted instruction was already recorded as a function producer, // delete it from the map and record its operands instead. deadFunctionVals.erase(deletedInst); for (auto &operand : deletedInst->getAllOperands()) { SILValue operandVal = operand.get(); if (!operandVal->getType().is()) continue; // Simply record all function-producing instructions used by dead // code. Checking for a single use would not be precise because // `deletedInst` could itself use `deadInst` multiple times. if (auto *deadInst = operandVal->getDefiningInstruction()) deadFunctionVals.insert(deadInst); } } // Note: instructions in the `deadFunctionVals` set may use each other, so the // set needs to continue to be updated (by this handler) when deleting // instructions. This assumes that DeadFunctionValSet::erase() is stable. void cleanupDeadClosures(SILFunction *F) { DeleteUpdateHandler deleteUpdate(F->getModule(), deadFunctionVals); for (Optional I : deadFunctionVals) { if (!I.hasValue()) continue; if (auto *SVI = dyn_cast(I.getValue())) cleanupCalleeValue(SVI); } } }; } // end of namespace static void collectPartiallyAppliedArguments( PartialApplyInst *PAI, SmallVectorImpl &CapturedArgConventions, SmallVectorImpl &FullArgs) { ApplySite Site(PAI); SILFunctionConventions CalleeConv(Site.getSubstCalleeType(), PAI->getModule()); for (auto &Arg : PAI->getArgumentOperands()) { unsigned CalleeArgumentIndex = Site.getCalleeArgIndex(Arg); assert(CalleeArgumentIndex >= CalleeConv.getSILArgIndexOfFirstParam()); auto ParamInfo = CalleeConv.getParamInfoForSILArg(CalleeArgumentIndex); CapturedArgConventions.push_back(ParamInfo.getConvention()); FullArgs.push_back(Arg.get()); } } static SILValue getLoadedCalleeValue(LoadInst *li) { auto *pbi = dyn_cast(li->getOperand()); if (!pbi) return SILValue(); auto *abi = dyn_cast(pbi->getOperand()); if (!abi) return SILValue(); PointerUnion destroy = static_cast(nullptr); // Look through uses of the alloc box the load is loading from to find up to // one store and up to one destroy. for (auto *use : abi->getUses()) { auto *user = use->getUser(); // Look for our single destroy. If we find it... continue. if (destroy.isNull()) { if (auto *sri = dyn_cast(user)) { destroy = sri; continue; } if (auto *dvi = dyn_cast(user)) { destroy = dvi; continue; } } // Ignore our pbi if we find one. if (user == pbi) continue; // Otherwise, we have something that we do not understand. Return // SILValue(). // // NOTE: We purposely allow for strong_retain, retain_value, copy_value to // go down this path since we only want to consider simple boxes that have a // single post-dominating destroy. So if we have a strong_retain, // retain_value, or copy_value, we want to bail. return SILValue(); } // Make sure that our project_box has a single store user and our load user. StoreInst *si = nullptr; for (Operand *use : pbi->getUses()) { // If this use is our load... continue. if (use->getUser() == li) continue; // Otherwise, see if we have a store... if (auto *useSI = dyn_cast_or_null(use->getUser())) { // If we already have a store, we have a value that is initialized // multiple times... bail. if (si) return SILValue(); // If we do not have a store yet, make sure that it is in the same basic // block as box. Otherwise bail. if (useSI->getParent() != abi->getParent()) return SILValue(); // Ok, we found a store in the same block as the box and for which we have // so far only found one. Stash the store. si = useSI; continue; } // Otherwise, we have something we do not support... bail. return SILValue(); } // If we did not find a store, bail. if (!si) return SILValue(); // Otherwise, we have found our callee... the source of our store. return si->getSrc(); } /// Returns the callee SILFunction called at a call site, in the case /// that the call is transparent (as in, both that the call is marked /// with the transparent flag and that callee function is actually transparently /// determinable from the SIL) or nullptr otherwise. This assumes that the SIL /// is already in SSA form. /// /// In the case that a non-null value is returned, FullArgs contains effective /// argument operands for the callee function. static SILFunction * getCalleeFunction(SILFunction *F, FullApplySite AI, bool &IsThick, SmallVectorImpl &CapturedArgConventions, SmallVectorImpl &FullArgs, PartialApplyInst *&PartialApply) { IsThick = false; PartialApply = nullptr; CapturedArgConventions.clear(); FullArgs.clear(); // First grab our basic arguments from our apply. for (const auto &Arg : AI.getArguments()) FullArgs.push_back(Arg); // Then grab a first approximation of our apply by stripping off all copy // operations. SILValue CalleeValue = stripCopies(AI.getCallee()); // If after stripping off copy_values, we have a load then see if we the // function we want to inline has a simple available value through a simple // alloc_box. Bail otherwise. if (auto *li = dyn_cast(CalleeValue)) { CalleeValue = getLoadedCalleeValue(li); if (!CalleeValue) return nullptr; CalleeValue = stripCopies(CalleeValue); } // PartialApply/ThinToThick -> ConvertFunction patterns are generated // by @noescape closures. // // FIXME: We don't currently handle mismatched return types, however, this // would be a good optimization to handle and would be as simple as inserting // a cast. auto skipFuncConvert = [](SILValue CalleeValue) { // Skip any copies that we see. CalleeValue = stripCopies(CalleeValue); // We can also allow a thin @escape to noescape conversion as such: // %1 = function_ref @thin_closure_impl : $@convention(thin) () -> () // %2 = convert_function %1 : // $@convention(thin) () -> () to $@convention(thin) @noescape () -> () // %3 = thin_to_thick_function %2 : // $@convention(thin) @noescape () -> () to // $@noescape @callee_guaranteed () -> () // %4 = apply %3() : $@noescape @callee_guaranteed () -> () if (auto *ThinToNoescapeCast = dyn_cast(CalleeValue)) { auto FromCalleeTy = ThinToNoescapeCast->getOperand()->getType().castTo(); if (FromCalleeTy->getExtInfo().hasContext()) return CalleeValue; auto ToCalleeTy = ThinToNoescapeCast->getType().castTo(); auto EscapingCalleeTy = ToCalleeTy->getWithExtInfo( ToCalleeTy->getExtInfo().withNoEscape(false)); if (FromCalleeTy != EscapingCalleeTy) return CalleeValue; return stripCopies(ThinToNoescapeCast->getOperand()); } // Ignore mark_dependence users. A partial_apply [stack] uses them to mark // the dependence of the trivial closure context value on the captured // arguments. if (auto *MD = dyn_cast(CalleeValue)) { while (MD) { CalleeValue = MD->getValue(); MD = dyn_cast(CalleeValue); } return CalleeValue; } auto *CFI = dyn_cast(CalleeValue); if (!CFI) return stripCopies(CalleeValue); // TODO: Handle argument conversion. All the code in this file needs to be // cleaned up and generalized. The argument conversion handling in // optimizeApplyOfConvertFunctionInst should apply to any combine // involving an apply, not just a specific pattern. // // For now, just handle conversion that doesn't affect argument types, // return types, or throws. We could trivially handle any other // representation change, but the only one that doesn't affect the ABI and // matters here is @noescape, so just check for that. auto FromCalleeTy = CFI->getOperand()->getType().castTo(); auto ToCalleeTy = CFI->getType().castTo(); auto EscapingCalleeTy = ToCalleeTy->getWithExtInfo(ToCalleeTy->getExtInfo().withNoEscape(false)); if (FromCalleeTy != EscapingCalleeTy) return stripCopies(CalleeValue); return stripCopies(CFI->getOperand()); }; // Look through a escape to @noescape conversion. CalleeValue = skipFuncConvert(CalleeValue); // We are allowed to see through exactly one "partial apply" instruction or // one "thin to thick function" instructions, since those are the patterns // generated when using auto closures. if (auto *PAI = dyn_cast(CalleeValue)) { // Collect the applied arguments and their convention. collectPartiallyAppliedArguments(PAI, CapturedArgConventions, FullArgs); CalleeValue = stripCopies(PAI->getCallee()); IsThick = true; PartialApply = PAI; } else if (auto *TTTFI = dyn_cast(CalleeValue)) { CalleeValue = stripCopies(TTTFI->getOperand()); IsThick = true; } CalleeValue = skipFuncConvert(CalleeValue); auto *FRI = dyn_cast(CalleeValue); if (!FRI) return nullptr; SILFunction *CalleeFunction = FRI->getReferencedFunction(); switch (CalleeFunction->getRepresentation()) { case SILFunctionTypeRepresentation::Thick: case SILFunctionTypeRepresentation::Thin: case SILFunctionTypeRepresentation::Method: case SILFunctionTypeRepresentation::Closure: case SILFunctionTypeRepresentation::WitnessMethod: break; case SILFunctionTypeRepresentation::CFunctionPointer: case SILFunctionTypeRepresentation::ObjCMethod: case SILFunctionTypeRepresentation::Block: return nullptr; } // If the CalleeFunction is a not-transparent definition, we can not process // it. if (CalleeFunction->isTransparent() == IsNotTransparent) return nullptr; // If CalleeFunction is a declaration, see if we can load it. if (CalleeFunction->empty()) AI.getModule().loadFunction(CalleeFunction); // If we fail to load it, bail. if (CalleeFunction->empty()) return nullptr; if (F->isSerialized() && !CalleeFunction->hasValidLinkageForFragileInline()) { if (!CalleeFunction->hasValidLinkageForFragileRef()) { llvm::errs() << "caller: " << F->getName() << "\n"; llvm::errs() << "callee: " << CalleeFunction->getName() << "\n"; llvm_unreachable("Should never be inlining a resilient function into " "a fragile function"); } return nullptr; } return CalleeFunction; } static SILInstruction *tryDevirtualizeApplyHelper(FullApplySite InnerAI, ClassHierarchyAnalysis *CHA) { auto NewInst = tryDevirtualizeApply(InnerAI, CHA); if (!NewInst) return InnerAI.getInstruction(); deleteDevirtualizedApply(InnerAI); // FIXME: Comments at the use of this helper indicate that devirtualization // may return SILArgument. Yet here we assert that it must return an // instruction. auto newApplyAI = NewInst.getInstruction(); assert(newApplyAI && "devirtualized but removed apply site?"); return newApplyAI; } /// Inlines all mandatory inlined functions into the body of a function, /// first recursively inlining all mandatory apply instructions in those /// functions into their bodies if necessary. /// /// \param F the function to be processed /// \param AI nullptr if this is being called from the top level; the relevant /// ApplyInst requiring the recursive call when non-null /// \param FullyInlinedSet the set of all functions already known to be fully /// processed, to avoid processing them over again /// \param SetFactory an instance of ImmutableFunctionSet::Factory /// \param CurrentInliningSet the set of functions currently being inlined in /// the current call stack of recursive calls /// /// \returns true if successful, false if failed due to circular inlining. static bool runOnFunctionRecursively(SILOptFunctionBuilder &FuncBuilder, SILFunction *F, FullApplySite AI, DenseFunctionSet &FullyInlinedSet, ImmutableFunctionSet::Factory &SetFactory, ImmutableFunctionSet CurrentInliningSet, ClassHierarchyAnalysis *CHA) { // Avoid reprocessing functions needlessly. if (FullyInlinedSet.count(F)) return true; // Prevent attempt to circularly inline. if (CurrentInliningSet.contains(F)) { // This cannot happen on a top-level call, so AI should be non-null. assert(AI && "Cannot have circular inline without apply"); SILLocation L = AI.getLoc(); assert(L && "Must have location for transparent inline apply"); diagnose(F->getModule().getASTContext(), L.getStartSourceLoc(), diag::circular_transparent); return false; } // Add to the current inlining set (immutably, so we only affect the set // during this call and recursive subcalls). CurrentInliningSet = SetFactory.add(CurrentInliningSet, F); SmallVector CapturedArgConventions; SmallVector FullArgs; bool needUpdateStackNesting = false; // Visiting blocks in reverse order avoids revisiting instructions after block // splitting, which would be quadratic. for (auto BI = F->rbegin(), BE = F->rend(), nextBB = BI; BI != BE; BI = nextBB) { // After inlining, the block iterator will be adjusted to point to the last // block containing inlined instructions. This way, the inlined function // body will be reprocessed within the caller's context without revisiting // any original instructions. nextBB = std::next(BI); // While iterating over this block, instructions are inserted and deleted. // To avoid quadratic block splitting, instructions must be processed in // reverse order (block splitting reassigned the parent pointer of all // instructions below the split point). for (auto II = BI->rbegin(); II != BI->rend(); ++II) { FullApplySite InnerAI = FullApplySite::isa(&*II); if (!InnerAI) continue; // *NOTE* If devirtualization succeeds, devirtInst may not be InnerAI, // but a casted result of InnerAI or even a block argument due to // abstraction changes when calling the witness or class method. auto *devirtInst = tryDevirtualizeApplyHelper(InnerAI, CHA); // Restore II to the current apply site. II = devirtInst->getReverseIterator(); // If the devirtualized call result is no longer a invalid FullApplySite, // then it has succeeded, but the result is not immediately inlinable. InnerAI = FullApplySite::isa(devirtInst); if (!InnerAI) continue; SILValue CalleeValue = InnerAI.getCallee(); bool IsThick; PartialApplyInst *PAI; SILFunction *CalleeFunction = getCalleeFunction( F, InnerAI, IsThick, CapturedArgConventions, FullArgs, PAI); if (!CalleeFunction) continue; // Then recursively process it first before trying to inline it. if (!runOnFunctionRecursively(FuncBuilder, CalleeFunction, InnerAI, FullyInlinedSet, SetFactory, CurrentInliningSet, CHA)) { // If we failed due to circular inlining, then emit some notes to // trace back the failure if we have more information. // FIXME: possibly it could be worth recovering and attempting other // inlines within this same recursive call rather than simply // propagating the failure. if (AI) { SILLocation L = AI.getLoc(); assert(L && "Must have location for transparent inline apply"); diagnose(F->getModule().getASTContext(), L.getStartSourceLoc(), diag::note_while_inlining); } return false; } // Get our list of substitutions. auto Subs = (PAI ? PAI->getSubstitutionMap() : InnerAI.getSubstitutionMap()); SILOpenedArchetypesTracker OpenedArchetypesTracker(F); F->getModule().registerDeleteNotificationHandler( &OpenedArchetypesTracker); // The callee only needs to know about opened archetypes used in // the substitution list. OpenedArchetypesTracker.registerUsedOpenedArchetypes( InnerAI.getInstruction()); if (PAI) { OpenedArchetypesTracker.registerUsedOpenedArchetypes(PAI); } SILInliner Inliner(FuncBuilder, SILInliner::InlineKind::MandatoryInline, Subs, OpenedArchetypesTracker); if (!Inliner.canInlineApplySite(InnerAI)) continue; // Inline function at I, which also changes I to refer to the first // instruction inlined in the case that it succeeds. We purposely // process the inlined body after inlining, because the inlining may // have exposed new inlining opportunities beyond those present in // the inlined function when processed independently. LLVM_DEBUG(llvm::errs() << "Inlining @" << CalleeFunction->getName() << " into @" << InnerAI.getFunction()->getName() << "\n"); // If we intend to inline a partial_apply function that is not on the // stack, then we need to balance the reference counts for correctness. // // NOTE: If our partial apply is on the stack, it only has point uses (and // hopefully eventually guaranteed) uses of the captured arguments. // // NOTE: If we have a thin_to_thick_function, we do not need to worry // about such things since a thin_to_thick_function does not capture any // arguments. if (PAI && PAI->isOnStack() == PartialApplyInst::NotOnStack) { bool IsCalleeGuaranteed = PAI->getType().castTo()->isCalleeGuaranteed(); auto CapturedArgs = MutableArrayRef(FullArgs).take_back( CapturedArgConventions.size()); // We need to insert the copies before the partial_apply since if we can // not remove the partial_apply the captured values will be dead by the // time we hit the call site. fixupReferenceCounts(PAI, InnerAI.getInstruction(), CalleeValue, CapturedArgConventions, CapturedArgs, IsCalleeGuaranteed); } // Register a callback to record potentially unused function values after // inlining. ClosureCleanup closureCleanup; Inliner.setDeletionCallback([&closureCleanup](SILInstruction *I) { closureCleanup.recordDeadFunction(I); }); needUpdateStackNesting |= Inliner.needsUpdateStackNesting(InnerAI); // Inlining deletes the apply, and can introduce multiple new basic // blocks. After this, CalleeValue and other instructions may be invalid. // nextBB will point to the last inlined block auto firstInlinedInstAndLastBB = Inliner.inlineFunction(CalleeFunction, InnerAI, FullArgs); nextBB = firstInlinedInstAndLastBB.second->getReverseIterator(); ++NumMandatoryInlines; // The IR is now valid, and trivial dead arguments are removed. However, // we may be able to remove dead callee computations (e.g. dead // partial_apply closures). closureCleanup.cleanupDeadClosures(F); // Resume inlining within nextBB, which contains only the inlined // instructions and possibly instructions in the original call block that // have not yet been visited. break; } } if (needUpdateStackNesting) { StackNesting().correctStackNesting(F); } // Keep track of full inlined functions so we don't waste time recursively // reprocessing them. FullyInlinedSet.insert(F); return true; } //===----------------------------------------------------------------------===// // Top Level Driver //===----------------------------------------------------------------------===// namespace { class MandatoryInlining : public SILModuleTransform { /// The entry point to the transformation. void run() override { ClassHierarchyAnalysis *CHA = getAnalysis(); SILModule *M = getModule(); bool ShouldCleanup = !getOptions().DebugSerialization; DenseFunctionSet FullyInlinedSet; ImmutableFunctionSet::Factory SetFactory; SILOptFunctionBuilder FuncBuilder(*this); for (auto &F : *M) { // Don't inline into thunks, even transparent callees. if (F.isThunk()) continue; // Skip deserialized functions. if (F.wasDeserializedCanonical()) continue; runOnFunctionRecursively(FuncBuilder, &F, FullApplySite(), FullyInlinedSet, SetFactory, SetFactory.getEmptySet(), CHA); // The inliner splits blocks at call sites. Re-merge trivial branches // to reestablish a canonical CFG. mergeBasicBlocks(&F); } if (!ShouldCleanup) return; // Now that we've inlined some functions, clean up. If there are any // transparent functions that are deserialized from another module that are // now unused, just remove them from the module. // // We do this with a simple linear scan, because transparent functions that // reference each other have already been flattened. for (auto FI = M->begin(), E = M->end(); FI != E; ) { SILFunction &F = *FI++; invalidateAnalysis(&F, SILAnalysis::InvalidationKind::Everything); if (F.getRefCount() != 0) continue; // Leave non-transparent functions alone. if (!F.isTransparent()) continue; // We discard functions that don't have external linkage, // e.g. deserialized functions, internal functions, and thunks. // Being marked transparent controls this. if (F.isPossiblyUsedExternally()) continue; // ObjC functions are called through the runtime and are therefore alive // even if not referenced inside SIL. if (F.getRepresentation() == SILFunctionTypeRepresentation::ObjCMethod) continue; // Okay, just erase the function from the module. FuncBuilder.eraseFunction(&F); } } }; } // end anonymous namespace SILTransform *swift::createMandatoryInlining() { return new MandatoryInlining(); }