//===--- MandatoryInlining.cpp - Perform inlining of "transparent" sites --===//
//
// This source file is part of the Swift.org open source project
//
// Copyright (c) 2014 - 2017 Apple Inc. and the Swift project authors
// Licensed under Apache License v2.0 with Runtime Library Exception
//
// See https://swift.org/LICENSE.txt for license information
// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
//
//===----------------------------------------------------------------------===//

#define DEBUG_TYPE "mandatory-inlining"
#include "swift/AST/DiagnosticEngine.h"
#include "swift/AST/DiagnosticsSIL.h"
#include "swift/Basic/Assertions.h"
#include "swift/Basic/BlotSetVector.h"
#include "swift/SIL/BasicBlockUtils.h"
#include "swift/SIL/InstructionUtils.h"
#include "swift/SIL/LinearLifetimeChecker.h"
#include "swift/SIL/OwnershipUtils.h"
#include "swift/SILOptimizer/PassManager/Passes.h"
#include "swift/SILOptimizer/PassManager/Transforms.h"
#include "swift/SILOptimizer/Utils/CFGOptUtils.h"
#include "swift/SILOptimizer/Utils/Devirtualize.h"
#include "swift/SILOptimizer/Utils/InstOptUtils.h"
#include "swift/SILOptimizer/Utils/SILInliner.h"
#include "swift/SILOptimizer/Utils/SILOptFunctionBuilder.h"
#include "swift/SILOptimizer/Utils/StackNesting.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/ImmutableSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"

using namespace swift;

using DenseFunctionSet = llvm::DenseSet<SILFunction *>;
using ImmutableFunctionSet = llvm::ImmutableSet<SILFunction *>;

STATISTIC(NumMandatoryInlines,
          "Number of function application sites inlined by the mandatory "
          "inlining pass");

//===----------------------------------------------------------------------===//
//                             Printing Helpers
//===----------------------------------------------------------------------===//

extern llvm::cl::opt<bool> SILPrintInliningCallee;
extern llvm::cl::opt<bool> SILPrintInliningCallerBefore;
extern llvm::cl::opt<bool> SILPrintInliningCallerAfter;
extern llvm::cl::opt<bool> EnableVerifyAfterEachInlining;

extern void printInliningDetailsCallee(StringRef passName, SILFunction *caller,
                                       SILFunction *callee);

extern void printInliningDetailsCallerBefore(StringRef passName,
                                             SILFunction *caller,
                                             SILFunction *callee);

extern void printInliningDetailsCallerAfter(StringRef passName,
                                            SILFunction *caller,
                                            SILFunction *callee);

template <typename... T, typename... U>
static void diagnose(ASTContext &Context, SourceLoc loc, Diag<T...> diag,
                     U &&...args) {
  Context.Diags.diagnose(loc, diag, std::forward<U>(args)...);
}

/// Fixup reference counts after inlining a function call (which is a no-op
/// unless the function is a thick function).
///
/// It is important to note that we cannot assume that the partial apply, the
/// apply site, or the callee value are control dependent in any way. This
/// requires us to be very careful. See inline comments.
///
/// Returns true if the stack nesting is invalidated and must be corrected
/// afterwards.
static bool fixupReferenceCounts(
    PartialApplyInst *pai, FullApplySite applySite, SILValue calleeValue,
    ArrayRef<ParameterConvention> captureArgConventions,
    MutableArrayRef<SILValue> capturedArgs, bool isCalleeGuaranteed) {
  // We assume that we were passed a slice of our actual argument array. So we
  // can use this to copy if we need to.
  assert(captureArgConventions.size() == capturedArgs.size());

  // FIXME: Can we cache this in between inlining invocations?
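  // For intuition, the SIL shape this fixup has to keep balanced looks
  // roughly like the following (illustrative names, not from any particular
  // test case):
  //
  //   %closure = partial_apply %fn(%capture) // consumes %capture at +1
  //   ...
  //   %r = apply %closure()                  // invokes the closure at +0
  //
  // After inlining, the apply of %closure disappears, so the copies and
  // destroys inserted below must stand in for the retains and releases that
  // the closure invocation would otherwise have performed.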
  DeadEndBlocks deadEndBlocks(pai->getFunction());
  SmallVector<SILBasicBlock *, 4> leakingBlocks;
  bool invalidatedStackNesting = false;

  // Add a copy of each non-address type capture argument to lifetime extend
  // the captured argument over at least the inlined function, and until the
  // end of a box if we have an address. This deals with the possibility of
  // the closure being destroyed by an earlier application and thus causing
  // the captured argument to be destroyed.
  auto loc = RegularLocation::getAutoGeneratedLocation();
  for (unsigned i : indices(captureArgConventions)) {
    auto convention = captureArgConventions[i];
    SILValue &v = capturedArgs[i];
    auto *f = applySite.getFunction();

    // See if we have a trivial value. In such a case, just continue. We do
    // not need to fix up anything.
    if (v->getType().isTrivial(*f))
      continue;

    bool hasOwnership = f->hasOwnership();

    switch (convention) {
    case ParameterConvention::Indirect_In_CXX:
    case ParameterConvention::Indirect_In:
      llvm_unreachable("Missing indirect copy");

    case ParameterConvention::Pack_Owned:
    case ParameterConvention::Pack_Guaranteed:
      // FIXME: can these happen?
      llvm_unreachable("Missing pack owned<->guaranteed conversions");

    case ParameterConvention::Indirect_Inout:
    case ParameterConvention::Indirect_InoutAliasable:
    case ParameterConvention::Pack_Inout:
      break;

    case ParameterConvention::Indirect_In_Guaranteed: {
      // Do the same as for Direct_Guaranteed, just the address version.
      // (See comment below).
      SILBuilderWithScope builder(pai);
      auto *stackLoc =
          builder.createAllocStack(loc, v->getType().getObjectType());
      builder.createCopyAddr(loc, v, stackLoc, IsNotTake, IsInitialization);

      LinearLifetimeChecker checker(&deadEndBlocks);
      bool consumedInLoop = checker.completeConsumingUseSet(
          pai, applySite.getCalleeOperand(),
          [&](SILBasicBlock::iterator insertPt) {
            SILBuilderWithScope builder(insertPt);
            builder.createDestroyAddr(loc, stackLoc);
            builder.createDeallocStack(loc, stackLoc);
          });

      if (!consumedInLoop) {
        applySite.insertAfterInvocation([&](SILBuilder &builder) {
          builder.createDestroyAddr(loc, stackLoc);
          builder.createDeallocStack(loc, stackLoc);
        });
      }
      v = stackLoc;
      invalidatedStackNesting = true;
      break;
    }

    case ParameterConvention::Direct_Guaranteed: {
      // If we have a direct_guaranteed value, the value is being taken by the
      // partial_apply at +1, but we are going to invoke the value at +0. So
      // we need to copy/borrow the value before the pai and then
      // end_borrow/destroy_value at the apply site.
      SILValue copy = SILBuilderWithScope(pai).emitCopyValueOperation(loc, v);
      SILValue argument = copy;
      if (hasOwnership) {
        argument = SILBuilderWithScope(pai).createBeginBorrow(loc, argument);
      }

      // If we need to insert compensating destroys, do so.
      //
      // NOTE: We use pai here since in non-ossa code emitCopyValueOperation
      // returns the operand of the strong_retain which may have a ValueBase
      // that is not in the same block. An example of where this is important
      // is if we are performing emitCopyValueOperation in non-ossa code on an
      // argument when the partial_apply is not in the entrance block. In
      // truth, the linear lifetime checker does not /actually/ care what the
      // value is (ignoring diagnostic error msgs that we do not care about
      // here), it just cares about the block the value is in. In a
      // forthcoming commit, I am going to change this to use a different API
      // on the linear lifetime checker that makes this clearer.
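      // In OSSA, the rewrite performed by this case is roughly (an
      // illustrative sketch, not literal output):
      //
      //   %copy = copy_value %v
      //   %borrow = begin_borrow %copy
      //   ... partial_apply now captures %borrow instead of %v ...
      //   end_borrow %borrow            // at the compensating points below
      //   destroy_value %copy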
      LinearLifetimeChecker checker(&deadEndBlocks);
      bool consumedInLoop = checker.completeConsumingUseSet(
          pai, applySite.getCalleeOperand(),
          [&](SILBasicBlock::iterator insertPt) {
            SILBuilderWithScope builder(insertPt);
            if (hasOwnership) {
              builder.createEndBorrow(loc, argument);
            }
            builder.emitDestroyValueOperation(loc, copy);
          });

      // If our applySite is in a different loop than our partial apply, our
      // leak code will have lifetime extended the value over the loop. So we
      // should /not/ insert a destroy after the apply site. In contrast, if
      // we do not have a loop, we must have been compensating for uses in the
      // top of a diamond and need to insert a destroy after the apply since
      // the leak will just cover the other path.
      if (!consumedInLoop) {
        applySite.insertAfterInvocation([&](SILBuilder &builder) {
          if (hasOwnership) {
            builder.createEndBorrow(loc, argument);
          }
          builder.emitDestroyValueOperation(loc, copy);
        });
      }
      v = argument;
      break;
    }

    // TODO: Do we need to lifetime extend here?
    case ParameterConvention::Direct_Unowned: {
      v = SILBuilderWithScope(pai).emitCopyValueOperation(loc, v);

      // If our consuming partial apply does not post-dominate our
      // partial_apply, compute the completion of the post dominance set and
      // if that set is non-empty, insert compensating destroys at those
      // places.
      //
      // NOTE: We use pai here since in non-ossa code emitCopyValueOperation
      // returns the operand of the strong_retain which may have a ValueBase
      // that is not in the same block. An example of where this is important
      // is if we are performing emitCopyValueOperation in non-ossa code on an
      // argument when the partial_apply is not in the entrance block. In
      // truth, the linear lifetime checker does not /actually/ care what the
      // value is (ignoring diagnostic error msgs that we do not care about
      // here), it just cares about the block the value is in. In a
      // forthcoming commit, I am going to change this to use a different API
      // on the linear lifetime checker that makes this clearer.
      LinearLifetimeChecker checker(&deadEndBlocks);
      checker.completeConsumingUseSet(
          pai, applySite.getCalleeOperand(),
          [&](SILBasicBlock::iterator insertPt) {
            auto loc = RegularLocation::getAutoGeneratedLocation();
            SILBuilderWithScope builder(insertPt);
            builder.emitDestroyValueOperation(loc, v);
          });

      // Then insert destroys after the apply site since our value is not
      // being consumed as part of the actual apply.
      applySite.insertAfterInvocation([&](SILBuilder &builder) {
        builder.emitDestroyValueOperation(loc, v);
      });
      break;
    }

    // If we have an owned value, we insert a copy here for two reasons:
    //
    // 1. To balance the consuming argument.
    // 2. To lifetime extend the value over the call site in case our partial
    //    apply has another use that would destroy our value first.
    case ParameterConvention::Direct_Owned: {
      v = SILBuilderWithScope(pai).emitCopyValueOperation(loc, v);

      // If we need to insert compensating destroys, do so.
      //
      // NOTE: We use pai here since in non-ossa code emitCopyValueOperation
      // returns the operand of the strong_retain which may have a ValueBase
      // that is not in the same block. An example of where this is important
      // is if we are performing emitCopyValueOperation in non-ossa code on an
      // argument when the partial_apply is not in the entrance block. In
      // truth, the linear lifetime checker does not /actually/ care what the
      // value is (ignoring diagnostic error msgs that we do not care about
      // here), it just cares about the block the value is in.
      // In a forthcoming commit, I am going to change this to use a different
      // API on the linear lifetime checker that makes this clearer.
      LinearLifetimeChecker checker(&deadEndBlocks);
      checker.completeConsumingUseSet(
          pai, applySite.getCalleeOperand(),
          [&](SILBasicBlock::iterator insertPt) {
            auto loc = RegularLocation::getAutoGeneratedLocation();
            SILBuilderWithScope builder(insertPt);
            builder.emitDestroyValueOperation(loc, v);
          });

      // NOTE: Unlike with the unowned case above, when we are owned we do not
      // need to insert destroys since the apply will consume the value for
      // us.
      break;
    }
    }
  }

  // Destroy the callee as the apply would have done if our function is not
  // callee guaranteed.
  if (!isCalleeGuaranteed) {
    applySite.insertAfterInvocation([&](SILBuilder &builder) {
      builder.emitDestroyValueOperation(loc, calleeValue);
    });
  }
  return invalidatedStackNesting;
}

// Handle the case where the callee of the apply is either a load or a
// project_box that was used by a deleted load. If we fail to optimize,
// return an invalid SILValue.
static SILValue cleanupLoadedCalleeValue(SILValue calleeValue) {
  auto calleeSource = calleeValue;
  auto *li = dyn_cast<LoadInst>(calleeValue);
  if (li) {
    calleeSource = li->getOperand();
  }
  auto *pbi = dyn_cast<ProjectBoxInst>(calleeSource);
  if (!pbi)
    return SILValue();
  auto *abi = dyn_cast<AllocBoxInst>(pbi->getOperand());
  if (!abi)
    return SILValue();

  // The load instruction must have no more uses or a single destroy left to
  // erase it.
  if (li) {
    if (li->getFunction()->hasOwnership()) {
      // TODO: What if we have multiple destroy_value? That should be ok.
      auto *dvi = li->getSingleUserOfType<DestroyValueInst>();
      if (!dvi)
        return SILValue();
      dvi->eraseFromParent();
    } else if (!li->use_empty()) {
      return SILValue();
    }
    li->eraseFromParent();
  }

  // Look through uses of the alloc box the load is loading from to find up to
  // one store and up to one strong release.
  PointerUnion<StrongReleaseInst *, DestroyValueInst *> destroy;
  destroy = nullptr;
  for (Operand *use : abi->getUses()) {
    auto *user = use->getUser();
    if (destroy.isNull()) {
      if (auto *sri = dyn_cast<StrongReleaseInst>(user)) {
        destroy = sri;
        continue;
      }
      if (auto *dvi = dyn_cast<DestroyValueInst>(user)) {
        destroy = dvi;
        continue;
      }
    }
    if (user == pbi)
      continue;
    return SILValue();
  }

  StoreInst *si = nullptr;
  for (Operand *use : pbi->getUses()) {
    if (auto *useSI = dyn_cast_or_null<StoreInst>(use->getUser())) {
      si = useSI;
      continue;
    }
    return SILValue();
  }

  // If we found a store, record its source and erase it.
  if (si) {
    calleeValue = si->getSrc();
    si->eraseFromParent();
  } else {
    calleeValue = SILValue();
  }

  // If we found a strong release, replace it with a strong release of the
  // source of the store and erase it.
  if (destroy) {
    if (calleeValue) {
      if (auto *sri = destroy.dyn_cast<StrongReleaseInst *>()) {
        SILBuilderWithScope(sri).emitStrongReleaseAndFold(sri->getLoc(),
                                                          calleeValue);
        sri->eraseFromParent();
      } else {
        auto *dvi = cast<DestroyValueInst *>(destroy);
        SILBuilderWithScope(dvi).emitDestroyValueAndFold(dvi->getLoc(),
                                                         calleeValue);
        dvi->eraseFromParent();
      }
    }
  }

  assert(pbi->use_empty());
  pbi->eraseFromParent();
  assert(abi->use_empty());
  abi->eraseFromParent();

  return calleeValue;
}

/// Removes instructions that create the callee value if they are no
/// longer necessary after inlining.
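///
/// For example, once a thick call such as (illustrative SIL)
///
///   %fn = function_ref @callee
///   %thick = thin_to_thick_function %fn
///   %r = apply %thick()
///
/// has been inlined, the now-unused %thick and %fn can be deleted. The
/// load/alloc_box pattern is handled first via cleanupLoadedCalleeValue.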
static void cleanupCalleeValue(SILValue calleeValue,
                               bool &invalidatedStackNesting) {
  if (auto loadedValue = cleanupLoadedCalleeValue(calleeValue))
    calleeValue = loadedValue;

  calleeValue = lookThroughOwnershipInsts(calleeValue);

  // Inline constructor.
  auto calleeSource = ([&]() -> SILValue {
    // Handle partial_apply/thin_to_thick -> convert_function:
    // tryDeleteDeadClosure must run before deleting a ConvertFunction that
    // uses the PartialApplyInst or ThinToThickFunctionInst.
    // tryDeleteDeadClosure will delete any uses of the closure, including a
    // convert_escape_to_noescape conversion.
    if (auto *cfi = dyn_cast<ConvertFunctionInst>(calleeValue))
      return lookThroughOwnershipInsts(cfi->getOperand());
    if (auto *cvt = dyn_cast<ConvertEscapeToNoEscapeInst>(calleeValue))
      return lookThroughOwnershipInsts(cvt->getOperand());
    return lookThroughOwnershipInsts(calleeValue);
  })();

  if (auto *pai = dyn_cast<PartialApplyInst>(calleeSource)) {
    SILValue callee = pai->getCallee();
    if (!tryDeleteDeadClosure(pai))
      return;
    calleeValue = callee;
  } else if (auto *tttfi = dyn_cast<ThinToThickFunctionInst>(calleeSource)) {
    SILValue callee = tttfi->getCallee();
    if (!tryDeleteDeadClosure(tttfi))
      return;
    calleeValue = callee;
  }
  invalidatedStackNesting = true;

  calleeValue = lookThroughOwnershipInsts(calleeValue);

  // Handle function_ref -> convert_function -> partial_apply/thin_to_thick.
  if (auto *cfi = dyn_cast<ConvertFunctionInst>(calleeValue)) {
    if (isInstructionTriviallyDead(cfi)) {
      recursivelyDeleteTriviallyDeadInstructions(cfi, true);
      return;
    }
  }

  if (auto *fri = dyn_cast<FunctionRefInst>(calleeValue)) {
    if (!fri->use_empty())
      return;
    fri->eraseFromParent();
  }
}

namespace {
/// Cleanup dead closures after inlining.
class ClosureCleanup {
  SmallBlotSetVector<SILInstruction *, 4> deadFunctionVals;

public:
  /// Set to true if some alloc/dealloc_stack instructions are inserted and at
  /// the end of the run stack nesting needs to be corrected.
  bool invalidatedStackNesting = false;

  /// This regular instruction deletion callback checks for any function-type
  /// values that may be unused after deleting the given instruction.
  void recordDeadFunction(SILInstruction *deletedInst) {
    // If it is a debug instruction, return.
    // In this function, we look at the operands of an instruction to be
    // deleted, and add back the defining instruction of the operands to the
    // worklist if it has a function type. This works in general when we are
    // deleting dead instructions recursively.
    // But we also consider an instruction with only debug uses as dead. And
    // with eraseFromParentWithDebugInsts, we will be deleting a dead
    // instruction along with its debug instructions. So when we are deleting
    // a debug instruction, we may have already deleted its operand's defining
    // instruction, and it would be incorrect to add back its operand's
    // defining instruction.
    if (deletedInst->isDebugInstruction())
      return;

    // If the deleted instruction was already recorded as a function producer,
    // delete it from the map and record its operands instead.
    deadFunctionVals.erase(deletedInst);
    for (auto &operand : deletedInst->getAllOperands()) {
      SILValue operandVal = operand.get();
      if (!operandVal->getType().is<SILFunctionType>())
        continue;
      // Simply record all function-producing instructions used by dead
      // code. Checking for a single use would not be precise because
      // `deletedInst` could itself use `deadInst` multiple times.
      if (auto *deadInst = operandVal->getDefiningInstruction())
        deadFunctionVals.insert(deadInst);
    }
  }

  // Note: instructions in the `deadFunctionVals` set may use each other, so
  // the set needs to continue to be updated (by this handler) when deleting
  // instructions.
  // This assumes that DeadFunctionValSet::erase() is stable.
  void cleanupDeadClosures(SILFunction *F) {
    for (std::optional<SILInstruction *> I : deadFunctionVals) {
      if (!I.has_value() || I.value()->isDeleted())
        continue;

      if (auto *SVI = dyn_cast<SingleValueInstruction>(I.value()))
        cleanupCalleeValue(SVI, invalidatedStackNesting);
    }
  }
};
} // end anonymous namespace

static void collectPartiallyAppliedArguments(
    PartialApplyInst *PAI,
    SmallVectorImpl<ParameterConvention> &CapturedArgConventions,
    SmallVectorImpl<SILValue> &FullArgs) {
  ApplySite Site(PAI);
  SILFunctionConventions CalleeConv(Site.getSubstCalleeType(),
                                    PAI->getModule());
  for (auto &Arg : PAI->getArgumentOperands()) {
    unsigned CalleeArgumentIndex = Site.getCalleeArgIndex(Arg);
    assert(CalleeArgumentIndex >= CalleeConv.getSILArgIndexOfFirstParam());
    auto ParamInfo = CalleeConv.getParamInfoForSILArg(CalleeArgumentIndex);
    CapturedArgConventions.push_back(ParamInfo.getConvention());
    FullArgs.push_back(Arg.get());
  }
}

static SILValue getLoadedCalleeValue(LoadInst *li) {
  auto *pbi = dyn_cast<ProjectBoxInst>(li->getOperand());
  if (!pbi)
    return SILValue();
  auto *abi = dyn_cast<AllocBoxInst>(pbi->getOperand());
  if (!abi)
    return SILValue();

  PointerUnion<StrongReleaseInst *, DestroyValueInst *> destroy =
      static_cast<StrongReleaseInst *>(nullptr);

  // Look through uses of the alloc box the load is loading from to find up to
  // one store and up to one destroy.
  for (auto *use : abi->getUses()) {
    auto *user = use->getUser();

    // Look for our single destroy. If we find it... continue.
    if (destroy.isNull()) {
      if (auto *sri = dyn_cast<StrongReleaseInst>(user)) {
        destroy = sri;
        continue;
      }
      if (auto *dvi = dyn_cast<DestroyValueInst>(user)) {
        destroy = dvi;
        continue;
      }
    }

    // Ignore our pbi if we find one.
    if (user == pbi)
      continue;

    // Otherwise, we have something that we do not understand. Return
    // SILValue().
    //
    // NOTE: We purposely allow for strong_retain, retain_value, copy_value to
    // go down this path since we only want to consider simple boxes that have
    // a single post-dominating destroy. So if we have a strong_retain,
    // retain_value, or copy_value, we want to bail.
    return SILValue();
  }

  // Make sure that our project_box has a single store user and our load user.
  StoreInst *si = nullptr;
  for (Operand *use : pbi->getUses()) {
    // If this use is our load... continue.
    if (use->getUser() == li)
      continue;

    // Otherwise, see if we have a store...
    if (auto *useSI = dyn_cast_or_null<StoreInst>(use->getUser())) {
      // If we already have a store, we have a value that is initialized
      // multiple times... bail.
      if (si)
        return SILValue();

      // If we do not have a store yet, make sure that it is in the same basic
      // block as the box. Otherwise bail.
      if (useSI->getParent() != abi->getParent())
        return SILValue();

      // Ok, we found a store in the same block as the box, and so far we have
      // found only one. Stash the store.
      si = useSI;
      continue;
    }

    // Otherwise, we have something we do not support... bail.
    return SILValue();
  }

  // If we did not find a store, bail.
  if (!si)
    return SILValue();

  // Otherwise, we have found our callee... the source of our store.
  return si->getSrc();
}

static bool convertsThinEscapeToNoescape(ConvertFunctionInst *cv) {
  // Example:
  //   %1 = function_ref @thin_closure_impl : $() -> ()
  //   %2 = convert_function %1 : $() -> () to $@noescape () -> ()
  auto fromTy = cv->getOperand()->getType().castTo<SILFunctionType>();
  if (fromTy->getExtInfo().hasContext())
    return false;

  auto toTy = cv->getType().castTo<SILFunctionType>();
  auto escapeToTy =
      toTy->getWithExtInfo(toTy->getExtInfo().withNoEscape(false));
  return fromTy == escapeToTy;
}

// PartialApply/ThinToThick -> ConvertFunction patterns are generated
// by @noescape closures.
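//
// A rough sketch of the pattern (illustrative SIL):
//
//   %1 = function_ref @closure_impl
//   %2 = partial_apply %1(%capture)
//   %3 = convert_escape_to_noescape %2 : ... to $@noescape ...
//   %4 = apply %3(...)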
//
// FIXME: We don't currently handle mismatched return types; however, this
// would be a good optimization to handle and would be as simple as inserting
// a cast.
static SILValue stripFunctionConversions(SILValue CalleeValue) {
  // Skip any copies that we see.
  CalleeValue = lookThroughOwnershipInsts(CalleeValue);

  if (auto *ConvertFn = dyn_cast<ConvertFunctionInst>(CalleeValue)) {
    if (ConvertFn->onlyConvertsSubstitutions() ||
        ConvertFn->onlyConvertsSendable() ||
        convertsThinEscapeToNoescape(ConvertFn)) {
      return stripFunctionConversions(ConvertFn->getOperand());
    }
    return CalleeValue;
  }

  // Ignore mark_dependence users. A partial_apply [stack] uses them to mark
  // the dependence of the trivial closure context value on the captured
  // arguments.
  if (auto *MD = dyn_cast<MarkDependenceInst>(CalleeValue)) {
    while (MD) {
      CalleeValue = MD->getValue();
      MD = dyn_cast<MarkDependenceInst>(CalleeValue);
    }
    return CalleeValue;
  }

  auto *CFI = dyn_cast<ConvertEscapeToNoEscapeInst>(CalleeValue);
  if (!CFI)
    return lookThroughOwnershipInsts(CalleeValue);

  // TODO: Handle argument conversion. All the code in this file needs to be
  // cleaned up and generalized. The argument conversion handling in
  // optimizeApplyOfConvertFunctionInst should apply to any combine involving
  // an apply, not just a specific pattern.
  //
  // For now, just handle conversion that doesn't affect argument types,
  // return types, or throws. We could trivially handle any other
  // representation change, but the only one that doesn't affect the ABI and
  // matters here is @noescape, so just check for that.
  auto FromCalleeTy = CFI->getOperand()->getType().castTo<SILFunctionType>();
  auto ToCalleeTy = CFI->getType().castTo<SILFunctionType>();
  auto EscapingCalleeTy =
      ToCalleeTy->getWithExtInfo(ToCalleeTy->getExtInfo().withNoEscape(false));
  if (FromCalleeTy != EscapingCalleeTy)
    return lookThroughOwnershipInsts(CalleeValue);

  return lookThroughOwnershipInsts(CFI->getOperand());
}

/// Returns the callee SILFunction called at a call site, in the case
/// that the call is transparent (as in, both that the call is marked
/// with the transparent flag and that the callee function is actually
/// transparently determinable from the SIL) or nullptr otherwise. This
/// assumes that the SIL is already in SSA form.
///
/// In the case that a non-null value is returned, FullArgs contains effective
/// argument operands for the callee function.
static SILFunction *
getCalleeFunction(SILFunction *F, FullApplySite AI, bool &IsThick,
                  SmallVectorImpl<ParameterConvention> &CapturedArgConventions,
                  SmallVectorImpl<SILValue> &FullArgs,
                  PartialApplyInst *&PartialApply) {
  IsThick = false;
  PartialApply = nullptr;
  CapturedArgConventions.clear();
  FullArgs.clear();

  // First grab our basic arguments from our apply.
  for (SILValue Arg : AI.getArguments())
    FullArgs.push_back(Arg);

  // Then grab a first approximation of our apply by stripping off all copy
  // operations.
  SILValue CalleeValue = lookThroughOwnershipInsts(AI.getCallee());

  // If after stripping off copy_values we have a load, see if the function we
  // want to inline has a simple available value through a simple alloc_box.
  // Bail otherwise.
  if (auto *li = dyn_cast<LoadInst>(CalleeValue)) {
    CalleeValue = getLoadedCalleeValue(li);
    if (!CalleeValue)
      return nullptr;
    CalleeValue = lookThroughOwnershipInsts(CalleeValue);
  }

  // Look through an escape to @noescape conversion.
  CalleeValue = stripFunctionConversions(CalleeValue);

  // We are allowed to see through exactly one "partial apply" instruction or
  // one "thin to thick function" instruction, since those are the patterns
  // generated when using auto closures.
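  //
  // For example (illustrative SIL), an autoclosure argument that captures
  // state typically appears as:
  //
  //   %fn = function_ref @autoclosure_impl
  //   %closure = partial_apply %fn(%capture)
  //   %r = apply %takesAutoclosure(%closure)
  //
  // while a non-capturing closure appears as a thin_to_thick_function of the
  // function_ref.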
  if (auto *PAI = dyn_cast<PartialApplyInst>(CalleeValue)) {
    // Collect the applied arguments and their convention.
    collectPartiallyAppliedArguments(PAI, CapturedArgConventions, FullArgs);

    CalleeValue = lookThroughOwnershipInsts(PAI->getCallee());
    IsThick = true;
    PartialApply = PAI;
  } else if (auto *TTTFI = dyn_cast<ThinToThickFunctionInst>(CalleeValue)) {
    CalleeValue = lookThroughOwnershipInsts(TTTFI->getOperand());
    IsThick = true;
  }

  CalleeValue = stripFunctionConversions(CalleeValue);

  auto *FRI = dyn_cast<FunctionRefInst>(CalleeValue);
  if (!FRI)
    return nullptr;

  SILFunction *CalleeFunction = FRI->getReferencedFunction();

  switch (CalleeFunction->getRepresentation()) {
  case SILFunctionTypeRepresentation::Thick:
  case SILFunctionTypeRepresentation::Thin:
  case SILFunctionTypeRepresentation::Method:
  case SILFunctionTypeRepresentation::Closure:
  case SILFunctionTypeRepresentation::WitnessMethod:
  case SILFunctionTypeRepresentation::KeyPathAccessorGetter:
  case SILFunctionTypeRepresentation::KeyPathAccessorSetter:
  case SILFunctionTypeRepresentation::KeyPathAccessorEquals:
  case SILFunctionTypeRepresentation::KeyPathAccessorHash:
    break;
  case SILFunctionTypeRepresentation::CFunctionPointer:
  case SILFunctionTypeRepresentation::CXXMethod:
  case SILFunctionTypeRepresentation::ObjCMethod:
  case SILFunctionTypeRepresentation::Block:
    return nullptr;
  }

  // If the CalleeFunction is a non-transparent definition, we cannot process
  // it.
  if (CalleeFunction->isTransparent() == IsNotTransparent)
    return nullptr;

  // If CalleeFunction is a declaration, see if we can load it.
  if (CalleeFunction->empty())
    AI.getModule().loadFunction(CalleeFunction,
                                SILModule::LinkingMode::LinkNormal);

  // If we fail to load it, bail.
  if (CalleeFunction->empty())
    return nullptr;

  if (!CalleeFunction->canBeInlinedIntoCaller(F->getSerializedKind())) {
    if (F->isAnySerialized() &&
        !CalleeFunction->hasValidLinkageForFragileRef(F->getSerializedKind())) {
      llvm::errs() << "caller: " << F->getName() << "\n";
      llvm::errs() << "callee: " << CalleeFunction->getName() << "\n";
      llvm_unreachable("Should never be inlining a resilient function into "
                       "a fragile function");
    }
    return nullptr;
  }

  return CalleeFunction;
}

static SILInstruction *tryDevirtualizeApplyHelper(SILPassManager *pm,
                                                  FullApplySite InnerAI,
                                                  ClassHierarchyAnalysis *CHA) {
  auto NewInst = tryDevirtualizeApply(pm, InnerAI, CHA).first;
  if (!NewInst)
    return InnerAI.getInstruction();

  deleteDevirtualizedApply(InnerAI);

  // FIXME: Comments at the use of this helper indicate that devirtualization
  // may return SILArgument. Yet here we assert that it must return an
  // instruction.
  auto newApplyAI = NewInst.getInstruction();
  assert(newApplyAI && "devirtualized but removed apply site?");
  return newApplyAI;
}

/// Inlines all mandatory inlined functions into the body of a function,
/// first recursively inlining all mandatory apply instructions in those
/// functions into their bodies if necessary.
///
/// \param F the function to be processed
/// \param AI nullptr if this is being called from the top level; the relevant
///           ApplyInst requiring the recursive call when non-null
/// \param FullyInlinedSet the set of all functions already known to be fully
///           processed, to avoid processing them over again
/// \param SetFactory an instance of ImmutableFunctionSet::Factory
/// \param CurrentInliningSet the set of functions currently being inlined in
///           the current call stack of recursive calls
///
/// \returns true if successful, false if failed due to circular inlining.
static bool runOnFunctionRecursively(SILOptFunctionBuilder &FuncBuilder,
                                     SILPassManager *pm, SILFunction *F,
                                     FullApplySite AI,
                                     DenseFunctionSet &FullyInlinedSet,
                                     ImmutableFunctionSet::Factory &SetFactory,
                                     ImmutableFunctionSet CurrentInliningSet,
                                     ClassHierarchyAnalysis *CHA,
                                     DenseFunctionSet &changedFunctions) {
  // Avoid reprocessing functions needlessly.
  if (FullyInlinedSet.count(F))
    return true;

  // Prevent attempts to circularly inline.
  if (CurrentInliningSet.contains(F)) {
    // This cannot happen on a top-level call, so AI should be non-null.
    assert(AI && "Cannot have circular inline without apply");
    SILLocation L = AI.getLoc();
    assert(L && "Must have location for transparent inline apply");
    diagnose(F->getModule().getASTContext(), L.getStartSourceLoc(),
             diag::circular_transparent);
    return false;
  }

  // Add to the current inlining set (immutably, so we only affect the set
  // during this call and recursive subcalls).
  CurrentInliningSet = SetFactory.add(CurrentInliningSet, F);

  SmallVector<ParameterConvention, 16> CapturedArgConventions;
  SmallVector<SILValue, 32> FullArgs;

  bool invalidatedStackNesting = false;

  // Visiting blocks in reverse order avoids revisiting instructions after
  // block splitting, which would be quadratic.
  for (auto BI = F->rbegin(), BE = F->rend(), nextBB = BI; BI != BE;
       BI = nextBB) {
    // After inlining, the block iterator will be adjusted to point to the
    // last block containing inlined instructions. This way, the inlined
    // function body will be reprocessed within the caller's context without
    // revisiting any original instructions.
    nextBB = std::next(BI);

    // While iterating over this block, instructions are inserted and deleted.
    // To avoid quadratic block splitting, instructions must be processed in
    // reverse order (block splitting reassigns the parent pointer of all
    // instructions below the split point).
    for (auto II = BI->rbegin(); II != BI->rend(); ++II) {
      FullApplySite InnerAI = FullApplySite::isa(&*II);
      if (!InnerAI)
        continue;

      // *NOTE* If devirtualization succeeds, devirtInst may not be InnerAI,
      // but a casted result of InnerAI or even a block argument due to
      // abstraction changes when calling the witness or class method.
      auto *devirtInst = tryDevirtualizeApplyHelper(pm, InnerAI, CHA);

      // If devirtualization succeeds, make sure we record that this function
      // changed.
      if (devirtInst != InnerAI.getInstruction())
        changedFunctions.insert(F);

      // Restore II to the current apply site.
      II = devirtInst->getReverseIterator();

      // If the devirtualized call result is no longer a valid FullApplySite,
      // then devirtualization has succeeded, but the result is not
      // immediately inlinable.
      InnerAI = FullApplySite::isa(devirtInst);
      if (!InnerAI)
        continue;

      SILValue CalleeValue = InnerAI.getCallee();
      bool IsThick;
      PartialApplyInst *PAI;
      SILFunction *CalleeFunction = getCalleeFunction(
          F, InnerAI, IsThick, CapturedArgConventions, FullArgs, PAI);
      if (!CalleeFunction)
        continue;

      // Then recursively process it first before trying to inline it.
      if (!runOnFunctionRecursively(FuncBuilder, pm, CalleeFunction, InnerAI,
                                    FullyInlinedSet, SetFactory,
                                    CurrentInliningSet, CHA,
                                    changedFunctions)) {
        // If we failed due to circular inlining, then emit some notes to
        // trace back the failure if we have more information.
        // FIXME: possibly it could be worth recovering and attempting other
        // inlines within this same recursive call rather than simply
        // propagating the failure.
        if (AI) {
          SILLocation L = AI.getLoc();
          assert(L && "Must have location for transparent inline apply");
          diagnose(F->getModule().getASTContext(), L.getStartSourceLoc(),
                   diag::note_while_inlining);
        }
        return false;
      }

      // Get our list of substitutions.
      auto Subs =
          (PAI ? PAI->getSubstitutionMap() : InnerAI.getSubstitutionMap());

      // Register a callback to record potentially unused function values
      // after inlining.
      ClosureCleanup closureCleanup;
      InstructionDeleter deleter(InstModCallbacks().onNotifyWillBeDeleted(
          [&closureCleanup](SILInstruction *I) {
            closureCleanup.recordDeadFunction(I);
          }));

      SILInliner Inliner(FuncBuilder, deleter,
                         SILInliner::InlineKind::MandatoryInline, Subs);
      if (!Inliner.canInlineApplySite(InnerAI))
        continue;

      // Inline function at I, which also changes I to refer to the first
      // instruction inlined in the case that it succeeds. We purposely
      // process the inlined body after inlining, because the inlining may
      // have exposed new inlining opportunities beyond those present in the
      // inlined function when processed independently.
      LLVM_DEBUG(llvm::errs() << "Inlining @" << CalleeFunction->getName()
                              << " into @" << InnerAI.getFunction()->getName()
                              << "\n");

      // If we intend to inline a partial_apply function that is not on the
      // stack, then we need to balance the reference counts for correctness.
      //
      // NOTE: If our partial apply is on the stack, it only has
      // non-consuming (and hopefully eventually guaranteed) uses of the
      // captured arguments.
      //
      // NOTE: If we have a thin_to_thick_function, we do not need to worry
      // about such things since a thin_to_thick_function does not capture any
      // arguments.
      if (PAI && PAI->isOnStack() == PartialApplyInst::NotOnStack) {
        bool IsCalleeGuaranteed =
            PAI->getType().castTo<SILFunctionType>()->isCalleeGuaranteed();
        auto CapturedArgs = MutableArrayRef<SILValue>(FullArgs).take_back(
            CapturedArgConventions.size());
        // We need to insert the copies before the partial_apply since, if we
        // cannot remove the partial_apply, the captured values will be dead
        // by the time we hit the call site.
        invalidatedStackNesting |= fixupReferenceCounts(
            PAI, InnerAI, CalleeValue, CapturedArgConventions, CapturedArgs,
            IsCalleeGuaranteed);
      }

      invalidatedStackNesting |= Inliner.invalidatesStackNesting(InnerAI);

      if (SILPrintInliningCallee) {
        printInliningDetailsCallee("MandatoryInlining", F, CalleeFunction);
      }
      if (SILPrintInliningCallerBefore) {
        printInliningDetailsCallerBefore("MandatoryInlining", F,
                                         CalleeFunction);
      }

      // Inlining deletes the apply, and can introduce multiple new basic
      // blocks. After this, CalleeValue and other instructions may be
      // invalid. nextBB will point to the last inlined block.
      SILBasicBlock *lastBB =
          Inliner.inlineFunction(CalleeFunction, InnerAI, FullArgs);

      // When inlining an OSSA function into a non-OSSA function, ownership of
      // nonescaping closures is lowered. At that point, they are recognized
      // as stack users. Since they weren't recognized as such before, they
      // may not satisfy stack discipline. Fix that up now.
      invalidatedStackNesting |=
          (CalleeFunction->hasOwnership() && !F->hasOwnership());

      if (SILPrintInliningCallerAfter) {
        printInliningDetailsCallerAfter("MandatoryInlining", F,
                                        CalleeFunction);
      }

      nextBB = lastBB->getReverseIterator();
      ++NumMandatoryInlines;

      deleter.cleanupDeadInstructions();

      // The IR is now valid, and trivial dead arguments are removed. However,
      // we may be able to remove dead callee computations (e.g. dead
      // partial_apply closures).
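      //
      // For example (illustrative): if the only use of
      //   %closure = partial_apply %fn(%capture)
      // was the apply we just inlined, the cleanup below can delete the now
      // dead partial_apply and compensate for the +1 captures it consumed.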
      closureCleanup.cleanupDeadClosures(F);
      invalidatedStackNesting |= closureCleanup.invalidatedStackNesting;

      // Record that we inlined into this function so that we can invalidate
      // it later.
      changedFunctions.insert(F);

      if (EnableVerifyAfterEachInlining) {
        if (invalidatedStackNesting) {
          StackNesting::fixNesting(F);
          changedFunctions.insert(F);
          invalidatedStackNesting = false;
        }
        F->verify();
      }

      // Resume inlining within nextBB, which contains only the inlined
      // instructions and possibly instructions in the original call block
      // that have not yet been visited.
      break;
    }
  }

  if (invalidatedStackNesting) {
    StackNesting::fixNesting(F);
    changedFunctions.insert(F);
  }

  // Keep track of fully inlined functions so we don't waste time recursively
  // reprocessing them.
  FullyInlinedSet.insert(F);
  return true;
}

//===----------------------------------------------------------------------===//
//                              Top Level Driver
//===----------------------------------------------------------------------===//

namespace {

class MandatoryInlining : public SILModuleTransform {
  /// The entry point to the transformation.
  void run() override {
    ClassHierarchyAnalysis *CHA = getAnalysis<ClassHierarchyAnalysis>();
    SILModule *M = getModule();
    bool SILVerifyAll = getOptions().VerifyAll;
    DenseFunctionSet FullyInlinedSet;
    ImmutableFunctionSet::Factory SetFactory;
    DenseFunctionSet changedFunctions;

    SILOptFunctionBuilder FuncBuilder(*this);
    for (auto &F : *M) {
      switch (F.isThunk()) {
      case IsThunk_t::IsThunk:
      case IsThunk_t::IsReabstractionThunk:
      case IsThunk_t::IsSignatureOptimizedThunk:
        // Don't inline into most thunks, even transparent callees.
        continue;
      case IsThunk_t::IsNotThunk:
      case IsThunk_t::IsBackDeployedThunk:
        // For correctness, inlining _stdlib_isOSVersionAtLeast() when it is
        // declared transparent is mandatory in the thunks of @backDeployed
        // functions. These thunks will not contain calls to other transparent
        // functions.
        break;
      }

      // Skip deserialized functions.
      if (F.wasDeserializedCanonical())
        continue;

      runOnFunctionRecursively(FuncBuilder, getPassManager(), &F,
                               FullApplySite(), FullyInlinedSet, SetFactory,
                               SetFactory.getEmptySet(), CHA,
                               changedFunctions);

      // The inliner splits blocks at call sites. Re-merge trivial branches to
      // reestablish a canonical CFG.
      if (mergeBasicBlocks(&F)) {
        changedFunctions.insert(&F);
      }

      // If we are asked to perform SIL verify-all, perform that now so that
      // we can discover the immediate inlining trigger of the problematic
      // function.
      if (SILVerifyAll) {
        F.verify();
      }
    }

    if (getOptions().DebugSerialization)
      return;

    for (auto *F : changedFunctions) {
      invalidateAnalysis(F, SILAnalysis::InvalidationKind::FunctionBody);
    }
  }
};

} // end anonymous namespace

SILTransform *swift::createMandatoryInlining() {
  return new MandatoryInlining();
}