//===--- ClosureSpecializer.cpp - Performs Closure Specialization ---------===// // // This source file is part of the Swift.org open source project // // Copyright (c) 2014 - 2017 Apple Inc. and the Swift project authors // Licensed under Apache License v2.0 with Runtime Library Exception // // See https://swift.org/LICENSE.txt for license information // See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors // //===----------------------------------------------------------------------===// /// /// \file /// /// Closure Specialization /// ---------------------- /// /// The purpose of the algorithm in this file is to perform the following /// transformation: given a closure passed into a function which the closure is /// then invoked in, clone the function and create a copy of the closure inside /// the function. This closure will be able to be eliminated easily and the /// overhead is gone. We then try to remove the original closure. /// /// There are some complications. They are listed below and how we work around /// them: /// /// 1. If we support the specialization of closures with multiple user callsites /// that can be specialized, we need to ensure that any captured values have /// their reference counts adjusted properly. This implies for every /// specialized call site, we insert an additional retain for each captured /// argument with reference semantics. We will pass them in as extra @owned /// to the specialized function. This @owned will be consumed by the "copy" /// partial apply that is in the specialized function. Now the partial apply /// will own those ref counts. This is unapplicable to thin_to_thick_function /// since they do not have any captured args. /// /// 2. If the closure was passed in @owned vs if the closure was passed in /// @guaranteed. If the original closure was passed in @owned, then we know /// that there is a balancing release for the new "copy" partial apply. But /// since the original partial apply no longer will have that corresponding /// -1, we need to insert a release for the old partial apply. We do this /// right after the old call site where the original partial apply was /// called. This ensures we do not shrink the lifetime of the old partial /// apply. In the case where the old partial_apply was passed in at +0, we /// know that the old partial_apply does not need to have any ref count /// adjustments. On the other hand, the new "copy" partial apply in the /// specialized function now needs to be balanced lest we leak. Thus we /// insert a release right before any exit from the function. This ensures /// that the release occurs in the epilog after any retains associated with /// @owned return values. /// /// 3. In !useLoweredAddresses mode, we do not support specialization of closures /// with arguments passed using any indirect calling conventions besides /// @inout and @inout_aliasable. This is a temporary limitation that goes /// away with sil-opaque-values. //===----------------------------------------------------------------------===// #define DEBUG_TYPE "closure-specialization" #include "swift/SILOptimizer/IPO/ClosureSpecializer.h" #include "swift/Basic/Assertions.h" #include "swift/Basic/Range.h" #include "swift/Demangling/Demangle.h" #include "swift/Demangling/Demangler.h" #include "swift/SIL/InstructionUtils.h" #include "swift/SIL/SILCloner.h" #include "swift/SIL/SILFunction.h" #include "swift/SIL/SILInstruction.h" #include "swift/SIL/SILModule.h" #include "swift/SILOptimizer/Analysis/BasicCalleeAnalysis.h" #include "swift/SILOptimizer/Analysis/FunctionOrder.h" #include "swift/SILOptimizer/Analysis/ValueTracking.h" #include "swift/SILOptimizer/PassManager/Passes.h" #include "swift/SILOptimizer/PassManager/Transforms.h" #include "swift/SILOptimizer/Utils/CFGOptUtils.h" #include "swift/SILOptimizer/Utils/InstOptUtils.h" #include "swift/SILOptimizer/Utils/SILInliner.h" #include "swift/SILOptimizer/Utils/SILOptFunctionBuilder.h" #include "swift/SILOptimizer/Utils/SpecializationMangler.h" #include "swift/SILOptimizer/Utils/StackNesting.h" #include "swift/SILOptimizer/Utils/ValueLifetime.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/Statistic.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" using namespace swift; STATISTIC(NumClosureSpecialized, "Number of functions with closures specialized"); STATISTIC(NumPropagatedClosuresEliminated, "Number of closures propagated and then eliminated"); STATISTIC(NumPropagatedClosuresNotEliminated, "Number of closures propagated but not eliminated"); llvm::cl::opt EliminateDeadClosures( "closure-specialize-eliminate-dead-closures", llvm::cl::init(true), llvm::cl::desc("Do not eliminate dead closures after closure " "specialization. This is meant to be used when testing.")); //===----------------------------------------------------------------------===// // Utility //===----------------------------------------------------------------------===// static bool isSupportedClosureKind(const SILInstruction *I) { return isa(I) || isa(I); } static const int SpecializationLevelLimit = 2; static int getSpecializationLevelRecursive(StringRef funcName, Demangler &parent) { using namespace Demangle; Demangler demangler; demangler.providePreallocatedMemory(parent); // Check for this kind of node tree: // // kind=Global // kind=FunctionSignatureSpecialization // kind=SpecializationPassID, index=1 // kind=FunctionSignatureSpecializationParam // kind=FunctionSignatureSpecializationParamKind, index=5 // kind=FunctionSignatureSpecializationParamPayload, text="..." // Node *root = demangler.demangleSymbol(funcName); if (!root) return 0; if (root->getKind() != Node::Kind::Global) return 0; Node *funcSpec = root->getFirstChild(); if (!funcSpec || funcSpec->getNumChildren() < 2) return 0; if (funcSpec->getKind() != Node::Kind::FunctionSignatureSpecialization) return 0; // Match any function specialization. We check for constant propagation at the // parameter level. Node *param = funcSpec->getChild(0); if (param->getKind() != Node::Kind::SpecializationPassID) return SpecializationLevelLimit + 1; // unrecognized format unsigned maxParamLevel = 0; for (unsigned paramIdx = 1; paramIdx < funcSpec->getNumChildren(); ++paramIdx) { Node *param = funcSpec->getChild(paramIdx); if (param->getKind() != Node::Kind::FunctionSignatureSpecializationParam) return SpecializationLevelLimit + 1; // unrecognized format // A parameter is recursive if it has a kind with index and type payload if (param->getNumChildren() < 2) continue; Node *kindNd = param->getChild(0); if (kindNd->getKind() != Node::Kind::FunctionSignatureSpecializationParamKind) { return SpecializationLevelLimit + 1; // unrecognized format } auto kind = FunctionSigSpecializationParamKind(kindNd->getIndex()); if (kind != FunctionSigSpecializationParamKind::ConstantPropFunction) continue; Node *payload = param->getChild(1); if (payload->getKind() != Node::Kind::Identifier) { return SpecializationLevelLimit + 1; // unrecognized format } // Check if the specialized function is a specialization itself. unsigned paramLevel = 1 + getSpecializationLevelRecursive(payload->getText(), demangler); if (paramLevel > maxParamLevel) maxParamLevel = paramLevel; } return maxParamLevel; } //===----------------------------------------------------------------------===// // Publicly visible for bridging //===----------------------------------------------------------------------===// int swift::getSpecializationLevel(SILFunction *f) { Demangle::StackAllocatedDemangler<1024> demangler; return getSpecializationLevelRecursive(f->getName(), demangler); } bool swift::isDifferentiableFuncComponent( SILFunction *f, AutoDiffFunctionComponent expectedComponent) { Demangle::Context Ctx; if (auto *root = Ctx.demangleSymbolAsNode(f->getName())) { if (auto *node = root->findByKind(Demangle::Node::Kind::AutoDiffFunctionKind, 3)) { if (node->hasIndex()) { auto component = (char)node->getIndex(); if (component == (char)expectedComponent) { return true; } } } } return false; } //===----------------------------------------------------------------------===// // Closure Spec Cloner Interface //===----------------------------------------------------------------------===// namespace { class CallSiteDescriptor; /// A SILCloner subclass which clones a function that takes a closure /// argument. We update the parameter list to remove the parameter for the /// closure argument and to append the variables captured in the closure. /// We also need to replace the closure parameter with the partial apply /// on the closure. We need to update the callsite to pass in the correct /// arguments. class ClosureSpecCloner : public SILClonerWithScopes { public: using SuperTy = SILClonerWithScopes; friend class SILInstructionVisitor; friend class SILCloner; ClosureSpecCloner(SILOptFunctionBuilder &FunctionBuilder, const CallSiteDescriptor &CallSiteDesc, StringRef ClonedName) : SuperTy(*initCloned(FunctionBuilder, CallSiteDesc, ClonedName)), CallSiteDesc(CallSiteDesc) {} void populateCloned(); SILValue cloneCalleeConversion(SILValue calleeValue, SILValue NewClosure, SILBuilder &Builder, SmallVectorImpl &NeedsRelease, llvm::DenseMap &CapturedMap); SILFunction *getCloned() { return &getBuilder().getFunction(); } static SILFunction *cloneFunction(SILOptFunctionBuilder &FunctionBuilder, const CallSiteDescriptor &CallSiteDesc, StringRef NewName) { ClosureSpecCloner C(FunctionBuilder, CallSiteDesc, NewName); C.populateCloned(); ++NumClosureSpecialized; return C.getCloned(); }; private: static SILFunction *initCloned(SILOptFunctionBuilder &FunctionBuilder, const CallSiteDescriptor &CallSiteDesc, StringRef ClonedName); const CallSiteDescriptor &CallSiteDesc; }; } // end anonymous namespace //===----------------------------------------------------------------------===// // Call Site Descriptor //===----------------------------------------------------------------------===// namespace { struct ClosureInfo; static SILFunction *getClosureCallee(SILInstruction *inst) { if (auto *PAI = dyn_cast(inst)) return cast(PAI->getCallee())->getReferencedFunction(); auto *TTTFI = cast(inst); return cast(TTTFI->getCallee())->getReferencedFunction(); } class CallSiteDescriptor { ClosureInfo *CInfo; FullApplySite AI; unsigned ClosureIndex; SILParameterInfo ClosureParamInfo; // This is only needed if we have guaranteed parameters. In most cases it will // have only one element, a return inst. llvm::TinyPtrVector NonFailureExitBBs; public: CallSiteDescriptor(ClosureInfo *CInfo, FullApplySite AI, unsigned ClosureIndex, SILParameterInfo ClosureParamInfo, llvm::TinyPtrVector &&NonFailureExitBBs) : CInfo(CInfo), AI(AI), ClosureIndex(ClosureIndex), ClosureParamInfo(ClosureParamInfo), NonFailureExitBBs(NonFailureExitBBs) {} CallSiteDescriptor(CallSiteDescriptor&&) =default; CallSiteDescriptor &operator=(CallSiteDescriptor &&) =default; SILFunction *getApplyCallee() const { return cast(AI.getCallee())->getReferencedFunction(); } SILFunction *getClosureCallee() const { return ::getClosureCallee(getClosure()); } bool closureHasRefSemanticContext() const { return isa(getClosure()) && !cast(getClosure())->isOnStack(); } bool destroyIfPartialApplyStack(SILBuilder &B, SingleValueInstruction *newClosure) const { auto *PA = dyn_cast(newClosure); if (!PA || !PA->isOnStack()) return false; if (B.getFunction().hasOwnership()) { // Under OSSA, the closure acts as an owned value whose lifetime is a // borrow scope for the captures, so we need to end the borrow scope // before ending the lifetimes of the captures themselves. B.createDestroyValue(getClosure()->getLoc(), PA); insertDestroyOfCapturedArguments(PA, B); // The stack slot for the partial_apply doesn't get reified until after // OSSA. return false; } else { insertDestroyOfCapturedArguments(PA, B); B.createDeallocStack(getClosure()->getLoc(), PA); return true; } } unsigned getClosureIndex() const { return ClosureIndex; } // Get the closure value passed to the apply (on the caller side). SILValue getClosureCallerArg() const { return getApplyInst().getArgument(ClosureIndex); } SILParameterInfo getClosureParameterInfo() const { return ClosureParamInfo; } SingleValueInstruction * createNewClosure(SILBuilder &B, SILValue V, llvm::SmallVectorImpl &Args) const { if (auto *PA = dyn_cast(getClosure())) return B.createPartialApply(getClosure()->getLoc(), V, {}, Args, PA->getCalleeConvention(), PA->getResultIsolation(), PA->isOnStack()); assert(isa(getClosure()) && "We only support partial_apply and thin_to_thick_function"); return B.createThinToThickFunction(getClosure()->getLoc(), V, getClosure()->getType()); } FullApplySite getApplyInst() const { return AI; } bool isSerialized() const; SerializedKind_t getSerializedKind() const; std::string createName() const; OperandValueArrayRef getArguments() const { if (auto *PAI = dyn_cast(getClosure())) return PAI->getArguments(); // Thin to thick function has no non-callee arguments. assert(isa(getClosure()) && "We only support partial_apply and thin_to_thick_function"); return OperandValueArrayRef(ArrayRef()); } inline SingleValueInstruction *getClosure() const; unsigned getNumArguments() const { if (auto *PAI = dyn_cast(getClosure())) return PAI->getNumArguments(); // Thin to thick function has no non-callee arguments. assert(isa(getClosure()) && "We only support partial_apply and thin_to_thick_function"); return 0; } bool isClosureGuaranteed() const { return getClosureParameterInfo().isGuaranteedInCaller(); } bool isClosureConsumed() const { return getClosureParameterInfo().isConsumedInCaller(); } bool isClosureOnStack() const { auto *PA = dyn_cast(getClosure()); if (!PA) return false; return PA->isOnStack(); } bool isTrivialNoEscapeParameter() const { auto ClosureParmFnTy = getClosureParameterInfo().getInterfaceType()->getAs(); return ClosureParmFnTy->isTrivialNoEscape(); } SILLocation getLoc() const { return getClosure()->getLoc(); } SILModule &getModule() const { return AI.getModule(); } ArrayRef getNonFailureExitBBs() const { return NonFailureExitBBs; } /// Extend the lifetime of 'Arg' to the lifetime of the closure. void extendArgumentLifetime(SILValue Arg, SILArgumentConvention ArgConvention) const; }; } // end anonymous namespace namespace { struct ClosureInfo { SingleValueInstruction *Closure; ValueLifetimeAnalysis::Frontier LifetimeFrontier; llvm::SmallVector CallSites; ClosureInfo(SingleValueInstruction *Closure) : Closure(Closure) {} ClosureInfo(ClosureInfo &&) =default; ClosureInfo &operator=(ClosureInfo &&) =default; }; } // end anonymous namespace SingleValueInstruction *CallSiteDescriptor::getClosure() const { return CInfo->Closure; } static bool isNonInoutIndirectSILArgument(SILValue Arg, SILArgumentConvention ArgConvention) { return !Arg->getType().isObject() && ArgConvention.isIndirectConvention() && ArgConvention != SILArgumentConvention::Indirect_Inout && ArgConvention != SILArgumentConvention::Indirect_InoutAliasable; } /// Update the callsite to pass in the correct arguments. static void rewriteApplyInst(const CallSiteDescriptor &CSDesc, SILFunction *NewF) { FullApplySite AI = CSDesc.getApplyInst(); SingleValueInstruction *Closure = CSDesc.getClosure(); SILBuilderWithScope Builder(Closure); FunctionRefInst *FRI = Builder.createFunctionRef(AI.getLoc(), NewF); // Create the args for the new apply by removing the closure argument... llvm::SmallVector NewArgs; unsigned Index = 0; for (auto Arg : AI.getArguments()) { if (Index != CSDesc.getClosureIndex()) NewArgs.push_back(Arg); ++Index; } // ... and appending the captured arguments. We also insert retains here at // the location of the original closure. This is needed to balance the // implicit release of all captured arguments that occurs when the partial // apply is destroyed. auto ClosureCalleeConv = CSDesc.getClosureCallee()->getConventions(); unsigned ClosureArgIdx = ClosureCalleeConv.getNumSILArguments() - CSDesc.getNumArguments(); for (auto Arg : CSDesc.getArguments()) { SILType ArgTy = Arg->getType(); // If our argument is of trivial type, continue... if (ArgTy.isTrivial(*NewF)) { NewArgs.push_back(Arg); ++ClosureArgIdx; continue; } auto ArgConvention = ClosureCalleeConv.getSILArgumentConvention(ClosureArgIdx); // Non-inout indirect arguments are not supported yet. assert(ArgTy.isObject() || !isNonInoutIndirectSILArgument(Arg, ArgConvention)); // If argument is not an object and it is an inout parameter, // continue... if (!ArgTy.isObject() && !isNonInoutIndirectSILArgument(Arg, ArgConvention)) { NewArgs.push_back(Arg); ++ClosureArgIdx; continue; } // TODO: When we support address types, this code path will need to be // updated. // We need to balance the consumed argument of the new partial_apply in the // specialized callee by a retain. If both the original partial_apply and // the apply of the callee are in the same basic block we can assume they // are executed the same number of times. Therefore it is sufficient to just // retain the argument at the site of the original partial_apply. // // %closure = partial_apply (%arg) // = apply %callee(%closure) // => // retain %arg // %closure = partial_apply (%arg) // apply %specialized_callee(..., %arg) // // However, if they are not in the same basic block the callee might be // executed more frequently than the closure (for example, if the closure is // created in a loop preheader and the callee taking the closure is executed // in the loop). In such a case we must keep the argument live across the // call site of the callee and emit a matching retain for every invocation // of the callee. // // %closure = partial_apply (%arg) // // while () { // = %callee(%closure) // } // => // retain %arg // %closure = partial_apply (%arg) // // while () { // retain %arg // apply %specialized_callee(.., %arg) // } // release %arg // if (AI.getParent() != Closure->getParent()) { // Emit the retain and release that keeps the argument life across the // callee using the closure. CSDesc.extendArgumentLifetime(Arg, ArgConvention); // Emit the retain that matches the captured argument by the // partial_apply // in the callee that is consumed by the partial_apply. Builder.setInsertionPoint(AI.getInstruction()); Builder.createRetainValue(Closure->getLoc(), Arg, Builder.getDefaultAtomicity()); } else { Builder.createRetainValue(Closure->getLoc(), Arg, Builder.getDefaultAtomicity()); } NewArgs.push_back(Arg); ++ClosureArgIdx; } Builder.setInsertionPoint(AI.getInstruction()); FullApplySite NewAI; switch (AI.getKind()) { case FullApplySiteKind::TryApplyInst: { auto *TAI = cast(AI); NewAI = Builder.createTryApply(AI.getLoc(), FRI, SubstitutionMap(), NewArgs, TAI->getNormalBB(), TAI->getErrorBB(), TAI->getApplyOptions()); // If we passed in the original closure as @owned, then insert a release // right after NewAI. This is to balance the +1 from being an @owned // argument to AI. if (!CSDesc.isClosureConsumed() || CSDesc.isTrivialNoEscapeParameter() || !CSDesc.closureHasRefSemanticContext()) { break; } Builder.setInsertionPoint(TAI->getNormalBB()->begin()); Builder.createReleaseValue(Closure->getLoc(), Closure, Builder.getDefaultAtomicity()); Builder.setInsertionPoint(TAI->getErrorBB()->begin()); Builder.createReleaseValue(Closure->getLoc(), Closure, Builder.getDefaultAtomicity()); Builder.setInsertionPoint(AI.getInstruction()); break; } case FullApplySiteKind::ApplyInst: { auto oldApply = cast(AI); auto newApply = Builder.createApply(oldApply->getLoc(), FRI, SubstitutionMap(), NewArgs, oldApply->getApplyOptions()); // If we passed in the original closure as @owned, then insert a release // right after NewAI. This is to balance the +1 from being an @owned // argument to AI. if (CSDesc.isClosureConsumed() && !CSDesc.isTrivialNoEscapeParameter() && CSDesc.closureHasRefSemanticContext()) Builder.createReleaseValue(Closure->getLoc(), Closure, Builder.getDefaultAtomicity()); // Replace all uses of the old apply with the new apply. oldApply->replaceAllUsesWith(newApply); break; } case FullApplySiteKind::BeginApplyInst: llvm_unreachable("Unhandled case"); } // Erase the old apply. AI.getInstruction()->eraseFromParent(); // TODO: Maybe include invalidation code for CallSiteDescriptor after we erase // AI from parent? } bool CallSiteDescriptor::isSerialized() const { return getClosure()->getFunction()->getSerializedKind() == IsSerialized; } SerializedKind_t CallSiteDescriptor::getSerializedKind() const { return getClosure()->getFunction()->getSerializedKind(); } std::string CallSiteDescriptor::createName() const { auto P = Demangle::SpecializationPass::ClosureSpecializer; Mangle::FunctionSignatureSpecializationMangler Mangler(getApplyCallee()->getASTContext(), P, getSerializedKind(), getApplyCallee()); if (auto *PAI = dyn_cast(getClosure())) { Mangler.setArgumentClosureProp(getClosureIndex(), PAI); } else { auto *TTTFI = cast(getClosure()); Mangler.setArgumentClosureProp(getClosureIndex(), TTTFI); } return Mangler.mangle(); } void CallSiteDescriptor::extendArgumentLifetime( SILValue Arg, SILArgumentConvention ArgConvention) const { assert(!CInfo->LifetimeFrontier.empty() && "Need a post-dominating release(s)"); auto ArgTy = Arg->getType(); // Extend the lifetime of a captured argument to cover the callee. SILBuilderWithScope Builder(getClosure()); // Indirect non-inout arguments are not supported yet. assert(!isNonInoutIndirectSILArgument(Arg, ArgConvention)); if (ArgTy.isObject()) { Builder.createRetainValue(getClosure()->getLoc(), Arg, Builder.getDefaultAtomicity()); for (auto *I : CInfo->LifetimeFrontier) { Builder.setInsertionPoint(I); Builder.createReleaseValue(getClosure()->getLoc(), Arg, Builder.getDefaultAtomicity()); } } } static bool isSupportedClosure(const SILInstruction *Closure) { if (!isSupportedClosureKind(Closure)) return false; // We only support simple closures where a partial_apply or // thin_to_thick_function is passed a function_ref. This will be stored here // so the checking of the Callee can use the same code in both cases. SILValue Callee; // If Closure is a partial apply... if (auto *PAI = dyn_cast(Closure)) { // And it has substitutions, return false. if (PAI->hasSubstitutions()) return false; // Ok, it is a closure we support, set Callee. Callee = PAI->getCallee(); } else { // Otherwise closure must be a thin_to_thick_function. Callee = cast(Closure)->getCallee(); } // Make sure that it is a simple partial apply (i.e. its callee is a // function_ref). // // TODO: We can probably handle other partial applies here. auto *FRI = dyn_cast_or_null(Callee); if (!FRI) return false; if (auto *PAI = dyn_cast(Closure)) { // Check whether each argument is supported. auto ClosureCallee = FRI->getReferencedFunction(); auto ClosureCalleeConv = ClosureCallee->getConventions(); unsigned ClosureArgIdxBase = ClosureCalleeConv.getNumSILArguments() - PAI->getNumArguments(); for (auto pair : llvm::enumerate(PAI->getArguments())) { auto Arg = pair.value(); auto ClosureArgIdx = pair.index() + ClosureArgIdxBase; auto ArgConvention = ClosureCalleeConv.getSILArgumentConvention(ClosureArgIdx); SILType ArgTy = Arg->getType(); // Specializing (currently) always produces a retain in the caller. // That's not allowed for values of move-only type. if (ArgTy.isMoveOnly()) { return false; } // Bail if it's an ObjectiveC block which might _not_ be copied onto the heap, i.e // optimized by SimplifyCopyBlock. We can't do this because the optimization inserts // retains+releases for captured arguments. That's not possible for stack-allocated blocks. // TODO: avoid inserting retains+releases at all for captures of `partial_apply [on_stack]` if (ArgTy.is() && ArgTy.getFunctionRepresentation() == SILFunctionTypeRepresentation::Block && // A `copy_block` ensures that the block is copied onto the heap. !isa(Arg) && // SimplifyCopyBlock only works for `partial_apply [on_stack]`. PAI->isOnStack()) { return false; } // Only @inout/@inout_aliasable addresses are (currently) supported. // If our argument is an object, continue... if (ArgTy.isObject()) { ++ClosureArgIdx; continue; } if (ArgConvention != SILArgumentConvention::Indirect_Inout && ArgConvention != SILArgumentConvention::Indirect_InoutAliasable) return false; ++ClosureArgIdx; } } // Otherwise, we do support specializing this closure. return true; } //===----------------------------------------------------------------------===// // Closure Spec Cloner Implementation //===----------------------------------------------------------------------===// /// In this function we create the actual cloned function and its proper cloned /// type. But we do not create any body. This implies that the creation of the /// actual arguments in the function is in populateCloned. /// /// \arg PAUser The function that is being passed the partial apply. /// \arg PAI The partial apply that is being passed to PAUser. /// \arg ClosureIndex The index of the partial apply in PAUser's function /// signature. /// \arg ClonedName The name of the cloned function that we will create. SILFunction * ClosureSpecCloner::initCloned(SILOptFunctionBuilder &FunctionBuilder, const CallSiteDescriptor &CallSiteDesc, StringRef ClonedName) { SILFunction *ClosureUser = CallSiteDesc.getApplyCallee(); // This is the list of new interface parameters of the cloned function. llvm::SmallVector NewParameterInfoList; // First add to NewParameterInfoList all of the SILParameterInfo in the // original function except for the closure. CanSILFunctionType ClosureUserFunTy = ClosureUser->getLoweredFunctionType(); auto ClosureUserConv = ClosureUser->getConventions(); unsigned Index = ClosureUserConv.getSILArgIndexOfFirstParam(); for (auto ¶m : ClosureUserConv.getParameters()) { if (Index != CallSiteDesc.getClosureIndex()) NewParameterInfoList.push_back(param); ++Index; } // Then add any arguments that are captured in the closure to the function's // argument type. Since they are captured, we need to pass them directly into // the new specialized function. SILFunction *ClosedOverFun = CallSiteDesc.getClosureCallee(); auto ClosedOverFunConv = ClosedOverFun->getConventions(); SILModule &M = ClosureUser->getModule(); // Captured parameters are always appended to the function signature. If the // type of the captured argument is: // - direct and trivial, pass the argument as Direct_Unowned. // - direct and non-trivial, pass the argument as Direct_Owned. // - indirect, pass the argument using the same parameter convention as in the // original closure. // // We use the type of the closure here since we allow for the closure to be an // external declaration. unsigned NumTotalParams = ClosedOverFunConv.getNumParameters(); unsigned NumNotCaptured = NumTotalParams - CallSiteDesc.getNumArguments(); for (auto &PInfo : ClosedOverFunConv.getParameters().slice(NumNotCaptured)) { ParameterConvention ParamConv; if (PInfo.isFormalIndirect()) { ParamConv = PInfo.getConvention(); assert(!SILModuleConventions(M).useLoweredAddresses() || ParamConv == ParameterConvention::Indirect_Inout || ParamConv == ParameterConvention::Indirect_InoutAliasable); } else { ParamConv = ClosedOverFunConv .getSILType(PInfo, CallSiteDesc.getApplyInst() .getFunction() ->getTypeExpansionContext()) .isTrivial(*ClosureUser) ? ParameterConvention::Direct_Unowned : ParameterConvention::Direct_Owned; } SILParameterInfo NewPInfo(PInfo.getInterfaceType(), ParamConv); NewParameterInfoList.push_back(NewPInfo); } // The specialized function is always a thin function. This is important // because we may add additional parameters after the Self parameter of // witness methods. In this case the new function is not a method anymore. auto ExtInfo = ClosureUserFunTy->getExtInfo(); ExtInfo = ExtInfo.withRepresentation(SILFunctionTypeRepresentation::Thin); auto ClonedTy = SILFunctionType::get( ClosureUserFunTy->getInvocationGenericSignature(), ExtInfo, ClosureUserFunTy->getCoroutineKind(), ClosureUserFunTy->getCalleeConvention(), NewParameterInfoList, ClosureUserFunTy->getYields(), ClosureUserFunTy->getResults(), ClosureUserFunTy->getOptionalErrorResult(), ClosureUserFunTy->getPatternSubstitutions(), ClosureUserFunTy->getInvocationSubstitutions(), M.getASTContext()); // We make this function bare so we don't have to worry about decls in the // SILArgument. auto *Fn = FunctionBuilder.createFunction( // It's important to use a shared linkage for the specialized function // and not the original linkage. // Otherwise the new function could have an external linkage (in case the // original function was de-serialized) and would not be code-gen'd. // It's also important to disconnect this specialized function from any // classes (the classSubclassScope), because that may incorrectly // influence the linkage. getSpecializedLinkage(ClosureUser, ClosureUser->getLinkage()), ClonedName, ClonedTy, ClosureUser->getGenericEnvironment(), ClosureUser->getLocation(), IsBare, ClosureUser->isTransparent(), CallSiteDesc.getSerializedKind(), IsNotDynamic, IsNotDistributed, IsNotRuntimeAccessible, ClosureUser->getEntryCount(), ClosureUser->isThunk(), /*classSubclassScope=*/SubclassScope::NotApplicable, ClosureUser->getInlineStrategy(), ClosureUser->getEffectsKind(), ClosureUser, ClosureUser->getDebugScope()); if (!ClosureUser->hasOwnership()) { Fn->setOwnershipEliminated(); } for (auto &Attr : ClosureUser->getSemanticsAttrs()) Fn->addSemanticsAttr(Attr); return Fn; } // Clone a chain of ConvertFunctionInsts. SILValue ClosureSpecCloner::cloneCalleeConversion( SILValue calleeValue, SILValue NewClosure, SILBuilder &Builder, SmallVectorImpl &NeedsRelease, llvm::DenseMap &CapturedMap) { // There might be a mark dependence on a previous closure value. Therefore, we // add all closure values to the map. auto addToOldToNewClosureMap = [&](SILValue origValue, SILValue newValue) -> SILValue { assert(!CapturedMap.count(origValue)); CapturedMap[origValue] = newValue; return newValue; }; if (calleeValue == CallSiteDesc.getClosure()) return addToOldToNewClosureMap(calleeValue, NewClosure); if (auto *CFI = dyn_cast(calleeValue)) { SILValue origCalleeValue = calleeValue; calleeValue = cloneCalleeConversion(CFI->getOperand(), NewClosure, Builder, NeedsRelease, CapturedMap); return addToOldToNewClosureMap( origCalleeValue, Builder.createConvertFunction( CallSiteDesc.getLoc(), calleeValue, CFI->getType(), CFI->withoutActuallyEscaping())); } if (auto *PAI = dyn_cast(calleeValue)) { assert(isPartialApplyOfReabstractionThunk(PAI) && isSupportedClosure(PAI) && PAI->getArgument(0) ->getType() .getAs() ->isTrivialNoEscape()); SILValue origCalleeValue = calleeValue; calleeValue = cloneCalleeConversion(PAI->getArgument(0), NewClosure, Builder, NeedsRelease, CapturedMap); auto origRef = PAI->getReferencedFunctionOrNull(); assert(origRef); auto FunRef = Builder.createFunctionRef(CallSiteDesc.getLoc(), origRef); auto NewPA = Builder.createPartialApply( CallSiteDesc.getLoc(), FunRef, {}, {calleeValue}, PAI->getCalleeConvention(), PAI->getResultIsolation(), PAI->isOnStack()); // If the partial_apply is on stack we will emit a dealloc_stack in the // epilog. NeedsRelease.push_back(NewPA); return addToOldToNewClosureMap(origCalleeValue, NewPA); } if (auto *MD = dyn_cast(calleeValue)) { SILValue origCalleeValue = calleeValue; calleeValue = cloneCalleeConversion(MD->getValue(), NewClosure, Builder, NeedsRelease, CapturedMap); if (!CapturedMap.count(MD->getBase())) { CallSiteDesc.getClosure()->dump(); MD->dump(); MD->getFunction()->dump(); } assert(CapturedMap.count(MD->getBase())); return addToOldToNewClosureMap( origCalleeValue, Builder.createMarkDependence(CallSiteDesc.getLoc(), calleeValue, CapturedMap[MD->getBase()], MarkDependenceKind::Escaping)); } auto *Cvt = cast(calleeValue); SILValue origCalleeValue = calleeValue; calleeValue = cloneCalleeConversion(Cvt->getOperand(), NewClosure, Builder, NeedsRelease, CapturedMap); return addToOldToNewClosureMap( origCalleeValue, Builder.createConvertEscapeToNoEscape(CallSiteDesc.getLoc(), calleeValue, Cvt->getType(), true)); } /// Populate the body of the cloned closure, modifying instructions as /// necessary. This is where we create the actual specialized BB Arguments void ClosureSpecCloner::populateCloned() { bool invalidatedStackNesting = false; SILFunction *Cloned = getCloned(); SILFunction *ClosureUser = CallSiteDesc.getApplyCallee(); // Create arguments for the entry block. SILBasicBlock *ClosureUserEntryBB = &*ClosureUser->begin(); SILBasicBlock *ClonedEntryBB = Cloned->createBasicBlock(); SmallVector entryArgs; entryArgs.reserve(ClosureUserEntryBB->getArguments().size()); // Remove the closure argument. for (size_t i = 0, e = ClosureUserEntryBB->args_size(); i != e; ++i) { SILArgument *Arg = ClosureUserEntryBB->getArgument(i); if (i == CallSiteDesc.getClosureIndex()) { entryArgs.push_back(SILValue()); continue; } // Otherwise, create a new argument which copies the original argument auto typeInContext = Cloned->getLoweredType(Arg->getType()); auto *MappedValue = ClonedEntryBB->createFunctionArgument(typeInContext, Arg->getDecl()); MappedValue->copyFlags(cast(Arg)); entryArgs.push_back(MappedValue); } // Next we need to add in any arguments that are not captured as arguments to // the cloned function. // // We do not insert the new mapped arguments into the value map since there by // definition is nothing in the partial apply user function that references // such arguments. After this pass is done the only thing that will reference // the arguments is the partial apply that we will create. SILFunction *ClosedOverFun = CallSiteDesc.getClosureCallee(); SILBuilder &Builder = getBuilder(); auto ClosedOverFunConv = ClosedOverFun->getConventions(); unsigned NumTotalParams = ClosedOverFunConv.getNumParameters(); unsigned NumNotCaptured = NumTotalParams - CallSiteDesc.getNumArguments(); llvm::SmallVector NewPAIArgs; llvm::DenseMap CapturedMap; unsigned idx = 0; for (auto &PInfo : ClosedOverFunConv.getParameters().slice(NumNotCaptured)) { auto paramTy = ClosedOverFunConv.getSILType(PInfo, Builder.getTypeExpansionContext()); // Get the type in context of the new function. paramTy = Cloned->getLoweredType(paramTy); SILValue MappedValue = ClonedEntryBB->createFunctionArgument(paramTy); NewPAIArgs.push_back(MappedValue); auto CapturedVal = cast(CallSiteDesc.getClosure())->getArgument(idx++); CapturedMap[CapturedVal] = MappedValue; } Builder.setInsertionPoint(ClonedEntryBB); // Clone FRI and PAI, and replace usage of the removed closure argument // with result of cloned PAI. SILValue FnVal = Builder.createFunctionRef(CallSiteDesc.getLoc(), ClosedOverFun); auto *NewClosure = CallSiteDesc.createNewClosure(Builder, FnVal, NewPAIArgs); // Clone a chain of ConvertFunctionInsts. This can create further // reabstraction partial_apply instructions. SmallVector NeedsRelease; SILValue ConvertedCallee = cloneCalleeConversion(CallSiteDesc.getClosureCallerArg(), NewClosure, Builder, NeedsRelease, CapturedMap); // Make sure that we actually emit the releases for reabstraction thunks. We // have guaranteed earlier that we only allow reabstraction thunks if the // closure was passed trivial. assert(NeedsRelease.empty() || CallSiteDesc.isTrivialNoEscapeParameter()); entryArgs[CallSiteDesc.getClosureIndex()] = ConvertedCallee; // Visit original BBs in depth-first preorder, starting with the // entry block, cloning all instructions and terminators. cloneFunctionBody(ClosureUser, ClonedEntryBB, entryArgs); // Then insert a release in all non failure exit BBs if our partial apply was // guaranteed. This is b/c it was passed at +0 originally and we need to // balance the initial increment of the newly created closure(s). bool ClosureHasRefSemantics = CallSiteDesc.closureHasRefSemanticContext(); if ((CallSiteDesc.isClosureGuaranteed() || CallSiteDesc.isTrivialNoEscapeParameter()) && (ClosureHasRefSemantics || !NeedsRelease.empty() || CallSiteDesc.isClosureOnStack())) { for (SILBasicBlock *BB : CallSiteDesc.getNonFailureExitBBs()) { SILBasicBlock *OpBB = getOpBasicBlock(BB); TermInst *TI = OpBB->getTerminator(); auto Loc = CleanupLocation(NewClosure->getLoc()); // If we have an exit, we place the release right before it so we know // that it will be executed at the end of the epilogue. if (TI->isFunctionExiting()) { Builder.setInsertionPoint(TI); if (ClosureHasRefSemantics) Builder.createReleaseValue(Loc, SILValue(NewClosure), Builder.getDefaultAtomicity()); else invalidatedStackNesting |= CallSiteDesc.destroyIfPartialApplyStack(Builder, NewClosure); for (auto PAI : NeedsRelease) { if (PAI->isOnStack()) invalidatedStackNesting |= CallSiteDesc.destroyIfPartialApplyStack(Builder, PAI); else Builder.createReleaseValue(Loc, SILValue(PAI), Builder.getDefaultAtomicity()); } continue; } // We use casts where findAllNonFailureExitBBs should have made sure that // this is true. This will ensure that the code is updated when we hit the // cast failure in debug builds. auto *Unreachable = cast(TI); auto PrevIter = std::prev(SILBasicBlock::iterator(Unreachable)); auto NoReturnApply = FullApplySite::isa(&*PrevIter); // We insert the release value right before the no return apply so that if // the partial apply is passed into the no-return function as an @owned // value, we will retain the partial apply before we release it and // potentially eliminate it. Builder.setInsertionPoint(NoReturnApply.getInstruction()); if (ClosureHasRefSemantics) Builder.createReleaseValue(Loc, SILValue(NewClosure), Builder.getDefaultAtomicity()); else invalidatedStackNesting |= CallSiteDesc.destroyIfPartialApplyStack(Builder, NewClosure); for (auto PAI : NeedsRelease) { if (PAI->isOnStack()) invalidatedStackNesting |= CallSiteDesc.destroyIfPartialApplyStack(Builder, PAI); else Builder.createReleaseValue(Loc, SILValue(PAI), Builder.getDefaultAtomicity()); } } } if (invalidatedStackNesting) { StackNesting::fixNesting(Cloned); } } //===----------------------------------------------------------------------===// // Closure Specializer //===----------------------------------------------------------------------===// namespace { class SILClosureSpecializerTransform : public SILFunctionTransform { bool gatherCallSites( SILFunction *Caller, llvm::SmallVectorImpl> &ClosureCandidates, llvm::DenseSet &MultipleClosureAI); bool specialize(SILFunction *Caller, std::vector &PropagatedClosures); public: SILClosureSpecializerTransform() {} void run() override; }; void SILClosureSpecializerTransform::run() { SILFunction *F = getFunction(); // Don't optimize functions that are marked with the opt.never // attribute. if (!F->shouldOptimize()) return; // If F is an external declaration, there is nothing to specialize. if (F->isExternalDeclaration()) return; std::vector PropagatedClosures; if (!specialize(F, PropagatedClosures)) return; // If for testing purposes we were asked to not eliminate dead closures, // return. if (EliminateDeadClosures) { // Otherwise, remove any local dead closures that are now dead since we // specialized all of their uses. LLVM_DEBUG(llvm::dbgs() << "Trying to remove dead closures!\n"); sortUnique(PropagatedClosures); bool invalidatedStackNesting = false; for (auto *Closure : PropagatedClosures) { LLVM_DEBUG(llvm::dbgs() << " Visiting: " << *Closure); if (!tryDeleteDeadClosure(Closure)) { LLVM_DEBUG(llvm::dbgs() << " Failed to delete closure!\n"); ++NumPropagatedClosuresNotEliminated; continue; } LLVM_DEBUG(llvm::dbgs() << " Deleted closure!\n"); ++NumPropagatedClosuresEliminated; invalidatedStackNesting = true; } if (invalidatedStackNesting) { StackNesting::fixNesting(F); } } // Invalidate everything since we delete calls as well as add new // calls and branches. invalidateAnalysis(SILAnalysis::InvalidationKind::FunctionBody); } static void markReabstractionPartialApplyAsUsed( SILValue FirstClosure, SILValue Current, llvm::DenseSet &UsedReabstractionClosure) { if (Current == FirstClosure) return; if (auto PA = dyn_cast(Current)) { UsedReabstractionClosure.insert(PA); return markReabstractionPartialApplyAsUsed(FirstClosure, PA->getArgument(0), UsedReabstractionClosure); } if (auto Cvt = dyn_cast(Current)) { return markReabstractionPartialApplyAsUsed(FirstClosure, Cvt->getOperand(), UsedReabstractionClosure); } if (auto Cvt = dyn_cast(Current)) { return markReabstractionPartialApplyAsUsed(FirstClosure, Cvt->getOperand(), UsedReabstractionClosure); } if (auto MD = dyn_cast(Current)) { return markReabstractionPartialApplyAsUsed(FirstClosure, MD->getValue(), UsedReabstractionClosure); } llvm_unreachable("Unexpect instruction"); } /// Returns true if the \p closureArgIdx argument of \p callee is called in /// \p callee or any function called by callee. static bool isClosureAppliedIn(SILFunction *Callee, unsigned closureArgIdx, SmallPtrSetImpl &HandledFuncs) { // Limit the number of recursive calls to not go into exponential behavior in // corner cases. const int RecursionBudget = 8; SILValue Arg = Callee->getArgument(closureArgIdx); for (Operand *ArgUse : Arg->getUses()) { if (auto UserAI = FullApplySite::isa(ArgUse->getUser())) { if (UserAI.getCallee() == Arg) return true; assert(UserAI.isArgumentOperand(*ArgUse) && "any other non-argument operands than the callee?"); SILFunction *ApplyCallee = UserAI.getReferencedFunctionOrNull(); if (ApplyCallee && !ApplyCallee->isExternalDeclaration() && HandledFuncs.count(ApplyCallee) == 0 && HandledFuncs.size() < RecursionBudget) { HandledFuncs.insert(ApplyCallee); if (isClosureAppliedIn(UserAI.getReferencedFunctionOrNull(), UserAI.getCalleeArgIndex(*ArgUse), HandledFuncs)) return true; } } } return false; } static bool canSpecializeFullApplySite(FullApplySiteKind kind) { switch (kind) { case FullApplySiteKind::TryApplyInst: case FullApplySiteKind::ApplyInst: return true; case FullApplySiteKind::BeginApplyInst: return false; } llvm_unreachable("covered switch"); } bool SILClosureSpecializerTransform::gatherCallSites( SILFunction *Caller, llvm::SmallVectorImpl> &ClosureCandidates, llvm::DenseSet &MultipleClosureAI) { // A set of apply inst that we have associated with a closure. We use this to // make sure that we do not handle call sites with multiple closure arguments. llvm::DenseSet VisitedAI; // We should not look at reabstraction closure twice who we ultimately ended // up using as an argument that we specialize on. llvm::DenseSet UsedReabstractionClosure; bool CFGChanged = false; // For each basic block BB in Caller... for (auto &BB : *Caller) { // For each instruction II in BB... for (auto &II : BB) { // If II is not a closure that we support specializing, skip it... if (!isSupportedClosure(&II)) continue; auto ClosureInst = cast(&II); if (UsedReabstractionClosure.count(ClosureInst)) continue; std::unique_ptr CInfo; // Go through all uses of our closure. // Worklist of operands. SmallVector Uses(ClosureInst->getUses()); // Live range end points. SmallVector UsePoints; // Set of possible arguments for mark_dependence. The base of a // mark_dependence we copy must be available in the specialized function. llvm::SmallSet PossibleMarkDependenceBases; if (auto *PA = dyn_cast(ClosureInst)) { for (auto Opd : PA->getArguments()) PossibleMarkDependenceBases.insert(Opd); } bool HaveUsedReabstraction = false; // Uses may grow in this loop. for (size_t UseIndex = 0; UseIndex < Uses.size(); ++UseIndex) { auto *Use = Uses[UseIndex]; UsePoints.push_back(Use->getUser()); // Recurse through conversions. if (auto *CFI = dyn_cast(Use->getUser())) { // Push Uses in reverse order so they are visited in forward order. Uses.append(CFI->getUses().begin(), CFI->getUses().end()); PossibleMarkDependenceBases.insert(CFI); continue; } if (auto *Cvt = dyn_cast(Use->getUser())) { Uses.append(Cvt->getUses().begin(), Cvt->getUses().end()); PossibleMarkDependenceBases.insert(Cvt); continue; } // Look through reabstraction thunks. if (auto *PA = dyn_cast(Use->getUser())) { // Reabstraction can cause series of partial_apply to be emitted. It // is okay to treat these like conversion instructions. Current // restriction: if the partial_apply does not take ownership of its // argument we don't need to analyze which partial_apply to emit // release for (its all of them). if (isPartialApplyOfReabstractionThunk(PA) && isSupportedClosure(PA) && PA->getArgument(0) ->getType() .getAs() ->isTrivialNoEscape()) { Uses.append(PA->getUses().begin(), PA->getUses().end()); PossibleMarkDependenceBases.insert(PA); HaveUsedReabstraction = true; } continue; } // Look through mark_dependence on partial_apply [stack]. if (auto *MD = dyn_cast(Use->getUser())) { // We can't copy a closure if the mark_dependence base is not // available in the specialized function. if (!PossibleMarkDependenceBases.count(MD->getBase())) continue; if (MD->getValue() == Use->get() && MD->getValue()->getType().is() && MD->getValue() ->getType() .castTo() ->isTrivialNoEscape()) { Uses.append(MD->getUses().begin(), MD->getUses().end()); continue; } } // If this use is not a full apply site that we can process or an apply // inst with substitutions, there is nothing interesting for us to do, // so continue... auto AI = FullApplySite::isa(Use->getUser()); if (!AI || AI.hasSubstitutions() || !canSpecializeFullApplySite(AI.getKind()) || !AI.canOptimize()) continue; // Check if we have already associated this apply inst with a closure to // be specialized. We do not handle applies that take in multiple // closures at this time. if (!VisitedAI.insert(AI).second) { MultipleClosureAI.insert(AI); continue; } // If AI does not have a function_ref definition as its callee, we can // not do anything here... so continue... SILFunction *ApplyCallee = AI.getReferencedFunctionOrNull(); if (!ApplyCallee || ApplyCallee->isExternalDeclaration()) continue; // Don't specialize non-fragile callees if the caller is fragile; // the specialized callee will have shared linkage, and thus cannot // be referenced from the fragile caller. if (!ApplyCallee->canBeInlinedIntoCaller(Caller->getSerializedKind())) continue; // If the callee uses a dynamic Self, we cannot specialize it, // since the resulting specialization might longer has 'self' as the // last parameter. // // We could fix this by inserting new arguments more carefully, or // changing how we model dynamic Self altogether. if (mayBindDynamicSelf(ApplyCallee)) return CFGChanged; // Check if the closure is passed as an argument (and not called). if (!AI.isArgumentOperand(*Use)) continue; unsigned ClosureIndex = AI.getCalleeArgIndex(*Use); // Ok, we know that we can perform the optimization but not whether or // not the optimization is profitable. Check if the closure is actually // called in the callee (or in a function called by the callee). SmallPtrSet HandledFuncs; if (!isClosureAppliedIn(ApplyCallee, ClosureIndex, HandledFuncs)) continue; unsigned firstParamArgIdx = AI.getSubstCalleeConv().getSILArgIndexOfFirstParam(); assert(ClosureIndex >= firstParamArgIdx); auto ClosureParamIndex = ClosureIndex - firstParamArgIdx; auto ParamInfo = AI.getSubstCalleeType()->getParameters(); SILParameterInfo ClosureParamInfo = ParamInfo[ClosureParamIndex]; // We currently only support copying intermediate reabstraction // closures if the closure is ultimately passed trivially. bool IsClosurePassedTrivially = ClosureParamInfo.getInterfaceType() ->castTo() ->isTrivialNoEscape(); if (HaveUsedReabstraction && !IsClosurePassedTrivially) continue; // Get all non-failure exit BBs in the Apply Callee if our partial apply // is guaranteed. If we do not understand one of the exit BBs, bail. // // We need this to make sure that we insert a release in the appropriate // locations to balance the +1 from the creation of the partial apply. // // However, thin_to_thick_function closures don't have a context and // don't need to be released. bool OnlyHaveThinToThickClosure = isa(ClosureInst) && !HaveUsedReabstraction; llvm::TinyPtrVector NonFailureExitBBs; if ((ClosureParamInfo.isGuaranteedInCaller() || IsClosurePassedTrivially) && !OnlyHaveThinToThickClosure && !findAllNonFailureExitBBs(ApplyCallee, NonFailureExitBBs)) { continue; } // Specializing a readnone, readonly, releasenone function with a // nontrivial context is illegal. Inserting a release in such a function // results in miscompilation after other optimizations. // For now, the specialization is disabled. // // TODO: A @noescape closure should never be converted to an @owned // argument regardless of the function attribute. if (!OnlyHaveThinToThickClosure && ApplyCallee->getEffectsKind() <= EffectsKind::ReleaseNone) { continue; } // Avoid an infinite specialization loop caused by repeated runs of // ClosureSpecializer and CapturePropagation. // CapturePropagation propagates constant function-literals. Such // function specializations can then be optimized again by the // ClosureSpecializer and so on. // This happens if a closure argument is called _and_ referenced in // another closure, which is passed to a recursive call. E.g. // // func foo(_ c: @escaping () -> ()) { // c() // foo({ c() }) // } // // A limit of 2 is good enough and will not be exceeded in "regular" // optimization scenarios. if (getSpecializationLevel(getClosureCallee(ClosureInst)) > SpecializationLevelLimit) { continue; } // Compute the final release points of the closure. We will insert // release of the captured arguments here. if (!CInfo) CInfo.reset(new ClosureInfo(ClosureInst)); // Mark the reabstraction closures as used. if (HaveUsedReabstraction) markReabstractionPartialApplyAsUsed(ClosureInst, Use->get(), UsedReabstractionClosure); // Now we know that CSDesc is profitable to specialize. Add it to our // call site list. CInfo->CallSites.push_back( CallSiteDescriptor(CInfo.get(), AI, ClosureIndex, ClosureParamInfo, std::move(NonFailureExitBBs))); } if (CInfo) { ValueLifetimeAnalysis VLA(CInfo->Closure, UsePoints); if (!VLA.computeFrontier(CInfo->LifetimeFrontier, ValueLifetimeAnalysis::AllowToModifyCFG)) { CFGChanged = true; } ClosureCandidates.push_back(std::move(CInfo)); } } } return CFGChanged; } bool SILClosureSpecializerTransform::specialize(SILFunction *Caller, std::vector &PropagatedClosures) { LLVM_DEBUG(llvm::dbgs() << "Optimizing callsites that take closure " "argument in " << Caller->getName() << '\n'); // Collect all of the PartialApplyInsts that are used as arguments to // ApplyInsts. Check the profitability of specializing the closure argument. llvm::SmallVector, 8> ClosureCandidates; llvm::DenseSet MultipleClosureAI; if (gatherCallSites(Caller, ClosureCandidates, MultipleClosureAI)) { invalidateAnalysis(SILAnalysis::InvalidationKind::Branches); } SILOptFunctionBuilder FuncBuilder(*this); bool Changed = false; for (const auto &CInfo : ClosureCandidates) { for (auto &CSDesc : CInfo->CallSites) { // Do not specialize apply insts that take in multiple closures. This pass // does not know how to do this yet. if (MultipleClosureAI.count(CSDesc.getApplyInst())) continue; auto NewFName = CSDesc.createName(); LLVM_DEBUG(llvm::dbgs() << " Perform optimizations with new name " << NewFName << '\n'); // Then see if we already have a specialized version of this function in // our module. SILFunction *NewF = CInfo->Closure->getModule().lookUpFunction(NewFName); // If not, create a specialized version of ApplyCallee calling the closure // directly. if (!NewF) { NewF = ClosureSpecCloner::cloneFunction(FuncBuilder, CSDesc, NewFName); addFunctionToPassManagerWorklist(NewF, CSDesc.getApplyCallee()); LLVM_DEBUG(llvm::dbgs() << "\nThe rewritten callee is:\n"; NewF->dump()); } // Rewrite the call rewriteApplyInst(CSDesc, NewF); PropagatedClosures.push_back(CSDesc.getClosure()); Changed = true; } } LLVM_DEBUG(if (Changed) { llvm::dbgs() << "\nThe rewritten caller is:\n"; Caller->dump(); }); return Changed; } } // end anonymous namespace SILTransform *swift::createClosureSpecializer() { return new SILClosureSpecializerTransform(); }