//===--- PerformanceInliner.cpp - Basic cost based performance inlining ---===//
//
// This source file is part of the Swift.org open source project
//
// Copyright (c) 2014 - 2017 Apple Inc. and the Swift project authors
// Licensed under Apache License v2.0 with Runtime Library Exception
//
// See https://swift.org/LICENSE.txt for license information
// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
//
//===----------------------------------------------------------------------===//

#define DEBUG_TYPE "sil-inliner"

#include "swift/AST/Module.h"
#include "swift/AST/SemanticAttrs.h"
#include "swift/Basic/Assertions.h"
#include "swift/SIL/MemAccessUtils.h"
#include "swift/SIL/OptimizationRemark.h"
#include "swift/SILOptimizer/Analysis/BasicCalleeAnalysis.h"
#include "swift/SILOptimizer/Analysis/IsSelfRecursiveAnalysis.h"
#include "swift/SILOptimizer/PassManager/Passes.h"
#include "swift/SILOptimizer/PassManager/Transforms.h"
#include "swift/SILOptimizer/Utils/BasicBlockOptUtils.h"
#include "swift/SILOptimizer/Utils/CFGOptUtils.h"
#include "swift/SILOptimizer/Utils/Devirtualize.h"
#include "swift/SILOptimizer/Utils/Generics.h"
#include "swift/SILOptimizer/Utils/OwnershipOptUtils.h"
#include "swift/SILOptimizer/Utils/PerformanceInlinerUtils.h"
#include "swift/SILOptimizer/Utils/SILOptFunctionBuilder.h"
#include "swift/SILOptimizer/Utils/StackNesting.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"

using namespace swift;

STATISTIC(NumFunctionsInlined, "Number of functions inlined");

llvm::cl::opt<bool> PrintShortestPathInfo(
    "print-shortest-path-info", llvm::cl::init(false),
    llvm::cl::desc("Print shortest-path information for inlining"));

llvm::cl::opt<bool> EnableSILInliningOfGenerics(
    "sil-inline-generics", llvm::cl::init(false),
    llvm::cl::desc("Enable inlining of generics"));

llvm::cl::opt<bool>
    EnableSILAggressiveInlining("sil-aggressive-inline", llvm::cl::init(false),
                                llvm::cl::desc("Enable aggressive inlining"));

llvm::cl::opt<bool> EnableVerifyAfterInlining(
    "sil-inline-verify-after-inline", llvm::cl::init(false),
    llvm::cl::desc("Run sil verification after inlining all found callee apply "
                   "sites into a caller."));

llvm::cl::opt<bool> SILPrintInliningCallee(
    "sil-print-inlining-callee", llvm::cl::init(false),
    llvm::cl::desc("Print functions that are inlined into other functions."));

llvm::cl::opt<bool> SILPrintInliningCallerBefore(
    "sil-print-inlining-caller-before", llvm::cl::init(false),
    llvm::cl::desc(
        "Print functions into which another function is about to be inlined."));

llvm::cl::opt<bool> SILPrintInliningCallerAfter(
    "sil-print-inlining-caller-after", llvm::cl::init(false),
    llvm::cl::desc(
        "Print functions into which another function has been inlined."));

llvm::cl::opt<bool> EnableVerifyAfterEachInlining(
    "sil-inline-verify-after-each-inline", llvm::cl::init(false),
    llvm::cl::desc(
        "Run sil verification after inlining each found callee apply "
        "site into a caller."));

//===----------------------------------------------------------------------===//
// Heuristics
//===----------------------------------------------------------------------===//

/// The following constants define the cost model for inlining. Some constants
/// are also defined in ShortestPathAnalysis.
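///
/// In broad strokes: isProfitableToInline() below accumulates these benefit
/// values for a call site, scaled by the weight of the block containing the
/// apply, and the call site is only inlined if the accumulated benefit
/// reaches the callee's estimated inline cost (CalleeCost).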
llvm::cl::opt<int> RemovedCallBenefit(
    "sil-inline-removed-call-benefit", llvm::cl::init(20),
    llvm::cl::desc("The base value for every call: it represents the benefit "
                   "of removing the call overhead itself."));

llvm::cl::opt<int> RemovedCoroutineCallBenefit(
    "sil-inline-removed-coroutine-call-benefit", llvm::cl::init(300),
    llvm::cl::desc("The benefit of inlining a `begin_apply`."));

llvm::cl::opt<int> RemovedClosureBenefit(
    "sil-inline-removed-closure-benefit",
    llvm::cl::init(RemovedCallBenefit + 50),
    llvm::cl::desc(
        "The benefit if the operand of an apply gets constant, e.g. if a "
        "closure is passed to an apply instruction in the callee."));

llvm::cl::opt<int> RemovedLoadBenefit(
    "sil-inline-removed-load-benefit", llvm::cl::init(RemovedCallBenefit + 5),
    llvm::cl::desc("The benefit if a load can (probably) be eliminated because "
                   "it loads from a stack location in the caller."));

llvm::cl::opt<int> RemovedStoreBenefit(
    "sil-inline-removed-store-benefit", llvm::cl::init(RemovedCallBenefit + 10),
    llvm::cl::desc("The benefit if a store can (probably) be eliminated "
                   "because it stores to a stack location in the caller."));

llvm::cl::opt<int> RemovedTerminatorBenefit(
    "sil-inline-removed-terminator-benefit",
    llvm::cl::init(RemovedCallBenefit + 10),
    llvm::cl::desc("The benefit if the condition of a terminator instruction "
                   "gets constant due to inlining."));

llvm::cl::opt<int> RefCountBenefit(
    "sil-inline-ref-count-benefit", llvm::cl::init(RemovedCallBenefit + 20),
    llvm::cl::desc("The benefit if a retain/release can "
                   "(probably) be eliminated after inlining."));

llvm::cl::opt<int> FastPathBuiltinBenefit(
    "sil-inline-fast-path-builtin-benefit",
    llvm::cl::init(RemovedCallBenefit + 40),
    llvm::cl::desc("The benefit of a onFastPath builtin."));

llvm::cl::opt<int> DevirtualizedCallBenefit(
    "sil-inline-devirtualized-call-benefit",
    llvm::cl::init(RemovedCallBenefit + 300),
    llvm::cl::desc("The benefit of being able to devirtualize a call."));

llvm::cl::opt<int> GenericSpecializationBenefit(
    "sil-inline-generic-specialization-benefit",
    llvm::cl::init(RemovedCallBenefit + 300),
    llvm::cl::desc("The benefit of being able to produce a generic "
                   "specialization for a call."));

llvm::cl::opt<int> ExclusivityBenefit(
    "sil-inline-exclusivity-benefit", llvm::cl::init(RemovedCallBenefit + 10),
    llvm::cl::desc("The benefit of inlining an exclusivity-containing callee. "
                   "The exclusivity needs to be dynamic, have no nested "
                   "conflict, and address known storage."));

llvm::cl::opt<int> OSizeClassMethodBenefit(
    "sil-inline-o-size-class-method-benefit", llvm::cl::init(5),
    llvm::cl::desc("The benefit of inlining class methods with -Osize. We only "
                   "inline very small class methods with -Osize."));

llvm::cl::opt<int> GlobalInitBenefit(
    "sil-inline-global-init-benefit", llvm::cl::init(100),
    llvm::cl::desc(
        "The benefit of inlining constructors into global initializers."));

llvm::cl::opt<int> TrivialFunctionThreshold(
    "sil-inline-trivial-function-threshold", llvm::cl::init(18),
    llvm::cl::desc("Approximately up to this cost level a function can be "
                   "inlined without increasing the code size."));

llvm::cl::opt<int> BlockLimitDenominator(
    "sil-inline-block-limit-denominator", llvm::cl::init(3000),
    llvm::cl::desc("Configuration for the \"soft\" caller block limit. When "
                   "changing, make sure you update "
                   "BlockLimitMaxIntNumerator."));

llvm::cl::opt<int> BlockLimitMaxIntNumerator(
    "sil-inline-block-limit-max-int-numerator", llvm::cl::init(18608),
    llvm::cl::desc("Computations with BlockLimitDenominator will overflow with "
                   "numerators >= this value. This equals cbrt(INT_MAX) * "
                   "cbrt(BlockLimitDenominator); we hardcode its value because "
                   "std::cbrt() is not constexpr."));

llvm::cl::opt<int> OverallCallerBlockLimit(
    "sil-inline-overall-caller-block-limit", llvm::cl::init(400),
    llvm::cl::desc("No inlining is done if the caller has more than this "
                   "number of blocks."));

llvm::cl::opt<int> DefaultApplyLength(
    "sil-inline-default-apply-length", llvm::cl::init(10),
    llvm::cl::desc("The assumed execution length of a function call."));

//===----------------------------------------------------------------------===//
// Printing Helpers
//===----------------------------------------------------------------------===//

extern void printInliningDetailsCallee(StringRef passName, SILFunction *caller,
                                       SILFunction *callee);

extern void printInliningDetailsCallerBefore(StringRef passName,
                                             SILFunction *caller,
                                             SILFunction *callee);

extern void printInliningDetailsCallerAfter(StringRef passName,
                                            SILFunction *caller,
                                            SILFunction *callee);

//===----------------------------------------------------------------------===//
// Performance Inliner
//===----------------------------------------------------------------------===//

namespace {

using Weight = ShortestPathAnalysis::Weight;

class SILPerformanceInliner {
  StringRef PassName;
  SILOptFunctionBuilder &FuncBuilder;

  /// Specifies which functions not to inline, based on @_semantics and
  /// global_init attributes.
  InlineSelection WhatToInline;

  SILPassManager *pm;
  DominanceAnalysis *DA;
  SILLoopAnalysis *LA;
  BasicCalleeAnalysis *BCA;
  IsSelfRecursiveAnalysis *SRA;

  // For keys of SILFunction and SILLoop.
  llvm::DenseMap<SILFunction *, ShortestPathAnalysis *> SPAs;
  llvm::SpecificBumpPtrAllocator<ShortestPathAnalysis> SPAAllocator;

  ColdBlockInfo CBI;

  OptRemark::Emitter &ORE;

  OptimizationMode OptMode;

#ifndef NDEBUG
  SILFunction *LastPrintedCaller = nullptr;
  void dumpCaller(SILFunction *Caller) {
    if (Caller != LastPrintedCaller) {
      llvm::dbgs() << "\nInline into caller: " << Caller->getName() << '\n';
      LastPrintedCaller = Caller;
    }
  }
#endif

  ShortestPathAnalysis *getSPA(SILFunction *F, SILLoopInfo *LI) {
    ShortestPathAnalysis *&SPA = SPAs[F];
    if (!SPA) {
      SPA = new (SPAAllocator.Allocate()) ShortestPathAnalysis(F, LI);
    }
    return SPA;
  }

  bool profileBasedDecision(
      const FullApplySite &AI, int Benefit, SILFunction *Callee,
      int CalleeCost, int &NumCallerBlocks,
      const llvm::DenseMapIterator<
          swift::SILBasicBlock *, uint64_t,
          llvm::DenseMapInfo<swift::SILBasicBlock *>,
          llvm::detail::DenseMapPair<swift::SILBasicBlock *, uint64_t>, true>
          &bbIt);

  bool isAutoDiffLinearMapWithControlFlow(FullApplySite AI);

  bool isTupleWithAllocsOrPartialApplies(SILValue retVal);

  bool isProfitableToInline(
      FullApplySite AI, Weight CallerWeight, ConstantTracker &callerTracker,
      int &NumCallerBlocks,
      const llvm::DenseMap<SILBasicBlock *, uint64_t> &BBToWeightMap);

  bool decideInWarmBlock(
      FullApplySite AI, Weight CallerWeight, ConstantTracker &callerTracker,
      int &NumCallerBlocks,
      const llvm::DenseMap<SILBasicBlock *, uint64_t> &BBToWeightMap);

  bool decideInColdBlock(FullApplySite AI, SILFunction *Callee,
                         int numCallerBlocks);

  void visitColdBlocks(SmallVectorImpl<FullApplySite> &AppliesToInline,
                       SILBasicBlock *root, DominanceInfo *DT,
                       int numCallerBlocks);

  void collectAppliesToInline(SILFunction *Caller,
                              SmallVectorImpl<FullApplySite> &Applies);

public:
  SILPerformanceInliner(StringRef PassName, SILOptFunctionBuilder &FuncBuilder,
                        InlineSelection WhatToInline, SILPassManager *pm,
                        DominanceAnalysis *DA, PostDominanceAnalysis *PDA,
                        SILLoopAnalysis *LA, BasicCalleeAnalysis *BCA,
                        IsSelfRecursiveAnalysis *SRA, OptimizationMode OptMode,
                        OptRemark::Emitter &ORE)
      : PassName(PassName), FuncBuilder(FuncBuilder),
        WhatToInline(WhatToInline), pm(pm), DA(DA), LA(LA), BCA(BCA),
        SRA(SRA), CBI(DA, PDA), ORE(ORE), OptMode(OptMode) {}

  bool inlineCallsIntoFunction(SILFunction *F);
};

} // end anonymous namespace

// Returns true if it is possible to perform a generic
// specialization for a given call.
static bool canSpecializeGeneric(ApplySite AI, SILFunction *F,
                                 SubstitutionMap Subs) {
  return ReabstractionInfo::canBeSpecialized(AI, F, Subs);
}

bool SILPerformanceInliner::profileBasedDecision(
    const FullApplySite &AI, int Benefit, SILFunction *Callee, int CalleeCost,
    int &NumCallerBlocks,
    const llvm::DenseMapIterator<
        swift::SILBasicBlock *, uint64_t,
        llvm::DenseMapInfo<swift::SILBasicBlock *>,
        llvm::detail::DenseMapPair<swift::SILBasicBlock *, uint64_t>, true>
        &bbIt) {
  if (CalleeCost < TrivialFunctionThreshold) {
    // We do not increase code size below this threshold.
    return true;
  }
  auto callerCount = bbIt->getSecond();
  if (callerCount < 1) {
    // Never called - do not inline.
    LLVM_DEBUG(dumpCaller(AI.getFunction());
               llvm::dbgs() << "profiled decision: NO, "
                               "reason= Never Called.\n");
    return false;
  }
  auto calleeCount = Callee->getEntryCount();
  if (calleeCount) {
    // If we have a callee count - use the SI heuristic:
    auto calleCountVal = calleeCount.getValue();
    auto percent = (long double)callerCount / (long double)calleCountVal;
    if (percent < 0.8) {
      LLVM_DEBUG(dumpCaller(AI.getFunction());
                 llvm::dbgs() << "profiled decision: NO, reason=SI "
                              << std::to_string(percent) << "%\n");
      return false;
    }
    LLVM_DEBUG(dumpCaller(AI.getFunction());
               llvm::dbgs() << "profiled decision: YES, reason=SI "
                            << std::to_string(percent) << "%\n");
  } else {
    // No callee count - use a "modified" aggressive IHF for now.
    if (CalleeCost > Benefit && callerCount < 100) {
      LLVM_DEBUG(dumpCaller(AI.getFunction());
                 llvm::dbgs() << "profiled decision: NO, reason=IHF "
                              << callerCount << '\n');
      return false;
    }
    LLVM_DEBUG(dumpCaller(AI.getFunction());
               llvm::dbgs() << "profiled decision: YES, reason=IHF "
                            << callerCount << '\n');
  }
  // We're going to inline!
  NumCallerBlocks += Callee->size();
  return true;
}

// Checks if `FAI` can be traced back to a specifically named,
// input enum function argument. If so, the function containing the call site
// is a linear map in Swift Autodiff.
bool SILPerformanceInliner::isAutoDiffLinearMapWithControlFlow(
    FullApplySite FAI) {
  static const std::string LinearMapBranchTracingEnumPrefix = "_AD__";

  auto val = FAI.getCallee();

  for (;;) {
    if (auto *inst = dyn_cast<SingleValueInstruction>(val)) {
      if (auto pi = Projection::isObjectProjection(val)) {
        // Extract a member from a struct/tuple/enum.
        val = pi->getOperand(0);
        continue;
      } else if (auto base = stripFunctionConversions(inst)) {
        val = base;
        continue;
      }
      return false;
    } else if (auto *phiArg = dyn_cast<SILPhiArgument>(val)) {
      if (auto *predBB = phiArg->getParent()->getSinglePredecessorBlock()) {
        // The terminator of this predecessor block must either be a
        // (conditional) branch instruction or a switch_enum.
        if (auto *bi = dyn_cast<BranchInst>(predBB->getTerminator())) {
          val = bi->getArg(phiArg->getIndex());
          continue;
        } else if (auto *cbi =
                       dyn_cast<CondBranchInst>(predBB->getTerminator())) {
          val = cbi->getArgForDestBB(phiArg->getParent(), phiArg->getIndex());
          continue;
        } else if (auto *sei =
                       dyn_cast<SwitchEnumInst>(predBB->getTerminator())) {
          val = sei->getOperand();
          continue;
        }
        return false;
      }
    }
    break;
  }

  // If `val` now points to a function argument then we have successfully
  // traced the callee back to a function argument.
  //
  // We now need to check if this argument is an enum and named like an
  // autodiff branch tracing enum.
  if (auto *arg = dyn_cast<SILFunctionArgument>(val)) {
    if (auto *enumDecl = arg->getType().getEnumOrBoundGenericEnum()) {
      return enumDecl->getName().str().starts_with(
          LinearMapBranchTracingEnumPrefix);
    }
  }

  return false;
}

// Checks if the given value is a tuple containing allocated objects
// or partial applies.
//
// Returns true if the number of allocated objects or partial applies is
// greater than 0, and false otherwise.
//
// Returns false if the value is not a tuple.
bool SILPerformanceInliner::isTupleWithAllocsOrPartialApplies(SILValue val) {
  if (auto *ti = dyn_cast<TupleInst>(val)) {
    for (auto i : range(ti->getNumOperands())) {
      SILValue val = ti->getOperand(i);
      if (auto base = stripFunctionConversions(val))
        val = base;

      if (isa<AllocationInst>(val) || isa<PartialApplyInst>(val))
        return true;
    }
  }

  return false;
}

// Uses a function's mangled name to determine if it is an Autodiff VJP
// function.
//
// TODO: VJPs of differentiable functions with custom silgen names are not
// recognized as VJPs by this function. However, this is not a hard limitation
// and can be fixed.
bool isFunctionAutodiffVJP(SILFunction *callee) {
  swift::Demangle::Context Ctx;
  if (auto *Root = Ctx.demangleSymbolAsNode(callee->getName())) {
    if (auto *node = Root->findByKind(
            swift::Demangle::Node::Kind::AutoDiffFunctionKind, 3)) {
      if (node->hasIndex()) {
        auto index = (char)node->getIndex();
        auto ADFunctionKind = swift::Demangle::AutoDiffFunctionKind(index);
        if (ADFunctionKind == swift::Demangle::AutoDiffFunctionKind::VJP) {
          return true;
        }
      }
    }
  }

  return false;
}

bool isAllocator(SILFunction *callee) {
  swift::Demangle::Context Ctx;
  if (auto *Root = Ctx.demangleSymbolAsNode(callee->getName())) {
    return Root->findByKind(swift::Demangle::Node::Kind::Allocator, 3) !=
           nullptr;
  }
  return false;
}

bool isProfitableToInlineAutodiffVJP(SILFunction *vjp, SILFunction *caller,
                                     InlineSelection whatToInline,
                                     StringRef stageName) {
  bool isLowLevelFunctionPassPipeline = stageName == "LowLevel,Function";
  auto isHighLevelFunctionPassPipeline =
      stageName == "HighLevel,Function+EarlyLoopOpt";
  auto calleeHasControlFlow = vjp->size() > 1;
  auto isCallerVJP = isFunctionAutodiffVJP(caller);
  auto callerHasControlFlow = caller->size() > 1;

  // If the pass is being run as part of the low-level function pass pipeline,
  // the autodiff closure-spec optimization has already done its work.
  // Therefore, all VJPs should be considered for inlining.
  if (isLowLevelFunctionPassPipeline) {
    return true;
  }

  // If the callee has control flow, it will definitely not be handled by the
  // Autodiff closure-spec optimization. Therefore, we should consider it for
  // inlining.
  if (calleeHasControlFlow) {
    return true;
  }

  // If this is the EarlyPerfInline pass, we want the Autodiff closure-spec
  // optimization pass to optimize VJPs in isolation before they are inlined
  // into other VJPs.
  if (isHighLevelFunctionPassPipeline) {
    return false;
  }

  // If this is not the EarlyPerfInline pass, VJPs should only be inlined into
  // other VJPs that do not contain any control-flow.
  if (!isCallerVJP || (isCallerVJP && callerHasControlFlow)) {
    return false;
  }

  return true;
}

static bool isConstantValue(SILValue v, ValueSet &visited) {
  if (!visited.insert(v))
    return true;
  if (isa<LiteralInst>(v))
    return true;
  if (auto *s = dyn_cast<StructInst>(v)) {
    for (Operand &op : s->getAllOperands()) {
      if (!isConstantValue(op.get(), visited))
        return false;
    }
    return true;
  }
  return false;
}

static bool hasConstantArguments(FullApplySite fas) {
  ValueSet visited(fas.getFunction());
  for (Operand &op : fas.getArgumentOperands()) {
    if (!fas.isIndirectResultOperand(op)) {
      if (!isConstantValue(op.get(), visited))
        return false;
    }
  }
  return true;
}

static bool hasConstantEnumArgument(FullApplySite fas) {
  for (SILValue arg : fas.getArguments()) {
    if (isa<EnumInst>(arg))
      return true;
  }
  return false;
}

bool SILPerformanceInliner::isProfitableToInline(
    FullApplySite AI, Weight CallerWeight, ConstantTracker &callerTracker,
    int &NumCallerBlocks,
    const llvm::DenseMap<SILBasicBlock *, uint64_t> &BBToWeightMap) {
  SILFunction *Callee = AI.getReferencedFunctionOrNull();
  assert(Callee);
  bool IsGeneric = AI.hasSubstitutions();

  if (isFunctionAutodiffVJP(Callee) &&
      !isProfitableToInlineAutodiffVJP(Callee, AI.getFunction(), WhatToInline,
                                       this->pm->getStageName())) {
    return false;
  }

  // Start with a base benefit.
  int BaseBenefit = isa<BeginApplyInst>(AI) ? RemovedCoroutineCallBenefit
                                            : RemovedCallBenefit;

  // If the function has more than 5 parameters / results, then increase the
  // base benefit for each additional parameter. We assume that for each extra
  // parameter or result we'd eliminate an extra pair of loads and stores used
  // to pass / return the value via the stack.
  unsigned numParameters = AI->getNumRealOperands(),
           numResults = AI->getNumResults();
  if (numParameters > 5)
    BaseBenefit +=
        (RemovedLoadBenefit + RemovedStoreBenefit) * (numParameters - 5);
  if (numResults > 5)
    BaseBenefit +=
        (RemovedLoadBenefit + RemovedStoreBenefit) * (numResults - 5);

  // Osize heuristic.
  //
  // As a hack, don't apply this at all to coroutine inlining; avoiding
  // coroutine allocation overheads is extremely valuable. There might be
  // more principled ways of getting this effect.
  bool isClassMethodAtOsize = false;
  if (OptMode == OptimizationMode::ForSize && !isa<BeginApplyInst>(AI)) {
    // Don't inline into thunks.
    if (AI.getFunction()->isThunk())
      return false;

    // Don't inline class methods.
    if (Callee->hasSelfParam()) {
      auto SelfTy = Callee->getLoweredFunctionType()->getSelfInstanceType(
          FuncBuilder.getModule(),
          AI.getFunction()->getTypeExpansionContext());
      if (SelfTy->mayHaveSuperclass() &&
          Callee->getRepresentation() == SILFunctionTypeRepresentation::Method)
        isClassMethodAtOsize = true;
    }
    // Use command line option to control inlining in Osize mode.
    const uint64_t CallerBaseBenefitReductionFactor =
        AI.getFunction()->getModule().getOptions()
            .CallerBaseBenefitReductionFactor;
    BaseBenefit = BaseBenefit / CallerBaseBenefitReductionFactor;
  }

  // It is always OK to inline a simple call.
  // TODO: Maybe also consider the size of the callee?
  if (isPureCall(AI, BCA)) {
    OptRemark::Emitter::emitOrDebug(DEBUG_TYPE, &ORE, [&]() {
      using namespace OptRemark;
      return RemarkPassed("Inline", *AI.getInstruction())
             << "Pure call. Always profitable to inline "
             << NV("Callee", Callee);
    });

    LLVM_DEBUG(dumpCaller(AI.getFunction());
               llvm::dbgs() << " pure-call decision " << Callee->getName()
                            << '\n');
    return true;
  }

  if (Callee->hasSemanticsAttr(
          semantics::OPTIMIZE_SIL_INLINE_CONSTANT_ARGUMENTS) &&
      hasConstantArguments(AI)) {
    return true;
  }

  // If there is a "constant" enum argument to a synthesized enum comparison,
  // we can always inline it, because most of it will be constant folded
  // anyway.
  if (Callee->hasSemanticsAttr(semantics::DERIVED_ENUM_EQUALS) &&
      hasConstantEnumArgument(AI)) {
    return true;
  }

  // Bail out if this generic call can be optimized by means of
  // the generic specialization, because we prefer generic specialization
  // to inlining of generics.
  if (IsGeneric && canSpecializeGeneric(AI, Callee, AI.getSubstitutionMap())) {
    return false;
  }

  // Bail out if this is a generic call of a `@_specialize(exported:)` function
  // and we are in the early inliner. We want to give the generic specializer
  // the opportunity to see specialized call sites.
  if (IsGeneric && WhatToInline == InlineSelection::NoSemanticsAndEffects &&
      Callee->hasPrespecialization()) {
    return false;
  }

  SILLoopInfo *CalleeLI = LA->get(Callee);
  ShortestPathAnalysis *CalleeSPA = getSPA(Callee, CalleeLI);
  if (!CalleeSPA->isValid()) {
    CalleeSPA->analyze(CBI, [](FullApplySite FAS) {
      // We don't compute SPA for another call-level. Functions called from
      // the callee are assumed to have DefaultApplyLength.
      return DefaultApplyLength.getValue();
    });
  }

  ConstantTracker constTracker(Callee, &callerTracker, AI);
  DominanceInfo *DT = DA->get(Callee);
  SILBasicBlock *CalleeEntry = &Callee->front();
  DominanceOrder domOrder(CalleeEntry, DT, Callee->size());

  // We don't want to blow up the code size.
  // We will only inline if *ALL* dynamic accesses are
  // known and have no nested conflict.
  bool AllAccessesBeneficialToInline = true;
  bool returnsAllocation = false;

  // Calculate the inlining cost of the callee.
  int CalleeCost = 0;
  int Benefit = 0;

  // We don't know if we want to update the benefit with
  // the exclusivity heuristic or not. We can *only* do that
  // if AllAccessesBeneficialToInline is true.
  int ExclusivityBenefitWeight = 0;
  int ExclusivityBenefitBase = ExclusivityBenefit;
  if (EnableSILAggressiveInlining) {
    ExclusivityBenefitBase += 500;
  }

  SubstitutionMap CalleeSubstMap = AI.getSubstitutionMap();

  CallerWeight.updateBenefit(Benefit, BaseBenefit);

  // Go through all blocks of the function, accumulate the cost and find
  // benefits.
  while (SILBasicBlock *block = domOrder.getNext()) {
    constTracker.beginBlock();
    Weight BlockW = CalleeSPA->getWeight(block, CallerWeight);

    for (SILInstruction &I : *block) {
      constTracker.trackInst(&I);

      CalleeCost += (int)instructionInlineCost(I);

      if (FullApplySite FAI = FullApplySite::isa(&I)) {
        // Check if the callee is passed as an argument. If so, increase the
        // threshold, because inlining will (probably) eliminate the closure.
        SILInstruction *def = constTracker.getDefInCaller(FAI.getCallee());
        if (def && (isa<FunctionRefInst>(def) || isa<PartialApplyInst>(def)))
          BlockW.updateBenefit(Benefit, RemovedClosureBenefit);
        else if (isAutoDiffLinearMapWithControlFlow(FAI)) {
          // TODO: Do we need to tweak inlining benefits given to pullbacks
          // (with and without control-flow)?
          // For linear maps in Swift Autodiff, callees may be passed as an
          // argument, however, they may be hidden behind a branch-tracing
          // enum (tracing execution flow of the original function).
          //
          // If we can establish that we are inside of a Swift Autodiff linear
          // map and that the branch tracing input enum is wrapping pullback
          // closures, then we can update this function's benefit with
          // `RemovedClosureBenefit` because inlining will (probably) eliminate
          // the closure.
          BlockW.updateBenefit(Benefit, RemovedClosureBenefit);
        }

        // Check if inlining the callee would allow for further
        // optimizations like devirtualization or generic specialization.
        if (!def)
          def = dyn_cast_or_null<SingleValueInstruction>(FAI.getCallee());
        if (!def)
          continue;

        auto Subs = FAI.getSubstitutionMap();

        // Bail if it is not a generic call or inlining of generics is
        // forbidden.
        if (!EnableSILInliningOfGenerics || !Subs.hasAnySubstitutableParams())
          continue;

        if (!isa<FunctionRefInst>(def) && !isa<ClassMethodInst>(def) &&
            !isa<WitnessMethodInst>(def))
          continue;

        // It is a generic call inside the callee. Check if after inlining
        // it will be possible to perform a generic specialization or
        // devirtualization of this call.

        // Create the list of substitutions as they will be after
        // inlining.
        auto SubMap = Subs.subst(CalleeSubstMap);

        // Check if the call can be devirtualized.
        if (isa<ClassMethodInst>(def) || isa<WitnessMethodInst>(def) ||
            isa<SuperMethodInst>(def)) {
          // TODO: Take AI.getSubstitutions() into account.
          if (canDevirtualizeApply(FAI, nullptr)) {
            LLVM_DEBUG(llvm::dbgs() << "Devirtualization will be possible "
                                       "after inlining for the call:\n";
                       FAI.getInstruction()->dumpInContext());
            BlockW.updateBenefit(Benefit, DevirtualizedCallBenefit);
          }
        }

        // Check if a generic specialization would be possible.
        if (isa<FunctionRefInst>(def)) {
          auto CalleeF = FAI.getCalleeFunction();
          if (!canSpecializeGeneric(FAI, CalleeF, SubMap))
            continue;
          LLVM_DEBUG(llvm::dbgs() << "Generic specialization will be possible "
                                     "after inlining for the call:\n";
                     FAI.getInstruction()->dumpInContext());
          BlockW.updateBenefit(Benefit, GenericSpecializationBenefit);
        }
      } else if (auto *LI = dyn_cast<LoadInst>(&I)) {
        // Check if it's a load from a stack location in the caller. Such a
        // load might be optimized away if inlined.
        if (constTracker.isStackAddrInCaller(LI->getOperand()))
          BlockW.updateBenefit(Benefit, RemovedLoadBenefit);
      } else if (auto *SI = dyn_cast<StoreInst>(&I)) {
        // Check if it's a store to a stack location in the caller. Such a
        // store might be optimized away if inlined.
        if (constTracker.isStackAddrInCaller(SI->getDest()))
          BlockW.updateBenefit(Benefit, RemovedStoreBenefit);
      } else if (isa<StrongReleaseInst>(&I) || isa<ReleaseValueInst>(&I)) {
        SILValue Op = stripCasts(I.getOperand(0));
        if (auto *Arg = dyn_cast<SILFunctionArgument>(Op)) {
          if (Arg->getArgumentConvention() ==
              SILArgumentConvention::Direct_Guaranteed) {
            BlockW.updateBenefit(Benefit, RefCountBenefit);
          }
        }
      } else if (auto *BI = dyn_cast<BuiltinInst>(&I)) {
        if (BI->getBuiltinInfo().ID == BuiltinValueKind::OnFastPath)
          BlockW.updateBenefit(Benefit, FastPathBuiltinBenefit);
      } else if (auto *BAI = dyn_cast<BeginAccessInst>(&I)) {
        if (BAI->getEnforcement() == SILAccessEnforcement::Dynamic) {
          // The access is dynamic and has no nested conflict.
          // See if the storage location is considered by
          // access enforcement optimizations.
          auto storage = AccessStorage::compute(BAI->getSource());
          if (BAI->hasNoNestedConflict() && (storage.isFormalAccessBase())) {
            BlockW.updateBenefit(ExclusivityBenefitWeight,
                                 ExclusivityBenefitBase);
          } else {
            AllAccessesBeneficialToInline = false;
          }
        }
      } else if (auto ri = dyn_cast<ReturnInst>(&I)) {
        SILValue retVal = ri->getOperand();
        if (auto *eir = dyn_cast<EndInitLetRefInst>(retVal))
          retVal = eir->getOperand();
        if (auto *uci = dyn_cast<UpcastInst>(retVal))
          retVal = uci->getOperand();

        // Inlining functions which return an allocated object or
        // partial_apply most likely has a benefit in the caller, because e.g.
        // it can enable de-virtualization.
        if (isa<AllocationInst>(retVal) || isa<PartialApplyInst>(retVal) ||
            isTupleWithAllocsOrPartialApplies(retVal)) {
          BlockW.updateBenefit(Benefit, RemovedCallBenefit + 10);
          returnsAllocation = true;
        }
      }
    }
    // Don't count costs in blocks which are dead after inlining.
    SILBasicBlock *takenBlock =
        constTracker.getTakenBlock(block->getTerminator());
    if (takenBlock) {
      BlockW.updateBenefit(Benefit, RemovedTerminatorBenefit);
      domOrder.pushChildrenIf(block, [=](SILBasicBlock *child) {
        return child->getSinglePredecessorBlock() != block ||
               child == takenBlock;
      });
    } else {
      domOrder.pushChildren(block);
    }
  }

  if (AllAccessesBeneficialToInline) {
    Benefit = std::max(Benefit, ExclusivityBenefitWeight);
  }

  if (AI.getFunction()->isGlobalInitOnceFunction() && isAllocator(Callee)) {
    // Inlining constructors into global initializers increases the chances
    // that the global can be initialized statically.
    CallerWeight.updateBenefit(Benefit, GlobalInitBenefit);
  }

  if (AI.getFunction()->isThunk()) {
    // Only inline trivial functions into thunks (which will not increase the
    // code size).
    if (CalleeCost > TrivialFunctionThreshold) {
      return false;
    }

    LLVM_DEBUG(dumpCaller(AI.getFunction());
               llvm::dbgs() << " decision {" << CalleeCost << " into thunk} "
                            << Callee->getName() << '\n');
    return true;
  }

  // We reduce the benefit if the caller is too large. For this we use a
  // cubic function on the number of caller blocks. This starts to prevent
  // inlining at about 800 - 1000 caller blocks.
  if (NumCallerBlocks < BlockLimitMaxIntNumerator)
    Benefit -= (NumCallerBlocks * NumCallerBlocks) / BlockLimitDenominator *
               NumCallerBlocks / BlockLimitDenominator;
  else
    // The calculation in the if branch would overflow if we performed it.
    Benefit = 0;

  // If we have profile info - use it for the final inlining decision.
  auto *bb = AI.getInstruction()->getParent();
  auto bbIt = BBToWeightMap.find(bb);
  if (bbIt != BBToWeightMap.end()) {
    if (profileBasedDecision(AI, Benefit, Callee, CalleeCost, NumCallerBlocks,
                             bbIt)) {
      OptRemark::Emitter::emitOrDebug(DEBUG_TYPE, &ORE, [&]() {
        using namespace OptRemark;
        return RemarkPassed("Inline", *AI.getInstruction())
               << "Profitable due to provided profile";
      });
      return true;
    }

    OptRemark::Emitter::emitOrDebug(DEBUG_TYPE, &ORE, [&]() {
      using namespace OptRemark;
      return RemarkMissed("Inline", *AI.getInstruction())
             << "Not profitable due to provided profile";
    });
    return false;
  }

  if (isClassMethodAtOsize && Benefit > OSizeClassMethodBenefit) {
    Benefit = OSizeClassMethodBenefit;
    if (returnsAllocation)
      Benefit += 10;
  }

  // This is the final inlining decision.
  if (CalleeCost > Benefit) {
    OptRemark::Emitter::emitOrDebug(DEBUG_TYPE, &ORE, [&]() {
      using namespace OptRemark;
      return RemarkMissed("Inline", *AI.getInstruction())
             << "Not profitable to inline function " << NV("Callee", Callee)
             << " (cost = " << NV("Cost", CalleeCost)
             << ", benefit = " << NV("Benefit", Benefit) << ")";
    });
    return false;
  }

  NumCallerBlocks += Callee->size();

  LLVM_DEBUG(dumpCaller(AI.getFunction());
             llvm::dbgs() << " decision {c=" << CalleeCost << ", b=" << Benefit
                          << ", l=" << CalleeSPA->getScopeLength(CalleeEntry, 0)
                          << ", c-w=" << CallerWeight
                          << ", bb=" << Callee->size()
                          << ", c-bb=" << NumCallerBlocks << "} "
                          << Callee->getName() << '\n');
  OptRemark::Emitter::emitOrDebug(DEBUG_TYPE, &ORE, [&]() {
    using namespace OptRemark;
    return RemarkPassed("Inlined", *AI.getInstruction())
           << NV("Callee", Callee) << " inlined into "
           << NV("Caller", AI.getFunction())
           << " (cost = " << NV("Cost", CalleeCost)
           << ", benefit = " << NV("Benefit", Benefit) << ")";
  });

  return true;
}

static bool returnsClosure(SILFunction *F) {
  for (SILBasicBlock &BB : *F) {
    if (auto *RI = dyn_cast<ReturnInst>(BB.getTerminator())) {
      return isa<PartialApplyInst>(RI->getOperand());
    }
  }
  return false;
}

static bool hasMaxNumberOfBasicBlocks(SILFunction *f, int limit) {
  for (SILBasicBlock &block : *f) {
    (void)block;
    if (limit == 0)
      return false;
    limit--;
  }
  return true;
}

static bool isInlineAlwaysCallSite(SILFunction *Callee, int numCallerBlocks) {
  if (Callee->isTransparent())
    return true;
  if (Callee->getInlineStrategy() == AlwaysInline &&
      !Callee->getModule().getOptions().IgnoreAlwaysInline &&
      // Protect against misuse of @inline(__always).
      // Inline-always should only be used on relatively small functions.
      // It must not be used on recursive functions. This check prevents the
      // compiler from blowing up if @inline(__always) is put on a recursive
      // function.
      (numCallerBlocks < 64 || hasMaxNumberOfBasicBlocks(Callee, 64))) {
    return true;
  }
  return false;
}

/// Checks if a given generic apply should be inlined unconditionally, i.e.
/// without any complex analysis using e.g. a cost model.
/// It returns true if a function should be inlined.
/// It returns false if a function should not be inlined.
/// It returns None if the decision cannot be made without a more complex
/// analysis.
static std::optional<bool> shouldInlineGeneric(FullApplySite AI,
                                               int numCallerBlocks) {
  assert(AI.hasSubstitutions() && "Expected a generic apply");

  SILFunction *Callee = AI.getReferencedFunctionOrNull();

  // Do not inline @_semantics functions when compiling the stdlib,
  // because they need to be preserved, so that the optimizer
  // can properly optimize the user code later.
  ModuleDecl *SwiftModule = Callee->getModule().getSwiftModule();
  if ((Callee->hasSemanticsAttrThatStartsWith("array.") ||
       Callee->hasSemanticsAttrThatStartsWith("fixed_storage.")) &&
      (SwiftModule->isStdlibModule() || SwiftModule->isOnoneSupportModule()))
    return false;

  // Do not inline into thunks.
  if (AI.getFunction()->isThunk())
    return false;

  // Always inline generic functions which are marked as
  // AlwaysInline or transparent.
  if (isInlineAlwaysCallSite(Callee, numCallerBlocks))
    return true;

  // If all substitutions are concrete, then there is no need to perform the
  // generic inlining. Let the generic specializer create a specialized
  // function and then decide if it is beneficial to inline it.
  if (!AI.getSubstitutionMap().getRecursiveProperties().hasArchetype())
    return false;

  if (Callee->getLoweredFunctionType()->getCoroutineKind() !=
      SILCoroutineKind::None) {
    // Co-routines are so expensive (e.g. Array.subscript.read) that we always
    // enable inlining them in a generic context. Though the final inlining
    // decision is done by the usual heuristics. Therefore we return None and
    // not true.
    return std::nullopt;
  }

  // The returned partial_apply of a thunk is most likely being optimized away
  // if inlined. Because some thunks cannot be specialized (e.g. if an opened
  // existential is in the substitution list), we inline such thunks also in
  // case they are generic.
  if (Callee->isThunk() && returnsClosure(Callee))
    return true;

  // All other generic functions should not be inlined if this kind of
  // inlining is disabled.
  if (!EnableSILInliningOfGenerics)
    return false;

  // It is not clear yet if this function should be decided or not.
  return std::nullopt;
}

bool SILPerformanceInliner::decideInWarmBlock(
    FullApplySite AI, Weight CallerWeight, ConstantTracker &callerTracker,
    int &NumCallerBlocks,
    const llvm::DenseMap<SILBasicBlock *, uint64_t> &BBToWeightMap) {
  if (AI.hasSubstitutions()) {
    // Only inline generics if it is definitively clear that it should be done.
    auto ShouldInlineGeneric = shouldInlineGeneric(AI, NumCallerBlocks);
    if (ShouldInlineGeneric.has_value())
      return ShouldInlineGeneric.value();
  }

  SILFunction *Callee = AI.getReferencedFunctionOrNull();

  if (isInlineAlwaysCallSite(Callee, NumCallerBlocks)) {
    LLVM_DEBUG(dumpCaller(AI.getFunction());
               llvm::dbgs() << " always-inline decision "
                            << Callee->getName() << '\n');
    return true;
  }

  return isProfitableToInline(AI, CallerWeight, callerTracker, NumCallerBlocks,
                              BBToWeightMap);
}

/// Return true if inlining this call site into a cold block is profitable.
bool SILPerformanceInliner::decideInColdBlock(FullApplySite AI,
                                              SILFunction *Callee,
                                              int numCallerBlocks) {
  if (AI.hasSubstitutions()) {
    // Only inline generics if it is definitively clear that it should be done.
    auto ShouldInlineGeneric = shouldInlineGeneric(AI, numCallerBlocks);
    if (ShouldInlineGeneric.has_value())
      return ShouldInlineGeneric.value();

    return false;
  }

  if (isInlineAlwaysCallSite(Callee, numCallerBlocks)) {
    LLVM_DEBUG(dumpCaller(AI.getFunction());
               llvm::dbgs() << " always-inline decision "
                            << Callee->getName() << '\n');
    return true;
  }

  int CalleeCost = 0;

  for (SILBasicBlock &Block : *Callee) {
    for (SILInstruction &I : Block) {
      CalleeCost += int(instructionInlineCost(I));
      if (CalleeCost > TrivialFunctionThreshold)
        return false;
    }
  }

  LLVM_DEBUG(dumpCaller(AI.getFunction());
             llvm::dbgs() << " cold decision {" << CalleeCost << "} "
                          << Callee->getName() << '\n');
  return true;
}

/// Record additional weight increases.
///
/// Why can't we just add the weight when we call isProfitableToInline?
/// Because the additional weight is for _another_ function than the currently
/// handled callee.
static void addWeightCorrection(
    FullApplySite FAS,
    llvm::DenseMap<FullApplySite, int> &WeightCorrections) {
  SILFunction *Callee = FAS.getReferencedFunctionOrNull();
  if (Callee && Callee->hasSemanticsAttr(semantics::ARRAY_UNINITIALIZED)) {
    // We want to inline the argument to an array.uninitialized call, because
    // this argument is most likely a call to a function which contains the
    // buffer allocation for the array. It is essential to inline it for stack
    // promotion of the array buffer.
    SILValue BufferArg = FAS.getArgument(0);
    SILValue Base = stripValueProjections(stripCasts(BufferArg));
    if (auto BaseApply = FullApplySite::isa(Base))
      WeightCorrections[BaseApply] += 6;
  }
}

static bool containsWeight(TermInst *inst) {
  for (auto &succ : inst->getSuccessors()) {
    if (succ.getCount()) {
      return true;
    }
  }
  return false;
}

static void
addToBBCounts(llvm::DenseMap<SILBasicBlock *, uint64_t> &BBToWeightMap,
              uint64_t numToAdd, swift::TermInst *termInst) {
  for (auto &succ : termInst->getSuccessors()) {
    auto *currBB = succ.getBB();
    assert(BBToWeightMap.find(currBB) != BBToWeightMap.end() &&
           "Expected to find block in map");
    BBToWeightMap[currBB] += numToAdd;
  }
}

static void
calculateBBWeights(SILFunction *Caller, DominanceInfo *DT,
                   llvm::DenseMap<SILBasicBlock *, uint64_t> &BBToWeightMap) {
  auto entryCount = Caller->getEntryCount();
  if (!entryCount) {
    // No profile for function - return.
    return;
  }
  // Add all blocks to BBToWeightMap with count 0.
  for (auto &block : *Caller) {
    BBToWeightMap[&block] = 0;
  }
  BBToWeightMap[Caller->getEntryBlock()] = entryCount.getValue();
  DominanceOrder domOrder(&Caller->front(), DT, Caller->size());
  while (SILBasicBlock *block = domOrder.getNext()) {
    auto bbIt = BBToWeightMap.find(block);
    assert(bbIt != BBToWeightMap.end() && "Expected to find block in map");
    auto bbCount = bbIt->getSecond();
    auto *termInst = block->getTerminator();
    if (containsWeight(termInst)) {
      // Instruction already contains accurate counters - use them as-is.
      uint64_t countSum = 0;
      uint64_t blocksWithoutCount = 0;
      for (auto &succ : termInst->getSuccessors()) {
        auto *currBB = succ.getBB();
        assert(BBToWeightMap.find(currBB) != BBToWeightMap.end() &&
               "Expected to find block in map");
        auto currCount = succ.getCount();
        if (!currCount) {
          ++blocksWithoutCount;
          continue;
        }
        auto currCountVal = currCount.getValue();
        countSum += currCountVal;
        BBToWeightMap[currBB] += currCountVal;
      }
      if (countSum < bbCount) {
        // Inaccurate profile - fill in the gaps for BBs without a count:
        if (blocksWithoutCount > 0) {
          auto numToAdd = (bbCount - countSum) / blocksWithoutCount;
          for (auto &succ : termInst->getSuccessors()) {
            auto *currBB = succ.getBB();
            auto currCount = succ.getCount();
            if (!currCount) {
              BBToWeightMap[currBB] += numToAdd;
            }
          }
        }
      } else {
        auto numOfSucc = termInst->getSuccessors().size();
        assert(numOfSucc > 0 && "Expected successors > 0");
        auto numToAdd = (countSum - bbCount) / numOfSucc;
        addToBBCounts(BBToWeightMap, numToAdd, termInst);
      }
    } else {
      // Fill counters speculatively.
      auto numOfSucc = termInst->getSuccessors().size();
      if (numOfSucc == 0) {
        // No successors to fill.
        continue;
      }
      auto numToAdd = bbCount / numOfSucc;
      addToBBCounts(BBToWeightMap, numToAdd, termInst);
    }
    domOrder.pushChildrenIf(block, [&](SILBasicBlock *child) { return true; });
  }
}

void SILPerformanceInliner::collectAppliesToInline(
    SILFunction *Caller, SmallVectorImpl<FullApplySite> &Applies) {
  DominanceInfo *DT = DA->get(Caller);
  SILLoopInfo *LI = LA->get(Caller);

  llvm::DenseMap<FullApplySite, int> WeightCorrections;

  // Compute the shortest-path analysis for the caller.
  ShortestPathAnalysis *SPA = getSPA(Caller, LI);
  SPA->analyze(CBI, [&](FullApplySite FAS) -> int {
    // This closure returns the length of a called function.

    // At this occasion we record additional weight increases.
    addWeightCorrection(FAS, WeightCorrections);

    if (SILFunction *Callee = getEligibleFunction(FAS, WhatToInline, SRA)) {
      // Compute the shortest-path analysis for the callee.
      SILLoopInfo *CalleeLI = LA->get(Callee);
      ShortestPathAnalysis *CalleeSPA = getSPA(Callee, CalleeLI);
      if (!CalleeSPA->isValid()) {
        CalleeSPA->analyze(CBI, [](FullApplySite FAS) {
          // We don't compute SPA for another call-level. Functions called
          // from the callee are assumed to have DefaultApplyLength.
          return DefaultApplyLength.getValue();
        });
      }
      int CalleeLength = CalleeSPA->getScopeLength(&Callee->front(), 0);
      // Just in case the callee is a noreturn function.
      if (CalleeLength >= ShortestPathAnalysis::InitialDist)
        return DefaultApplyLength;
      return CalleeLength;
    }
    // Some unknown function.
    return DefaultApplyLength;
  });

#ifndef NDEBUG
  if (PrintShortestPathInfo) {
    SPA->dump();
  }
#endif

  ConstantTracker constTracker(Caller);
  DominanceOrder domOrder(&Caller->front(), DT, Caller->size());
  int NumCallerBlocks = (int)Caller->size();

  llvm::DenseMap<SILBasicBlock *, uint64_t> BBToWeightMap;
  calculateBBWeights(Caller, DT, BBToWeightMap);

  // Go through all instructions and find candidates for inlining.
  // We do this in dominance order for the constTracker.
  SmallVector<FullApplySite, 8> InitialCandidates;
  while (SILBasicBlock *block = domOrder.getNext()) {
    constTracker.beginBlock();
    Weight BlockWeight;

    for (auto I = block->begin(), E = block->end(); I != E; ++I) {
      constTracker.trackInst(&*I);

      if (!FullApplySite::isa(&*I))
        continue;

      pm->setDependingOnCalleeBodies();

      FullApplySite AI = FullApplySite(&*I);

      auto *Callee = getEligibleFunction(AI, WhatToInline, SRA);
      if (Callee) {
        // Check if we have an always_inline or transparent function. If we
        // do, just add it to our final Applies list and continue.
        if (isInlineAlwaysCallSite(Callee, NumCallerBlocks)) {
          NumCallerBlocks += Callee->size();
          Applies.push_back(AI);
          continue;
        }

        // Next make sure that we do not have more blocks than our overall
        // caller block limit at this point. In such a case, we continue. This
        // will ensure that any further non-inline-always functions are
        // skipped, but we /do/ inline any inline_always functions remaining.
        if (NumCallerBlocks > OverallCallerBlockLimit &&
            // Still allow inlining of small functions.
            !hasMaxNumberOfBasicBlocks(Callee, 8) &&
            !Caller->hasSemanticsAttr(
                semantics::OPTIMIZE_SIL_INLINE_AGGRESSIVE)) {
          continue;
        }

        // Otherwise, calculate our block weights and determine if we want to
        // inline this.
        if (!BlockWeight.isValid())
          BlockWeight = SPA->getWeight(block, Weight(0, 0));

        // The actual weight including a possible weight correction.
        Weight W(BlockWeight, WeightCorrections.lookup(AI));

        if (decideInWarmBlock(AI, W, constTracker, NumCallerBlocks,
                              BBToWeightMap)) {
          InitialCandidates.push_back(AI);
        }
      }
    }

    domOrder.pushChildrenIf(block, [&](SILBasicBlock *child) {
      if (CBI.isCold(child)) {
        // Handle cold blocks separately.
        visitColdBlocks(InitialCandidates, child, DT, NumCallerBlocks);
        return false;
      }
      return true;
    });
  }

  // Calculate how many times a callee is called from this caller.
  llvm::DenseMap<SILFunction *, unsigned> CalleeCount;
  for (auto AI : InitialCandidates) {
    SILFunction *Callee = AI.getReferencedFunctionOrNull();
    assert(Callee && "apply_inst does not have a direct callee anymore");
    ++CalleeCount[Callee];
  }

  // Now copy each candidate callee that has a small enough number of
  // call sites into the final set of call sites.
  for (auto AI : InitialCandidates) {
    SILFunction *Callee = AI.getReferencedFunctionOrNull();
    assert(Callee && "apply_inst does not have a direct callee anymore");

    const unsigned CallsToCalleeThreshold = 1024;
    if (CalleeCount[Callee] <= CallsToCalleeThreshold) {
      Applies.push_back(AI);
    }
  }
}

/// Attempt to inline all calls smaller than our threshold.
/// Returns true if a function was inlined.
bool SILPerformanceInliner::inlineCallsIntoFunction(SILFunction *Caller) {
  // Don't optimize functions that are marked with the opt.never attribute.
  if (!Caller->shouldOptimize())
    return false;

  LLVM_DEBUG(llvm::dbgs() << "Inlining calls into " << Caller->getName()
                          << "\n");

  // First step: collect all the functions we want to inline. We
  // don't change anything yet so that the dominator information
  // remains valid.
  SmallVector<FullApplySite, 8> AppliesToInline;
  collectAppliesToInline(Caller, AppliesToInline);
  bool invalidatedStackNesting = false;

  if (AppliesToInline.empty())
    return false;

  InstructionDeleter deleter;

  // Second step: do the actual inlining.
  // We inline in reverse order, because for very large blocks with many
  // applies to inline, splitting the block at every apply would be quadratic.
  for (auto AI : llvm::reverse(AppliesToInline)) {
    SILFunction *Callee = AI.getReferencedFunctionOrNull();
    assert(Callee && "apply_inst does not have a direct callee anymore");

    if (!Callee->shouldOptimize()) {
      continue;
    }

    // If we have a callee that doesn't have ownership, but the caller does
    // have ownership... do not inline. The two modes are incompatible, so
    // skip this apply site for now.
    if (!Callee->hasOwnership() && Caller->hasOwnership()) {
      LLVM_DEBUG(llvm::dbgs() << "Not inlining non-ossa " << Caller->getName()
                              << "\n");
      continue;
    }

    LLVM_DEBUG(dumpCaller(Caller);
               llvm::dbgs() << " inline [" << Callee->size() << "->"
                            << Caller->size() << "] " << Callee->getName()
                            << "\n");

    // Note that this must happen before inlining as the apply instruction
    // will be deleted after inlining.
    invalidatedStackNesting |= SILInliner::invalidatesStackNesting(AI);

    if (SILPrintInliningCallee) {
      printInliningDetailsCallee(PassName, Caller, Callee);
    }
    if (SILPrintInliningCallerBefore) {
      printInliningDetailsCallerBefore(PassName, Caller, Callee);
    }

    // We've already determined we should be able to inline this, so
    // unconditionally inline the function.
    //
    // If for whatever reason we can not inline this function, inlineFullApply
    // will assert, so we are safe making this assumption.
    SILInliner::inlineFullApply(AI, SILInliner::InlineKind::PerformanceInline,
                                FuncBuilder, deleter);

    // When inlining an OSSA function into a non-OSSA function, ownership of
    // nonescaping closures is lowered. At that point, they are recognized as
    // stack users. Since they weren't recognized as such before, they may not
    // satisfy stack discipline. Fix that up now.
    invalidatedStackNesting |=
        Callee->hasOwnership() && !Caller->hasOwnership();

    ++NumFunctionsInlined;

    if (SILPrintInliningCallerAfter) {
      printInliningDetailsCallerAfter(PassName, Caller, Callee);
    }
    if (EnableVerifyAfterEachInlining) {
      deleter.cleanupDeadInstructions();

      // The inliner splits blocks at call sites. Re-merge trivial branches to
      // reestablish a canonical CFG.
      mergeBasicBlocks(Caller);

      if (invalidatedStackNesting) {
        StackNesting::fixNesting(Caller);
        invalidatedStackNesting = false;
      }

      Caller->verify();
      pm->runSwiftFunctionVerification(Caller);
    }
  }

  deleter.cleanupDeadInstructions();

  // The inliner splits blocks at call sites. Re-merge trivial branches to
  // reestablish a canonical CFG.
  mergeBasicBlocks(Caller);

  if (invalidatedStackNesting) {
    StackNesting::fixNesting(Caller);
  }

  updateAllGuaranteedPhis(pm, Caller);

  // If we were asked to verify our caller after inlining all callees we could
  // find into it, do so now. This makes it easier to catch verification bugs
  // in the inliner without running the entire inliner.
  if (EnableVerifyAfterInlining) {
    Caller->verify();
    pm->runSwiftFunctionVerification(Caller);
  }

  return true;
}

// Find functions in cold blocks which are forced to be inlined.
// All other functions are not inlined in cold blocks.
void SILPerformanceInliner::visitColdBlocks(
    SmallVectorImpl<FullApplySite> &AppliesToInline, SILBasicBlock *Root,
    DominanceInfo *DT, int numCallerBlocks) {
  DominanceOrder domOrder(Root, DT);
  while (SILBasicBlock *block = domOrder.getNext()) {
    for (SILInstruction &I : *block) {
      auto *AI = dyn_cast<ApplyInst>(&I);
      if (!AI)
        continue;

      auto *Callee = getEligibleFunction(AI, WhatToInline, SRA);
      if (Callee && decideInColdBlock(AI, Callee, numCallerBlocks)) {
        AppliesToInline.push_back(AI);
      }
    }
    domOrder.pushChildren(block);
  }
}

//===----------------------------------------------------------------------===//
// Performance Inliner Pass
//===----------------------------------------------------------------------===//

namespace {

class SILPerformanceInlinerPass : public SILFunctionTransform {
  /// Specifies which functions not to inline, based on @_semantics and
  /// global_init attributes.
  InlineSelection WhatToInline;
  std::string PassName;

public:
  SILPerformanceInlinerPass(InlineSelection WhatToInline, StringRef LevelName)
      : WhatToInline(WhatToInline), PassName(LevelName) {}

  void run() override {
    DominanceAnalysis *DA = PM->getAnalysis<DominanceAnalysis>();
    PostDominanceAnalysis *PDA = PM->getAnalysis<PostDominanceAnalysis>();
    SILLoopAnalysis *LA = PM->getAnalysis<SILLoopAnalysis>();
    BasicCalleeAnalysis *BCA = PM->getAnalysis<BasicCalleeAnalysis>();
    IsSelfRecursiveAnalysis *SRA = PM->getAnalysis<IsSelfRecursiveAnalysis>();
    OptRemark::Emitter ORE(DEBUG_TYPE, *getFunction());

    if (getOptions().InlineThreshold == 0) {
      return;
    }

    auto OptMode = getFunction()->getEffectiveOptimizationMode();

    SILOptFunctionBuilder FuncBuilder(*this);

    SILPerformanceInliner Inliner(getID(), FuncBuilder, WhatToInline,
                                  getPassManager(), DA, PDA, LA, BCA, SRA,
                                  OptMode, ORE);

    assert(getFunction()->isDefinition() &&
           "Expected only functions with bodies!");

    // Inline things into this function, and if we do so invalidate
    // analyses for this function and restart the pipeline so that we
    // can further optimize this function before attempting to inline
    // in it again.
    if (Inliner.inlineCallsIntoFunction(getFunction())) {
      removeUnreachableBlocks(*getFunction());
      invalidateAnalysis(SILAnalysis::InvalidationKind::FunctionBody);
      restartPassPipeline();
    }
  }
};

} // end anonymous namespace

SILTransform *swift::createAlwaysInlineInliner() {
  return new SILPerformanceInlinerPass(InlineSelection::OnlyInlineAlways,
                                       "InlineAlways Performance Inliner");
}

/// Create an inliner pass that does not inline functions that are marked with
/// the @_semantics or @_effects attributes.
SILTransform *swift::createEarlyPerfInliner() {
  return new SILPerformanceInlinerPass(InlineSelection::NoSemanticsAndEffects,
                                       "Early Performance Inliner");
}

/// Create an inliner pass that inlines all functions that are marked with
/// the @_semantics, @_effects or global_init attributes.
SILTransform *swift::createPerfInliner() {
  return new SILPerformanceInlinerPass(InlineSelection::Everything,
                                       "Performance Inliner");
}