//===--- PerformanceInliner.cpp - Basic cost based performance inlining ---===//
//
// This source file is part of the Swift.org open source project
//
// Copyright (c) 2014 - 2016 Apple Inc. and the Swift project authors
// Licensed under Apache License v2.0 with Runtime Library Exception
//
// See http://swift.org/LICENSE.txt for license information
// See http://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
//
//===----------------------------------------------------------------------===//

#define DEBUG_TYPE "sil-inliner"
#include "swift/SILOptimizer/PassManager/Passes.h"
#include "swift/SILOptimizer/PassManager/Transforms.h"
#include "swift/SILOptimizer/Utils/PerformanceInlinerUtils.h"
#include "swift/Strings.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/CommandLine.h"

using namespace swift;

STATISTIC(NumFunctionsInlined, "Number of functions inlined");

llvm::cl::opt<bool> PrintShortestPathInfo(
    "print-shortest-path-info", llvm::cl::init(false),
    llvm::cl::desc("Print shortest-path information for inlining"));

llvm::cl::opt<bool> EnableSILInliningOfGenerics(
    "sil-inline-generics", llvm::cl::init(false),
    llvm::cl::desc("Enable inlining of generics"));

//===----------------------------------------------------------------------===//
//                           Performance Inliner
//===----------------------------------------------------------------------===//

namespace {

// Controls the decision to inline functions with @_semantics, @effects and
// global_init attributes.
enum class InlineSelection {
  Everything,
  NoGlobalInit, // and no availability semantics calls
  NoSemanticsAndGlobalInit
};

using Weight = ShortestPathAnalysis::Weight;

class SILPerformanceInliner {
  /// Specifies which functions not to inline, based on @_semantics and
  /// global_init attributes.
  InlineSelection WhatToInline;

  DominanceAnalysis *DA;
  SILLoopAnalysis *LA;

  // For keys of SILFunction and SILLoop.
  llvm::DenseMap<SILFunction *, ShortestPathAnalysis *> SPAs;
  llvm::SpecificBumpPtrAllocator<ShortestPathAnalysis> SPAAllocator;

  ColdBlockInfo CBI;

  /// The following constants define the cost model for inlining. Some
  /// constants are also defined in ShortestPathAnalysis.
  enum {
    /// The base value for every call: it represents the benefit of removing
    /// the call overhead itself.
    RemovedCallBenefit = 20,

    /// The benefit if the operand of an apply gets constant, e.g. if a closure
    /// is passed to an apply instruction in the callee.
    RemovedClosureBenefit = RemovedCallBenefit + 50,

    /// The benefit if a load can (probably) be eliminated because it loads
    /// from a stack location in the caller.
    RemovedLoadBenefit = RemovedCallBenefit + 5,

    /// The benefit if a store can (probably) be eliminated because it stores
    /// to a stack location in the caller.
    RemovedStoreBenefit = RemovedCallBenefit + 10,

    /// The benefit if the condition of a terminator instruction gets constant
    /// due to inlining.
    RemovedTerminatorBenefit = RemovedCallBenefit + 10,

    /// The benefit if a retain/release can (probably) be eliminated after
    /// inlining.
    RefCountBenefit = RemovedCallBenefit + 20,

    /// The benefit of an onFastPath builtin.
    FastPathBuiltinBenefit = RemovedCallBenefit + 40,

    /// Approximately up to this cost level a function can be inlined without
    /// increasing the code size.
    TrivialFunctionThreshold = 18,

    /// Configuration for the caller block limit.
    BlockLimitDenominator = 10000,

    /// The assumed execution length of a function call.
    DefaultApplyLength = 10
  };
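
  // In short, these constants are consumed as follows (see
  // isProfitableToInline below): every callee instruction adds its
  // instructionInlineCost() to the callee cost, while each recognized pattern
  // (constant closure argument, load/store of a caller stack location,
  // terminator that becomes constant, release of a guaranteed argument,
  // onFastPath builtin) adds its benefit, weighted by the block's
  // ShortestPathAnalysis weight. The call site is inlined if the callee cost
  // does not exceed the accumulated benefit (after a penalty for very large
  // callers).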

#ifndef NDEBUG
  SILFunction *LastPrintedCaller = nullptr;
  void dumpCaller(SILFunction *Caller) {
    if (Caller != LastPrintedCaller) {
      llvm::dbgs() << "\nInline into caller: " << Caller->getName() << '\n';
      LastPrintedCaller = Caller;
    }
  }
#endif

  ShortestPathAnalysis *getSPA(SILFunction *F, SILLoopInfo *LI) {
    ShortestPathAnalysis *&SPA = SPAs[F];
    if (!SPA) {
      SPA = new (SPAAllocator.Allocate()) ShortestPathAnalysis(F, LI);
    }
    return SPA;
  }

  SILFunction *getEligibleFunction(FullApplySite AI);

  bool isProfitableToInline(FullApplySite AI, Weight CallerWeight,
                            ConstantTracker &callerTracker,
                            int &NumCallerBlocks, bool IsGeneric);

  bool decideInWarmBlock(FullApplySite AI, Weight CallerWeight,
                         ConstantTracker &callerTracker, int &NumCallerBlocks);

  bool decideInColdBlock(FullApplySite AI, SILFunction *Callee);

  void visitColdBlocks(SmallVectorImpl<FullApplySite> &AppliesToInline,
                       SILBasicBlock *root, DominanceInfo *DT);

  void collectAppliesToInline(SILFunction *Caller,
                              SmallVectorImpl<FullApplySite> &Applies);

public:
  SILPerformanceInliner(InlineSelection WhatToInline, DominanceAnalysis *DA,
                        SILLoopAnalysis *LA)
      : WhatToInline(WhatToInline), DA(DA), LA(LA), CBI(DA) {}

  bool inlineCallsIntoFunction(SILFunction *F);
};

} // namespace

// Return true if the callee has self-recursive calls.
static bool calleeIsSelfRecursive(SILFunction *Callee) {
  for (auto &BB : *Callee)
    for (auto &I : BB)
      if (auto Apply = FullApplySite::isa(&I))
        if (Apply.getReferencedFunction() == Callee)
          return true;
  return false;
}

// Returns the callee of an apply_inst if it is basically inlinable.
SILFunction *SILPerformanceInliner::getEligibleFunction(FullApplySite AI) {

  SILFunction *Callee = AI.getReferencedFunction();

  if (!Callee) {
    return nullptr;
  }

  // Don't inline functions that are marked with the @_semantics or @effects
  // attribute if the inliner is asked not to inline them.
  if (Callee->hasSemanticsAttrs() || Callee->hasEffectsKind()) {
    if (WhatToInline == InlineSelection::NoSemanticsAndGlobalInit) {
      return nullptr;
    }
    // The "availability" semantics attribute is treated like global-init.
    if (Callee->hasSemanticsAttrs() &&
        WhatToInline != InlineSelection::Everything &&
        Callee->hasSemanticsAttrThatStartsWith("availability")) {
      return nullptr;
    }
  } else if (Callee->isGlobalInit()) {
    if (WhatToInline != InlineSelection::Everything) {
      return nullptr;
    }
  }

  // We can't inline external declarations.
  if (Callee->empty() || Callee->isExternalDeclaration()) {
    return nullptr;
  }

  // Explicitly disabled inlining.
  if (Callee->getInlineStrategy() == NoInline) {
    return nullptr;
  }

  if (!Callee->shouldOptimize()) {
    return nullptr;
  }

  // We don't support this yet.
  if (AI.hasSubstitutions())
    return nullptr;

  SILFunction *Caller = AI.getFunction();

  // We don't support inlining a function that binds dynamic self because we
  // have no mechanism to preserve the original function's local self metadata.
  if (mayBindDynamicSelf(Callee)) {
    // Check if passed Self is the same as the Self of the caller.
    // In this case, it is safe to inline because both functions
    // use the same Self.
    if (AI.hasSelfArgument() && Caller->hasSelfParam()) {
      auto CalleeSelf = stripCasts(AI.getSelfArgument());
      auto CallerSelf = Caller->getSelfArgument();
      if (CalleeSelf != SILValue(CallerSelf))
        return nullptr;
    } else
      return nullptr;
  }

  // Detect self-recursive calls.
  if (Caller == Callee) {
    return nullptr;
  }

  // A non-fragile function may not be inlined into a fragile function.
  if (Caller->isFragile() &&
      !Callee->hasValidLinkageForFragileInline()) {
    if (!Callee->hasValidLinkageForFragileRef()) {
      llvm::errs() << "caller: " << Caller->getName() << "\n";
      llvm::errs() << "callee: " << Callee->getName() << "\n";
      llvm_unreachable("Should never be inlining a resilient function into "
                       "a fragile function");
    }
    return nullptr;
  }

  // Inlining self-recursive functions into other functions can result
  // in excessive code duplication since we run the inliner multiple
  // times in our pipeline.
  if (calleeIsSelfRecursive(Callee)) {
    return nullptr;
  }

  return Callee;
}

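/// Returns true if inlining the call site \p AI is estimated to pay off under
/// the cost model above: the callee's instruction costs are weighed against
/// the benefits found in its blocks, discounted for very large callers. On a
/// positive decision, \p NumCallerBlocks is increased by the callee's block
/// count.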
bool SILPerformanceInliner::isProfitableToInline(FullApplySite AI,
                                                 Weight CallerWeight,
                                                 ConstantTracker &callerTracker,
                                                 int &NumCallerBlocks,
                                                 bool IsGeneric) {
  SILFunction *Callee = AI.getReferencedFunction();
  SILLoopInfo *LI = LA->get(Callee);
  ShortestPathAnalysis *SPA = getSPA(Callee, LI);
  assert(SPA->isValid());

  ConstantTracker constTracker(Callee, &callerTracker, AI);
  DominanceInfo *DT = DA->get(Callee);
  SILBasicBlock *CalleeEntry = &Callee->front();
  DominanceOrder domOrder(CalleeEntry, DT, Callee->size());

  // Calculate the inlining cost of the callee.
  int CalleeCost = 0;
  int Benefit = 0;

  // Start with a base benefit.
  int BaseBenefit = RemovedCallBenefit;
  const SILOptions &Opts = Callee->getModule().getOptions();

  // For some reason -Ounchecked can accept a higher base benefit without
  // increasing the code size too much.
  if (Opts.Optimization == SILOptions::SILOptMode::OptimizeUnchecked)
    BaseBenefit *= 2;

  CallerWeight.updateBenefit(Benefit, BaseBenefit);

  // Go through all blocks of the function, accumulate the cost and find
  // benefits.
  while (SILBasicBlock *block = domOrder.getNext()) {
    constTracker.beginBlock();
    Weight BlockW = SPA->getWeight(block, CallerWeight);

    for (SILInstruction &I : *block) {
      constTracker.trackInst(&I);

      CalleeCost += (int)instructionInlineCost(I);

      if (FullApplySite AI = FullApplySite::isa(&I)) {
        // Check if the callee is passed as an argument. If so, increase the
        // threshold, because inlining will (probably) eliminate the closure.
        SILInstruction *def = constTracker.getDefInCaller(AI.getCallee());
        if (def && (isa<FunctionRefInst>(def) || isa<PartialApplyInst>(def)))
          BlockW.updateBenefit(Benefit, RemovedClosureBenefit);
      } else if (auto *LI = dyn_cast<LoadInst>(&I)) {
        // Check if it's a load from a stack location in the caller. Such a
        // load might be optimized away if inlined.
        if (constTracker.isStackAddrInCaller(LI->getOperand()))
          BlockW.updateBenefit(Benefit, RemovedLoadBenefit);
      } else if (auto *SI = dyn_cast<StoreInst>(&I)) {
        // Check if it's a store to a stack location in the caller. Such a
        // store might be optimized away if inlined.
        if (constTracker.isStackAddrInCaller(SI->getDest()))
          BlockW.updateBenefit(Benefit, RemovedStoreBenefit);
      } else if (isa<StrongReleaseInst>(&I) || isa<ReleaseValueInst>(&I)) {
        SILValue Op = stripCasts(I.getOperand(0));
        if (SILArgument *Arg = dyn_cast<SILArgument>(Op)) {
          if (Arg->isFunctionArg() && Arg->getArgumentConvention() ==
              SILArgumentConvention::Direct_Guaranteed) {
            BlockW.updateBenefit(Benefit, RefCountBenefit);
          }
        }
      } else if (auto *BI = dyn_cast<BuiltinInst>(&I)) {
        if (BI->getBuiltinInfo().ID == BuiltinValueKind::OnFastPath)
          BlockW.updateBenefit(Benefit, FastPathBuiltinBenefit);
      }
    }
    // Don't count costs in blocks which are dead after inlining.
    SILBasicBlock *takenBlock =
        constTracker.getTakenBlock(block->getTerminator());
    if (takenBlock) {
      BlockW.updateBenefit(Benefit, RemovedTerminatorBenefit);
      domOrder.pushChildrenIf(block, [=] (SILBasicBlock *child) {
        return child->getSinglePredecessor() != block || child == takenBlock;
      });
    } else {
      domOrder.pushChildren(block);
    }
  }

  if (AI.getFunction()->isThunk()) {
    // Only inline trivial functions into thunks (which will not increase the
    // code size).
    if (CalleeCost > TrivialFunctionThreshold)
      return false;

    DEBUG(
      dumpCaller(AI.getFunction());
      llvm::dbgs() << "    decision {" << CalleeCost << " into thunk} "
                   << Callee->getName() << '\n';
    );
    return true;
  }

  // We reduce the benefit if the caller is too large. For this we use a
  // cubic function on the number of caller blocks. This starts to prevent
  // inlining at about 800 - 1000 caller blocks (the subtracted value is
  // roughly 0 at 100 blocks, 10 at 1000 blocks and 80 at 2000 blocks).
  int blockMinus =
    (NumCallerBlocks * NumCallerBlocks) / BlockLimitDenominator *
                        NumCallerBlocks / BlockLimitDenominator;
  Benefit -= blockMinus;

  // This is the final inlining decision.
  if (CalleeCost > Benefit) {
    return false;
  }

  NumCallerBlocks += Callee->size();

  DEBUG(
    dumpCaller(AI.getFunction());
    llvm::dbgs() << "    decision {c=" << CalleeCost << ", b=" << Benefit
                 << ", l=" << SPA->getScopeLength(CalleeEntry, 0)
                 << ", c-w=" << CallerWeight << ", bb=" << Callee->size()
                 << ", c-bb=" << NumCallerBlocks
                 << "} " << Callee->getName() << '\n';
  );
  return true;
}

/// Checks if a given generic apply should be inlined unconditionally, i.e.
/// without any complex analysis using e.g. a cost model.
/// It returns true if a function should be inlined.
/// It returns false if a function should not be inlined.
/// It returns None if the decision cannot be made without a more complex
/// analysis.
static Optional<bool> shouldInlineGeneric(FullApplySite AI) {
  assert(!AI.getSubstitutions().empty() && "Expected a generic apply");

  if (!EnableSILInliningOfGenerics)
    return false;

  // If all substitutions are concrete, then there is no need to perform the
  // generic inlining. Let the generic specializer create a specialized
  // function and then decide if it is beneficial to inline it.
  if (!hasUnboundGenericTypes(AI.getSubstitutions()))
    return false;

  SILFunction *Callee = AI.getReferencedFunction();

  // Do not inline @_semantics functions when compiling the stdlib,
  // because they need to be preserved, so that the optimizer
  // can properly optimize user code later.
  auto ModuleName = Callee->getModule().getSwiftModule()->getName().str();
  if (Callee->hasSemanticsAttrThatStartsWith("array.") &&
      (ModuleName == STDLIB_NAME || ModuleName == SWIFT_ONONE_SUPPORT))
    return false;

  // Always inline generic functions which are marked as
  // AlwaysInline or transparent.
  if (Callee->getInlineStrategy() == AlwaysInline || Callee->isTransparent())
    return true;

  // Only inline if we decided to inline or we are asked to inline all
  // generic functions.
  return None;
}

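/// Decide whether to inline \p AI at a call site that is not in a cold block.
/// Generic applies are only inlined when shouldInlineGeneric() gives a
/// definitive answer, AlwaysInline callees are accepted unconditionally, and
/// everything else is delegated to the cost model in isProfitableToInline().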
bool SILPerformanceInliner::
decideInWarmBlock(FullApplySite AI, Weight CallerWeight,
                  ConstantTracker &callerTracker, int &NumCallerBlocks) {
  if (!AI.getSubstitutions().empty()) {
    // Only inline generics if it is definitively clear that it should be done.
    auto ShouldInlineGeneric = shouldInlineGeneric(AI);
    if (ShouldInlineGeneric.hasValue())
      return ShouldInlineGeneric.getValue();
    return false;
  }

  SILFunction *Callee = AI.getReferencedFunction();

  if (Callee->getInlineStrategy() == AlwaysInline)
    return true;

  return isProfitableToInline(AI, CallerWeight, callerTracker, NumCallerBlocks,
                              /* IsGeneric */ false);
}

/// Return true if inlining this call site into a cold block is profitable.
bool SILPerformanceInliner::decideInColdBlock(FullApplySite AI,
                                              SILFunction *Callee) {
  if (!AI.getSubstitutions().empty()) {
    // Only inline generics if it is definitively clear that it should be done.
    auto ShouldInlineGeneric = shouldInlineGeneric(AI);
    if (ShouldInlineGeneric.hasValue())
      return ShouldInlineGeneric.getValue();
    return false;
  }

  if (Callee->getInlineStrategy() == AlwaysInline)
    return true;

  int CalleeCost = 0;

  for (SILBasicBlock &Block : *Callee) {
    for (SILInstruction &I : Block) {
      CalleeCost += int(instructionInlineCost(I));
      if (CalleeCost > TrivialFunctionThreshold)
        return false;
    }
  }

  DEBUG(
    dumpCaller(AI.getFunction());
    llvm::dbgs() << "    cold decision {" << CalleeCost << "} "
                 << Callee->getName() << '\n';
  );
  return true;
}

/// Record additional weight increases.
///
/// Why can't we just add the weight when we call isProfitableToInline? Because
/// the additional weight is for _another_ function than the currently handled
/// callee.
static void addWeightCorrection(FullApplySite FAS,
                        llvm::DenseMap<FullApplySite, int> &WeightCorrections) {
  SILFunction *Callee = FAS.getReferencedFunction();
  if (Callee && Callee->hasSemanticsAttr("array.uninitialized")) {
    // We want to inline the argument to an array.uninitialized call, because
    // this argument is most likely a call to a function which contains the
    // buffer allocation for the array. It is essential to inline it for stack
    // promotion of the array buffer.
    SILValue BufferArg = FAS.getArgument(0);
    SILValue Base = stripValueProjections(stripCasts(BufferArg));
    if (auto BaseApply = FullApplySite::isa(Base))
      WeightCorrections[BaseApply] += 6;
  }
}

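/// Collects all call sites in \p Caller that should be inlined, visiting the
/// blocks in dominance order so that the ConstantTracker sees definitions
/// before their uses. Warm call sites go through decideInWarmBlock(); call
/// sites in cold (slow-path) blocks are only collected if decideInColdBlock()
/// accepts them. Callees with an excessive number of call sites are dropped
/// at the end.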
void SILPerformanceInliner::collectAppliesToInline(
    SILFunction *Caller, SmallVectorImpl<FullApplySite> &Applies) {
  DominanceInfo *DT = DA->get(Caller);
  SILLoopInfo *LI = LA->get(Caller);

  llvm::DenseMap<FullApplySite, int> WeightCorrections;

  // Compute the shortest-path analysis for the caller.
  ShortestPathAnalysis *SPA = getSPA(Caller, LI);
  SPA->analyze(CBI, [&](FullApplySite FAS) -> int {

    // This closure returns the length of a called function.

    // While we are at it, we also record additional weight increases.
    addWeightCorrection(FAS, WeightCorrections);

    if (SILFunction *Callee = getEligibleFunction(FAS)) {
      // Compute the shortest-path analysis for the callee.
      SILLoopInfo *CalleeLI = LA->get(Callee);
      ShortestPathAnalysis *CalleeSPA = getSPA(Callee, CalleeLI);
      if (!CalleeSPA->isValid()) {
        CalleeSPA->analyze(CBI, [](FullApplySite FAS) {
          // We don't compute SPA for another call-level. Functions called
          // from the callee are assumed to have DefaultApplyLength.
          return DefaultApplyLength;
        });
      }
      int CalleeLength = CalleeSPA->getScopeLength(&Callee->front(), 0);
      // Just in case the callee is a noreturn function.
      if (CalleeLength >= ShortestPathAnalysis::InitialDist)
        return DefaultApplyLength;
      return CalleeLength;
    }
    // Some unknown function.
    return DefaultApplyLength;
  });

#ifndef NDEBUG
  if (PrintShortestPathInfo) {
    SPA->dump();
  }
#endif

  ConstantTracker constTracker(Caller);
  DominanceOrder domOrder(&Caller->front(), DT, Caller->size());
  int NumCallerBlocks = (int)Caller->size();

  // Go through all instructions and find candidates for inlining.
  // We do this in dominance order for the constTracker.
  SmallVector<FullApplySite, 8> InitialCandidates;
  while (SILBasicBlock *block = domOrder.getNext()) {
    constTracker.beginBlock();
    Weight BlockWeight;

    for (auto I = block->begin(), E = block->end(); I != E; ++I) {
      constTracker.trackInst(&*I);

      if (!FullApplySite::isa(&*I))
        continue;

      FullApplySite AI = FullApplySite(&*I);

      auto *Callee = getEligibleFunction(AI);
      if (Callee) {
        if (!BlockWeight.isValid())
          BlockWeight = SPA->getWeight(block, Weight(0, 0));

        // The actual weight including a possible weight correction.
        Weight W(BlockWeight, WeightCorrections.lookup(AI));

        if (decideInWarmBlock(AI, W, constTracker, NumCallerBlocks))
          InitialCandidates.push_back(AI);
      }
    }

    domOrder.pushChildrenIf(block, [&] (SILBasicBlock *child) {
      if (CBI.isSlowPath(block, child)) {
        // Handle cold blocks separately.
        visitColdBlocks(InitialCandidates, child, DT);
        return false;
      }
      return true;
    });
  }

  // Calculate how many times a callee is called from this caller.
  llvm::DenseMap<SILFunction *, unsigned> CalleeCount;
  for (auto AI : InitialCandidates) {
    SILFunction *Callee = AI.getReferencedFunction();
    assert(Callee && "apply_inst does not have a direct callee anymore");
    CalleeCount[Callee]++;
  }

  // Now copy each candidate callee that has a small enough number of
  // call sites into the final set of call sites.
  for (auto AI : InitialCandidates) {
    SILFunction *Callee = AI.getReferencedFunction();
    assert(Callee && "apply_inst does not have a direct callee anymore");

    const unsigned CallsToCalleeThreshold = 1024;
    if (CalleeCount[Callee] <= CallsToCalleeThreshold)
      Applies.push_back(AI);
  }
}

/// \brief Attempt to inline all calls smaller than our threshold.
/// Returns true if a function was inlined.
bool SILPerformanceInliner::inlineCallsIntoFunction(SILFunction *Caller) {
  // Don't optimize functions that are marked with the opt.never attribute.
  if (!Caller->shouldOptimize())
    return false;

  // First step: collect all the functions we want to inline. We
  // don't change anything yet so that the dominator information
  // remains valid.
  SmallVector<FullApplySite, 8> AppliesToInline;
  collectAppliesToInline(Caller, AppliesToInline);

  if (AppliesToInline.empty())
    return false;

  // Second step: do the actual inlining.
  for (auto AI : AppliesToInline) {
    SILFunction *Callee = AI.getReferencedFunction();
    assert(Callee && "apply_inst does not have a direct callee anymore");

    if (!Callee->shouldOptimize()) {
      continue;
    }

    SmallVector<SILValue, 8> Args;
    for (const auto &Arg : AI.getArguments())
      Args.push_back(Arg);

    DEBUG(
      dumpCaller(Caller);
      llvm::dbgs() << "    inline [" << Callee->size() << "->" << Caller->size()
                   << "] " << Callee->getName() << "\n";
    );

    SILOpenedArchetypesTracker OpenedArchetypesTracker(*Caller);
    Caller->getModule().registerDeleteNotificationHandler(
        &OpenedArchetypesTracker);
    // The callee only needs to know about opened archetypes used in
    // the substitution list.
    OpenedArchetypesTracker.registerUsedOpenedArchetypes(AI.getInstruction());

    SILInliner Inliner(*Caller, *Callee,
                       SILInliner::InlineKind::PerformanceInline,
                       AI.getSubstitutions(), OpenedArchetypesTracker);

    auto Success = Inliner.inlineFunction(AI, Args);
    (void) Success;
    // We've already determined we should be able to inline this, so
    // we expect it to have happened.
    assert(Success && "Expected inliner to inline this function!");

    recursivelyDeleteTriviallyDeadInstructions(AI.getInstruction(), true);

    NumFunctionsInlined++;
  }

  return true;
}

// Find functions in cold blocks which are forced to be inlined.
// All other functions are not inlined in cold blocks.
void SILPerformanceInliner::visitColdBlocks(
    SmallVectorImpl<FullApplySite> &AppliesToInline, SILBasicBlock *Root,
    DominanceInfo *DT) {
  DominanceOrder domOrder(Root, DT);
  while (SILBasicBlock *block = domOrder.getNext()) {
    for (SILInstruction &I : *block) {
      ApplyInst *AI = dyn_cast<ApplyInst>(&I);
      if (!AI)
        continue;

      auto *Callee = getEligibleFunction(AI);
      if (Callee && decideInColdBlock(AI, Callee)) {
        AppliesToInline.push_back(AI);
      }
    }
    domOrder.pushChildren(block);
  }
}

//===----------------------------------------------------------------------===//
//                        Performance Inliner Pass
//===----------------------------------------------------------------------===//

namespace {
class SILPerformanceInlinerPass : public SILFunctionTransform {
  /// Specifies which functions not to inline, based on @_semantics and
  /// global_init attributes.
  InlineSelection WhatToInline;
  std::string PassName;

public:
  SILPerformanceInlinerPass(InlineSelection WhatToInline, StringRef LevelName):
    WhatToInline(WhatToInline), PassName(LevelName) {
    PassName.append(" Performance Inliner");
  }

  void run() override {
    DominanceAnalysis *DA = PM->getAnalysis<DominanceAnalysis>();
    SILLoopAnalysis *LA = PM->getAnalysis<SILLoopAnalysis>();

    if (getOptions().InlineThreshold == 0) {
      return;
    }

    SILPerformanceInliner Inliner(WhatToInline, DA, LA);

    assert(getFunction()->isDefinition() &&
           "Expected only functions with bodies!");

    // Inline things into this function, and if we do so invalidate
    // analyses for this function and restart the pipeline so that we
    // can further optimize this function before attempting to inline
    // in it again.
    if (Inliner.inlineCallsIntoFunction(getFunction())) {
      invalidateAnalysis(SILAnalysis::InvalidationKind::FunctionBody);
      restartPassPipeline();
    }
  }

  StringRef getName() override { return PassName; }
};
} // end anonymous namespace

/// Create an inliner pass that does not inline functions that are marked with
/// the @_semantics, @effects or global_init attributes.
SILTransform *swift::createEarlyInliner() {
  return new SILPerformanceInlinerPass(
    InlineSelection::NoSemanticsAndGlobalInit, "Early");
}

/// Create an inliner pass that does not inline functions that are marked with
/// the global_init attribute or have an "availability" semantics attribute.
SILTransform *swift::createPerfInliner() {
  return new SILPerformanceInlinerPass(InlineSelection::NoGlobalInit, "Middle");
}

/// Create an inliner pass that inlines all functions that are marked with
/// the @_semantics, @effects or global_init attributes.
SILTransform *swift::createLateInliner() {
  return new SILPerformanceInlinerPass(InlineSelection::Everything, "Late");
}