//===--- PerformanceInlinerUtils.h - Common performance inliner utils. ---===// // // This source file is part of the Swift.org open source project // // Copyright (c) 2014 - 2017 Apple Inc. and the Swift project authors // Licensed under Apache License v2.0 with Runtime Library Exception // // See https://swift.org/LICENSE.txt for license information // See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors // //===----------------------------------------------------------------------===// #include "swift/SIL/SILInstruction.h" #include "swift/SIL/Projection.h" #include "swift/SIL/InstructionUtils.h" #include "swift/SILOptimizer/Analysis/ColdBlockInfo.h" #include "swift/SILOptimizer/Analysis/DominanceAnalysis.h" #include "swift/SILOptimizer/Analysis/FunctionOrder.h" #include "swift/SILOptimizer/Analysis/LoopAnalysis.h" #include "swift/SILOptimizer/Utils/ConstantFolding.h" #include "swift/SILOptimizer/Utils/SILInliner.h" #include "llvm/ADT/SmallVector.h" #include "llvm/Support/CommandLine.h" using namespace swift; extern llvm::cl::opt EnableSILInliningOfGenerics; namespace swift { class BasicCalleeAnalysis; // Controls the decision to inline functions with @_semantics, @effect and // global_init attributes. enum class InlineSelection { Everything, NoSemanticsAndEffects, OnlyInlineAlways, }; /// Check if this ApplySite is eligible for inlining. If so, return the callee. SILFunction *getEligibleFunction(FullApplySite AI, InlineSelection WhatToInline); // Returns true if this is a pure call, i.e. the callee has no side-effects // and all arguments are constants. bool isPureCall(FullApplySite AI, BasicCalleeAnalysis *BCA); /// Fundamental @_semantic tags provide additional semantics for optimization /// beyond what SIL analysis can discover from the function body. They should be /// preserved until semantic passes can process them. For example, they may need /// to be hoisted by LICM after inlining up to the highest level caller. /// /// Nested @_semantic tags also provide semantics beyond the function body but /// they hide the semantics of underlying calls. These need to be inlined into /// the highest level caller before semantics passes can process the underlying /// Fundamental semantic functions. /// /// Transient @_semantic tags are not required for optimization. enum class SemanticFunctionLevel { Fundamental, Transient, Nested }; /// Return the SemanticFunctionLevel of \p callee. SemanticFunctionLevel getSemanticFunctionLevel(SILFunction *function); inline bool isOptimizableSemanticFunction(SILFunction *function) { return getSemanticFunctionLevel(function) != SemanticFunctionLevel::Transient; } /// Return true if \p apply calls into an optimizable semantic function from /// within another semantic function, or from a "trivial" wrapper. bool isNestedSemanticCall(FullApplySite apply); // Strips down simple function conversion operations until a base SILValue is // reached. // // Returns a nullptr if `val` is not a function conversion instruction. SILValue stripFunctionConversions(SILValue val); } // end swift namespace //===----------------------------------------------------------------------===// // ConstantTracker //===----------------------------------------------------------------------===// // Tracks constants in the caller and callee to get an estimation of what // values get constant if the callee is inlined. // This can be seen as a "simulation" of several optimizations: SROA, mem2reg // and constant propagation. // Note that this is only a simplified model and not correct in all cases. // For example aliasing information is not taken into account. class ConstantTracker { // Represents a value in integer constant evaluation. struct IntConst { IntConst() : isValid(false), isFromCaller(false) { } IntConst(const APInt &value, bool isFromCaller) : value(value), isValid(true), isFromCaller(isFromCaller) { } // The actual value. APInt value; // True if the value is valid, i.e. could be evaluated to a constant. bool isValid; // True if the value is only valid, because a constant is passed to the // callee. False if constant propagation could do the same job inside the // callee without inlining it. bool isFromCaller; }; // Links between loaded and stored values. // The key is a load instruction, the value is the corresponding store // instruction which stores the loaded value. Both, key and value can also // be copy_addr instructions. llvm::DenseMap links; // The current stored values at memory addresses. // The key is the base address of the memory (after skipping address // projections). The value are store (or copy_addr) instructions, which // store the current value. // This is only an estimation, because e.g. it does not consider potential // aliasing. llvm::DenseMap memoryContent; // Cache for evaluated constants. llvm::SmallDenseMap constCache; // The caller/callee function which is tracked. SILFunction *F; // The constant tracker of the caller function (null if this is the // tracker of the callee). ConstantTracker *callerTracker; // The apply instruction in the caller (null if this is the tracker of the // callee). FullApplySite AI; // Walks through address projections and (optionally) collects them. // Returns the base address, i.e. the first address which is not a // projection. SILValue scanProjections(SILValue addr, SmallVectorImpl *Result = nullptr); // Get the stored value for a load. The loadInst can be either a real load // or a copy_addr. SILValue getStoredValue(SILInstruction *loadInst, ProjectionPath &projStack); // Gets the parameter in the caller for a function argument. SILValue getParam(SILValue value) { if (auto *Arg = dyn_cast(value)) { if (AI && Arg->getFunction() == F) { // Continue at the caller. return AI.getArgument(Arg->getIndex()); } } return SILValue(); } SILInstruction *getMemoryContent(SILValue addr) { // The memory content can be stored in this ConstantTracker or in the // caller's ConstantTracker. SILInstruction *storeInst = memoryContent[addr]; if (storeInst) return storeInst; if (callerTracker) return callerTracker->getMemoryContent(addr); return nullptr; } // Gets the estimated definition of a value. SILInstruction *getDef(SILValue val, ProjectionPath &projStack); // Gets the estimated integer constant result of a builtin. IntConst getBuiltinConst(BuiltinInst *BI, int depth); public: // Constructor for the caller function. ConstantTracker(SILFunction *function) : F(function), callerTracker(nullptr), AI() { } // Constructor for the callee function. ConstantTracker(SILFunction *function, ConstantTracker *caller, FullApplySite callerApply) : F(function), callerTracker(caller), AI(callerApply) { } void beginBlock() { // Currently we don't do any sophisticated dataflow analysis, so we keep // the memoryContent alive only for a single block. memoryContent.clear(); } // Must be called for each instruction visited in dominance order. void trackInst(SILInstruction *inst); // Gets the estimated definition of a value. SILInstruction *getDef(SILValue val) { ProjectionPath projStack(val->getType()); return getDef(val, projStack); } // Gets the estimated definition of a value if it is in the caller. SILInstruction *getDefInCaller(SILValue val) { SILInstruction *def = getDef(val); if (def && def->getFunction() != F) return def; return nullptr; } bool isStackAddrInCaller(SILValue val) { if (SILValue Param = getParam(stripAddressProjections(val))) { return isa(stripAddressProjections(Param)); } return false; } // Gets the estimated integer constant of a value. IntConst getIntConst(SILValue val, int depth = 0); SILBasicBlock *getTakenBlock(TermInst *term); }; //===----------------------------------------------------------------------===// // Shortest path analysis //===----------------------------------------------------------------------===// /// Computes the length of the shortest path through a block in a scope. /// /// A scope is either a loop or the whole function. The "length" is an /// estimation of the execution time. For example if the shortest path of a /// block B is N, it means that the execution time of the scope (either /// function or a single loop iteration), when execution includes the block, is /// at least N, regardless of what branches are taken. /// /// The shortest path of a block is the sum of the shortest path from the scope /// entry and the shortest path to the scope exit. class ShortestPathAnalysis { public: enum { /// We assume that cold block take very long to execute. ColdBlockLength = 1000, /// Our assumption on how many times a loop is executed. LoopCount = 10, /// To keep things simple we only analyze up to this number of nested loops. MaxNumLoopLevels = 4, /// The "weight" for the benefit which a single loop nest gives. SingleLoopWeight = 4, /// Pretty large but small enough to add something without overflowing. InitialDist = (1 << 29) }; /// A weight for an inlining benefit. class Weight { /// How long does it take to execute the scope (either loop or the whole /// function) of the call to inline? The larger it is the less important it /// is to inline. int ScopeLength; /// Represents the loop nest. The larger it is the more important it is to /// inline int LoopWeight; friend class ShortestPathAnalysis; public: Weight(int ScopeLength, int LoopWeight) : ScopeLength(ScopeLength), LoopWeight(LoopWeight) { } Weight(const Weight &RHS, int AdditionalLoopWeight) : Weight(RHS.ScopeLength, RHS.LoopWeight + AdditionalLoopWeight) { } Weight() : ScopeLength(-1), LoopWeight(0) { assert(!isValid()); } bool isValid() const { return ScopeLength >= 0; } /// Updates the \p Benefit by weighting the \p Importance. void updateBenefit(int &Benefit, int Importance) const; #ifndef NDEBUG friend raw_ostream &operator<<(raw_ostream &os, const Weight &W) { os << W.LoopWeight << '/' << W.ScopeLength; return os; } #endif }; private: /// The distances for a block in it's scope (either loop or function). struct Distances { /// The shortest distance from the scope entry to the block entry. int DistFromEntry = InitialDist; /// The shortest distance from the block entry to the scope exit. int DistToExit = InitialDist; /// Additional length to account for loop iterations. It's != 0 for loop /// headers (or loop predecessors). int LoopHeaderLength = 0; }; /// Holds the distance information for a block in all it's scopes, i.e. in all /// its containing loops and the function itself. struct BlockInfo { private: /// Distances for all scopes. Element 0 refers to the function, elements /// > 0 to loops. Distances Dists[MaxNumLoopLevels]; public: /// The length of the block itself. int Length = 0; /// Returns the distances for the loop with nesting level \p LoopDepth. Distances &getDistances(int LoopDepth) { assert(LoopDepth >= 0 && LoopDepth < MaxNumLoopLevels); return Dists[LoopDepth]; } /// Returns the length including the LoopHeaderLength for a given /// \p LoopDepth. int getLength(int LoopDepth) { return Length + getDistances(LoopDepth).LoopHeaderLength; } /// Returns the length of the shortest path of this block in the loop with /// nesting level \p LoopDepth. int getScopeLength(int LoopDepth) { const Distances &D = getDistances(LoopDepth); return D.DistFromEntry + D.DistToExit; } }; SILFunction *F; SILLoopInfo *LI; llvm::DenseMap BlockInfos; std::vector BlockInfoStorage; bool valid = false; BlockInfo *getBlockInfo(const SILBasicBlock *BB) { BlockInfo *BI = BlockInfos[BB]; assert(BI); return BI; } // Utility functions to make the template solveDataFlow compilable for a block // list containing references _and_ a list containing pointers. const SILBasicBlock *getBlock(const SILBasicBlock *BB) { return BB; } const SILBasicBlock *getBlock(const SILBasicBlock &BB) { return &BB; } /// Returns the minimum distance from all predecessor blocks of \p BB. int getEntryDistFromPreds(const SILBasicBlock *BB, int LoopDepth); /// Returns the minimum distance from all successor blocks of \p BB. int getExitDistFromSuccs(const SILBasicBlock *BB, int LoopDepth); /// Computes the distances by solving the dataflow problem for all \p Blocks /// in a scope. template void solveDataFlow(const BlockList &Blocks, int LoopDepth) { bool Changed = false; // Compute the distances from the entry block. do { Changed = false; for (auto &Block : Blocks) { const SILBasicBlock *BB = getBlock(Block); BlockInfo *BBInfo = getBlockInfo(BB); int DistFromEntry = getEntryDistFromPreds(BB, LoopDepth); Distances &BBDists = BBInfo->getDistances(LoopDepth); if (DistFromEntry < BBDists.DistFromEntry) { BBDists.DistFromEntry = DistFromEntry; Changed = true; } } } while (Changed); // Compute the distances to the exit block. do { Changed = false; for (auto &Block : reverse(Blocks)) { const SILBasicBlock *BB = getBlock(Block); BlockInfo *BBInfo = getBlockInfo(BB); int DistToExit = getExitDistFromSuccs(BB, LoopDepth) + BBInfo->getLength(LoopDepth); Distances &BBDists = BBInfo->getDistances(LoopDepth); if (DistToExit < BBDists.DistToExit) { BBDists.DistToExit = DistToExit; Changed = true; } } } while (Changed); } /// Analyze \p Loop and all its inner loops. void analyzeLoopsRecursively(SILLoop *Loop, int LoopDepth); void printFunction(llvm::raw_ostream &OS); void printLoop(llvm::raw_ostream &OS, SILLoop *Loop, int LoopDepth); void printBlockInfo(llvm::raw_ostream &OS, SILBasicBlock *BB, int LoopDepth); public: ShortestPathAnalysis(SILFunction *F, SILLoopInfo *LI) : F(F), LI(LI) { } bool isValid() const { return valid; } /// Compute the distances. The function \p getApplyLength returns the length /// of a function call. template void analyze(ColdBlockInfo &CBI, Func getApplyLength) { assert(!isValid()); valid = true; unsigned numBlocks = F->size(); // As the complexity of the analysis is more than linear with the number of blocks, // disable it for huge functions. In this case inlining will be less aggressive. if (numBlocks > 2000) return; BlockInfoStorage.resize(numBlocks); CBI.analyze(F); // First step: compute the length of the blocks. unsigned BlockIdx = 0; for (SILBasicBlock &BB : *F) { int Length = 0; if (CBI.isCold(&BB)) { Length = ColdBlockLength; } else { for (SILInstruction &I : BB) { if (auto FAS = FullApplySite::isa(&I)) { Length += getApplyLength(FAS); } else { Length += (int)instructionInlineCost(I); } } } BlockInfo *BBInfo = &BlockInfoStorage[BlockIdx++]; BlockInfos[&BB] = BBInfo; BBInfo->Length = Length; // Initialize the distances for the entry and exit blocks, used when // computing the distances for the function itself. if (&BB == &F->front()) BBInfo->getDistances(0).DistFromEntry = 0; if (isa(BB.getTerminator())) BBInfo->getDistances(0).DistToExit = Length; else if (isa(BB.getTerminator()) || isa(BB.getTerminator())) BBInfo->getDistances(0).DistToExit = Length + ColdBlockLength; } // Compute the distances for all loops in the function. for (SILLoop *Loop : *LI) { analyzeLoopsRecursively(Loop, 1); } // Compute the distances for the function itself. solveDataFlow(*F, 0); } /// Returns the length of the shortest path of the block \p BB in the loop /// with nesting level \p LoopDepth. If \p LoopDepth is 0 ir returns the /// shortest path in the function. int getScopeLength(SILBasicBlock *BB, int LoopDepth) { assert(BB->getParent() == F); // Return a conservative default if the analysis was not done due to a high number of blocks. if (BlockInfos.empty()) return ColdBlockLength; if (LoopDepth >= MaxNumLoopLevels) LoopDepth = MaxNumLoopLevels - 1; return getBlockInfo(BB)->getScopeLength(LoopDepth); } /// Returns the weight of block \p BB also considering the \p CallerWeight /// which is the weight of the call site's block in the caller. Weight getWeight(SILBasicBlock *BB, Weight CallerWeight); void dump(); };