Files
swift-mirror/include/swift/SILOptimizer/Utils/PerformanceInlinerUtils.h
Kavon Farvardin 7203a4fa73 ColdBlockInfo: overhaul analysis pass
The old analysis pass doesn't take into account profile data, nor does
it consider post-dominance. It primarily dealt with _fastPath/_slowPath.

A block that is dominated by a cold block is itself cold. That's true
whether it's forwards or backwards dominance.

We can also consider a call to any `Never` returning function as a
cold-exit, though the block(s) leading up to that call may be executed
frequently because of concurrency. For now, I'm ignoring the concurrency
case and assuming it's cold. To make use of this "no return" prediction,
use the `-enable-noreturn-prediction` flag, which is currently off by
default.
2024-09-03 15:41:10 -07:00

493 lines
17 KiB
C++

//===--- PerformanceInlinerUtils.h - Common performance inliner utils. ---===//
//
// This source file is part of the Swift.org open source project
//
// Copyright (c) 2014 - 2017 Apple Inc. and the Swift project authors
// Licensed under Apache License v2.0 with Runtime Library Exception
//
// See https://swift.org/LICENSE.txt for license information
// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
//
//===----------------------------------------------------------------------===//
#include "swift/SIL/SILInstruction.h"
#include "swift/SIL/Projection.h"
#include "swift/SIL/InstructionUtils.h"
#include "swift/SILOptimizer/Analysis/ColdBlockInfo.h"
#include "swift/SILOptimizer/Analysis/DominanceAnalysis.h"
#include "swift/SILOptimizer/Analysis/FunctionOrder.h"
#include "swift/SILOptimizer/Analysis/LoopAnalysis.h"
#include "swift/SILOptimizer/Utils/ConstantFolding.h"
#include "swift/SILOptimizer/Utils/SILInliner.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Support/CommandLine.h"
using namespace swift;
extern llvm::cl::opt<bool> EnableSILInliningOfGenerics;
namespace swift {
class BasicCalleeAnalysis;
// Controls the decision to inline functions with @_semantics, @effect and
// global_init attributes.
enum class InlineSelection {
Everything,
NoSemanticsAndEffects,
OnlyInlineAlways,
};
/// Check if this ApplySite is eligible for inlining. If so, return the callee.
SILFunction *getEligibleFunction(FullApplySite AI,
InlineSelection WhatToInline);
// Returns true if this is a pure call, i.e. the callee has no side-effects
// and all arguments are constants.
bool isPureCall(FullApplySite AI, BasicCalleeAnalysis *BCA);
/// Fundamental @_semantic tags provide additional semantics for optimization
/// beyond what SIL analysis can discover from the function body. They should be
/// preserved until semantic passes can process them. For example, they may need
/// to be hoisted by LICM after inlining up to the highest level caller.
///
/// Nested @_semantic tags also provide semantics beyond the function body but
/// they hide the semantics of underlying calls. These need to be inlined into
/// the highest level caller before semantics passes can process the underlying
/// Fundamental semantic functions.
///
/// Transient @_semantic tags are not required for optimization.
enum class SemanticFunctionLevel { Fundamental, Transient, Nested };
/// Return the SemanticFunctionLevel of \p callee.
SemanticFunctionLevel getSemanticFunctionLevel(SILFunction *function);
inline bool isOptimizableSemanticFunction(SILFunction *function) {
return getSemanticFunctionLevel(function) != SemanticFunctionLevel::Transient;
}
/// Return true if \p apply calls into an optimizable semantic function from
/// within another semantic function, or from a "trivial" wrapper.
bool isNestedSemanticCall(FullApplySite apply);
// Strips down simple function conversion operations until a base SILValue is
// reached.
//
// Returns a nullptr if `val` is not a function conversion instruction.
SILValue stripFunctionConversions(SILValue val);
} // end swift namespace
//===----------------------------------------------------------------------===//
// ConstantTracker
//===----------------------------------------------------------------------===//
// Tracks constants in the caller and callee to get an estimation of what
// values get constant if the callee is inlined.
// This can be seen as a "simulation" of several optimizations: SROA, mem2reg
// and constant propagation.
// Note that this is only a simplified model and not correct in all cases.
// For example aliasing information is not taken into account.
class ConstantTracker {
// Represents a value in integer constant evaluation.
struct IntConst {
IntConst() : isValid(false), isFromCaller(false) { }
IntConst(const APInt &value, bool isFromCaller) :
value(value), isValid(true), isFromCaller(isFromCaller) { }
// The actual value.
APInt value;
// True if the value is valid, i.e. could be evaluated to a constant.
bool isValid;
// True if the value is only valid, because a constant is passed to the
// callee. False if constant propagation could do the same job inside the
// callee without inlining it.
bool isFromCaller;
};
// Links between loaded and stored values.
// The key is a load instruction, the value is the corresponding store
// instruction which stores the loaded value. Both, key and value can also
// be copy_addr instructions.
llvm::DenseMap<SILInstruction *, SILInstruction *> links;
// The current stored values at memory addresses.
// The key is the base address of the memory (after skipping address
// projections). The value are store (or copy_addr) instructions, which
// store the current value.
// This is only an estimation, because e.g. it does not consider potential
// aliasing.
llvm::DenseMap<SILValue, SILInstruction *> memoryContent;
// Cache for evaluated constants.
llvm::SmallDenseMap<BuiltinInst *, IntConst> constCache;
// The caller/callee function which is tracked.
SILFunction *F;
// The constant tracker of the caller function (null if this is the
// tracker of the callee).
ConstantTracker *callerTracker;
// The apply instruction in the caller (null if this is the tracker of the
// callee).
FullApplySite AI;
// Walks through address projections and (optionally) collects them.
// Returns the base address, i.e. the first address which is not a
// projection.
SILValue scanProjections(SILValue addr,
SmallVectorImpl<Projection> *Result = nullptr);
// Get the stored value for a load. The loadInst can be either a real load
// or a copy_addr.
SILValue getStoredValue(SILInstruction *loadInst,
ProjectionPath &projStack);
// Gets the parameter in the caller for a function argument.
SILValue getParam(SILValue value) {
if (auto *Arg = dyn_cast<SILFunctionArgument>(value)) {
if (AI && Arg->getFunction() == F) {
// Continue at the caller.
return AI.getArgument(Arg->getIndex());
}
}
return SILValue();
}
SILInstruction *getMemoryContent(SILValue addr) {
// The memory content can be stored in this ConstantTracker or in the
// caller's ConstantTracker.
SILInstruction *storeInst = memoryContent[addr];
if (storeInst)
return storeInst;
if (callerTracker)
return callerTracker->getMemoryContent(addr);
return nullptr;
}
// Gets the estimated definition of a value.
SILInstruction *getDef(SILValue val, ProjectionPath &projStack);
// Gets the estimated integer constant result of a builtin.
IntConst getBuiltinConst(BuiltinInst *BI, int depth);
public:
// Constructor for the caller function.
ConstantTracker(SILFunction *function) :
F(function), callerTracker(nullptr), AI()
{ }
// Constructor for the callee function.
ConstantTracker(SILFunction *function, ConstantTracker *caller,
FullApplySite callerApply) :
F(function), callerTracker(caller), AI(callerApply)
{ }
void beginBlock() {
// Currently we don't do any sophisticated dataflow analysis, so we keep
// the memoryContent alive only for a single block.
memoryContent.clear();
}
// Must be called for each instruction visited in dominance order.
void trackInst(SILInstruction *inst);
// Gets the estimated definition of a value.
SILInstruction *getDef(SILValue val) {
ProjectionPath projStack(val->getType());
return getDef(val, projStack);
}
// Gets the estimated definition of a value if it is in the caller.
SILInstruction *getDefInCaller(SILValue val) {
SILInstruction *def = getDef(val);
if (def && def->getFunction() != F)
return def;
return nullptr;
}
bool isStackAddrInCaller(SILValue val) {
if (SILValue Param = getParam(stripAddressProjections(val))) {
return isa<AllocStackInst>(stripAddressProjections(Param));
}
return false;
}
// Gets the estimated integer constant of a value.
IntConst getIntConst(SILValue val, int depth = 0);
SILBasicBlock *getTakenBlock(TermInst *term);
};
//===----------------------------------------------------------------------===//
// Shortest path analysis
//===----------------------------------------------------------------------===//
/// Computes the length of the shortest path through a block in a scope.
///
/// A scope is either a loop or the whole function. The "length" is an
/// estimation of the execution time. For example if the shortest path of a
/// block B is N, it means that the execution time of the scope (either
/// function or a single loop iteration), when execution includes the block, is
/// at least N, regardless of what branches are taken.
///
/// The shortest path of a block is the sum of the shortest path from the scope
/// entry and the shortest path to the scope exit.
class ShortestPathAnalysis {
public:
enum {
/// We assume that cold block take very long to execute.
ColdBlockLength = 1000,
/// Our assumption on how many times a loop is executed.
LoopCount = 10,
/// To keep things simple we only analyze up to this number of nested loops.
MaxNumLoopLevels = 4,
/// The "weight" for the benefit which a single loop nest gives.
SingleLoopWeight = 4,
/// Pretty large but small enough to add something without overflowing.
InitialDist = (1 << 29)
};
/// A weight for an inlining benefit.
class Weight {
/// How long does it take to execute the scope (either loop or the whole
/// function) of the call to inline? The larger it is the less important it
/// is to inline.
int ScopeLength;
/// Represents the loop nest. The larger it is the more important it is to
/// inline
int LoopWeight;
friend class ShortestPathAnalysis;
public:
Weight(int ScopeLength, int LoopWeight) : ScopeLength(ScopeLength),
LoopWeight(LoopWeight) { }
Weight(const Weight &RHS, int AdditionalLoopWeight) :
Weight(RHS.ScopeLength, RHS.LoopWeight + AdditionalLoopWeight) { }
Weight() : ScopeLength(-1), LoopWeight(0) {
assert(!isValid());
}
bool isValid() const { return ScopeLength >= 0; }
/// Updates the \p Benefit by weighting the \p Importance.
void updateBenefit(int &Benefit, int Importance) const;
#ifndef NDEBUG
friend raw_ostream &operator<<(raw_ostream &os, const Weight &W) {
os << W.LoopWeight << '/' << W.ScopeLength;
return os;
}
#endif
};
private:
/// The distances for a block in it's scope (either loop or function).
struct Distances {
/// The shortest distance from the scope entry to the block entry.
int DistFromEntry = InitialDist;
/// The shortest distance from the block entry to the scope exit.
int DistToExit = InitialDist;
/// Additional length to account for loop iterations. It's != 0 for loop
/// headers (or loop predecessors).
int LoopHeaderLength = 0;
};
/// Holds the distance information for a block in all it's scopes, i.e. in all
/// its containing loops and the function itself.
struct BlockInfo {
private:
/// Distances for all scopes. Element 0 refers to the function, elements
/// > 0 to loops.
Distances Dists[MaxNumLoopLevels];
public:
/// The length of the block itself.
int Length = 0;
/// Returns the distances for the loop with nesting level \p LoopDepth.
Distances &getDistances(int LoopDepth) {
assert(LoopDepth >= 0 && LoopDepth < MaxNumLoopLevels);
return Dists[LoopDepth];
}
/// Returns the length including the LoopHeaderLength for a given
/// \p LoopDepth.
int getLength(int LoopDepth) {
return Length + getDistances(LoopDepth).LoopHeaderLength;
}
/// Returns the length of the shortest path of this block in the loop with
/// nesting level \p LoopDepth.
int getScopeLength(int LoopDepth) {
const Distances &D = getDistances(LoopDepth);
return D.DistFromEntry + D.DistToExit;
}
};
SILFunction *F;
SILLoopInfo *LI;
llvm::DenseMap<const SILBasicBlock *, BlockInfo *> BlockInfos;
std::vector<BlockInfo> BlockInfoStorage;
bool valid = false;
BlockInfo *getBlockInfo(const SILBasicBlock *BB) {
BlockInfo *BI = BlockInfos[BB];
assert(BI);
return BI;
}
// Utility functions to make the template solveDataFlow compilable for a block
// list containing references _and_ a list containing pointers.
const SILBasicBlock *getBlock(const SILBasicBlock *BB) { return BB; }
const SILBasicBlock *getBlock(const SILBasicBlock &BB) { return &BB; }
/// Returns the minimum distance from all predecessor blocks of \p BB.
int getEntryDistFromPreds(const SILBasicBlock *BB, int LoopDepth);
/// Returns the minimum distance from all successor blocks of \p BB.
int getExitDistFromSuccs(const SILBasicBlock *BB, int LoopDepth);
/// Computes the distances by solving the dataflow problem for all \p Blocks
/// in a scope.
template <typename BlockList>
void solveDataFlow(const BlockList &Blocks, int LoopDepth) {
bool Changed = false;
// Compute the distances from the entry block.
do {
Changed = false;
for (auto &Block : Blocks) {
const SILBasicBlock *BB = getBlock(Block);
BlockInfo *BBInfo = getBlockInfo(BB);
int DistFromEntry = getEntryDistFromPreds(BB, LoopDepth);
Distances &BBDists = BBInfo->getDistances(LoopDepth);
if (DistFromEntry < BBDists.DistFromEntry) {
BBDists.DistFromEntry = DistFromEntry;
Changed = true;
}
}
} while (Changed);
// Compute the distances to the exit block.
do {
Changed = false;
for (auto &Block : reverse(Blocks)) {
const SILBasicBlock *BB = getBlock(Block);
BlockInfo *BBInfo = getBlockInfo(BB);
int DistToExit =
getExitDistFromSuccs(BB, LoopDepth) + BBInfo->getLength(LoopDepth);
Distances &BBDists = BBInfo->getDistances(LoopDepth);
if (DistToExit < BBDists.DistToExit) {
BBDists.DistToExit = DistToExit;
Changed = true;
}
}
} while (Changed);
}
/// Analyze \p Loop and all its inner loops.
void analyzeLoopsRecursively(SILLoop *Loop, int LoopDepth);
void printFunction(llvm::raw_ostream &OS);
void printLoop(llvm::raw_ostream &OS, SILLoop *Loop, int LoopDepth);
void printBlockInfo(llvm::raw_ostream &OS, SILBasicBlock *BB, int LoopDepth);
public:
ShortestPathAnalysis(SILFunction *F, SILLoopInfo *LI) : F(F), LI(LI) { }
bool isValid() const { return valid; }
/// Compute the distances. The function \p getApplyLength returns the length
/// of a function call.
template <typename Func>
void analyze(ColdBlockInfo &CBI, Func getApplyLength) {
assert(!isValid());
valid = true;
unsigned numBlocks = F->size();
// As the complexity of the analysis is more than linear with the number of blocks,
// disable it for huge functions. In this case inlining will be less aggressive.
if (numBlocks > 2000)
return;
BlockInfoStorage.resize(numBlocks);
CBI.analyze(F);
// First step: compute the length of the blocks.
unsigned BlockIdx = 0;
for (SILBasicBlock &BB : *F) {
int Length = 0;
if (CBI.isCold(&BB)) {
Length = ColdBlockLength;
} else {
for (SILInstruction &I : BB) {
if (auto FAS = FullApplySite::isa(&I)) {
Length += getApplyLength(FAS);
} else {
Length += (int)instructionInlineCost(I);
}
}
}
BlockInfo *BBInfo = &BlockInfoStorage[BlockIdx++];
BlockInfos[&BB] = BBInfo;
BBInfo->Length = Length;
// Initialize the distances for the entry and exit blocks, used when
// computing the distances for the function itself.
if (&BB == &F->front())
BBInfo->getDistances(0).DistFromEntry = 0;
if (isa<ReturnInst>(BB.getTerminator()))
BBInfo->getDistances(0).DistToExit = Length;
else if (isa<ThrowInst>(BB.getTerminator()) ||
isa<ThrowAddrInst>(BB.getTerminator()))
BBInfo->getDistances(0).DistToExit = Length + ColdBlockLength;
}
// Compute the distances for all loops in the function.
for (SILLoop *Loop : *LI) {
analyzeLoopsRecursively(Loop, 1);
}
// Compute the distances for the function itself.
solveDataFlow(*F, 0);
}
/// Returns the length of the shortest path of the block \p BB in the loop
/// with nesting level \p LoopDepth. If \p LoopDepth is 0 ir returns the
/// shortest path in the function.
int getScopeLength(SILBasicBlock *BB, int LoopDepth) {
assert(BB->getParent() == F);
// Return a conservative default if the analysis was not done due to a high number of blocks.
if (BlockInfos.empty())
return ColdBlockLength;
if (LoopDepth >= MaxNumLoopLevels)
LoopDepth = MaxNumLoopLevels - 1;
return getBlockInfo(BB)->getScopeLength(LoopDepth);
}
/// Returns the weight of block \p BB also considering the \p CallerWeight
/// which is the weight of the call site's block in the caller.
Weight getWeight(SILBasicBlock *BB, Weight CallerWeight);
void dump();
};