//===- PerformanceInliner.cpp - Basic cost based inlining for performance -===//
//
// This source file is part of the Swift.org open source project
//
// Copyright (c) 2014 - 2015 Apple Inc. and the Swift project authors
// Licensed under Apache License v2.0 with Runtime Library Exception
//
// See http://swift.org/LICENSE.txt for license information
// See http://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
//
//===----------------------------------------------------------------------===//

#define DEBUG_TYPE "sil-inliner"
#include "swift/SIL/SILInstruction.h"
#include "swift/SIL/Dominance.h"
#include "swift/SIL/SILModule.h"
#include "swift/SIL/Projection.h"
#include "swift/SILAnalysis/ColdBlockInfo.h"
#include "swift/SILAnalysis/DominanceAnalysis.h"
#include "swift/SILAnalysis/CallGraphAnalysis.h"
#include "swift/SILAnalysis/LoopAnalysis.h"
#include "swift/SILPasses/Passes.h"
#include "swift/SILPasses/Transforms.h"
#include "swift/SILPasses/Utils/Local.h"
#include "swift/SILPasses/Utils/ConstantFolding.h"
#include "swift/SILPasses/Utils/Devirtualize.h"
#include "swift/SILPasses/Utils/Generics.h"
#include "swift/SILPasses/Utils/SILInliner.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/ADT/MapVector.h"
#include <functional>


using namespace swift;

STATISTIC(NumFunctionsInlined, "Number of functions inlined");

namespace {

// Threshold for deterministic testing of the inline heuristic.
// It specifies an instruction cost limit where a simplified model is used
// for the instruction costs: only builtin instructions have a cost of exactly
// 1.
llvm::cl::opt<int> TestThreshold("sil-inline-test-threshold",
                                 llvm::cl::init(-1), llvm::cl::Hidden);

llvm::cl::opt<int> TestOpt("sil-inline-test",
                           llvm::cl::init(0), llvm::cl::Hidden);
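
// (Illustrative: in a deterministic lit test one might pass, e.g.,
// -sil-inline-test-threshold=50; isProfitableToInline() below then counts
// only builtin instructions and uses 50 as the inlining threshold. The exact
// invocation is an example, not taken from an existing test.)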

// The following constants define the cost model for inlining.

// The base value for every call: it represents the benefit of removing the
// call overhead.
// This value can be overridden with the -sil-inline-threshold option.
const unsigned RemovedCallBenefit = 80;

// The benefit if the condition of a terminator instruction gets constant due
// to inlining.
const unsigned ConstTerminatorBenefit = 2;

// Benefit if the operand of an apply gets constant, e.g. if a closure is
// passed to an apply instruction in the callee.
const unsigned ConstCalleeBenefit = 150;

// Additional benefit for each loop level.
const unsigned LoopBenefitFactor = 40;

// Approximately up to this cost level a function can be inlined without
// increasing the code size.
const unsigned TrivialFunctionThreshold = 20;
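
// Illustrative arithmetic (derived from the constants above and their use in
// isProfitableToInline() below): an apply at loop depth 2 starts with a
// benefit of RemovedCallBenefit + 2 * LoopBenefitFactor = 80 + 2 * 40 = 160,
// so the callee is inlined if its accumulated cost does not exceed 160.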

// Represents a value in integer constant evaluation.
struct IntConst {
  IntConst() : isValid(false), isFromCaller(false) { }

  IntConst(const APInt &value, bool isFromCaller) :
    value(value), isValid(true), isFromCaller(isFromCaller) { }

  // The actual value.
  APInt value;

  // True if the value is valid, i.e. could be evaluated to a constant.
  bool isValid;

  // True if the value is only valid because a constant is passed to the
  // callee. False if constant propagation could do the same job inside the
  // callee without inlining it.
  bool isFromCaller;
};

// Tracks constants in the caller and callee to get an estimation of what
// values get constant if the callee is inlined.
// This can be seen as a "simulation" of several optimizations: SROA, mem2reg
// and constant propagation.
// Note that this is only a simplified model and not correct in all cases.
// For example, aliasing information is not taken into account.
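
// An illustrative example (SIL sketch, not from a real test case): if the
// caller stores an integer literal to a stack location and passes that
// address to the callee,
//
//   caller:   store %five to %addr        // %five = integer_literal 5
//             %r = apply %callee(%addr)
//   callee:   %v = load %arg              // estimated to be the literal 5
//
// the tracker links the callee's load back to the caller's store, so %v is
// treated as the constant 5 when estimating the inlining benefit.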
class ConstantTracker {
  // Links between loaded and stored values.
  // The key is a load instruction, the value is the corresponding store
  // instruction which stores the loaded value. Both key and value can also
  // be copy_addr instructions.
  llvm::DenseMap<SILInstruction *, SILInstruction *> links;

  // The current stored values at memory addresses.
  // The key is the base address of the memory (after skipping address
  // projections). The values are store (or copy_addr) instructions, which
  // store the current value.
  // This is only an estimation, because it does not, for example, consider
  // potential aliasing.
  llvm::DenseMap<SILValue, SILInstruction *> memoryContent;

  // Cache for evaluated constants.
  llvm::SmallDenseMap<BuiltinInst *, IntConst> constCache;

  // The caller/callee function which is tracked.
  SILFunction *F;

  // The constant tracker of the caller function (null if this is the
  // tracker of the callee).
  ConstantTracker *callerTracker;

  // The apply instruction in the caller (null if this is the tracker of the
  // callee).
  ApplyInst *AI;

  // Walks through address projections and (optionally) collects them.
  // Returns the base address, i.e. the first address which is not a
  // projection.
  SILValue scanProjections(SILValue addr,
                           SmallVectorImpl<Projection> *Result = nullptr);

  // Get the stored value for a load. The loadInst can be either a real load
  // or a copy_addr.
  SILValue getStoredValue(SILInstruction *loadInst,
                          ProjectionPath &projStack);

  // Gets the parameter in the caller for a function argument.
  SILValue getParam(SILValue value) {
    if (SILArgument *arg = dyn_cast<SILArgument>(value)) {
      if (AI && arg->isFunctionArg() && arg->getFunction() == F) {
        // Continue at the caller.
        return AI->getArgument(arg->getIndex());
      }
    }
    return SILValue();
  }

  SILInstruction *getMemoryContent(SILValue addr) {
    // The memory content can be stored in this ConstantTracker or in the
    // caller's ConstantTracker.
    SILInstruction *storeInst = memoryContent[addr];
    if (storeInst)
      return storeInst;
    if (callerTracker)
      return callerTracker->getMemoryContent(addr);
    return nullptr;
  }

  // Gets the estimated definition of a value.
  SILInstruction *getDef(SILValue val, ProjectionPath &projStack);

  // Gets the estimated integer constant result of a builtin.
  IntConst getBuiltinConst(BuiltinInst *BI, int depth);

public:

  // Constructor for the caller function.
  ConstantTracker(SILFunction *function) :
    F(function), callerTracker(nullptr), AI(nullptr)
  { }

  // Constructor for the callee function.
  ConstantTracker(SILFunction *function, ConstantTracker *caller,
                  ApplyInst *callerApply) :
    F(function), callerTracker(caller), AI(callerApply)
  { }

  void beginBlock() {
    // Currently we don't do any sophisticated dataflow analysis, so we keep
    // the memoryContent alive only for a single block.
    memoryContent.clear();
  }

  // Must be called for each instruction visited in dominance order.
  void trackInst(SILInstruction *inst);

  // Gets the estimated definition of a value.
  SILInstruction *getDef(SILValue val) {
    ProjectionPath projStack;
    return getDef(val, projStack);
  }

  // Gets the estimated definition of a value if it is in the caller.
  SILInstruction *getDefInCaller(SILValue val) {
    SILInstruction *def = getDef(val);
    if (def && def->getFunction() != F)
      return def;
    return nullptr;
  }

  // Gets the estimated integer constant of a value.
  IntConst getIntConst(SILValue val, int depth = 0);
};

// Controls the decision to inline functions with @_semantics, @effects and
// global_init attributes.
enum class InlineSelection {
  Everything,
  NoGlobalInit, // and no availability semantics calls
  NoSemanticsAndGlobalInit
};
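
// (As the pass-creation functions at the end of this file show, the early
// inliner runs with NoSemanticsAndGlobalInit, the mid-level inliner with
// NoGlobalInit, and the late inliner with Everything.)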

class SILPerformanceInliner {
  /// The inline threshold.
  const int InlineCostThreshold;
  /// Specifies which functions not to inline, based on @_semantics and
  /// global_init attributes.
  InlineSelection WhatToInline;

  /// A map for each newly cloned apply back to the apply that it
  /// was cloned from. This is used in order to stop unbounded
  /// recursion in the case where we are cloning functions that are
  /// themselves recursive, especially when they are indirectly
  /// recursive in a way that is only exposed through inlining
  /// followed by devirtualization.
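  ///
  /// Illustrative example (hypothetical, not from a real test case): if g's
  /// body is inlined into f and a class_method call inside the inlined code
  /// is then devirtualized to a direct call back into g, the new apply maps
  /// back to the apply it was cloned from, letting
  /// applyTargetsOriginFunction() detect the cycle before we inline along
  /// it again.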
  llvm::DenseMap<FullApplySite, FullApplySite> OriginMap;
  llvm::DenseSet<FullApplySite> RemovedApplies;

  SILFunction *getEligibleFunction(ApplyInst *AI);

  bool isProfitableToInline(ApplyInst *AI, unsigned loopDepthOfAI,
                            DominanceAnalysis *DA,
                            SILLoopAnalysis *LA,
                            ConstantTracker &constTracker);

  void visitColdBlocks(SmallVectorImpl<ApplyInst *> &AppliesToInline,
                       SILBasicBlock *root, DominanceInfo *DT);

  void collectAppliesToInline(SILFunction *Caller,
                              SmallVectorImpl<ApplyInst *> &Applies,
                              DominanceAnalysis *DA,
                              SILLoopAnalysis *LA,
                              CallGraph &CG);

  bool applyTargetsOriginFunction(FullApplySite Apply, SILFunction *Callee);

  void removeApply(FullApplySite Apply, CallGraph &CG,
                   llvm::SmallVectorImpl<FullApplySite> &NewApplies);

  FullApplySite devirtualizeUpdatingCallGraph(FullApplySite Apply,
                                              CallGraph &CG);

  bool devirtualizeAndSpecializeApplies(
      llvm::SmallVectorImpl<FullApplySite> &Applies,
      CallGraphAnalysis *CGA,
      SILModuleTransform *MT,
      llvm::SmallVectorImpl<SILFunction *> &WorkList);

  FullApplySite specializeGenericUpdatingCallGraph(
      FullApplySite Apply,
      CallGraph &CG,
      llvm::SmallVectorImpl<FullApplySite> &NewApplies);

  bool inlineCallsIntoFunction(SILFunction *F, DominanceAnalysis *DA,
                               SILLoopAnalysis *LA, CallGraph &CG,
                               llvm::SmallVectorImpl<FullApplySite> &NewApplies);

public:
  SILPerformanceInliner(int threshold,
                        InlineSelection WhatToInline)
    : InlineCostThreshold(threshold),
      WhatToInline(WhatToInline) {}

  void inlineDevirtualizeAndSpecialize(SILFunction *WorkItem,
                                       SILModuleTransform *MT,
                                       CallGraphAnalysis *CGA,
                                       DominanceAnalysis *DA,
                                       SILLoopAnalysis *LA);
};
}

//===----------------------------------------------------------------------===//
// ConstantTracker
//===----------------------------------------------------------------------===//


void ConstantTracker::trackInst(SILInstruction *inst) {
  if (LoadInst *LI = dyn_cast<LoadInst>(inst)) {
    SILValue baseAddr = scanProjections(LI->getOperand());
    if (SILInstruction *loadLink = getMemoryContent(baseAddr))
      links[LI] = loadLink;
  } else if (StoreInst *SI = dyn_cast<StoreInst>(inst)) {
    SILValue baseAddr = scanProjections(SI->getOperand(1));
    memoryContent[baseAddr] = SI;
  } else if (CopyAddrInst *CAI = dyn_cast<CopyAddrInst>(inst)) {
    if (!CAI->isTakeOfSrc()) {
      // Treat a copy_addr as a load + store.
      SILValue loadAddr = scanProjections(CAI->getOperand(0));
      if (SILInstruction *loadLink = getMemoryContent(loadAddr)) {
        links[CAI] = loadLink;
        SILValue storeAddr = scanProjections(CAI->getOperand(1));
        memoryContent[storeAddr] = CAI;
      }
    }
  }
}

SILValue ConstantTracker::scanProjections(SILValue addr,
                                          SmallVectorImpl<Projection> *Result) {
  for (;;) {
    if (Projection::isAddrProjection(addr)) {
      SILInstruction *I = cast<SILInstruction>(addr.getDef());
      if (Result) {
        Optional<Projection> P = Projection::addressProjectionForInstruction(I);
        Result->push_back(P.getValue());
      }
      addr = I->getOperand(0);
      continue;
    }
    if (SILValue param = getParam(addr)) {
      // Go to the caller.
      addr = param;
      continue;
    }
    // Return the base address, i.e. the first address which is not a
    // projection.
    return addr;
  }
}

SILValue ConstantTracker::getStoredValue(SILInstruction *loadInst,
                                         ProjectionPath &projStack) {
  SILInstruction *store = links[loadInst];
  if (!store && callerTracker)
    store = callerTracker->links[loadInst];
  if (!store) return SILValue();

  assert(isa<LoadInst>(loadInst) || isa<CopyAddrInst>(loadInst));

  // Push the address projections of the load onto the stack.
  SmallVector<Projection, 4> loadProjections;
  scanProjections(loadInst->getOperand(0), &loadProjections);
  for (const Projection &proj : loadProjections) {
    projStack.push_back(proj);
  }

  // Pop the address projections of the store from the stack.
  SmallVector<Projection, 4> storeProjections;
  scanProjections(store->getOperand(1), &storeProjections);
  for (auto iter = storeProjections.rbegin(); iter != storeProjections.rend();
       ++iter) {
    const Projection &proj = *iter;
    // The corresponding load projection must match the store projection.
    if (projStack.empty() || projStack.back() != proj)
      return SILValue();
    projStack.pop_back();
  }

  if (isa<StoreInst>(store))
    return store->getOperand(0);

  // The copy_addr instruction is both a load and a store, so we follow the
  // link again.
  assert(isa<CopyAddrInst>(store));
  return getStoredValue(store, projStack);
}

// Get the aggregate member based on the top of the projection stack.
static SILValue getMember(SILInstruction *inst, ProjectionPath &projStack) {
  if (!projStack.empty()) {
    const Projection &proj = projStack.back();
    return proj.getOperandForAggregate(inst);
  }
  return SILValue();
}

SILInstruction *ConstantTracker::getDef(SILValue val,
                                        ProjectionPath &projStack) {

  // Track the value up the dominator tree.
  for (;;) {
    if (SILInstruction *inst = dyn_cast<SILInstruction>(val)) {
      if (auto proj = Projection::valueProjectionForInstruction(inst)) {
        // Extract a member from a struct/tuple/enum.
        projStack.push_back(proj.getValue());
        val = inst->getOperand(0);
        continue;
      } else if (SILValue member = getMember(inst, projStack)) {
        // The opposite of a projection instruction: composing a struct/tuple.
        projStack.pop_back();
        val = member;
        continue;
      } else if (SILValue loadedVal = getStoredValue(inst, projStack)) {
        // A value loaded from memory.
        val = loadedVal;
        continue;
      } else if (isa<ThinToThickFunctionInst>(inst)) {
        val = inst->getOperand(0);
        continue;
      }
      return inst;
    } else if (SILValue param = getParam(val)) {
      // Continue in the caller.
      val = param;
      continue;
    }
    return nullptr;
  }
}

IntConst ConstantTracker::getBuiltinConst(BuiltinInst *BI, int depth) {
  const BuiltinInfo &Builtin = BI->getBuiltinInfo();
  OperandValueArrayRef Args = BI->getArguments();
  switch (Builtin.ID) {
  default: break;

  // Fold comparison predicates.
#define BUILTIN(id, name, Attrs)
#define BUILTIN_BINARY_PREDICATE(id, name, attrs, overload) \
  case BuiltinValueKind::id:
#include "swift/AST/Builtins.def"
  {
    IntConst lhs = getIntConst(Args[0], depth);
    IntConst rhs = getIntConst(Args[1], depth);
    if (lhs.isValid && rhs.isValid) {
      return IntConst(constantFoldComparison(lhs.value, rhs.value,
                                             Builtin.ID),
                      lhs.isFromCaller || rhs.isFromCaller);
    }
    break;
  }

  case BuiltinValueKind::SAddOver:
  case BuiltinValueKind::UAddOver:
  case BuiltinValueKind::SSubOver:
  case BuiltinValueKind::USubOver:
  case BuiltinValueKind::SMulOver:
  case BuiltinValueKind::UMulOver: {
    IntConst lhs = getIntConst(Args[0], depth);
    IntConst rhs = getIntConst(Args[1], depth);
    if (lhs.isValid && rhs.isValid) {
      bool IgnoredOverflow;
      return IntConst(constantFoldBinaryWithOverflow(lhs.value, rhs.value,
                        IgnoredOverflow,
                        getLLVMIntrinsicIDForBuiltinWithOverflow(Builtin.ID)),
                      lhs.isFromCaller || rhs.isFromCaller);
    }
    break;
  }

  case BuiltinValueKind::SDiv:
  case BuiltinValueKind::SRem:
  case BuiltinValueKind::UDiv:
  case BuiltinValueKind::URem: {
    IntConst lhs = getIntConst(Args[0], depth);
    IntConst rhs = getIntConst(Args[1], depth);
    if (lhs.isValid && rhs.isValid && rhs.value != 0) {
      bool IgnoredOverflow;
      return IntConst(constantFoldDiv(lhs.value, rhs.value,
                                      IgnoredOverflow, Builtin.ID),
                      lhs.isFromCaller || rhs.isFromCaller);
    }
    break;
  }

  case BuiltinValueKind::And:
  case BuiltinValueKind::AShr:
  case BuiltinValueKind::LShr:
  case BuiltinValueKind::Or:
  case BuiltinValueKind::Shl:
  case BuiltinValueKind::Xor: {
    IntConst lhs = getIntConst(Args[0], depth);
    IntConst rhs = getIntConst(Args[1], depth);
    if (lhs.isValid && rhs.isValid) {
      return IntConst(constantFoldBitOperation(lhs.value, rhs.value,
                                               Builtin.ID),
                      lhs.isFromCaller || rhs.isFromCaller);
    }
    break;
  }

  case BuiltinValueKind::Trunc:
  case BuiltinValueKind::ZExt:
  case BuiltinValueKind::SExt:
  case BuiltinValueKind::TruncOrBitCast:
  case BuiltinValueKind::ZExtOrBitCast:
  case BuiltinValueKind::SExtOrBitCast: {
    IntConst val = getIntConst(Args[0], depth);
    if (val.isValid) {
      return IntConst(constantFoldCast(val.value, Builtin), val.isFromCaller);
    }
    break;
  }
  }
  return IntConst();
}

// Tries to evaluate the integer constant of a value. The \p depth is used
// to limit the complexity.
IntConst ConstantTracker::getIntConst(SILValue val, int depth) {

  // Don't spend too much time with constant evaluation.
  if (depth >= 10)
    return IntConst();

  SILInstruction *I = getDef(val);
  if (!I)
    return IntConst();

  if (auto *IL = dyn_cast<IntegerLiteralInst>(I)) {
    return IntConst(IL->getValue(), IL->getFunction() != F);
  }
  if (auto *BI = dyn_cast<BuiltinInst>(I)) {
    if (constCache.count(BI) != 0)
      return constCache[BI];

    IntConst builtinConst = getBuiltinConst(BI, depth + 1);
    constCache[BI] = builtinConst;
    return builtinConst;
  }
  return IntConst();
}

//===----------------------------------------------------------------------===//
// Performance Inliner
//===----------------------------------------------------------------------===//

// Returns the referenced function of an apply_inst if it is a direct call.
static SILFunction *getReferencedFunction(FullApplySite Apply) {
  auto *FRI = dyn_cast<FunctionRefInst>(Apply.getCallee());
  if (!FRI)
    return nullptr;

  return FRI->getReferencedFunction();
}

// Given an apply, determine if it or any of the applies it was cloned from
// were in Callee. If so, we can end up in an infinite loop trying
// to inline recursive cycles in the code (some of which are only
// exposed via devirtualization).
bool SILPerformanceInliner::applyTargetsOriginFunction(FullApplySite Apply,
                                                       SILFunction *Callee) {
  assert(Apply && "Expected non-null apply!");
  assert(!RemovedApplies.count(Apply) && "Apply cannot have been removed!");

  while (Apply.getFunction() != Callee) {
    auto Found = OriginMap.find(Apply);
    if (Found == OriginMap.end())
      return false;

    assert(Found->second && "Expected non-null apply!");
    Apply = Found->second;

    // Bail if we hit an apply that has been removed, since we cannot
    // determine if we'll end up in a recursive inlining situation.
    if (RemovedApplies.count(Apply))
      return true;
  }

  return true;
}

// Returns the callee of an apply_inst if it is basically inlinable.
SILFunction *SILPerformanceInliner::getEligibleFunction(ApplyInst *AI) {

  SILFunction *Callee = getReferencedFunction(AI);

  if (!Callee) {
    DEBUG(llvm::dbgs() << "    FAIL: Cannot find inlineable callee.\n");
    return nullptr;
  }

  // Don't inline functions that are marked with the @_semantics or @effects
  // attribute if the inliner is asked not to inline them.
  if (Callee->hasDefinedSemantics() || Callee->hasEffectsKind()) {
    if (WhatToInline == InlineSelection::NoSemanticsAndGlobalInit) {
      DEBUG(llvm::dbgs() << "    FAIL: Function " << Callee->getName()
            << " has special semantics or effects attribute.\n");
      return nullptr;
    }
    // The "availability" semantics attribute is treated like global-init.
    if (Callee->hasDefinedSemantics() &&
        WhatToInline != InlineSelection::Everything &&
        Callee->getSemanticsString().startswith("availability")) {
      return nullptr;
    }
  } else if (Callee->isGlobalInit()) {
    if (WhatToInline != InlineSelection::Everything) {
      DEBUG(llvm::dbgs() << "    FAIL: Function " << Callee->getName()
            << " has the global-init attribute.\n");
      return nullptr;
    }
  }

  // We can't inline external declarations.
  if (Callee->empty() || Callee->isExternalDeclaration()) {
    DEBUG(llvm::dbgs() << "    FAIL: Cannot inline external " <<
          Callee->getName() << ".\n");
    return nullptr;
  }

  // Explicitly disabled inlining.
  if (Callee->getInlineStrategy() == NoInline) {
    DEBUG(llvm::dbgs() << "    FAIL: noinline attribute on " <<
          Callee->getName() << ".\n");
    return nullptr;
  }

  // We don't support inlining applies with generic substitutions yet.
  if (AI->hasSubstitutions()) {
    DEBUG(llvm::dbgs() << "    FAIL: Generic substitutions on " <<
          Callee->getName() << ".\n");
    return nullptr;
  }

  SILFunction *Caller = AI->getFunction();

  // Check for trivial recursion.
  if (Callee == Caller) {
    DEBUG(llvm::dbgs() << "    FAIL: Skipping recursive calls on " <<
          Callee->getName() << ".\n");
    return nullptr;
  }

  // Check for non-trivial recursion.
  if (applyTargetsOriginFunction(AI, Callee)) {
    DEBUG(llvm::dbgs() << "    FAIL: Non-trivial recursion calling " <<
          Callee->getName() << ".\n");
    return nullptr;
  }

  // A non-fragile function may not be inlined into a fragile function.
  if (Caller->isFragile() && !Callee->isFragile()) {
    DEBUG(llvm::dbgs() << "    FAIL: Can't inline fragile " <<
          Callee->getName() << ".\n");
    return nullptr;
  }
  DEBUG(llvm::dbgs() << "    Eligible callee: " <<
        Callee->getName() << "\n");

  return Callee;
}

// Gets the cost of an instruction by using the simplified test model: only
// builtin instructions have a cost, and that cost is exactly 1.
static unsigned testCost(SILInstruction *I) {
  switch (I->getKind()) {
  case ValueKind::BuiltinInst:
    return 1;
  default:
    return 0;
  }
}

// Returns the taken block of a terminator instruction if the condition turns
// out to be constant.
static SILBasicBlock *getTakenBlock(TermInst *term,
                                    ConstantTracker &constTracker) {
  if (CondBranchInst *CBI = dyn_cast<CondBranchInst>(term)) {
    IntConst condConst = constTracker.getIntConst(CBI->getCondition());
    if (condConst.isFromCaller) {
      return condConst.value != 0 ? CBI->getTrueBB() : CBI->getFalseBB();
    }
    return nullptr;
  }
  if (SwitchValueInst *SVI = dyn_cast<SwitchValueInst>(term)) {
    IntConst switchConst = constTracker.getIntConst(SVI->getOperand());
    if (switchConst.isFromCaller) {
      for (unsigned Idx = 0; Idx < SVI->getNumCases(); ++Idx) {
        auto switchCase = SVI->getCase(Idx);
        if (auto *IL = dyn_cast<IntegerLiteralInst>(switchCase.first)) {
          if (switchConst.value == IL->getValue())
            return switchCase.second;
        } else {
          return nullptr;
        }
      }
      if (SVI->hasDefault())
        return SVI->getDefaultBB();
    }
    return nullptr;
  }
  if (SwitchEnumInst *SEI = dyn_cast<SwitchEnumInst>(term)) {
    if (SILInstruction *def = constTracker.getDefInCaller(SEI->getOperand())) {
      if (EnumInst *EI = dyn_cast<EnumInst>(def)) {
        for (unsigned Idx = 0; Idx < SEI->getNumCases(); ++Idx) {
          auto enumCase = SEI->getCase(Idx);
          if (enumCase.first == EI->getElement())
            return enumCase.second;
        }
        if (SEI->hasDefault())
          return SEI->getDefaultBB();
      }
    }
    return nullptr;
  }
  if (CheckedCastBranchInst *CCB = dyn_cast<CheckedCastBranchInst>(term)) {
    if (SILInstruction *def = constTracker.getDefInCaller(CCB->getOperand())) {
      if (UpcastInst *UCI = dyn_cast<UpcastInst>(def)) {
        SILType castType = UCI->getOperand()->getType(0);
        if (CCB->getCastType().isSuperclassOf(castType)) {
          return CCB->getSuccessBB();
        }
        if (!castType.isSuperclassOf(CCB->getCastType())) {
          return CCB->getFailureBB();
        }
      }
    }
  }
  return nullptr;
}
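
// For example (illustrative): if the caller passes the constant 1 as the
// condition of a cond_br in the callee, getTakenBlock() reports the true
// successor; isProfitableToInline() below then adds ConstTerminatorBenefit
// and skips counting blocks that become dead after inlining.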

/// Return true if inlining this call site is profitable.
bool SILPerformanceInliner::isProfitableToInline(ApplyInst *AI,
                                                 unsigned loopDepthOfAI,
                                                 DominanceAnalysis *DA,
                                                 SILLoopAnalysis *LA,
                                                 ConstantTracker &callerTracker) {
  SILFunction *Callee = getReferencedFunction(AI);

  if (Callee->getInlineStrategy() == AlwaysInline)
    return true;

  ConstantTracker constTracker(Callee, &callerTracker, AI);

  DominanceInfo *DT = DA->get(Callee);
  SILLoopInfo *LI = LA->getLoopInfo(Callee);

  DominanceOrder domOrder(&Callee->front(), DT, Callee->size());

  // Calculate the inlining cost of the callee.
  unsigned CalleeCost = 0;
  unsigned Benefit = InlineCostThreshold > 0 ? InlineCostThreshold :
                                               RemovedCallBenefit;
  Benefit += loopDepthOfAI * LoopBenefitFactor;
  int testThreshold = TestThreshold;

  while (SILBasicBlock *block = domOrder.getNext()) {
    constTracker.beginBlock();
    unsigned loopDepth = LI->getLoopDepth(block);
    for (SILInstruction &I : *block) {
      constTracker.trackInst(&I);

      auto ICost = instructionInlineCost(I);

      if (testThreshold >= 0) {
        // We are in test-mode: use a simplified cost model.
        CalleeCost += testCost(&I);
      } else {
        // Use the regular cost model.
        CalleeCost += unsigned(ICost);
      }

      if (ApplyInst *AI = dyn_cast<ApplyInst>(&I)) {

        // Check if the callee is passed as an argument. If so, increase the
        // threshold, because inlining will (probably) eliminate the closure.
        SILInstruction *def = constTracker.getDefInCaller(AI->getCallee());
        if (def && (isa<FunctionRefInst>(def) || isa<PartialApplyInst>(def))) {

          DEBUG(llvm::dbgs() << "    Boost: apply const function at" << *AI);
          Benefit += ConstCalleeBenefit + loopDepth * LoopBenefitFactor;
          testThreshold *= 2;
        }
      }
    }
    // Don't count costs in blocks which are dead after inlining.
    SILBasicBlock *takenBlock = getTakenBlock(block->getTerminator(),
                                              constTracker);
    if (takenBlock) {
      Benefit += ConstTerminatorBenefit + TestOpt;
      DEBUG(llvm::dbgs() << "    Take bb" << takenBlock->getDebugID() <<
            " of" << *block->getTerminator());
      domOrder.pushChildrenIf(block, [=] (SILBasicBlock *child) {
        return child->getSinglePredecessor() != block || child == takenBlock;
      });
    } else {
      domOrder.pushChildren(block);
    }
  }

  unsigned Threshold = Benefit; // The default.
  if (testThreshold >= 0) {
    // We are in testing mode.
    Threshold = testThreshold;
  } else if (AI->getFunction()->isThunk()) {
    // Only inline trivial functions into thunks (which will not increase the
    // code size).
    Threshold = TrivialFunctionThreshold;
  }

  if (CalleeCost > Threshold) {
    DEBUG(llvm::dbgs() << "    NO: Function too big to inline, "
          "cost: " << CalleeCost << ", threshold: " << Threshold << "\n");
    return false;
  }
  DEBUG(llvm::dbgs() << "    YES: ready to inline, "
        "cost: " << CalleeCost << ", threshold: " << Threshold << "\n");
  return true;
}

/// Return true if inlining the given callee into a cold block is profitable.
static bool isProfitableInColdBlock(SILFunction *Callee) {
  if (Callee->getInlineStrategy() == AlwaysInline)
    return true;

  // Testing with the TestThreshold disables inlining into cold blocks.
  if (TestThreshold >= 0)
    return false;

  unsigned CalleeCost = 0;

  for (SILBasicBlock &Block : *Callee) {
    for (SILInstruction &I : Block) {
      auto ICost = instructionInlineCost(I);
      CalleeCost += (unsigned)ICost;

      if (CalleeCost > TrivialFunctionThreshold)
        return false;
    }
  }

  DEBUG(llvm::dbgs() << "    YES: ready to inline into cold block, cost:"
        << CalleeCost << "\n");
  return true;
}
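
// (Illustrative: a small accessor whose body is only a handful of
// instructions stays below TrivialFunctionThreshold (20) and is therefore
// still inlined on the cold path; anything larger is kept out of line.)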

// Attempt to devirtualize, maintaining the call graph if
// successful. When successful, replaces the old apply with the new
// one and returns the new one. When unsuccessful, returns an empty
// apply site.
FullApplySite SILPerformanceInliner::devirtualizeUpdatingCallGraph(
                                                          FullApplySite Apply,
                                                          CallGraph &CG) {
  auto *AI = cast<ApplyInst>(Apply.getInstruction());

  auto *NewInst = tryDevirtualizeApply(AI);
  if (!NewInst)
    return FullApplySite();

  if (auto *Edge = CG.getCallGraphEdge(AI))
    CG.removeEdge(Edge);

  auto *NewAI = findApplyFromDevirtualizedResult(NewInst);
  // In cases where devirtualization results in having to
  // insert code to match the result type of the original
  // function, we need to find the original apply. It's
  // currently simple enough to always do this, and we'll end
  // up with a better call graph if we try to maintain the
  // property that we can always find the apply that resulted
  // from devirtualizing.
  assert(NewAI && "Expected to find an apply!");

  assert(!OriginMap.count(NewAI) && "Unexpected apply in map!");
  if (OriginMap.count(AI))
    OriginMap[NewAI] = OriginMap[AI];
  RemovedApplies.insert(AI);
  replaceDeadApply(AI, NewInst);

  auto *F = getReferencedFunction(NewAI);
  assert(F && "Expected direct function referenced!");

  // If we devirtualized to a function declaration, attempt to link it
  // in and update the call graph, or else just return the newly
  // devirtualized instruction.
  if (F->isExternalDeclaration()) {
    auto &M = F->getModule();
    if (!M.linkFunction(F, SILModule::LinkingMode::LinkAll,
                        CallGraphLinkerEditor(CG).getCallback()))
      return NewAI;
  }

  CG.addEdgesForApply(NewAI);

  return NewAI;
}

FullApplySite SILPerformanceInliner::specializeGenericUpdatingCallGraph(
                          FullApplySite Apply,
                          CallGraph &CG,
                          llvm::SmallVectorImpl<FullApplySite> &NewApplies) {
  assert(NewApplies.empty() && "Expected out parameter for new applies!");

  if (!Apply.hasSubstitutions())
    return FullApplySite();

  auto *Callee = getReferencedFunction(Apply);

  if (!Callee)
    return FullApplySite();

  if (Callee->isExternalDeclaration()) {
    auto &M = Callee->getModule();
    if (!M.linkFunction(Callee, SILModule::LinkingMode::LinkAll,
                        CallGraphLinkerEditor(CG).getCallback()))
      return FullApplySite();
  }

  auto *AI = Apply.getInstruction();
  SILFunction *SpecializedFunction;
  llvm::SmallVector<FullApplyCollector::value_type, 4> NewApplyPairs;
  auto Specialized = trySpecializeApplyOfGeneric(ApplySite(AI),
                                                 SpecializedFunction,
                                                 NewApplyPairs);

  if (!Specialized)
    return FullApplySite();

  assert(FullApplySite::isa(Specialized.getInstruction()) &&
         "Expected full apply site!");
  auto SpecializedFullApply = FullApplySite(Specialized.getInstruction());

  // Update the call graph based on the specialization.

  CallGraphEditor Editor(CG);
  if (SpecializedFunction)
    Editor.addCallGraphNode(SpecializedFunction);

  Editor.replaceApplyWithNew(Apply, SpecializedFullApply);

  for (auto NewApply : NewApplyPairs) {
    NewApplies.push_back(NewApply.first);

    Editor.addEdgesForApply(NewApply.first);

    assert(!OriginMap.count(NewApply.first) && "Unexpected apply in map!");
    OriginMap[NewApply.first] = NewApply.second;
  }

  assert(!OriginMap.count(SpecializedFullApply) && "Unexpected apply in map!");
  if (OriginMap.count(Apply))
    OriginMap[SpecializedFullApply] = OriginMap[Apply];
  RemovedApplies.insert(Apply);
  // Replace the old apply with the new and delete the old.
  replaceDeadApply(Apply, SpecializedFullApply.getInstruction());

  return SpecializedFullApply;
}

static void collectAllAppliesInFunction(SILFunction *F,
                              llvm::SmallVectorImpl<FullApplySite> &Applies) {
  assert(Applies.empty() && "Expected empty vector to store into!");

  for (auto &B : *F)
    for (auto &I : B)
      if (auto *AI = dyn_cast<ApplyInst>(&I))
        Applies.push_back(FullApplySite(AI));
}

// Devirtualize and specialize a group of applies, updating the call
// graph and returning a worklist of newly exposed function references
// that should be considered for inlining before continuing with the
// caller that has the passed-in applies.
//
// The returned worklist is stacked such that the last things we want
// to process are earlier on the list.
//
// Returns true if any changes were made.
bool SILPerformanceInliner::devirtualizeAndSpecializeApplies(
                          llvm::SmallVectorImpl<FullApplySite> &Applies,
                          CallGraphAnalysis *CGA,
                          SILModuleTransform *MT,
                          llvm::SmallVectorImpl<SILFunction *> &WorkList) {
  assert(WorkList.empty() && "Expected empty worklist for return results!");

  auto &CG = CGA->getCallGraph();
  bool ChangedAny = false;

  // The set of all new function references generated by
  // devirtualization and specialization.
  llvm::SetVector<SILFunction *> NewRefs;

  // Process all applies passed in, plus any new ones that are pushed
  // on as a result of specializing the referenced functions.
  while (!Applies.empty()) {
    auto Apply = Applies.back();
    Applies.pop_back();

    bool ChangedApply = false;
    if (auto NewApply = devirtualizeUpdatingCallGraph(Apply, CG)) {
      ChangedApply = true;

      Apply = NewApply;
    }

    llvm::SmallVector<FullApplySite, 4> NewApplies;
    if (auto NewApply = specializeGenericUpdatingCallGraph(Apply, CG,
                                                           NewApplies)) {
      ChangedApply = true;

      Apply = NewApply;
      Applies.insert(Applies.end(), NewApplies.begin(), NewApplies.end());
    }

    if (ChangedApply) {
      ChangedAny = true;

      auto *NewCallee = getReferencedFunction(Apply);
      assert(NewCallee && "Expected directly referenced function!");

      // Track all new references to function definitions.
      if (NewCallee->isDefinition())
        NewRefs.insert(NewCallee);

      // Invalidate analyses, but lock the call graph since we
      // maintain it, and also indicate that we preserve branches
      // since we've only touched applies.
      CGA->lockInvalidation();
      MT->invalidateAnalysis(Apply.getFunction(),
                             SILAnalysis::PreserveKind::Branches);
      CGA->unlockInvalidation();
    }
  }

  // Copy out all the new function references gathered.
  if (ChangedAny)
    WorkList.insert(WorkList.end(), NewRefs.begin(), NewRefs.end());

  return ChangedAny;
}

void SILPerformanceInliner::collectAppliesToInline(SILFunction *Caller,
                                      SmallVectorImpl<ApplyInst *> &Applies,
                                      DominanceAnalysis *DA,
                                      SILLoopAnalysis *LA,
                                      CallGraph &CG) {
  DominanceInfo *DT = DA->get(Caller);
  SILLoopInfo *LI = LA->getLoopInfo(Caller);

  ConstantTracker constTracker(Caller);
  DominanceOrder domOrder(&Caller->front(), DT, Caller->size());

  // Go through all instructions and find candidates for inlining.
  // We do this in dominance order for the constTracker.
  SmallVector<ApplyInst *, 8> InitialCandidates;
  while (SILBasicBlock *block = domOrder.getNext()) {
    constTracker.beginBlock();
    unsigned loopDepth = LI->getLoopDepth(block);
    for (auto I = block->begin(), E = block->end(); I != E; ++I) {
      constTracker.trackInst(&*I);

      auto *AI = dyn_cast<ApplyInst>(I);
      if (!AI)
        continue;

      DEBUG(llvm::dbgs() << "    Check:" << *AI);

      auto *Callee = getEligibleFunction(AI);
      if (Callee) {
        if (isProfitableToInline(AI, loopDepth, DA, LA, constTracker))
          InitialCandidates.push_back(AI);
      }
    }
    domOrder.pushChildrenIf(block, [&] (SILBasicBlock *child) {
      if (ColdBlockInfo::isSlowPath(block, child)) {
        // Handle cold blocks separately.
        visitColdBlocks(InitialCandidates, child, DT);
        return false;
      }
      return true;
    });
  }

  // Calculate how many times a callee is called from this caller.
  llvm::DenseMap<SILFunction *, unsigned> CalleeCount;
  for (auto AI : InitialCandidates) {
    SILFunction *Callee = getReferencedFunction(AI);
    assert(Callee && "apply_inst does not have a direct callee anymore");
    CalleeCount[Callee]++;
  }

  // Now copy each candidate callee that has a small enough number of
  // call sites into the final set of call sites.
  for (auto AI : InitialCandidates) {
    SILFunction *Callee = getReferencedFunction(AI);
    assert(Callee && "apply_inst does not have a direct callee anymore");

    const unsigned CallsToCalleeThreshold = 1024;
    if (CalleeCount[Callee] <= CallsToCalleeThreshold)
      Applies.push_back(AI);
  }
}

/// \brief Attempt to inline all calls smaller than our threshold.
/// Returns true if a function was inlined.
bool SILPerformanceInliner::inlineCallsIntoFunction(SILFunction *Caller,
                              DominanceAnalysis *DA,
                              SILLoopAnalysis *LA,
                              CallGraph &CG,
                              llvm::SmallVectorImpl<FullApplySite> &NewApplies) {
  // Don't optimize functions that are marked with the opt.never attribute.
  if (!Caller->shouldOptimize())
    return false;

  DEBUG(llvm::dbgs() << "Visiting Function: " << Caller->getName() << "\n");

  assert(NewApplies.empty() && "Expected empty vector to store results in!");

  // First step: collect all the functions we want to inline. We
  // don't change anything yet so that the dominator information
  // remains valid.
  SmallVector<ApplyInst *, 8> AppliesToInline;
  collectAppliesToInline(Caller, AppliesToInline, DA, LA, CG);

  if (AppliesToInline.empty())
    return false;

  // Second step: do the actual inlining.
  for (auto AI : AppliesToInline) {
    SILFunction *Callee = getReferencedFunction(AI);
    assert(Callee && "apply_inst does not have a direct callee anymore");

    DEBUG(llvm::dbgs() << "    Inline:" << *AI);

    SmallVector<SILValue, 8> Args;
    for (const auto &Arg : AI->getArguments())
      Args.push_back(Arg);

    FullApplyCollector Collector;

    // Notice that we will skip all of the newly inlined ApplyInsts. That's
    // okay because we will visit them in our next invocation of the inliner.
    TypeSubstitutionMap ContextSubs;
    SILInliner Inliner(*Caller, *Callee,
                       SILInliner::InlineKind::PerformanceInline,
                       ContextSubs, AI->getSubstitutions(),
                       Collector.getCallback());
    auto Success = Inliner.inlineFunction(AI, Args);
    (void) Success;
    // We've already determined we should be able to inline this, so
    // we expect it to have happened.
    assert(Success && "Expected inliner to inline this function!");
    llvm::SmallVector<FullApplySite, 4> AppliesFromInlinee;
    for (auto &P : Collector.getApplyPairs()) {
      AppliesFromInlinee.push_back(P.first);

      // Maintain a mapping for all new applies back to the apply they
      // originated from.
      assert(!OriginMap.count(P.first) && "Did not expect apply to be in map!");
      assert(P.second && "Expected non-null apply site!");
      OriginMap[P.first] = P.second;
    }

    CallGraphEditor Editor(CG);
    Editor.replaceApplyWithNew(AI, AppliesFromInlinee);

    RemovedApplies.insert(AI);
    recursivelyDeleteTriviallyDeadInstructions(AI, true);

    NewApplies.insert(NewApplies.end(), AppliesFromInlinee.begin(),
                      AppliesFromInlinee.end());
    DA->invalidate(Caller, SILAnalysis::PreserveKind::Nothing);
    NumFunctionsInlined++;
  }

  DEBUG(llvm::dbgs() << "\n");
  return true;
}


void SILPerformanceInliner::inlineDevirtualizeAndSpecialize(
                                                      SILFunction *Caller,
                                                      SILModuleTransform *MT,
                                                      CallGraphAnalysis *CGA,
                                                      DominanceAnalysis *DA,
                                                      SILLoopAnalysis *LA) {
  assert(Caller->isDefinition() &&
         "Expected only defined functions in the call graph!");

  llvm::SmallVector<SILFunction *, 4> WorkList;
  WorkList.push_back(Caller);

  auto &CG = CGA->getOrBuildCallGraph();
  OriginMap.clear();
  RemovedApplies.clear();

  while (!WorkList.empty()) {
    llvm::SmallVector<FullApplySite, 4> WorkItemApplies;
    collectAllAppliesInFunction(WorkList.back(), WorkItemApplies);

    // Devirtualize and specialize any applies we've collected,
    // and collect new functions we should inline into as we do
    // so.
    llvm::SmallVector<SILFunction *, 4> NewFuncs;
    if (devirtualizeAndSpecializeApplies(WorkItemApplies, CGA, MT, NewFuncs)) {
      WorkList.insert(WorkList.end(), NewFuncs.begin(), NewFuncs.end());
      NewFuncs.clear();
    }
    assert(WorkItemApplies.empty() && "Expected all applies to be processed!");

    // We want to inline into each function on the worklist, starting
    // with any new ones that were exposed as a result of
    // devirtualization (to ensure we're inlining into callees first).
    //
    // After inlining, we may have new opportunities for
    // devirtualization, e.g. as a result of exposing the dynamic type
    // of an object. When those opportunities arise we want to attempt
    // devirtualization and then again attempt to inline into the
    // newly exposed functions, etc. until we're back to the function
    // we began with.
    auto *Initial = WorkList.back();

    // In practice we rarely exceed 5, but in a perf test we iterate 51 times.
    const unsigned MaxLaps = 150;
    unsigned Lap = 0;
    while (1) {
      auto *WorkItem = WorkList.back();
      assert(WorkItem->isDefinition() &&
             "Expected function definition on work list!");

      // Devirtualization and specialization might have exposed new
      // function references. We want to inline within those functions
      // before inlining within our original function.
      //
      // Inlining in turn might result in new applies that we should
      // consider for devirtualization and specialization.
      llvm::SmallVector<FullApplySite, 4> NewApplies;
      if (inlineCallsIntoFunction(WorkItem, DA, LA, CG, NewApplies)) {
        // Invalidate analyses, but lock the call graph since we
        // maintain it.
        CGA->lockInvalidation();
        MT->invalidateAnalysis(WorkItem, SILAnalysis::PreserveKind::Nothing);
        CGA->unlockInvalidation();

        // FIXME: Update inlineCallsIntoFunction to collect all
        //        remaining applies after inlining, not just those
        //        resulting from inlining code.
        llvm::SmallVector<FullApplySite, 4> WorkItemApplies;
        collectAllAppliesInFunction(WorkItem, WorkItemApplies);

        if (devirtualizeAndSpecializeApplies(WorkItemApplies, CGA, MT,
                                             NewFuncs)) {
          WorkList.insert(WorkList.end(), NewFuncs.begin(), NewFuncs.end());
          NewFuncs.clear();
          assert(WorkItemApplies.empty() &&
                 "Expected all applies to be processed!");
        }
      } else if (WorkItem == Initial) {
        break;
      } else {
        WorkList.pop_back();
      }

      Lap++;
      if (Lap > MaxLaps)
        // It's possible to construct real code where this will hit, but
        // it's more likely that there is an issue tracking recursive
        // inlining, in which case we want to know about it in internal
        // builds, and not hang on bots or user machines.
        assert(Lap <= MaxLaps && "Possible bug tracking recursion!");

      // Give up and move along.
      if (Lap > MaxLaps) {
        while (WorkList.back() != Initial)
          WorkList.pop_back();
        break;
      }
    }

    assert(WorkList.back() == Initial &&
           "Expected to exit with same element on top of stack!");
    WorkList.pop_back();
  }
}

// Find functions in cold blocks which are forced to be inlined anyway, i.e.
// which are AlwaysInline or trivially small (see isProfitableInColdBlock).
// All other functions are not inlined in cold blocks.
void SILPerformanceInliner::visitColdBlocks(SmallVectorImpl<ApplyInst *> &
                                            AppliesToInline,
                                            SILBasicBlock *Root,
                                            DominanceInfo *DT) {
  DominanceOrder domOrder(Root, DT);
  while (SILBasicBlock *block = domOrder.getNext()) {
    for (SILInstruction &I : *block) {
      ApplyInst *AI = dyn_cast<ApplyInst>(&I);
      if (!AI)
        continue;

      auto *Callee = getEligibleFunction(AI);
      if (Callee && isProfitableInColdBlock(Callee)) {
        DEBUG(llvm::dbgs() << "    inline in cold block:" << *AI);
        AppliesToInline.push_back(AI);
      }
    }
    domOrder.pushChildren(block);
  }
}


//===----------------------------------------------------------------------===//
// Performance Inliner Pass
//===----------------------------------------------------------------------===//

namespace {
class SILPerformanceInlinerPass : public SILModuleTransform {
  /// Specifies which functions not to inline, based on @_semantics and
  /// global_init attributes.
  InlineSelection WhatToInline;
  std::string PassName;
public:
  SILPerformanceInlinerPass(InlineSelection WhatToInline, StringRef LevelName):
    WhatToInline(WhatToInline), PassName(LevelName) {
    PassName.append(" Performance Inliner");
  }

  void run() override {
    CallGraphAnalysis *CGA = PM->getAnalysis<CallGraphAnalysis>();
    DominanceAnalysis *DA = PM->getAnalysis<DominanceAnalysis>();
    SILLoopAnalysis *LA = PM->getAnalysis<SILLoopAnalysis>();

    if (getOptions().InlineThreshold == 0) {
      DEBUG(llvm::dbgs() << "*** The Performance Inliner is disabled ***\n");
      return;
    }

    SILPerformanceInliner Inliner(getOptions().InlineThreshold,
                                  WhatToInline);

    auto &CG = CGA->getOrBuildCallGraph();
    auto &BottomUpFunctions = CG.getBottomUpFunctionOrder();

    // Copy the bottom-up function list into a worklist.
    llvm::SmallVector<SILFunction *, 32> WorkList;
    // FIXME: std::reverse_copy would be better, but it crashes.
    for (auto I = BottomUpFunctions.rbegin(), E = BottomUpFunctions.rend();
         I != E; ++I)
      WorkList.push_back(*I);

    // Inline functions bottom up from the leaves.
    while (!WorkList.empty()) {
      Inliner.inlineDevirtualizeAndSpecialize(WorkList.back(), this, CGA, DA,
                                              LA);
      WorkList.pop_back();
    }
  }

  StringRef getName() override { return PassName; }
};
} // end anonymous namespace

/// Create an inliner pass that does not inline functions that are marked with
/// the @_semantics, @effects or global_init attributes.
SILTransform *swift::createEarlyInliner() {
  return new SILPerformanceInlinerPass(
    InlineSelection::NoSemanticsAndGlobalInit, "Early");
}

/// Create an inliner pass that does not inline functions that are marked with
/// the global_init attribute or have an "availability" semantics attribute.
SILTransform *swift::createPerfInliner() {
  return new SILPerformanceInlinerPass(InlineSelection::NoGlobalInit, "Middle");
}

/// Create an inliner pass that inlines all functions that are marked with
/// the @_semantics, @effects or global_init attributes.
SILTransform *swift::createLateInliner() {
  return new SILPerformanceInlinerPass(InlineSelection::Everything, "Late");
}