LICM: hoist calls to global_init functions

Global initializers are executed only once.
Therefore, a call to such an initializer can be hoisted to the loop pre-header, provided that there are no conflicting side-effects in the loop before the call.
Also, the call must post-dominate the loop pre-header. Otherwise it would be executed speculatively.
This commit is contained in:
Erik Eckstein
2020-03-17 16:26:35 +01:00
parent e0c4fa2d92
commit 3ad7d548c2
5 changed files with 324 additions and 11 deletions

View File

@@ -146,6 +146,57 @@ static bool mayWriteTo(AliasAnalysis *AA, SideEffectAnalysis *SEA,
return false;
}
/// Decides whether \p sideEffectInst must keep its order relative to the
/// global initializer call \p globalInitCall.
///
/// Only stores and loads are analyzed via \p AA; any other instruction is
/// conservatively reported as conflicting.
///
/// \returns true if the two instructions may conflict, i.e. cannot be
/// reordered.
static bool mayConflictWithGlobalInit(AliasAnalysis *AA,
                    SILInstruction *sideEffectInst, ApplyInst *globalInitCall) {
  // Store: query the alias analysis for a possible read or write of the
  // store's destination by the initializer call.
  if (auto *store = dyn_cast<StoreInst>(sideEffectInst))
    return AA->mayReadOrWriteMemory(globalInitCall, store->getDest());

  // Load: only a possible write by the initializer call to the loaded
  // address is queried.
  if (auto *load = dyn_cast<LoadInst>(sideEffectInst))
    return AA->mayWriteToMemory(globalInitCall, load->getOperand());

  // Anything else is not analyzed and counts as a conflict.
  return true;
}
/// Checks the side-effect instructions of a loop against a call to a global
/// initializer.
///
/// Reports a conflict (returns true) if
///  - the block of \p globalInitCall does not post-dominate \p preHeader, or
///  - any instruction in \p sideEffectInsts whose block is properly
///    post-dominated by the block of \p globalInitCall cannot be reordered
///    with the call.
///
/// Instructions located in the same block as the call are not inspected here.
static bool mayConflictWithGlobalInit(AliasAnalysis *AA,
                    InstSet &sideEffectInsts,
                    ApplyInst *globalInitCall,
                    SILBasicBlock *preHeader, PostDominanceInfo *PD) {
  SILBasicBlock *initBlock = globalInitCall->getParent();

  // The call's block has to post-dominate the pre-header; otherwise report a
  // conflict right away.
  if (!PD->dominates(initBlock, preHeader))
    return true;

  for (auto *inst : sideEffectInsts) {
    // Skip everything which is not "before" the init-call, i.e. whose block
    // is not properly post-dominated by the init-call's block. This also
    // skips instructions in the init-call's own block: those which precede
    // the call have already been checked.
    if (!PD->properlyDominates(initBlock, inst->getParent()))
      continue;

    if (mayConflictWithGlobalInit(AA, inst, globalInitCall))
      return true;
  }
  return false;
}
/// Checks a list of side-effect instructions against a call to a global
/// initializer located in the same basic block.
///
/// Every instruction in \p sideEffectInsts is expected (and asserted) to have
/// the same parent block as \p globalInitCall.
///
/// \returns true as soon as one instruction cannot be reordered with the
/// call, false if none conflicts (including when the list is empty).
static bool mayConflictWithGlobalInit(AliasAnalysis *AA,
                    ArrayRef<SILInstruction *> sideEffectInsts,
                    ApplyInst *globalInitCall) {
  for (SILInstruction *inst : sideEffectInsts) {
    assert(globalInitCall->getParent() == inst->getParent());
    if (!mayConflictWithGlobalInit(AA, inst, globalInitCall))
      continue;
    return true;
  }
  return false;
}
// When Hoisting / Sinking,
// Don't descend into control-dependent code.
// Only traverse into basic blocks that dominate all exits.
@@ -409,6 +460,8 @@ class LoopTreeOptimization {
AliasAnalysis *AA;
SideEffectAnalysis *SEA;
DominanceInfo *DomTree;
PostDominanceAnalysis *PDA;
PostDominanceInfo *postDomTree = nullptr;
AccessedStorageAnalysis *ASA;
bool Changed;
@@ -435,10 +488,11 @@ class LoopTreeOptimization {
public:
LoopTreeOptimization(SILLoop *TopLevelLoop, SILLoopInfo *LI,
AliasAnalysis *AA, SideEffectAnalysis *SEA,
DominanceInfo *DT, AccessedStorageAnalysis *ASA,
DominanceInfo *DT, PostDominanceAnalysis *PDA,
AccessedStorageAnalysis *ASA,
bool RunsOnHighLevelSil)
: LoopInfo(LI), AA(AA), SEA(SEA), DomTree(DT), ASA(ASA), Changed(false),
RunsOnHighLevelSIL(RunsOnHighLevelSil) {
: LoopInfo(LI), AA(AA), SEA(SEA), DomTree(DT), PDA(PDA), ASA(ASA),
Changed(false), RunsOnHighLevelSIL(RunsOnHighLevelSil) {
// Collect loops for a recursive bottom-up traversal in the loop tree.
BotUpWorkList.push_back(TopLevelLoop);
for (unsigned i = 0; i < BotUpWorkList.size(); ++i) {
@@ -556,9 +610,11 @@ static bool isSafeReadOnlyApply(SideEffectAnalysis *SEA, ApplyInst *AI) {
}
static void checkSideEffects(swift::SILInstruction &Inst,
InstSet &SideEffectInsts) {
InstSet &SideEffectInsts,
SmallVectorImpl<SILInstruction *> &sideEffectsInBlock) {
if (Inst.mayHaveSideEffects()) {
SideEffectInsts.insert(&Inst);
sideEffectsInBlock.push_back(&Inst);
}
}
@@ -708,6 +764,7 @@ void LoopTreeOptimization::analyzeCurrentLoop(
// Interesting instructions in the loop:
SmallVector<ApplyInst *, 8> ReadOnlyApplies;
SmallVector<ApplyInst *, 8> globalInitCalls;
SmallVector<LoadInst *, 8> Loads;
SmallVector<StoreInst *, 8> Stores;
SmallVector<FixLifetimeInst *, 8> FixLifetimes;
@@ -715,6 +772,7 @@ void LoopTreeOptimization::analyzeCurrentLoop(
SmallVector<FullApplySite, 8> fullApplies;
for (auto *BB : Loop->getBlocks()) {
SmallVector<SILInstruction *, 8> sideEffectsInBlock;
for (auto &Inst : *BB) {
switch (Inst.getKind()) {
case SILInstructionKind::FixLifetimeInst: {
@@ -731,12 +789,12 @@ void LoopTreeOptimization::analyzeCurrentLoop(
case SILInstructionKind::StoreInst: {
Stores.push_back(cast<StoreInst>(&Inst));
LoadsAndStores.push_back(&Inst);
checkSideEffects(Inst, sideEffects);
checkSideEffects(Inst, sideEffects, sideEffectsInBlock);
break;
}
case SILInstructionKind::BeginAccessInst:
BeginAccesses.push_back(cast<BeginAccessInst>(&Inst));
checkSideEffects(Inst, sideEffects);
checkSideEffects(Inst, sideEffects, sideEffectsInBlock);
break;
case SILInstructionKind::RefElementAddrInst:
SpecialHoist.push_back(cast<RefElementAddrInst>(&Inst));
@@ -747,12 +805,21 @@ void LoopTreeOptimization::analyzeCurrentLoop(
// cond_fail that would have protected (executed before) a memory access
// must - after hoisting - also be executed before said access.
HoistUp.insert(&Inst);
checkSideEffects(Inst, sideEffects);
checkSideEffects(Inst, sideEffects, sideEffectsInBlock);
break;
case SILInstructionKind::ApplyInst: {
auto *AI = cast<ApplyInst>(&Inst);
if (isSafeReadOnlyApply(SEA, AI)) {
ReadOnlyApplies.push_back(AI);
} else if (SILFunction *callee = AI->getReferencedFunctionOrNull()) {
// Calls to global inits are different because we don't care about
// side effects which are "after" the call in the loop.
if (callee->isGlobalInit() &&
// Check against side-effects within the same block.
// Side-effects in other blocks are checked later (after we
// scanned all blocks of the loop).
!mayConflictWithGlobalInit(AA, sideEffectsInBlock, AI))
globalInitCalls.push_back(AI);
}
// check for array semantics and side effects - same as default
LLVM_FALLTHROUGH;
@@ -761,7 +828,7 @@ void LoopTreeOptimization::analyzeCurrentLoop(
if (auto fullApply = FullApplySite::isa(&Inst)) {
fullApplies.push_back(fullApply);
}
checkSideEffects(Inst, sideEffects);
checkSideEffects(Inst, sideEffects, sideEffectsInBlock);
if (canHoistUpDefault(&Inst, Loop, DomTree, RunsOnHighLevelSIL)) {
HoistUp.insert(&Inst);
}
@@ -780,6 +847,23 @@ void LoopTreeOptimization::analyzeCurrentLoop(
HoistUp.insert(LI);
}
}
if (!globalInitCalls.empty()) {
if (!postDomTree) {
postDomTree = PDA->get(Preheader->getParent());
}
if (postDomTree->getRootNode()) {
for (ApplyInst *ginitCall : globalInitCalls) {
// Check against side effects which are "before" (i.e. post-dominated
// by) the global initializer call.
if (!mayConflictWithGlobalInit(AA, sideEffects, ginitCall, Preheader,
postDomTree)) {
HoistUp.insert(ginitCall);
}
}
}
}
// Collect memory locations for which we can move all loads and stores out
// of the loop.
for (StoreInst *SI : Stores) {
@@ -1041,6 +1125,7 @@ public:
}
DominanceAnalysis *DA = PM->getAnalysis<DominanceAnalysis>();
PostDominanceAnalysis *PDA = PM->getAnalysis<PostDominanceAnalysis>();
AliasAnalysis *AA = PM->getAnalysis<AliasAnalysis>();
SideEffectAnalysis *SEA = PM->getAnalysis<SideEffectAnalysis>();
AccessedStorageAnalysis *ASA = getAnalysis<AccessedStorageAnalysis>();
@@ -1051,8 +1136,8 @@ public:
for (auto *TopLevelLoop : *LoopInfo) {
if (!DomTree) DomTree = DA->get(F);
LoopTreeOptimization Opt(TopLevelLoop, LoopInfo, AA, SEA, DomTree, ASA,
RunsOnHighLevelSil);
LoopTreeOptimization Opt(TopLevelLoop, LoopInfo, AA, SEA, DomTree, PDA,
ASA, RunsOnHighLevelSil);
Changed |= Opt.optimize();
}