Extend AllocBoxToStack to handle apply (#31974)

* Don't always give shared linkage to spl functions

private functions on specialization were being given shared linkage.
Use swift::getSpecializeLinkage to correctly get the linkage for the
specialized function based on the linkage of the original function.

* Extend AllocBoxToStack to handle apply

AllocBoxToStack analyzes the uses of boxes and promotes them to stack if
it is safe to do so. Currently the analysis is limited to only a few known
users including partial_apply.

With this change, the pass also analyzes apply users, where the callee
is a local private function.
The analysis is recursive and bound by a threshold.

Fixes rdar://59070139
This commit is contained in:
Meghana Gupta
2020-06-22 14:50:15 -07:00
committed by GitHub
parent 865224aa93
commit 1d4f617fee
6 changed files with 1075 additions and 113 deletions

View File

@@ -19,6 +19,7 @@
#include "swift/SIL/SILCloner.h"
#include "swift/SILOptimizer/PassManager/Passes.h"
#include "swift/SILOptimizer/PassManager/Transforms.h"
#include "swift/SILOptimizer/Utils/InstOptUtils.h"
#include "swift/SILOptimizer/Utils/SILOptFunctionBuilder.h"
#include "swift/SILOptimizer/Utils/SpecializationMangler.h"
#include "swift/SILOptimizer/Utils/StackNesting.h"
@@ -28,12 +29,21 @@
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
using namespace swift;
STATISTIC(NumStackPromoted, "Number of alloc_box's promoted to the stack");
//===----------------------------------------------------------------------===//
// MaxLocalApplyRecurDepth limits the recursive analysis depth while
// checking if a box can be promoted to stack. This is currently set to 4, a
// limit assumed to be sufficient to handle typical call chain of local
// functions through which a box can be passed.
static llvm::cl::opt<unsigned> MaxLocalApplyRecurDepth(
"max-local-apply-recur-depth", llvm::cl::init(4),
llvm::cl::desc("Max recursive depth for analyzing local functions"));
//===-----------------------------------------------------------------------===//
// SIL Utilities for alloc_box Promotion
//===----------------------------------------------------------------------===//
@@ -274,38 +284,76 @@ static bool partialApplyEscapes(SILValue V, bool examineApply) {
return false;
}
static SILInstruction *findUnexpectedBoxUse(SILValue Box,
bool examinePartialApply,
bool inAppliedFunction,
SmallVectorImpl<Operand *> &);
static SILInstruction *recursivelyFindBoxOperandsPromotableToAddress(
SILValue Box, bool inAppliedFunction, SmallVectorImpl<Operand *> &,
SmallPtrSetImpl<SILFunction *> &, unsigned CurrentRecurDepth);
/// checkPartialApplyBody - Check the body of a partial apply to see
/// checkLocalApplyBody - Check the body of an apply's callee to see
/// if the box pointer argument passed to it has uses that would
/// disqualify it from being promoted to a stack location. Return
/// true if this partial apply will not block our promoting the box.
static bool checkPartialApplyBody(Operand *O) {
/// true if this apply will not block our promoting the box.
static bool checkLocalApplyBody(Operand *O,
SmallVectorImpl<Operand *> &PromotedOperands,
SmallPtrSetImpl<SILFunction *> &VisitedCallees,
unsigned CurrentRecurDepth) {
SILFunction *F = ApplySite(O->getUser()).getReferencedFunctionOrNull();
// If we cannot examine the function body, assume the worst.
if (!F || F->empty())
return false;
// We don't actually use these because we're not recursively
// rewriting the partial applies we find.
SmallVector<Operand *, 1> PromotedOperands;
// Since this function can be called recursively while analyzing the same box,
// mark the callee as visited, so that we don't end up in a recursive cycle.
auto iter = VisitedCallees.insert(F);
if (!iter.second)
return false;
auto calleeArg = F->getArgument(ApplySite(O->getUser()).getCalleeArgIndex(*O));
return !findUnexpectedBoxUse(calleeArg, /* examinePartialApply = */ false,
/* inAppliedFunction = */ true,
PromotedOperands);
auto res = !recursivelyFindBoxOperandsPromotableToAddress(
calleeArg,
/* inAppliedFunction = */ true, PromotedOperands, VisitedCallees,
CurrentRecurDepth + 1);
return res;
}
// Returns true if a callee is eligible to be cloned and rewritten for
// AllocBoxToStack opt. We don't want to increase code size, so this is
// restricted only for private local functions currently.
static bool isOptimizableApplySite(ApplySite Apply) {
auto callee = Apply.getReferencedFunctionOrNull();
if (!callee) {
return false;
}
// Callee should be optimizable.
if (!callee->shouldOptimize())
return false;
// External function definitions.
if (!callee->isDefinition())
return false;
// Do not optimize always_inlinable functions.
if (callee->getInlineStrategy() == Inline_t::AlwaysInline)
return false;
if (callee->getLinkage() != SILLinkage::Private)
return false;
return true;
}
/// Validate that the uses of a pointer to a box do not eliminate it from
/// consideration for promotion to a stack element. Optionally examine the body
/// of partial_apply to see if there is an unexpected use inside. Return the
/// instruction with the unexpected use if we find one.
static SILInstruction *
findUnexpectedBoxUse(SILValue Box, bool examinePartialApply,
bool inAppliedFunction,
SmallVectorImpl<Operand *> &PromotedOperands) {
/// consideration for promotion to a stack element. Return the instruction with
/// the unexpected use if we find one.
/// If a box has ApplySite users, we recursively examine the callees to check
/// for unexpected use of the box argument. If all the callees through which the
/// box is passed don't have any unexpected uses, `PromotedOperands` will be
/// populated with the box arguments in DFS order.
static SILInstruction *recursivelyFindBoxOperandsPromotableToAddress(
SILValue Box, bool inAppliedFunction,
SmallVectorImpl<Operand *> &PromotedOperands,
SmallPtrSetImpl<SILFunction *> &VisitedCallees,
unsigned CurrentRecurDepth = 0) {
assert((Box->getType().is<SILBoxType>()
|| Box->getType()
== SILType::getNativeObjectType(Box->getType().getASTContext()))
@@ -337,15 +385,32 @@ findUnexpectedBoxUse(SILValue Box, bool examinePartialApply,
continue;
}
// For partial_apply, if we've been asked to examine the body, the
// uses of the argument are okay there, and the partial_apply
// itself cannot escape, then everything is fine.
if (auto *PAI = dyn_cast<PartialApplyInst>(User))
if (examinePartialApply && checkPartialApplyBody(Op) &&
!partialApplyEscapes(PAI, /* examineApply = */ true)) {
LocalPromotedOperands.push_back(Op);
continue;
if (auto Apply = ApplySite::isa(User)) {
if (CurrentRecurDepth > MaxLocalApplyRecurDepth) {
return User;
}
switch (Apply.getKind()) {
case ApplySiteKind::PartialApplyInst: {
if (checkLocalApplyBody(Op, LocalPromotedOperands, VisitedCallees,
CurrentRecurDepth) &&
!partialApplyEscapes(cast<PartialApplyInst>(User),
/* examineApply = */ true)) {
LocalPromotedOperands.push_back(Op);
continue;
}
break;
}
case ApplySiteKind::ApplyInst:
case ApplySiteKind::BeginApplyInst:
case ApplySiteKind::TryApplyInst:
if (isOptimizableApplySite(Apply) &&
checkLocalApplyBody(Op, LocalPromotedOperands, VisitedCallees,
CurrentRecurDepth)) {
LocalPromotedOperands.push_back(Op);
continue;
}
}
}
return User;
}
@@ -364,12 +429,13 @@ static InFlightDiagnostic diagnose(ASTContext &Context, SourceLoc loc,
/// canPromoteAllocBox - Can we promote this alloc_box to an alloc_stack?
static bool canPromoteAllocBox(AllocBoxInst *ABI,
SmallVectorImpl<Operand *> &PromotedOperands) {
SmallPtrSet<SILFunction *, 8> VisitedCallees;
// Scan all of the uses of the address of the box to see if any
// disqualifies the box from being promoted to the stack.
if (auto *User = findUnexpectedBoxUse(ABI,
/* examinePartialApply = */ true,
/* inAppliedFunction = */ false,
PromotedOperands)) {
if (auto *User = recursivelyFindBoxOperandsPromotableToAddress(
ABI,
/* inAppliedFunction = */ false, PromotedOperands, VisitedCallees,
/* CurrentRecurDepth = */ 0)) {
(void)User;
// Otherwise, we have an unexpected use.
LLVM_DEBUG(llvm::dbgs() << "*** Failed to promote alloc_box in @"
@@ -557,6 +623,9 @@ private:
void visitStrongRetainInst(StrongRetainInst *Inst);
void visitCopyValueInst(CopyValueInst *Inst);
void visitProjectBoxInst(ProjectBoxInst *Inst);
void checkNoPromotedBoxInApply(ApplySite Apply);
#define APPLYSITE_INST(Name, Parent) void visit##Name(Name *Inst);
#include "swift/SIL/SILNodes.def"
};
} // end anonymous namespace
@@ -633,11 +702,12 @@ SILFunction *PromotedParamCloner::initCloned(SILOptFunctionBuilder &FuncBuilder,
&& "SILFunction missing DebugScope");
assert(!Orig->isGlobalInit() && "Global initializer cannot be cloned");
auto *Fn = FuncBuilder.createFunction(
SILLinkage::Shared, ClonedName, ClonedTy, Orig->getGenericEnvironment(),
Orig->getLocation(), Orig->isBare(), Orig->isTransparent(), Serialized,
IsNotDynamic, Orig->getEntryCount(), Orig->isThunk(),
Orig->getClassSubclassScope(), Orig->getInlineStrategy(),
Orig->getEffectsKind(), Orig, Orig->getDebugScope());
swift::getSpecializedLinkage(Orig, Orig->getLinkage()), ClonedName,
ClonedTy, Orig->getGenericEnvironment(), Orig->getLocation(),
Orig->isBare(), Orig->isTransparent(), Serialized, IsNotDynamic,
Orig->getEntryCount(), Orig->isThunk(), Orig->getClassSubclassScope(),
Orig->getInlineStrategy(), Orig->getEffectsKind(), Orig,
Orig->getDebugScope());
for (auto &Attr : Orig->getSemanticsAttrs()) {
Fn->addSemanticsAttr(Attr);
}
@@ -678,9 +748,12 @@ PromotedParamCloner::populateCloned() {
OrigPromotedParameters.insert(*I);
NewPromotedArgs[ArgNo] = promotedArg;
// All uses of the promoted box should either be projections, which are
// folded when visited, or copy/destroy operations which are ignored.
// We only promote boxes used in apply or projections or copy/destroy
// value operations.
// We should never see an apply user of the box, because we rewrite the
// applies and specialize the callees in dfs order.
// Projection users are folded when visited and copy/destroy operations
// are ignored.
entryArgs.push_back(SILValue());
} else {
// Create a new argument which copies the original argument.
@@ -760,34 +833,63 @@ void PromotedParamCloner::visitProjectBoxInst(ProjectBoxInst *Inst) {
SILCloner<PromotedParamCloner>::visitProjectBoxInst(Inst);
}
/// Specialize a partial_apply by promoting the parameters indicated by
// While cloning during specialization, make sure apply instructions do not have
// box arguments that need to be promoted.
// This is an assertion in debug builds only. The reason why this should never
// be true is that we have cloned our callees in DFS order meaning that any of
// our callees that had a promotable box will have already have been promoted
// away by the time this runs.
void PromotedParamCloner::checkNoPromotedBoxInApply(ApplySite Apply) {
#ifndef NDEBUG
for (auto &O : Apply.getArgumentOperands()) {
assert(OrigPromotedParameters.count(O.get()) == 0);
}
#endif
}
void PromotedParamCloner::visitApplyInst(ApplyInst *Inst) {
checkNoPromotedBoxInApply(Inst);
SILCloner<PromotedParamCloner>::visitApplyInst(Inst);
}
void PromotedParamCloner::visitBeginApplyInst(BeginApplyInst *Inst) {
checkNoPromotedBoxInApply(Inst);
SILCloner<PromotedParamCloner>::visitBeginApplyInst(Inst);
}
void PromotedParamCloner::visitPartialApplyInst(PartialApplyInst *Inst) {
checkNoPromotedBoxInApply(Inst);
SILCloner<PromotedParamCloner>::visitPartialApplyInst(Inst);
}
void PromotedParamCloner::visitTryApplyInst(TryApplyInst *Inst) {
checkNoPromotedBoxInApply(Inst);
SILCloner<PromotedParamCloner>::visitTryApplyInst(Inst);
}
/// Specialize ApplySite by promoting the parameters indicated by
/// indices. We expect these parameters to be replaced by stack address
/// references.
static PartialApplyInst *
specializePartialApply(SILOptFunctionBuilder &FuncBuilder,
PartialApplyInst *PartialApply,
ArgIndexList &PromotedCalleeArgIndices,
AllocBoxToStackState &pass) {
auto *FRI = cast<FunctionRefInst>(PartialApply->getCallee());
assert(FRI && "Expected a direct partial_apply!");
static SILInstruction *
specializeApplySite(SILOptFunctionBuilder &FuncBuilder, ApplySite Apply,
ArgIndexList &PromotedCalleeArgIndices,
AllocBoxToStackState &pass) {
auto *FRI = cast<FunctionRefInst>(Apply.getCallee());
assert(FRI && "Expected a direct ApplySite");
auto *F = FRI->getReferencedFunctionOrNull();
assert(F && "Expected a referenced function!");
IsSerialized_t Serialized = IsNotSerialized;
if (PartialApply->getFunction()->isSerialized())
if (Apply.getFunction()->isSerialized())
Serialized = IsSerializable;
std::string ClonedName =
getClonedName(F, Serialized, PromotedCalleeArgIndices);
auto &M = PartialApply->getModule();
auto &M = Apply.getModule();
SILFunction *ClonedFn;
if (auto *PrevFn = M.lookUpFunction(ClonedName)) {
assert(PrevFn->isSerialized() == Serialized);
ClonedFn = PrevFn;
} else {
// Clone the function the existing partial_apply references.
// Clone the function the existing ApplySite references.
PromotedParamCloner Cloner(FuncBuilder, F, Serialized,
PromotedCalleeArgIndices,
ClonedName);
@@ -796,95 +898,133 @@ specializePartialApply(SILOptFunctionBuilder &FuncBuilder,
pass.T->addFunctionToPassManagerWorklist(ClonedFn, F);
}
// Now create the new partial_apply using the cloned function.
// Now create the new ApplySite using the cloned function.
SmallVector<SILValue, 16> Args;
ValueLifetimeAnalysis::Frontier PAFrontier;
// Promote the arguments that need promotion.
for (auto &O : PartialApply->getArgumentOperands()) {
for (auto &O : Apply.getArgumentOperands()) {
auto CalleeArgIndex = ApplySite(O.getUser()).getCalleeArgIndex(O);
if (!count(PromotedCalleeArgIndices, CalleeArgIndex)) {
Args.push_back(O.get());
continue;
}
// If this argument is promoted, it is a box that we're turning into an
// address because we've proven we can keep this value on the stack. The
// partial_apply had ownership of this box so we must now release it
// explicitly when the partial_apply is released.
auto *Box = cast<SingleValueInstruction>(O.get());
assert((isa<AllocBoxInst>(Box) || isa<CopyValueInst>(Box)) &&
"Expected either an alloc box or a copy of an alloc box");
SILBuilder B(Box);
Args.push_back(B.createProjectBox(Box->getLoc(), Box, 0));
auto Box = O.get();
assert(((isa<SingleValueInstruction>(Box) && isa<AllocBoxInst>(Box) ||
isa<CopyValueInst>(Box)) ||
isa<SILFunctionArgument>(Box)) &&
"Expected either an alloc box or a copy of an alloc box or a "
"function argument");
auto InsertPt = Box->getDefiningInsertionPoint();
assert(InsertPt);
SILBuilderWithScope B(InsertPt);
Args.push_back(B.createProjectBox(Box.getLoc(), Box, 0));
if (PAFrontier.empty()) {
ValueLifetimeAnalysis VLA(PartialApply);
pass.CFGChanged |= !VLA.computeFrontier(
PAFrontier, ValueLifetimeAnalysis::AllowToModifyCFG);
assert(!PAFrontier.empty() && "partial_apply must have at least one use "
"to release the returned function");
}
// For a partial_apply, if this argument is promoted, it is a box that we're
// turning into an address because we've proven we can keep this value on
// the stack. The partial_apply had ownership of this box so we must now
// release it explicitly when the partial_apply is released.
if (Apply.getKind() == ApplySiteKind::PartialApplyInst) {
if (PAFrontier.empty()) {
ValueLifetimeAnalysis VLA(cast<PartialApplyInst>(Apply));
pass.CFGChanged |= !VLA.computeFrontier(
PAFrontier, ValueLifetimeAnalysis::AllowToModifyCFG);
assert(!PAFrontier.empty() &&
"partial_apply must have at least one use "
"to release the returned function");
}
// Insert destroys of the box at each point where the partial_apply becomes
// dead.
for (SILInstruction *FrontierInst : PAFrontier) {
SILBuilderWithScope Builder(FrontierInst);
Builder.createDestroyValue(PartialApply->getLoc(), Box);
// Insert destroys of the box at each point where the partial_apply
// becomes dead.
for (SILInstruction *FrontierInst : PAFrontier) {
SILBuilderWithScope Builder(FrontierInst);
Builder.createDestroyValue(Apply.getLoc(), Box);
}
}
}
SILBuilderWithScope Builder(PartialApply);
auto ApplyInst = Apply.getInstruction();
SILBuilderWithScope Builder(ApplyInst);
// Build the function_ref and partial_apply.
SILValue FunctionRef = Builder.createFunctionRef(PartialApply->getLoc(),
ClonedFn);
return Builder.createPartialApply(
PartialApply->getLoc(), FunctionRef, PartialApply->getSubstitutionMap(),
Args,
PartialApply->getType().getAs<SILFunctionType>()->getCalleeConvention());
// Build the function_ref and ApplySite.
SILValue FunctionRef = Builder.createFunctionRef(Apply.getLoc(), ClonedFn);
switch (Apply.getKind()) {
case ApplySiteKind::PartialApplyInst: {
auto *PAI = cast<PartialApplyInst>(ApplyInst);
return Builder.createPartialApply(
Apply.getLoc(), FunctionRef, Apply.getSubstitutionMap(), Args,
PAI->getType().getAs<SILFunctionType>()->getCalleeConvention(),
PAI->isOnStack(),
GenericSpecializationInformation::create(ApplyInst, Builder));
}
case ApplySiteKind::ApplyInst:
return Builder.createApply(
Apply.getLoc(), FunctionRef, Apply.getSubstitutionMap(), Args,
GenericSpecializationInformation::create(ApplyInst, Builder));
case ApplySiteKind::BeginApplyInst:
return Builder.createBeginApply(
Apply.getLoc(), FunctionRef, Apply.getSubstitutionMap(), Args,
GenericSpecializationInformation::create(ApplyInst, Builder));
case ApplySiteKind::TryApplyInst: {
auto TAI = cast<TryApplyInst>(Apply);
return Builder.createTryApply(
Apply.getLoc(), FunctionRef, Apply.getSubstitutionMap(), Args,
TAI->getNormalBB(), TAI->getErrorBB(),
GenericSpecializationInformation::create(ApplyInst, Builder));
}
}
}
static void rewritePartialApplies(AllocBoxToStackState &pass) {
llvm::DenseMap<PartialApplyInst *, ArgIndexList> IndexMap;
static void rewriteApplySites(AllocBoxToStackState &pass) {
llvm::DenseMap<ApplySite, ArgIndexList> IndexMap;
llvm::SmallVector<ApplySite, 8> AppliesToSpecialize;
ArgIndexList Indices;
// Build a map from partial_apply to the indices of the operands
// Build a map from the ApplySite to the indices of the operands
// that will be promoted in our rewritten version.
for (auto *O : pass.PromotedOperands) {
auto CalleeArgIndexNumber = ApplySite(O->getUser()).getCalleeArgIndex(*O);
auto User = O->getUser();
auto Apply = ApplySite(User);
auto CalleeArgIndexNumber = Apply.getCalleeArgIndex(*O);
Indices.clear();
Indices.push_back(CalleeArgIndexNumber);
auto *PartialApply = cast<PartialApplyInst>(O->getUser());
llvm::DenseMap<PartialApplyInst *, ArgIndexList>::iterator It;
bool Inserted;
std::tie(It, Inserted) = IndexMap.insert(std::make_pair(PartialApply,
Indices));
if (!Inserted)
It->second.push_back(CalleeArgIndexNumber);
auto iterAndSuccess = IndexMap.try_emplace(Apply, Indices);
// AllocBoxStack opt promotes boxes passed to a chain of applies when it is
// safe to do so. All such applies have to be specialized to take pointer
// arguments instead of box arguments. This has to be done in dfs order.
// Since PromotedOperands is already populated in dfs order by
// `recursivelyFindBoxOperandsPromotableToAddress`. Build
// `AppliesToSpecialize` in the same order.
if (iterAndSuccess.second) {
AppliesToSpecialize.push_back(Apply);
} else {
iterAndSuccess.first->second.push_back(CalleeArgIndexNumber);
}
}
// Clone the referenced function of each partial_apply, removing the
// Clone the referenced function of each ApplySite, removing the
// operands that we will not need, and remove the existing
// partial_apply.
// ApplySite.
SILOptFunctionBuilder FuncBuilder(*pass.T);
for (auto &It : IndexMap) {
auto *PartialApply = It.first;
auto &Indices = It.second;
for (auto &Apply : AppliesToSpecialize) {
auto It = IndexMap.find(Apply);
assert(It != IndexMap.end());
auto &Indices = It->second;
// Sort the indices and unique them.
std::sort(Indices.begin(), Indices.end());
Indices.erase(std::unique(Indices.begin(), Indices.end()), Indices.end());
sortUnique(Indices);
PartialApplyInst *Replacement =
specializePartialApply(FuncBuilder, PartialApply, Indices, pass);
PartialApply->replaceAllUsesWith(Replacement);
auto *Replacement = specializeApplySite(FuncBuilder, Apply, Indices, pass);
assert(Apply.getKind() == ApplySite(Replacement).getKind());
Apply.getInstruction()->replaceAllUsesPairwiseWith(Replacement);
auto *FRI = cast<FunctionRefInst>(PartialApply->getCallee());
PartialApply->eraseFromParent();
auto *FRI = cast<FunctionRefInst>(Apply.getCallee());
Apply.getInstruction()->eraseFromParent();
// TODO: Erase from module if there are no more uses.
if (FRI->use_empty())
@@ -895,9 +1035,9 @@ static void rewritePartialApplies(AllocBoxToStackState &pass) {
/// Clone closure bodies and rewrite partial applies. Returns the number of
/// alloc_box allocations promoted.
static unsigned rewritePromotedBoxes(AllocBoxToStackState &pass) {
// First we'll rewrite any partial applies that we can to remove the
// box container pointer from the operands.
rewritePartialApplies(pass);
// First we'll rewrite any ApplySite that we can to remove
// the box container pointer from the operands.
rewriteApplySites(pass);
unsigned Count = 0;
auto rend = pass.Promotable.rend();