mirror of
https://github.com/apple/swift.git
synced 2025-12-21 12:14:44 +01:00
Otherwise, one will get use-after-frees. I added an interpreter test as well as an end-to-end test. rdar://50884462
1011 lines
38 KiB
C++
1011 lines
38 KiB
C++
//===--- MandatoryInlining.cpp - Perform inlining of "transparent" sites --===//
|
|
//
|
|
// This source file is part of the Swift.org open source project
|
|
//
|
|
// Copyright (c) 2014 - 2017 Apple Inc. and the Swift project authors
|
|
// Licensed under Apache License v2.0 with Runtime Library Exception
|
|
//
|
|
// See https://swift.org/LICENSE.txt for license information
|
|
// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
#define DEBUG_TYPE "mandatory-inlining"
|
|
#include "swift/AST/DiagnosticEngine.h"
|
|
#include "swift/AST/DiagnosticsSIL.h"
|
|
#include "swift/Basic/BlotSetVector.h"
|
|
#include "swift/SIL/BasicBlockUtils.h"
|
|
#include "swift/SIL/BranchPropagatedUser.h"
|
|
#include "swift/SIL/InstructionUtils.h"
|
|
#include "swift/SIL/OwnershipUtils.h"
|
|
#include "swift/SILOptimizer/PassManager/Passes.h"
|
|
#include "swift/SILOptimizer/PassManager/Transforms.h"
|
|
#include "swift/SILOptimizer/Utils/CFG.h"
|
|
#include "swift/SILOptimizer/Utils/Devirtualize.h"
|
|
#include "swift/SILOptimizer/Utils/Local.h"
|
|
#include "swift/SILOptimizer/Utils/SILInliner.h"
|
|
#include "swift/SILOptimizer/Utils/SILOptFunctionBuilder.h"
|
|
#include "swift/SILOptimizer/Utils/StackNesting.h"
|
|
#include "llvm/ADT/DenseSet.h"
|
|
#include "llvm/ADT/ImmutableSet.h"
|
|
#include "llvm/ADT/Statistic.h"
|
|
#include "llvm/Support/Debug.h"
|
|
|
|
using namespace swift;
|
|
|
|
/// Mutable set of functions. This is the type of the `FullyInlinedSet`
/// parameter of runOnFunctionRecursively.
using DenseFunctionSet = llvm::DenseSet<SILFunction *>;
|
|
/// Immutable set of functions. This is the type of the `CurrentInliningSet`
/// parameter of runOnFunctionRecursively; extended sets are produced through
/// an ImmutableFunctionSet::Factory.
using ImmutableFunctionSet = llvm::ImmutableSet<SILFunction *>;
|
|
|
|
/// Pass statistic; see the description string for what it counts.
STATISTIC(NumMandatoryInlines,
|
|
"Number of function application sites inlined by the mandatory "
|
|
"inlining pass");
|
|
|
|
template<typename...T, typename...U>
|
|
static void diagnose(ASTContext &Context, SourceLoc loc, Diag<T...> diag,
|
|
U &&...args) {
|
|
Context.Diags.diagnose(loc, diag, std::forward<U>(args)...);
|
|
}
|
|
|
|
/// Look through any chain of CopyValueInst and return the value at the bottom
/// of the chain. A value that is not produced by a CopyValueInst is returned
/// unchanged.
static SILValue stripCopies(SILValue v) {
  for (;;) {
    auto *copy = dyn_cast<CopyValueInst>(v);
    if (!copy)
      return v;
    v = copy->getOperand();
  }
}
|
|
|
|
/// If \p applySite is a terminator then pass the first instruction of each
|
|
/// successor to fun. Otherwise, pass std::next(applySite).
|
|
static void
|
|
insertAfterApply(SILInstruction *applySite,
|
|
llvm::function_ref<void(SILBasicBlock::iterator)> &&fun) {
|
|
auto *ti = dyn_cast<TermInst>(applySite);
|
|
if (!ti) {
|
|
return fun(std::next(applySite->getIterator()));
|
|
}
|
|
|
|
for (auto *succBlocks : ti->getSuccessorBlocks()) {
|
|
fun(succBlocks->begin());
|
|
}
|
|
}
|
|
|
|
/// Fixup reference counts after inlining a function call (which is a no-op
|
|
/// unless the function is a thick function).
|
|
///
|
|
/// It is important to note that, we can not assume that the partial apply, the
|
|
/// apply site, or the callee value are control dependent in any way. This
|
|
/// requires us to need to be very careful. See inline comments.
|
|
static void fixupReferenceCounts(
|
|
PartialApplyInst *pai, SILInstruction *applySite, SILValue calleeValue,
|
|
ArrayRef<ParameterConvention> captureArgConventions,
|
|
MutableArrayRef<SILValue> capturedArgs, bool isCalleeGuaranteed) {
|
|
|
|
// We assume that we were passed a slice of our actual argument array. So we
|
|
// can use this to copy if we need to.
|
|
assert(captureArgConventions.size() == capturedArgs.size());
|
|
|
|
SmallPtrSet<SILBasicBlock *, 8> visitedBlocks;
|
|
// FIXME: Can we cache this in between inlining invocations?
|
|
DeadEndBlocks deadEndBlocks(pai->getFunction());
|
|
SmallVector<SILBasicBlock *, 4> leakingBlocks;
|
|
|
|
auto errorBehavior = ownership::ErrorBehaviorKind::ReturnFalse;
|
|
|
|
// Add a copy of each non-address type capture argument to lifetime extend the
|
|
// captured argument over at least the inlined function and till the end of a
|
|
// box if we have an address. This deals with the possibility of the closure
|
|
// being destroyed by an earlier application and thus cause the captured
|
|
// argument to be destroyed.
|
|
auto loc = RegularLocation::getAutoGeneratedLocation();
|
|
|
|
for (unsigned i : indices(captureArgConventions)) {
|
|
auto convention = captureArgConventions[i];
|
|
SILValue &v = capturedArgs[i];
|
|
if (v->getType().isAddress()) {
|
|
// FIXME: What about indirectly owned parameters? The invocation of the
|
|
// closure would perform an indirect copy which we should mimick here.
|
|
assert(convention != ParameterConvention::Indirect_In &&
|
|
"Missing indirect copy");
|
|
continue;
|
|
}
|
|
|
|
auto *f = applySite->getFunction();
|
|
|
|
// See if we have a trivial value. In such a case, just continue. We do not
|
|
// need to fix up anything.
|
|
if (v->getType().isTrivial(*f))
|
|
continue;
|
|
|
|
bool hasOwnership = f->hasOwnership();
|
|
|
|
switch (convention) {
|
|
case ParameterConvention::Indirect_In:
|
|
case ParameterConvention::Indirect_In_Constant:
|
|
case ParameterConvention::Indirect_Inout:
|
|
case ParameterConvention::Indirect_InoutAliasable:
|
|
case ParameterConvention::Indirect_In_Guaranteed:
|
|
llvm_unreachable("Should be handled above");
|
|
|
|
case ParameterConvention::Direct_Guaranteed: {
|
|
// If we have a direct_guaranteed value, the value is being taken by the
|
|
// partial_apply at +1, but we are going to invoke the value at +0. So we
|
|
// need to copy/borrow the value before the pai and then
|
|
// end_borrow/destroy_value at the apply site.
|
|
SILValue copy = SILBuilderWithScope(pai).emitCopyValueOperation(loc, v);
|
|
SILValue argument = copy;
|
|
if (hasOwnership) {
|
|
argument = SILBuilderWithScope(pai).createBeginBorrow(loc, argument);
|
|
}
|
|
|
|
visitedBlocks.clear();
|
|
// If we need to insert compensating destroys, do so.
|
|
auto error =
|
|
valueHasLinearLifetime(copy, {applySite}, {}, visitedBlocks,
|
|
deadEndBlocks, errorBehavior, &leakingBlocks);
|
|
if (error.getFoundLeak()) {
|
|
while (!leakingBlocks.empty()) {
|
|
auto *leakingBlock = leakingBlocks.pop_back_val();
|
|
auto loc = RegularLocation::getAutoGeneratedLocation();
|
|
SILBuilderWithScope builder(leakingBlock->begin());
|
|
if (hasOwnership) {
|
|
builder.createEndBorrow(loc, argument);
|
|
}
|
|
builder.emitDestroyValueOperation(loc, copy);
|
|
}
|
|
}
|
|
|
|
// If we found an over consume it means that our value is consumed within
|
|
// the loop. That means our leak code will have lifetime extended the
|
|
// value over the loop. So we should /not/ insert a destroy after the
|
|
// apply site. In contrast, if we do not have an over consume, we must
|
|
// have been compensating for uses in the top of a diamond and need to
|
|
// insert a destroy after the apply since the leak will just cover the
|
|
// other path.
|
|
if (!error.getFoundOverConsume()) {
|
|
insertAfterApply(applySite, [&](SILBasicBlock::iterator iter) {
|
|
if (hasOwnership) {
|
|
SILBuilderWithScope(iter).createEndBorrow(loc, argument);
|
|
}
|
|
SILBuilderWithScope(iter).emitDestroyValueOperation(loc, copy);
|
|
});
|
|
}
|
|
v = argument;
|
|
break;
|
|
}
|
|
|
|
// TODO: Do we need to lifetime extend here?
|
|
case ParameterConvention::Direct_Unowned: {
|
|
v = SILBuilderWithScope(pai).emitCopyValueOperation(loc, v);
|
|
|
|
visitedBlocks.clear();
|
|
// If we need to insert compensating destroys, do so.
|
|
auto error =
|
|
valueHasLinearLifetime(v, {applySite}, {}, visitedBlocks,
|
|
deadEndBlocks, errorBehavior, &leakingBlocks);
|
|
if (error.getFoundError()) {
|
|
while (!leakingBlocks.empty()) {
|
|
auto *leakingBlock = leakingBlocks.pop_back_val();
|
|
auto loc = RegularLocation::getAutoGeneratedLocation();
|
|
SILBuilderWithScope builder(leakingBlock->begin());
|
|
builder.emitDestroyValueOperation(loc, v);
|
|
}
|
|
}
|
|
|
|
insertAfterApply(applySite, [&](SILBasicBlock::iterator iter) {
|
|
SILBuilderWithScope(iter).emitDestroyValueOperation(loc, v);
|
|
});
|
|
break;
|
|
}
|
|
|
|
// If we have an owned value, we insert a copy here for two reasons:
|
|
//
|
|
// 1. To balance the consuming argument.
|
|
// 2. To lifetime extend the value over the call site in case our partial
|
|
// apply has another use that would destroy our value first.
|
|
case ParameterConvention::Direct_Owned: {
|
|
v = SILBuilderWithScope(pai).emitCopyValueOperation(loc, v);
|
|
|
|
visitedBlocks.clear();
|
|
// If we need to insert compensating destroys, do so.
|
|
auto error =
|
|
valueHasLinearLifetime(v, {applySite}, {}, visitedBlocks,
|
|
deadEndBlocks, errorBehavior, &leakingBlocks);
|
|
if (error.getFoundError()) {
|
|
while (!leakingBlocks.empty()) {
|
|
auto *leakingBlock = leakingBlocks.pop_back_val();
|
|
auto loc = RegularLocation::getAutoGeneratedLocation();
|
|
SILBuilderWithScope builder(leakingBlock->begin());
|
|
builder.emitDestroyValueOperation(loc, v);
|
|
}
|
|
}
|
|
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
|
|
// Destroy the callee as the apply would have done if our function is not
|
|
// callee guaranteed.
|
|
if (!isCalleeGuaranteed) {
|
|
insertAfterApply(applySite, [&](SILBasicBlock::iterator iter) {
|
|
SILBuilderWithScope(iter).emitDestroyValueOperation(loc, calleeValue);
|
|
});
|
|
}
|
|
}
|
|
|
|
/// Try to remove the alloc_box / project_box / store / load chain through
/// which a callee value was passed, now that the load \p li is expected to be
/// dead.
///
/// The chain is only removed when it has the simple shape handled here: \p li
/// loads from a project_box of an alloc_box, the load has no remaining uses
/// (or, with ownership, a single destroy_value user), the box is used only by
/// the project_box and at most one strong_release/destroy_value, and the
/// project_box is used only by a single store.
///
/// \param calleeValue the callee value being cleaned up; only used as local
///        storage for the result.
/// \param li the load that produced the callee.
/// \returns the source of the store into the box when the chain was removed,
///          or a null SILValue when the pattern was not recognized. Note that
///          the load (and its destroy_value) may already have been erased when
///          a later check fails.
static SILValue cleanupLoadedCalleeValue(SILValue calleeValue, LoadInst *li) {
  // This is reached for arbitrary function-typed loads, not only for loads
  // that were vetted by the inlining path. Bail out (before mutating anything)
  // rather than assert when the load does not come from a box.
  auto *pbi = dyn_cast<ProjectBoxInst>(li->getOperand());
  if (!pbi)
    return SILValue();
  auto *abi = dyn_cast<AllocBoxInst>(pbi->getOperand());
  if (!abi)
    return SILValue();

  // The load instruction must have no more uses or a single destroy left to
  // erase it.
  if (li->getFunction()->hasOwnership()) {
    // TODO: What if we have multiple destroy_value? That should be ok as well.
    auto *dvi = li->getSingleUserOfType<DestroyValueInst>();
    if (!dvi)
      return SILValue();
    dvi->eraseFromParent();
  } else if (!li->use_empty()) {
    return SILValue();
  }
  li->eraseFromParent();

  // Look through uses of the alloc box the load is loading from to find up to
  // one strong release or destroy. Apart from that and the project_box, any
  // other user means we do not understand the box.
  PointerUnion<StrongReleaseInst *, DestroyValueInst *> destroy;
  destroy = nullptr;
  for (Operand *use : abi->getUses()) {
    auto *user = use->getUser();

    if (destroy.isNull()) {
      if (auto *sri = dyn_cast<StrongReleaseInst>(user)) {
        destroy = sri;
        continue;
      }

      if (auto *dvi = dyn_cast<DestroyValueInst>(user)) {
        destroy = dvi;
        continue;
      }
    }

    if (user == pbi)
      continue;

    return SILValue();
  }

  // The project_box may now only be used by a single store. A second store
  // would be left behind below and keep the project_box alive, so bail.
  StoreInst *si = nullptr;
  for (Operand *use : pbi->getUses()) {
    auto *useSI = dyn_cast_or_null<StoreInst>(use->getUser());
    if (!useSI || si)
      return SILValue();
    si = useSI;
  }

  // If we found a store, record its source and erase it.
  if (si) {
    calleeValue = si->getSrc();
    si->eraseFromParent();
  } else {
    calleeValue = SILValue();
  }

  // If we found a release/destroy of the box and we know the stored value,
  // replace it with a release/destroy of the stored value and erase it.
  if (destroy) {
    if (calleeValue) {
      if (auto *sri = destroy.dyn_cast<StrongReleaseInst *>()) {
        SILBuilderWithScope(sri).emitStrongReleaseAndFold(sri->getLoc(),
                                                          calleeValue);
        sri->eraseFromParent();
      } else {
        auto *dvi = destroy.get<DestroyValueInst *>();
        SILBuilderWithScope(dvi).emitDestroyValueAndFold(dvi->getLoc(),
                                                         calleeValue);
        dvi->eraseFromParent();
      }
    }
  }

  assert(pbi->use_empty());
  pbi->eraseFromParent();
  assert(abi->use_empty());
  abi->eraseFromParent();

  return calleeValue;
}
|
|
|
|
/// Removes instructions that create the callee value if they are no
|
|
/// longer necessary after inlining.
|
|
static void cleanupCalleeValue(SILValue calleeValue) {
|
|
// Handle the case where the callee of the apply is a load instruction. If we
|
|
// fail to optimize, return. Otherwise, see if we can look through other
|
|
// abstractions on our callee.
|
|
if (auto *li = dyn_cast<LoadInst>(calleeValue)) {
|
|
calleeValue = cleanupLoadedCalleeValue(calleeValue, li);
|
|
if (!calleeValue) {
|
|
return;
|
|
}
|
|
}
|
|
|
|
calleeValue = stripCopies(calleeValue);
|
|
|
|
// Inline constructor
|
|
auto calleeSource = ([&]() -> SILValue {
|
|
// Handle partial_apply/thin_to_thick -> convert_function:
|
|
// tryDeleteDeadClosure must run before deleting a ConvertFunction that uses
|
|
// the PartialApplyInst or ThinToThickFunctionInst. tryDeleteDeadClosure
|
|
// will delete any uses of the closure, including a
|
|
// convert_escape_to_noescape conversion.
|
|
if (auto *cfi = dyn_cast<ConvertFunctionInst>(calleeValue))
|
|
return stripCopies(cfi->getOperand());
|
|
|
|
if (auto *cvt = dyn_cast<ConvertEscapeToNoEscapeInst>(calleeValue))
|
|
return stripCopies(cvt->getOperand());
|
|
|
|
return stripCopies(calleeValue);
|
|
})();
|
|
|
|
if (auto *pai = dyn_cast<PartialApplyInst>(calleeSource)) {
|
|
SILValue callee = pai->getCallee();
|
|
if (!tryDeleteDeadClosure(pai))
|
|
return;
|
|
calleeValue = callee;
|
|
} else if (auto *tttfi = dyn_cast<ThinToThickFunctionInst>(calleeSource)) {
|
|
SILValue callee = tttfi->getCallee();
|
|
if (!tryDeleteDeadClosure(tttfi))
|
|
return;
|
|
calleeValue = callee;
|
|
}
|
|
|
|
calleeValue = stripCopies(calleeValue);
|
|
|
|
// Handle function_ref -> convert_function -> partial_apply/thin_to_thick.
|
|
if (auto *cfi = dyn_cast<ConvertFunctionInst>(calleeValue)) {
|
|
if (isInstructionTriviallyDead(cfi)) {
|
|
recursivelyDeleteTriviallyDeadInstructions(cfi, true);
|
|
return;
|
|
}
|
|
}
|
|
|
|
if (auto *fri = dyn_cast<FunctionRefInst>(calleeValue)) {
|
|
if (!fri->use_empty())
|
|
return;
|
|
fri->eraseFromParent();
|
|
}
|
|
}
|
|
|
|
namespace {
|
|
/// Cleanup dead closures after inlining.
|
|
class ClosureCleanup {
|
|
using DeadInstSet = SmallBlotSetVector<SILInstruction *, 4>;
|
|
|
|
/// A helper class to update the set of dead instructions.
|
|
///
|
|
/// Since this is called by the SILModule callback, the instruction may longer
|
|
/// be well-formed. Do not visit its operands. However, it's position in the
|
|
/// basic block is still valid.
|
|
///
|
|
/// FIXME: Using the Module's callback mechanism for this is terrible.
|
|
/// Instead, cleanupCalleeValue could be easily rewritten to use its own
|
|
/// instruction deletion helper and pass a callback to tryDeleteDeadClosure
|
|
/// and recursivelyDeleteTriviallyDeadInstructions.
|
|
class DeleteUpdateHandler : public DeleteNotificationHandler {
|
|
SILModule &Module;
|
|
DeadInstSet &DeadInsts;
|
|
|
|
public:
|
|
DeleteUpdateHandler(SILModule &M, DeadInstSet &DeadInsts)
|
|
: Module(M), DeadInsts(DeadInsts) {
|
|
Module.registerDeleteNotificationHandler(this);
|
|
}
|
|
|
|
~DeleteUpdateHandler() override {
|
|
// Unregister the handler.
|
|
Module.removeDeleteNotificationHandler(this);
|
|
}
|
|
|
|
// Handling of instruction removal notifications.
|
|
bool needsNotifications() override { return true; }
|
|
|
|
// Handle notifications about removals of instructions.
|
|
void handleDeleteNotification(SILNode *node) override {
|
|
auto deletedI = dyn_cast<SILInstruction>(node);
|
|
if (!deletedI)
|
|
return;
|
|
|
|
DeadInsts.erase(deletedI);
|
|
}
|
|
};
|
|
|
|
SmallBlotSetVector<SILInstruction *, 4> deadFunctionVals;
|
|
|
|
public:
|
|
/// This regular instruction deletion callback checks for any function-type
|
|
/// values that may be unused after deleting the given instruction.
|
|
void recordDeadFunction(SILInstruction *deletedInst) {
|
|
// If the deleted instruction was already recorded as a function producer,
|
|
// delete it from the map and record its operands instead.
|
|
deadFunctionVals.erase(deletedInst);
|
|
for (auto &operand : deletedInst->getAllOperands()) {
|
|
SILValue operandVal = operand.get();
|
|
if (!operandVal->getType().is<SILFunctionType>())
|
|
continue;
|
|
|
|
// Simply record all function-producing instructions used by dead
|
|
// code. Checking for a single use would not be precise because
|
|
// `deletedInst` could itself use `deadInst` multiple times.
|
|
if (auto *deadInst = operandVal->getDefiningInstruction())
|
|
deadFunctionVals.insert(deadInst);
|
|
}
|
|
}
|
|
|
|
// Note: instructions in the `deadFunctionVals` set may use each other, so the
|
|
// set needs to continue to be updated (by this handler) when deleting
|
|
// instructions. This assumes that DeadFunctionValSet::erase() is stable.
|
|
void cleanupDeadClosures(SILFunction *F) {
|
|
DeleteUpdateHandler deleteUpdate(F->getModule(), deadFunctionVals);
|
|
for (Optional<SILInstruction *> I : deadFunctionVals) {
|
|
if (!I.hasValue())
|
|
continue;
|
|
|
|
if (auto *SVI = dyn_cast<SingleValueInstruction>(I.getValue()))
|
|
cleanupCalleeValue(SVI);
|
|
}
|
|
}
|
|
};
|
|
|
|
} // end of namespace
|
|
|
|
/// Append every argument applied by \p PAI to \p FullArgs and, in the same
/// order, the callee's parameter convention for that argument to
/// \p CapturedArgConventions.
static void collectPartiallyAppliedArguments(
    PartialApplyInst *PAI,
    SmallVectorImpl<ParameterConvention> &CapturedArgConventions,
    SmallVectorImpl<SILValue> &FullArgs) {
  ApplySite PartialSite(PAI);
  SILFunctionConventions Conventions(PartialSite.getSubstCalleeType(),
                                     PAI->getModule());

  for (auto &CapturedOperand : PAI->getArgumentOperands()) {
    // Map the partial_apply operand onto the callee's argument list.
    unsigned IndexInCallee = PartialSite.getCalleeArgIndex(CapturedOperand);
    assert(IndexInCallee >= Conventions.getSILArgIndexOfFirstParam());

    CapturedArgConventions.push_back(
        Conventions.getParamInfoForSILArg(IndexInCallee).getConvention());
    FullArgs.push_back(CapturedOperand.get());
  }
}
|
|
|
|
/// Given a load \p li of a callee, return the value that was stored into the
/// box it loads from, or a null SILValue if the box is not "simple".
///
/// The box is simple when all of the following hold:
///  - \p li loads from a project_box of an alloc_box;
///  - besides that project_box, the alloc_box has at most one
///    strong_release/destroy_value user and no other users;
///  - besides \p li, the project_box is used by exactly one store, and that
///    store is in the same basic block as the alloc_box.
static SILValue getLoadedCalleeValue(LoadInst *li) {
  auto *projection = dyn_cast<ProjectBoxInst>(li->getOperand());
  if (!projection)
    return SILValue();

  auto *box = dyn_cast<AllocBoxInst>(projection->getOperand());
  if (!box)
    return SILValue();

  // Scan the users of the box. The first strong_release or destroy_value is
  // accepted as the box's single destroy and the project_box is ignored.
  //
  // NOTE: Anything else, including a second destroy and any strong_retain,
  // retain_value or copy_value, purposely makes us bail, since we only want
  // to consider simple boxes that have a single post-dominating destroy.
  bool sawDestroy = false;
  for (auto *boxUse : box->getUses()) {
    auto *boxUser = boxUse->getUser();

    bool isDestroy =
        isa<StrongReleaseInst>(boxUser) || isa<DestroyValueInst>(boxUser);
    if (!sawDestroy && isDestroy) {
      sawDestroy = true;
      continue;
    }

    if (boxUser != projection)
      return SILValue();
  }

  // The project_box may only be used by our load and by one initializing
  // store that lives in the same block as the box.
  StoreInst *initStore = nullptr;
  for (Operand *addrUse : projection->getUses()) {
    auto *addrUser = addrUse->getUser();
    if (addrUser == li)
      continue;

    // Bail on anything that is not a store, on a value that is initialized
    // more than once, and on a store outside of the box's block.
    auto *store = dyn_cast_or_null<StoreInst>(addrUser);
    if (!store || initStore || store->getParent() != box->getParent())
      return SILValue();

    initStore = store;
  }

  // Without a store there is no callee; otherwise it is the stored value.
  return initStore ? initStore->getSrc() : SILValue();
}
|
|
|
|
/// Returns the callee SILFunction called at a call site, in the case
|
|
/// that the call is transparent (as in, both that the call is marked
|
|
/// with the transparent flag and that callee function is actually transparently
|
|
/// determinable from the SIL) or nullptr otherwise. This assumes that the SIL
|
|
/// is already in SSA form.
|
|
///
|
|
/// In the case that a non-null value is returned, FullArgs contains effective
|
|
/// argument operands for the callee function.
|
|
static SILFunction *
|
|
getCalleeFunction(SILFunction *F, FullApplySite AI, bool &IsThick,
|
|
SmallVectorImpl<ParameterConvention> &CapturedArgConventions,
|
|
SmallVectorImpl<SILValue> &FullArgs,
|
|
PartialApplyInst *&PartialApply) {
|
|
IsThick = false;
|
|
PartialApply = nullptr;
|
|
CapturedArgConventions.clear();
|
|
FullArgs.clear();
|
|
|
|
// First grab our basic arguments from our apply.
|
|
for (const auto &Arg : AI.getArguments())
|
|
FullArgs.push_back(Arg);
|
|
|
|
// Then grab a first approximation of our apply by stripping off all copy
|
|
// operations.
|
|
SILValue CalleeValue = stripCopies(AI.getCallee());
|
|
|
|
// If after stripping off copy_values, we have a load then see if we the
|
|
// function we want to inline has a simple available value through a simple
|
|
// alloc_box. Bail otherwise.
|
|
if (auto *li = dyn_cast<LoadInst>(CalleeValue)) {
|
|
CalleeValue = getLoadedCalleeValue(li);
|
|
if (!CalleeValue)
|
|
return nullptr;
|
|
CalleeValue = stripCopies(CalleeValue);
|
|
}
|
|
|
|
// PartialApply/ThinToThick -> ConvertFunction patterns are generated
|
|
// by @noescape closures.
|
|
//
|
|
// FIXME: We don't currently handle mismatched return types, however, this
|
|
// would be a good optimization to handle and would be as simple as inserting
|
|
// a cast.
|
|
auto skipFuncConvert = [](SILValue CalleeValue) {
|
|
// Skip any copies that we see.
|
|
CalleeValue = stripCopies(CalleeValue);
|
|
|
|
// We can also allow a thin @escape to noescape conversion as such:
|
|
// %1 = function_ref @thin_closure_impl : $@convention(thin) () -> ()
|
|
// %2 = convert_function %1 :
|
|
// $@convention(thin) () -> () to $@convention(thin) @noescape () -> ()
|
|
// %3 = thin_to_thick_function %2 :
|
|
// $@convention(thin) @noescape () -> () to
|
|
// $@noescape @callee_guaranteed () -> ()
|
|
// %4 = apply %3() : $@noescape @callee_guaranteed () -> ()
|
|
if (auto *ThinToNoescapeCast = dyn_cast<ConvertFunctionInst>(CalleeValue)) {
|
|
auto FromCalleeTy =
|
|
ThinToNoescapeCast->getOperand()->getType().castTo<SILFunctionType>();
|
|
if (FromCalleeTy->getExtInfo().hasContext())
|
|
return CalleeValue;
|
|
auto ToCalleeTy = ThinToNoescapeCast->getType().castTo<SILFunctionType>();
|
|
auto EscapingCalleeTy = ToCalleeTy->getWithExtInfo(
|
|
ToCalleeTy->getExtInfo().withNoEscape(false));
|
|
if (FromCalleeTy != EscapingCalleeTy)
|
|
return CalleeValue;
|
|
return stripCopies(ThinToNoescapeCast->getOperand());
|
|
}
|
|
|
|
// Ignore mark_dependence users. A partial_apply [stack] uses them to mark
|
|
// the dependence of the trivial closure context value on the captured
|
|
// arguments.
|
|
if (auto *MD = dyn_cast<MarkDependenceInst>(CalleeValue)) {
|
|
while (MD) {
|
|
CalleeValue = MD->getValue();
|
|
MD = dyn_cast<MarkDependenceInst>(CalleeValue);
|
|
}
|
|
return CalleeValue;
|
|
}
|
|
|
|
auto *CFI = dyn_cast<ConvertEscapeToNoEscapeInst>(CalleeValue);
|
|
if (!CFI)
|
|
return stripCopies(CalleeValue);
|
|
|
|
// TODO: Handle argument conversion. All the code in this file needs to be
|
|
// cleaned up and generalized. The argument conversion handling in
|
|
// optimizeApplyOfConvertFunctionInst should apply to any combine
|
|
// involving an apply, not just a specific pattern.
|
|
//
|
|
// For now, just handle conversion that doesn't affect argument types,
|
|
// return types, or throws. We could trivially handle any other
|
|
// representation change, but the only one that doesn't affect the ABI and
|
|
// matters here is @noescape, so just check for that.
|
|
auto FromCalleeTy = CFI->getOperand()->getType().castTo<SILFunctionType>();
|
|
auto ToCalleeTy = CFI->getType().castTo<SILFunctionType>();
|
|
auto EscapingCalleeTy =
|
|
ToCalleeTy->getWithExtInfo(ToCalleeTy->getExtInfo().withNoEscape(false));
|
|
if (FromCalleeTy != EscapingCalleeTy)
|
|
return stripCopies(CalleeValue);
|
|
|
|
return stripCopies(CFI->getOperand());
|
|
};
|
|
|
|
// Look through a escape to @noescape conversion.
|
|
CalleeValue = skipFuncConvert(CalleeValue);
|
|
|
|
// We are allowed to see through exactly one "partial apply" instruction or
|
|
// one "thin to thick function" instructions, since those are the patterns
|
|
// generated when using auto closures.
|
|
if (auto *PAI = dyn_cast<PartialApplyInst>(CalleeValue)) {
|
|
// Collect the applied arguments and their convention.
|
|
collectPartiallyAppliedArguments(PAI, CapturedArgConventions, FullArgs);
|
|
|
|
CalleeValue = stripCopies(PAI->getCallee());
|
|
IsThick = true;
|
|
PartialApply = PAI;
|
|
} else if (auto *TTTFI = dyn_cast<ThinToThickFunctionInst>(CalleeValue)) {
|
|
CalleeValue = stripCopies(TTTFI->getOperand());
|
|
IsThick = true;
|
|
}
|
|
|
|
CalleeValue = skipFuncConvert(CalleeValue);
|
|
|
|
auto *FRI = dyn_cast<FunctionRefInst>(CalleeValue);
|
|
if (!FRI)
|
|
return nullptr;
|
|
|
|
SILFunction *CalleeFunction = FRI->getReferencedFunction();
|
|
|
|
switch (CalleeFunction->getRepresentation()) {
|
|
case SILFunctionTypeRepresentation::Thick:
|
|
case SILFunctionTypeRepresentation::Thin:
|
|
case SILFunctionTypeRepresentation::Method:
|
|
case SILFunctionTypeRepresentation::Closure:
|
|
case SILFunctionTypeRepresentation::WitnessMethod:
|
|
break;
|
|
|
|
case SILFunctionTypeRepresentation::CFunctionPointer:
|
|
case SILFunctionTypeRepresentation::ObjCMethod:
|
|
case SILFunctionTypeRepresentation::Block:
|
|
return nullptr;
|
|
}
|
|
|
|
// If the CalleeFunction is a not-transparent definition, we can not process
|
|
// it.
|
|
if (CalleeFunction->isTransparent() == IsNotTransparent)
|
|
return nullptr;
|
|
|
|
// If CalleeFunction is a declaration, see if we can load it.
|
|
if (CalleeFunction->empty())
|
|
AI.getModule().loadFunction(CalleeFunction);
|
|
|
|
// If we fail to load it, bail.
|
|
if (CalleeFunction->empty())
|
|
return nullptr;
|
|
|
|
if (F->isSerialized() &&
|
|
!CalleeFunction->hasValidLinkageForFragileInline()) {
|
|
if (!CalleeFunction->hasValidLinkageForFragileRef()) {
|
|
llvm::errs() << "caller: " << F->getName() << "\n";
|
|
llvm::errs() << "callee: " << CalleeFunction->getName() << "\n";
|
|
llvm_unreachable("Should never be inlining a resilient function into "
|
|
"a fragile function");
|
|
}
|
|
return nullptr;
|
|
}
|
|
|
|
return CalleeFunction;
|
|
}
|
|
|
|
/// Try to devirtualize \p InnerAI using \p CHA.
///
/// \returns the instruction of \p InnerAI itself when devirtualization did not
///          produce anything; otherwise \p InnerAI is deleted and the
///          instruction of the devirtualized result is returned.
static SILInstruction *tryDevirtualizeApplyHelper(FullApplySite InnerAI,
                                                  ClassHierarchyAnalysis *CHA) {
  auto Devirtualized = tryDevirtualizeApply(InnerAI, CHA);
  if (Devirtualized) {
    // The original apply has been replaced, so remove it.
    deleteDevirtualizedApply(InnerAI);

    // FIXME: Comments at the use of this helper indicate that devirtualization
    // may return SILArgument. Yet here we assert that it must return an
    // instruction.
    auto Replacement = Devirtualized.getInstruction();
    assert(Replacement && "devirtualized but removed apply site?");
    return Replacement;
  }

  return InnerAI.getInstruction();
}
|
|
|
|
/// Inlines all mandatory inlined functions into the body of a function,
|
|
/// first recursively inlining all mandatory apply instructions in those
|
|
/// functions into their bodies if necessary.
|
|
///
|
|
/// \param F the function to be processed
|
|
/// \param AI nullptr if this is being called from the top level; the relevant
|
|
/// ApplyInst requiring the recursive call when non-null
|
|
/// \param FullyInlinedSet the set of all functions already known to be fully
|
|
/// processed, to avoid processing them over again
|
|
/// \param SetFactory an instance of ImmutableFunctionSet::Factory
|
|
/// \param CurrentInliningSet the set of functions currently being inlined in
|
|
/// the current call stack of recursive calls
|
|
///
|
|
/// \returns true if successful, false if failed due to circular inlining.
|
|
static bool
|
|
runOnFunctionRecursively(SILOptFunctionBuilder &FuncBuilder,
|
|
SILFunction *F, FullApplySite AI,
|
|
DenseFunctionSet &FullyInlinedSet,
|
|
ImmutableFunctionSet::Factory &SetFactory,
|
|
ImmutableFunctionSet CurrentInliningSet,
|
|
ClassHierarchyAnalysis *CHA) {
|
|
// Avoid reprocessing functions needlessly.
|
|
if (FullyInlinedSet.count(F))
|
|
return true;
|
|
|
|
// Prevent attempt to circularly inline.
|
|
if (CurrentInliningSet.contains(F)) {
|
|
// This cannot happen on a top-level call, so AI should be non-null.
|
|
assert(AI && "Cannot have circular inline without apply");
|
|
SILLocation L = AI.getLoc();
|
|
assert(L && "Must have location for transparent inline apply");
|
|
diagnose(F->getModule().getASTContext(), L.getStartSourceLoc(),
|
|
diag::circular_transparent);
|
|
return false;
|
|
}
|
|
|
|
// Add to the current inlining set (immutably, so we only affect the set
|
|
// during this call and recursive subcalls).
|
|
CurrentInliningSet = SetFactory.add(CurrentInliningSet, F);
|
|
|
|
SmallVector<ParameterConvention, 16> CapturedArgConventions;
|
|
SmallVector<SILValue, 32> FullArgs;
|
|
bool needUpdateStackNesting = false;
|
|
|
|
// Visiting blocks in reverse order avoids revisiting instructions after block
|
|
// splitting, which would be quadratic.
|
|
for (auto BI = F->rbegin(), BE = F->rend(), nextBB = BI; BI != BE;
|
|
BI = nextBB) {
|
|
// After inlining, the block iterator will be adjusted to point to the last
|
|
// block containing inlined instructions. This way, the inlined function
|
|
// body will be reprocessed within the caller's context without revisiting
|
|
// any original instructions.
|
|
nextBB = std::next(BI);
|
|
|
|
// While iterating over this block, instructions are inserted and deleted.
|
|
// To avoid quadratic block splitting, instructions must be processed in
|
|
// reverse order (block splitting reassigned the parent pointer of all
|
|
// instructions below the split point).
|
|
for (auto II = BI->rbegin(); II != BI->rend(); ++II) {
|
|
FullApplySite InnerAI = FullApplySite::isa(&*II);
|
|
if (!InnerAI)
|
|
continue;
|
|
|
|
// *NOTE* If devirtualization succeeds, devirtInst may not be InnerAI,
|
|
// but a casted result of InnerAI or even a block argument due to
|
|
// abstraction changes when calling the witness or class method.
|
|
auto *devirtInst = tryDevirtualizeApplyHelper(InnerAI, CHA);
|
|
// Restore II to the current apply site.
|
|
II = devirtInst->getReverseIterator();
|
|
// If the devirtualized call result is no longer a valid FullApplySite,
|
|
// then it has succeeded, but the result is not immediately inlinable.
|
|
InnerAI = FullApplySite::isa(devirtInst);
|
|
if (!InnerAI)
|
|
continue;
|
|
|
|
SILValue CalleeValue = InnerAI.getCallee();
|
|
bool IsThick;
|
|
PartialApplyInst *PAI;
|
|
SILFunction *CalleeFunction = getCalleeFunction(
|
|
F, InnerAI, IsThick, CapturedArgConventions, FullArgs, PAI);
|
|
|
|
if (!CalleeFunction)
|
|
continue;
|
|
|
|
// Then recursively process it first before trying to inline it.
|
|
if (!runOnFunctionRecursively(FuncBuilder, CalleeFunction, InnerAI,
|
|
FullyInlinedSet, SetFactory,
|
|
CurrentInliningSet, CHA)) {
|
|
// If we failed due to circular inlining, then emit some notes to
|
|
// trace back the failure if we have more information.
|
|
// FIXME: possibly it could be worth recovering and attempting other
|
|
// inlines within this same recursive call rather than simply
|
|
// propagating the failure.
|
|
if (AI) {
|
|
SILLocation L = AI.getLoc();
|
|
assert(L && "Must have location for transparent inline apply");
|
|
diagnose(F->getModule().getASTContext(), L.getStartSourceLoc(),
|
|
diag::note_while_inlining);
|
|
}
|
|
return false;
|
|
}
|
|
|
|
// Get our list of substitutions.
|
|
auto Subs = (PAI
|
|
? PAI->getSubstitutionMap()
|
|
: InnerAI.getSubstitutionMap());
|
|
|
|
SILOpenedArchetypesTracker OpenedArchetypesTracker(F);
|
|
F->getModule().registerDeleteNotificationHandler(
|
|
&OpenedArchetypesTracker);
|
|
// The callee only needs to know about opened archetypes used in
|
|
// the substitution list.
|
|
OpenedArchetypesTracker.registerUsedOpenedArchetypes(
|
|
InnerAI.getInstruction());
|
|
if (PAI) {
|
|
OpenedArchetypesTracker.registerUsedOpenedArchetypes(PAI);
|
|
}
|
|
|
|
SILInliner Inliner(FuncBuilder, SILInliner::InlineKind::MandatoryInline,
|
|
Subs, OpenedArchetypesTracker);
|
|
if (!Inliner.canInlineApplySite(InnerAI))
|
|
continue;
|
|
|
|
// Inline function at I, which also changes I to refer to the first
|
|
// instruction inlined in the case that it succeeds. We purposely
|
|
// process the inlined body after inlining, because the inlining may
|
|
// have exposed new inlining opportunities beyond those present in
|
|
// the inlined function when processed independently.
|
|
LLVM_DEBUG(llvm::errs() << "Inlining @" << CalleeFunction->getName()
|
|
<< " into @" << InnerAI.getFunction()->getName()
|
|
<< "\n");
|
|
|
|
// If we intend to inline a partial_apply function that is not on the
|
|
// stack, then we need to balance the reference counts for correctness.
|
|
//
|
|
// NOTE: If our partial apply is on the stack, it only has point (and
|
|
// hopefully eventually guaranteed) uses of the captured arguments.
|
|
//
|
|
// NOTE: If we have a thin_to_thick_function, we do not need to worry
|
|
// about such things since a thin_to_thick_function does not capture any
|
|
// arguments.
|
|
if (PAI && PAI->isOnStack() == PartialApplyInst::NotOnStack) {
|
|
bool IsCalleeGuaranteed =
|
|
PAI->getType().castTo<SILFunctionType>()->isCalleeGuaranteed();
|
|
auto CapturedArgs = MutableArrayRef<SILValue>(FullArgs).take_back(
|
|
CapturedArgConventions.size());
|
|
// We need to insert the copies before the partial_apply since if we can
|
|
// not remove the partial_apply the captured values will be dead by the
|
|
// time we hit the call site.
|
|
fixupReferenceCounts(PAI, InnerAI.getInstruction(), CalleeValue,
|
|
CapturedArgConventions, CapturedArgs,
|
|
IsCalleeGuaranteed);
|
|
}
|
|
|
|
// Register a callback to record potentially unused function values after
|
|
// inlining.
|
|
ClosureCleanup closureCleanup;
|
|
Inliner.setDeletionCallback([&closureCleanup](SILInstruction *I) {
|
|
closureCleanup.recordDeadFunction(I);
|
|
});
|
|
|
|
needUpdateStackNesting |= Inliner.needsUpdateStackNesting(InnerAI);
|
|
|
|
// Inlining deletes the apply, and can introduce multiple new basic
|
|
// blocks. After this, CalleeValue and other instructions may be invalid.
|
|
// nextBB will point to the last inlined block
|
|
auto firstInlinedInstAndLastBB =
|
|
Inliner.inlineFunction(CalleeFunction, InnerAI, FullArgs);
|
|
nextBB = firstInlinedInstAndLastBB.second->getReverseIterator();
|
|
++NumMandatoryInlines;
|
|
|
|
// The IR is now valid, and trivial dead arguments are removed. However,
|
|
// we may be able to remove dead callee computations (e.g. dead
|
|
// partial_apply closures).
|
|
closureCleanup.cleanupDeadClosures(F);
|
|
|
|
// Resume inlining within nextBB, which contains only the inlined
|
|
// instructions and possibly instructions in the original call block that
|
|
// have not yet been visited.
|
|
break;
|
|
}
|
|
}
|
|
|
|
if (needUpdateStackNesting) {
|
|
StackNesting().correctStackNesting(F);
|
|
}
|
|
|
|
// Keep track of fully inlined functions so we don't waste time recursively
|
|
// reprocessing them.
|
|
FullyInlinedSet.insert(F);
|
|
return true;
|
|
}
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
// Top Level Driver
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
namespace {
|
|
|
|
class MandatoryInlining : public SILModuleTransform {
|
|
/// The entry point to the transformation.
|
|
void run() override {
|
|
ClassHierarchyAnalysis *CHA = getAnalysis<ClassHierarchyAnalysis>();
|
|
SILModule *M = getModule();
|
|
bool ShouldCleanup = !getOptions().DebugSerialization;
|
|
DenseFunctionSet FullyInlinedSet;
|
|
ImmutableFunctionSet::Factory SetFactory;
|
|
|
|
SILOptFunctionBuilder FuncBuilder(*this);
|
|
for (auto &F : *M) {
|
|
// Don't inline into thunks, even transparent callees.
|
|
if (F.isThunk())
|
|
continue;
|
|
|
|
// Skip deserialized functions.
|
|
if (F.wasDeserializedCanonical())
|
|
continue;
|
|
|
|
runOnFunctionRecursively(FuncBuilder, &F,
|
|
FullApplySite(), FullyInlinedSet, SetFactory,
|
|
SetFactory.getEmptySet(), CHA);
|
|
|
|
// The inliner splits blocks at call sites. Re-merge trivial branches
|
|
// to reestablish a canonical CFG.
|
|
mergeBasicBlocks(&F);
|
|
}
|
|
|
|
if (!ShouldCleanup)
|
|
return;
|
|
|
|
// Now that we've inlined some functions, clean up. If there are any
|
|
// transparent functions that are deserialized from another module that are
|
|
// now unused, just remove them from the module.
|
|
//
|
|
// We do this with a simple linear scan, because transparent functions that
|
|
// reference each other have already been flattened.
|
|
for (auto FI = M->begin(), E = M->end(); FI != E; ) {
|
|
SILFunction &F = *FI++;
|
|
|
|
invalidateAnalysis(&F, SILAnalysis::InvalidationKind::Everything);
|
|
|
|
if (F.getRefCount() != 0) continue;
|
|
|
|
// Leave non-transparent functions alone.
|
|
if (!F.isTransparent())
|
|
continue;
|
|
|
|
// We discard functions that don't have external linkage,
|
|
// e.g. deserialized functions, internal functions, and thunks.
|
|
// Being marked transparent controls this.
|
|
if (F.isPossiblyUsedExternally()) continue;
|
|
|
|
// ObjC functions are called through the runtime and are therefore alive
|
|
// even if not referenced inside SIL.
|
|
if (F.getRepresentation() == SILFunctionTypeRepresentation::ObjCMethod)
|
|
continue;
|
|
|
|
// Okay, just erase the function from the module.
|
|
FuncBuilder.eraseFunction(&F);
|
|
}
|
|
}
|
|
|
|
};
|
|
} // end anonymous namespace
|
|
|
|
/// Factory for the mandatory inlining pass: allocates a new MandatoryInlining
/// transform with `new` and returns it as a SILTransform pointer.
SILTransform *swift::createMandatoryInlining() {
  SILTransform *Pass = new MandatoryInlining();
  return Pass;
}
|