//===------- OptimizeHopToExecutor.cpp - optimize hop_to_executor ---------===// // // This source file is part of the Swift.org open source project // // Copyright (c) 2014 - 2017 Apple Inc. and the Swift project authors // Licensed under Apache License v2.0 with Runtime Library Exception // // See https://swift.org/LICENSE.txt for license information // See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors // //===----------------------------------------------------------------------===// #define DEBUG_TYPE "optimize-hop-to-executor" #include "swift/SIL/ApplySite.h" #include "swift/SIL/MemAccessUtils.h" #include "swift/Basic/Assertions.h" #include "swift/SIL/SILBuilder.h" #include "swift/SIL/SILFunction.h" #include "swift/SILOptimizer/PassManager/Transforms.h" using namespace swift; namespace { /// Optimizes hop_to_executor instructions. /// /// * Redundant hop_to_executor elimination: if a hop_to_executor is dominated /// by another hop_to_executor with the same operand, it is eliminated: /// \code /// hop_to_executor %a /// ... // no suspension points /// hop_to_executor %a // can be eliminated /// \endcode /// /// * Dead hop_to_executor elimination: if a hop_to_executor is not followed by /// any code which requires to run on its actor's executor, it is eliminated: /// \code /// hop_to_executor %a /// ... // no instruction which require to run on %a /// return /// \endcode class OptimizeHopToExecutor { private: typedef llvm::DenseMap Actors; /// Basic-block specific information used for dataflow analysis. struct BlockState { enum { NotSet = -2, // Used in the forward dataflow in removeRedundantHopToExecutors. Unknown = -1, // Used in the backward dataflow in removeDeadHopToExecutors. ExecutorNeeded = Unknown, NoExecutorNeeded = 0, }; static_assert(ExecutorNeeded == Unknown, "needed for merge() to correctly merge ExecutorNeeded and NoExecutorNeeded"); /// The backlink to the SILBasicBlock. SILBasicBlock *block = nullptr; /// The value at the entry (i.e. the first instruction) of the block. int entry = NotSet; /// The value of the block itself. It's NotSet if the block has no /// significant instructions for the dataflow. int intra = NotSet; /// The value at the exit (i.e. after the terminator) of the block. int exit = NotSet; /// Merge two values at a control-flow merge point. static int merge(int lhs, int rhs) { if (lhs == NotSet || lhs == rhs) return rhs; if (rhs == NotSet) return lhs; return Unknown; } }; SILFunction *function; /// All block states. std::vector blockStates; llvm::DenseMap block2State; void collectActors(Actors &actors); void allocateBlockStates(); void solveDataflowForward(); void solveDataflowBackward(); bool removeRedundantHopToExecutors(const Actors &actors); bool removeDeadHopToExecutors(); static void updateNeedExecutor(int &needExecutor, SILInstruction *inst); static bool needsExecutor(SILInstruction *inst); static bool isGlobalMemory(SILValue addr); public: OptimizeHopToExecutor(SILFunction *function) : function(function) { } /// The entry point to the transformation. bool run(); void dump(); }; /// Search for hop_to_executor instructions and add their operands to \p actors. void OptimizeHopToExecutor::collectActors(Actors &actors) { int uniqueActorID = 0; if (auto isolation = function->getActorIsolation(); isolation && isolation->isCallerIsolationInheriting()) { actors[function->maybeGetIsolatedArgument()] = uniqueActorID++; } for (SILBasicBlock &block : *function) { for (SILInstruction &inst : block) { if (auto *hop = dyn_cast(&inst)) { auto oper = hop->getOperand(); if (actors.count(oper)) continue; actors[oper] = uniqueActorID++; } } } } /// Initialize blockStates and block2State. void OptimizeHopToExecutor::allocateBlockStates() { // Resizing is mandatory! Just adding states with push_back would potentially // invalidate previous pointers to states, which are stored in block2State. blockStates.resize(function->size()); for (auto blockAndIdx : llvm::enumerate(*function)) { BlockState *state = &blockStates[blockAndIdx.index()]; state->block = &blockAndIdx.value(); block2State[&blockAndIdx.value()] = state; } } /// Solve the dataflow in forward direction. void OptimizeHopToExecutor::solveDataflowForward() { bool changed = false; bool firstRound = true; do { changed = false; for (BlockState &state : blockStates) { int newEntry = state.entry; for (SILBasicBlock *pred : state.block->getPredecessorBlocks()) { newEntry = BlockState::merge(newEntry, block2State[pred]->exit); } if (newEntry != state.entry || firstRound) { changed = true; state.entry = newEntry; if (state.intra == BlockState::NotSet) state.exit = state.entry; } } firstRound = false; } while (changed); } /// Solve the dataflow in backward direction. void OptimizeHopToExecutor::solveDataflowBackward() { bool changed = false; bool firstRound = true; do { changed = false; for (BlockState &state : llvm::reverse(blockStates)) { int newExit = state.exit; for (SILBasicBlock *succ : state.block->getSuccessorBlocks()) { newExit = BlockState::merge(newExit, block2State[succ]->entry); } if (newExit != state.exit || firstRound) { changed = true; state.exit = newExit; if (state.intra == BlockState::NotSet) state.entry = state.exit; } } firstRound = false; } while (changed); } /// Returns true if \p inst is a suspension point or an async call. static bool isSuspensionPoint(SILInstruction *inst) { if (auto applySite = FullApplySite::isa(inst)) { if (applySite.isAsync() && !applySite.isCallerIsolationInheriting()) return true; return false; } if (isa(inst)) return true; return false; } /// Remove hop_to_executor instructions which are dominated by another /// hop_to_executor with the same operand. /// See the top-level comment on OptimizeHopToExecutor for details. bool OptimizeHopToExecutor::removeRedundantHopToExecutors(const Actors &actors) { // Initialize the dataflow. for (BlockState &state : blockStates) { state.entry = [&]() -> int { if (state.block != function->getEntryBlock()) { return BlockState::NotSet; } if (auto isolation = function->getActorIsolation(); isolation && isolation->isCallerIsolationInheriting()) { auto *fArg = cast(function->maybeGetIsolatedArgument()); return actors.lookup(SILValue(fArg)); } return BlockState::Unknown; }(); state.intra = BlockState::NotSet; for (SILInstruction &inst : *state.block) { if (isSuspensionPoint(&inst)) { // A suspension point (like an async call) can switch to another // executor. state.intra = BlockState::Unknown; } else if (auto *hop = dyn_cast(&inst)) { state.intra = actors.lookup(hop->getOperand()); } } state.exit = state.intra; } solveDataflowForward(); // Last step: do the transformation. bool changed = false; for (BlockState &state : blockStates) { // Iterating over all instructions is the same logic as above, just start // with the final entry-value. int actorIdx = state.entry; for (auto iter = state.block->begin(); iter != state.block->end();) { SILInstruction *inst = &*iter++; if (isSuspensionPoint(inst)) { actorIdx = BlockState::Unknown; continue; } auto *hop = dyn_cast(inst); if (!hop) continue; int newActorIdx = actors.lookup(hop->getOperand()); if (newActorIdx != actorIdx) { actorIdx = newActorIdx; continue; } if (hop->isMandatory()) continue; // There is a dominating hop_to_executor with the same operand. LLVM_DEBUG(llvm::dbgs() << "Redundant executor " << *hop); hop->eraseFromParent(); changed = true; } assert(actorIdx == state.exit); } return changed; } /// Remove hop_to_executor instructions which are not followed by any code which /// requires to run on the actor's executor. /// See the top-level comment on OptimizeHopToExecutor for details. bool OptimizeHopToExecutor::removeDeadHopToExecutors() { // Initialize the dataflow: go bottom up and if we see any instruction which // might require a dedicated executor, don't remove a preceding // hop_to_executor instruction. for (BlockState &state : blockStates) { state.exit = (state.block->getSuccessors().empty() ? BlockState::NoExecutorNeeded : BlockState::NotSet); state.intra = BlockState::NotSet; for (SILInstruction &inst : llvm::reverse(*state.block)) { updateNeedExecutor(state.intra, &inst); } state.entry = state.intra; } solveDataflowBackward(); // Last step: do the transformation. bool changed = false; for (BlockState &state : blockStates) { // Iterating over all instructions is the same logic as above, just start // with the final exit-value. int needActor = state.exit; for (auto iter = state.block->rbegin(); iter != state.block->rend();) { SILInstruction *inst = &*iter++; auto *hop = dyn_cast(inst); if (hop && !hop->isMandatory() && needActor == BlockState::NoExecutorNeeded) { // Remove the dead hop_to_executor. LLVM_DEBUG(llvm::dbgs() << "Dead executor " << *hop); hop->eraseFromParent(); changed = true; continue; } updateNeedExecutor(needActor, inst); } assert(needActor == state.entry); } return changed; } /// Updates \p needExecutor for the dataflow evaluation. void OptimizeHopToExecutor::updateNeedExecutor(int &needExecutor, SILInstruction *inst) { if (isa(inst)) { needExecutor = BlockState::NoExecutorNeeded; return; } if (isSuspensionPoint(inst)) { needExecutor = BlockState::NoExecutorNeeded; return; } if (needsExecutor(inst)) needExecutor = BlockState::ExecutorNeeded; } /// Returns true if \p inst needs to run on a specific executor. bool OptimizeHopToExecutor::needsExecutor(SILInstruction *inst) { // TODO: Is this the correct thing to check? if (auto *load = dyn_cast(inst)) { return isGlobalMemory(load->getOperand()); } if (auto *store = dyn_cast(inst)) { return isGlobalMemory(store->getDest()); } if (auto *copy = dyn_cast(inst)) { return isGlobalMemory(copy->getSrc()) || isGlobalMemory(copy->getDest()); } // Certain builtins have memory effects that are known to not depend on // the current executor. if (auto builtin = dyn_cast(inst)) { if (auto kind = builtin->getBuiltinKind()) { switch (*kind) { // The initialization of the default actor header isn't // executor-dependent, and it's important to recognize here because // we really want to eliminate the initial hop to the generic executor // in async actor initializers. // // Now, we can't safely hop to the actor before its initialization is // complete, and that includes the default-actor header. But this // optimization pass never causes us to hop to an executor *earlier* // than we would have otherwise. If we wanted to do that, we'd need // to have some way to ensure we don't skip over the initialization of // the stored properties as well, which is important even for default // actors because the mechanics of destroying an incomplete object don't // account for it being a "zombie" current executor in the runtime. case BuiltinValueKind::InitializeDefaultActor: return false; default: break; } } } // BeginBorrowInst and EndBorrowInst currently have // MemoryBehavior::MayHaveSideEffects. Fixing that is tracked by // rdar://111875527. These instructions only have effects in the sense of // memory dependencies, which aren't relevant for hop_to_executor // elimination. if (isa(inst) || isa(inst)) { return false; } // A call to a caller isolation inheriting function does not create dead // executors since caller isolation inheriting functions do not hop in their // prologue. if (auto fas = FullApplySite::isa(inst); fas && fas.isAsync() && fas.isCallerIsolationInheriting()) { return true; } // Treat returns from a caller isolation inheriting function as requiring the // liveness of hop to executors before it. // // DISCUSSION: We do this since callers of callee functions with isolation // inheriting isolation are not required to have a hop after the return from // the callee function... so we have no guarantee that there isn't code in the // caller that needs this hop to executor to run on the correct actor. if (isa(inst)) { if (auto isolation = inst->getFunction()->getActorIsolation(); isolation && isolation->isCallerIsolationInheriting()) { return true; } } return inst->mayReadOrWriteMemory(); } bool OptimizeHopToExecutor::isGlobalMemory(SILValue addr) { // TODO: use escape analysis to rule out locally allocated non-stack objects. SILValue base = getAccessBase(addr); return !isa(base); } bool OptimizeHopToExecutor::run() { Actors actors; collectActors(actors); if (actors.empty()) return false; allocateBlockStates(); bool changed = removeRedundantHopToExecutors(actors); changed |= removeDeadHopToExecutors(); return changed; } LLVM_ATTRIBUTE_USED void OptimizeHopToExecutor::dump() { for (BlockState &state : blockStates) { llvm::dbgs() << "bb" << state.block->getDebugID() << ": entry=" << state.entry << ", intra=" << state.intra << ", exit=" << state.exit << '\n'; } } class OptimizeHopToExecutorPass : public SILFunctionTransform { /// The entry point to the transformation. void run() override { if (!getFunction()->isAsync()) return; OptimizeHopToExecutor optimizeHopToExecutor(getFunction()); if (optimizeHopToExecutor.run()) invalidateAnalysis(SILAnalysis::InvalidationKind::Instructions); } }; } // end anonymous namespace SILTransform *swift::createOptimizeHopToExecutor() { return new OptimizeHopToExecutorPass(); }