//===--- DeadStoreElimination.cpp - SIL Dead Store Elimination -----------===//
//
// This source file is part of the Swift.org open source project
//
// Copyright (c) 2014 - 2016 Apple Inc. and the Swift project authors
// Licensed under Apache License v2.0 with Runtime Library Exception
//
// See http://swift.org/LICENSE.txt for license information
// See http://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
//
//===----------------------------------------------------------------------===//
///
/// \file
///
/// This pass eliminates dead stores across basic blocks.
///
/// A store is dead if, after the store has occurred:
///
/// 1. The stored-to pointer is not used along any path to program exit.
/// 2. The stored-to pointer is overwritten by another store before any
///    potential use of the pointer.
///
/// Dead store elimination (DSE) eliminates such stores by:
///
/// 1. Introducing a notion of an LSLocation that is used to model object
///    fields. (See below for more details).
///
/// 2. Performing a post-order walk over the control flow graph, tracking any
///    LSLocations that are read from or stored into in each basic block. After
///    eliminating any dead stores in single blocks, it computes a genset and
///    killset for each block. The genset keeps a list of upward visible stores
///    and the killset keeps a list of LSLocations this basic block reads
///    (kills).
///
/// 3. An optimistic iterative dataflow is performed on the gensets and
///    killsets until convergence.
///
/// At the core of DSE is the LSLocation class. An LSLocation is an
/// abstraction of an object field in the program. It consists of a base and a
/// projection path to the field accessed.
///
/// When a load or store instruction is encountered, the memory is broken down
/// to its indivisible components, i.e. aggregates are broken down to their
/// individual fields using the expand function. This gives the flexibility to
/// find exactly which part of the store is alive and which part is dead.
///
/// After the live parts of the store are determined, they are merged into the
/// minimum number of stores possible using the reduce function. This is done
/// so that we do not bloat SIL IR.
///
/// Another way to implement the DSE optimization is to keep the instructions
/// that read and/or write memory without breaking up the memory read/written
/// using the ProjectionPath. However, this can easily lead to loss of
/// opportunities, e.g. a read that only kills part of a store may need to be
/// treated as killing the entire store. The tradeoff is that the expanded,
/// ProjectionPath-based representation used here consumes more memory.
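///
/// As a hand-written illustration (not drawn from a test case), a fully dead
/// store and a partially dead store look roughly like this in SIL:
///
///   store %v0 to %addr : $*Int       // dead: overwritten before any read
///   store %v1 to %addr : $*Int
///
///   store %s to %addr2 : $*S         // S has fields a and b
///   %fa = struct_element_addr %addr2 : $*S, #S.a
///   store %v2 to %fa : $*Int         // overwrites only S.a
///
/// In the first case the whole first store is removed. In the second case the
/// first store is expanded into its per-field components; the component
/// covering S.a is dead (it is overwritten before any read), while the
/// component covering S.b stays alive and is re-emitted as a single smaller
/// store by the reduce step.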
///
//===----------------------------------------------------------------------===//

#define DEBUG_TYPE "sil-dead-store-elim"
#include "swift/SIL/Projection.h"
#include "swift/SIL/SILArgument.h"
#include "swift/SIL/SILBuilder.h"
#include "swift/SILOptimizer/Analysis/AliasAnalysis.h"
#include "swift/SILOptimizer/Analysis/EscapeAnalysis.h"
#include "swift/SILOptimizer/Analysis/PostOrderAnalysis.h"
#include "swift/SILOptimizer/Analysis/ValueTracking.h"
#include "swift/SILOptimizer/PassManager/Passes.h"
#include "swift/SILOptimizer/PassManager/Transforms.h"
#include "swift/SILOptimizer/Utils/CFG.h"
#include "swift/SILOptimizer/Utils/LSBase.h"
#include "swift/SILOptimizer/Utils/Local.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"

using namespace swift;

STATISTIC(NumDeadStores, "Number of dead stores removed");
STATISTIC(NumPartialDeadStores, "Number of partial dead stores removed");

/// ComputeMaxStoreSet - If we ignore all reads, what is the max store set that
/// can reach a particular point in a basic block? This helps in generating
/// the genset and killset, i.e. if there is no upward visible store that can
/// reach the beginning of a basic block, then we know that the genset and
/// killset for the stored location need not be set for the basic block.
///
/// BuildGenKillSet - Build the genset and killset of the basic block.
///
/// PerformDSE - Perform the actual dead store elimination.
enum class DSEKind : unsigned {
  ComputeMaxStoreSet = 0,
  BuildGenKillSet = 1,
  PerformDSE = 2,
};

/// Return the deallocate stack instructions corresponding to the given
/// AllocStackInst.
static llvm::SmallVector<DeallocStackInst *, 4>
findDeallocStackInst(AllocStackInst *ASI) {
  llvm::SmallVector<DeallocStackInst *, 4> DSIs;
  for (auto UI = ASI->use_begin(), E = ASI->use_end(); UI != E; ++UI) {
    if (DeallocStackInst *D = dyn_cast<DeallocStackInst>(UI->getUser())) {
      DSIs.push_back(D);
    }
  }
  return DSIs;
}

//===----------------------------------------------------------------------===//
//                            Utility Functions
//===----------------------------------------------------------------------===//

static inline bool isComputeMaxStoreSet(DSEKind Kind) {
  return Kind == DSEKind::ComputeMaxStoreSet;
}

static inline bool isBuildingGenKillSet(DSEKind Kind) {
  return Kind == DSEKind::BuildGenKillSet;
}

static inline bool isPerformingDSE(DSEKind Kind) {
  return Kind == DSEKind::PerformDSE;
}

/// Returns true if this is an instruction that may have side effects in a
/// general sense but is inert from a load/store perspective.
static bool isDeadStoreInertInstruction(SILInstruction *Inst) {
  switch (Inst->getKind()) {
  case ValueKind::StrongRetainInst:
  case ValueKind::StrongRetainUnownedInst:
  case ValueKind::UnownedRetainInst:
  case ValueKind::RetainValueInst:
  case ValueKind::DeallocStackInst:
  case ValueKind::CondFailInst:
  case ValueKind::IsUniqueInst:
  case ValueKind::IsUniqueOrPinnedInst:
  case ValueKind::FixLifetimeInst:
    return true;
  default:
    return false;
  }
}

//===----------------------------------------------------------------------===//
//                       Basic Block Location State
//===----------------------------------------------------------------------===//

namespace {

/// If this function has too many basic blocks or too many locations, it may
/// take a long time to compute the genset and killset. The number of memory
/// behavior or alias queries we need to do in the worst case is roughly
/// linear in (# of BBs) x (# of locations).
///
/// We could run DSE on functions with up to 256 basic blocks and 256
/// locations, which is a large function.
constexpr unsigned MaxLSLocationBBMultiplicationNone = 256*256;

/// We could run optimistic DSE on functions with fewer than 64 basic blocks
/// and 64 locations, which is a sizeable function.
constexpr unsigned MaxLSLocationBBMultiplicationPessimistic = 64*64;

/// If a large store is broken down into too many smaller stores, bail out.
/// Currently, we only do partial dead store elimination if we can form a
/// single contiguous non-dead store.
constexpr unsigned MaxPartialDeadStoreCountLimit = 1;

/// Forward declaration.
class DSEContext;

/// BlockState summarizes how LSLocations are used in a basic block.
///
/// Initially the BBWriteSetOut is empty. Before a basic block is processed, it
/// is initialized to the intersection of the BBWriteSetIns of all successors
/// of the basic block.
///
/// BBWriteSetMid is initialized to BBWriteSetOut of the current basic block
/// before the instructions in the basic block are processed.
///
/// Initially BBWriteSetIn is set to true. After the basic block is processed,
/// if its BBWriteSetMid is different from BBWriteSetIn, BBWriteSetIn is
/// assigned the value of BBWriteSetMid and the data flow is rerun on the
/// current basic block's predecessors.
///
/// Instructions in each basic block are processed in post-order as follows:
///
/// 1. When a store instruction is encountered, the stored location is tracked.
///
/// 2. When a load instruction is encountered, remove the loaded location and
///    any location it may alias with from the BBWriteSetMid.
///
/// 3. When an instruction reads from memory in an unknown way, the
///    BBWriteSetMid bit is cleared if the instruction can read the
///    corresponding LSLocation.
///
class BlockState {
public:
  /// The basic block this BlockState represents.
  SILBasicBlock *BB;

  /// Keep the number of LSLocations in the LocationVault.
  unsigned LocationNum;

  /// A bit vector for which the ith bit represents the ith LSLocation in
  /// LocationVault. If the bit is set, then the location currently has an
  /// upward visible store at the end of the basic block.
  llvm::SmallBitVector BBWriteSetOut;

  /// A bit vector for which the ith bit represents the ith LSLocation in
  /// LocationVault. If the bit is set, then the location currently has an
  /// upward visible store in the middle of the basic block.
  llvm::SmallBitVector BBWriteSetMid;

  /// A bit vector for which the ith bit represents the ith LSLocation in
  /// LocationVault. If a bit in the vector is set, then the location has an
  /// upward visible store at the beginning of the basic block.
  llvm::SmallBitVector BBWriteSetIn;

  /// A bit vector for which the ith bit represents the ith LSLocation in
  /// LocationVault. If the bit is set, then the current basic block
  /// generates an upward visible store.
  llvm::SmallBitVector BBGenSet;

  /// A bit vector for which the ith bit represents the ith LSLocation in
  /// LocationVault. If the bit is set, then the current basic block
  /// kills an upward visible store.
  llvm::SmallBitVector BBKillSet;

  /// A bit vector to keep the maximum number of stores that can reach a
  /// certain point of the basic block. If a bit is set, that means there is
  /// potentially an upward visible store to the location at that particular
  /// point of the basic block.
  llvm::SmallBitVector BBMaxStoreSet;

  /// If a bit in the vector is set, then the location is dead at the end of
  /// this basic block.
  llvm::SmallBitVector BBDeallocateLocation;

  /// The dead stores in the current basic block.
  llvm::DenseSet<SILInstruction *> DeadStores;

  /// Keeps track of what stores to generate after the data flow stabilizes.
  /// These stores come from partial dead stores.
  ///
  /// The first SILValue keeps the address of the live store and the second
  /// SILValue keeps the value of the store.
  llvm::DenseMap<SILValue, SILValue> LiveStores;

  /// Constructors.
  BlockState(SILBasicBlock *B) : BB(B) {}

  /// Return the current basic block.
  SILBasicBlock *getBB() const { return BB; }

  /// Initialize the bitvectors for the current basic block.
  void init(DSEContext &Ctx, bool Optimistic);

  /// Check whether the BBWriteSetIn has changed. If it has, we need to rerun
  /// the data flow on this block's predecessors to reach the fixed point.
  bool updateBBWriteSetIn(llvm::SmallBitVector &X);

  /// Functions to manipulate the write set.
  void startTrackingLocation(llvm::SmallBitVector &BV, unsigned bit);
  void stopTrackingLocation(llvm::SmallBitVector &BV, unsigned bit);
  bool isTrackingLocation(llvm::SmallBitVector &BV, unsigned bit);

  /// Set the store bit for the stack slots deallocated in this basic block.
  void initStoreSetAtEndOfBlock(DSEContext &Ctx);
};

} // end anonymous namespace

bool BlockState::updateBBWriteSetIn(llvm::SmallBitVector &X) {
  if (BBWriteSetIn == X)
    return false;
  BBWriteSetIn = X;
  return true;
}

void BlockState::startTrackingLocation(llvm::SmallBitVector &BV, unsigned i) {
  BV.set(i);
}

void BlockState::stopTrackingLocation(llvm::SmallBitVector &BV, unsigned i) {
  BV.reset(i);
}

bool BlockState::isTrackingLocation(llvm::SmallBitVector &BV, unsigned i) {
  return BV.test(i);
}

//===----------------------------------------------------------------------===//
//                         Top Level Implementation
//===----------------------------------------------------------------------===//

namespace {

class DSEContext {
  enum class ProcessKind {
    ProcessOptimistic = 0,
    ProcessPessimistic = 1,
    ProcessNone = 2,
  };

private:
  /// The module we are currently processing.
  SILModule *Mod;

  /// The function we are currently processing.
  SILFunction *F;

  /// Pass manager, used to get various analyses.
  SILPassManager *PM;

  /// Alias Analysis.
  AliasAnalysis *AA;

  /// Escape Analysis.
  EscapeAnalysis *EA;

  /// Type Expansion Analysis.
  TypeExpansionAnalysis *TE;

  /// Allocator.
  llvm::BumpPtrAllocator BPA;

  /// Map every basic block to its location state.
  llvm::DenseMap<SILBasicBlock *, BlockState *> BBToLocState;

  /// Keeps the actual BlockStates.
  std::vector<BlockState> BlockStates;

  /// Keeps all the locations for the current function. The BitVector in each
  /// BlockState is then laid on top of it to keep track of which LSLocation
  /// has an upward visible store.
  std::vector<LSLocation> LocationVault;

  /// Keeps a list of basic blocks that have StoreInsts. If a basic block does
  /// not have a StoreInst, we do not actually perform the last iteration where
  /// DSE is performed on the basic block.
  ///
  /// NOTE: This is never populated for functions which only require 1
  /// data flow iteration. For functions that require more than 1 iteration of
  /// the data flow, this is populated the first time the function is walked,
  /// i.e. when we generate the genset and killset.
  llvm::DenseSet<SILBasicBlock *> BBWithStores;

  /// Contains a map from a location to its index in the LocationVault.
  /// Used to facilitate fast location-to-index lookup.
  LSLocationIndexMap LocToBitIndex;

  /// Keeps a map between the accessed SILValue and the location.
  LSLocationBaseMap BaseToLocIndex;

  /// Return the BlockState for the given basic block.
  BlockState *getBlockState(SILBasicBlock *B) { return BBToLocState[B]; }

  /// Return the BlockState for the basic block this instruction belongs to.
  BlockState *getBlockState(SILInstruction *I) {
    return getBlockState(I->getParent());
  }

  /// The written LSLocation has been extracted, expanded and mapped to a bit
  /// position in the bitvector. Update the max store set using that bit
  /// position.
  void processWriteForMaxStoreSet(BlockState *S, unsigned bit);

  /// There is a read to a location; expand the location into individual fields
  /// before processing them.
  void processRead(SILInstruction *Inst, BlockState *S, SILValue M, DSEKind K);
  void processReadForGenKillSet(BlockState *S, unsigned bit);
  void processReadForDSE(BlockState *S, unsigned Bit);

  /// There is a write to a location; expand the location into individual
  /// fields before processing them.
  void processWrite(SILInstruction *Inst, BlockState *S, SILValue V,
                    SILValue M, DSEKind K);
  void processWriteForGenKillSet(BlockState *S, unsigned bit);
  bool processWriteForDSE(BlockState *S, unsigned bit);

  /// Process instructions. Extract locations from SIL LoadInst.
  void processLoadInst(SILInstruction *Inst, DSEKind Kind);

  /// Process instructions. Extract locations from SIL StoreInst.
  void processStoreInst(SILInstruction *Inst, DSEKind Kind);

  /// Process instructions. Extract locations from SIL DebugValueAddrInst.
  /// DebugValueAddrInst may be promoted to DebugValue; when this is done,
  /// DebugValueAddrInst is effectively a read on the location.
  void processDebugValueAddrInst(SILInstruction *I, DSEKind Kind);
  void processDebugValueAddrInstForGenKillSet(SILInstruction *I);
  void processDebugValueAddrInstForDSE(SILInstruction *I);

  /// Process instructions. Extract locations from unknown memory inst.
  void processUnknownReadInst(SILInstruction *Inst, DSEKind Kind);
  void processUnknownReadInstForGenKillSet(SILInstruction *Inst);
  void processUnknownReadInstForDSE(SILInstruction *Inst);

  /// Check whether the instruction invalidates any locations due to a change
  /// in its location Base.
  ///
  /// This is to handle a case like this:
  ///
  ///   class Foo { var a : Int = 12 }
  ///   for _ in 0 ... x {
  ///     x = Foo();
  ///     x.a = 13
  ///   }
  ///   x.a = 12
  ///
  /// In this case, DSE cannot remove the x.a = 13 inside the loop.
  ///
  /// To do this, when the algorithm reaches the beginning of the basic block
  /// in the loop it will need to invalidate the location in the
  /// BBWriteSetMid, i.e. whenever the base of the location is changed.
  ///
  /// If not, on the second iteration, the intersection of the successors of
  /// the loop basic block will have a store to x.a and therefore x.a = 13
  /// could incorrectly be considered dead.
  void invalidateLSLocationBase(SILInstruction *Inst, DSEKind Kind);
  void invalidateLSLocationBaseForGenKillSet(SILInstruction *Inst);
  void invalidateLSLocationBaseForDSE(SILInstruction *Inst);

  /// Get the bit representing the location in the LocationVault.
  unsigned getLocationBit(const LSLocation &L);

public:
  /// Constructor.
  DSEContext(SILFunction *F, SILModule *M, SILPassManager *PM,
             AliasAnalysis *AA, EscapeAnalysis *EA, TypeExpansionAnalysis *TE)
      : Mod(M), F(F), PM(PM), AA(AA), EA(EA), TE(TE) {}

  /// Entry point for dead store elimination.
  bool run();

  /// Run the iterative data flow to converge the BBWriteSetIn.
  void runIterativeDSE();

  /// Returns the escape analysis we use.
  EscapeAnalysis *getEA() { return EA; }

  /// Returns the function currently being processed.
  SILFunction *getFn() { return F; }

  /// Returns the location vault of the current function.
  std::vector<LSLocation> &getLocationVault() { return LocationVault; }

  /// Use a set of ad hoc rules to tell whether we should run a pessimistic
  /// one-iteration data flow on the function.
  ProcessKind getProcessFunctionKind(unsigned StoreCount);

  /// Process the instructions of the basic block and perform the actual dead
  /// store elimination.
  void processBasicBlockForDSE(SILBasicBlock *BB, bool Optimistic);

  /// Compute the genset and killset for the current basic block.
  void processBasicBlockForGenKillSet(SILBasicBlock *BB);

  /// Compute the BBWriteSetOut and BBWriteSetIn for the current basic
  /// block with the generated gen and kill set.
  bool processBasicBlockWithGenKillSet(SILBasicBlock *BB);

  /// Intersect the successors' BBWriteSetIns.
  void mergeSuccessorLiveIns(SILBasicBlock *BB);

  /// Update the BlockState based on the given instruction.
  void processInstruction(SILInstruction *I, DSEKind Kind);
};

} // end anonymous namespace

void BlockState::init(DSEContext &Ctx, bool Optimistic) {
  std::vector<LSLocation> &LV = Ctx.getLocationVault();
  LocationNum = LV.size();

  // For functions that require just 1 iteration of the data flow to converge,
  // we set the initial state of BBWriteSetIn to 0.
  //
  // For other functions, the initial state of BBWriteSetIn should be all 1's.
  // Otherwise the dataflow solution could be too conservative.
  //
  // Consider this case: the dead store by var a = 10 before the loop will not
  // be eliminated if the BBWriteSetIn is set to 0 initially.
  //
  //   var a = 10
  //   for _ in 0...1024 {}
  //   a = 10
  //
  // However, by doing so, we can only eliminate the dead stores after the
  // data flow stabilizes.
  //
  BBWriteSetIn.resize(LocationNum, Optimistic);
  BBWriteSetOut.resize(LocationNum, false);
  BBWriteSetMid.resize(LocationNum, false);

  // GenSet and KillSet are initially empty.
  BBGenSet.resize(LocationNum, false);
  BBKillSet.resize(LocationNum, false);

  // MaxStoreSet is optimistically set to true initially.
  BBMaxStoreSet.resize(LocationNum, true);

  // DeallocateLocation is initially empty.
  BBDeallocateLocation.resize(LocationNum, false);
}

unsigned DSEContext::getLocationBit(const LSLocation &Loc) {
  // Return the bit position of the given Loc in the LocationVault. The bit
  // position is then used to set/reset the bitvector kept by each BlockState.
  //
  // We should have the location populated by enumerateLSLocation at this
  // point.
  auto Iter = LocToBitIndex.find(Loc);
  assert(Iter != LocToBitIndex.end() && "LSLocation should have been enum'ed");
  return Iter->second;
}

DSEContext::ProcessKind
DSEContext::getProcessFunctionKind(unsigned StoreCount) {
  // Don't optimize functions that are marked as 'no.optimize'.
  if (!F->shouldOptimize())
    return ProcessKind::ProcessNone;

  // Really no point optimizing here as there are no dead stores.
  if (StoreCount < 1)
    return ProcessKind::ProcessNone;

  bool RunOneIteration = true;
  unsigned BBCount = 0;
  unsigned LocationCount = LocationVault.size();

  // If all basic blocks will have their successors processed when the basic
  // blocks in the function are iterated in post order, then this function can
  // be processed in one iteration, i.e. there is no need to generate the
  // genset and killset.
  auto *PO = PM->getAnalysis<PostOrderAnalysis>()->get(F);
  llvm::DenseSet<SILBasicBlock *> HandledBBs;
  for (SILBasicBlock *B : PO->getPostOrder()) {
    ++BBCount;
    for (auto &X : B->getSuccessors()) {
      if (HandledBBs.find(X) == HandledBBs.end()) {
        RunOneIteration = false;
        break;
      }
    }
    HandledBBs.insert(B);
  }

  // Data flow may take too long to run.
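  //
  // As a concrete illustration of the limits defined above: the default
  // cutoffs are 256*256 = 65536 and 64*64 = 4096. A function with 300 blocks
  // and 300 locations (90000) is therefore not processed at all, one with 100
  // blocks and 100 locations (10000) only gets the pessimistic
  // single-iteration data flow, and one with 50 blocks and 50 locations
  // (2500) is eligible for the optimistic iterative data flow (unless the
  // one-iteration fast path below applies first).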
  if (BBCount * LocationCount > MaxLSLocationBBMultiplicationNone)
    return ProcessKind::ProcessNone;

  // This function's data flow would converge in 1 iteration.
  if (RunOneIteration)
    return ProcessKind::ProcessPessimistic;

  // We run one pessimistic data flow to do dead store elimination on
  // the function.
  if (BBCount * LocationCount > MaxLSLocationBBMultiplicationPessimistic)
    return ProcessKind::ProcessPessimistic;

  return ProcessKind::ProcessOptimistic;
}

void DSEContext::processBasicBlockForGenKillSet(SILBasicBlock *BB) {
  // Compute the MaxStoreSet at the end of the basic block.
  auto *BBState = getBlockState(BB);
  if (BB->succ_empty()) {
    BBState->BBMaxStoreSet |= BBState->BBDeallocateLocation;
  } else {
    auto Iter = BB->succ_begin();
    BBState->BBMaxStoreSet = getBlockState(*Iter)->BBMaxStoreSet;
    Iter = std::next(Iter);
    for (auto EndIter = BB->succ_end(); Iter != EndIter; ++Iter) {
      BBState->BBMaxStoreSet &= getBlockState(*Iter)->BBMaxStoreSet;
    }
  }

  // Compute the genset and killset.
  //
  // Also compute the MaxStoreSet at the current position of the basic block.
  //
  // This helps in generating the genset and killset. If there is no way a
  // location can have an upward visible store at a particular point in the
  // basic block, we do not need to turn on the genset and killset for the
  // location.
  //
  // Turning on the genset and killset can be costly as it involves querying
  // the AA interface.
  for (auto I = BB->rbegin(), E = BB->rend(); I != E; ++I) {
    // Only process store insts.
    if (isa<StoreInst>(*I)) {
      if (BBWithStores.find(BB) == BBWithStores.end())
        BBWithStores.insert(BB);
      processStoreInst(&(*I), DSEKind::ComputeMaxStoreSet);
    }

    // Compute the genset and killset for this instruction.
    processInstruction(&(*I), DSEKind::BuildGenKillSet);
  }
}

bool DSEContext::processBasicBlockWithGenKillSet(SILBasicBlock *BB) {
  // Compute the BBWriteSetOut at the end of the basic block.
  mergeSuccessorLiveIns(BB);

  // Compute the BBWriteSet at the beginning of the basic block.
  BlockState *S = getBlockState(BB);
  S->BBWriteSetMid = S->BBWriteSetOut;
  S->BBWriteSetMid.reset(S->BBKillSet);
  S->BBWriteSetMid |= S->BBGenSet;

  // If BBWriteSetIn changes, then keep iterating until we reach a fixed point.
  return S->updateBBWriteSetIn(S->BBWriteSetMid);
}

void DSEContext::processBasicBlockForDSE(SILBasicBlock *BB, bool Optimistic) {
  // If we know this is not a one-iteration function, which means its
  // BBWriteSetIn and BBWriteSetOut have been computed and converged,
  // and this basic block does not even have StoreInsts, there is no point
  // in processing every instruction in the basic block again as no store
  // will be eliminated.
  if (Optimistic && BBWithStores.find(BB) == BBWithStores.end())
    return;

  // Intersect in the successor WriteSetIns. A store is dead if it is not read
  // on any path to the end of the program. Thus an intersection.
  mergeSuccessorLiveIns(BB);

  // Initialize the BBWriteSetMid to BBWriteSetOut to get started.
  BlockState *S = getBlockState(BB);
  S->BBWriteSetMid = S->BBWriteSetOut;

  // Process instructions in post-order fashion.
  for (auto I = BB->rbegin(), E = BB->rend(); I != E; ++I) {
    processInstruction(&(*I), DSEKind::PerformDSE);
  }

  S->BBWriteSetIn = S->BBWriteSetMid;
}

void BlockState::initStoreSetAtEndOfBlock(DSEContext &Ctx) {
  std::vector<LSLocation> &LocationVault = Ctx.getLocationVault();

  // We set the store bit at the end of the basic block in which a stack
  // allocated location is deallocated.
  for (unsigned i = 0; i < LocationVault.size(); ++i) {
    // Turn on the store bit in the block in which the stack slot is
    // deallocated.
    if (auto *ASI = dyn_cast<AllocStackInst>(LocationVault[i].getBase())) {
      for (auto X : findDeallocStackInst(ASI)) {
        SILBasicBlock *DSIBB = X->getParent();
        if (DSIBB != BB)
          continue;
        startTrackingLocation(BBDeallocateLocation, i);
      }
    }
  }
}

void DSEContext::mergeSuccessorLiveIns(SILBasicBlock *BB) {
  // If the basic block has no successor, then all local writes can be
  // considered dead at this point.
  BlockState *C = getBlockState(BB);
  if (BB->succ_empty()) {
    C->BBWriteSetOut |= C->BBDeallocateLocation;
    return;
  }

  // Use the first successor as the base condition.
  auto Iter = BB->succ_begin();
  C->BBWriteSetOut = getBlockState(*Iter)->BBWriteSetIn;

  /// Merge/intersection is performed very frequently, so it is important to
  /// make it as cheap as possible.
  ///
  /// To do so, we canonicalize LSLocations, i.e. trace them back to the
  /// underlying object. Therefore, there is no need to do an O(N^2)
  /// comparison to figure out what is dead along all successors.
  ///
  /// NOTE: Canonicalization does not solve the problem entirely, i.e. it is
  /// still possible that 2 LSLocations with different bases happen to be the
  /// same object and field. In such a case, we would miss a dead store
  /// opportunity. But this happens less often with canonicalization.
  Iter = std::next(Iter);
  for (auto EndIter = BB->succ_end(); Iter != EndIter; ++Iter) {
    C->BBWriteSetOut &= getBlockState(*Iter)->BBWriteSetIn;
  }

  // We set the store bit at the end of the basic block in which a stack
  // allocated location is deallocated.
  C->BBWriteSetOut |= C->BBDeallocateLocation;
}

void DSEContext::invalidateLSLocationBaseForGenKillSet(SILInstruction *I) {
  BlockState *S = getBlockState(I);
  for (unsigned i = 0; i < S->LocationNum; ++i) {
    if (LocationVault[i].getBase() != I)
      continue;
    S->startTrackingLocation(S->BBKillSet, i);
    S->stopTrackingLocation(S->BBGenSet, i);
  }
}

void DSEContext::invalidateLSLocationBaseForDSE(SILInstruction *I) {
  BlockState *S = getBlockState(I);
  for (unsigned i = 0; i < S->LocationNum; ++i) {
    if (!S->BBWriteSetMid.test(i))
      continue;
    if (LocationVault[i].getBase() != I)
      continue;
    S->stopTrackingLocation(S->BBWriteSetMid, i);
  }
}

void DSEContext::invalidateLSLocationBase(SILInstruction *I, DSEKind Kind) {
  // If this instruction defines the base of a location, then we need to
  // invalidate any locations with the same base.
  //
  // Are we building the genset and killset?
  if (isBuildingGenKillSet(Kind)) {
    invalidateLSLocationBaseForGenKillSet(I);
    return;
  }

  // Are we performing dead store elimination?
  if (isPerformingDSE(Kind)) {
    invalidateLSLocationBaseForDSE(I);
    return;
  }

  llvm_unreachable("Unknown DSE compute kind");
}

void DSEContext::processReadForDSE(BlockState *S, unsigned bit) {
  // Remove any may/must-aliasing stores to the LSLocation, as they can't be
  // used to kill any upward visible stores due to the interfering load.
  LSLocation &R = LocationVault[bit];
  for (unsigned i = 0; i < S->LocationNum; ++i) {
    if (!S->isTrackingLocation(S->BBWriteSetMid, i))
      continue;
    LSLocation &L = LocationVault[i];
    if (!L.isMayAliasLSLocation(R, AA))
      continue;
    S->stopTrackingLocation(S->BBWriteSetMid, i);
  }
}

void DSEContext::processReadForGenKillSet(BlockState *S, unsigned bit) {
  // Start tracking the read to this LSLocation in the killset and update
  // the genset accordingly.
  //
  // Even though LSLocations are canonicalized, we still need to consult
  // alias analysis to determine whether 2 LSLocations are disjoint.
  LSLocation &R = LocationVault[bit];
  for (unsigned i = 0; i < S->LocationNum; ++i) {
    if (!S->BBMaxStoreSet.test(i))
      continue;
    // Do nothing if the read location is NoAlias with the current location.
    LSLocation &L = LocationVault[i];
    if (!L.isMayAliasLSLocation(R, AA))
      continue;
    // Update the genset and killset.
    S->startTrackingLocation(S->BBKillSet, i);
    S->stopTrackingLocation(S->BBGenSet, i);
  }
}

void DSEContext::processRead(SILInstruction *I, BlockState *S, SILValue Mem,
                             DSEKind Kind) {
  // Construct an LSLocation to represent the memory read by this instruction.
  // NOTE: The base will point to the actual object this inst is accessing,
  // not this particular field.
  //
  // e.g. %1 = alloc_stack $S
  //      %2 = struct_element_addr %1, #a
  //      %3 = load %2 : $*Int
  //
  // Base will point to %1, but not %2. The projection path will indicate
  // which field is accessed.
  //
  // This makes comparison between locations easier. It eases the
  // implementation of the intersection operator in the data flow.
  LSLocation L;
  if (BaseToLocIndex.find(Mem) != BaseToLocIndex.end()) {
    L = BaseToLocIndex[Mem];
  } else {
    SILValue UO = getUnderlyingObject(Mem);
    L = LSLocation(UO, ProjectionPath::getProjectionPath(UO, Mem));
  }

  // If we can't figure out the Base or Projection Path for the read
  // instruction, process it as an unknown memory instruction for now.
  if (!L.isValid()) {
    processUnknownReadInst(I, Kind);
    return;
  }

  // Expand the given Mem into individual fields and process them as separate
  // reads.
  LSLocationList Locs;
  LSLocation::expand(L, &I->getModule(), Locs, TE);

  // Are we building the genset and killset?
  if (isBuildingGenKillSet(Kind)) {
    for (auto &E : Locs) {
      // Only building the gen and kill sets for now.
      processReadForGenKillSet(S, getLocationBit(E));
    }
    return;
  }

  // Are we performing the actual DSE?
  if (isPerformingDSE(Kind)) {
    for (auto &E : Locs) {
      // This is the last iteration, compute BBWriteSetOut and perform DSE.
      processReadForDSE(S, getLocationBit(E));
    }
    return;
  }

  llvm_unreachable("Unknown DSE compute kind");
}

bool DSEContext::processWriteForDSE(BlockState *S, unsigned bit) {
  // If a tracked store must-aliases with this store, then this store is dead.
  bool StoreDead = false;
  LSLocation &R = LocationVault[bit];
  for (unsigned i = 0; i < S->LocationNum; ++i) {
    if (!S->isTrackingLocation(S->BBWriteSetMid, i))
      continue;
    // If the 2 locations merely may-alias, we still keep both stores.
    LSLocation &L = LocationVault[i];
    if (!L.isMustAliasLSLocation(R, AA))
      continue;
    // There is a must-alias store. No need to check further.
    StoreDead = true;
    break;
  }

  // Track this new store.
  S->startTrackingLocation(S->BBWriteSetMid, bit);
  return StoreDead;
}

void DSEContext::processWriteForGenKillSet(BlockState *S, unsigned bit) {
  S->startTrackingLocation(S->BBGenSet, bit);
}

void DSEContext::processWriteForMaxStoreSet(BlockState *S, unsigned bit) {
  S->startTrackingLocation(S->BBMaxStoreSet, bit);
}

void DSEContext::processWrite(SILInstruction *I, BlockState *S, SILValue Val,
                              SILValue Mem, DSEKind Kind) {
  // Construct an LSLocation to represent the memory written by this
  // instruction.
  // NOTE: The base will point to the actual object this inst is accessing,
  // not this particular field.
  //
  // e.g. %1 = alloc_stack $S
  //      %2 = struct_element_addr %1, #a
  //      store %3 to %2 : $*Int
  //
  // Base will point to %1, but not %2. The projection path will indicate
  // which field is accessed.
  //
  // This makes comparison between locations easier. It eases the
  // implementation of the intersection operator in the data flow.
  LSLocation L;
  if (BaseToLocIndex.find(Mem) != BaseToLocIndex.end()) {
    L = BaseToLocIndex[Mem];
  } else {
    SILValue UO = getUnderlyingObject(Mem);
    L = LSLocation(UO, ProjectionPath::getProjectionPath(UO, Mem));
  }

  // If we can't figure out the Base or Projection Path for the store
  // instruction, simply ignore it.
  if (!L.isValid())
    return;

  // Expand the given Mem into individual fields and process them as separate
  // writes.
  bool Dead = true;
  LSLocationList Locs;
  LSLocation::expand(L, Mod, Locs, TE);
  llvm::SmallBitVector V(Locs.size());

  // Are we computing the max store set?
  if (isComputeMaxStoreSet(Kind)) {
    for (auto &E : Locs) {
      // Update the max store set for the basic block.
      processWriteForMaxStoreSet(S, getLocationBit(E));
    }
    return;
  }

  // Are we computing the genset and killset?
  if (isBuildingGenKillSet(Kind)) {
    for (auto &E : Locs) {
      // Only building the gen and kill sets here.
      processWriteForGenKillSet(S, getLocationBit(E));
    }
    // Data flow has not stabilized, do not perform the DSE just yet.
    return;
  }

  // We are doing the actual DSE.
  assert(isPerformingDSE(Kind) && "Invalid computation kind");
  unsigned idx = 0;
  for (auto &E : Locs) {
    // This is the last iteration, compute BBWriteSetOut and perform the dead
    // store elimination.
    if (processWriteForDSE(S, getLocationBit(E)))
      V.set(idx);
    Dead &= V.test(idx);
    ++idx;
  }

  // Fully dead store - stores to all the components are dead, therefore this
  // instruction is dead.
  if (Dead) {
    DEBUG(llvm::dbgs() << "Instruction Dead: " << *I << "\n");
    S->DeadStores.insert(I);
    ++NumDeadStores;
    return;
  }

  // Partially dead store - stores to some locations are dead, but not all.
  // Also, at this point we know which locations are dead.
  llvm::DenseSet<LSLocation> Alives;
  if (V.any()) {
    // Take out the locations that are dead.
    for (unsigned i = 0; i < V.size(); ++i) {
      if (V.test(i))
        continue;
      // This location is alive.
      Alives.insert(Locs[i]);
    }

    // Try to create as few aggregated stores as possible out of the locations.
    LSLocation::reduce(L, Mod, Alives);

    // Oops, we have too many smaller stores generated, bail out.
    if (Alives.size() > MaxPartialDeadStoreCountLimit)
      return;

    // At this point, we are performing a partial dead store elimination.
    //
    // Locations here have a projection path from their Base, but this
    // particular instruction may not be accessing the base, so we need to
    // *rebase* the locations w.r.t. the current instruction.
    SILValue B = Locs[0].getBase();
    Optional<ProjectionPath> BP = ProjectionPath::getProjectionPath(B, Mem);
    // Strip off the projection path from the base to the accessed field.
    for (auto &X : Alives) {
      X.removePathPrefix(BP);
    }

    // We merely set up the remaining live stores; we do not materialize them
    // in the IR yet. These stores will be materialized before the algorithm
    // exits.
    for (auto &X : Alives) {
      SILValue Value = X.getPath()->createExtract(Val, I, true);
      SILValue Addr = X.getPath()->createExtract(Mem, I, false);
      S->LiveStores[Addr] = Value;
    }
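
    // As a hand-written illustration of the rewrite (hypothetical SIL, not
    // from a test case): for a struct S with fields a and b where only the
    // component store to S.a is dead, the loop above records a single live
    // store of the S.b component, roughly
    //
    //   %vb = struct_extract %s : $S, #S.b
    //   %ab = struct_element_addr %addr : $*S, #S.b
    //
    // by mapping %ab to %vb in LiveStores. The store of the whole value is
    // then marked dead below, and the smaller store is materialized once the
    // data flow finishes. With MaxPartialDeadStoreCountLimit == 1, this
    // rewrite only happens when the surviving fields reduce to one store.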
DEBUG(llvm::dbgs() << "Instruction Partially Dead: " << *I << "\n"); S->DeadStores.insert(I); ++NumPartialDeadStores; } } void DSEContext::processLoadInst(SILInstruction *I, DSEKind Kind) { processRead(I, getBlockState(I), cast(I)->getOperand(), Kind); } void DSEContext::processStoreInst(SILInstruction *I, DSEKind Kind) { auto *SI = cast(I); processWrite(I, getBlockState(I), SI->getSrc(), SI->getDest(), Kind); } void DSEContext::processDebugValueAddrInstForGenKillSet(SILInstruction *I) { BlockState *S = getBlockState(I); SILValue Mem = cast(I)->getOperand(); for (unsigned i = 0; i < S->LocationNum; ++i) { if (!S->BBMaxStoreSet.test(i)) continue; if (AA->isNoAlias(Mem, LocationVault[i].getBase())) continue; S->stopTrackingLocation(S->BBGenSet, i); S->startTrackingLocation(S->BBKillSet, i); } } void DSEContext::processDebugValueAddrInstForDSE(SILInstruction *I) { BlockState *S = getBlockState(I); SILValue Mem = cast(I)->getOperand(); for (unsigned i = 0; i < S->LocationNum; ++i) { if (!S->isTrackingLocation(S->BBWriteSetMid, i)) continue; if (AA->isNoAlias(Mem, LocationVault[i].getBase())) continue; S->stopTrackingLocation(S->BBWriteSetMid, i); } } void DSEContext::processDebugValueAddrInst(SILInstruction *I, DSEKind Kind) { // Are we building genset and killset. if (isBuildingGenKillSet(Kind)) { processDebugValueAddrInstForGenKillSet(I); return; } // Are we performing dead store elimination. if (isPerformingDSE(Kind)) { processDebugValueAddrInstForDSE(I); return; } llvm_unreachable("Unknown DSE compute kind"); } void DSEContext::processUnknownReadInstForGenKillSet(SILInstruction *I) { BlockState *S = getBlockState(I); for (unsigned i = 0; i < S->LocationNum; ++i) { if (!S->BBMaxStoreSet.test(i)) continue; if (!AA->mayReadFromMemory(I, LocationVault[i].getBase())) continue; // Update the genset and kill set. S->startTrackingLocation(S->BBKillSet, i); S->stopTrackingLocation(S->BBGenSet, i); } } void DSEContext::processUnknownReadInstForDSE(SILInstruction *I) { BlockState *S = getBlockState(I); for (unsigned i = 0; i < S->LocationNum; ++i) { if (!S->isTrackingLocation(S->BBWriteSetMid, i)) continue; if (!AA->mayReadFromMemory(I, LocationVault[i].getBase())) continue; S->stopTrackingLocation(S->BBWriteSetMid, i); } } void DSEContext::processUnknownReadInst(SILInstruction *I, DSEKind Kind) { // If this is a release on a guaranteed parameter, it can not call deinit, // which might read or write memory. if (isGuaranteedParamRelease(I)) return; // Are we building genset and killset. if (isBuildingGenKillSet(Kind)) { processUnknownReadInstForGenKillSet(I); return; } // Are we performing dead store elimination. if (isPerformingDSE(Kind)) { processUnknownReadInstForDSE(I); return; } llvm_unreachable("Unknown DSE compute kind"); } void DSEContext::processInstruction(SILInstruction *I, DSEKind Kind) { // If this instruction has side effects, but is inert from a store // perspective, skip it. if (isDeadStoreInertInstruction(I)) return; // A set of ad-hoc rules to process instructions. if (isa(I)) { processLoadInst(I, Kind); } else if (isa(I)) { processStoreInst(I, Kind); } else if (isa(I)) { processDebugValueAddrInst(I, Kind); } else if (I->mayReadFromMemory()) { processUnknownReadInst(I, Kind); } // Check whether this instruction will invalidate any other locations. invalidateLSLocationBase(I, Kind); } void DSEContext::runIterativeDSE() { // Generate the genset and killset for each basic block. We can process the // basic blocks in any order. 
  //
  // We also compute the max store set at the beginning of the basic block.
  //
  auto *PO = PM->getAnalysis<PostOrderAnalysis>()->get(F);
  for (SILBasicBlock *B : PO->getPostOrder()) {
    processBasicBlockForGenKillSet(B);
  }

  // Process each basic block with the gen and kill set. Every time the
  // BBWriteSetIn of a basic block changes, the optimization is rerun on its
  // predecessors.
  llvm::SmallVector<SILBasicBlock *, 16> WorkList;
  llvm::DenseSet<SILBasicBlock *> HandledBBs;
  // Push in reverse post order so that we can pop from the back and get
  // post order.
  for (SILBasicBlock *B : PO->getReversePostOrder()) {
    WorkList.push_back(B);
    HandledBBs.insert(B);
  }
  while (!WorkList.empty()) {
    SILBasicBlock *BB = WorkList.pop_back_val();
    HandledBBs.erase(BB);
    if (processBasicBlockWithGenKillSet(BB)) {
      for (auto X : BB->getPreds()) {
        // We do not push a basic block into the worklist if it is already
        // in the worklist.
        if (HandledBBs.find(X) != HandledBBs.end())
          continue;
        WorkList.push_back(X);
      }
    }
  }
}

bool DSEContext::run() {
  // Is this a one-iteration function?
  auto *PO = PM->getAnalysis<PostOrderAnalysis>()->get(F);
  std::pair<int, int> LSCount = std::make_pair(0, 0);

  // Walk over the function and find all the locations accessed by
  // this function.
  LSLocation::enumerateLSLocations(*F, LocationVault, LocToBitIndex,
                                   BaseToLocIndex, TE, LSCount);

  // Check how to optimize this function.
  ProcessKind Kind = getProcessFunctionKind(LSCount.second);

  // We do not optimize this function at all.
  if (Kind == ProcessKind::ProcessNone)
    return false;

  // Do we run the optimistic data flow?
  bool Optimistic = Kind == ProcessKind::ProcessOptimistic ? true : false;

  // For all basic blocks in the function, initialize a BB state.
  //
  // DenseMap has a minimum size of 64, while many functions do not have more
  // than 64 basic blocks. Therefore, allocate the BlockStates in a vector and
  // use pointers in BBToLocState to access them.
  for (auto &B : *F) {
    BlockStates.push_back(BlockState(&B));
    // Since we know all the locations accessed in this function, we can
    // resize the bit vectors to the appropriate size.
    BlockStates.back().init(*this, Optimistic);
  }

  // Initialize the BBToLocState mapping.
  for (auto &S : BlockStates) {
    BBToLocState[S.getBB()] = &S;
  }

  // Initialize the store bit state at the end of each basic block.
  for (auto &X : BlockStates) {
    X.initStoreSetAtEndOfBlock(*this);
  }

  // We perform dead store elimination in the following phases.
  //
  // Phase 1. We compute the max store set at the beginning of the basic
  // block.
  //
  // Phase 2. We compute the genset and killset for every basic block.
  //
  // Phase 3. We run the data flow with the genset and killset until the
  // BBWriteSetIns stop changing.
  //
  // Phase 4. We run the data flow for the last iteration and perform the DSE.
  //
  // Phase 5. We remove the dead stores.
  //
  // Phases 1 - 3 are only performed when we know the data flow will not
  // converge in a single iteration. Otherwise, we only run phases 4 and 5
  // on the function.

  // We need to run the iterative data flow on the function.
  if (Optimistic) {
    runIterativeDSE();
  }

  // The data flow has stabilized, run one last iteration over all the basic
  // blocks and try to remove dead stores.
  for (SILBasicBlock *B : PO->getPostOrder()) {
    processBasicBlockForDSE(B, Optimistic);
  }

  // Finally, delete the dead stores and create the live stores.
  bool Changed = false;
  for (SILBasicBlock &BB : *F) {
    // Create the stores that are alive due to partial dead stores.
    for (auto &I : getBlockState(&BB)->LiveStores) {
      Changed = true;
      SILInstruction *Inst = cast<SILInstruction>(I.first);
      auto *IT = &*std::next(Inst->getIterator());
      SILBuilderWithScope Builder(IT);
      Builder.createStore(Inst->getLoc(), I.second, Inst);
    }

    // Delete the dead stores.
    for (auto &I : getBlockState(&BB)->DeadStores) {
      Changed = true;
      DEBUG(llvm::dbgs() << "*** Removing: " << *I << " ***\n");
      // This way, we get rid of the pass dependence on DCE.
      recursivelyDeleteTriviallyDeadInstructions(I, true);
    }
  }

  return Changed;
}

//===----------------------------------------------------------------------===//
//                           Top Level Entry Point
//===----------------------------------------------------------------------===//

namespace {

class DeadStoreElimination : public SILFunctionTransform {
public:
  StringRef getName() override { return "SIL Dead Store Elimination"; }

  /// The entry point to the transformation.
  void run() override {
    auto *AA = PM->getAnalysis<AliasAnalysis>();
    auto *EA = PM->getAnalysis<EscapeAnalysis>();
    auto *TE = PM->getAnalysis<TypeExpansionAnalysis>();
    SILFunction *F = getFunction();
    DEBUG(llvm::dbgs() << "*** DSE on function: " << F->getName()
                       << " ***\n");

    DSEContext DSE(F, &F->getModule(), PM, AA, EA, TE);
    if (DSE.run()) {
      invalidateAnalysis(SILAnalysis::InvalidationKind::Instructions);
    }
  }
};

} // end anonymous namespace

SILTransform *swift::createDeadStoreElimination() {
  return new DeadStoreElimination();
}