//===--- RedundantLoadElimination.cpp - SIL Load Forwarding ---------------===//
//
// This source file is part of the Swift.org open source project
//
// Copyright (c) 2014 - 2016 Apple Inc. and the Swift project authors
// Licensed under Apache License v2.0 with Runtime Library Exception
//
// See http://swift.org/LICENSE.txt for license information
// See http://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
//
//===----------------------------------------------------------------------===//
///
/// \file
///
/// This pass eliminates redundant loads.
///
/// A load can be eliminated if the value it loads is already available
/// somewhere, i.e. it was produced by a previous load from, or a store of a
/// known value to, the same LSLocation.
///
/// In that case, the load instruction can be replaced with the previously
/// available value.
///
/// Redundant Load Elimination (RLE) eliminates such loads by:
///
/// 1. Introducing a notion of an LSLocation that is used to model object
/// fields. (See below for more details).
///
/// 2. Introducing a notion of an LSValue that is used to model the value
/// that currently resides in the associated LSLocation on a particular
/// program path. (See below for more details).
///
/// 3. Performing an RPO walk over the control flow graph, tracking any
/// LSLocations that are read from or stored into in each basic block. The
/// read or stored value, kept in a map between LSLocation and LSValue,
/// becomes the available value for the LSLocation.
///
/// 4. Performing an optimistic iterative intersection-based dataflow on the
/// gensets and killsets until convergence.
///
/// At the core of RLE, there is the LSLocation class. An LSLocation is an
/// abstraction of an object field in the program. It consists of a base and
/// a projection path to the accessed field.
///
/// In SIL, one can access an aggregate as a whole, e.g. store to a struct
/// with 2 Int fields. A store like this will generate 2 *indivisible*
/// LSLocations, one for each field. In addition to keeping a list of
/// LSLocations, RLE also keeps their available LSValues. We call a location
/// *indivisible* because it cannot be broken down into smaller LSLocations.
///
/// An LSValue consists of a base - a SILValue from the load or store
/// instruction - as well as a projection path to the field it represents.
/// So, a store to the 2-field struct mentioned above will generate 2
/// LSLocations and 2 LSValues.
///
/// Every basic block keeps a map between LSLocation and LSValue. By keeping
/// the LSLocation and LSValue in their indivisible form, one can easily find
/// which part of the load is redundant and how to compute its forwarding
/// value.
///
/// In the case where both fields of the struct have available values, RLE
/// can find their LSValues (maybe by struct_extract from a larger value) and
/// then aggregate them.
///
/// However, this may introduce a lot of extraction and aggregation which is
/// not always necessary, e.g. a store to the struct followed by a load from
/// the struct. To solve this problem, when RLE detects that a load
/// instruction can be replaced by a forwarded value, it tries to find the
/// minimum number of extractions necessary to form the forwarded value. It
/// groups the available values by their LSValue base, i.e. LSValues that
/// come from the same instruction, and then uses extraction to obtain the
/// needed components of the base.
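///
/// As a small illustration (hypothetical, simplified SIL), consider a
/// whole-struct store followed by a partial load:
///
///   struct S { var a: Int; var b: Int }
///
///   store %v to %addr : $*S           // 2 LSLocations: (%addr, #S.a)
///                                     //                (%addr, #S.b)
///   %1 = struct_element_addr %addr : $*S, #S.a
///   %2 = load %1 : $*Int              // redundant: its LSLocation is
///                                     // (%addr, #S.a), whose available
///                                     // LSValue is (%v, #S.a)
///
/// RLE forwards the stored value by rewriting the load as a single
/// struct_extract of %v rather than reloading from memory.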
///
//===----------------------------------------------------------------------===//

#define DEBUG_TYPE "sil-redundant-load-elim"
#include "swift/SIL/Projection.h"
#include "swift/SIL/SILArgument.h"
#include "swift/SIL/SILBuilder.h"
#include "swift/SILOptimizer/Analysis/AliasAnalysis.h"
#include "swift/SILOptimizer/Analysis/DominanceAnalysis.h"
#include "swift/SILOptimizer/Analysis/PostOrderAnalysis.h"
#include "swift/SILOptimizer/Analysis/ValueTracking.h"
#include "swift/SILOptimizer/PassManager/Passes.h"
#include "swift/SILOptimizer/PassManager/Transforms.h"
#include "swift/SILOptimizer/Utils/CFG.h"
#include "swift/SILOptimizer/Utils/Local.h"
#include "swift/SILOptimizer/Utils/LSBase.h"
#include "swift/SILOptimizer/Utils/SILSSAUpdater.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/None.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"

using namespace swift;

STATISTIC(NumForwardedLoads, "Number of loads forwarded");

/// Return the AllocStackInst corresponding to the given DeallocStackInst.
static SILInstruction *findAllocStackInst(SILInstruction *I) {
  if (auto *DSI = dyn_cast<DeallocStackInst>(I))
    return dyn_cast<AllocStackInst>(DSI->getOperand());
  return nullptr;
}

/// ComputeAvailSetMax - If we ignore all unknown writes, what is the max
/// available set that can reach a certain point in a basic block. This
/// helps generate the genset and killset, i.e. if there is no downward
/// visible value that can reach the end of a basic block, then we know that
/// the genset and killset for the location need not be set.
///
/// ComputeAvailGenKillSet - Build the genset and killset of the basic block.
///
/// ComputeAvailValue - Compute the available value at the end of the basic
/// block.
///
/// PerformRLE - Perform the actual redundant load elimination.
enum class RLEKind : unsigned {
  ComputeAvailSetMax = 0,
  ComputeAvailGenKillSet = 1,
  ComputeAvailValue = 2,
  PerformRLE = 3,
};

//===----------------------------------------------------------------------===//
//                             Utility Functions
//===----------------------------------------------------------------------===//

static inline bool isComputeAvailSetMax(RLEKind Kind) {
  return Kind == RLEKind::ComputeAvailSetMax;
}

static inline bool isComputeAvailGenKillSet(RLEKind Kind) {
  return Kind == RLEKind::ComputeAvailGenKillSet;
}

static inline bool isComputeAvailValue(RLEKind Kind) {
  return Kind == RLEKind::ComputeAvailValue;
}

static inline bool isPerformingRLE(RLEKind Kind) {
  return Kind == RLEKind::PerformRLE;
}

/// Returns true if this is an instruction that may have side effects in a
/// general sense but is inert from a load store perspective.
static bool isRLEInertInstruction(SILInstruction *Inst) {
  switch (Inst->getKind()) {
  case ValueKind::StrongRetainInst:
  case ValueKind::StrongRetainUnownedInst:
  case ValueKind::UnownedRetainInst:
  case ValueKind::RetainValueInst:
  case ValueKind::DeallocStackInst:
  case ValueKind::CondFailInst:
  case ValueKind::IsUniqueInst:
  case ValueKind::IsUniqueOrPinnedInst:
  case ValueKind::FixLifetimeInst:
    return true;
  default:
    return false;
  }
}
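
// For instance (illustrative SIL), a retain between two loads does not
// clobber the loaded location, so the second load below is still redundant:
//
//   %1 = load %a : $*Klass
//   strong_retain %1 : $Klass     // inert from a load store perspective
//   %2 = load %a : $*Klass        // can still be replaced by %1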
//===----------------------------------------------------------------------===//
//                         Basic Block Location State
//===----------------------------------------------------------------------===//

namespace {

/// If this function has too many basic blocks or too many locations, it may
/// take a long time to compute the genset and killset. The number of memory
/// behavior or alias queries we need to do in the worst case is roughly
/// linear in (# of BBs) x (# of locations).
///
/// We run RLE at all only on functions with at most 128 basic blocks and 128
/// locations, which is already a large function.
constexpr unsigned MaxLSLocationBBMultiplicationNone = 128 * 128;

/// We run optimistic (multi-iteration) RLE only on functions with fewer than
/// 64 basic blocks and 64 locations, which is a sizeable function.
constexpr unsigned MaxLSLocationBBMultiplicationPessimistic = 64 * 64;

/// Forward declaration.
class RLEContext;

/// State of the loads and stores in one basic block, which allows for
/// forwarding from loads and stores to loads.
class BlockState {
public:
  enum class ValueState : unsigned {
    CoverValues = 0,
    ConcreteValues = 1,
    CoverAndConcreteValues = 2,
  };

private:
  /// # of locations in the LocationVault.
  unsigned LocationNum;

  /// The basic block that we are optimizing.
  SILBasicBlock *BB;

  /// A bit vector for which the ith bit represents the ith LSLocation in
  /// LocationVault. If the bit is set, then the location currently has a
  /// downward visible value at the beginning of the basic block.
  llvm::SmallBitVector ForwardSetIn;

  /// A bit vector for which the ith bit represents the ith LSLocation in
  /// LocationVault. If the bit is set, then the location currently has a
  /// downward visible value at the end of the basic block.
  llvm::SmallBitVector ForwardSetOut;

  /// A bit vector for which the ith bit represents the ith LSLocation in
  /// LocationVault. If we ignore all unknown writes, this is the maximum set
  /// of available locations at the current position in the basic block.
  llvm::SmallBitVector ForwardSetMax;

  /// A bit vector for which the ith bit represents the ith LSLocation in
  /// LocationVault. If the bit is set, then the basic block generates a
  /// value for the location.
  llvm::SmallBitVector BBGenSet;

  /// A bit vector for which the ith bit represents the ith LSLocation in
  /// LocationVault. If the bit is set, then the basic block kills the
  /// value for the location.
  llvm::SmallBitVector BBKillSet;
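
  // Illustrative layout (hypothetical, assuming 4 enumerated locations):
  //
  //   LocationVault:  [(%a, #S.x), (%a, #S.y), (%b, e), (%c, e)]
  //   ForwardSetIn:        1            0         1        0
  //
  // Bit i being set means LocationVault[i] has a downward visible value at
  // this point; the index of that value is kept in ForwardValIn below.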
  /// This is a map between LSLocations and their available values at the
  /// beginning of this basic block.
  ValueTableMap ForwardValIn;

  /// This is a map between LSLocations and their available values at the end
  /// of this basic block.
  ValueTableMap ForwardValOut;

  /// Keeps a list of replaceable instructions in the current basic block as
  /// well as their SILValue replacement.
  llvm::DenseMap<SILInstruction *, SILValue> RedundantLoads;

  /// An LSLocation that is read or written has been extracted, expanded and
  /// mapped to the bit position B in the bitvector. Update it in the
  /// ForwardSetIn of the current basic block.
  void updateForwardSetForRead(RLEContext &Ctx, unsigned B);
  void updateForwardSetForWrite(RLEContext &Ctx, unsigned B);

  /// An LSLocation that is read or written has been extracted, expanded and
  /// mapped to the bit position B in the bitvector. Update it in the genset
  /// and killset of the current basic block.
  void updateGenKillSetForRead(RLEContext &Ctx, unsigned B);
  void updateGenKillSetForWrite(RLEContext &Ctx, unsigned B);

  /// An LSLocation that is read or written has been extracted, expanded and
  /// mapped to the bit position B in the bitvector. Update it in the
  /// ForwardSetMax of the current basic block.
  void updateMaxAvailSetForRead(RLEContext &Ctx, unsigned B);
  void updateMaxAvailSetForWrite(RLEContext &Ctx, unsigned B);

  /// An LSLocation that is read or written has been extracted, expanded and
  /// mapped to the bit position L in the bitvector, and its value to the
  /// position V in the value table. Update the ForwardSetIn and ForwardValIn
  /// of the current basic block.
  void updateForwardSetAndValForRead(RLEContext &Ctx, unsigned L, unsigned V);
  void updateForwardSetAndValForWrite(RLEContext &Ctx, unsigned L, unsigned V);

  /// There is a read to an LSLocation; expand the LSLocation into individual
  /// fields before processing them.
  void processRead(RLEContext &Ctx, SILInstruction *I, SILValue Mem,
                   SILValue Val, RLEKind Kind);

  /// There is a write to an LSLocation; expand the LSLocation into individual
  /// fields before processing them.
  void processWrite(RLEContext &Ctx, SILInstruction *I, SILValue Mem,
                    SILValue Val, RLEKind Kind);

  /// BitVector manipulation functions.
  void startTrackingLocation(llvm::SmallBitVector &BV, unsigned B);
  void stopTrackingLocation(llvm::SmallBitVector &BV, unsigned B);
  bool isTrackingLocation(llvm::SmallBitVector &BV, unsigned B);
  void startTrackingValue(ValueTableMap &VM, unsigned L, unsigned V);
  void stopTrackingValue(ValueTableMap &VM, unsigned B);

public:
  BlockState() = default;

  void init(SILBasicBlock *NewBB, unsigned bitcnt, bool optimistic) {
    BB = NewBB;
    LocationNum = bitcnt;
    // For reachable basic blocks, the initial state of ForwardSetOut should
    // be all 1's. Otherwise the dataflow solution could be too conservative.
    //
    // Consider this case: the value made available by `var a = 10` before
    // the loop will not be forwarded to `use(a)` if ForwardSetOut is
    // initialized to 0.
    //
    //   var a = 10
    //   for _ in 0...1024 {}
    //   use(a);
    //
    // However, by doing so, we can only do the data forwarding after the
    // data flow stabilizes.
    //
    // We set the initial state of unreachable blocks to 0, as we do not have
    // a value for the location.
    //
    // This is a bit conservative as we could be missing forwarding
    // opportunities, e.g. a join block with 1 predecessor being an
    // unreachable block.
    //
    // We rely on other passes to clean up unreachable blocks.
    ForwardSetIn.resize(LocationNum, false);
    ForwardSetOut.resize(LocationNum, optimistic);

    // If we are running an optimistic data flow, set forward max to true
    // initially.
    ForwardSetMax.resize(LocationNum, optimistic);

    BBGenSet.resize(LocationNum, false);
    BBKillSet.resize(LocationNum, false);
  }

  /// Initialize the AvailSetMax by intersecting this basic block's
  /// predecessors' AvailSetMax.
  void mergePredecessorsAvailSetMax(RLEContext &Ctx);

  /// Initialize the AvailSet by intersecting this basic block's
  /// predecessors' AvailSet.
  void mergePredecessorAvailSet(RLEContext &Ctx);

  /// Initialize the AvailSet and AvailVal of the current basic block.
  void mergePredecessorAvailSetAndValue(RLEContext &Ctx);

  /// Reached the end of the basic block; update the ForwardValOut with the
  /// ForwardValIn.
  void updateForwardValOut() { ForwardValOut = ForwardValIn; }

  /// Check whether the ForwardSetOut has changed. If it has, we need to
  /// rerun the data flow to reach the fixed point.
  bool updateForwardSetOut() {
    if (ForwardSetIn == ForwardSetOut)
      return false;
    ForwardSetOut = ForwardSetIn;
    return true;
  }

  /// Returns the current basic block we are processing.
  SILBasicBlock *getBB() const { return BB; }

  /// Returns the ForwardValIn for the current basic block.
  ValueTableMap &getForwardValIn() { return ForwardValIn; }

  /// Returns the ForwardValOut for the current basic block.
  ValueTableMap &getForwardValOut() { return ForwardValOut; }

  /// Returns the redundant loads and their replacement in the current basic
  /// block.
  llvm::DenseMap<SILInstruction *, SILValue> &getRL() {
    return RedundantLoads;
  }
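
  // For example (illustrative), after processing a block containing
  //
  //   store %v to %a : $*Int
  //   %1 = load %a : $*Int
  //
  // getRL() would contain the pair { %1 -> %v }; the actual replacement is
  // deferred until the whole function has been processed.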
  /// Look into the value for the given LSLocation at the end of the basic
  /// block; return one of the three ValueState kinds.
  ValueState getValueStateAtEndOfBlock(RLEContext &Ctx, LSLocation &L);

  /// Wrappers to query the value state of the location in this BlockState.
  bool isCoverValues(RLEContext &Ctx, LSLocation &L) {
    return getValueStateAtEndOfBlock(Ctx, L) == ValueState::CoverValues;
  }
  bool isConcreteValues(RLEContext &Ctx, LSLocation &L) {
    return getValueStateAtEndOfBlock(Ctx, L) == ValueState::ConcreteValues;
  }

  /// Iterate over the instructions in the basic block in forward order and
  /// process them w.r.t. the given \p Kind.
  void processInstructionWithKind(RLEContext &Ctx, SILInstruction *I,
                                  RLEKind Kind);
  void processBasicBlockWithKind(RLEContext &Ctx, RLEKind Kind);

  /// Process the current basic block with the genset and killset. Return
  /// true if the ForwardSetOut changes.
  bool processBasicBlockWithGenKillSet();

  /// Set up the value for redundant load elimination.
  bool setupRLE(RLEContext &Ctx, SILInstruction *I, SILValue Mem);

  /// Process instructions which write to memory in an unknown way.
  void processUnknownWriteInst(RLEContext &Ctx, SILInstruction *I,
                               RLEKind Kind);
  void processUnknownWriteInstForGenKillSet(RLEContext &Ctx,
                                            SILInstruction *I);
  void processUnknownWriteInstForRLE(RLEContext &Ctx, SILInstruction *I);

  void processDeallocStackInst(RLEContext &Ctx, SILInstruction *I,
                               RLEKind Kind);
  void processDeallocStackInstForGenKillSet(RLEContext &Ctx,
                                            SILInstruction *I);
  void processDeallocStackInstForRLE(RLEContext &Ctx, SILInstruction *I);

  /// Process LoadInst. Extract LSLocations from LoadInst.
  void processLoadInst(RLEContext &Ctx, LoadInst *LI, RLEKind Kind);

  /// Process StoreInst. Extract LSLocations from StoreInst.
  void processStoreInst(RLEContext &Ctx, StoreInst *SI, RLEKind Kind);

  /// Returns a *single* forwardable SILValue for the given LSLocation at the
  /// end of the basic block.
  SILValue reduceValuesAtEndOfBlock(RLEContext &Ctx, LSLocation &L);
};

} // end anonymous namespace

//===----------------------------------------------------------------------===//
//                           RLEContext Interface
//===----------------------------------------------------------------------===//

namespace {

using BBValueMap = llvm::DenseMap<SILBasicBlock *, SILValue>;

/// This class stores global state that we use when computing redundant loads
/// and their replacements in each basic block.
class RLEContext {
  enum class ProcessKind {
    ProcessMultipleIterations = 0,
    ProcessOneIteration = 1,
    ProcessNone = 2,
  };

private:
  /// The function currently being processed.
  SILFunction *Fn;

  /// The pass manager we are using.
  SILPassManager *PM;

  /// The alias analysis that we will use during all computations.
  AliasAnalysis *AA;

  /// The type expansion analysis we will use during all computations.
  TypeExpansionAnalysis *TE;

  /// The SSA updater we use to materialize covering values.
  SILSSAUpdater Updater;

  /// The range that we use to iterate over the post order and reverse post
  /// order of the given function.
  PostOrderFunctionInfo *PO;

  /// Keeps all the locations for the current function. The BitVector in each
  /// BlockState is then laid on top of it to keep track of which LSLocation
  /// has a downward available value.
  std::vector<LSLocation> LocationVault;

  /// Contains a map between an LSLocation and its index in the LocationVault.
  /// Used for fast lookup.
  LSLocationIndexMap LocToBitIndex;

  /// Keeps a map between the accessed SILValue and the location.
  LSLocationBaseMap BaseToLocIndex;
  /// Keeps all the LSValues for the current function. The BitVector in each
  /// BlockState is then laid on top of it to keep track of which LSLocation
  /// has a downward available value.
  std::vector<LSValue> LSValueVault;

  /// Contains a map between an LSValue and its index in the LSValueVault.
  /// Used for fast lookup.
  llvm::DenseMap<LSValue, unsigned> ValToBitIndex;

  /// A map from each BasicBlock to its BlockState.
  llvm::SmallDenseMap<SILBasicBlock *, BlockState> BBToLocState;

  /// Keeps a list of basic blocks that have LoadInsts. If a basic block does
  /// not have a LoadInst, we do not actually perform the last iteration,
  /// where RLE is actually performed, on that basic block.
  ///
  /// NOTE: This is never populated for functions which will only require 1
  /// data flow iteration. For functions that require more than 1 iteration
  /// of the data flow, this is populated the first time the function is
  /// walked, i.e. when we generate the genset and killset.
  llvm::DenseSet<SILBasicBlock *> BBWithLoads;

public:
  RLEContext(SILFunction *F, SILPassManager *PM, AliasAnalysis *AA,
             TypeExpansionAnalysis *TE, PostOrderFunctionInfo *PO);

  RLEContext(const RLEContext &) = delete;
  RLEContext(RLEContext &&) = default;
  ~RLEContext() = default;

  /// Entry point to redundant load elimination.
  bool run();

  /// Use a set of ad hoc rules to tell whether we should run a pessimistic
  /// one-iteration data flow on the function.
  ProcessKind getProcessFunctionKind(unsigned LoadCount, unsigned StoreCount);

  /// Run the iterative data flow until convergence.
  void runIterativeRLE();

  /// Process the basic blocks for the gen and kill set.
  void processBasicBlocksForGenKillSet();

  /// Process the basic blocks with the gen and kill set.
  void processBasicBlocksWithGenKillSet();

  /// Process the basic blocks for values generated in the current basic
  /// block.
  void processBasicBlocksForAvailValue();

  /// Process basic blocks to perform the redundant load elimination.
  void processBasicBlocksForRLE(bool Optimistic);

  /// Returns the alias analysis we will use during all computations.
  AliasAnalysis *getAA() const { return AA; }

  /// Returns the type expansion analysis we will use during all
  /// computations.
  TypeExpansionAnalysis *getTE() const { return TE; }

  /// Returns the map from an accessed SILValue base to its LSLocation.
  LSLocationBaseMap &getBM() { return BaseToLocIndex; }

  /// Returns the BlockState for the given basic block.
  BlockState &getBlockState(SILBasicBlock *B) { return BBToLocState[B]; }

  /// Get the bit representing the LSLocation in the LocationVault.
  unsigned getLocationBit(const LSLocation &L);

  /// Given the bit, get the LSLocation from the LocationVault.
  LSLocation &getLocation(const unsigned index);

  /// Get the bit representing the LSValue in the LSValueVault.
  unsigned getValueBit(const LSValue &L);

  /// Given the bit, get the LSValue from the LSValueVault.
  LSValue &getValue(const unsigned index);

  /// Given an LSLocation, try to collect all the LSValues for this
  /// LSLocation in the given basic block. If some of the locations only have
  /// covering values, find the values in the predecessors.
  bool collectLocationValues(SILBasicBlock *BB, LSLocation &L,
                             LSLocationValueMap &Values, ValueTableMap &VM);
  /// Transitively collect all the values that make up this location and
  /// create a SILArgument out of them.
  SILValue computePredecessorLocationValue(SILBasicBlock *BB, LSLocation &L);
};

} // end anonymous namespace

void BlockState::startTrackingValue(ValueTableMap &VM, unsigned L,
                                    unsigned V) {
  VM[L] = V;
}

void BlockState::stopTrackingValue(ValueTableMap &VM, unsigned B) {
  VM.erase(B);
}

bool BlockState::isTrackingLocation(llvm::SmallBitVector &BV, unsigned B) {
  return BV.test(B);
}

void BlockState::startTrackingLocation(llvm::SmallBitVector &BV, unsigned B) {
  BV.set(B);
}

void BlockState::stopTrackingLocation(llvm::SmallBitVector &BV, unsigned B) {
  BV.reset(B);
}

void BlockState::mergePredecessorsAvailSetMax(RLEContext &Ctx) {
  if (BB->pred_empty()) {
    ForwardSetMax.reset();
    return;
  }

  auto Iter = BB->pred_begin();
  ForwardSetMax = Ctx.getBlockState(*Iter).ForwardSetMax;
  Iter = std::next(Iter);
  for (auto EndIter = BB->pred_end(); Iter != EndIter; ++Iter) {
    ForwardSetMax &= Ctx.getBlockState(*Iter).ForwardSetMax;
  }
}

void BlockState::mergePredecessorAvailSet(RLEContext &Ctx) {
  // Clear the state if the basic block has no predecessor.
  if (BB->pred_empty()) {
    ForwardSetIn.reset();
    return;
  }

  auto Iter = BB->pred_begin();
  ForwardSetIn = Ctx.getBlockState(*Iter).ForwardSetOut;
  Iter = std::next(Iter);
  for (auto EndIter = BB->pred_end(); Iter != EndIter; ++Iter) {
    ForwardSetIn &= Ctx.getBlockState(*Iter).ForwardSetOut;
  }
}

void BlockState::mergePredecessorAvailSetAndValue(RLEContext &Ctx) {
  // Clear the state if the basic block has no predecessor.
  if (BB->pred_empty()) {
    ForwardSetIn.reset();
    ForwardValIn.clear();
    return;
  }

  auto Iter = BB->pred_begin();
  ForwardSetIn = Ctx.getBlockState(*Iter).ForwardSetOut;
  ForwardValIn = Ctx.getBlockState(*Iter).ForwardValOut;
  Iter = std::next(Iter);
  for (auto EndIter = BB->pred_end(); Iter != EndIter; ++Iter) {
    BlockState &OtherState = Ctx.getBlockState(*Iter);
    ForwardSetIn &= OtherState.ForwardSetOut;

    // Merge in the predecessor state.
    for (unsigned i = 0; i < LocationNum; ++i) {
      if (OtherState.ForwardSetOut[i]) {
        // There are multiple values from multiple predecessors; set this as
        // a covering value. We do not need to track the value itself, as we
        // can always go to the predecessors' BlockState to find it.
        ForwardValIn[i] = Ctx.getValueBit(LSValue(true));
        continue;
      }
      // This location does not have an available value in this predecessor;
      // stop tracking the location and its value.
      stopTrackingValue(ForwardValIn, i);
      stopTrackingLocation(ForwardSetIn, i);
    }
  }
}

void BlockState::processBasicBlockWithKind(RLEContext &Ctx, RLEKind Kind) {
  // Iterate over instructions in forward order.
  for (auto &II : *BB) {
    processInstructionWithKind(Ctx, &II, Kind);
  }
}

bool BlockState::processBasicBlockWithGenKillSet() {
  ForwardSetIn.reset(BBKillSet);
  ForwardSetIn |= BBGenSet;
  return updateForwardSetOut();
}
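
// A sketch of the merge and transfer functions implemented above (notation
// only, not code): for each basic block BB,
//
//   ForwardSetIn(BB)  = AND of ForwardSetOut(Pred) over all Pred in preds(BB)
//   ForwardSetOut(BB) = (ForwardSetIn(BB) & ~BBKillSet) | BBGenSet
//
// The worklist in processBasicBlocksWithGenKillSet below reruns this until
// updateForwardSetOut() reports no change for any block, i.e. until the
// fixed point is reached.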
SILValue BlockState::reduceValuesAtEndOfBlock(RLEContext &Ctx, LSLocation &L) {
  // First, collect the currently available locations and their corresponding
  // values into a map.
  LSLocationValueMap Values;
  LSLocationList Locs;
  LSLocation::expand(L, &BB->getModule(), Locs, Ctx.getTE());

  // Find the values that this basic block defines and the locations which
  // we do not have a concrete value for in the current basic block.
  ValueTableMap &OTM = getForwardValOut();
  for (unsigned i = 0; i < Locs.size(); ++i) {
    Values[Locs[i]] = Ctx.getValue(OTM[Ctx.getLocationBit(Locs[i])]);
  }

  // Second, reduce the available values into a single SILValue we can use to
  // forward.
  SILValue TheForwardingValue =
      LSValue::reduce(L, &BB->getModule(), Values, BB->getTerminator());

  // Return the forwarding value.
  return TheForwardingValue;
}

bool BlockState::setupRLE(RLEContext &Ctx, SILInstruction *I, SILValue Mem) {
  // Try to construct a SILValue for the current LSLocation.
  //
  // Collect the locations and their corresponding values into a map.
  LSLocation L;
  LSLocationBaseMap &BaseToLocIndex = Ctx.getBM();
  if (BaseToLocIndex.find(Mem) != BaseToLocIndex.end()) {
    L = BaseToLocIndex[Mem];
  } else {
    SILValue UO = getUnderlyingObject(Mem);
    L = LSLocation(UO, ProjectionPath::getProjectionPath(UO, Mem));
  }

  LSLocationValueMap Values;
  // Use the ForwardValIn as we are currently processing the basic block.
  if (!Ctx.collectLocationValues(I->getParent(), L, Values, getForwardValIn()))
    return false;

  // Reduce the available values into a single SILValue we can use to forward.
  SILModule *Mod = &I->getModule();
  SILValue TheForwardingValue = LSValue::reduce(L, Mod, Values, I);
  if (!TheForwardingValue)
    return false;

  // Now we have the forwarding value, record it for forwarding!
  //
  // NOTE: we do not perform the RLE right here because doing so could
  // introduce new LSLocations.
  //
  // e.g.
  //    %0 = load %x
  //    %1 = load %x
  //    %2 = struct_extract %1, #a
  //    %3 = load %2
  //
  // If we perform the RLE and replace %1 with %0, we end up having a memory
  // location we did not have before, i.e. Base == %0, and Path == #a.
  //
  // We may be able to add the LSLocation to the vault, but it gets
  // complicated very quickly, e.g. we need to resize the bit vectors, etc.
  //
  // However, since we already know the instruction to replace and the value
  // to replace it with, we can record it for now and forward it after all
  // the forwardable values are recorded in the function.
  RedundantLoads[I] = TheForwardingValue;
  return true;
}

void BlockState::updateForwardSetForRead(RLEContext &Ctx, unsigned B) {
  startTrackingLocation(ForwardSetIn, B);
}

void BlockState::updateGenKillSetForRead(RLEContext &Ctx, unsigned B) {
  startTrackingLocation(BBGenSet, B);
  stopTrackingLocation(BBKillSet, B);
}

void BlockState::updateForwardSetAndValForRead(RLEContext &Ctx, unsigned L,
                                               unsigned V) {
  // Track the new location and value.
  startTrackingValue(ForwardValIn, L, V);
  startTrackingLocation(ForwardSetIn, L);
}

void BlockState::updateGenKillSetForWrite(RLEContext &Ctx, unsigned B) {
  // This is a store; invalidate any location that this location may alias,
  // as their values can no longer be forwarded.
  LSLocation &R = Ctx.getLocation(B);
  for (unsigned i = 0; i < LocationNum; ++i) {
    if (!isTrackingLocation(ForwardSetMax, i))
      continue;
    LSLocation &L = Ctx.getLocation(i);
    if (!L.isMayAliasLSLocation(R, Ctx.getAA()))
      continue;
    // MayAlias, invalidate the location.
    stopTrackingLocation(BBGenSet, i);
    startTrackingLocation(BBKillSet, i);
  }

  // Start tracking this location.
  startTrackingLocation(BBGenSet, B);
  stopTrackingLocation(BBKillSet, B);
}
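
// For example (illustrative): suppose the tracked locations are
// { (%a, #S.x), (%a, #S.y), (%p, e) } and we see a store to (%a, #S.x),
// where %p may alias %a. updateGenKillSetForWrite then:
//
//   kills (%p, e)       - its old value may be clobbered by this store
//   gens  (%a, #S.x)    - the stored value becomes available
//
// (%a, #S.y) is untouched, since distinct fields of %a do not alias.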
void BlockState::updateMaxAvailSetForWrite(RLEContext &Ctx, unsigned B) {
  startTrackingLocation(ForwardSetMax, B);
}

void BlockState::updateMaxAvailSetForRead(RLEContext &Ctx, unsigned B) {
  startTrackingLocation(ForwardSetMax, B);
}

void BlockState::updateForwardSetForWrite(RLEContext &Ctx, unsigned B) {
  // This is a store; invalidate any location that this location may alias,
  // as their values can no longer be forwarded.
  LSLocation &R = Ctx.getLocation(B);
  for (unsigned i = 0; i < LocationNum; ++i) {
    if (!isTrackingLocation(ForwardSetIn, i))
      continue;
    LSLocation &L = Ctx.getLocation(i);
    if (!L.isMayAliasLSLocation(R, Ctx.getAA()))
      continue;
    // MayAlias, invalidate the location.
    stopTrackingLocation(ForwardSetIn, i);
  }

  // Start tracking this location.
  startTrackingLocation(ForwardSetIn, B);
}

void BlockState::updateForwardSetAndValForWrite(RLEContext &Ctx, unsigned L,
                                                unsigned V) {
  // This is a store; invalidate any location that this location may alias,
  // as their values can no longer be forwarded.
  LSLocation &R = Ctx.getLocation(L);
  for (unsigned i = 0; i < LocationNum; ++i) {
    if (!isTrackingLocation(ForwardSetIn, i))
      continue;
    LSLocation &X = Ctx.getLocation(i);
    if (!X.isMayAliasLSLocation(R, Ctx.getAA()))
      continue;
    // MayAlias, invalidate the location and value.
    stopTrackingValue(ForwardValIn, i);
    stopTrackingLocation(ForwardSetIn, i);
  }

  // Start tracking this location and value.
  startTrackingLocation(ForwardSetIn, L);
  startTrackingValue(ForwardValIn, L, V);
}

void BlockState::processWrite(RLEContext &Ctx, SILInstruction *I, SILValue Mem,
                              SILValue Val, RLEKind Kind) {
  // Initialize the LSLocation.
  LSLocation L;
  LSLocationBaseMap &BaseToLocIndex = Ctx.getBM();
  if (BaseToLocIndex.find(Mem) != BaseToLocIndex.end()) {
    L = BaseToLocIndex[Mem];
  } else {
    SILValue UO = getUnderlyingObject(Mem);
    L = LSLocation(UO, ProjectionPath::getProjectionPath(UO, Mem));
  }

  // If we can't figure out the Base or Projection Path for the write,
  // process it as an unknown memory instruction.
  if (!L.isValid()) {
    // We can ignore unknown store instructions if we are computing the
    // AvailSetMax.
    if (!isComputeAvailSetMax(Kind)) {
      processUnknownWriteInst(Ctx, I, Kind);
    }
    return;
  }

  // Expand the given location and val into individual fields and process
  // them as separate writes.
  LSLocationList Locs;
  LSLocation::expand(L, &I->getModule(), Locs, Ctx.getTE());

  if (isComputeAvailSetMax(Kind)) {
    for (unsigned i = 0; i < Locs.size(); ++i) {
      updateMaxAvailSetForWrite(Ctx, Ctx.getLocationBit(Locs[i]));
    }
    return;
  }

  // Are we computing the genset and killset?
  if (isComputeAvailGenKillSet(Kind)) {
    for (unsigned i = 0; i < Locs.size(); ++i) {
      updateGenKillSetForWrite(Ctx, Ctx.getLocationBit(Locs[i]));
    }
    return;
  }

  // Are we computing the available value or performing RLE?
  LSValueList Vals;
  LSValue::expand(Val, &I->getModule(), Vals, Ctx.getTE());
  if (isComputeAvailValue(Kind) || isPerformingRLE(Kind)) {
    for (unsigned i = 0; i < Locs.size(); ++i) {
      updateForwardSetAndValForWrite(Ctx, Ctx.getLocationBit(Locs[i]),
                                     Ctx.getValueBit(Vals[i]));
    }
    return;
  }

  llvm_unreachable("Unknown RLE compute kind");
}
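
// For example (illustrative): for a struct S with fields a and b, the write
//
//   store %v to %addr : $*S
//
// expands into Locs = [(%addr, #S.a), (%addr, #S.b)] and
// Vals = [(%v, #S.a), (%v, #S.b)], and each (Loc, Val) pair is then
// processed above as a separate, indivisible write.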
void BlockState::processRead(RLEContext &Ctx, SILInstruction *I, SILValue Mem,
                             SILValue Val, RLEKind Kind) {
  // Initialize the LSLocation.
  LSLocation L;
  LSLocationBaseMap &BaseToLocIndex = Ctx.getBM();
  if (BaseToLocIndex.find(Mem) != BaseToLocIndex.end()) {
    L = BaseToLocIndex[Mem];
  } else {
    SILValue UO = getUnderlyingObject(Mem);
    L = LSLocation(UO, ProjectionPath::getProjectionPath(UO, Mem));
  }

  // If we can't figure out the Base or Projection Path for the read, simply
  // ignore it for now.
  if (!L.isValid())
    return;

  // Expand the given LSLocation and Val into individual fields and process
  // them as separate reads.
  LSLocationList Locs;
  LSLocation::expand(L, &I->getModule(), Locs, Ctx.getTE());

  if (isComputeAvailSetMax(Kind)) {
    for (unsigned i = 0; i < Locs.size(); ++i) {
      updateMaxAvailSetForRead(Ctx, Ctx.getLocationBit(Locs[i]));
    }
    return;
  }

  // Are we computing the genset and killset?
  if (isComputeAvailGenKillSet(Kind)) {
    for (unsigned i = 0; i < Locs.size(); ++i) {
      updateGenKillSetForRead(Ctx, Ctx.getLocationBit(Locs[i]));
    }
    return;
  }

  // Are we computing available values?
  bool CanForward = true;
  LSValueList Vals;
  LSValue::expand(Val, &I->getModule(), Vals, Ctx.getTE());
  if (isComputeAvailValue(Kind) || isPerformingRLE(Kind)) {
    for (unsigned i = 0; i < Locs.size(); ++i) {
      if (isTrackingLocation(ForwardSetIn, Ctx.getLocationBit(Locs[i])))
        continue;
      updateForwardSetAndValForRead(Ctx, Ctx.getLocationBit(Locs[i]),
                                    Ctx.getValueBit(Vals[i]));
      // We cannot perform the forwarding as we are missing at least some
      // pieces of the read location.
      CanForward = false;
    }
  }

  // Simply return if we are not performing RLE or we do not have all the
  // values available to perform RLE.
  if (!isPerformingRLE(Kind) || !CanForward)
    return;

  // Lastly, forward the value to the load.
  setupRLE(Ctx, I, Mem);
}

void BlockState::processStoreInst(RLEContext &Ctx, StoreInst *SI,
                                  RLEKind Kind) {
  processWrite(Ctx, SI, SI->getDest(), SI->getSrc(), Kind);
}

void BlockState::processLoadInst(RLEContext &Ctx, LoadInst *LI, RLEKind Kind) {
  processRead(Ctx, LI, LI->getOperand(), SILValue(LI), Kind);
}

void BlockState::processUnknownWriteInstForGenKillSet(RLEContext &Ctx,
                                                      SILInstruction *I) {
  auto *AA = Ctx.getAA();
  for (unsigned i = 0; i < LocationNum; ++i) {
    if (!isTrackingLocation(ForwardSetMax, i))
      continue;
    // Invalidate any location this instruction may write to.
    //
    // TODO: checking may-alias with Base is overly conservative;
    // we should check may-alias with base plus projection path.
    LSLocation &R = Ctx.getLocation(i);
    if (!AA->mayWriteToMemory(I, R.getBase()))
      continue;
    // MayAlias.
    stopTrackingLocation(BBGenSet, i);
    startTrackingLocation(BBKillSet, i);
  }
}

void BlockState::processUnknownWriteInstForRLE(RLEContext &Ctx,
                                               SILInstruction *I) {
  auto *AA = Ctx.getAA();
  for (unsigned i = 0; i < LocationNum; ++i) {
    if (!isTrackingLocation(ForwardSetIn, i))
      continue;
    // Invalidate any location this instruction may write to.
    //
    // TODO: checking may-alias with Base is overly conservative;
    // we should check may-alias with base plus projection path.
    LSLocation &R = Ctx.getLocation(i);
    if (!AA->mayWriteToMemory(I, R.getBase()))
      continue;
    // MayAlias.
    stopTrackingLocation(ForwardSetIn, i);
    stopTrackingValue(ForwardValIn, i);
  }
}

void BlockState::processUnknownWriteInst(RLEContext &Ctx, SILInstruction *I,
                                         RLEKind Kind) {
  // If this is a release on a guaranteed parameter, it cannot call a deinit,
  // which might read or write memory.
  if (isGuaranteedParamRelease(I))
    return;

  // Are we computing the genset and killset?
  if (isComputeAvailGenKillSet(Kind)) {
    processUnknownWriteInstForGenKillSet(Ctx, I);
    return;
  }

  // Are we computing the available value or doing RLE?
  if (isComputeAvailValue(Kind) || isPerformingRLE(Kind)) {
    processUnknownWriteInstForRLE(Ctx, I);
    return;
  }

  llvm_unreachable("Unknown RLE compute kind");
}
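
// For example (illustrative SIL): an opaque call such as
//
//   %r = apply %unknown_fn(%p) : $@convention(thin) (Builtin.RawPointer) -> ()
//
// may write to memory we cannot reason about, so every tracked location
// whose base the callee may write to is invalidated above.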
void BlockState::processDeallocStackInstForGenKillSet(RLEContext &Ctx,
                                                      SILInstruction *I) {
  SILValue ASI = findAllocStackInst(I);
  for (unsigned i = 0; i < LocationNum; ++i) {
    LSLocation &R = Ctx.getLocation(i);
    if (R.getBase() != ASI)
      continue;
    // MayAlias.
    stopTrackingLocation(BBGenSet, i);
    startTrackingLocation(BBKillSet, i);
  }
}

void BlockState::processDeallocStackInstForRLE(RLEContext &Ctx,
                                               SILInstruction *I) {
  SILValue ASI = findAllocStackInst(I);
  for (unsigned i = 0; i < LocationNum; ++i) {
    LSLocation &R = Ctx.getLocation(i);
    if (R.getBase() != ASI)
      continue;
    // MayAlias.
    stopTrackingLocation(ForwardSetIn, i);
    stopTrackingValue(ForwardValIn, i);
  }
}

void BlockState::processDeallocStackInst(RLEContext &Ctx, SILInstruction *I,
                                         RLEKind Kind) {
  // Are we computing the genset and killset?
  if (isComputeAvailGenKillSet(Kind)) {
    processDeallocStackInstForGenKillSet(Ctx, I);
    return;
  }

  // Are we computing the available value or doing RLE?
  if (isComputeAvailValue(Kind) || isPerformingRLE(Kind)) {
    processDeallocStackInstForRLE(Ctx, I);
    return;
  }

  llvm_unreachable("Unknown RLE compute kind");
}

void BlockState::processInstructionWithKind(RLEContext &Ctx,
                                            SILInstruction *Inst,
                                            RLEKind Kind) {
  // This is a StoreInst; try to see whether it clobbers any forwarding
  // value.
  if (auto *SI = dyn_cast<StoreInst>(Inst)) {
    processStoreInst(Ctx, SI, Kind);
    return;
  }

  // This is a LoadInst. Let's see if we can find a previously loaded or
  // stored value to use instead of this load.
  if (auto *LI = dyn_cast<LoadInst>(Inst)) {
    processLoadInst(Ctx, LI, Kind);
    return;
  }

  if (auto *DSI = dyn_cast<DeallocStackInst>(Inst)) {
    processDeallocStackInst(Ctx, DSI, Kind);
    return;
  }

  // If this instruction has side effects, but is inert from a load store
  // perspective, skip it.
  if (isRLEInertInstruction(Inst))
    return;

  // If this instruction does not read or write memory, we can skip it.
  if (!Inst->mayReadOrWriteMemory())
    return;

  // If we have an instruction that may write to memory and we cannot prove
  // that it and its operands cannot alias a load we have visited,
  // invalidate that load.
  if (Inst->mayWriteToMemory()) {
    processUnknownWriteInst(Ctx, Inst, Kind);
    return;
  }
}

RLEContext::ProcessKind
RLEContext::getProcessFunctionKind(unsigned LoadCount, unsigned StoreCount) {
  // Don't optimize functions that are marked as 'no.optimize'.
  if (!Fn->shouldOptimize())
    return ProcessKind::ProcessNone;

  // There is really no point optimizing here, as there are no forwardable
  // loads.
  if (LoadCount + StoreCount < 2)
    return ProcessKind::ProcessNone;

  bool RunOneIteration = true;
  unsigned BBCount = 0;
  unsigned LocationCount = LocationVault.size();

  if (LocationCount == 0)
    return ProcessKind::ProcessNone;

  // If every basic block has all its predecessors processed by the time it
  // is visited in a reverse post order walk, then this function can be
  // processed in one iteration, i.e. there is no need to generate the genset
  // and killset.
  auto *PO = PM->getAnalysis<PostOrderAnalysis>()->get(Fn);
  llvm::DenseSet<SILBasicBlock *> HandledBBs;
  for (SILBasicBlock *B : PO->getReversePostOrder()) {
    ++BBCount;
    for (auto X : B->getPreds()) {
      if (HandledBBs.find(X) == HandledBBs.end()) {
        RunOneIteration = false;
        break;
      }
    }
    HandledBBs.insert(B);
  }

  // The data flow may take too long to run.
  if (BBCount * LocationCount > MaxLSLocationBBMultiplicationNone)
    return ProcessKind::ProcessNone;

  // This function's data flow would converge in 1 iteration.
  if (RunOneIteration)
    return ProcessKind::ProcessOneIteration;

  // For large functions, be pessimistic and run just one data flow iteration
  // of redundant load elimination.
  if (BBCount * LocationCount > MaxLSLocationBBMultiplicationPessimistic)
    return ProcessKind::ProcessOneIteration;

  return ProcessKind::ProcessMultipleIterations;
}
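
// Worked example of the thresholds (hypothetical numbers, assuming the CFG
// has back edges so it cannot converge in one pass): a function with 100
// basic blocks and 50 locations gives 100 * 50 = 5000 queries in the worst
// case. 5000 <= 128 * 128 (16384), so RLE runs, but 5000 > 64 * 64 (4096),
// so only a single pessimistic iteration is performed. With 60 blocks and
// 60 locations (3600 <= 4096), the optimistic multi-iteration data flow is
// used instead.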
//===----------------------------------------------------------------------===//
//                         RLEContext Implementation
//===----------------------------------------------------------------------===//

RLEContext::RLEContext(SILFunction *F, SILPassManager *PM, AliasAnalysis *AA,
                       TypeExpansionAnalysis *TE, PostOrderFunctionInfo *PO)
    : Fn(F), PM(PM), AA(AA), TE(TE), PO(PO) {}

LSLocation &RLEContext::getLocation(const unsigned index) {
  return LocationVault[index];
}

unsigned RLEContext::getLocationBit(const LSLocation &Loc) {
  // Return the bit position of the given Loc in the LocationVault. The bit
  // position is then used to set/reset the bitvector kept by each
  // BlockState.
  //
  // We should have the location populated by the enumerateLSLocation at this
  // point.
  auto Iter = LocToBitIndex.find(Loc);
  assert(Iter != LocToBitIndex.end() && "Location should have been enum'ed");
  return Iter->second;
}

LSValue &RLEContext::getValue(const unsigned index) {
  return LSValueVault[index];
}

unsigned RLEContext::getValueBit(const LSValue &Val) {
  // Return the bit position of the given Val in the LSValueVault. The bit
  // position is then used to set/reset the bitvector kept by each
  // BlockState.
  auto Iter = ValToBitIndex.find(Val);

  // We do not walk over the function to find all the possible LSValues up
  // front, as some of these values will not be used, i.e. if the LoadInst
  // that generates a value is actually RLE'ed. Instead, we create the
  // LSValues when we need them.
  if (Iter == ValToBitIndex.end()) {
    ValToBitIndex[Val] = LSValueVault.size();
    LSValueVault.push_back(Val);
    return ValToBitIndex[Val];
  }
  return Iter->second;
}

BlockState::ValueState BlockState::getValueStateAtEndOfBlock(RLEContext &Ctx,
                                                             LSLocation &L) {
  // Find the number of covering values and concrete values for the locations
  // expanded from the given location.
  unsigned CSCount = 0, CTCount = 0;
  LSLocationList Locs;
  LSLocation::expand(L, &BB->getModule(), Locs, Ctx.getTE());

  ValueTableMap &OTM = getForwardValOut();
  for (auto &X : Locs) {
    LSValue &V = Ctx.getValue(OTM[Ctx.getLocationBit(X)]);
    if (V.isCoveringValue()) {
      ++CSCount;
      continue;
    }
    ++CTCount;
  }

  if (CSCount == Locs.size())
    return ValueState::CoverValues;
  if (CTCount == Locs.size())
    return ValueState::ConcreteValues;
  return ValueState::CoverAndConcreteValues;
}
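
// For example (illustrative): if L expands into two field locations L1 and
// L2, then at the end of the block:
//
//   both L1 and L2 have covering values  -> ValueState::CoverValues
//   both L1 and L2 have concrete values  -> ValueState::ConcreteValues
//   one covering, one concrete           -> ValueState::CoverAndConcreteValues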
SILValue RLEContext::computePredecessorLocationValue(SILBasicBlock *BB,
                                                     LSLocation &L) {
  BBValueMap Values;
  llvm::DenseSet<SILBasicBlock *> HandledBBs;
  llvm::SmallVector<SILBasicBlock *, 8> WorkList;

  // Push in all the predecessors to get started.
  for (auto Pred : BB->getPreds()) {
    WorkList.push_back(Pred);
  }

  while (!WorkList.empty()) {
    auto *CurBB = WorkList.pop_back_val();
    BlockState &Forwarder = getBlockState(CurBB);

    // Mark this basic block as processed.
    HandledBBs.insert(CurBB);

    // There are 3 cases that can happen here.
    //
    // 1. The current basic block contains concrete values for the entire
    //    location.
    // 2. The current basic block contains covering values for the entire
    //    location.
    // 3. The current basic block contains concrete values for part of the
    //    location and covering values for the rest.

    // This BlockState contains concrete values for all the expanded
    // locations; collect and reduce them into a single value in the current
    // basic block.
    if (Forwarder.isConcreteValues(*this, L)) {
      Values[CurBB] = Forwarder.reduceValuesAtEndOfBlock(*this, L);
      continue;
    }

    // This BlockState does not contain a concrete value for any of the
    // expanded locations; collect the values in this block's predecessors.
    if (Forwarder.isCoverValues(*this, L)) {
      for (auto Pred : CurBB->getPreds()) {
        if (HandledBBs.find(Pred) != HandledBBs.end())
          continue;
        WorkList.push_back(Pred);
      }
      continue;
    }

    // This block contains concrete values for some but not all the expanded
    // locations; recursively call collectLocationValues to materialize the
    // value that reaches this basic block.
    LSLocationValueMap LSValues;
    if (!collectLocationValues(CurBB, L, LSValues,
                               Forwarder.getForwardValOut()))
      return SILValue();

    // Reduce the available values into a single SILValue we can use to
    // forward.
    SILInstruction *IPt = CurBB->getTerminator();
    Values[CurBB] = LSValue::reduce(L, &BB->getModule(), LSValues, IPt);
  }

  // Finally, collect all the values for the SILArgument and materialize it
  // using the SSAUpdater.
  Updater.Initialize(L.getType(&BB->getModule()).getObjectType());
  for (auto V : Values) {
    Updater.AddAvailableValue(V.first, V.second);
  }

  return Updater.GetValueInMiddleOfBlock(BB);
}
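
// For example (illustrative SIL): in a diamond CFG where both predecessors
// of bb3 make the same location available with different values,
//
//   bb1:                          bb2:
//     store %x to %a                store %y to %a
//     br bb3                        br bb3
//
//   bb3:
//     %1 = load %a        // forwardable
//
// the covering value is materialized by the SSA updater as a SILArgument of
// bb3 (%x passed from bb1, %y from bb2), and the load then uses that
// argument instead of reloading from memory.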
bool RLEContext::collectLocationValues(SILBasicBlock *BB, LSLocation &L,
                                       LSLocationValueMap &Values,
                                       ValueTableMap &VM) {
  LSLocationSet CSLocs;
  LSLocationList Locs;
  LSLocation::expand(L, &BB->getModule(), Locs, TE);

  auto *Mod = &BB->getModule();
  // Find the locations that this basic block defines and the locations which
  // we do not have a concrete value for in the current basic block.
  for (auto &X : Locs) {
    Values[X] = getValue(VM[getLocationBit(X)]);
    if (!Values[X].isCoveringValue())
      continue;
    CSLocs.insert(X);
  }

  // For locations which we do not have concrete values for in this basic
  // block, try to reduce them to the minimum # of locations possible; this
  // will help us generate as few SILArguments as possible.
  LSLocation::reduce(L, Mod, CSLocs);

  // To handle covering values, we need to go to the predecessors and
  // materialize them there.
  for (auto &X : CSLocs) {
    SILValue V = computePredecessorLocationValue(BB, X);
    if (!V)
      return false;

    // We've constructed a concrete value for the covering value. Expand and
    // collect the newly created forwardable values.
    LSLocationList Locs;
    LSValueList Vals;
    LSLocation::expand(X, Mod, Locs, TE);
    LSValue::expand(V, Mod, Vals, TE);

    for (unsigned i = 0; i < Locs.size(); ++i) {
      Values[Locs[i]] = Vals[i];
      assert(Values[Locs[i]].isValid() && "Invalid load store value");
    }
  }
  return true;
}

void RLEContext::processBasicBlocksForGenKillSet() {
  for (SILBasicBlock *BB : PO->getReversePostOrder()) {
    BlockState &S = getBlockState(BB);

    // Compute the AvailSetMax at the beginning of the basic block.
    S.mergePredecessorsAvailSetMax(*this);

    // Compute the genset and killset.
    //
    // To optimize this process, we also compute the AvailSetMax at a
    // particular point in the basic block.
    for (auto I = BB->begin(), E = BB->end(); I != E; ++I) {
      if (auto *LI = dyn_cast<LoadInst>(&*I)) {
        if (BBWithLoads.find(BB) == BBWithLoads.end())
          BBWithLoads.insert(BB);
        S.processLoadInst(*this, LI, RLEKind::ComputeAvailSetMax);
      }
      if (auto *SI = dyn_cast<StoreInst>(&*I)) {
        S.processStoreInst(*this, SI, RLEKind::ComputeAvailSetMax);
      }
      S.processInstructionWithKind(*this, &*I,
                                   RLEKind::ComputeAvailGenKillSet);
    }
  }
}

void RLEContext::processBasicBlocksWithGenKillSet() {
  // Process each basic block with the gen and kill set. Every time the
  // ForwardSetOut of a basic block changes, the data flow is rerun on its
  // successors.
  llvm::SmallVector<SILBasicBlock *, 16> WorkList;
  llvm::DenseSet<SILBasicBlock *> HandledBBs;

  // Push into the worklist in post order so that we can pop from the back
  // and get reverse post order.
  for (SILBasicBlock *BB : PO->getPostOrder()) {
    WorkList.push_back(BB);
    HandledBBs.insert(BB);
  }

  while (!WorkList.empty()) {
    SILBasicBlock *BB = WorkList.pop_back_val();
    HandledBBs.erase(BB);

    // Intersection.
    BlockState &Forwarder = getBlockState(BB);
    // Compute the ForwardSetIn at the beginning of the basic block.
    Forwarder.mergePredecessorAvailSet(*this);

    if (Forwarder.processBasicBlockWithGenKillSet()) {
      for (auto &X : BB->getSuccessors()) {
        // We do not push a basic block into the worklist if it's already
        // in the worklist.
        if (HandledBBs.find(X) != HandledBBs.end())
          continue;
        WorkList.push_back(X);
      }
    }
  }
}

void RLEContext::processBasicBlocksForAvailValue() {
  for (SILBasicBlock *BB : PO->getReversePostOrder()) {
    BlockState &Forwarder = getBlockState(BB);

    // Merge the predecessors. After merging, BlockState now contains
    // lists of available LSLocations and their values that reach the
    // beginning of the basic block along all paths.
    Forwarder.mergePredecessorAvailSetAndValue(*this);

    // Merge duplicate loads, and forward stores to loads. We also update
    // the lists of stores|loads to reflect the end of the basic block.
    Forwarder.processBasicBlockWithKind(*this, RLEKind::ComputeAvailValue);

    // Update the locations with available values. We do not need to update
    // the available BitVector here as they should have been initialized and
    // stabilized in processBasicBlocksWithGenKillSet.
    Forwarder.updateForwardValOut();
  }
}

void RLEContext::processBasicBlocksForRLE(bool Optimistic) {
  for (SILBasicBlock *BB : PO->getReversePostOrder()) {
    // If this is an optimistic data flow, the forward sets have already been
    // computed and converged; if this basic block does not even have
    // LoadInsts, there is no point in processing every instruction in the
    // basic block again as no load will be eliminated.
    if (Optimistic && BBWithLoads.find(BB) == BBWithLoads.end())
      continue;

    BlockState &Forwarder = getBlockState(BB);

    // Merge the predecessors. After merging, BlockState now contains
    // lists of available LSLocations and their values that reach the
    // beginning of the basic block along all paths.
    Forwarder.mergePredecessorAvailSetAndValue(*this);

    // Perform the actual redundant load elimination.
    Forwarder.processBasicBlockWithKind(*this, RLEKind::PerformRLE);

    // If this is an optimistic (multi-iteration) data flow, the forward sets
    // have already been computed.
    if (Optimistic)
      continue;

    // Update the locations with available values and their values.
    Forwarder.updateForwardSetOut();
    Forwarder.updateForwardValOut();
  }
}

void RLEContext::runIterativeRLE() {
  // Generate the genset and killset for every basic block.
  processBasicBlocksForGenKillSet();

  // Iterate the data flow on the gensets and killsets until the
  // ForwardSetOut of every basic block converges.
  processBasicBlocksWithGenKillSet();

  // We have computed the available value bits; now go through every basic
  // block and compute the forwarding value locally. This is necessary
  // because, when we perform the RLE in the last iteration, we must handle
  // loops, i.e. predecessor blocks which have not yet been processed when a
  // basic block is processed.
  processBasicBlocksForAvailValue();
}
bool RLEContext::run() {
  // We perform redundant load elimination in the following phases.
  //
  // Phase 1. Compute the genset and killset for every basic block.
  //
  // Phase 2. Use an iterative data flow to compute whether there is an
  // available value at a given point; we do not yet care what the value is.
  //
  // Phase 3. Compute the real forwardable value at a given point.
  //
  // Phase 4. Perform the redundant load elimination.

  // Walk over the function and find all the locations accessed by
  // this function.
  std::pair<int, int> LSCount = std::make_pair(0, 0);
  LSLocation::enumerateLSLocations(*Fn, LocationVault, LocToBitIndex,
                                   BaseToLocIndex, TE, LSCount);

  // Check how to optimize this function.
  ProcessKind Kind = getProcessFunctionKind(LSCount.first, LSCount.second);

  // We do not optimize this function at all.
  if (Kind == ProcessKind::ProcessNone)
    return false;

  // Do we run a multi-iteration data flow?
  bool Optimistic = Kind == ProcessKind::ProcessMultipleIterations;

  // This is the set of basic blocks that we actually process. We do not
  // process unreachable blocks; their ForwardSetOut is instead initialized
  // to all 0's.
  llvm::DenseSet<SILBasicBlock *> BBToProcess;
  for (auto X : PO->getPostOrder())
    BBToProcess.insert(X);

  // For all basic blocks in the function, initialize a BB state. Since we
  // know all the locations accessed in this function, we can resize the bit
  // vector to the appropriate size.
  for (auto &B : *Fn) {
    BBToLocState[&B] = BlockState();
    BBToLocState[&B].init(&B, LocationVault.size(),
                          Optimistic &&
                              BBToProcess.find(&B) != BBToProcess.end());
  }

  if (Optimistic)
    runIterativeRLE();

  // We have the available value bits computed and the local forwarding
  // values. Set up the load forwarding.
  processBasicBlocksForRLE(Optimistic);

  // Finally, perform the redundant load replacements.
  llvm::DenseSet<SILInstruction *> InstsToDelete;
  bool SILChanged = false;
  for (auto &X : BBToLocState) {
    for (auto &F : X.second.getRL()) {
      DEBUG(llvm::dbgs() << "Replacing  " << SILValue(F.first) << "  With "
                         << F.second);
      SILChanged = true;
      F.first->replaceAllUsesWith(F.second);
      InstsToDelete.insert(F.first);
      ++NumForwardedLoads;
    }
  }

  // Erase the instructions recursively; this way, we get rid of the pass
  // dependence on DCE.
  for (auto &X : InstsToDelete) {
    // It is possible that the instruction still has uses, because it could
    // be used as the replacement value, i.e. F.second, for some other RLE
    // pairs.
    //
    // TODO: we should fix this, otherwise we are missing RLE opportunities.
    if (!X->use_empty())
      continue;
    recursivelyDeleteTriviallyDeadInstructions(X, true);
  }
  return SILChanged;
}

//===----------------------------------------------------------------------===//
//                           Top Level Entry Point
//===----------------------------------------------------------------------===//

namespace {

class RedundantLoadElimination : public SILFunctionTransform {

  StringRef getName() override { return "SIL Redundant Load Elimination"; }

  /// The entry point to the transformation.
  void run() override {
    SILFunction *F = getFunction();
    DEBUG(llvm::dbgs() << "*** RLE on function: " << F->getName() << " ***\n");

    auto *AA = PM->getAnalysis<AliasAnalysis>();
    auto *TE = PM->getAnalysis<TypeExpansionAnalysis>();
    auto *PO = PM->getAnalysis<PostOrderAnalysis>()->get(F);

    RLEContext RLE(F, PM, AA, TE, PO);
    if (RLE.run()) {
      invalidateAnalysis(SILAnalysis::InvalidationKind::Instructions);
    }
  }
};

} // end anonymous namespace

SILTransform *swift::createRedundantLoadElimination() {
  return new RedundantLoadElimination();
}