mirror of https://github.com/apple/swift.git (synced 2025-12-14 20:36:38 +01:00)
Instead of caching alias results globally for the module, make AliasAnalysis a FunctionAnalysisBase which caches the alias results per function.
Why?
* So far the result caches could only grow. They were reset when they reached a certain size. This was not ideal. Now, they are invalidated whenever the function changes.
* It was not possible to actually invalidate an alias analysis result. This is required, for example in TempRValueOpt and TempLValueOpt (so far it was done manually with invalidateInstruction).
* Type-based alias analysis results were also cached for the whole module, although they are actually function-dependent, because they depend on the function's resilience expansion. This was a potential bug.
I also added a new PassManager API to directly get a function-based analysis:
getAnalysis(SILFunction *f)
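For illustration, a function pass could query the per-function alias analysis roughly like this (a sketch only; the exact SILFunctionTransform hooks and template spelling are assumptions, not verbatim from the tree):

    // Hypothetical sketch inside a SILFunctionTransform's run() method.
    void run() override {
      SILFunction *f = getFunction();
      // The result is cached per function and invalidated when f changes.
      AliasAnalysis *AA = getPassManager()->getAnalysis<AliasAnalysis>(f);
      // ... use AA, e.g. AA->mayWriteToMemory(inst, addr) ...
    }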
The second change of this commit is the removal of the instruction-index indirection for the cache keys. The cache keys now work directly on instruction pointers instead of instruction indices, which reduces the number of hash table lookups per cache lookup from 3 to 1.
This indirection was needed to avoid dangling instruction pointers in the cache keys, but it is no longer needed because of the new delayed instruction deletion mechanism.
1705 lines
60 KiB
C++
//===--- RedundantLoadElimination.cpp - SIL Load Forwarding ---------------===//
//
// This source file is part of the Swift.org open source project
//
// Copyright (c) 2014 - 2017 Apple Inc. and the Swift project authors
// Licensed under Apache License v2.0 with Runtime Library Exception
//
// See https://swift.org/LICENSE.txt for license information
// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
//
//===----------------------------------------------------------------------===//
///
/// \file
///
/// This pass eliminates redundant loads.
///
/// A load can be eliminated if its value has already been held somewhere,
/// i.e. generated by a previous load or stored to the LSLocation by a known
/// value.
///
/// In this case, one can replace the load instruction with the previous
/// result.
///
/// Redundant Load Elimination (RLE) eliminates such loads by:
///
/// 1. Introducing a notion of a LSLocation that is used to model object
/// fields. (See below for more details).
///
/// 2. Introducing a notion of a LSValue that is used to model the value
/// that currently resides in the associated LSLocation on the particular
/// program path. (See below for more details).
///
/// 3. Performing a RPO walk over the control flow graph, tracking any
/// LSLocations that are read from or stored into in each basic block. The
/// read or stored value, kept in a map between LSLocation and LSValue,
/// becomes the available value for the LSLocation.
///
/// 4. An optimistic iterative intersection-based dataflow is performed on the
/// gensets until convergence.
///
/// At the core of RLE, there is the LSLocation class. A LSLocation is an
/// abstraction of an object field in the program. It consists of a base and a
/// projection path to the field accessed.
///
/// In SIL, one can access an aggregate as a whole, i.e. store to a struct with
/// 2 Int fields. A store like this will generate 2 *indivisible* LSLocations,
/// 1 for each field, and in addition to keeping a list of LSLocations, RLE
/// also keeps their available LSValues. We call them *indivisible* because
/// they cannot be broken down into smaller LSLocations.
///
/// LSValue consists of a base - a SILValue from the load or store inst -
/// as well as a projection path to the field it represents. So, a
/// store to a 2-field struct as mentioned above will generate 2 LSLocations
/// and 2 LSValues.
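///
/// For illustration, a hypothetical SIL-like sketch (not taken from a real
/// function): a store of a struct S with two Int fields x and y
///
///   store %v to %addr : $*S
///
/// is modeled as two indivisible LSLocations, (%addr, #S.x) and (%addr, #S.y),
/// whose available LSValues are (%v, #S.x) and (%v, #S.y).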
///
/// Every basic block keeps a map between LSLocation and LSValue. By
/// keeping the LSLocation and LSValue in their indivisible form, one
/// can easily find which part of the load is redundant and how to compute its
/// forwarding value.
///
/// Given the case in which the 2 fields of the struct both have available
/// values, RLE can find their LSValues (maybe by struct_extract from a larger
/// value) and then aggregate them.
///
/// However, this may introduce a lot of extraction and aggregation which may
/// not be necessary, i.e. a store to the struct followed by a load from the
/// struct. To solve this problem, when RLE detects that a load instruction
/// can be replaced by a forwarded value, it will try to find the minimum
/// number of extractions necessary to form the forwarded value. It will group
/// the available values by the LSValue base, i.e. the LSValues that come from
/// the same instruction, and then use extraction to obtain the needed
/// components of the base.
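///
/// Continuing the hypothetical example above: if both (%addr, #S.x) and
/// (%addr, #S.y) are available from the same LSValue base %v, RLE can forward
/// %v directly instead of extracting both fields and re-aggregating them.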
///
//===----------------------------------------------------------------------===//

#define DEBUG_TYPE "sil-redundant-load-elim"
#include "swift/SIL/Projection.h"
#include "swift/SIL/SILArgument.h"
#include "swift/SIL/SILBuilder.h"
#include "swift/SIL/BasicBlockDatastructures.h"
#include "swift/SILOptimizer/Analysis/ARCAnalysis.h"
#include "swift/SILOptimizer/Analysis/AliasAnalysis.h"
#include "swift/SILOptimizer/Analysis/DeadEndBlocksAnalysis.h"
#include "swift/SILOptimizer/Analysis/DominanceAnalysis.h"
#include "swift/SILOptimizer/Analysis/PostOrderAnalysis.h"
#include "swift/SILOptimizer/Analysis/ValueTracking.h"
#include "swift/SILOptimizer/PassManager/Passes.h"
#include "swift/SILOptimizer/PassManager/Transforms.h"
#include "swift/SILOptimizer/Utils/CFGOptUtils.h"
#include "swift/SILOptimizer/Utils/InstOptUtils.h"
#include "swift/SILOptimizer/Utils/LoadStoreOptUtils.h"
#include "swift/SILOptimizer/Utils/SILSSAUpdater.h"
#include "swift/SIL/BasicBlockData.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/None.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"

using namespace swift;

STATISTIC(NumForwardedLoads, "Number of loads forwarded");

static AllocStackInst *findAllocStackInst(DeallocStackInst *I) {
|
|
// It's allowed to be undef in unreachable code.
|
|
return dyn_cast<AllocStackInst>(I->getOperand());
|
|
}
|
|
|
|
/// ComputeAvailSetMax - If we ignore all unknown writes, what is the max
/// available set that can reach a certain point in a basic block. This
/// helps generate the genset and killset, i.e. if there is no downward visible
/// value that can reach the end of a basic block, then we know that the genset
/// and killset for the location need not be set.
|
|
///
|
|
/// ComputeAvailGenKillSet - Build the genset and killset of the basic block.
|
|
///
|
|
/// ComputeAvailValue - Compute the available value at the end of the basic
|
|
/// block.
|
|
///
|
|
/// PerformRLE - Perform the actual redundant load elimination.
|
|
enum class RLEKind : unsigned {
|
|
ComputeAvailSetMax = 0,
|
|
ComputeAvailGenKillSet = 1,
|
|
ComputeAvailValue = 2,
|
|
PerformRLE = 3,
|
|
};
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
// Utility Functions
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
static bool inline isComputeAvailSetMax(RLEKind Kind) {
|
|
return Kind == RLEKind::ComputeAvailSetMax;
|
|
}
|
|
|
|
static bool inline isComputeAvailGenKillSet(RLEKind Kind) {
|
|
return Kind == RLEKind::ComputeAvailGenKillSet;
|
|
}
|
|
|
|
static bool inline isComputeAvailValue(RLEKind Kind) {
|
|
return Kind == RLEKind::ComputeAvailValue;
|
|
}
|
|
|
|
static bool inline isPerformingRLE(RLEKind Kind) {
|
|
return Kind == RLEKind::PerformRLE;
|
|
}
|
|
|
|
/// Returns true if this is an instruction that may have side effects in a
/// general sense but is inert from a load store perspective.
|
|
static bool isRLEInertInstruction(SILInstruction *Inst) {
|
|
switch (Inst->getKind()) {
|
|
case SILInstructionKind::DeallocStackInst:
|
|
case SILInstructionKind::CondFailInst:
|
|
case SILInstructionKind::IsEscapingClosureInst:
|
|
case SILInstructionKind::IsUniqueInst:
|
|
case SILInstructionKind::EndCOWMutationInst:
|
|
case SILInstructionKind::FixLifetimeInst:
|
|
case SILInstructionKind::EndAccessInst:
|
|
case SILInstructionKind::SetDeallocatingInst:
|
|
case SILInstructionKind::DeallocRefInst:
|
|
case SILInstructionKind::BeginBorrowInst:
|
|
case SILInstructionKind::EndBorrowInst:
|
|
return true;
|
|
default:
|
|
return false;
|
|
}
|
|
}
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
// Basic Block Location State
|
|
//===----------------------------------------------------------------------===//
|
|
namespace {
|
|
|
|
/// If this function has too many basic blocks or too many locations, it may
/// take a long time to compute the genset and killset. The number of memory
/// behavior or alias queries we need to do in the worst case is roughly linear
/// in the number of basic blocks times the number of locations.
///
/// We could run RLE on functions with up to 128 basic blocks and 128
/// locations, which is a large function.
constexpr unsigned MaxLSLocationBBMultiplicationNone = 128*128;

/// We could run optimistic RLE on functions with fewer than 64 basic blocks
/// and 64 locations, which is a sizable function.
constexpr unsigned MaxLSLocationBBMultiplicationPessimistic = 64*64;

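// For illustration (hypothetical numbers; see getProcessFunctionKind below):
// a function with 100 blocks and 50 locations yields 100 * 50 = 5,000, which
// is above 64*64 = 4,096 but below 128*128 = 16,384, so only a single
// pessimistic iteration is run; a function with 200 blocks and 100 locations
// (20,000) is not processed at all.
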
/// Forward declaration.
|
|
class RLEContext;
|
|
|
|
/// State of the load store in one basic block which allows for forwarding from
|
|
/// loads, stores -> loads
|
|
class BlockState {
|
|
public:
|
|
enum class ValueState : unsigned {
|
|
CoverValues = 0,
|
|
ConcreteValues = 1,
|
|
CoverAndConcreteValues = 2,
|
|
};
|
|
|
|
private:
|
|
/// # of locations in the LocationVault.
|
|
unsigned LocationNum;
|
|
|
|
/// The basic block that we are optimizing.
|
|
SILBasicBlock *BB;
|
|
|
|
/// A bit vector for which the ith bit represents the ith LSLocation in
/// LocationVault. If the bit is set, then the location currently has a
/// downward visible value at the beginning of the basic block.
|
|
SmallBitVector ForwardSetIn;
|
|
|
|
/// A bit vector for which the ith bit represents the ith LSLocation in
/// LocationVault. If the bit is set, then the location currently has a
/// downward visible value at the end of the basic block.
|
|
SmallBitVector ForwardSetOut;
|
|
|
|
/// A bit vector for which the ith bit represents the ith LSLocation in
/// LocationVault. If we ignore all unknown writes, what's the maximum set
/// of available locations at the current position in the basic block.
|
|
SmallBitVector ForwardSetMax;
|
|
|
|
/// A bit vector for which the ith bit represents the ith LSLocation in
|
|
/// LocationVault. If the bit is set, then the basic block generates a
|
|
/// value for the location.
|
|
SmallBitVector BBGenSet;
|
|
|
|
/// A bit vector for which the ith bit represents the ith LSLocation in
|
|
/// LocationVault. If the bit is set, then the basic block kills the
|
|
/// value for the location.
|
|
SmallBitVector BBKillSet;
|
|
|
|
/// This is a map between LSLocations and their available values at the
/// beginning of this basic block.
|
|
ValueTableMap ForwardValIn;
|
|
|
|
/// This is a map between LSLocations and their available values at the end of
/// this basic block.
|
|
ValueTableMap ForwardValOut;
|
|
|
|
/// Keeps a list of replaceable instructions in the current basic block as
|
|
/// well as their SILValue replacement.
|
|
llvm::DenseMap<SingleValueInstruction *, SILValue> RedundantLoads;
|
|
|
|
/// LSLocation read or written has been extracted, expanded and mapped to the
|
|
/// bit position in the bitvector. Update it in the ForwardSetIn of the
|
|
/// current basic block.
|
|
void updateForwardSetForRead(RLEContext &Ctx, unsigned B);
|
|
void updateForwardSetForWrite(RLEContext &Ctx, unsigned B);
|
|
|
|
/// LSLocation read or written has been extracted, expanded and mapped to the
/// bit position in the bitvector. Update it in the genset and killset of the
/// current basic block.
|
|
void updateGenKillSetForRead(RLEContext &Ctx, unsigned B);
|
|
void updateGenKillSetForWrite(RLEContext &Ctx, unsigned B);
|
|
|
|
/// LSLocation read or written has been extracted, expanded and mapped to the
/// bit position in the bitvector. Update it in the MaxAvailForwardSet of the
/// current basic block.
|
|
void updateMaxAvailSetForRead(RLEContext &Ctx, unsigned B);
|
|
void updateMaxAvailSetForWrite(RLEContext &Ctx, unsigned B);
|
|
|
|
/// LSLocation written has been extracted, expanded and mapped to the bit
/// position in the bitvector. Process it using the bit position.
|
|
void updateForwardSetAndValForRead(RLEContext &Ctx, unsigned L, unsigned V);
|
|
void updateForwardSetAndValForWrite(RLEContext &Ctx, unsigned L, unsigned V);
|
|
|
|
/// There is a read to a LSLocation, expand the LSLocation into individual
|
|
/// fields before processing them.
|
|
void processRead(RLEContext &Ctx, SILInstruction *I, SILValue Mem,
|
|
SILValue Val, RLEKind Kind);
|
|
|
|
/// There is a write to a LSLocation, expand the LSLocation into individual
|
|
/// fields before processing them.
|
|
void processWrite(RLEContext &Ctx, SILInstruction *I, SILValue Mem,
|
|
SILValue Val, RLEKind Kind);
|
|
|
|
/// BitVector manipulation functions.
|
|
void startTrackingLocation(SmallBitVector &BV, unsigned B);
|
|
void stopTrackingLocation(SmallBitVector &BV, unsigned B);
|
|
bool isTrackingLocation(SmallBitVector &BV, unsigned B);
|
|
void startTrackingValue(ValueTableMap &VM, unsigned L, unsigned V);
|
|
void stopTrackingValue(ValueTableMap &VM, unsigned B);
|
|
|
|
public:
|
|
BlockState() = default;
|
|
|
|
void init(SILBasicBlock *NewBB, unsigned bitcnt, bool optimistic) {
|
|
BB = NewBB;
|
|
LocationNum = bitcnt;
|
|
// For reachable basic blocks, the initial state of ForwardSetOut should be
|
|
// all 1's. Otherwise the dataflow solution could be too conservative.
|
|
//
|
|
// Consider this case: the forwardable value from var a = 10 before the loop
// will not be forwarded if the ForwardSetOut is set to 0 initially.
|
|
//
|
|
// var a = 10
|
|
// for _ in 0...1024 {}
|
|
// use(a);
|
|
//
|
|
// However, by doing so, we can only do the data forwarding after the
|
|
// data flow stabilizes.
|
|
//
|
|
// We set the initial state of unreachable blocks to 0, as we do not have
// a value for the location.
//
// This is a bit conservative as we could be missing forwarding
// opportunities, i.e. a join block with 1 predecessor being an
// unreachable block.
//
// We rely on other passes to clean up unreachable blocks.
|
|
ForwardSetIn.resize(LocationNum, false);
|
|
ForwardSetOut.resize(LocationNum, optimistic);
|
|
|
|
// If we are running an optimistic data flow, set forward max to true
|
|
// initially.
|
|
ForwardSetMax.resize(LocationNum, optimistic);
|
|
|
|
BBGenSet.resize(LocationNum, false);
|
|
BBKillSet.resize(LocationNum, false);
|
|
}
|
|
|
|
/// Initialize the AvailSetMax by intersecting this basic block's
|
|
/// predecessors' AvailSetMax.
|
|
void mergePredecessorsAvailSetMax(RLEContext &Ctx);
|
|
|
|
/// Initialize the AvailSet by intersecting this basic block' predecessors'
|
|
/// AvailSet.
|
|
void mergePredecessorAvailSet(RLEContext &Ctx);
|
|
|
|
/// Initialize the AvailSet and AvailVal of the current basic block.
|
|
void mergePredecessorAvailSetAndValue(RLEContext &Ctx);
|
|
|
|
/// Reached the end of the basic block, update the ForwardValOut with the
|
|
/// ForwardValIn.
|
|
void updateForwardValOut() { ForwardValOut = ForwardValIn; }
|
|
|
|
/// Check whether the ForwardSetOut has changed. If it does, we need to
|
|
/// rerun the data flow to reach fixed point.
|
|
bool updateForwardSetOut() {
|
|
if (ForwardSetIn == ForwardSetOut)
|
|
return false;
|
|
ForwardSetOut = ForwardSetIn;
|
|
return true;
|
|
}
|
|
|
|
/// Returns the current basic block we are processing.
|
|
SILBasicBlock *getBB() const { return BB; }
|
|
|
|
/// Returns the ForwardValIn for the current basic block.
|
|
ValueTableMap &getForwardValIn() { return ForwardValIn; }
|
|
|
|
/// Returns the ForwardValOut for the current basic block.
|
|
ValueTableMap &getForwardValOut() { return ForwardValOut; }
|
|
|
|
/// Returns the redundant loads and their replacements in the current basic
/// block.
|
|
llvm::DenseMap<SingleValueInstruction *, SILValue> &getRL() {
|
|
return RedundantLoads;
|
|
}
|
|
|
|
/// Look into the value for the given LSLocation at the end of the basic block
/// and return one of the three ValueState types.
|
|
ValueState getValueStateAtEndOfBlock(RLEContext &Ctx, LSLocation &L);
|
|
|
|
/// Wrappers to query the value state of the location in this BlockState.
|
|
bool isCoverValues(RLEContext &Ctx, LSLocation &L) {
|
|
return getValueStateAtEndOfBlock(Ctx, L) == ValueState::CoverValues;
|
|
}
|
|
bool isConcreteValues(RLEContext &Ctx, LSLocation &L) {
|
|
return getValueStateAtEndOfBlock(Ctx, L) == ValueState::ConcreteValues;
|
|
}
|
|
|
|
/// Iterate over the instructions in the basic block in forward order and
|
|
/// process them w.r.t. the given \p Kind.
|
|
void processInstructionWithKind(RLEContext &Ctx, SILInstruction *I,
|
|
RLEKind Kind);
|
|
void processBasicBlockWithKind(RLEContext &Ctx, RLEKind Kind);
|
|
|
|
/// Process the current basic block with the genset and killset. Return true
|
|
/// if the ForwardSetOut changes.
|
|
bool processBasicBlockWithGenKillSet();
|
|
|
|
/// Set up the value for redundant load elimination.
|
|
bool setupRLE(RLEContext &Ctx, SILInstruction *I, SILValue Mem);
|
|
|
|
/// Process Instruction which writes to memory in an unknown way.
|
|
void processUnknownWriteInst(RLEContext &Ctx, SILInstruction *I,
|
|
RLEKind Kind);
|
|
void processUnknownWriteInstForGenKillSet(RLEContext &Ctx, SILInstruction *I);
|
|
void processUnknownWriteInstForRLE(RLEContext &Ctx, SILInstruction *I);
|
|
|
|
|
|
void processDeallocStackInst(RLEContext &Ctx, DeallocStackInst *I,
|
|
RLEKind Kind);
|
|
void processDeallocStackInstForGenKillSet(RLEContext &Ctx, DeallocStackInst *I);
|
|
void processDeallocStackInstForRLE(RLEContext &Ctx, DeallocStackInst *I);
|
|
|
|
/// Process LoadInst. Extract LSLocations from LoadInst.
|
|
void processLoadInst(RLEContext &Ctx, LoadInst *LI, RLEKind Kind);
|
|
|
|
/// Process StoreInst. Extract LSLocations from StoreInst.
|
|
void processStoreInst(RLEContext &Ctx, StoreInst *SI, RLEKind Kind);
|
|
|
|
/// Returns a *single* forwardable SILValue for the given LSLocation right
|
|
/// before the InsertPt instruction.
|
|
SILValue reduceValuesAtEndOfBlock(RLEContext &Ctx, LSLocation &L);
|
|
|
|
#ifndef NDEBUG
|
|
void dump(RLEContext &Ctx);
|
|
#endif
|
|
};
|
|
|
|
} // end anonymous namespace
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
// RLEContext Interface
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
namespace {
|
|
|
|
/// This class stores global state that we use when computing redundant loads
/// and their replacements in each basic block.
|
|
class RLEContext {
|
|
enum class ProcessKind {
|
|
ProcessMultipleIterations = 0,
|
|
ProcessOneIteration = 1,
|
|
ProcessNone = 2,
|
|
};
|
|
private:
|
|
/// The function we are currently processing.
|
|
SILFunction *Fn;
|
|
|
|
/// The passmanager we are using.
|
|
SILPassManager *PM;
|
|
|
|
/// The alias analysis that we will use during all computations.
|
|
AliasAnalysis *AA;
|
|
|
|
/// The type expansion analysis we will use during all computations.
|
|
TypeExpansionAnalysis *TE;
|
|
|
|
/// The SSA updater we use to materialize covering values.
|
|
SILSSAUpdater Updater;
|
|
|
|
/// The range that we use to iterate over the post order and reverse post
|
|
/// order of the given function.
|
|
PostOrderFunctionInfo *PO;
|
|
|
|
/// Epilogue release analysis.
|
|
EpilogueARCFunctionInfo *EAFI;
|
|
|
|
/// Keeps all the locations for the current function. The BitVector in each
|
|
/// BlockState is then laid on top of it to keep track of which LSLocation
|
|
/// has a downward available value.
|
|
std::vector<LSLocation> LocationVault;
|
|
|
|
/// Contains a map between LSLocations and their indices in the LocationVault.
/// Used for fast lookup.
|
|
LSLocationIndexMap LocToBitIndex;
|
|
|
|
/// Keeps a map between the accessed SILValue and the location.
|
|
LSLocationBaseMap BaseToLocIndex;
|
|
|
|
/// Keeps all the load store values for the current function. The BitVector in
/// each BlockState is then laid on top of it to keep track of which LSLocation
/// has a downward available value.
|
|
std::vector<LSValue> LSValueVault;
|
|
|
|
/// Contains a map between LSValues and their indices in the LSValueVault.
/// Used for fast lookup.
|
|
llvm::DenseMap<LSValue, unsigned> ValToBitIndex;
|
|
|
|
/// A map from each BasicBlock to its BlockState.
|
|
BasicBlockData<BlockState> BBToLocState;
|
|
|
|
/// Keeps a list of basic blocks that have LoadInsts. If a basic block does
/// not have a LoadInst, we do not actually perform the last iteration where
/// RLE is actually performed on the basic block.
///
/// NOTE: This is never populated for functions which will only require 1
/// data flow iteration. For functions that require more than 1 iteration of
/// the data flow, this is populated the first time the function is
/// walked, i.e. when we generate the genset and killset.
|
|
BasicBlockSet BBWithLoads;
|
|
|
|
/// If set, RLE ignores loads from that array type.
|
|
NominalTypeDecl *ArrayType;
|
|
|
|
#ifndef NDEBUG
|
|
SILPrintContext printCtx;
|
|
#endif
|
|
|
|
public:
|
|
RLEContext(SILFunction *F, SILPassManager *PM, AliasAnalysis *AA,
|
|
TypeExpansionAnalysis *TE, PostOrderFunctionInfo *PO,
|
|
EpilogueARCFunctionInfo *EAFI, bool disableArrayLoads);
|
|
|
|
RLEContext(const RLEContext &) = delete;
|
|
RLEContext(RLEContext &&) = delete;
|
|
RLEContext &operator=(const RLEContext &) = delete;
|
|
RLEContext &operator=(RLEContext &&) = delete;
|
|
~RLEContext() = default;
|
|
|
|
/// Entry point to redundant load elimination.
|
|
bool run();
|
|
|
|
SILFunction *getFunction() const { return Fn; }
|
|
|
|
/// Use a set of ad hoc rules to tell whether we should run a pessimistic
|
|
/// one iteration data flow on the function.
|
|
ProcessKind getProcessFunctionKind(unsigned LoadCount, unsigned StoreCount);
|
|
|
|
/// Run the iterative data flow until convergence.
|
|
void runIterativeRLE();
|
|
|
|
/// Process the basic blocks for the gen and kill set.
|
|
void processBasicBlocksForGenKillSet();
|
|
|
|
/// Process the basic blocks with the gen and kill set.
|
|
void processBasicBlocksWithGenKillSet();
|
|
|
|
/// Process the basic block for values generated in the current basic
|
|
/// block.
|
|
void processBasicBlocksForAvailValue();
|
|
|
|
/// Process basic blocks to perform the redundant load elimination.
|
|
void processBasicBlocksForRLE(bool Optimistic);
|
|
|
|
/// Returns the alias analysis we will use during all computations.
|
|
AliasAnalysis *getAA() const { return AA; }
|
|
|
|
/// Returns the type expansion analysis we are using.
|
|
TypeExpansionAnalysis *getTE() const { return TE; }
|
|
|
|
/// Returns current epilogue release function info we are using.
|
|
EpilogueARCFunctionInfo *getEAFI() const { return EAFI; }
|
|
|
|
/// Returns the map from SILValue bases to LSLocations.
|
|
LSLocationBaseMap &getBM() { return BaseToLocIndex; }
|
|
|
|
/// Return the BlockState for the given basic block.
|
|
BlockState &getBlockState(SILBasicBlock *B) { return BBToLocState[B]; }
|
|
|
|
/// Get the bit representing the LSLocation in the LocationVault.
|
|
unsigned getLocationBit(const LSLocation &L);
|
|
|
|
/// Given the bit, get the LSLocation from the LocationVault.
|
|
LSLocation &getLocation(const unsigned index);
|
|
|
|
/// Get the bit representing the LSValue in the LSValueVault.
|
|
unsigned getValueBit(const LSValue &L);
|
|
|
|
/// Given the bit, get the LSValue from the LSValueVault.
|
|
LSValue &getValue(const unsigned index);
|
|
|
|
/// Given a LSLocation, try to collect all the LSValues for this LSLocation
|
|
/// in the given basic block. If part of the locations have covering values,
|
|
/// find the values in its predecessors.
|
|
bool collectLocationValues(SILBasicBlock *BB, LSLocation &L,
|
|
LSLocationValueMap &Values, ValueTableMap &VM);
|
|
|
|
/// Transitively collect all the values that make up this location and
|
|
/// create a SILArgument out of them.
|
|
SILValue computePredecessorLocationValue(SILBasicBlock *BB, LSLocation &L);
|
|
|
|
/// Returns the LoadInst if \p Inst is a load inst we want to handle.
|
|
LoadInst *isLoadInstToHandle(SILInstruction *Inst) {
|
|
if (auto *LI = dyn_cast<LoadInst>(Inst)) {
|
|
if (!ArrayType ||
|
|
LI->getType().getNominalOrBoundGenericNominal() != ArrayType) {
|
|
return LI;
|
|
}
|
|
}
|
|
return nullptr;
|
|
}
|
|
};
|
|
|
|
} // end anonymous namespace
|
|
|
|
void BlockState::startTrackingValue(ValueTableMap &VM, unsigned L, unsigned V) {
|
|
VM[L] = V;
|
|
}
|
|
|
|
void BlockState::stopTrackingValue(ValueTableMap &VM, unsigned B) {
|
|
VM.erase(B);
|
|
}
|
|
|
|
bool BlockState::isTrackingLocation(SmallBitVector &BV, unsigned B) {
|
|
return BV.test(B);
|
|
}
|
|
|
|
void BlockState::startTrackingLocation(SmallBitVector &BV, unsigned B) {
|
|
BV.set(B);
|
|
}
|
|
|
|
void BlockState::stopTrackingLocation(SmallBitVector &BV, unsigned B) {
|
|
BV.reset(B);
|
|
}
|
|
|
|
void BlockState::mergePredecessorsAvailSetMax(RLEContext &Ctx) {
|
|
if (BB->pred_empty()) {
|
|
ForwardSetMax.reset();
|
|
return;
|
|
}
|
|
|
|
auto Iter = BB->pred_begin();
|
|
ForwardSetMax = Ctx.getBlockState(*Iter).ForwardSetMax;
|
|
Iter = std::next(Iter);
|
|
for (auto EndIter = BB->pred_end(); Iter != EndIter; ++Iter) {
|
|
ForwardSetMax &= Ctx.getBlockState(*Iter).ForwardSetMax;
|
|
}
|
|
}
|
|
|
|
void BlockState::mergePredecessorAvailSet(RLEContext &Ctx) {
|
|
// Clear the state if the basic block has no predecessor.
|
|
if (BB->getPredecessorBlocks().begin() == BB->getPredecessorBlocks().end()) {
|
|
ForwardSetIn.reset();
|
|
return;
|
|
}
|
|
|
|
auto Iter = BB->pred_begin();
|
|
ForwardSetIn = Ctx.getBlockState(*Iter).ForwardSetOut;
|
|
Iter = std::next(Iter);
|
|
for (auto EndIter = BB->pred_end(); Iter != EndIter; ++Iter) {
|
|
ForwardSetIn &= Ctx.getBlockState(*Iter).ForwardSetOut;
|
|
}
|
|
}
|
|
|
|
void BlockState::mergePredecessorAvailSetAndValue(RLEContext &Ctx) {
|
|
// Clear the state if the basic block has no predecessor.
|
|
if (BB->getPredecessorBlocks().begin() == BB->getPredecessorBlocks().end()) {
|
|
ForwardSetIn.reset();
|
|
ForwardValIn.clear();
|
|
return;
|
|
}
|
|
|
|
auto Iter = BB->pred_begin();
|
|
ForwardSetIn = Ctx.getBlockState(*Iter).ForwardSetOut;
|
|
ForwardValIn = Ctx.getBlockState(*Iter).ForwardValOut;
|
|
Iter = std::next(Iter);
|
|
for (auto EndIter = BB->pred_end(); Iter != EndIter; ++Iter) {
|
|
BlockState &OtherState = Ctx.getBlockState(*Iter);
|
|
ForwardSetIn &= OtherState.ForwardSetOut;
|
|
|
|
// Merge in the predecessor state.
|
|
for (unsigned i = 0; i < LocationNum; ++i) {
|
|
if (OtherState.ForwardSetOut[i]) {
|
|
// There are multiple values from multiple predecessors, set this as
|
|
// a covering value. We do not need to track the value itself, as we
|
|
// can always go to the predecessors BlockState to find it.
|
|
ForwardValIn[i] = Ctx.getValueBit(LSValue(true));
|
|
continue;
|
|
}
|
|
// If this location does not have an available value in this predecessor,
// then clear it.
|
|
stopTrackingValue(ForwardValIn, i);
|
|
stopTrackingLocation(ForwardSetIn, i);
|
|
}
|
|
}
|
|
}
|
|
|
|
void BlockState::processBasicBlockWithKind(RLEContext &Ctx, RLEKind Kind) {
|
|
// Iterate over instructions in forward order.
|
|
for (auto &II : *BB) {
|
|
processInstructionWithKind(Ctx, &II, Kind);
|
|
}
|
|
}
|
|
|
|
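/// Apply this block's transfer function to the incoming state. A sketch of the
/// equation computed below: ForwardSetOut = (ForwardSetIn & ~BBKillSet) | BBGenSet,
/// where ForwardSetIn has already been merged from the predecessors' ForwardSetOut.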
bool BlockState::processBasicBlockWithGenKillSet() {
|
|
ForwardSetIn.reset(BBKillSet);
|
|
ForwardSetIn |= BBGenSet;
|
|
return updateForwardSetOut();
|
|
}
|
|
|
|
SILValue BlockState::reduceValuesAtEndOfBlock(RLEContext &Ctx, LSLocation &L) {
|
|
// First, collect current available locations and their corresponding values
|
|
// into a map.
|
|
LSLocationValueMap Values;
|
|
|
|
LSLocationList Locs;
|
|
LSLocation::expand(L, &BB->getModule(),
|
|
TypeExpansionContext(*BB->getParent()), Locs, Ctx.getTE());
|
|
|
|
// Find the values that this basic block defines and the locations which
|
|
// we do not have a concrete value in the current basic block.
|
|
ValueTableMap &OTM = getForwardValOut();
|
|
for (unsigned i = 0; i < Locs.size(); ++i) {
|
|
auto Val = Ctx.getValue(OTM[Ctx.getLocationBit(Locs[i])]);
|
|
auto AvailVal = makeCopiedValueAvailable(Val.getBase(), BB);
|
|
Values[Locs[i]] = LSValue(AvailVal, Val.getPath().getValue());
|
|
}
|
|
|
|
// Second, reduce the available values into a single SILValue we can use to
|
|
// forward.
|
|
SILValue TheForwardingValue =
|
|
LSValue::reduce(L, &BB->getModule(), Values, BB->getTerminator());
|
|
/// Return the forwarding value.
|
|
return TheForwardingValue;
|
|
}
|
|
|
|
bool BlockState::setupRLE(RLEContext &Ctx, SILInstruction *I, SILValue Mem) {
|
|
// Try to construct a SILValue for the current LSLocation.
|
|
//
|
|
// Collect the locations and their corresponding values into a map.
|
|
LSLocation L;
|
|
LSLocationBaseMap &BaseToLocIndex = Ctx.getBM();
|
|
if (BaseToLocIndex.find(Mem) != BaseToLocIndex.end()) {
|
|
L = BaseToLocIndex[Mem];
|
|
} else {
|
|
SILValue UO = getUnderlyingObject(Mem);
|
|
L = LSLocation(UO, ProjectionPath::getProjectionPath(UO, Mem));
|
|
}
|
|
|
|
LSLocationValueMap Values;
|
|
// Use the ForwardValIn as we are currently processing the basic block.
|
|
if (!Ctx.collectLocationValues(I->getParent(), L, Values, getForwardValIn()))
|
|
return false;
|
|
|
|
// Reduce the available values into a single SILValue we can use to forward.
|
|
SILModule *Mod = &I->getModule();
|
|
SILValue TheForwardingValue =
|
|
LSValue::reduce(L, Mod, Values, I);
|
|
|
|
if (!TheForwardingValue)
|
|
return false;
|
|
|
|
// Now we have the forwarding value, record it for forwarding!
|
|
//
|
|
// NOTE: we do not perform the RLE right here because doing so could introduce
|
|
// new LSLocations.
|
|
//
|
|
// e.g.
|
|
// %0 = load %x
|
|
// %1 = load %x
|
|
// %2 = extract_struct %1, #a
|
|
// %3 = load %2
|
|
//
|
|
// If we perform the RLE and replace %1 with %0, we end up having a memory
|
|
// location we do not have before, i.e. Base == %0, and Path == #a.
|
|
//
|
|
// We may be able to add the LSLocation to the vault, but it gets
|
|
// complicated very quickly, e.g. we need to resize the bit vectors size,
|
|
// etc.
|
|
//
|
|
// However, since we already know the instruction to replace and the value to
// replace it with, we can record it for now and forward it after all the
// forwardable values are recorded in the function.
|
|
//
|
|
RedundantLoads[cast<SingleValueInstruction>(I)] = TheForwardingValue;
|
|
|
|
LLVM_DEBUG(llvm::dbgs() << "FORWARD " << TheForwardingValue << " to" << *I);
|
|
return true;
|
|
}
|
|
|
|
void BlockState::updateForwardSetForRead(RLEContext &Ctx, unsigned B) {
|
|
startTrackingLocation(ForwardSetIn, B);
|
|
}
|
|
|
|
void BlockState::updateGenKillSetForRead(RLEContext &Ctx, unsigned B) {
|
|
startTrackingLocation(BBGenSet, B);
|
|
stopTrackingLocation(BBKillSet, B);
|
|
}
|
|
|
|
void BlockState::updateForwardSetAndValForRead(RLEContext &Ctx, unsigned L,
|
|
unsigned V) {
|
|
// Track the new location and value.
|
|
startTrackingValue(ForwardValIn, L, V);
|
|
startTrackingLocation(ForwardSetIn, L);
|
|
}
|
|
|
|
void BlockState::updateGenKillSetForWrite(RLEContext &Ctx, unsigned B) {
|
|
// This is a store, invalidate any location that this location may alias, as
|
|
// their values can no longer be forwarded.
|
|
LSLocation &R = Ctx.getLocation(B);
|
|
for (unsigned i = 0; i < LocationNum; ++i) {
|
|
if (!isTrackingLocation(ForwardSetMax, i))
|
|
continue;
|
|
LSLocation &L = Ctx.getLocation(i);
|
|
if (!L.isMayAliasLSLocation(R, Ctx.getAA()))
|
|
continue;
|
|
// MayAlias, invalidate the location.
|
|
stopTrackingLocation(BBGenSet, i);
|
|
startTrackingLocation(BBKillSet, i);
|
|
}
|
|
|
|
// Start tracking this location.
|
|
startTrackingLocation(BBGenSet, B);
|
|
stopTrackingLocation(BBKillSet, B);
|
|
}
|
|
|
|
void BlockState::updateMaxAvailSetForWrite(RLEContext &Ctx, unsigned B) {
|
|
startTrackingLocation(ForwardSetMax, B);
|
|
}
|
|
|
|
void BlockState::updateMaxAvailSetForRead(RLEContext &Ctx, unsigned B) {
|
|
startTrackingLocation(ForwardSetMax, B);
|
|
}
|
|
|
|
void BlockState::updateForwardSetForWrite(RLEContext &Ctx, unsigned B) {
|
|
// This is a store, invalidate any location that this location may alias, as
|
|
// their values can no longer be forwarded.
|
|
LSLocation &R = Ctx.getLocation(B);
|
|
for (unsigned i = 0; i < LocationNum; ++i) {
|
|
if (!isTrackingLocation(ForwardSetIn, i))
|
|
continue;
|
|
LSLocation &L = Ctx.getLocation(i);
|
|
if (!L.isMayAliasLSLocation(R, Ctx.getAA()))
|
|
continue;
|
|
// MayAlias, invalidate the location.
|
|
stopTrackingLocation(ForwardSetIn, i);
|
|
}
|
|
|
|
// Start tracking this location.
|
|
startTrackingLocation(ForwardSetIn, B);
|
|
}
|
|
|
|
void BlockState::updateForwardSetAndValForWrite(RLEContext &Ctx, unsigned L,
|
|
unsigned V) {
|
|
// This is a store, invalidate any location that this location may alias, as
|
|
// their values can no longer be forwarded.
|
|
LSLocation &R = Ctx.getLocation(L);
|
|
for (unsigned i = 0; i < LocationNum; ++i) {
|
|
if (!isTrackingLocation(ForwardSetIn, i))
|
|
continue;
|
|
LSLocation &L = Ctx.getLocation(i);
|
|
if (!L.isMayAliasLSLocation(R, Ctx.getAA()))
|
|
continue;
|
|
// MayAlias, invalidate the location and value.
|
|
stopTrackingValue(ForwardValIn, i);
|
|
stopTrackingLocation(ForwardSetIn, i);
|
|
}
|
|
|
|
// Start tracking this location and value.
|
|
startTrackingLocation(ForwardSetIn, L);
|
|
startTrackingValue(ForwardValIn, L, V);
|
|
}
|
|
|
|
void BlockState::processWrite(RLEContext &Ctx, SILInstruction *I, SILValue Mem,
|
|
SILValue Val, RLEKind Kind) {
|
|
// Initialize the LSLocation.
|
|
LSLocation L;
|
|
LSLocationBaseMap &BaseToLocIndex = Ctx.getBM();
|
|
if (BaseToLocIndex.find(Mem) != BaseToLocIndex.end()) {
|
|
L = BaseToLocIndex[Mem];
|
|
} else {
|
|
SILValue UO = getUnderlyingObject(Mem);
|
|
L = LSLocation(UO, ProjectionPath::getProjectionPath(UO, Mem));
|
|
}
|
|
|
|
// If we can't figure out the Base or Projection Path for the write,
|
|
// process it as an unknown memory instruction.
|
|
if (!L.isValid()) {
|
|
// we can ignore unknown store instructions if we are computing the
|
|
// AvailSetMax.
|
|
if (!isComputeAvailSetMax(Kind)) {
|
|
processUnknownWriteInst(Ctx, I, Kind);
|
|
}
|
|
return;
|
|
}
|
|
|
|
auto *Fn = I->getFunction();
|
|
// Expand the given location and val into individual fields and process
|
|
// them as separate writes.
|
|
LSLocationList Locs;
|
|
LSLocation::expand(L, &I->getModule(), TypeExpansionContext(*Fn), Locs,
|
|
Ctx.getTE());
|
|
|
|
if (isComputeAvailSetMax(Kind)) {
|
|
for (unsigned i = 0; i < Locs.size(); ++i) {
|
|
updateMaxAvailSetForWrite(Ctx, Ctx.getLocationBit(Locs[i]));
|
|
}
|
|
return;
|
|
}
|
|
|
|
// Are we computing the genset and killset ?
|
|
if (isComputeAvailGenKillSet(Kind)) {
|
|
for (unsigned i = 0; i < Locs.size(); ++i) {
|
|
updateGenKillSetForWrite(Ctx, Ctx.getLocationBit(Locs[i]));
|
|
}
|
|
return;
|
|
}
|
|
|
|
// Are we computing available value or performing RLE?
|
|
LSValueList Vals;
|
|
LSValue::expand(Val, &I->getModule(), TypeExpansionContext(*Fn), Vals,
|
|
Ctx.getTE());
|
|
if (isComputeAvailValue(Kind) || isPerformingRLE(Kind)) {
|
|
for (unsigned i = 0; i < Locs.size(); ++i) {
|
|
updateForwardSetAndValForWrite(Ctx, Ctx.getLocationBit(Locs[i]),
|
|
Ctx.getValueBit(Vals[i]));
|
|
}
|
|
return;
|
|
}
|
|
|
|
llvm_unreachable("Unknown RLE compute kind");
|
|
}
|
|
|
|
void BlockState::processRead(RLEContext &Ctx, SILInstruction *I, SILValue Mem,
|
|
SILValue Val, RLEKind Kind) {
|
|
// Initialize the LSLocation.
|
|
LSLocation L;
|
|
LSLocationBaseMap &BaseToLocIndex = Ctx.getBM();
|
|
if (BaseToLocIndex.find(Mem) != BaseToLocIndex.end()) {
|
|
L = BaseToLocIndex[Mem];
|
|
} else {
|
|
SILValue UO = getUnderlyingObject(Mem);
|
|
L = LSLocation(UO, ProjectionPath::getProjectionPath(UO, Mem));
|
|
}
|
|
|
|
// If we can't figure out the Base or Projection Path for the read, simply
|
|
// ignore it for now.
|
|
if (!L.isValid())
|
|
return;
|
|
|
|
auto *Fn = I->getFunction();
|
|
// Expand the given LSLocation and Val into individual fields and process
|
|
// them as separate reads.
|
|
LSLocationList Locs;
|
|
LSLocation::expand(L, &I->getModule(), TypeExpansionContext(*Fn), Locs,
|
|
Ctx.getTE());
|
|
|
|
if (isComputeAvailSetMax(Kind)) {
|
|
for (unsigned i = 0; i < Locs.size(); ++i) {
|
|
updateMaxAvailSetForRead(Ctx, Ctx.getLocationBit(Locs[i]));
|
|
}
|
|
return;
|
|
}
|
|
|
|
// Are we computing the genset and killset.
|
|
if (isComputeAvailGenKillSet(Kind)) {
|
|
for (unsigned i = 0; i < Locs.size(); ++i) {
|
|
updateGenKillSetForRead(Ctx, Ctx.getLocationBit(Locs[i]));
|
|
}
|
|
return;
|
|
}
|
|
|
|
// Are we computing available values ?.
|
|
bool CanForward = true;
|
|
LSValueList Vals;
|
|
LSValue::expand(Val, &I->getModule(), TypeExpansionContext(*Fn), Vals,
|
|
Ctx.getTE());
|
|
if (isComputeAvailValue(Kind) || isPerformingRLE(Kind)) {
|
|
for (unsigned i = 0; i < Locs.size(); ++i) {
|
|
if (isTrackingLocation(ForwardSetIn, Ctx.getLocationBit(Locs[i])))
|
|
continue;
|
|
updateForwardSetAndValForRead(Ctx, Ctx.getLocationBit(Locs[i]),
|
|
Ctx.getValueBit(Vals[i]));
|
|
// We can not perform the forwarding as we are at least missing
|
|
// some pieces of the read location.
|
|
CanForward = false;
|
|
}
|
|
}
|
|
|
|
// Simply return if we are not performing RLE or we do not have all the
|
|
// values available to perform RLE.
|
|
if (!isPerformingRLE(Kind) || !CanForward)
|
|
return;
|
|
|
|
// Lastly, forward value to the load.
|
|
setupRLE(Ctx, I, Mem);
|
|
}
|
|
|
|
void BlockState::processStoreInst(RLEContext &Ctx, StoreInst *SI, RLEKind Kind) {
|
|
processWrite(Ctx, SI, SI->getDest(), SI->getSrc(), Kind);
|
|
}
|
|
|
|
void BlockState::processLoadInst(RLEContext &Ctx, LoadInst *LI, RLEKind Kind) {
|
|
processRead(Ctx, LI, LI->getOperand(), SILValue(LI), Kind);
|
|
}
|
|
|
|
void BlockState::processUnknownWriteInstForGenKillSet(RLEContext &Ctx,
|
|
SILInstruction *I) {
|
|
auto *AA = Ctx.getAA();
|
|
for (unsigned i = 0; i < LocationNum; ++i) {
|
|
if (!isTrackingLocation(ForwardSetMax, i))
|
|
continue;
|
|
// Invalidate any location this instruction may write to.
|
|
//
|
|
// TODO: checking may alias with Base is overly conservative,
|
|
// we should check may alias with base plus projection path.
|
|
LSLocation &R = Ctx.getLocation(i);
|
|
if (!AA->mayWriteToMemory(I, R.getBase()))
|
|
continue;
|
|
// MayAlias.
|
|
stopTrackingLocation(BBGenSet, i);
|
|
startTrackingLocation(BBKillSet, i);
|
|
}
|
|
}
|
|
|
|
void BlockState::processUnknownWriteInstForRLE(RLEContext &Ctx,
|
|
SILInstruction *I) {
|
|
auto *AA = Ctx.getAA();
|
|
for (unsigned i = 0; i < LocationNum; ++i) {
|
|
if (!isTrackingLocation(ForwardSetIn, i))
|
|
continue;
|
|
// Invalidate any location this instruction may write to.
|
|
//
|
|
// TODO: checking may alias with Base is overly conservative,
|
|
// we should check may alias with base plus projection path.
|
|
LSLocation &R = Ctx.getLocation(i);
|
|
if (!AA->mayWriteToMemory(I, R.getBase()))
|
|
continue;
|
|
// MayAlias.
|
|
stopTrackingLocation(ForwardSetIn, i);
|
|
stopTrackingValue(ForwardValIn, i);
|
|
}
|
|
}
|
|
|
|
void BlockState::processUnknownWriteInst(RLEContext &Ctx, SILInstruction *I,
|
|
RLEKind Kind) {
|
|
// If this is a release on a guaranteed parameter, it can not call deinit,
|
|
// which might read or write memory.
|
|
if (isIntermediateRelease(I, Ctx.getEAFI()))
|
|
return;
|
|
|
|
// Are we computing the genset and killset ?
|
|
if (isComputeAvailGenKillSet(Kind)) {
|
|
processUnknownWriteInstForGenKillSet(Ctx, I);
|
|
return;
|
|
}
|
|
|
|
// Are we computing the available value or doing RLE ?
|
|
if (isComputeAvailValue(Kind) || isPerformingRLE(Kind)) {
|
|
processUnknownWriteInstForRLE(Ctx, I);
|
|
return;
|
|
}
|
|
|
|
llvm_unreachable("Unknown RLE compute kind");
|
|
}
|
|
|
|
void BlockState::
|
|
processDeallocStackInstForGenKillSet(RLEContext &Ctx, DeallocStackInst *I) {
|
|
SILValue ASI = findAllocStackInst(I);
|
|
for (unsigned i = 0; i < LocationNum; ++i) {
|
|
LSLocation &R = Ctx.getLocation(i);
|
|
if (R.getBase() != ASI)
|
|
continue;
|
|
// MayAlias.
|
|
stopTrackingLocation(BBGenSet, i);
|
|
startTrackingLocation(BBKillSet, i);
|
|
}
|
|
}
|
|
|
|
void BlockState::
|
|
processDeallocStackInstForRLE(RLEContext &Ctx, DeallocStackInst *I) {
|
|
SILValue ASI = findAllocStackInst(I);
|
|
for (unsigned i = 0; i < LocationNum; ++i) {
|
|
LSLocation &R = Ctx.getLocation(i);
|
|
if (R.getBase() != ASI)
|
|
continue;
|
|
// MayAlias.
|
|
stopTrackingLocation(ForwardSetIn, i);
|
|
stopTrackingValue(ForwardValIn, i);
|
|
}
|
|
}
|
|
|
|
void BlockState::
|
|
processDeallocStackInst(RLEContext &Ctx, DeallocStackInst *I, RLEKind Kind) {
|
|
// Are we computing the genset and killset ?
|
|
if (isComputeAvailGenKillSet(Kind)) {
|
|
processDeallocStackInstForGenKillSet(Ctx, I);
|
|
return;
|
|
}
|
|
|
|
// Are we computing the available value or doing RLE ?
|
|
if (isComputeAvailValue(Kind) || isPerformingRLE(Kind)) {
|
|
processDeallocStackInstForRLE(Ctx, I);
|
|
return;
|
|
}
|
|
|
|
llvm_unreachable("Unknown RLE compute kind");
|
|
}
|
|
|
|
|
|
void BlockState::processInstructionWithKind(RLEContext &Ctx,
|
|
SILInstruction *Inst,
|
|
RLEKind Kind) {
|
|
// This is a StoreInst, try to see whether it clobbers any forwarding value
|
|
if (auto *SI = dyn_cast<StoreInst>(Inst)) {
|
|
processStoreInst(Ctx, SI, Kind);
|
|
return;
|
|
}
|
|
|
|
// This is a LoadInst. Let's see if we can find a previous loaded, stored
|
|
// value to use instead of this load.
|
|
if (auto *LI = Ctx.isLoadInstToHandle(Inst)) {
|
|
processLoadInst(Ctx, LI, Kind);
|
|
return;
|
|
}
|
|
|
|
if (auto *DSI = dyn_cast<DeallocStackInst>(Inst)) {
|
|
processDeallocStackInst(Ctx, DSI, Kind);
|
|
return;
|
|
}
|
|
|
|
// If this instruction has side effects, but is inert from a load store
|
|
// perspective, skip it.
|
|
if (isRLEInertInstruction(Inst))
|
|
return;
|
|
|
|
// If this instruction does not read or write memory, we can skip it.
|
|
if (!Inst->mayReadOrWriteMemory())
|
|
return;
|
|
|
|
// If we have an instruction that may write to memory and we cannot prove
|
|
// that it and its operands cannot alias a load we have visited,
|
|
// invalidate that load.
|
|
if (Inst->mayWriteToMemory()) {
|
|
LLVM_DEBUG(llvm::dbgs() << "WRITE " << *Inst);
|
|
processUnknownWriteInst(Ctx, Inst, Kind);
|
|
return;
|
|
}
|
|
LLVM_DEBUG(llvm::dbgs() << "READ " << *Inst);
|
|
}
|
|
|
|
RLEContext::ProcessKind
|
|
RLEContext::
|
|
getProcessFunctionKind(unsigned LoadCount, unsigned StoreCount) {
|
|
// Don't optimize functions that are marked as 'no.optimize'.
|
|
if (!Fn->shouldOptimize())
|
|
return ProcessKind::ProcessNone;
|
|
|
|
// Really no point optimizing here as there are no forwardable loads.
|
|
if (LoadCount + StoreCount < 2)
|
|
return ProcessKind::ProcessNone;
|
|
|
|
bool RunOneIteration = true;
|
|
unsigned BBCount = 0;
|
|
unsigned LocationCount = LocationVault.size();
|
|
|
|
if (LocationCount == 0)
|
|
return ProcessKind::ProcessNone;
|
|
|
|
// If every basic block has its predecessors processed before it when
// the basic blocks in the function are iterated in reverse post order,
// then this function can be processed in one iteration, i.e. there is no
// need to generate the genset and killset.
|
|
auto *PO = PM->getAnalysis<PostOrderAnalysis>()->get(Fn);
|
|
BasicBlockSet HandledBBs(Fn);
|
|
for (SILBasicBlock *B : PO->getReversePostOrder()) {
|
|
++BBCount;
|
|
for (SILBasicBlock *pred : B->getPredecessorBlocks()) {
|
|
if (!HandledBBs.contains(pred)) {
|
|
RunOneIteration = false;
|
|
break;
|
|
}
|
|
}
|
|
HandledBBs.insert(B);
|
|
}
|
|
|
|
// Data flow may take too long to run.
|
|
if (BBCount * LocationCount > MaxLSLocationBBMultiplicationNone)
|
|
return ProcessKind::ProcessNone;
|
|
|
|
// This function's data flow would converge in 1 iteration.
|
|
if (RunOneIteration)
|
|
return ProcessKind::ProcessOneIteration;
|
|
|
|
// We run one pessimistic data flow to do redundant load elimination on
// the function.
|
|
if (BBCount * LocationCount > MaxLSLocationBBMultiplicationPessimistic)
|
|
return ProcessKind::ProcessOneIteration;
|
|
|
|
return ProcessKind::ProcessMultipleIterations;
|
|
}
|
|
|
|
#ifndef NDEBUG
|
|
void BlockState::dump(RLEContext &Ctx) {
|
|
for (unsigned i = 0; i < LocationNum; ++i) {
|
|
if (!isTrackingLocation(ForwardSetMax, i))
|
|
continue;
|
|
|
|
llvm::dbgs() << "Loc #" << i << ":" << (BBGenSet[i] ? " Gen" : "")
|
|
<< (BBKillSet[i] ? " Kill" : "");
|
|
if (!ForwardSetIn.empty() && ForwardSetIn.test(i)) {
|
|
llvm::dbgs() << " IN ";
|
|
ValueTableMap::const_iterator inIter = ForwardValIn.find(i);
|
|
if (inIter != ForwardValIn.end()) {
|
|
if (SILValue base = Ctx.getValue(inIter->second).getBase())
|
|
llvm::dbgs() << base;
|
|
else
|
|
llvm::dbgs() << "no base";
|
|
}
|
|
}
|
|
if (!ForwardSetOut.empty() && ForwardSetOut.test(i)) {
|
|
llvm::dbgs() << " OUT ";
|
|
ValueTableMap::const_iterator outIter = ForwardValOut.find(i);
|
|
if (outIter != ForwardValOut.end()) {
|
|
if (SILValue base = Ctx.getValue(outIter->second).getBase())
|
|
llvm::dbgs() << base;
|
|
else
|
|
llvm::dbgs() << "no base";
|
|
}
|
|
}
|
|
llvm::dbgs() << "\n";
|
|
}
|
|
}
|
|
#endif
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
// RLEContext Implementation
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
RLEContext::RLEContext(SILFunction *F, SILPassManager *PM, AliasAnalysis *AA,
|
|
TypeExpansionAnalysis *TE, PostOrderFunctionInfo *PO,
|
|
EpilogueARCFunctionInfo *EAFI, bool disableArrayLoads)
|
|
: Fn(F), PM(PM), AA(AA), TE(TE), PO(PO), EAFI(EAFI), BBToLocState(F),
|
|
BBWithLoads(F),
|
|
ArrayType(disableArrayLoads
|
|
? F->getModule().getASTContext().getArrayDecl()
|
|
: nullptr)
|
|
#ifndef NDEBUG
|
|
,
|
|
printCtx(llvm::dbgs(), /*Verbose=*/false, /*Sorted=*/true)
|
|
#endif
|
|
{
|
|
}
|
|
|
|
LSLocation &RLEContext::getLocation(const unsigned index) {
|
|
return LocationVault[index];
|
|
}
|
|
|
|
unsigned RLEContext::getLocationBit(const LSLocation &Loc) {
|
|
// Return the bit position of the given Loc in the LocationVault. The bit
|
|
// position is then used to set/reset the bitvector kept by each BlockState.
|
|
//
|
|
// We should have the location populated by the enumerateLSLocation at this
|
|
// point.
|
|
auto Iter = LocToBitIndex.find(Loc);
|
|
assert(Iter != LocToBitIndex.end() && "Location should have been enum'ed");
|
|
return Iter->second;
|
|
}
|
|
|
|
LSValue &RLEContext::getValue(const unsigned index) {
|
|
return LSValueVault[index];
|
|
}
|
|
|
|
unsigned RLEContext::getValueBit(const LSValue &Val) {
|
|
// Return the bit position of the given Val in the LSValueVault. The bit
|
|
// position is then used to set/reset the bitvector kept by each BlockState.
|
|
auto Iter = ValToBitIndex.find(Val);
|
|
|
|
// We do not walk over the function and find all the possible LSValues
// in this function, as some of these values will not be used, i.e.
// if the LoadInst that generates this value is actually RLE'ed.
// Instead, we create the LSValues when we need them.
|
|
if (Iter == ValToBitIndex.end()) {
|
|
ValToBitIndex[Val] = LSValueVault.size();
|
|
LSValueVault.push_back(Val);
|
|
return ValToBitIndex[Val];
|
|
}
|
|
return Iter->second;
|
|
}
|
|
|
|
BlockState::ValueState BlockState::getValueStateAtEndOfBlock(RLEContext &Ctx,
|
|
LSLocation &L) {
|
|
// Find number of covering value and concrete values for the locations
|
|
// expanded from the given location.
|
|
unsigned CSCount = 0, CTCount = 0;
|
|
LSLocationList Locs;
|
|
LSLocation::expand(L, &BB->getModule(),
|
|
TypeExpansionContext(*BB->getParent()), Locs, Ctx.getTE());
|
|
|
|
ValueTableMap &OTM = getForwardValOut();
|
|
for (auto &X : Locs) {
|
|
LSValue &V = Ctx.getValue(OTM[Ctx.getLocationBit(X)]);
|
|
if (V.isCoveringValue()) {
|
|
++CSCount;
|
|
continue;
|
|
}
|
|
++CTCount;
|
|
}
|
|
|
|
if (CSCount == Locs.size())
|
|
return ValueState::CoverValues;
|
|
if (CTCount == Locs.size())
|
|
return ValueState::ConcreteValues;
|
|
return ValueState::CoverAndConcreteValues;
|
|
}
|
|
|
|
SILValue RLEContext::computePredecessorLocationValue(SILBasicBlock *BB,
|
|
LSLocation &L) {
|
|
llvm::SmallVector<std::pair<SILBasicBlock *, SILValue>, 8> Values;
|
|
BasicBlockWorklist WorkList(Fn);
|
|
|
|
// Push in all the predecessors to get started.
|
|
for (auto Pred : BB->getPredecessorBlocks()) {
|
|
WorkList.pushIfNotVisited(Pred);
|
|
}
|
|
|
|
while (SILBasicBlock *CurBB = WorkList.pop()) {
|
|
BlockState &Forwarder = getBlockState(CurBB);
|
|
|
|
// There are 3 cases that can happen here.
|
|
//
|
|
// 1. The current basic block contains concrete values for the entire
|
|
// location.
|
|
// 2. The current basic block contains covering values for the entire
|
|
// location.
|
|
// 3. The current basic block contains concrete value for part of the
|
|
// location and covering values for the rest.
|
|
//
|
|
// This BlockState contains concrete values for all the expanded
|
|
// locations, collect and reduce them into a single value in the current
|
|
// basic block.
|
|
if (Forwarder.isConcreteValues(*this, L)) {
|
|
Values.push_back({CurBB, Forwarder.reduceValuesAtEndOfBlock(*this, L)});
|
|
continue;
|
|
}
|
|
|
|
// This BlockState does not contain concrete value for any of the expanded
|
|
// locations, collect in this block's predecessors.
|
|
if (Forwarder.isCoverValues(*this, L)) {
|
|
for (auto Pred : CurBB->getPredecessorBlocks()) {
|
|
WorkList.pushIfNotVisited(Pred);
|
|
}
|
|
continue;
|
|
}
|
|
|
|
// This block contains concrete values for some but not all the expanded
|
|
// locations, recursively call collectLocationValues to materialize the
|
|
// value that reaches this basic block.
|
|
LSLocationValueMap LSValues;
|
|
if (!collectLocationValues(CurBB, L, LSValues, Forwarder.getForwardValOut()))
|
|
return SILValue();
|
|
|
|
// Reduce the available values into a single SILValue we can use to forward
|
|
SILInstruction *IPt = CurBB->getTerminator();
|
|
Values.push_back({CurBB, LSValue::reduce(L, &BB->getModule(), LSValues, IPt)});
|
|
}
|
|
|
|
auto ownershipRange =
|
|
makeTransformRange(llvm::make_range(Values.begin(), Values.end()),
|
|
[](std::pair<SILBasicBlock *, SILValue> v) {
|
|
return v.second.getOwnershipKind();
|
|
});
|
|
|
|
auto mergedOwnershipKind = ValueOwnershipKind::merge(ownershipRange);
|
|
|
|
// Finally, collect all the values for the SILArgument, materialize it using
|
|
// the SSAUpdater.
|
|
Updater.initialize(
|
|
L.getType(&BB->getModule(), TypeExpansionContext(*BB->getParent()))
|
|
.getObjectType(),
|
|
mergedOwnershipKind);
|
|
|
|
SmallVector<SILPhiArgument *, 8> insertedPhis;
|
|
Updater.setInsertedPhis(&insertedPhis);
|
|
|
|
for (auto V : Values) {
|
|
Updater.addAvailableValue(V.first, V.second);
|
|
}
|
|
|
|
auto Val = Updater.getValueInMiddleOfBlock(BB);
|
|
|
|
for (auto *phi : insertedPhis) {
|
|
if (phi == Val) {
|
|
continue;
|
|
}
|
|
// Fix lifetime of intermediate phis
|
|
SmallVector<SILBasicBlock *, 4> userBBs;
|
|
for (auto use : phi->getUses()) {
|
|
userBBs.push_back(use->getParentBlock());
|
|
}
|
|
endLifetimeAtLeakingBlocks(phi, userBBs);
|
|
}
|
|
return makeNewValueAvailable(Val, BB);
|
|
}
|
|
|
|
bool RLEContext::collectLocationValues(SILBasicBlock *BB, LSLocation &L,
|
|
LSLocationValueMap &Values,
|
|
ValueTableMap &VM) {
|
|
LSLocationList CSLocs;
|
|
LSLocationList Locs;
|
|
LSLocation::expand(L, &BB->getModule(),
|
|
TypeExpansionContext(*BB->getParent()), Locs, TE);
|
|
|
|
auto *Mod = &BB->getModule();
|
|
// Find the locations that this basic block defines and the locations which
|
|
// we do not have a concrete value in the current basic block.
|
|
for (auto &X : Locs) {
|
|
auto Val = getValue(VM[getLocationBit(X)]);
|
|
if (!Val.isCoveringValue()) {
|
|
auto AvailValue =
|
|
makeCopiedValueAvailable(Val.getBase(), BB);
|
|
Values[X] = LSValue(AvailValue, Val.getPath().getValue());
|
|
continue;
|
|
}
|
|
Values[X] = Val;
|
|
CSLocs.push_back(X);
|
|
}
|
|
|
|
// For locations which we do not have concrete values for in this basic
|
|
// block, try to reduce it to the minimum # of locations possible, this
|
|
// will help us to generate as few SILArguments as possible.
|
|
LSLocation::reduce(L, Mod, TypeExpansionContext(*BB->getParent()), CSLocs);
|
|
|
|
// To handle covering value, we need to go to the predecessors and
|
|
// materialize them there.
|
|
for (auto &X : CSLocs) {
|
|
SILValue V = computePredecessorLocationValue(BB, X);
|
|
if (!V)
|
|
return false;
|
|
// We've constructed a concrete value for the covering value. Expand and
|
|
// collect the newly created forwardable values.
|
|
LSLocationList Locs;
|
|
LSValueList Vals;
|
|
auto expansionContext = TypeExpansionContext(*BB->getParent());
|
|
LSLocation::expand(X, Mod, expansionContext, Locs, TE);
|
|
LSValue::expand(V, Mod, expansionContext, Vals, TE);
|
|
|
|
for (unsigned i = 0; i < Locs.size(); ++i) {
|
|
Values[Locs[i]] = Vals[i];
|
|
assert(Values[Locs[i]].isValid() && "Invalid load store value");
|
|
}
|
|
}
|
|
return true;
|
|
}
|
|
|
|
void RLEContext::processBasicBlocksForGenKillSet() {
|
|
for (SILBasicBlock *BB : PO->getReversePostOrder()) {
|
|
LLVM_DEBUG(llvm::dbgs() << "PROCESS " << printCtx.getID(BB)
|
|
<< " for Gen/Kill:\n";
|
|
BB->print(printCtx));
|
|
|
|
BlockState &S = getBlockState(BB);
|
|
|
|
// Compute the AvailSetMax at the beginning of the basic block.
|
|
S.mergePredecessorsAvailSetMax(*this);
|
|
|
|
// Compute the genset and killset.
|
|
//
|
|
// To optimize this process, we also compute the AvailSetMax at particular
|
|
// point in the basic block.
|
|
for (auto I = BB->begin(), E = BB->end(); I != E; ++I) {
|
|
if (auto *LI = dyn_cast<LoadInst>(&*I)) {
|
|
if (!BBWithLoads.contains(BB))
|
|
BBWithLoads.insert(BB);
|
|
S.processLoadInst(*this, LI, RLEKind::ComputeAvailSetMax);
|
|
}
|
|
if (auto *SI = dyn_cast<StoreInst>(&*I)) {
|
|
S.processStoreInst(*this, SI, RLEKind::ComputeAvailSetMax);
|
|
}
|
|
|
|
S.processInstructionWithKind(*this, &*I, RLEKind::ComputeAvailGenKillSet);
|
|
}
|
|
LLVM_DEBUG(S.dump(*this));
|
|
}
|
|
}
|
|
|
|
void RLEContext::processBasicBlocksWithGenKillSet() {
|
|
// Process each basic block with the gen and kill set. Every time the
|
|
// ForwardSetOut of a basic block changes, the optimization is rerun on its
|
|
// successors.
|
|
BasicBlockWorklist WorkList(Fn);
|
|
|
|
// Push into the worklist in post order so that we can pop from the back and
|
|
// get reverse post order.
|
|
for (SILBasicBlock *BB : PO->getPostOrder()) {
|
|
WorkList.push(BB);
|
|
}
  while (SILBasicBlock *BB = WorkList.popAndForget()) {
    LLVM_DEBUG(llvm::dbgs() << "PROCESS " << printCtx.getID(BB)
                            << " with Gen/Kill.\n");
    // Intersection.
    BlockState &Forwarder = getBlockState(BB);
    // Compute the ForwardSetIn at the beginning of the basic block.
    Forwarder.mergePredecessorAvailSet(*this);

    if (Forwarder.processBasicBlockWithGenKillSet()) {
      for (SILBasicBlock *succ : BB->getSuccessors()) {
        WorkList.pushIfNotVisited(succ);
      }
    }
    LLVM_DEBUG(Forwarder.dump(*this));
  }
}

void RLEContext::processBasicBlocksForAvailValue() {
  for (SILBasicBlock *BB : PO->getReversePostOrder()) {
    LLVM_DEBUG(llvm::dbgs() << "PROCESS " << printCtx.getID(BB)
                            << " for available.\n");

    BlockState &Forwarder = getBlockState(BB);

    // Merge the predecessors. After merging, BlockState now contains
    // lists of available LSLocations and their values that reach the
    // beginning of the basic block along all paths.
    Forwarder.mergePredecessorAvailSetAndValue(*this);

    // Merge duplicate loads, and forward stores to loads. We also update the
    // lists of stores/loads to reflect the end of the basic block.
    Forwarder.processBasicBlockWithKind(*this, RLEKind::ComputeAvailValue);

    // Update the locations with available values. We do not need to update
    // the available BitVector here as it should have been initialized and
    // stabilized in processBasicBlocksWithGenKillSet.
    Forwarder.updateForwardValOut();

    LLVM_DEBUG(Forwarder.dump(*this));
  }
}

void RLEContext::processBasicBlocksForRLE(bool Optimistic) {
  for (SILBasicBlock *BB : PO->getReversePostOrder()) {
    LLVM_DEBUG(llvm::dbgs() << "PROCESS " << printCtx.getID(BB)
                            << " for RLE.\n");

    // In the multiple-iteration (optimistic) case the forward sets have
    // already been computed and converged. If, in addition, this basic block
    // does not have any LoadInsts, there is no point in processing every
    // instruction in the block again, as no load will be eliminated.
    if (Optimistic && !BBWithLoads.contains(BB))
      continue;

    BlockState &Forwarder = getBlockState(BB);

    // Merge the predecessors. After merging, BlockState now contains
    // lists of available LSLocations and their values that reach the
    // beginning of the basic block along all paths.
    Forwarder.mergePredecessorAvailSetAndValue(*this);

    LLVM_DEBUG(Forwarder.dump(*this));

    // Perform the actual redundant load elimination.
    Forwarder.processBasicBlockWithKind(*this, RLEKind::PerformRLE);

    // If this is not a one-iteration data flow, the forward sets have
    // already been computed.
    if (Optimistic)
      continue;

    // Update the available locations and their values.
    Forwarder.updateForwardSetOut();
    Forwarder.updateForwardValOut();
  }
}

void RLEContext::runIterativeRLE() {
  // Generate the genset and killset for every basic block.
  processBasicBlocksForGenKillSet();

  // Process basic blocks in RPO. After the data flow converges, run the last
  // iteration and perform load forwarding.
  processBasicBlocksWithGenKillSet();

  // We have computed the available value bits; now go through every basic
  // block and compute the forwarding value locally. This is necessary because,
  // when we perform the RLE in the last iteration, we must handle loops, i.e.
  // predecessor blocks which have not yet been processed when a basic block
  // is processed.
  processBasicBlocksForAvailValue();
}

bool RLEContext::run() {
  // We perform redundant load elimination in the following phases.
  //
  // Phase 1. Compute the genset and killset for every basic block.
  //
  // Phase 2. Use an iterative data flow to compute whether there is an
  // available value at a given point; we do not yet care what the value
  // actually is.
  //
  // Phase 3. Compute the real forwardable value at a given point.
  //
  // Phase 4. Perform the redundant load elimination.
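  //
  // For orientation: Phases 1-3 correspond to the three calls in
  // runIterativeRLE() above and Phase 4 to processBasicBlocksForRLE(). When
  // getProcessFunctionKind() decides that a single iteration is enough,
  // Phases 1-3 are skipped and Phase 4 runs on its own.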
  // Walk over the function and find all the locations accessed by
  // this function.
  std::pair<int, int> LSCount = std::make_pair(0, 0);
  LSLocation::enumerateLSLocations(*Fn, LocationVault,
                                   LocToBitIndex,
                                   BaseToLocIndex, TE,
                                   LSCount);

  // Check how to optimize this function.
  ProcessKind Kind = getProcessFunctionKind(LSCount.first, LSCount.second);

  // We do not optimize this function at all.
  if (Kind == ProcessKind::ProcessNone)
    return false;

  // Do we run a multi-iteration data flow?
  const bool Optimistic = (Kind == ProcessKind::ProcessMultipleIterations);
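  // Optimistic means the iterative data flow (runIterativeRLE) is run to a
  // fixed point before any load is eliminated; otherwise we settle for what
  // is known after a single reverse-post-order pass in
  // processBasicBlocksForRLE().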

  // This is the set of basic blocks that we actually process.
  // We do not process unreachable blocks; instead we set their liveouts to nil.
  BasicBlockSet BBToProcess(Fn);
  for (auto X : PO->getPostOrder())
    BBToProcess.insert(X);

  // For all basic blocks in the function, initialize a BB state. Since we
  // know all the locations accessed in this function, we can resize the bit
  // vector to the appropriate size.
  for (auto bs : BBToLocState) {
    bs.data.init(&bs.block, LocationVault.size(),
                 Optimistic && BBToProcess.contains(&bs.block));
  }

  LLVM_DEBUG(for (unsigned i = 0; i < LocationVault.size(); ++i) {
    llvm::dbgs() << "LSLocation #" << i;
    getLocation(i).print(llvm::dbgs());
  });

  if (Optimistic)
    runIterativeRLE();

  // We have the available value bits computed and the local forwarding
  // values. Set up the load forwarding.
  processBasicBlocksForRLE(Optimistic);

  // Finally, perform the redundant load replacements.
  llvm::SmallVector<SILInstruction *, 16> InstsToDelete;
  bool SILChanged = false;
  for (auto bs : BBToLocState) {
    auto &Loads = bs.data.getRL();
    // Nothing to forward.
    if (Loads.empty())
      continue;
    // We iterate the instructions in the basic block in a deterministic order
    // and use this order to perform the load forwarding.
    //
    // NOTE: we could end up with different SIL depending on the order in
    // which load forwarding is performed.
    for (auto I = bs.block.rbegin(), E = bs.block.rend(); I != E; ++I) {
      auto V = dyn_cast<SingleValueInstruction>(&*I);
      if (!V)
        continue;
      auto Iter = Loads.find(V);
      if (Iter == Loads.end())
        continue;
      LLVM_DEBUG(llvm::dbgs() << "Replacing " << SILValue(Iter->first)
                              << "With " << Iter->second);
      auto *origLoad = cast<LoadInst>(Iter->first);
      SILValue newValue = Iter->second;
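      // A load [take] is responsible for consuming the value it moves out of
      // memory. Since the load goes away and its result is replaced by an
      // already available value, compensate with a destroy_addr of the source
      // address.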
      if (origLoad->getOwnershipQualifier() == LoadOwnershipQualifier::Take) {
        SILBuilderWithScope(origLoad).createDestroyAddr(origLoad->getLoc(),
                                                        origLoad->getOperand());
      }
      SILChanged = true;
      origLoad->replaceAllUsesWith(newValue);
      InstsToDelete.push_back(origLoad);
      ++NumForwardedLoads;
    }
  }

  // Erase the instructions recursively; this way, we get rid of a pass
  // dependence on DCE.
  for (auto &X : InstsToDelete) {
    // It is possible that the instruction still has uses, because it could be
    // used as the replacement value, i.e. Iter->second, for some other RLE
    // pairs.
    //
    // TODO: we should fix this; otherwise we are missing RLE opportunities.
    if (X->hasUsesOfAnyResult())
      continue;
    recursivelyDeleteTriviallyDeadInstructions(X, true);
  }
  return SILChanged;
}

//===----------------------------------------------------------------------===//
//                           Top Level Entry Point
//===----------------------------------------------------------------------===//

namespace {

class RedundantLoadElimination : public SILFunctionTransform {

private:
  bool disableArrayLoads;

public:

  RedundantLoadElimination(bool disableArrayLoads)
      : disableArrayLoads(disableArrayLoads) { }

  /// The entry point to the transformation.
  void run() override {
    SILFunction *F = getFunction();

    LLVM_DEBUG(llvm::dbgs() << "*** RLE on function: " << F->getName()
                            << " ***\n");

    auto *AA = PM->getAnalysis<AliasAnalysis>(F);
    auto *TE = PM->getAnalysis<TypeExpansionAnalysis>();
    auto *PO = PM->getAnalysis<PostOrderAnalysis>()->get(F);
    auto *EAFI = PM->getAnalysis<EpilogueARCAnalysis>()->get(F);

    RLEContext RLE(F, PM, AA, TE, PO, EAFI, disableArrayLoads);
    if (RLE.run()) {
      invalidateAnalysis(SILAnalysis::InvalidationKind::Instructions);
    }
  }
};

} // end anonymous namespace
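
// Factory functions for the two flavors of this pass; they differ only in the
// disableArrayLoads flag that is passed to the transform.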
SILTransform *swift::createEarlyRedundantLoadElimination() {
  return new RedundantLoadElimination(/*disableArrayLoads=*/true);
}

SILTransform *swift::createRedundantLoadElimination() {
  return new RedundantLoadElimination(/*disableArrayLoads=*/false);
}