enumerate them lazily. This improves compilation time, as some of the LSValues that were previously enumerated are never created in this approach: we used to enumerate the LSValues created by loads, but those LoadInsts could themselves be targets for RLE, in which case the enumerated LSValues are never used. Compilation time improvement: 1775ms to 1721ms (2.7% to 2.6% of the entire compilation time for stdlib -O). Existing tests ensure correctness. Note: we still enumerate locations eagerly, as we need to know how many locations there are in the function to resize the bitvector appropriately before the data flow runs.
//===-------- RedundantLoadElimination.cpp - SIL Load Forwarding ---------===//
//
// This source file is part of the Swift.org open source project
//
// Copyright (c) 2014 - 2015 Apple Inc. and the Swift project authors
// Licensed under Apache License v2.0 with Runtime Library Exception
//
// See http://swift.org/LICENSE.txt for license information
// See http://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
//
//===----------------------------------------------------------------------===//
///
/// \file
///
/// This pass eliminates redundant loads.
///
/// A load can be eliminated if its value is already held somewhere, i.e. it
/// was generated by a previous load, or the LSLocation was stored to with a
/// known value.
///
/// In this case, one can replace the load instruction with the previous
/// result.
///
/// Redundant Load Elimination (RLE) eliminates such loads by:
///
/// 1. Introducing a notion of an LSLocation that is used to model object
/// fields. (See below for more details).
///
/// 2. Introducing a notion of an LSValue that is used to model the value
/// that currently resides in the associated LSLocation on a particular
/// program path. (See below for more details).
///
/// 3. Performing an RPO walk over the control flow graph, tracking any
/// LSLocations that are read from or stored into in each basic block. The
/// read or stored value, kept in a map between LSLocation and LSValue,
/// becomes the available value for the LSLocation.
///
/// 4. Performing an optimistic iterative intersection-based dataflow using
/// the gen and kill sets until convergence.
///
/// At the core of RLE is the LSLocation class. An LSLocation is an
/// abstraction of an object field in the program. It consists of a base and
/// a projection path to the field accessed.
///
/// In SIL, one can access an aggregate as a whole, e.g. store to a struct
/// with 2 Int fields. A store like this will generate 2 *indivisible*
/// LSLocations, 1 for each field, and in addition to keeping a list of
/// LSLocations, RLE also keeps their available LSValues. We call a location
/// *indivisible* because it cannot be broken down into smaller LSLocations.
///
/// An LSValue consists of a base - a SILValue from the load or store inst -
/// as well as a projection path to the field it represents. So a store to
/// the 2-field struct mentioned above will generate 2 LSLocations and
/// 2 LSValues.
///
/// Every basic block keeps a map between LSLocation and LSValue. By
/// keeping the LSLocations and LSValues in their indivisible form, one
/// can easily find which part of the load is redundant and how to compute
/// its forwarding value.
///
/// Given the case in which the 2 fields of the struct both have available
/// values, RLE can find their LSValues (maybe by struct_extract from a
/// larger value) and then aggregate them.
///
/// However, this may introduce a lot of extraction and aggregation which may
/// not be necessary, e.g. a store to the struct followed by a load from the
/// struct. To solve this problem, when RLE detects that a load instruction
/// can be replaced by a forwarded value, it will try to find the minimum
/// number of extractions necessary to form the forwarded value. It groups
/// the available values by LSValue base, i.e. LSValues that come from the
/// same instruction, and then uses extraction to obtain the needed
/// components of the base.
///
//===----------------------------------------------------------------------===//
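
// To make the transformation concrete, here is a minimal sketch of the kind
// of load this pass removes. The SIL is hypothetical and simplified (%v, %a
// and $S are made-up names, and the actual pass works on the expanded,
// indivisible field locations described above):
//
//   store %v to %a : $*S      // %v becomes the available LSValue for the
//                             // LSLocation(s) rooted at %a.
//   ...                       // No writes that may alias %a.
//   %1 = load %a : $*S        // Redundant: all fields of the location are
//                             // available, so uses of %1 are replaced with
//                             // %v and the load is deleted.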

#define DEBUG_TYPE "sil-redundant-load-elim"
#include "swift/SIL/Projection.h"
#include "swift/SIL/SILArgument.h"
#include "swift/SIL/SILBuilder.h"
#include "swift/SIL/SILValueProjection.h"
#include "swift/SILOptimizer/Analysis/AliasAnalysis.h"
#include "swift/SILOptimizer/Analysis/DominanceAnalysis.h"
#include "swift/SILOptimizer/Analysis/PostOrderAnalysis.h"
#include "swift/SILOptimizer/Analysis/ValueTracking.h"
#include "swift/SILOptimizer/PassManager/Passes.h"
#include "swift/SILOptimizer/PassManager/Transforms.h"
#include "swift/SILOptimizer/Utils/CFG.h"
#include "swift/SILOptimizer/Utils/Local.h"
#include "swift/SILOptimizer/Utils/SILSSAUpdater.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/None.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"

using namespace swift;

STATISTIC(NumForwardedLoads, "Number of loads forwarded");

/// ComputeAvailSetMax - If we ignore all unknown writes, what is the maximum
/// available set that can reach a certain point in a basic block? This helps
/// generate the genset and killset, i.e. if there is no downward visible
/// value that can reach the end of a basic block, then we know that the
/// genset and killset for the location need not be set.
///
/// ComputeAvailGenKillSet - Build the genset and killset of the basic block.
///
/// ComputeAvailSet - Compute the available set at the end of the basic block.
///
/// ComputeAvailValue - Compute the available value at the end of the basic
/// block.
///
/// PerformRLE - Perform the actual redundant load elimination.
enum class RLEKind : unsigned {
  ComputeAvailSetMax = 0,
  ComputeAvailGenKillSet = 1,
  ComputeAvailSet = 2,
  ComputeAvailValue = 3,
  PerformRLE = 4,
};

//===----------------------------------------------------------------------===//
// Utility Functions
//===----------------------------------------------------------------------===//

static inline bool isComputeAvailSetMax(RLEKind Kind) {
  return Kind == RLEKind::ComputeAvailSetMax;
}

static inline bool isComputeAvailGenKillSet(RLEKind Kind) {
  return Kind == RLEKind::ComputeAvailGenKillSet;
}

static inline bool isComputeAvailSet(RLEKind Kind) {
  return Kind == RLEKind::ComputeAvailSet;
}

static inline bool isComputeAvailValue(RLEKind Kind) {
  return Kind == RLEKind::ComputeAvailValue;
}

static inline bool isPerformingRLE(RLEKind Kind) {
  return Kind == RLEKind::PerformRLE;
}

/// Return true if, when iterating the basic blocks in reverse post order,
/// every basic block's predecessors are processed before the block itself,
/// i.e. the function contains no backedges and a single dataflow iteration
/// suffices.
static bool isOneIterationFunction(PostOrderFunctionInfo *PO) {
  llvm::DenseSet<SILBasicBlock *> HandledBBs;
  for (SILBasicBlock *B : PO->getReversePostOrder()) {
    for (auto X : B->getPreds()) {
      if (HandledBBs.find(X) == HandledBBs.end())
        return false;
    }
    HandledBBs.insert(B);
  }
  return true;
}
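
// For intuition (a hypothetical CFG, not taken from any particular source
// function): in a diamond
//
//   entry -> { bb1, bb2 } -> exit
//
// reverse post order visits entry before bb1/bb2 and both before exit, so
// every block sees its predecessors handled first and one pass suffices. A
// backedge such as bb2 -> bb1 makes bb1 come up before its predecessor bb2
// has been handled, so the iterative dataflow in runIterativeRLE() is needed.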

/// Returns true if this is an instruction that may have side effects in a
/// general sense but is inert from a load store perspective.
static bool isRLEInertInstruction(SILInstruction *Inst) {
  switch (Inst->getKind()) {
  case ValueKind::StrongRetainInst:
  case ValueKind::StrongRetainUnownedInst:
  case ValueKind::UnownedRetainInst:
  case ValueKind::RetainValueInst:
  case ValueKind::DeallocStackInst:
  case ValueKind::CondFailInst:
  case ValueKind::IsUniqueInst:
  case ValueKind::IsUniqueOrPinnedInst:
    return true;
  default:
    return false;
  }
}

/// Returns true if the given basic block is reachable from the entry block.
///
/// TODO: this is very inefficient, can we make use of the domtree.
static bool isReachable(SILBasicBlock *Block) {
  SmallPtrSet<SILBasicBlock *, 16> Visited;
  llvm::SmallVector<SILBasicBlock *, 16> Worklist;
  SILBasicBlock *EntryBB = &*Block->getParent()->begin();
  Worklist.push_back(EntryBB);
  Visited.insert(EntryBB);

  while (!Worklist.empty()) {
    auto *CurBB = Worklist.back();
    Worklist.pop_back();

    if (CurBB == Block)
      return true;

    // Push each successor exactly once, i.e. only when it is newly inserted
    // into the visited set.
    for (auto &Succ : CurBB->getSuccessors())
      if (Visited.insert(Succ).second)
        Worklist.push_back(Succ);
  }
  return false;
}

//===----------------------------------------------------------------------===//
// Basic Block Location State
//===----------------------------------------------------------------------===//
namespace {

// If there are too many locations in the function, we bail out.
constexpr unsigned MaxLSLocationLimit = 2048;

/// forward declaration.
class RLEContext;

/// Per-basic-block state for the load/store dataflow, which allows values
/// from loads and stores to be forwarded to subsequent loads.
class BlockState {
public:
  enum class ValueState : unsigned {
    CoverValues = 0,
    ConcreteValues = 1,
    CoverAndConcreteValues = 2,
  };

private:
  /// # of locations in the LocationVault.
  unsigned LocationNum;

  /// The basic block that we are optimizing.
  SILBasicBlock *BB;

  /// A bit vector for which the ith bit represents the ith LSLocation in
  /// LocationVault. If the bit is set, then the location currently has a
  /// downward visible value at the beginning of the basic block.
  llvm::SmallBitVector ForwardSetIn;

  /// A bit vector for which the ith bit represents the ith LSLocation in
  /// LocationVault. If the bit is set, then the location currently has a
  /// downward visible value at the end of the basic block.
  llvm::SmallBitVector ForwardSetOut;

  /// A bit vector for which the ith bit represents the ith LSLocation in
  /// LocationVault. If we ignore all unknown writes, this is the maximum set
  /// of available locations at the current position in the basic block.
  llvm::SmallBitVector ForwardSetMax;

  /// A bit vector for which the ith bit represents the ith LSLocation in
  /// LocationVault. If the bit is set, then the basic block generates a
  /// value for the location.
  llvm::SmallBitVector BBGenSet;

  /// A bit vector for which the ith bit represents the ith LSLocation in
  /// LocationVault. If the bit is set, then the basic block kills the
  /// value for the location.
  llvm::SmallBitVector BBKillSet;

  /// This is a map between LSLocations and their available values at the
  /// beginning of this basic block.
  ValueTableMap ForwardValIn;

  /// This is a map between LSLocations and their available values at the end
  /// of this basic block.
  ValueTableMap ForwardValOut;

  /// Keeps a list of replaceable instructions in the current basic block as
  /// well as their SILValue replacement.
  llvm::DenseMap<SILInstruction *, SILValue> RedundantLoads;

  /// An LSLocation that is read or written has been extracted, expanded and
  /// mapped to a bit position in the bitvector. Update it in the
  /// ForwardSetIn of the current basic block.
  void updateForwardSetForRead(RLEContext &Ctx, unsigned bit);
  void updateForwardSetForWrite(RLEContext &Ctx, unsigned bit);

  /// An LSLocation that is read or written has been extracted, expanded and
  /// mapped to a bit position in the bitvector. Update it in the genset and
  /// killset of the current basic block.
  void updateGenKillSetForRead(RLEContext &Ctx, unsigned bit);
  void updateGenKillSetForWrite(RLEContext &Ctx, unsigned bit);

  /// An LSLocation that is read or written has been extracted, expanded and
  /// mapped to a bit position in the bitvector. Update it in the
  /// ForwardSetMax of the current basic block.
  void updateMaxAvailForwardSetForRead(RLEContext &Ctx, unsigned bit);
  void updateMaxAvailForwardSetForWrite(RLEContext &Ctx, unsigned bit);

  /// An LSLocation that is read or written has been extracted, expanded and
  /// mapped to bit positions in the bitvector and value table. Process it
  /// using those bit positions.
  void updateForwardSetAndValForRead(RLEContext &Ctx, unsigned lbit,
                                     unsigned vbit);
  void updateForwardSetAndValForWrite(RLEContext &Ctx, unsigned lbit,
                                      unsigned vbit);

  /// There is a read to an LSLocation, expand the LSLocation into individual
  /// fields before processing them.
  void processRead(RLEContext &Ctx, SILInstruction *I, SILValue Mem,
                   SILValue Val, RLEKind Kind);

  /// There is a write to an LSLocation, expand the LSLocation into individual
  /// fields before processing them.
  void processWrite(RLEContext &Ctx, SILInstruction *I, SILValue Mem,
                    SILValue Val, RLEKind Kind);

  /// BitVector manipulation functions.
  void startTrackingLocation(llvm::SmallBitVector &BV, unsigned bit);
  void stopTrackingLocation(llvm::SmallBitVector &BV, unsigned bit);
  bool isTrackingLocation(llvm::SmallBitVector &BV, unsigned bit);
  void startTrackingValue(ValueTableMap &VM, unsigned lbit, unsigned vbit);
  void stopTrackingValue(ValueTableMap &VM, unsigned bit);

public:
  BlockState() = default;

  void init(SILBasicBlock *NewBB, unsigned bitcnt, bool reachable) {
    BB = NewBB;
    LocationNum = bitcnt;
    // For reachable basic blocks, the initial state of ForwardSetOut should
    // be all 1's. Otherwise the dataflow solution could be too conservative.
    //
    // Consider this case: the value forwardable from "var a = 10" before the
    // loop will not be forwarded to use(a) if ForwardSetOut is initialized
    // to 0.
    //
    //   var a = 10
    //   for _ in 0...1024 {}
    //   use(a);
    //
    // However, by doing so, we can only do the data forwarding after the
    // data flow stabilizes.
    //
    // We set the initial state of an unreachable block to 0, as we do not
    // have a value for the location.
    //
    // This is a bit conservative as we could be missing forwarding
    // opportunities, e.g. a join block with 1 predecessor being an
    // unreachable block.
    //
    // We rely on other passes to clean up unreachable blocks.
    ForwardSetIn.resize(LocationNum, false);
    ForwardSetOut.resize(LocationNum, reachable);

    ForwardSetMax.resize(LocationNum, true);

    BBGenSet.resize(LocationNum, false);
    BBKillSet.resize(LocationNum, false);
  }

  /// Initialize the AvailSetMax by intersecting this basic block's
  /// predecessors' AvailSetMax.
  void mergePredecessorsAvailSetMax(RLEContext &Ctx);

  /// Initialize the AvailSet by intersecting this basic block's
  /// predecessors' AvailSet.
  void mergePredecessorAvailSet(RLEContext &Ctx);

  /// Initialize the AvailSet and AvailVal of the current basic block.
  void mergePredecessorAvailSetAndValue(RLEContext &Ctx);

  /// Reached the end of the basic block, update the ForwardValOut with the
  /// ForwardValIn.
  void updateForwardValOut() { ForwardValOut = ForwardValIn; }

  /// Check whether the ForwardSetOut has changed. If it does, we need to
  /// rerun the data flow to reach the fixed point.
  bool updateForwardSetOut() {
    if (ForwardSetIn == ForwardSetOut)
      return false;
    ForwardSetOut = ForwardSetIn;
    return true;
  }

  /// Returns the current basic block we are processing.
  SILBasicBlock *getBB() const { return BB; }

  /// Returns the ForwardValIn for the current basic block.
  ValueTableMap &getForwardValIn() { return ForwardValIn; }

  /// Returns the ForwardValOut for the current basic block.
  ValueTableMap &getForwardValOut() { return ForwardValOut; }

  /// Returns the redundant loads and their replacements in the current basic
  /// block.
  llvm::DenseMap<SILInstruction *, SILValue> &getRL() { return RedundantLoads; }

  /// Look into the value for the given LSLocation at the end of the basic
  /// block, and return one of the three ValueState kinds.
  ValueState getValueStateAtEndOfBlock(RLEContext &Ctx, LSLocation &L);

  /// Wrappers to query the value state of the location in this BlockState.
  bool isCoverValues(RLEContext &Ctx, LSLocation &L) {
    return getValueStateAtEndOfBlock(Ctx, L) == ValueState::CoverValues;
  }
  bool isConcreteValues(RLEContext &Ctx, LSLocation &L) {
    return getValueStateAtEndOfBlock(Ctx, L) == ValueState::ConcreteValues;
  }

  /// Iterate over the instructions in the basic block in forward order and
  /// process them w.r.t. the given \p Kind.
  void processInstructionWithKind(RLEContext &Ctx, SILInstruction *I,
                                  RLEKind Kind);
  void processBasicBlockWithKind(RLEContext &Ctx, RLEKind Kind);

  /// Process the current basic block with the genset and killset. Return true
  /// if the ForwardSetOut changes.
  bool processBasicBlockWithGenKillSet();

  /// Set up the value for redundant load elimination.
  bool setupRLE(RLEContext &Ctx, SILInstruction *I, SILValue Mem);

  /// Process an instruction which writes to memory in an unknown way.
  void processUnknownWriteInst(RLEContext &Ctx, SILInstruction *I,
                               RLEKind Kind);
  void processUnknownWriteInstForGenKillSet(RLEContext &Ctx, SILInstruction *I);
  void processUnknownWriteInstForRLE(RLEContext &Ctx, SILInstruction *I);

  /// Process a LoadInst. Extract LSLocations from the LoadInst.
  void processLoadInst(RLEContext &Ctx, LoadInst *LI, RLEKind Kind);

  /// Process a StoreInst. Extract LSLocations from the StoreInst.
  void processStoreInst(RLEContext &Ctx, StoreInst *SI, RLEKind Kind);

  /// Returns a *single* forwardable SILValue for the given LSLocation right
  /// before the terminator of the basic block.
  SILValue reduceValuesAtEndOfBlock(RLEContext &Ctx, LSLocation &L);
};

} // end anonymous namespace

//===----------------------------------------------------------------------===//
// RLEContext Interface
//===----------------------------------------------------------------------===//

namespace {

using BBValueMap = llvm::DenseMap<SILBasicBlock *, SILValue>;

/// This class stores global state that we use when computing redundant loads
/// and their replacements in each basic block.
class RLEContext {
  /// The function currently being processed.
  SILFunction *Fn;

  /// The alias analysis that we will use during all computations.
  AliasAnalysis *AA;

  /// The type expansion analysis we will use during all computations.
  TypeExpansionAnalysis *TE;

  /// The SSA updater we use to materialize covering values.
  SILSSAUpdater Updater;

  /// The range that we use to iterate over the post order and reverse post
  /// order of the given function.
  PostOrderFunctionInfo *PO;

  /// Keeps all the locations for the current function. The BitVector in each
  /// BlockState is then laid on top of it to keep track of which LSLocation
  /// has a downward available value.
  std::vector<LSLocation> LocationVault;

  /// Contains a map between each LSLocation and its index in the
  /// LocationVault. Used for fast lookup.
  llvm::DenseMap<LSLocation, unsigned> LocToBitIndex;

  /// Keeps all the load store values for the current function. The BitVector
  /// in each BlockState is then laid on top of it to keep track of which
  /// LSLocation has a downward available value.
  std::vector<LSValue> LSValueVault;

  /// Contains a map between each LSValue and its index in the LSValueVault.
  /// Used for fast lookup.
  llvm::DenseMap<LSValue, unsigned> ValToBitIndex;

  /// A map from each BasicBlock to its BlockState.
  llvm::SmallDenseMap<SILBasicBlock *, BlockState, 4> BBToLocState;

  /// A map for each basic block and whether its predecessors have forwardable
  /// edges.
  llvm::DenseMap<SILBasicBlock *, bool> ForwardableEdge;

public:
  RLEContext(SILFunction *F, AliasAnalysis *AA, TypeExpansionAnalysis *TE,
             PostOrderFunctionInfo *PO);

  RLEContext(const RLEContext &) = delete;
  RLEContext(RLEContext &&) = default;
  ~RLEContext() = default;

  /// Entry point to redundant load elimination.
  bool run();

  /// Run the iterative data flow until convergence.
  void runIterativeRLE();

  /// Process the basic blocks for the gen and kill set.
  void processBasicBlocksForGenKillSet();

  /// Process the basic blocks with the gen and kill set.
  void processBasicBlocksWithGenKillSet();

  /// Process the basic blocks for values generated in the current basic
  /// block.
  void processBasicBlocksForAvailValue();

  /// Process basic blocks to perform the redundant load elimination.
  void processBasicBlocksForRLE();

  /// Returns the alias analysis we will use during all computations.
  AliasAnalysis *getAA() const { return AA; }

  /// Returns the type expansion analysis we are using.
  TypeExpansionAnalysis *getTE() const { return TE; }

  /// Return the BlockState for the given basic block.
  BlockState &getBlockState(SILBasicBlock *B) { return BBToLocState[B]; }

  /// Get the bit representing the LSLocation in the LocationVault.
  unsigned getLocationBit(const LSLocation &L);

  /// Given the bit, get the LSLocation from the LocationVault.
  LSLocation &getLocation(const unsigned index);

  /// Get the bit representing the LSValue in the LSValueVault.
  unsigned getValueBit(const LSValue &L);

  /// Given the bit, get the LSValue from the LSValueVault.
  LSValue &getValue(const unsigned index);

  /// Transitively collect all the values that make up this location and
  /// create a SILArgument out of them.
  SILValue computePredecessorLocationValue(SILBasicBlock *BB, LSLocation &L);

  /// Given an LSLocation, try to collect all the LSValues for this LSLocation
  /// in the given basic block. If some of the expanded locations only have
  /// covering values, find the concrete values in the predecessors.
  bool collectLocationValues(SILBasicBlock *BB, LSLocation &L,
                             LSLocationValueMap &Values, ValueTableMap &VM);
};

} // end anonymous namespace

void BlockState::startTrackingValue(ValueTableMap &VM, unsigned l, unsigned v) {
  VM[l] = v;
}

void BlockState::stopTrackingValue(ValueTableMap &VM, unsigned bit) {
  VM.erase(bit);
}

bool BlockState::isTrackingLocation(llvm::SmallBitVector &BV, unsigned bit) {
  return BV.test(bit);
}

void BlockState::startTrackingLocation(llvm::SmallBitVector &BV, unsigned bit) {
  BV.set(bit);
}

void BlockState::stopTrackingLocation(llvm::SmallBitVector &BV, unsigned bit) {
  BV.reset(bit);
}

void BlockState::mergePredecessorsAvailSetMax(RLEContext &Ctx) {
  if (BB->pred_empty()) {
    ForwardSetMax.reset();
    return;
  }

  auto Iter = BB->pred_begin();
  ForwardSetMax = Ctx.getBlockState(*Iter).ForwardSetMax;
  Iter = std::next(Iter);
  for (auto EndIter = BB->pred_end(); Iter != EndIter; ++Iter) {
    ForwardSetMax &= Ctx.getBlockState(*Iter).ForwardSetMax;
  }
}

void BlockState::mergePredecessorAvailSet(RLEContext &Ctx) {
  // Clear the state if the basic block has no predecessor.
  if (BB->getPreds().begin() == BB->getPreds().end()) {
    ForwardSetIn.reset();
    return;
  }

  auto Iter = BB->pred_begin();
  ForwardSetIn = Ctx.getBlockState(*Iter).ForwardSetOut;
  Iter = std::next(Iter);
  for (auto EndIter = BB->pred_end(); Iter != EndIter; ++Iter) {
    ForwardSetIn &= Ctx.getBlockState(*Iter).ForwardSetOut;
  }
}
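
// In dataflow terms, the merge above computes the meet over predecessors
// (a sketch of the standard equation it implements):
//
//   ForwardSetIn(BB) = intersection of ForwardSetOut(Pred)
//                      over every predecessor Pred of BB
//
// i.e. a location is available on entry to BB only if it is available on
// exit from *every* predecessor, which is what makes this dataflow
// intersection-based and the initial all-ones ForwardSetOut optimistic.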

void BlockState::mergePredecessorAvailSetAndValue(RLEContext &Ctx) {
  // Clear the state if the basic block has no predecessor.
  if (BB->getPreds().begin() == BB->getPreds().end()) {
    ForwardSetIn.reset();
    ForwardValIn.clear();
    return;
  }

  auto Iter = BB->pred_begin();
  ForwardSetIn = Ctx.getBlockState(*Iter).ForwardSetOut;
  ForwardValIn = Ctx.getBlockState(*Iter).ForwardValOut;
  Iter = std::next(Iter);
  for (auto EndIter = BB->pred_end(); Iter != EndIter; ++Iter) {
    BlockState &OtherState = Ctx.getBlockState(*Iter);
    ForwardSetIn &= OtherState.ForwardSetOut;

    // Merge in the predecessor state.
    for (unsigned i = 0; i < LocationNum; ++i) {
      if (OtherState.ForwardSetOut[i]) {
        // There are multiple values from multiple predecessors, set this as
        // a covering value. We do not need to track the value itself, as we
        // can always go to the predecessors' BlockState to find it.
        ForwardValIn[i] = Ctx.getValueBit(LSValue(true));
        continue;
      }
      // This predecessor does not have an available value for the location,
      // so stop tracking it.
      stopTrackingValue(ForwardValIn, i);
      stopTrackingLocation(ForwardSetIn, i);
    }
  }
}

void BlockState::processBasicBlockWithKind(RLEContext &Ctx, RLEKind Kind) {
  // Iterate over instructions in forward order.
  for (auto &II : *BB) {
    processInstructionWithKind(Ctx, &II, Kind);
  }
}

bool BlockState::processBasicBlockWithGenKillSet() {
  ForwardSetIn.reset(BBKillSet);
  ForwardSetIn |= BBGenSet;
  return updateForwardSetOut();
}
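
// The two lines above are the classic forward transfer function, written
// with llvm::SmallBitVector operations (a sketch of the equation they
// implement):
//
//   ForwardSetOut(BB) = (ForwardSetIn(BB) - BBKillSet) | BBGenSet
//
// updateForwardSetOut() reports whether ForwardSetOut changed; the caller,
// processBasicBlocksWithGenKillSet(), uses that to requeue successors until
// the fixed point is reached.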

SILValue BlockState::reduceValuesAtEndOfBlock(RLEContext &Ctx, LSLocation &L) {
  // First, collect the currently available locations and their corresponding
  // values into a map.
  LSLocationValueMap Values;

  LSLocationList Locs;
  LSLocation::expand(L, &BB->getModule(), Locs, Ctx.getTE());

  // Look up the value available at the end of this basic block for each of
  // the expanded locations.
  ValueTableMap &OTM = getForwardValOut();
  for (unsigned i = 0; i < Locs.size(); ++i) {
    Values[Locs[i]] = Ctx.getValue(OTM[Ctx.getLocationBit(Locs[i])]);
  }

  // Second, reduce the available values into a single SILValue we can use to
  // forward.
  SILValue TheForwardingValue;
  TheForwardingValue = LSValue::reduce(L, &BB->getModule(), Values,
                                       BB->getTerminator(), Ctx.getTE());
  // Return the forwarding value.
  return TheForwardingValue;
}

bool BlockState::setupRLE(RLEContext &Ctx, SILInstruction *I, SILValue Mem) {
  // Try to construct a SILValue for the current LSLocation.
  //
  // Collect the locations and their corresponding values into a map.
  LSLocation L(Mem);
  LSLocationValueMap Values;
  // Use the ForwardValIn as we are currently processing the basic block.
  if (!Ctx.collectLocationValues(I->getParent(), L, Values, getForwardValIn()))
    return false;

  // Reduce the available values into a single SILValue we can use to forward.
  SILModule *Mod = &I->getModule();
  SILValue TheForwardingValue = LSValue::reduce(L, Mod, Values, I, Ctx.getTE());
  if (!TheForwardingValue)
    return false;

  // Now that we have the forwarding value, record it for forwarding!
  //
  // NOTE: we do not perform the RLE right here because doing so could
  // introduce new LSLocations.
  //
  // e.g.
  //    %0 = load %x
  //    %1 = load %x
  //    %2 = extract_struct %1, #a
  //    %3 = load %2
  //
  // If we perform the RLE and replace %1 with %0, we end up having a memory
  // location we did not have before, i.e. Base == %0, and Path == #a.
  //
  // We may be able to add the LSLocation to the vault, but it gets
  // complicated very quickly, e.g. we would need to resize the bit vectors,
  // etc.
  //
  // However, since we already know the instruction to replace and the value
  // to replace it with, we can record it for now and forward it after all
  // the forwardable values are recorded in the function.
  RedundantLoads[I] = TheForwardingValue;
  return true;
}

void BlockState::updateForwardSetForRead(RLEContext &Ctx, unsigned bit) {
  // Track the new location.
  startTrackingLocation(ForwardSetIn, bit);
}

void BlockState::updateGenKillSetForRead(RLEContext &Ctx, unsigned bit) {
  // A read generates the location and cancels any previous kill of it.
  startTrackingLocation(BBGenSet, bit);
  stopTrackingLocation(BBKillSet, bit);
}

void BlockState::updateForwardSetAndValForRead(RLEContext &Ctx, unsigned lbit,
                                               unsigned vbit) {
  // Track the new location and value.
  startTrackingValue(ForwardValIn, lbit, vbit);
  startTrackingLocation(ForwardSetIn, lbit);
}

void BlockState::updateGenKillSetForWrite(RLEContext &Ctx, unsigned bit) {
  // This is a store, invalidate any location that this location may alias,
  // as their values can no longer be forwarded.
  LSLocation &R = Ctx.getLocation(bit);
  for (unsigned i = 0; i < LocationNum; ++i) {
    if (!isTrackingLocation(ForwardSetMax, i))
      continue;
    LSLocation &L = Ctx.getLocation(i);
    if (!L.isMayAliasLSLocation(R, Ctx.getAA()))
      continue;
    // MayAlias, invalidate the location.
    stopTrackingLocation(BBGenSet, i);
    startTrackingLocation(BBKillSet, i);
  }

  // Start tracking this location.
  startTrackingLocation(BBGenSet, bit);
  stopTrackingLocation(BBKillSet, bit);
}

void BlockState::updateMaxAvailForwardSetForWrite(RLEContext &Ctx,
                                                  unsigned bit) {
  // Start tracking this location.
  startTrackingLocation(ForwardSetMax, bit);
}

void BlockState::updateMaxAvailForwardSetForRead(RLEContext &Ctx,
                                                 unsigned bit) {
  // Start tracking this location.
  startTrackingLocation(ForwardSetMax, bit);
}

void BlockState::updateForwardSetForWrite(RLEContext &Ctx, unsigned bit) {
  // This is a store, invalidate any location that this location may alias,
  // as their values can no longer be forwarded.
  LSLocation &R = Ctx.getLocation(bit);
  for (unsigned i = 0; i < LocationNum; ++i) {
    if (!isTrackingLocation(ForwardSetIn, i))
      continue;
    LSLocation &L = Ctx.getLocation(i);
    if (!L.isMayAliasLSLocation(R, Ctx.getAA()))
      continue;
    // MayAlias, invalidate the location.
    stopTrackingLocation(ForwardSetIn, i);
  }

  // Start tracking this location.
  startTrackingLocation(ForwardSetIn, bit);
}

void BlockState::updateForwardSetAndValForWrite(RLEContext &Ctx, unsigned lbit,
                                                unsigned vbit) {
  // This is a store, invalidate any location that this location may alias,
  // as their values can no longer be forwarded.
  LSLocation &R = Ctx.getLocation(lbit);
  for (unsigned i = 0; i < LocationNum; ++i) {
    if (!isTrackingLocation(ForwardSetIn, i))
      continue;
    LSLocation &L = Ctx.getLocation(i);
    if (!L.isMayAliasLSLocation(R, Ctx.getAA()))
      continue;
    // MayAlias, invalidate the location and value.
    stopTrackingValue(ForwardValIn, i);
    stopTrackingLocation(ForwardSetIn, i);
  }

  // Start tracking this location and value.
  startTrackingLocation(ForwardSetIn, lbit);
  startTrackingValue(ForwardValIn, lbit, vbit);
}

void BlockState::processWrite(RLEContext &Ctx, SILInstruction *I, SILValue Mem,
                              SILValue Val, RLEKind Kind) {
  // Initialize the LSLocation.
  LSLocation L(Mem);

  // If we can't figure out the Base or Projection Path for the write,
  // process it as an unknown memory instruction.
  if (!L.isValid()) {
    // We can ignore unknown store instructions if we are computing the
    // AvailSetMax.
    if (!isComputeAvailSetMax(Kind)) {
      processUnknownWriteInst(Ctx, I, Kind);
    }
    return;
  }

  // Expand the given location and val into individual fields and process
  // them as separate writes.
  LSLocationList Locs;
  LSLocation::expand(L, &I->getModule(), Locs, Ctx.getTE());

  if (isComputeAvailSetMax(Kind)) {
    for (unsigned i = 0; i < Locs.size(); ++i) {
      updateMaxAvailForwardSetForWrite(Ctx, Ctx.getLocationBit(Locs[i]));
    }
    return;
  }

  // Are we computing the genset and killset?
  if (isComputeAvailGenKillSet(Kind)) {
    for (unsigned i = 0; i < Locs.size(); ++i) {
      updateGenKillSetForWrite(Ctx, Ctx.getLocationBit(Locs[i]));
    }
    return;
  }

  // Are we computing the available set?
  if (isComputeAvailSet(Kind)) {
    for (unsigned i = 0; i < Locs.size(); ++i) {
      updateForwardSetForWrite(Ctx, Ctx.getLocationBit(Locs[i]));
    }
    return;
  }

  // Are we computing the available value or performing RLE?
  if (isComputeAvailValue(Kind) || isPerformingRLE(Kind)) {
    LSValueList Vals;
    LSValue::expand(Val, &I->getModule(), Vals, Ctx.getTE());
    for (unsigned i = 0; i < Locs.size(); ++i) {
      updateForwardSetAndValForWrite(Ctx, Ctx.getLocationBit(Locs[i]),
                                     Ctx.getValueBit(Vals[i]));
    }
    return;
  }

  llvm_unreachable("Unknown RLE compute kind");
}
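
// For example (hypothetical SIL; $S and the names are made up), a store of a
// whole two-field struct
//
//   store %s to %a : $*S     // S = struct with fields x and y
//
// reaches processWrite() as one write, which LSLocation::expand turns into
// two indivisible field writes: one (LSLocation, LSValue) pair for
// (%a, #S.x) and one for (%a, #S.y), matching the expansion described in
// the file header.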

void BlockState::processRead(RLEContext &Ctx, SILInstruction *I, SILValue Mem,
                             SILValue Val, RLEKind Kind) {
  // Initialize the LSLocation.
  LSLocation L(Mem);

  // If we can't figure out the Base or Projection Path for the read, simply
  // ignore it for now.
  if (!L.isValid())
    return;

  // Expand the given LSLocation and Val into individual fields and process
  // them as separate reads.
  LSLocationList Locs;
  LSLocation::expand(L, &I->getModule(), Locs, Ctx.getTE());

  if (isComputeAvailSetMax(Kind)) {
    for (unsigned i = 0; i < Locs.size(); ++i) {
      updateMaxAvailForwardSetForRead(Ctx, Ctx.getLocationBit(Locs[i]));
    }
    return;
  }

  // Are we computing the genset and killset?
  if (isComputeAvailGenKillSet(Kind)) {
    for (unsigned i = 0; i < Locs.size(); ++i) {
      updateGenKillSetForRead(Ctx, Ctx.getLocationBit(Locs[i]));
    }
    return;
  }

  // Are we computing the available set?
  if (isComputeAvailSet(Kind)) {
    for (auto &X : Locs) {
      if (isTrackingLocation(ForwardSetIn, Ctx.getLocationBit(X)))
        continue;
      updateForwardSetForRead(Ctx, Ctx.getLocationBit(X));
    }
    return;
  }

  // Are we computing the available values?
  bool CanForward = true;
  if (isComputeAvailValue(Kind) || isPerformingRLE(Kind)) {
    LSValueList Vals;
    LSValue::expand(Val, &I->getModule(), Vals, Ctx.getTE());
    for (unsigned i = 0; i < Locs.size(); ++i) {
      if (isTrackingLocation(ForwardSetIn, Ctx.getLocationBit(Locs[i])))
        continue;
      updateForwardSetAndValForRead(Ctx, Ctx.getLocationBit(Locs[i]),
                                    Ctx.getValueBit(Vals[i]));
      // We cannot perform the forwarding as we are missing at least some
      // pieces of the read location.
      CanForward = false;
    }
  }

  // Simply return if we are not performing RLE or we do not have all the
  // values available to perform RLE.
  if (!isPerformingRLE(Kind) || !CanForward)
    return;

  // Lastly, forward the value to the load.
  setupRLE(Ctx, I, Mem);
}

void BlockState::processStoreInst(RLEContext &Ctx, StoreInst *SI,
                                  RLEKind Kind) {
  processWrite(Ctx, SI, SI->getDest(), SI->getSrc(), Kind);
}

void BlockState::processLoadInst(RLEContext &Ctx, LoadInst *LI, RLEKind Kind) {
  processRead(Ctx, LI, LI->getOperand(), SILValue(LI), Kind);
}

void BlockState::processUnknownWriteInstForGenKillSet(RLEContext &Ctx,
                                                      SILInstruction *I) {
  auto *AA = Ctx.getAA();
  for (unsigned i = 0; i < LocationNum; ++i) {
    if (!isTrackingLocation(ForwardSetMax, i))
      continue;
    // Invalidate any location this instruction may write to.
    //
    // TODO: checking may alias with Base is overly conservative,
    // we should check may alias with base plus projection path.
    LSLocation &R = Ctx.getLocation(i);
    if (!AA->mayWriteToMemory(I, R.getBase()))
      continue;
    // MayAlias.
    stopTrackingLocation(BBGenSet, i);
    startTrackingLocation(BBKillSet, i);
  }
}

void BlockState::processUnknownWriteInstForRLE(RLEContext &Ctx,
                                               SILInstruction *I) {
  auto *AA = Ctx.getAA();
  for (unsigned i = 0; i < LocationNum; ++i) {
    if (!isTrackingLocation(ForwardSetIn, i))
      continue;
    // Invalidate any location this instruction may write to.
    //
    // TODO: checking may alias with Base is overly conservative,
    // we should check may alias with base plus projection path.
    LSLocation &R = Ctx.getLocation(i);
    if (!AA->mayWriteToMemory(I, R.getBase()))
      continue;
    // MayAlias.
    stopTrackingLocation(ForwardSetIn, i);
    stopTrackingValue(ForwardValIn, i);
  }
}

void BlockState::processUnknownWriteInst(RLEContext &Ctx, SILInstruction *I,
                                         RLEKind Kind) {
  // Are we computing the genset and killset?
  if (isComputeAvailGenKillSet(Kind)) {
    processUnknownWriteInstForGenKillSet(Ctx, I);
    return;
  }

  // Are we computing the available value or doing RLE?
  if (isComputeAvailValue(Kind) || isPerformingRLE(Kind)) {
    processUnknownWriteInstForRLE(Ctx, I);
    return;
  }

  llvm_unreachable("Unknown RLE compute kind");
}

void BlockState::processInstructionWithKind(RLEContext &Ctx,
                                            SILInstruction *Inst,
                                            RLEKind Kind) {
  // This is a StoreInst, try to see whether it clobbers any forwarding
  // value.
  if (auto *SI = dyn_cast<StoreInst>(Inst)) {
    processStoreInst(Ctx, SI, Kind);
    return;
  }

  // This is a LoadInst. Let's see if we can find a previously loaded or
  // stored value to use instead of this load.
  if (auto *LI = dyn_cast<LoadInst>(Inst)) {
    processLoadInst(Ctx, LI, Kind);
    return;
  }

  // If this instruction has side effects, but is inert from a load store
  // perspective, skip it.
  if (isRLEInertInstruction(Inst))
    return;

  // If this instruction does not read or write memory, we can skip it.
  if (!Inst->mayReadOrWriteMemory())
    return;

  // If we have an instruction that may write to memory and we cannot prove
  // that it and its operands cannot alias a load we have visited,
  // invalidate that load.
  if (Inst->mayWriteToMemory()) {
    processUnknownWriteInst(Ctx, Inst, Kind);
    return;
  }
}

//===----------------------------------------------------------------------===//
// RLEContext Implementation
//===----------------------------------------------------------------------===//

RLEContext::RLEContext(SILFunction *F, AliasAnalysis *AA,
                       TypeExpansionAnalysis *TE, PostOrderFunctionInfo *PO)
    : Fn(F), AA(AA), TE(TE), PO(PO) {
  // Walk over the function and find all the locations accessed by
  // this function.
  LSLocation::enumerateLSLocations(*Fn, LocationVault, LocToBitIndex, TE);

  // For all basic blocks in the function, initialize a BB state. Since we
  // know all the locations accessed in this function, we can resize the bit
  // vector to the appropriate size.
  for (auto &B : *F) {
    BBToLocState[&B] = BlockState();
    BBToLocState[&B].init(&B, LocationVault.size(), isReachable(&B));
  }
}

LSLocation &RLEContext::getLocation(const unsigned index) {
  return LocationVault[index];
}

unsigned RLEContext::getLocationBit(const LSLocation &Loc) {
  // Return the bit position of the given Loc in the LocationVault. The bit
  // position is then used to set/reset the bitvector kept by each
  // BlockState.
  //
  // We should have the location populated by enumerateLSLocations at this
  // point.
  auto Iter = LocToBitIndex.find(Loc);
  assert(Iter != LocToBitIndex.end() && "Location should have been enum'ed");
  return Iter->second;
}

LSValue &RLEContext::getValue(const unsigned index) {
  return LSValueVault[index];
}

unsigned RLEContext::getValueBit(const LSValue &Val) {
  // Return the bit position of the given Val in the LSValueVault. The bit
  // position is then used to set/reset the bitvector kept by each
  // BlockState.
  auto Iter = ValToBitIndex.find(Val);

  // We do not walk over the function up front to find all the possible
  // LSValues in this function, as some of these values will never be used,
  // i.e. if the LoadInst that generates such a value is itself RLE'ed.
  // Instead, we create the LSValues lazily, when we need them.
  if (Iter == ValToBitIndex.end()) {
    ValToBitIndex[Val] = LSValueVault.size();
    LSValueVault.push_back(Val);
    return ValToBitIndex[Val];
  }
  return Iter->second;
}

BlockState::ValueState BlockState::getValueStateAtEndOfBlock(RLEContext &Ctx,
                                                             LSLocation &L) {
  // Find the number of covering values and concrete values for the locations
  // expanded from the given location.
  unsigned CSCount = 0, CTCount = 0;
  LSLocationList Locs;
  LSLocation::expand(L, &BB->getModule(), Locs, Ctx.getTE());

  ValueTableMap &OTM = getForwardValOut();
  for (auto &X : Locs) {
    LSValue &V = Ctx.getValue(OTM[Ctx.getLocationBit(X)]);
    if (V.isCoveringValue()) {
      ++CSCount;
      continue;
    }
    ++CTCount;
  }

  if (CSCount == Locs.size())
    return ValueState::CoverValues;
  if (CTCount == Locs.size())
    return ValueState::ConcreteValues;
  return ValueState::CoverAndConcreteValues;
}
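
// For instance (a hypothetical location that expands to exactly two fields):
// if field x has a concrete LSValue from a store in this block while field y
// only has a covering value merged in from multiple predecessors, then
// CSCount == CTCount == 1, neither equals Locs.size(), and the function
// returns CoverAndConcreteValues - the mixed case handled by the recursive
// collectLocationValues / computePredecessorLocationValue pair below.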

SILValue RLEContext::computePredecessorLocationValue(SILBasicBlock *BB,
                                                     LSLocation &L) {
  BBValueMap Values;
  llvm::DenseSet<SILBasicBlock *> HandledBBs;
  llvm::SmallVector<SILBasicBlock *, 8> WorkList;

  // Push in all the predecessors to get started.
  for (auto Pred : BB->getPreds()) {
    WorkList.push_back(Pred);
  }

  while (!WorkList.empty()) {
    auto *CurBB = WorkList.pop_back_val();
    BlockState &Forwarder = getBlockState(CurBB);

    // Mark this basic block as processed.
    HandledBBs.insert(CurBB);

    // There are 3 cases that can happen here.
    //
    // 1. The current basic block contains concrete values for the entire
    //    location.
    // 2. The current basic block contains covering values for the entire
    //    location.
    // 3. The current basic block contains concrete values for part of the
    //    location and covering values for the rest.
    //
    // This BlockState contains concrete values for all the expanded
    // locations, collect and reduce them into a single value in the current
    // basic block.
    if (Forwarder.isConcreteValues(*this, L)) {
      Values[CurBB] = Forwarder.reduceValuesAtEndOfBlock(*this, L);
      continue;
    }

    // This BlockState does not contain a concrete value for any of the
    // expanded locations, so collect values from this block's predecessors.
    if (Forwarder.isCoverValues(*this, L)) {
      for (auto Pred : CurBB->getPreds()) {
        if (HandledBBs.find(Pred) != HandledBBs.end())
          continue;
        WorkList.push_back(Pred);
      }
      continue;
    }

    // This block contains concrete values for some but not all the expanded
    // locations; recursively call collectLocationValues to materialize the
    // value that reaches this basic block.
    LSLocationValueMap LSValues;
    if (!collectLocationValues(CurBB, L, LSValues,
                               Forwarder.getForwardValOut()))
      return SILValue();

    // Reduce the available values into a single SILValue we can use to
    // forward.
    SILInstruction *IPt = CurBB->getTerminator();
    Values[CurBB] = LSValue::reduce(L, &BB->getModule(), LSValues, IPt, TE);
  }

  // Finally, feed all the collected values to the SSAUpdater and materialize
  // the value at the beginning of BB.
  Updater.Initialize(L.getType());
  for (auto V : Values) {
    Updater.AddAvailableValue(V.first, V.second);
  }

  return Updater.GetValueInMiddleOfBlock(BB);
}

bool RLEContext::collectLocationValues(SILBasicBlock *BB, LSLocation &L,
                                       LSLocationValueMap &Values,
                                       ValueTableMap &VM) {
  LSLocationSet CSLocs;
  LSLocationList Locs;
  LSLocation::expand(L, &BB->getModule(), Locs, TE);

  auto *Mod = &BB->getModule();
  // Find the locations for which this basic block has concrete values and
  // the locations for which it only has covering values.
  for (auto &X : Locs) {
    Values[X] = getValue(VM[getLocationBit(X)]);
    if (!Values[X].isCoveringValue())
      continue;
    CSLocs.insert(X);
  }

  // For the locations which do not have concrete values in this basic
  // block, try to reduce them to the minimum # of locations possible; this
  // will help us to generate as few SILArguments as possible.
  LSLocation::reduce(L, Mod, CSLocs, TE);

  // To handle a covering value, we need to go to the predecessors and
  // materialize it there.
  for (auto &X : CSLocs) {
    SILValue V = computePredecessorLocationValue(BB, X);
    if (!V)
      return false;

    // We've constructed a concrete value for the covering value. Expand and
    // collect the newly created forwardable values.
    LSLocationList Locs;
    LSValueList Vals;
    LSLocation::expand(X, Mod, Locs, TE);
    LSValue::expand(V, Mod, Vals, TE);

    for (unsigned i = 0; i < Locs.size(); ++i) {
      Values[Locs[i]] = Vals[i];
      assert(Values[Locs[i]].isValid() && "Invalid load store value");
    }
  }
  return true;
}

void RLEContext::processBasicBlocksForGenKillSet() {
  for (SILBasicBlock *BB : PO->getReversePostOrder()) {
    BlockState &S = getBlockState(BB);

    // Compute the AvailSetMax at the beginning of the basic block.
    S.mergePredecessorsAvailSetMax(*this);

    // Compute the genset and killset.
    //
    // To optimize this process, we also compute the AvailSetMax at each
    // particular point in the basic block.
    for (auto I = BB->begin(), E = BB->end(); I != E; ++I) {
      if (auto *LI = dyn_cast<LoadInst>(&*I)) {
        S.processLoadInst(*this, LI, RLEKind::ComputeAvailSetMax);
      }
      if (auto *SI = dyn_cast<StoreInst>(&*I)) {
        S.processStoreInst(*this, SI, RLEKind::ComputeAvailSetMax);
      }

      S.processInstructionWithKind(*this, &*I, RLEKind::ComputeAvailGenKillSet);
    }
  }
}

void RLEContext::processBasicBlocksWithGenKillSet() {
  // Process each basic block with the gen and kill set. Every time the
  // ForwardSetOut of a basic block changes, the optimization is rerun on its
  // successors.
  llvm::SmallVector<SILBasicBlock *, 16> WorkList;
  llvm::DenseSet<SILBasicBlock *> HandledBBs;

  // Push into the worklist in post order so that we can pop from the back
  // and get reverse post order.
  for (SILBasicBlock *BB : PO->getPostOrder()) {
    WorkList.push_back(BB);
    HandledBBs.insert(BB);
  }
  while (!WorkList.empty()) {
    SILBasicBlock *BB = WorkList.pop_back_val();
    HandledBBs.erase(BB);

    // Intersection.
    BlockState &Forwarder = getBlockState(BB);
    // Compute the ForwardSetIn at the beginning of the basic block.
    Forwarder.mergePredecessorAvailSet(*this);

    if (Forwarder.processBasicBlockWithGenKillSet()) {
      for (auto &X : BB->getSuccessors()) {
        // We do not push a basic block into the worklist if it is already
        // in the worklist.
        if (HandledBBs.find(X) != HandledBBs.end())
          continue;
        WorkList.push_back(X);
      }
    }
  }
}

void RLEContext::processBasicBlocksForAvailValue() {
  for (SILBasicBlock *BB : PO->getReversePostOrder()) {
    BlockState &Forwarder = getBlockState(BB);

    // Merge the predecessors. After merging, BlockState now contains
    // lists of available LSLocations and their values that reach the
    // beginning of the basic block along all paths.
    Forwarder.mergePredecessorAvailSetAndValue(*this);

    // Walk the basic block and compute the available value for each
    // location: values from previous loads are merged and stored values are
    // propagated to the locations they write, reflecting the state at the
    // end of the basic block.
    Forwarder.processBasicBlockWithKind(*this, RLEKind::ComputeAvailValue);

    // Update the locations with available values. We do not need to update
    // the available BitVector here as it should have been initialized and
    // stabilized in processBasicBlocksWithGenKillSet.
    Forwarder.updateForwardValOut();
  }
}

void RLEContext::processBasicBlocksForRLE() {
  for (SILBasicBlock *BB : PO->getReversePostOrder()) {
    BlockState &Forwarder = getBlockState(BB);

    // Merge the predecessors. After merging, BlockState now contains
    // lists of available LSLocations and their values that reach the
    // beginning of the basic block along all paths.
    Forwarder.mergePredecessorAvailSetAndValue(*this);

    // Perform the actual redundant load elimination.
    Forwarder.processBasicBlockWithKind(*this, RLEKind::PerformRLE);

    // Update the available locations and their values at the end of the
    // basic block.
    Forwarder.updateForwardSetOut();
    Forwarder.updateForwardValOut();
  }
}

void RLEContext::runIterativeRLE() {
  // We perform redundant load elimination in the following phases.
  //
  // Phase 1. Compute the genset and killset for every basic block.
  //
  // Phase 2. Use an iterative data flow to compute whether there is an
  // available value at a given point; we do not yet care what the value is.
  //
  // Phase 3. Compute the real forwardable value at a given point.
  //
  // Phase 4. Perform the redundant load elimination.

  // Generate the genset and killset for every basic block.
  processBasicBlocksForGenKillSet();

  // Process basic blocks in RPO. After the data flow converges, run the last
  // iteration and perform load forwarding.
  processBasicBlocksWithGenKillSet();

  // We have computed the available value bits; now go through every basic
  // block and compute the forwarding value locally. This is necessary
  // because, when we perform the RLE in the last iteration, we must handle
  // loops, i.e. predecessor blocks which have not yet been processed when a
  // basic block is processed.
  processBasicBlocksForAvailValue();
}

bool RLEContext::run() {
  // Data flow may take too long to converge.
  if (LocationVault.size() > MaxLSLocationLimit)
    return false;

  if (!isOneIterationFunction(PO))
    runIterativeRLE();

  // We have the available value bits computed and the local forwarding
  // values. Set up the load forwarding.
  processBasicBlocksForRLE();

  // Finally, perform the redundant load replacements.
  llvm::DenseSet<SILInstruction *> InstsToDelete;
  bool SILChanged = false;
  for (auto &X : BBToLocState) {
    for (auto &F : X.second.getRL()) {
      DEBUG(llvm::dbgs() << "Replacing " << SILValue(F.first) << " with "
                         << F.second);
      SILChanged = true;
      SILValue(F.first).replaceAllUsesWith(F.second);
      InstsToDelete.insert(F.first);
      ++NumForwardedLoads;
    }
  }

  // Erase the instructions recursively; this way, we get rid of pass
  // dependence on DCE.
  for (auto &X : InstsToDelete) {
    // It is possible that the instruction still has uses, because it could
    // be used as the replacement Value, i.e. F.second, for some other RLE
    // pairs.
    //
    // TODO: we should fix this, otherwise we are missing RLE opportunities.
    if (!X->use_empty())
      continue;
    recursivelyDeleteTriviallyDeadInstructions(X, true);
  }
  return SILChanged;
}

//===----------------------------------------------------------------------===//
// Top Level Entry Point
//===----------------------------------------------------------------------===//

namespace {

class RedundantLoadElimination : public SILFunctionTransform {

  StringRef getName() override { return "SIL Redundant Load Elimination"; }

  /// The entry point to the transformation.
  void run() override {
    SILFunction *F = getFunction();
    DEBUG(llvm::dbgs() << "*** RLE on function: " << F->getName() << " ***\n");

    auto *AA = PM->getAnalysis<AliasAnalysis>();
    auto *TE = PM->getAnalysis<TypeExpansionAnalysis>();
    auto *PO = PM->getAnalysis<PostOrderAnalysis>()->get(F);

    RLEContext RLE(F, AA, TE, PO);
    if (RLE.run()) {
      invalidateAnalysis(SILAnalysis::InvalidationKind::Instructions);
    }
  }
};

} // end anonymous namespace

SILTransform *swift::createRedundantLoadElimination() {
  return new RedundantLoadElimination();
}