mirror of
https://github.com/apple/swift.git
synced 2025-12-21 12:14:44 +01:00
After the refactoring, RLE runs in the following phases: Phase 1. we use an iterative data flow to compute whether there is an available value at a given point, we do not yet care about what the value is. Phase 2. we compute the real forwardable value at a given point. Phase 3. we setup the SILValues for the redundant load elimination. Phase 4. we perform the redundant load elimination. Previously we were computing available bit as well as what the available value is every iteration of the data flow. I do not see a compilation time improvement though, but this helps to move to a genset and killset later as we only need to expand Phase 1 into a few smaller phases to compute genset & killset first and then iterate until convergence for the data flow. I verified that we are performing same # of RLE on stdlib before the change. Existing test ensure correctness.
1093 lines
38 KiB
C++
1093 lines
38 KiB
C++
//===-------- RedundantLoadElimination.cpp - SIL Load Forwarding ---------===//
|
|
//
|
|
// This source file is part of the Swift.org open source project
|
|
//
|
|
// Copyright (c) 2014 - 2015 Apple Inc. and the Swift project authors
|
|
// Licensed under Apache License v2.0 with Runtime Library Exception
|
|
//
|
|
// See http://swift.org/LICENSE.txt for license information
|
|
// See http://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
///
|
|
/// \file
|
|
///
|
|
/// This pass eliminates redundant loads.
|
|
///
|
|
/// A load can be eliminated if its value has already been held somewhere,
|
|
/// i.e. generated by a previous load, LSLocation stored by a known value.
|
|
///
|
|
/// In this case, one can replace the load instruction with the previous
|
|
/// results.
|
|
///
|
|
/// Redundant Load Elimination (RLE) eliminates such loads by:
|
|
///
|
|
/// 1. Introducing a notion of a LSLocation that is used to model object
|
|
/// fields. (See below for more details).
|
|
///
|
|
/// 2. Introducing a notion of a LSValue that is used to model the value
|
|
/// that currently resides in the associated LSLocation on the particular
|
|
/// program path. (See below for more details).
|
|
///
|
|
/// 3. Performing a RPO walk over the control flow graph, tracking any
|
|
/// LSLocations that are read from or stored into in each basic block. The
|
|
/// read or stored value, kept in a map (gen-set) between LSLocation and
|
|
/// LSValue, becomes the available value for the LSLocation.
|
|
///
|
|
/// 4. An optimistic iterative intersection-based dataflow is performed on the
|
|
/// gensets until convergence.
|
|
///
|
|
/// At the core of RLE, there is the LSLocation class. A LSLocation is an
|
|
/// abstraction of an object field in program. It consists of a base and a
|
|
/// projection path to the field accessed.
|
|
///
|
|
/// In SIL, one can access an aggregate as a whole, i.e. store to a struct with
|
|
/// 2 Int fields. A store like this will generate 2 *indivisible* LSLocations,
|
|
/// 1 for each field and in addition to keeping a list of LSLocation, RLE also
|
|
/// keeps their available LSValues. We call it *indivisible* because it
|
|
/// can not be broken down to more LSLocations.
|
|
///
|
|
/// LSValue consists of a base - a SILValue from the load or store inst,
|
|
/// as well as a projection path to which the field it represents. So, a
|
|
/// store to an 2-field struct as mentioned above will generate 2 LSLocations
|
|
/// and 2 LSValues.
|
|
///
|
|
/// Every basic block keeps a map between LSLocation and LSValue. By
|
|
/// keeping the LSLocation and LSValue in their indivisible form, one
|
|
/// can easily find which part of the load is redundant and how to compute its
|
|
/// forwarding value.
|
|
///
|
|
/// Given the case which the 2 fields of the struct both have available values,
|
|
/// RLE can find their LSValues (maybe by struct_extract from a larger
|
|
/// value) and then aggregate them.
|
|
///
|
|
/// However, this may introduce a lot of extraction and aggregation which may
|
|
/// not be necessary, i.e. a store to the struct followed by a load from the
/// struct. To solve this problem, when RLE detects that a load instruction
|
|
/// can be replaced by forwarded value, it will try to find minimum # of
|
|
/// extractions necessary to form the forwarded value. It will group the
|
|
/// available value's by the LSValue base, i.e. the LSValues come from the
|
|
/// same instruction, and then use extraction to obtain the needed components
|
|
/// of the base.
|
|
///
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
#define DEBUG_TYPE "sil-redundant-load-elim"
|
|
#include "swift/SIL/Projection.h"
|
|
#include "swift/SIL/SILArgument.h"
|
|
#include "swift/SIL/SILBuilder.h"
|
|
#include "swift/SIL/SILValueProjection.h"
|
|
#include "swift/SILOptimizer/Analysis/AliasAnalysis.h"
|
|
#include "swift/SILOptimizer/Analysis/DominanceAnalysis.h"
|
|
#include "swift/SILOptimizer/Analysis/PostOrderAnalysis.h"
|
|
#include "swift/SILOptimizer/Analysis/ValueTracking.h"
|
|
#include "swift/SILOptimizer/PassManager/Passes.h"
|
|
#include "swift/SILOptimizer/PassManager/Transforms.h"
|
|
#include "swift/SILOptimizer/Utils/CFG.h"
|
|
#include "swift/SILOptimizer/Utils/Local.h"
|
|
#include "swift/SILOptimizer/Utils/SILSSAUpdater.h"
|
|
#include "llvm/ADT/BitVector.h"
|
|
#include "llvm/ADT/MapVector.h"
|
|
#include "llvm/ADT/None.h"
|
|
#include "llvm/ADT/Statistic.h"
|
|
#include "llvm/ADT/TinyPtrVector.h"
|
|
#include "llvm/Support/CommandLine.h"
|
|
#include "llvm/Support/Debug.h"
|
|
|
|
using namespace swift;
|
|
|
|
STATISTIC(NumForwardedLoads, "Number of loads forwarded");
|
|
|
|
/// The three phases the RLE data flow machinery can run in. The same
/// per-block processing code is reused for each phase; the kind selects
/// which state (bit set, value map, or actual rewriting) is updated.
enum class RLEKind : unsigned {
  ComputeAvailSet = 0,
  ComputeAvailValue = 1,
  PerformRLE = 2,
};

//===----------------------------------------------------------------------===//
//                             Utility Functions
//===----------------------------------------------------------------------===//

/// Is this the phase that computes which locations have an available value?
inline bool isComputeAvailSet(RLEKind Kind) {
  return Kind == RLEKind::ComputeAvailSet;
}

/// Is this the phase that computes what the available values actually are?
inline bool isComputeAvailValue(RLEKind Kind) {
  return Kind == RLEKind::ComputeAvailValue;
}

/// Is this the phase that performs the actual load elimination?
inline bool isPerformRLE(RLEKind Kind) {
  return Kind == RLEKind::PerformRLE;
}
|
|
|
|
/// Returns true if this is an instruction that may have side effects in a
/// general sense but are inert from a load store perspective.
///
/// These instructions may retain/release or trap, but they never write to
/// memory that a tracked LSLocation could observe, so the data flow does not
/// need to invalidate any available values when it sees them.
static bool isRLEInertInstruction(SILInstruction *Inst) {
  switch (Inst->getKind()) {
  case ValueKind::StrongRetainInst:
  case ValueKind::StrongRetainUnownedInst:
  case ValueKind::UnownedRetainInst:
  case ValueKind::RetainValueInst:
  case ValueKind::DeallocStackInst:
  case ValueKind::CondFailInst:
  case ValueKind::IsUniqueInst:
  case ValueKind::IsUniqueOrPinnedInst:
    return true;
  default:
    return false;
  }
}
|
|
|
|
/// Returns true if the given basic block is reachable from the entry block.
|
|
///
|
|
/// TODO: this is very inefficient, can we make use of the domtree.
|
|
static bool isReachable(SILBasicBlock *Block) {
|
|
SmallPtrSet<SILBasicBlock *, 16> Visited;
|
|
llvm::SmallVector<SILBasicBlock *, 16> Worklist;
|
|
SILBasicBlock *EntryBB = &*Block->getParent()->begin();
|
|
Worklist.push_back(EntryBB);
|
|
Visited.insert(EntryBB);
|
|
|
|
while (!Worklist.empty()) {
|
|
auto *CurBB = Worklist.back();
|
|
Worklist.pop_back();
|
|
|
|
if (CurBB == Block)
|
|
return true;
|
|
|
|
for (auto &Succ : CurBB->getSuccessors())
|
|
if (!Visited.insert(Succ).second)
|
|
Worklist.push_back(Succ);
|
|
}
|
|
return false;
|
|
}
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
// Basic Block Location State
|
|
//===----------------------------------------------------------------------===//
|
|
namespace {

// If there are too many locations in the function, we give up.
constexpr unsigned MaxLSLocationLimit = 2048;

/// Forward declaration.
class RLEContext;

/// State of the load store in one basic block which allows for forwarding
/// from loads, stores -> loads.
///
/// Each block carries an IN and an OUT bit vector (which locations have an
/// available value) plus an IN and an OUT value table (what those values
/// are). The same object is reused across all three RLEKind phases.
class BlockState {
public:
  /// Classification of the values available for a (possibly aggregate)
  /// location at the end of this block: all covering values, all concrete
  /// values, or a mix of both.
  enum class ValueState : unsigned {
    CoverValues = 0,
    ConcreteValues = 1,
    CoverAndConcreteValues = 2,
  };

private:
  /// The basic block that we are optimizing.
  SILBasicBlock *BB;

  /// A bit vector for which the ith bit represents the ith LSLocation in
  /// LSLocationVault. If the bit is set, then the location currently has a
  /// downward visible value.
  llvm::BitVector ForwardSetIn;

  /// If ForwardSetOut changes while processing a basicblock, then all its
  /// successors need to be rerun.
  llvm::BitVector ForwardSetOut;

  /// This is a map between LSLocations and their available values at the
  /// beginning of this basic block.
  ValueTableMap ForwardValIn;

  /// This is a map between LSLocations and their available values at the end
  /// of this basic block.
  ValueTableMap ForwardValOut;

  /// Keeps a list of replaceable instructions in the current basic block as
  /// well as their SILValue replacement.
  llvm::DenseMap<SILInstruction *, SILValue> RedundantLoads;

  /// Publish the in-state value table as this block's out-state.
  void updateForwardValOut() { ForwardValOut = ForwardValIn; }

  /// Check whether the ForwardSetOut has changed. If it does, we need to
  /// rerun the data flow to reach fixed point.
  bool updateForwardSetOut() {
    // Check the available value bit vector for changes.
    bool Changed = false;
    Changed |= (ForwardSetIn != ForwardSetOut);
    if (!Changed)
      return Changed;
    ForwardSetOut = ForwardSetIn;
    return Changed;
  }

  /// Merge in the state of an individual predecessor.
  void mergePredecessorState(RLEContext &Ctx, BlockState &OtherState,
                             RLEKind Kind);

  /// LSLocation read has been extracted, expanded and mapped to the bit
  /// position in the bitvector. Process it using the bit position.
  void updateForwardSetForRead(RLEContext &Ctx, unsigned bit);

  /// LSLocation read has been extracted, expanded and mapped to the bit
  /// position in the bitvector. Process it (location bit and value bit)
  /// using the bit positions.
  void updateForwardValForRead(RLEContext &Ctx, unsigned lbit, unsigned vbit);

  /// LSLocation written has been extracted, expanded and mapped to the bit
  /// position in the bitvector. Process it using the bit position.
  void updateForwardSetForWrite(RLEContext &Ctx, unsigned bit);

  /// LSLocation written has been extracted, expanded and mapped to the bit
  /// position in the bitvector. Process it (location bit and value bit)
  /// using the bit positions.
  void updateForwardValForWrite(RLEContext &Ctx, unsigned lbit, unsigned vbit);

  /// There is a read to a LSLocation, expand the LSLocation into individual
  /// fields before processing them.
  void processRead(RLEContext &Ctx, SILInstruction *I, SILValue Mem,
                   SILValue Val, RLEKind Kind);

  /// There is a write to a LSLocation, expand the LSLocation into individual
  /// fields before processing them.
  void processWrite(RLEContext &Ctx, SILInstruction *I, SILValue Mem,
                    SILValue Val, RLEKind Kind);

  /// BitVector manipulation functions.
  void clearLSLocations();
  void startTrackingLSLocation(unsigned bit);
  void stopTrackingLSLocation(unsigned bit);
  bool isTrackingLSLocation(unsigned bit);
  void startTrackingLSValue(unsigned lbit, unsigned vbit);
  void stopTrackingLSValue(unsigned bit);

public:
  BlockState() = default;

  void init(SILBasicBlock *NewBB, unsigned bitcnt, bool reachable) {
    BB = NewBB;
    // The initial state of ForwardSetOut should be all 1's. Otherwise the
    // dataflow solution could be too conservative.
    //
    // Consider this case, the forwardable value by var a = 10 before the loop
    // will not be forwarded if the ForwardSetOut is set to 0 initially.
    //
    //   var a = 10
    //   for _ in 0...1024 {}
    //   use(a);
    //
    // However, by doing so, we can only do the data forwarding after the
    // data flow stablizes.
    //
    // Unreachable blocks are seeded with 0 instead (see the RLEContext
    // constructor) so that they do not pollute the intersection-based merge.
    ForwardSetIn.resize(bitcnt, false);
    ForwardSetOut.resize(bitcnt, reachable);
  }

  /// Returns the current basic block we are processing.
  SILBasicBlock *getBB() const { return BB; }

  /// Returns the ForwardValIn for the current basic block.
  ValueTableMap &getForwardValIn() { return ForwardValIn; }

  /// Returns the ForwardValOut for the current basic block.
  ValueTableMap &getForwardValOut() { return ForwardValOut; }

  /// Returns the redundant loads and their replacement in the current basic
  /// block.
  llvm::DenseMap<SILInstruction *, SILValue> &getRL() { return RedundantLoads; }

  /// Look into the value for the given LSLocation at end of the basic block,
  /// return one of the three ValueState kinds.
  ValueState getValueStateAtEndOfBlock(RLEContext &Ctx, LSLocation &L);

  /// Wrappers to query the value state of the location in this BlockState.
  bool isCoverValues(RLEContext &Ctx, LSLocation &L) {
    return getValueStateAtEndOfBlock(Ctx, L) == ValueState::CoverValues;
  }
  bool isConcreteValues(RLEContext &Ctx, LSLocation &L) {
    return getValueStateAtEndOfBlock(Ctx, L) == ValueState::ConcreteValues;
  }

  /// Process every instruction in the block for the given phase. Returns
  /// true (in the ComputeAvailSet phase) when ForwardSetOut changed and the
  /// data flow must be rerun.
  bool optimize(RLEContext &Ctx, RLEKind Kind);

  /// Set up the value for redundant load elimination.
  bool setupRLE(RLEContext &Ctx, SILInstruction *I, SILValue Mem);

  /// Merge in the states of all predecessors.
  void mergePredecessorStates(RLEContext &Ctx, RLEKind Kind);

  /// Process Instruction which writes to memory in an unknown way.
  void processUnknownWriteInst(RLEContext &Ctx, SILInstruction *I,
                               RLEKind Kind);

  /// Process LoadInst. Extract LSLocations from LoadInst.
  void processLoadInst(RLEContext &Ctx, LoadInst *LI, RLEKind Kind);

  /// Process StoreInst. Extract LSLocations from StoreInst.
  void processStoreInst(RLEContext &Ctx, StoreInst *SI, RLEKind Kind);

  /// Returns a *single* forwardable SILValue for the given LSLocation,
  /// materialized right before this block's terminator.
  SILValue reduceValuesAtEndOfBlock(RLEContext &Ctx, LSLocation &L);
};

} // end anonymous namespace
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
// RLEContext Interface
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
namespace {

using BBValueMap = llvm::DenseMap<SILBasicBlock *, SILValue>;

/// This class stores global state that we use when computing redundant loads
/// and their replacement in each basic block.
class RLEContext {
  /// Function currently processing.
  SILFunction *Fn;

  /// The alias analysis that we will use during all computations.
  AliasAnalysis *AA;

  /// The type expansion analysis we will use during all computations.
  TypeExpansionAnalysis *TE;

  /// The SSA updater we use to materialize covering values.
  SILSSAUpdater Updater;

  /// The range that we use to iterate over the reverse post order of the
  /// given function.
  PostOrderFunctionInfo::reverse_range ReversePostOrder;

  /// Keeps all the locations for the current function. The BitVector in each
  /// BlockState is then laid on top of it to keep track of which LSLocation
  /// has a downward available value.
  std::vector<LSLocation> LSLocationVault;

  /// Contains a map between LSLocation to their index in the LSLocationVault.
  /// Use for fast lookup.
  llvm::DenseMap<LSLocation, unsigned> LocToBitIndex;

  /// Keeps all the loadstorevalues for the current function. The BitVector in
  /// each BBState is then laid on top of it to keep track of which LSLocation
  /// has a downward available value.
  std::vector<LSValue> LSValueVault;

  /// Contains a map between LSValue to their index in the LSValueVault.
  /// Use for fast lookup.
  llvm::DenseMap<LSValue, unsigned> ValToBitIndex;

  /// A map from each BasicBlock to its BlockState.
  llvm::SmallDenseMap<SILBasicBlock *, BlockState, 4> BBToLocState;

  /// A map for each basic block and whether its predecessors have forwardable
  /// edges.
  llvm::DenseMap<SILBasicBlock *, bool> ForwardableEdge;

public:
  RLEContext(SILFunction *F, AliasAnalysis *AA, TypeExpansionAnalysis *TE,
             PostOrderFunctionInfo::reverse_range RPOT);

  RLEContext(const RLEContext &) = delete;
  RLEContext(RLEContext &&) = default;
  ~RLEContext() = default;

  /// Run all RLE phases over the function; returns true if anything changed.
  bool run();

  /// Run one pass of the given phase over all basic blocks.
  bool processBasicBlocks(RLEKind Kind);

  /// Returns the alias analysis we will use during all computations.
  AliasAnalysis *getAA() const { return AA; }

  /// Returns the type expansion analysis we are using.
  TypeExpansionAnalysis *getTE() const { return TE; }

  /// Return the BlockState for the given basic block.
  BlockState &getBlockState(SILBasicBlock *B) { return BBToLocState[B]; }

  /// Get the bit representing the LSLocation in the LSLocationVault.
  unsigned getLSLocationBit(const LSLocation &L);

  /// Given the bit, get the LSLocation from the LSLocationVault.
  LSLocation &getLSLocation(const unsigned index);

  /// Get the bit representing the LSValue in the LSValueVault.
  unsigned getLSValueBit(const LSValue &L);

  /// Given the bit, get the LSValue from the LSValueVault.
  LSValue &getLSValue(const unsigned index);

  /// Transitively collect all the values that make up this location and
  /// create a SILArgument out of them.
  SILValue computePredecessorLocationValue(SILBasicBlock *BB, LSLocation &L);

  /// Given a LSLocation, try to collect all the LSValues for this LSLocation
  /// in the given basic block.
  bool gatherLocationValues(SILBasicBlock *B, LSLocation &L,
                            LSLocationValueMap &Vs, ValueTableMap &VM);
};

} // end anonymous namespace
|
|
|
|
/// Returns true if the location with the given bit currently has a downward
/// visible value in this block's in-state.
bool BlockState::isTrackingLSLocation(unsigned bit) {
  return ForwardSetIn.test(bit);
}
|
|
|
|
/// Mark the location with the given bit as having an available value.
void BlockState::startTrackingLSLocation(unsigned bit) {
  ForwardSetIn.set(bit);
}
|
|
|
|
/// Mark the location with the given bit as no longer having an available
/// value (e.g. because it was clobbered by a write).
void BlockState::stopTrackingLSLocation(unsigned bit) {
  ForwardSetIn.reset(bit);
}
|
|
|
|
/// Drop every tracked location from the in-state bit vector.
void BlockState::clearLSLocations() { ForwardSetIn.reset(); }
|
|
|
|
/// Record that location bit \p lbit currently holds the value with bit
/// \p vbit in the in-state value table.
void BlockState::startTrackingLSValue(unsigned lbit, unsigned vbit) {
  ForwardValIn[lbit] = vbit;
}
|
|
|
|
/// Forget the available value for the location with the given bit.
void BlockState::stopTrackingLSValue(unsigned bit) { ForwardValIn.erase(bit); }
|
|
|
|
/// Collect the available value for every field of \p L at the end of this
/// block and reduce them into a single SILValue, materialized right before
/// the block's terminator.
SILValue BlockState::reduceValuesAtEndOfBlock(RLEContext &Ctx, LSLocation &L) {
  // Expand the location into the individual (indivisible) fields it covers.
  LSLocationList Locs;
  LSLocation::expand(L, &BB->getModule(), Locs, Ctx.getTE());

  // Look up the end-of-block value for every expanded field.
  LSLocationValueMap Values;
  ValueTableMap &OTM = getForwardValOut();
  for (auto &Loc : Locs)
    Values[Loc] = Ctx.getLSValue(OTM[Ctx.getLSLocationBit(Loc)]);

  // Reduce the per-field values into one forwardable SILValue and return it.
  return LSValue::reduce(L, &BB->getModule(), Values, BB->getTerminator(),
                         Ctx.getTE());
}
|
|
|
|
/// Try to construct a forwardable SILValue for the load \p I from location
/// \p Mem and record it for later replacement. Returns false when no such
/// value can be built.
bool BlockState::setupRLE(RLEContext &Ctx, SILInstruction *I, SILValue Mem) {
  // Collect the available values for every field of this location. We use
  // ForwardValIn because we are in the middle of processing this block.
  LSLocation L(Mem);
  LSLocationValueMap Values;
  if (!Ctx.gatherLocationValues(I->getParent(), L, Values, getForwardValIn()))
    return false;

  // Reduce the collected per-field values into a single SILValue we can use
  // to forward, materialized right before I.
  SILValue TheForwardingValue =
      LSValue::reduce(L, &I->getModule(), Values, I, Ctx.getTE());
  if (!TheForwardingValue)
    return false;

  // Record the replacement instead of rewriting immediately.
  //
  // NOTE: performing the RLE right here could introduce new LSLocations:
  //
  //   %0 = load %x
  //   %1 = load %x
  //   %2 = extract_struct %1, #a
  //   %3 = load %2
  //
  // Replacing %1 with %0 creates a memory location we did not have before,
  // i.e. Base == %0 and Path == #a. We could try to add it to the vault, but
  // that gets complicated quickly (the bit vectors would need resizing,
  // etc.). Since the instruction and its replacement are already known, we
  // remember the pair and perform the forwarding after every forwardable
  // value in the function has been recorded.
  RedundantLoads[I] = TheForwardingValue;
  return true;
}
|
|
|
|
/// A read makes the location's value downward visible: set its bit in the
/// available-location set.
void BlockState::updateForwardSetForRead(RLEContext &Ctx, unsigned bit) {
  // Track the new location and value.
  startTrackingLSLocation(bit);
}
|
|
|
|
/// A read makes both the location and the loaded value available: record the
/// value bit for the location and set the location's tracking bit.
void BlockState::updateForwardValForRead(RLEContext &Ctx, unsigned lbit,
                                         unsigned vbit) {
  // Track the new location and value.
  startTrackingLSValue(lbit, vbit);
  startTrackingLSLocation(lbit);
}
|
|
|
|
/// A write clobbers every tracked location that may alias the written one;
/// those values can no longer be forwarded across this store. The written
/// location itself becomes available afterwards.
void BlockState::updateForwardSetForWrite(RLEContext &Ctx, unsigned bit) {
  LSLocation &Written = Ctx.getLSLocation(bit);
  for (unsigned i = 0, e = ForwardSetIn.size(); i != e; ++i) {
    if (!isTrackingLSLocation(i))
      continue;
    // Invalidate any tracked location that may alias the written one.
    if (Ctx.getLSLocation(i).isMayAliasLSLocation(Written, Ctx.getAA()))
      stopTrackingLSLocation(i);
  }

  // The written location now has an available value.
  startTrackingLSLocation(bit);
}
|
|
|
|
/// Value-tracking counterpart of updateForwardSetForWrite: a write drops the
/// tracked bit and the recorded value of every may-aliasing location, then
/// records the stored value for the written location.
void BlockState::updateForwardValForWrite(RLEContext &Ctx, unsigned lbit,
                                          unsigned vbit) {
  LSLocation &Written = Ctx.getLSLocation(lbit);
  for (unsigned i = 0, e = ForwardSetIn.size(); i != e; ++i) {
    if (!isTrackingLSLocation(i))
      continue;
    if (!Ctx.getLSLocation(i).isMayAliasLSLocation(Written, Ctx.getAA()))
      continue;
    // MayAlias: invalidate both the recorded value and the location bit.
    stopTrackingLSValue(i);
    stopTrackingLSLocation(i);
  }

  // The written location now has a known available value.
  startTrackingLSLocation(lbit);
  startTrackingLSValue(lbit, vbit);
}
|
|
|
|
/// Handle a write of \p Val to \p Mem by instruction \p I: expand the target
/// location into its indivisible fields and update the phase-appropriate
/// state for each one.
void BlockState::processWrite(RLEContext &Ctx, SILInstruction *I, SILValue Mem,
                              SILValue Val, RLEKind Kind) {
  LSLocation L(Mem);

  // Without a valid Base/ProjectionPath we cannot reason about the target;
  // treat the instruction as an unknown write.
  if (!L.isValid()) {
    processUnknownWriteInst(Ctx, I, Kind);
    return;
  }

  // Expand the location into individual fields so each field is handled as a
  // separate write.
  LSLocationList Locs;
  LSLocation::expand(L, &I->getModule(), Locs, Ctx.getTE());

  // Phase 1: only the availability bits are updated.
  if (isComputeAvailSet(Kind)) {
    for (auto &Loc : Locs)
      updateForwardSetForWrite(Ctx, Ctx.getLSLocationBit(Loc));
    return;
  }

  // Phases 2 and 3: also record which value each field now holds. The stored
  // value is expanded in lockstep with the location list.
  if (isComputeAvailValue(Kind) || isPerformRLE(Kind)) {
    LSValueList Vals;
    LSValue::expand(Val, &I->getModule(), Vals, Ctx.getTE());
    for (unsigned Idx = 0; Idx < Locs.size(); ++Idx) {
      updateForwardValForWrite(Ctx, Ctx.getLSLocationBit(Locs[Idx]),
                               Ctx.getLSValueBit(Vals[Idx]));
    }
    return;
  }

  llvm_unreachable("Unknown RLE compute kind");
}
|
|
|
|
/// Handle a read of \p Mem producing \p Val by instruction \p I: expand the
/// location into its indivisible fields, update the phase-appropriate state,
/// and — in the PerformRLE phase — replace the load when every field already
/// had an available value.
void BlockState::processRead(RLEContext &Ctx, SILInstruction *I, SILValue Mem,
                             SILValue Val, RLEKind Kind) {
  // Initialize the LSLocation.
  LSLocation L(Mem);

  // If we cant figure out the Base or Projection Path for the read, simply
  // ignore it for now.
  if (!L.isValid())
    return;

  // Expand the given LSLocation and Val into individual fields and process
  // them as separate reads.
  LSLocationList Locs;
  LSLocation::expand(L, &I->getModule(), Locs, Ctx.getTE());

  // Phase 1: only the availability bits matter; mark any field that was not
  // already available as available after this read.
  if (isComputeAvailSet(Kind)) {
    for (auto &X : Locs) {
      if (isTrackingLSLocation(Ctx.getLSLocationBit(X)))
        continue;
      updateForwardSetForRead(Ctx, Ctx.getLSLocationBit(X));
    }
    return;
  }

  // Phases 2 and 3: record the loaded value for every field that did not
  // already have one. CanForward stays true only when *every* field already
  // had a tracked value before this read — i.e. the load is fully redundant.
  bool CanForward = true;
  if (isComputeAvailValue(Kind) || isPerformRLE(Kind)) {
    LSValueList Vals;
    LSValue::expand(Val, &I->getModule(), Vals, Ctx.getTE());
    for (unsigned i = 0; i < Locs.size(); ++i) {
      if (isTrackingLSLocation(Ctx.getLSLocationBit(Locs[i])))
        continue;
      updateForwardValForRead(Ctx, Ctx.getLSLocationBit(Locs[i]),
                              Ctx.getLSValueBit(Vals[i]));
      // At least one field had no available value, so this load cannot be
      // eliminated.
      CanForward = false;
    }
  }

  // Simply return if we are not performing RLE or we do not have all the
  // values available to perform RLE.
  if (!isPerformRLE(Kind) || !CanForward)
    return;

  // Lastly, forward value to the load.
  setupRLE(Ctx, I, Mem);
}
|
|
|
|
/// Dispatch a StoreInst as a write of its source value to its destination
/// location.
void BlockState::processStoreInst(RLEContext &Ctx, StoreInst *SI,
                                  RLEKind Kind) {
  processWrite(Ctx, SI, SI->getDest(), SI->getSrc(), Kind);
}
|
|
|
|
/// Dispatch a LoadInst as a read of its operand location producing the load
/// itself as the value.
void BlockState::processLoadInst(RLEContext &Ctx, LoadInst *LI, RLEKind Kind) {
  processRead(Ctx, LI, LI->getOperand(), SILValue(LI), Kind);
}
|
|
|
|
/// Conservatively handle an instruction that may write to memory in a way we
/// cannot model: invalidate every tracked location whose base the
/// instruction may write to.
void BlockState::processUnknownWriteInst(RLEContext &Ctx, SILInstruction *I,
                                         RLEKind Kind) {
  auto *AA = Ctx.getAA();
  for (unsigned i = 0, e = ForwardSetIn.size(); i != e; ++i) {
    if (!isTrackingLSLocation(i))
      continue;
    // TODO: checking may-alias against the base alone is overly
    // conservative; we should check may-alias against base plus projection
    // path.
    if (!AA->mayWriteToMemory(I, Ctx.getLSLocation(i).getBase()))
      continue;
    // The instruction may clobber this location: drop both the location bit
    // and its recorded value.
    stopTrackingLSLocation(i);
    stopTrackingLSValue(i);
  }
}
|
|
|
|
/// Promote stored values to loads and merge duplicated loads.
///
/// Walks every instruction of the block once, dispatching to the store/load/
/// unknown-write handlers for the current phase. Returns true only in the
/// ComputeAvailSet phase, and only when ForwardSetOut changed (which forces
/// another data flow iteration).
bool BlockState::optimize(RLEContext &Ctx, RLEKind Kind) {
  for (auto &II : *BB) {
    SILInstruction *Inst = &II;
    DEBUG(llvm::dbgs() << " Visiting: " << *Inst);

    // This is a StoreInst, try to see whether it clobbers any forwarding value
    if (auto *SI = dyn_cast<StoreInst>(Inst)) {
      processStoreInst(Ctx, SI, Kind);
      continue;
    }

    // This is a LoadInst. Let's see if we can find a previous loaded, stored
    // value to use instead of this load.
    if (auto *LI = dyn_cast<LoadInst>(Inst)) {
      processLoadInst(Ctx, LI, Kind);
      continue;
    }

    // If this instruction has side effects, but is inert from a load store
    // perspective, skip it.
    if (isRLEInertInstruction(Inst))
      continue;

    // If this instruction does not read or write memory, we can skip it.
    if (!Inst->mayReadOrWriteMemory())
      continue;

    // If we have an instruction that may write to memory and we can not prove
    // that it and its operands can not alias a load we have visited, invalidate
    // that load.
    if (Inst->mayWriteToMemory()) {
      processUnknownWriteInst(Ctx, Inst, Kind);
      continue;
    }
  }

  // The basic block is finished, see whether there is a change in the
  // ForwardSetOut set.
  if (isComputeAvailSet(Kind))
    return updateForwardSetOut();

  // Update the ForwardValOut if we are computing the available values.
  if (isComputeAvailValue(Kind)) {
    updateForwardValOut();
    return false;
  }
  // PerformRLE: replacements were recorded in RedundantLoads; nothing to
  // propagate.
  return false;
}
|
|
|
|
/// Merge the out-state of one additional predecessor into this block's
/// in-state (the in-state was seeded from the first predecessor by
/// mergePredecessorStates).
///
/// Fixes relative to the previous version: removed the unused local
/// `LocDeleteList`, and corrected the comment on the invalidation branch
/// (it cleared locations that do NOT have an available value in the
/// predecessor, not ones that do).
void BlockState::mergePredecessorState(RLEContext &Ctx, BlockState &OtherState,
                                       RLEKind Kind) {
  // Are we computing the available set ?
  if (isComputeAvailSet(Kind)) {
    // Intersect: a location is available only if it is available along every
    // predecessor.
    ForwardSetIn &= OtherState.ForwardSetOut;
    return;
  }

  // Are we computing the available value ?
  if (isComputeAvailValue(Kind) || isPerformRLE(Kind)) {
    // Merge in the predecessor state.
    for (unsigned i = 0; i < ForwardSetIn.size(); ++i) {
      if (OtherState.ForwardSetOut[i]) {
        // There are multiple values from multiple predecessors, set this as
        // a covering value. We do not need to track the value itself, as we
        // can always go to the predecessors BlockState to find it.
        ForwardValIn[i] = Ctx.getLSValueBit(LSValue(true));
        continue;
      }
      // This location does not have an available value along this
      // predecessor, so it is not available at the merge point: clear it.
      stopTrackingLSLocation(i);
    }
  }
}
|
|
|
|
/// Compute this block's in-state by merging the out-states of all its
/// predecessors: the first predecessor seeds the state, every further one is
/// merged in via mergePredecessorState.
void BlockState::mergePredecessorStates(RLEContext &Ctx, RLEKind Kind) {
  // Clear the state if the basic block has no predecessor.
  if (BB->getPreds().begin() == BB->getPreds().end()) {
    clearLSLocations();
    return;
  }

  // We initialize the state with the first predecessor's state and merge
  // in states of other predecessors.
  bool HasAtLeastOnePred = false;
  // For each predecessor of BB...
  for (auto Pred : BB->getPreds()) {
    BlockState &Other = Ctx.getBlockState(Pred);

    // If we have not had at least one predecessor, initialize BlockState
    // with the state of the initial predecessor.
    // If BB is also a predecessor of itself, we should not initialize.
    if (!HasAtLeastOnePred) {
      if (isComputeAvailSet(Kind)) {
        ForwardSetIn = Other.ForwardSetOut;
      }
      if (isComputeAvailValue(Kind) || isPerformRLE(Kind)) {
        // The value phases need both the bit set and the value table.
        ForwardSetIn = Other.ForwardSetOut;
        ForwardValIn = Other.ForwardValOut;
      }
    } else {
      mergePredecessorState(Ctx, Other, Kind);
    }
    HasAtLeastOnePred = true;
  }
}
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
// RLEContext Implementation
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
/// Construct the per-function RLE context: enumerate every location and
/// value accessed in \p F (sizing the bit vectors once, up front) and
/// initialize a BlockState for each basic block.
RLEContext::RLEContext(SILFunction *F, AliasAnalysis *AA,
                       TypeExpansionAnalysis *TE,
                       PostOrderFunctionInfo::reverse_range RPOT)
    : Fn(F), AA(AA), TE(TE), ReversePostOrder(RPOT) {
  // Walk over the function and find all the locations accessed by
  // this function.
  LSLocation::enumerateLSLocations(*Fn, LSLocationVault, LocToBitIndex, TE);

  // Walk over the function and find all the values used in this function.
  LSValue::enumerateLSValues(*Fn, LSValueVault, ValToBitIndex, TE);

  // For all basic blocks in the function, initialize a BB state. Since we
  // know all the locations accessed in this function, we can resize the bit
  // vector to the appropriate size.
  for (auto &B : *F) {
    BBToLocState[&B] = BlockState();
    // We set the initial state of unreachable block to 0, as we do not have
    // a value for the location.
    //
    // This is a bit conservative as we could be missing forwarding
    // opportunities. i.e. a joint block with 1 predecessor being an
    // unreachable block.
    //
    // we rely on other passes to clean up unreachable block.
    BBToLocState[&B].init(&B, LSLocationVault.size(), isReachable(&B));
  }
}
|
|
|
|
/// Classify the values available at the end of this block for location \p L:
/// all covering, all concrete, or a mix of both.
BlockState::ValueState BlockState::getValueStateAtEndOfBlock(RLEContext &Ctx,
                                                             LSLocation &L) {
  // Expand the location into its individual fields.
  LSLocationList Locs;
  LSLocation::expand(L, &BB->getModule(), Locs, Ctx.getTE());

  // Count how many expanded fields end the block with a covering value and
  // how many with a concrete value.
  unsigned NumCovering = 0, NumConcrete = 0;
  ValueTableMap &OTM = getForwardValOut();
  for (auto &Loc : Locs) {
    if (Ctx.getLSValue(OTM[Ctx.getLSLocationBit(Loc)]).isCoveringValue())
      ++NumCovering;
    else
      ++NumConcrete;
  }

  if (NumCovering == Locs.size())
    return ValueState::CoverValues;
  if (NumConcrete == Locs.size())
    return ValueState::ConcreteValues;
  return ValueState::CoverAndConcreteValues;
}
|
|
|
|
/// Compute a single SILValue for location \p L that is available at the
/// beginning of \p BB, by walking backwards through the predecessors,
/// materializing a value in each contributing block, and joining them with
/// the SSAUpdater. Returns a null SILValue if a reaching value cannot be
/// materialized in some predecessor.
SILValue RLEContext::computePredecessorLocationValue(SILBasicBlock *BB,
                                                     LSLocation &L) {
  // Map from each contributing predecessor block to the value of L that is
  // available at the end of that block.
  BBValueMap Values;
  llvm::DenseSet<SILBasicBlock *> HandledBBs;
  llvm::SmallVector<SILBasicBlock *, 8> WorkList;

  // Push in all the predecessors to get started.
  for (auto Pred : BB->getPreds()) {
    WorkList.push_back(Pred);
  }

  while (!WorkList.empty()) {
    auto *CurBB = WorkList.pop_back_val();
    BlockState &Forwarder = getBlockState(CurBB);

    // Mark this basic block as processed.
    HandledBBs.insert(CurBB);

    // Case 1: this BlockState contains concrete values for all the expanded
    // locations; collect and reduce them into a single value in the current
    // block.
    if (Forwarder.isConcreteValues(*this, L)) {
      Values[CurBB] = Forwarder.reduceValuesAtEndOfBlock(*this, L);
      continue;
    }

    // Case 2: this BlockState does not contain a concrete value for any of
    // the expanded locations; keep walking and collect in this block's
    // predecessors (skipping blocks already handled to terminate on cycles).
    if (Forwarder.isCoverValues(*this, L)) {
      for (auto Pred : CurBB->getPreds()) {
        if (HandledBBs.find(Pred) != HandledBBs.end())
          continue;
        WorkList.push_back(Pred);
      }
      continue;
    }

    // Case 3: this BlockState contains concrete values for some but not all
    // the expanded locations; recursively call gatherLocationValues to
    // materialize the value that reaches this basic block. Bail out (null
    // SILValue) if that fails.
    LSLocationValueMap LSValues;
    if (!gatherLocationValues(CurBB, L, LSValues, Forwarder.getForwardValOut()))
      return SILValue();

    // Reduce the available values into a single SILValue we can use to
    // forward, inserting any needed aggregation before the terminator.
    SILInstruction *IPt = CurBB->getTerminator();
    Values[CurBB] = LSValue::reduce(L, &BB->getModule(), LSValues, IPt, TE);
  }

  // Finally, collect all the values for the SILArgument, materialize it using
  // the SSAUpdater (which creates block arguments / phis as needed).
  Updater.Initialize(L.getType());
  for (auto V : Values) {
    Updater.AddAvailableValue(V.first, V.second);
  }

  return Updater.GetValueInMiddleOfBlock(BB);
}
|
|
|
|
/// Map a bit position back to the LSLocation it represents.
LSLocation &RLEContext::getLSLocation(const unsigned index) {
  auto &Entry = LSLocationVault[index];
  return Entry;
}
|
|
|
|
/// Return the bit position of the given Loc in the LSLocationVault. The bit
/// position is then used to set/reset the bitvector kept by each BlockState.
unsigned RLEContext::getLSLocationBit(const LSLocation &Loc) {
  // Every location must already have been registered by
  // enumerateLSLocations before this is called.
  auto Entry = LocToBitIndex.find(Loc);
  assert(Entry != LocToBitIndex.end() && "LSLocation should have been enum'ed");
  return Entry->second;
}
|
|
|
|
/// Map a bit position back to the LSValue it represents.
LSValue &RLEContext::getLSValue(const unsigned index) {
  auto &Entry = LSValueVault[index];
  return Entry;
}
|
|
|
|
unsigned RLEContext::getLSValueBit(const LSValue &Val) {
|
|
// Return the bit position of the given Val in the LSValueVault. The bit
|
|
// position is then used to set/reset the bitvector kept by each BBState.
|
|
auto Iter = ValToBitIndex.find(Val);
|
|
if (Iter == ValToBitIndex.end()) {
|
|
ValToBitIndex[Val] = LSValueVault.size();
|
|
LSValueVault.push_back(Val);
|
|
return ValToBitIndex[Val];
|
|
}
|
|
return Iter->second;
|
|
}
|
|
|
|
/// Collect into \p Values a forwardable LSValue for every location that
/// \p L expands to at the beginning of \p BB, materializing values in the
/// predecessors for locations whose entry in \p VM is only a covering value.
/// Returns false if a covering value cannot be materialized.
bool RLEContext::gatherLocationValues(SILBasicBlock *BB, LSLocation &L,
                                      LSLocationValueMap &Values,
                                      ValueTableMap &VM) {
  LSLocationSet CSLocs;
  LSLocationList Locs;
  LSLocation::expand(L, &BB->getModule(), Locs, TE);

  auto *Mod = &BB->getModule();
  // Find the locations that this basic block defines and the locations which
  // we do not have a concrete value in the current basic block.
  for (auto &X : Locs) {
    // Bind the value once instead of re-looking up Values[X] for the
    // covering-value test.
    LSValue &V = getLSValue(VM[getLSLocationBit(X)]);
    Values[X] = V;
    if (V.isCoveringValue())
      CSLocs.insert(X);
  }

  // For locations which we do not have concrete values for in this basic
  // block, try to reduce it to the minimum # of locations possible, this
  // will help us to generate as few SILArguments as possible.
  LSLocation::reduce(L, Mod, CSLocs, TE);

  // To handle covering value, we need to go to the predecessors and
  // materialize them there.
  for (auto &X : CSLocs) {
    SILValue V = computePredecessorLocationValue(BB, X);
    if (!V)
      return false;

    // We've constructed a concrete value for the covering value. Expand and
    // collect the newly created forwardable values.
    //
    // NOTE: these lists previously shadowed the outer `Locs`; they are named
    // distinctly to avoid that hazard.
    LSLocationList ExpandedLocs;
    LSValueList ExpandedVals;
    LSLocation::expand(X, Mod, ExpandedLocs, TE);
    LSValue::expand(V, Mod, ExpandedVals, TE);

    for (unsigned i = 0; i < ExpandedLocs.size(); ++i) {
      Values[ExpandedLocs[i]] = ExpandedVals[i];
      assert(Values[ExpandedLocs[i]].isValid() && "Invalid load store value");
    }
  }
  return true;
}
|
|
|
|
/// Run one pass of the given data-flow kind over every basic block in
/// reverse post-order. Returns true if any block's state (or, when
/// performing RLE, the IR) changed.
bool RLEContext::processBasicBlocks(RLEKind Kind) {
  bool Changed = false;
  for (SILBasicBlock *BB : ReversePostOrder) {
    BlockState &State = getBlockState(BB);

    // Fold the predecessors' out-states into this block's entry state.
    // Afterwards the BlockState lists the available LSLocations and their
    // values reaching the beginning of the block along all paths.
    State.mergePredecessorStates(*this, Kind);

    // Walk the block's instructions: merge duplicate loads, forward stores
    // to loads, and update the available lists to reflect the end of the
    // block.
    Changed |= State.optimize(*this, Kind);
  }
  return Changed;
}
|
|
|
|
/// Drive redundant load elimination over the whole function. Returns true if
/// the SIL was changed.
bool RLEContext::run() {
  // Data flow may take too long to converge.
  if (LSLocationVault.size() > MaxLSLocationLimit)
    return false;

  // We perform redundant load elimination in the following phases.
  //
  // Phase 1. we use an iterative data flow to compute whether there is an
  // available value at a given point, we do not yet care about what the value
  // is.
  //
  // Phase 2. we compute the real forwardable value at a given point.
  //
  // Phase 3. we set up the SILValues for the redundant load elimination.
  //
  // Phase 4. we perform the redundant load elimination.

  // Phase 1: find whether there is an available value at a given point.
  //
  // Process basic blocks in RPO and iterate until the available-value bit
  // vectors converge.
  bool ForwardSetChanged = false;
  do {
    ForwardSetChanged = processBasicBlocks(RLEKind::ComputeAvailSet);
  } while (ForwardSetChanged);

  // Phase 2: we have computed the available value bit, now go through every
  // basic block and compute the forwarding value locally.
  processBasicBlocks(RLEKind::ComputeAvailValue);

  // Phase 3: we have the available value bit computed and the local
  // forwarding value. Set up the load forwarding.
  processBasicBlocks(RLEKind::PerformRLE);

  // Phase 4: finally, perform the redundant load replacements collected in
  // each block's RL map.
  llvm::DenseSet<SILInstruction *> InstsToRemove;
  bool SILChanged = false;
  for (auto &X : BBToLocState) {
    for (auto &F : X.second.getRL()) {
      DEBUG(llvm::dbgs() << "Replacing " << SILValue(F.first) << "With "
                         << F.second);
      SILChanged = true;
      SILValue(F.first).replaceAllUsesWith(F.second);
      InstsToRemove.insert(F.first);
      ++NumForwardedLoads;
    }
  }

  // Erase the instructions recursively, this way, we get rid of pass
  // dependence on DCE.
  for (auto &X : InstsToRemove) {
    // It is possible that the instruction still has uses, because it could be
    // used as the replacement Value, i.e. F.second, for some other RLE pairs.
    //
    // TODO: we should fix this, otherwise we are missing RLE opportunities.
    if (!X->use_empty())
      continue;
    recursivelyDeleteTriviallyDeadInstructions(X, true);
  }
  return SILChanged;
}
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
// Top Level Entry Point
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
namespace {
|
|
|
|
class RedundantLoadElimination : public SILFunctionTransform {
|
|
|
|
/// The entry point to the transformation.
|
|
void run() override {
|
|
SILFunction *F = getFunction();
|
|
DEBUG(llvm::dbgs() << "***** Redundant Load Elimination on function: "
|
|
<< F->getName() << " *****\n");
|
|
|
|
auto *AA = PM->getAnalysis<AliasAnalysis>();
|
|
auto *TE = PM->getAnalysis<TypeExpansionAnalysis>();
|
|
auto *PO = PM->getAnalysis<PostOrderAnalysis>()->get(F);
|
|
|
|
RLEContext RLE(F, AA, TE, PO->getReversePostOrder());
|
|
if (RLE.run()) {
|
|
invalidateAnalysis(SILAnalysis::InvalidationKind::Instructions);
|
|
}
|
|
}
|
|
|
|
StringRef getName() override { return "SIL Redundant Load Elimination"; }
|
|
};
|
|
|
|
} // end anonymous namespace
|
|
|
|
/// Factory entry point used by the pass manager.
SILTransform *swift::createRedundantLoadElimination() {
  auto *Pass = new RedundantLoadElimination();
  return Pass;
}
|