//===--- RedundantLoadElimination.cpp - SIL Load Forwarding ---------------===//
//
// This source file is part of the Swift.org open source project
//
// Copyright (c) 2014 - 2016 Apple Inc. and the Swift project authors
// Licensed under Apache License v2.0 with Runtime Library Exception
//
// See http://swift.org/LICENSE.txt for license information
// See http://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
//
//===----------------------------------------------------------------------===//
///
/// \file
///
/// This pass eliminates redundant loads.
///
/// A load can be eliminated if its value has already been held somewhere,
/// i.e. it was generated by a previous load, or the LSLocation was stored
/// with a known value.
///
/// In this case, one can replace the load instruction with the previous
/// result.
///
/// Redundant Load Elimination (RLE) eliminates such loads by:
///
/// 1. Introducing a notion of an LSLocation that is used to model object
/// fields. (See below for more details).
///
/// 2. Introducing a notion of an LSValue that is used to model the value
/// that currently resides in the associated LSLocation on a particular
/// program path. (See below for more details).
///
/// 3. Performing an RPO walk over the control flow graph, tracking any
/// LSLocations that are read from or stored into in each basic block. The
/// read or stored value, kept in a map between LSLocation and LSValue,
/// becomes the available value for the LSLocation.
///
/// 4. An optimistic iterative intersection-based dataflow is performed on the
/// gensets until convergence.
///
/// At the core of RLE, there is the LSLocation class. An LSLocation is an
/// abstraction of an object field in the program. It consists of a base and
/// a projection path to the accessed field.
///
/// In SIL, one can access an aggregate as a whole, e.g. store to a struct
/// with 2 Int fields. A store like this will generate 2 *indivisible*
/// LSLocations, 1 for each field. In addition to keeping a list of
/// LSLocations, RLE also keeps their available LSValues. We call them
/// *indivisible* because they cannot be broken down into smaller LSLocations.
///
/// An LSValue consists of a base - a SILValue from the load or store inst -
/// as well as a projection path to the field it represents. So, a store to
/// a 2-field struct as mentioned above will generate 2 LSLocations and
/// 2 LSValues.
///
/// Every basic block keeps a map between LSLocation and LSValue. By
/// keeping the LSLocation and LSValue in their indivisible form, one
/// can easily find which part of the load is redundant and how to compute its
/// forwarding value.
///
/// Given the case where the 2 fields of the struct both have available
/// values, RLE can find their LSValues (maybe by struct_extract from a larger
/// value) and then aggregate them.
///
/// However, this may introduce a lot of extraction and aggregation which may
/// not be necessary, e.g. a store to the struct followed by a load from the
/// struct. To solve this problem, when RLE detects that a load instruction
/// can be replaced by a forwarded value, it will try to find the minimum # of
/// extractions necessary to form the forwarded value. It will group the
/// available values by the LSValue base, i.e. the LSValues that come from the
/// same instruction, and then use extraction to obtain the needed components
/// of the base.
///
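/// As an illustrative sketch of the basic transformation (simplified SIL,
/// not taken verbatim from any test case), assuming a struct S with two Int
/// fields:
///
///   store %s to %addr : $*S     // (%addr, .a) and (%addr, .b) become
///                               // available with the components of %s
///   ...                         // no aliasing writes to %addr
///   %v = load %addr : $*S       // redundant: both fields are available
///
/// RLE replaces every use of %v with %s and deletes the load. If instead
/// only the two field values were individually available, they would be
/// extracted and re-aggregated into a new struct value right before the load.
///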
//===----------------------------------------------------------------------===//

#define DEBUG_TYPE "sil-redundant-load-elim"
#include "swift/SIL/Projection.h"
#include "swift/SIL/SILArgument.h"
#include "swift/SIL/SILBuilder.h"
#include "swift/SILOptimizer/Analysis/AliasAnalysis.h"
#include "swift/SILOptimizer/Analysis/DominanceAnalysis.h"
#include "swift/SILOptimizer/Analysis/PostOrderAnalysis.h"
#include "swift/SILOptimizer/Analysis/ValueTracking.h"
#include "swift/SILOptimizer/PassManager/Passes.h"
#include "swift/SILOptimizer/PassManager/Transforms.h"
#include "swift/SILOptimizer/Utils/CFG.h"
#include "swift/SILOptimizer/Utils/Local.h"
#include "swift/SILOptimizer/Utils/LSBase.h"
#include "swift/SILOptimizer/Utils/SILSSAUpdater.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/None.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"

using namespace swift;

STATISTIC(NumForwardedLoads, "Number of loads forwarded");

/// Return the AllocStackInst corresponding to the given DeallocStackInst,
/// or nullptr if the given instruction is not a dealloc_stack.
static SILInstruction *findAllocStackInst(SILInstruction *I) {
  if (DeallocStackInst *DSI = dyn_cast<DeallocStackInst>(I))
    return dyn_cast<SILInstruction>(DSI->getOperand());
  return nullptr;
}

/// ComputeAvailSetMax - If we ignore all unknown writes, what is the max
/// available set that can reach a certain point in a basic block? This
/// helps in generating the genset and killset, i.e. if there is no downward
/// visible value that can reach the end of a basic block, then we know that
/// the genset and killset for the location need not be set.
///
/// ComputeAvailGenKillSet - Build the genset and killset of the basic block.
///
/// ComputeAvailValue - Compute the available value at the end of the basic
/// block.
///
/// PerformRLE - Perform the actual redundant load elimination.
enum class RLEKind : unsigned {
  ComputeAvailSetMax = 0,
  ComputeAvailGenKillSet = 1,
  ComputeAvailValue = 2,
  PerformRLE = 3,
};

//===----------------------------------------------------------------------===//
//                             Utility Functions
//===----------------------------------------------------------------------===//

static bool inline isComputeAvailSetMax(RLEKind Kind) {
  return Kind == RLEKind::ComputeAvailSetMax;
}

static bool inline isComputeAvailGenKillSet(RLEKind Kind) {
  return Kind == RLEKind::ComputeAvailGenKillSet;
}

static bool inline isComputeAvailValue(RLEKind Kind) {
  return Kind == RLEKind::ComputeAvailValue;
}

static bool inline isPerformingRLE(RLEKind Kind) {
  return Kind == RLEKind::PerformRLE;
}

/// Returns true if this is an instruction that may have side effects in a
/// general sense but is inert from a load store perspective.
static bool isRLEInertInstruction(SILInstruction *Inst) {
  switch (Inst->getKind()) {
  case ValueKind::StrongRetainInst:
  case ValueKind::StrongRetainUnownedInst:
  case ValueKind::UnownedRetainInst:
  case ValueKind::RetainValueInst:
  case ValueKind::DeallocStackInst:
  case ValueKind::CondFailInst:
  case ValueKind::IsUniqueInst:
  case ValueKind::IsUniqueOrPinnedInst:
  case ValueKind::FixLifetimeInst:
    return true;
  default:
    return false;
  }
}

//===----------------------------------------------------------------------===//
//                       Basic Block Location State
//===----------------------------------------------------------------------===//
namespace {

/// If this function has too many basic blocks or too many locations, it may
/// take a long time to compute the genset and killset. The number of memory
/// behavior or alias queries we need to do in the worst case is roughly
/// linear in (# of BBs) x (# of locations).
///
/// We could run RLE on functions with up to 128 basic blocks and 128
/// locations, which is a large function.
constexpr unsigned MaxLSLocationBBMultiplicationNone = 128*128;

/// We could run optimistic RLE on functions with fewer than 64 basic blocks
/// and 64 locations, which is a sizeable function.
constexpr unsigned MaxLSLocationBBMultiplicationPessimistic = 64*64;

/// Forward declaration.
class RLEContext;

/// State of the load store in one basic block which allows for forwarding
/// from loads, stores -> loads.
class BlockState {
public:
  enum class ValueState : unsigned {
    CoverValues = 0,
    ConcreteValues = 1,
    CoverAndConcreteValues = 2,
  };

private:
  /// # of locations in the LocationVault.
  unsigned LocationNum;

  /// The basic block that we are optimizing.
  SILBasicBlock *BB;

  /// A bit vector for which the ith bit represents the ith LSLocation in
  /// LocationVault. If the bit is set, then the location currently has a
  /// downward visible value at the beginning of the basic block.
  llvm::SmallBitVector ForwardSetIn;

  /// A bit vector for which the ith bit represents the ith LSLocation in
  /// LocationVault. If the bit is set, then the location currently has a
  /// downward visible value at the end of the basic block.
  llvm::SmallBitVector ForwardSetOut;

  /// A bit vector for which the ith bit represents the ith LSLocation in
  /// LocationVault. If we ignore all unknown writes, this is the maximum set
  /// of available locations at the current position in the basic block.
  llvm::SmallBitVector ForwardSetMax;

  /// A bit vector for which the ith bit represents the ith LSLocation in
  /// LocationVault. If the bit is set, then the basic block generates a
  /// value for the location.
  llvm::SmallBitVector BBGenSet;

  /// A bit vector for which the ith bit represents the ith LSLocation in
  /// LocationVault. If the bit is set, then the basic block kills the
  /// value for the location.
  llvm::SmallBitVector BBKillSet;

  /// This is a map between LSLocations and their available values at the
  /// beginning of this basic block.
  ValueTableMap ForwardValIn;

  /// This is a map between LSLocations and their available values at the end
  /// of this basic block.
  ValueTableMap ForwardValOut;

  /// Keeps a list of replaceable instructions in the current basic block as
  /// well as their SILValue replacement.
  llvm::DenseMap<SILInstruction *, SILValue> RedundantLoads;

  /// An LSLocation that is read or written has been extracted, expanded and
  /// mapped to the bit position in the bitvector. Update it in the
  /// ForwardSetIn of the current basic block.
  void updateForwardSetForRead(RLEContext &Ctx, unsigned B);
  void updateForwardSetForWrite(RLEContext &Ctx, unsigned B);

  /// An LSLocation that is read or written has been extracted, expanded and
  /// mapped to the bit position in the bitvector. Update it in the genset
  /// and killset of the current basic block.
  void updateGenKillSetForRead(RLEContext &Ctx, unsigned B);
  void updateGenKillSetForWrite(RLEContext &Ctx, unsigned B);

  /// An LSLocation that is read or written has been extracted, expanded and
  /// mapped to the bit position in the bitvector. Update it in the
  /// ForwardSetMax of the current basic block.
  void updateMaxAvailSetForRead(RLEContext &Ctx, unsigned B);
  void updateMaxAvailSetForWrite(RLEContext &Ctx, unsigned B);

  /// An LSLocation that is written has been extracted, expanded and mapped
  /// to the bit position in the bitvector. Process it using the bit position.
  void updateForwardSetAndValForRead(RLEContext &Ctx, unsigned L, unsigned V);
  void updateForwardSetAndValForWrite(RLEContext &Ctx, unsigned L, unsigned V);

  /// There is a read from an LSLocation; expand the LSLocation into
  /// individual fields before processing them.
  void processRead(RLEContext &Ctx, SILInstruction *I, SILValue Mem,
                   SILValue Val, RLEKind Kind);

  /// There is a write to an LSLocation; expand the LSLocation into
  /// individual fields before processing them.
  void processWrite(RLEContext &Ctx, SILInstruction *I, SILValue Mem,
                    SILValue Val, RLEKind Kind);

  /// BitVector manipulation functions.
  void startTrackingLocation(llvm::SmallBitVector &BV, unsigned B);
  void stopTrackingLocation(llvm::SmallBitVector &BV, unsigned B);
  bool isTrackingLocation(llvm::SmallBitVector &BV, unsigned B);
  void startTrackingValue(ValueTableMap &VM, unsigned L, unsigned V);
  void stopTrackingValue(ValueTableMap &VM, unsigned B);

public:
  BlockState() = default;

  void init(SILBasicBlock *NewBB, unsigned bitcnt, bool optimistic) {
    BB = NewBB;
    LocationNum = bitcnt;
    // For reachable basic blocks, the initial state of ForwardSetOut should
    // be all 1's. Otherwise the dataflow solution could be too conservative.
    //
    // Consider this case: the value forwardable from `var a = 10` before the
    // loop will not be forwarded if ForwardSetOut is set to 0 initially.
    //
    //   var a = 10
    //   for _ in 0...1024 {}
    //   use(a);
    //
    // However, by doing so, we can only do the data forwarding after the
    // data flow stabilizes.
    //
    // We set the initial state of unreachable blocks to 0, as we do not have
    // a value for the location.
    //
    // This is a bit conservative as we could be missing forwarding
    // opportunities, e.g. a join block with one predecessor being an
    // unreachable block.
    //
    // We rely on other passes to clean up unreachable blocks.
    ForwardSetIn.resize(LocationNum, false);
    ForwardSetOut.resize(LocationNum, optimistic);

    // If we are running an optimistic data flow, set ForwardSetMax to true
    // initially.
    ForwardSetMax.resize(LocationNum, optimistic);

    BBGenSet.resize(LocationNum, false);
    BBKillSet.resize(LocationNum, false);
  }

  /// Initialize the AvailSetMax by intersecting this basic block's
  /// predecessors' AvailSetMax.
  void mergePredecessorsAvailSetMax(RLEContext &Ctx);

  /// Initialize the AvailSet by intersecting this basic block's
  /// predecessors' AvailSet.
  void mergePredecessorAvailSet(RLEContext &Ctx);

  /// Initialize the AvailSet and AvailVal of the current basic block.
  void mergePredecessorAvailSetAndValue(RLEContext &Ctx);

  /// Reached the end of the basic block, update the ForwardValOut with the
  /// ForwardValIn.
  void updateForwardValOut() { ForwardValOut = ForwardValIn; }

  /// Check whether the ForwardSetOut has changed. If it has, we need to
  /// rerun the data flow to reach a fixed point.
  bool updateForwardSetOut() {
    if (ForwardSetIn == ForwardSetOut)
      return false;
    ForwardSetOut = ForwardSetIn;
    return true;
  }

  /// Returns the current basic block we are processing.
  SILBasicBlock *getBB() const { return BB; }

  /// Returns the ForwardValIn for the current basic block.
  ValueTableMap &getForwardValIn() { return ForwardValIn; }

  /// Returns the ForwardValOut for the current basic block.
  ValueTableMap &getForwardValOut() { return ForwardValOut; }

  /// Returns the redundant loads and their replacements in the current basic
  /// block.
  llvm::DenseMap<SILInstruction *, SILValue> &getRL() { return RedundantLoads; }

  /// Look into the value for the given LSLocation at the end of the basic
  /// block; return one of the three ValueState types.
  ValueState getValueStateAtEndOfBlock(RLEContext &Ctx, LSLocation &L);

  /// Wrappers to query the value state of the location in this BlockState.
  bool isCoverValues(RLEContext &Ctx, LSLocation &L) {
    return getValueStateAtEndOfBlock(Ctx, L) == ValueState::CoverValues;
  }
  bool isConcreteValues(RLEContext &Ctx, LSLocation &L) {
    return getValueStateAtEndOfBlock(Ctx, L) == ValueState::ConcreteValues;
  }

  /// Iterate over the instructions in the basic block in forward order and
  /// process them w.r.t. the given \p Kind.
  void processInstructionWithKind(RLEContext &Ctx, SILInstruction *I,
                                  RLEKind Kind);
  void processBasicBlockWithKind(RLEContext &Ctx, RLEKind Kind);

  /// Process the current basic block with the genset and killset. Return
  /// true if the ForwardSetOut changes.
  bool processBasicBlockWithGenKillSet();

  /// Set up the value for redundant load elimination.
  bool setupRLE(RLEContext &Ctx, SILInstruction *I, SILValue Mem);

  /// Process an instruction which writes to memory in an unknown way.
  void processUnknownWriteInst(RLEContext &Ctx, SILInstruction *I,
                               RLEKind Kind);
  void processUnknownWriteInstForGenKillSet(RLEContext &Ctx, SILInstruction *I);
  void processUnknownWriteInstForRLE(RLEContext &Ctx, SILInstruction *I);

  /// Process a DeallocStackInst by invalidating the locations based on the
  /// corresponding alloc_stack.
  void processDeallocStackInst(RLEContext &Ctx, SILInstruction *I,
                               RLEKind Kind);
  void processDeallocStackInstForGenKillSet(RLEContext &Ctx, SILInstruction *I);
  void processDeallocStackInstForRLE(RLEContext &Ctx, SILInstruction *I);

  /// Process LoadInst. Extract LSLocations from LoadInst.
  void processLoadInst(RLEContext &Ctx, LoadInst *LI, RLEKind Kind);

  /// Process StoreInst. Extract LSLocations from StoreInst.
  void processStoreInst(RLEContext &Ctx, StoreInst *SI, RLEKind Kind);

  /// Returns a *single* forwardable SILValue for the given LSLocation, built
  /// right before the end of the basic block.
  SILValue reduceValuesAtEndOfBlock(RLEContext &Ctx, LSLocation &L);
};

} // end anonymous namespace

//===----------------------------------------------------------------------===//
//                            RLEContext Interface
//===----------------------------------------------------------------------===//

namespace {

using BBValueMap = llvm::DenseMap<SILBasicBlock *, SILValue>;

/// This class stores global state that we use when computing redundant loads
/// and their replacements in each basic block.
class RLEContext {
  enum class ProcessKind {
    ProcessMultipleIterations = 0,
    ProcessOneIteration = 1,
    ProcessNone = 2,
  };
private:
  /// The function we are currently processing.
  SILFunction *Fn;

  /// The pass manager we are using.
  SILPassManager *PM;

  /// The alias analysis that we will use during all computations.
  AliasAnalysis *AA;

  /// The type expansion analysis we will use during all computations.
  TypeExpansionAnalysis *TE;

  /// The SSA updater we use to materialize covering values.
  SILSSAUpdater Updater;

  /// The range that we use to iterate over the post order and reverse post
  /// order of the given function.
  PostOrderFunctionInfo *PO;

  /// Keeps all the locations for the current function. The BitVector in each
  /// BlockState is then laid on top of it to keep track of which LSLocation
  /// has a downward available value.
  std::vector<LSLocation> LocationVault;

  /// Contains a map from each LSLocation to its index in the LocationVault.
  /// Used for fast lookup.
  LSLocationIndexMap LocToBitIndex;

  /// Keeps a map between the accessed SILValue and the location.
  LSLocationBaseMap BaseToLocIndex;

  /// Keeps all the LSValues for the current function. The BitVector in each
  /// BlockState is then laid on top of it to keep track of which LSLocation
  /// has a downward available value.
  std::vector<LSValue> LSValueVault;

  /// Contains a map from each LSValue to its index in the LSValueVault.
  /// Used for fast lookup.
  llvm::DenseMap<LSValue, unsigned> ValToBitIndex;

  /// A map from each BasicBlock to its BlockState.
  llvm::SmallDenseMap<SILBasicBlock *, BlockState, 16> BBToLocState;

  /// Keeps a list of basic blocks that have LoadInsts. If a basic block does
  /// not have a LoadInst, we do not actually perform the last iteration where
  /// RLE is actually performed on the basic block.
  ///
  /// NOTE: This is never populated for functions which will only require 1
  /// data flow iteration. For functions that require more than 1 iteration
  /// of the data flow, this is populated the first time the function is
  /// walked, i.e. when we generate the genset and killset.
  llvm::DenseSet<SILBasicBlock *> BBWithLoads;

public:
  RLEContext(SILFunction *F, SILPassManager *PM, AliasAnalysis *AA,
             TypeExpansionAnalysis *TE, PostOrderFunctionInfo *PO);

  RLEContext(const RLEContext &) = delete;
  RLEContext(RLEContext &&) = default;
  ~RLEContext() = default;

  /// Entry point to redundant load elimination.
  bool run();

  /// Use a set of ad hoc rules to tell whether we should run a pessimistic
  /// one-iteration data flow on the function.
  ProcessKind getProcessFunctionKind(unsigned LoadCount, unsigned StoreCount);

  /// Run the iterative data flow until convergence.
  void runIterativeRLE();

  /// Process the basic blocks for the gen and kill set.
  void processBasicBlocksForGenKillSet();

  /// Process the basic blocks with the gen and kill set.
  void processBasicBlocksWithGenKillSet();

  /// Process the basic blocks for values generated in each basic block.
  void processBasicBlocksForAvailValue();

  /// Process basic blocks to perform the redundant load elimination.
  void processBasicBlocksForRLE(bool Optimistic);

  /// Returns the alias analysis we will use during all computations.
  AliasAnalysis *getAA() const { return AA; }

  /// Returns the type expansion analysis we are using.
  TypeExpansionAnalysis *getTE() const { return TE; }

  /// Returns the map from a SILValue base to its location index.
  LSLocationBaseMap &getBM() { return BaseToLocIndex; }

  /// Return the BlockState for the given basic block.
  BlockState &getBlockState(SILBasicBlock *B) { return BBToLocState[B]; }

  /// Get the bit representing the LSLocation in the LocationVault.
  unsigned getLocationBit(const LSLocation &L);

  /// Given the bit, get the LSLocation from the LocationVault.
  LSLocation &getLocation(const unsigned index);

  /// Get the bit representing the LSValue in the LSValueVault.
  unsigned getValueBit(const LSValue &L);

  /// Given the bit, get the LSValue from the LSValueVault.
  LSValue &getValue(const unsigned index);

  /// Given an LSLocation, try to collect all the LSValues for this
  /// LSLocation in the given basic block. If some of the locations have
  /// covering values, find the values in its predecessors.
  bool collectLocationValues(SILBasicBlock *BB, LSLocation &L,
                             LSLocationValueMap &Values, ValueTableMap &VM);

  /// Transitively collect all the values that make up this location and
  /// create a SILArgument out of them.
  SILValue computePredecessorLocationValue(SILBasicBlock *BB, LSLocation &L);
};

} // end anonymous namespace

void BlockState::startTrackingValue(ValueTableMap &VM, unsigned L, unsigned V) {
  VM[L] = V;
}

void BlockState::stopTrackingValue(ValueTableMap &VM, unsigned B) {
  VM.erase(B);
}

bool BlockState::isTrackingLocation(llvm::SmallBitVector &BV, unsigned B) {
  return BV.test(B);
}

void BlockState::startTrackingLocation(llvm::SmallBitVector &BV, unsigned B) {
  BV.set(B);
}

void BlockState::stopTrackingLocation(llvm::SmallBitVector &BV, unsigned B) {
  BV.reset(B);
}

void BlockState::mergePredecessorsAvailSetMax(RLEContext &Ctx) {
  if (BB->pred_empty()) {
    ForwardSetMax.reset();
    return;
  }

  auto Iter = BB->pred_begin();
  ForwardSetMax = Ctx.getBlockState(*Iter).ForwardSetMax;
  Iter = std::next(Iter);
  for (auto EndIter = BB->pred_end(); Iter != EndIter; ++Iter) {
    ForwardSetMax &= Ctx.getBlockState(*Iter).ForwardSetMax;
  }
}

void BlockState::mergePredecessorAvailSet(RLEContext &Ctx) {
  // Clear the state if the basic block has no predecessor.
  if (BB->getPreds().begin() == BB->getPreds().end()) {
    ForwardSetIn.reset();
    return;
  }

  auto Iter = BB->pred_begin();
  ForwardSetIn = Ctx.getBlockState(*Iter).ForwardSetOut;
  Iter = std::next(Iter);
  for (auto EndIter = BB->pred_end(); Iter != EndIter; ++Iter) {
    ForwardSetIn &= Ctx.getBlockState(*Iter).ForwardSetOut;
  }
}

void BlockState::mergePredecessorAvailSetAndValue(RLEContext &Ctx) {
  // Clear the state if the basic block has no predecessor.
  if (BB->getPreds().begin() == BB->getPreds().end()) {
    ForwardSetIn.reset();
    ForwardValIn.clear();
    return;
  }

  auto Iter = BB->pred_begin();
  ForwardSetIn = Ctx.getBlockState(*Iter).ForwardSetOut;
  ForwardValIn = Ctx.getBlockState(*Iter).ForwardValOut;
  Iter = std::next(Iter);
  for (auto EndIter = BB->pred_end(); Iter != EndIter; ++Iter) {
    BlockState &OtherState = Ctx.getBlockState(*Iter);
    ForwardSetIn &= OtherState.ForwardSetOut;

    // Merge in the predecessor state.
    for (unsigned i = 0; i < LocationNum; ++i) {
      if (OtherState.ForwardSetOut[i]) {
        // There are multiple values from multiple predecessors; set this as
        // a covering value. We do not need to track the value itself, as we
        // can always go to the predecessors' BlockStates to find it.
        ForwardValIn[i] = Ctx.getValueBit(LSValue(true));
        continue;
      }
      // This location does not have an available value in this predecessor,
      // so stop tracking it.
      stopTrackingValue(ForwardValIn, i);
      stopTrackingLocation(ForwardSetIn, i);
    }
  }
}

void BlockState::processBasicBlockWithKind(RLEContext &Ctx, RLEKind Kind) {
  // Iterate over instructions in forward order.
  for (auto &II : *BB) {
    processInstructionWithKind(Ctx, &II, Kind);
  }
}

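// A note on the transfer function applied below (a standard forward
// dataflow; the worked bits are illustrative, not from any particular
// function): with ForwardSetIn already merged from the predecessors, the
// block's out-state is computed as
//
//   ForwardSetOut = BBGenSet | (ForwardSetIn & ~BBKillSet)
//
// e.g. with In = 1100, Kill = 0100 and Gen = 0011, Out = 1011.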
bool BlockState::processBasicBlockWithGenKillSet() {
  ForwardSetIn.reset(BBKillSet);
  ForwardSetIn |= BBGenSet;
  return updateForwardSetOut();
}

SILValue BlockState::reduceValuesAtEndOfBlock(RLEContext &Ctx, LSLocation &L) {
  // First, collect the locations expanded from L and their corresponding
  // available values into a map.
  LSLocationValueMap Values;

  LSLocationList Locs;
  LSLocation::expand(L, &BB->getModule(), Locs, Ctx.getTE());

  // Find the value that reaches the end of this basic block for each of the
  // expanded locations.
  ValueTableMap &OTM = getForwardValOut();
  for (unsigned i = 0; i < Locs.size(); ++i) {
    Values[Locs[i]] = Ctx.getValue(OTM[Ctx.getLocationBit(Locs[i])]);
  }

  // Second, reduce the available values into a single SILValue we can use to
  // forward.
  SILValue TheForwardingValue;
  TheForwardingValue = LSValue::reduce(L, &BB->getModule(), Values,
                                       BB->getTerminator());
  // Return the forwarding value.
  return TheForwardingValue;
}

bool BlockState::setupRLE(RLEContext &Ctx, SILInstruction *I, SILValue Mem) {
  // Try to construct a SILValue for the current LSLocation.
  //
  // Collect the locations and their corresponding values into a map.
  LSLocation L;
  LSLocationBaseMap &BaseToLocIndex = Ctx.getBM();
  if (BaseToLocIndex.find(Mem) != BaseToLocIndex.end()) {
    L = BaseToLocIndex[Mem];
  } else {
    SILValue UO = getUnderlyingObject(Mem);
    L = LSLocation(UO, ProjectionPath::getProjectionPath(UO, Mem));
  }

  LSLocationValueMap Values;
  // Use the ForwardValIn as we are currently processing the basic block.
  if (!Ctx.collectLocationValues(I->getParent(), L, Values, getForwardValIn()))
    return false;

  // Reduce the available values into a single SILValue we can use to forward.
  SILModule *Mod = &I->getModule();
  SILValue TheForwardingValue = LSValue::reduce(L, Mod, Values, I);
  if (!TheForwardingValue)
    return false;

  // Now that we have the forwarding value, record it for forwarding!
  //
  // NOTE: we do not perform the RLE right here because doing so could
  // introduce new LSLocations.
  //
  // e.g.
  //    %0 = load %x
  //    %1 = load %x
  //    %2 = struct_extract %1, #a
  //    %3 = load %2
  //
  // If we perform the RLE and replace %1 with %0, we end up having a memory
  // location we did not have before, i.e. Base == %0, and Path == #a.
  //
  // We may be able to add the LSLocation to the vault, but it gets
  // complicated very quickly, e.g. we would need to resize the bit vectors,
  // etc.
  //
  // However, since we already know the instruction to replace and the value
  // to replace it with, we can record it for now and forward it after all
  // the forwardable values are recorded in the function.
  //
  RedundantLoads[I] = TheForwardingValue;
  return true;
}

void BlockState::updateForwardSetForRead(RLEContext &Ctx, unsigned B) {
  startTrackingLocation(ForwardSetIn, B);
}

void BlockState::updateGenKillSetForRead(RLEContext &Ctx, unsigned B) {
  startTrackingLocation(BBGenSet, B);
  stopTrackingLocation(BBKillSet, B);
}

void BlockState::updateForwardSetAndValForRead(RLEContext &Ctx, unsigned L,
                                               unsigned V) {
  // Track the new location and value.
  startTrackingValue(ForwardValIn, L, V);
  startTrackingLocation(ForwardSetIn, L);
}

void BlockState::updateGenKillSetForWrite(RLEContext &Ctx, unsigned B) {
  // This is a store; invalidate any location that this location may alias,
  // as their values can no longer be forwarded.
  LSLocation &R = Ctx.getLocation(B);
  for (unsigned i = 0; i < LocationNum; ++i) {
    if (!isTrackingLocation(ForwardSetMax, i))
      continue;
    LSLocation &L = Ctx.getLocation(i);
    if (!L.isMayAliasLSLocation(R, Ctx.getAA()))
      continue;
    // MayAlias, invalidate the location.
    stopTrackingLocation(BBGenSet, i);
    startTrackingLocation(BBKillSet, i);
  }

  // Start tracking this location.
  startTrackingLocation(BBGenSet, B);
  stopTrackingLocation(BBKillSet, B);
}

void BlockState::updateMaxAvailSetForWrite(RLEContext &Ctx, unsigned B) {
  startTrackingLocation(ForwardSetMax, B);
}

void BlockState::updateMaxAvailSetForRead(RLEContext &Ctx, unsigned B) {
  startTrackingLocation(ForwardSetMax, B);
}

void BlockState::updateForwardSetForWrite(RLEContext &Ctx, unsigned B) {
  // This is a store; invalidate any location that this location may alias,
  // as their values can no longer be forwarded.
  LSLocation &R = Ctx.getLocation(B);
  for (unsigned i = 0; i < LocationNum; ++i) {
    if (!isTrackingLocation(ForwardSetIn, i))
      continue;
    LSLocation &L = Ctx.getLocation(i);
    if (!L.isMayAliasLSLocation(R, Ctx.getAA()))
      continue;
    // MayAlias, invalidate the location.
    stopTrackingLocation(ForwardSetIn, i);
  }

  // Start tracking this location.
  startTrackingLocation(ForwardSetIn, B);
}

void BlockState::updateForwardSetAndValForWrite(RLEContext &Ctx, unsigned L,
                                                unsigned V) {
  // This is a store; invalidate any location that this location may alias,
  // as their values can no longer be forwarded.
  LSLocation &R = Ctx.getLocation(L);
  for (unsigned i = 0; i < LocationNum; ++i) {
    if (!isTrackingLocation(ForwardSetIn, i))
      continue;
    LSLocation &L = Ctx.getLocation(i);
    if (!L.isMayAliasLSLocation(R, Ctx.getAA()))
      continue;
    // MayAlias, invalidate the location and value.
    stopTrackingValue(ForwardValIn, i);
    stopTrackingLocation(ForwardSetIn, i);
  }

  // Start tracking this location and value.
  startTrackingLocation(ForwardSetIn, L);
  startTrackingValue(ForwardValIn, L, V);
}

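// An illustrative sketch of the expansion performed below (the struct and
// names are hypothetical, not taken from this file): given
//
//   struct S { var a: Int; var b: Int }
//
// a `store %s to %addr : $*S` expands into two indivisible writes, one for
// the location (%addr, #S.a) and one for (%addr, #S.b), each paired with the
// matching component of %s.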
void BlockState::processWrite(RLEContext &Ctx, SILInstruction *I, SILValue Mem,
                              SILValue Val, RLEKind Kind) {
  // Initialize the LSLocation.
  LSLocation L;
  LSLocationBaseMap &BaseToLocIndex = Ctx.getBM();
  if (BaseToLocIndex.find(Mem) != BaseToLocIndex.end()) {
    L = BaseToLocIndex[Mem];
  } else {
    SILValue UO = getUnderlyingObject(Mem);
    L = LSLocation(UO, ProjectionPath::getProjectionPath(UO, Mem));
  }

  // If we can't figure out the Base or Projection Path for the write,
  // process it as an unknown memory instruction.
  if (!L.isValid()) {
    // We can ignore unknown store instructions if we are computing the
    // AvailSetMax.
    if (!isComputeAvailSetMax(Kind)) {
      processUnknownWriteInst(Ctx, I, Kind);
    }
    return;
  }

  // Expand the given location and val into individual fields and process
  // them as separate writes.
  LSLocationList Locs;
  LSLocation::expand(L, &I->getModule(), Locs, Ctx.getTE());

  if (isComputeAvailSetMax(Kind)) {
    for (unsigned i = 0; i < Locs.size(); ++i) {
      updateMaxAvailSetForWrite(Ctx, Ctx.getLocationBit(Locs[i]));
    }
    return;
  }

  // Are we computing the genset and killset?
  if (isComputeAvailGenKillSet(Kind)) {
    for (unsigned i = 0; i < Locs.size(); ++i) {
      updateGenKillSetForWrite(Ctx, Ctx.getLocationBit(Locs[i]));
    }
    return;
  }

  // Are we computing the available value or performing RLE?
  LSValueList Vals;
  LSValue::expand(Val, &I->getModule(), Vals, Ctx.getTE());
  if (isComputeAvailValue(Kind) || isPerformingRLE(Kind)) {
    for (unsigned i = 0; i < Locs.size(); ++i) {
      updateForwardSetAndValForWrite(Ctx, Ctx.getLocationBit(Locs[i]),
                                     Ctx.getValueBit(Vals[i]));
    }
    return;
  }

  llvm_unreachable("Unknown RLE compute kind");
}

void BlockState::processRead(RLEContext &Ctx, SILInstruction *I, SILValue Mem,
                             SILValue Val, RLEKind Kind) {
  // Initialize the LSLocation.
  LSLocation L;
  LSLocationBaseMap &BaseToLocIndex = Ctx.getBM();
  if (BaseToLocIndex.find(Mem) != BaseToLocIndex.end()) {
    L = BaseToLocIndex[Mem];
  } else {
    SILValue UO = getUnderlyingObject(Mem);
    L = LSLocation(UO, ProjectionPath::getProjectionPath(UO, Mem));
  }

  // If we can't figure out the Base or Projection Path for the read, simply
  // ignore it for now.
  if (!L.isValid())
    return;

  // Expand the given LSLocation and Val into individual fields and process
  // them as separate reads.
  LSLocationList Locs;
  LSLocation::expand(L, &I->getModule(), Locs, Ctx.getTE());

  if (isComputeAvailSetMax(Kind)) {
    for (unsigned i = 0; i < Locs.size(); ++i) {
      updateMaxAvailSetForRead(Ctx, Ctx.getLocationBit(Locs[i]));
    }
    return;
  }

  // Are we computing the genset and killset?
  if (isComputeAvailGenKillSet(Kind)) {
    for (unsigned i = 0; i < Locs.size(); ++i) {
      updateGenKillSetForRead(Ctx, Ctx.getLocationBit(Locs[i]));
    }
    return;
  }

  // Are we computing available values or performing RLE?
  bool CanForward = true;
  LSValueList Vals;
  LSValue::expand(Val, &I->getModule(), Vals, Ctx.getTE());
  if (isComputeAvailValue(Kind) || isPerformingRLE(Kind)) {
    for (unsigned i = 0; i < Locs.size(); ++i) {
      if (isTrackingLocation(ForwardSetIn, Ctx.getLocationBit(Locs[i])))
        continue;
      updateForwardSetAndValForRead(Ctx, Ctx.getLocationBit(Locs[i]),
                                    Ctx.getValueBit(Vals[i]));
      // We cannot perform the forwarding as we are missing at least
      // some pieces of the read location.
      CanForward = false;
    }
  }

  // Simply return if we are not performing RLE or we do not have all the
  // values available to perform RLE.
  if (!isPerformingRLE(Kind) || !CanForward)
    return;

  // Lastly, forward the value to the load.
  setupRLE(Ctx, I, Mem);
}

void BlockState::processStoreInst(RLEContext &Ctx, StoreInst *SI, RLEKind Kind) {
  processWrite(Ctx, SI, SI->getDest(), SI->getSrc(), Kind);
}

void BlockState::processLoadInst(RLEContext &Ctx, LoadInst *LI, RLEKind Kind) {
  processRead(Ctx, LI, LI->getOperand(), SILValue(LI), Kind);
}

void BlockState::processUnknownWriteInstForGenKillSet(RLEContext &Ctx,
                                                      SILInstruction *I) {
  auto *AA = Ctx.getAA();
  for (unsigned i = 0; i < LocationNum; ++i) {
    if (!isTrackingLocation(ForwardSetMax, i))
      continue;
    // Invalidate any location this instruction may write to.
    //
    // TODO: checking may-alias with the Base is overly conservative;
    // we should check may-alias with the base plus the projection path.
    LSLocation &R = Ctx.getLocation(i);
    if (!AA->mayWriteToMemory(I, R.getBase()))
      continue;
    // MayAlias.
    stopTrackingLocation(BBGenSet, i);
    startTrackingLocation(BBKillSet, i);
  }
}

void BlockState::processUnknownWriteInstForRLE(RLEContext &Ctx,
                                               SILInstruction *I) {
  auto *AA = Ctx.getAA();
  for (unsigned i = 0; i < LocationNum; ++i) {
    if (!isTrackingLocation(ForwardSetIn, i))
      continue;
    // Invalidate any location this instruction may write to.
    //
    // TODO: checking may-alias with the Base is overly conservative;
    // we should check may-alias with the base plus the projection path.
    LSLocation &R = Ctx.getLocation(i);
    if (!AA->mayWriteToMemory(I, R.getBase()))
      continue;
    // MayAlias.
    stopTrackingLocation(ForwardSetIn, i);
    stopTrackingValue(ForwardValIn, i);
  }
}

void BlockState::processUnknownWriteInst(RLEContext &Ctx, SILInstruction *I,
                                         RLEKind Kind) {
  // If this is a release on a guaranteed parameter, it cannot call a deinit,
  // which might read or write memory.
  if (isGuaranteedParamRelease(I))
    return;

  // Are we computing the genset and killset?
  if (isComputeAvailGenKillSet(Kind)) {
    processUnknownWriteInstForGenKillSet(Ctx, I);
    return;
  }

  // Are we computing the available value or doing RLE?
  if (isComputeAvailValue(Kind) || isPerformingRLE(Kind)) {
    processUnknownWriteInstForRLE(Ctx, I);
    return;
  }

  llvm_unreachable("Unknown RLE compute kind");
}

void BlockState::
processDeallocStackInstForGenKillSet(RLEContext &Ctx, SILInstruction *I) {
  SILValue ASI = findAllocStackInst(I);
  for (unsigned i = 0; i < LocationNum; ++i) {
    LSLocation &R = Ctx.getLocation(i);
    if (R.getBase() != ASI)
      continue;
    // The location is based on the alloc_stack being deallocated;
    // invalidate it.
    stopTrackingLocation(BBGenSet, i);
    startTrackingLocation(BBKillSet, i);
  }
}

void BlockState::
processDeallocStackInstForRLE(RLEContext &Ctx, SILInstruction *I) {
  SILValue ASI = findAllocStackInst(I);
  for (unsigned i = 0; i < LocationNum; ++i) {
    LSLocation &R = Ctx.getLocation(i);
    if (R.getBase() != ASI)
      continue;
    // The location is based on the alloc_stack being deallocated;
    // invalidate it.
    stopTrackingLocation(ForwardSetIn, i);
    stopTrackingValue(ForwardValIn, i);
  }
}

void BlockState::
processDeallocStackInst(RLEContext &Ctx, SILInstruction *I, RLEKind Kind) {
  // Are we computing the genset and killset?
  if (isComputeAvailGenKillSet(Kind)) {
    processDeallocStackInstForGenKillSet(Ctx, I);
    return;
  }

  // Are we computing the available value or doing RLE?
  if (isComputeAvailValue(Kind) || isPerformingRLE(Kind)) {
    processDeallocStackInstForRLE(Ctx, I);
    return;
  }

  llvm_unreachable("Unknown RLE compute kind");
}

void BlockState::processInstructionWithKind(RLEContext &Ctx,
                                            SILInstruction *Inst,
                                            RLEKind Kind) {
  // This is a StoreInst; try to see whether it clobbers any forwarding value.
  if (auto *SI = dyn_cast<StoreInst>(Inst)) {
    processStoreInst(Ctx, SI, Kind);
    return;
  }

  // This is a LoadInst. Let's see if we can find a previously loaded or
  // stored value to use instead of this load.
  if (auto *LI = dyn_cast<LoadInst>(Inst)) {
    processLoadInst(Ctx, LI, Kind);
    return;
  }

  if (auto *DSI = dyn_cast<DeallocStackInst>(Inst)) {
    processDeallocStackInst(Ctx, DSI, Kind);
    return;
  }

  // If this instruction has side effects, but is inert from a load store
  // perspective, skip it.
  if (isRLEInertInstruction(Inst))
    return;

  // If this instruction does not read or write memory, we can skip it.
  if (!Inst->mayReadOrWriteMemory())
    return;

  // If we have an instruction that may write to memory and we cannot prove
  // that it and its operands cannot alias a load we have visited,
  // invalidate that load.
  if (Inst->mayWriteToMemory()) {
    processUnknownWriteInst(Ctx, Inst, Kind);
    return;
  }
}

RLEContext::ProcessKind
RLEContext::
getProcessFunctionKind(unsigned LoadCount, unsigned StoreCount) {
  // Don't optimize functions that are marked as 'no.optimize'.
  if (!Fn->shouldOptimize())
    return ProcessKind::ProcessNone;

  // Really no point optimizing here as there are no forwardable loads.
  if (LoadCount + StoreCount < 2)
    return ProcessKind::ProcessNone;

  bool RunOneIteration = true;
  unsigned BBCount = 0;
  unsigned LocationCount = LocationVault.size();

  if (LocationCount == 0)
    return ProcessKind::ProcessNone;

  // If every basic block has all of its predecessors processed when the
  // basic blocks in the function are iterated in reverse post order, then
  // this function can be processed in one iteration, i.e. there is no need
  // to generate the genset and killset.
  auto *PO = PM->getAnalysis<PostOrderAnalysis>()->get(Fn);
  llvm::DenseSet<SILBasicBlock *> HandledBBs;
  for (SILBasicBlock *B : PO->getReversePostOrder()) {
    ++BBCount;
    for (auto X : B->getPreds()) {
      if (HandledBBs.find(X) == HandledBBs.end()) {
        RunOneIteration = false;
        break;
      }
    }
    HandledBBs.insert(B);
  }

  // Data flow may take too long to run.
  if (BBCount * LocationCount > MaxLSLocationBBMultiplicationNone)
    return ProcessKind::ProcessNone;

  // This function's data flow would converge in one iteration.
  if (RunOneIteration)
    return ProcessKind::ProcessOneIteration;

  // The function is too large for a multi-iteration data flow; run one
  // pessimistic data flow iteration to do redundant load elimination on
  // the function.
  if (BBCount * LocationCount > MaxLSLocationBBMultiplicationPessimistic)
    return ProcessKind::ProcessOneIteration;

  return ProcessKind::ProcessMultipleIterations;
}

//===----------------------------------------------------------------------===//
//                          RLEContext Implementation
//===----------------------------------------------------------------------===//

RLEContext::RLEContext(SILFunction *F, SILPassManager *PM, AliasAnalysis *AA,
                       TypeExpansionAnalysis *TE, PostOrderFunctionInfo *PO)
    : Fn(F), PM(PM), AA(AA), TE(TE), PO(PO) {
}

LSLocation &RLEContext::getLocation(const unsigned index) {
  return LocationVault[index];
}

unsigned RLEContext::getLocationBit(const LSLocation &Loc) {
  // Return the bit position of the given Loc in the LocationVault. The bit
  // position is then used to set/reset the bitvector kept by each BlockState.
  //
  // We should have the location populated by enumerateLSLocations at this
  // point.
  auto Iter = LocToBitIndex.find(Loc);
  assert(Iter != LocToBitIndex.end() && "Location should have been enum'ed");
  return Iter->second;
}

LSValue &RLEContext::getValue(const unsigned index) {
  return LSValueVault[index];
}

unsigned RLEContext::getValueBit(const LSValue &Val) {
  // Return the bit position of the given Val in the LSValueVault. The bit
  // position is then used to set/reset the bitvector kept by each BlockState.
  auto Iter = ValToBitIndex.find(Val);

  // We do not walk over the function and find all the possible LSValues
  // in this function, as some of these values will not be used, e.g.
  // if the LoadInst that generates this value is actually RLE'ed.
  // Instead, we create the LSValues when we need them.
  if (Iter == ValToBitIndex.end()) {
    ValToBitIndex[Val] = LSValueVault.size();
    LSValueVault.push_back(Val);
    return ValToBitIndex[Val];
  }
  return Iter->second;
}

BlockState::ValueState BlockState::getValueStateAtEndOfBlock(RLEContext &Ctx,
                                                             LSLocation &L) {
  // Find the number of covering values and concrete values for the locations
  // expanded from the given location.
  unsigned CSCount = 0, CTCount = 0;
  LSLocationList Locs;
  LSLocation::expand(L, &BB->getModule(), Locs, Ctx.getTE());

  ValueTableMap &OTM = getForwardValOut();
  for (auto &X : Locs) {
    LSValue &V = Ctx.getValue(OTM[Ctx.getLocationBit(X)]);
    if (V.isCoveringValue()) {
      ++CSCount;
      continue;
    }
    ++CTCount;
  }

  if (CSCount == Locs.size())
    return ValueState::CoverValues;
  if (CTCount == Locs.size())
    return ValueState::ConcreteValues;
  return ValueState::CoverAndConcreteValues;
}

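// A small illustrative CFG sketch (hypothetical, not from any particular
// function) of what the SSA updater accomplishes below: if location L holds
// %a at the end of predecessor BB1 and %b at the end of predecessor BB2, the
// updater materializes a block argument (SIL's form of a phi) in the
// successor:
//
//   BB1: ... br BB3(%a)
//   BB2: ... br BB3(%b)
//   BB3(%phi): %phi is the value of L used for forwarding in BB3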
SILValue RLEContext::computePredecessorLocationValue(SILBasicBlock *BB,
                                                     LSLocation &L) {
  BBValueMap Values;
  llvm::DenseSet<SILBasicBlock *> HandledBBs;
  llvm::SmallVector<SILBasicBlock *, 8> WorkList;

  // Push in all the predecessors to get started.
  for (auto Pred : BB->getPreds()) {
    WorkList.push_back(Pred);
  }

  while (!WorkList.empty()) {
    auto *CurBB = WorkList.pop_back_val();
    BlockState &Forwarder = getBlockState(CurBB);

    // Mark this basic block as processed.
    HandledBBs.insert(CurBB);

    // There are 3 cases that can happen here.
    //
    // 1. The current basic block contains concrete values for the entire
    //    location.
    // 2. The current basic block contains covering values for the entire
    //    location.
    // 3. The current basic block contains concrete values for part of the
    //    location and covering values for the rest.
    //
    // This BlockState contains concrete values for all the expanded
    // locations; collect and reduce them into a single value in the current
    // basic block.
    if (Forwarder.isConcreteValues(*this, L)) {
      Values[CurBB] = Forwarder.reduceValuesAtEndOfBlock(*this, L);
      continue;
    }

    // This BlockState does not contain concrete values for any of the
    // expanded locations; collect them in this block's predecessors.
    if (Forwarder.isCoverValues(*this, L)) {
      for (auto Pred : CurBB->getPreds()) {
        if (HandledBBs.find(Pred) != HandledBBs.end())
          continue;
        WorkList.push_back(Pred);
      }
      continue;
    }

    // This block contains concrete values for some but not all the expanded
    // locations; recursively call collectLocationValues to materialize the
    // value that reaches this basic block.
    LSLocationValueMap LSValues;
    if (!collectLocationValues(CurBB, L, LSValues, Forwarder.getForwardValOut()))
      return SILValue();

    // Reduce the available values into a single SILValue we can use to
    // forward.
    SILInstruction *IPt = CurBB->getTerminator();
    Values[CurBB] = LSValue::reduce(L, &BB->getModule(), LSValues, IPt);
  }

  // Finally, collect all the values that reach this point and materialize
  // them into a single value using the SSAUpdater.
  Updater.Initialize(L.getType(&BB->getModule()).getObjectType());
  for (auto V : Values) {
    Updater.AddAvailableValue(V.first, V.second);
  }

  return Updater.GetValueInMiddleOfBlock(BB);
}

bool RLEContext::collectLocationValues(SILBasicBlock *BB, LSLocation &L,
                                       LSLocationValueMap &Values,
                                       ValueTableMap &VM) {
  LSLocationSet CSLocs;
  LSLocationList Locs;
  LSLocation::expand(L, &BB->getModule(), Locs, TE);

  auto *Mod = &BB->getModule();
  // Find the locations that this basic block defines and the locations for
  // which we do not have a concrete value in the current basic block.
  for (auto &X : Locs) {
    Values[X] = getValue(VM[getLocationBit(X)]);
    if (!Values[X].isCoveringValue())
      continue;
    CSLocs.insert(X);
  }

  // For the locations that we do not have concrete values for in this basic
  // block, try to reduce them to the minimum # of locations possible; this
  // will help us to generate as few SILArguments as possible.
  LSLocation::reduce(L, Mod, CSLocs);

  // To handle covering values, we need to go to the predecessors and
  // materialize them there.
  for (auto &X : CSLocs) {
    SILValue V = computePredecessorLocationValue(BB, X);
    if (!V)
      return false;

    // We've constructed a concrete value for the covering value. Expand and
    // collect the newly created forwardable values.
    LSLocationList Locs;
    LSValueList Vals;
    LSLocation::expand(X, Mod, Locs, TE);
    LSValue::expand(V, Mod, Vals, TE);

    for (unsigned i = 0; i < Locs.size(); ++i) {
      Values[Locs[i]] = Vals[i];
      assert(Values[Locs[i]].isValid() && "Invalid load store value");
    }
  }
  return true;
}

void RLEContext::processBasicBlocksForGenKillSet() {
  for (SILBasicBlock *BB : PO->getReversePostOrder()) {
    BlockState &S = getBlockState(BB);

    // Compute the AvailSetMax at the beginning of the basic block.
    S.mergePredecessorsAvailSetMax(*this);

    // Compute the genset and killset.
    //
    // To optimize this process, we also compute the AvailSetMax at a
    // particular point in the basic block.
    for (auto I = BB->begin(), E = BB->end(); I != E; ++I) {
      if (auto *LI = dyn_cast<LoadInst>(&*I)) {
        if (BBWithLoads.find(BB) == BBWithLoads.end())
          BBWithLoads.insert(BB);
        S.processLoadInst(*this, LI, RLEKind::ComputeAvailSetMax);
      }
      if (auto *SI = dyn_cast<StoreInst>(&*I)) {
        S.processStoreInst(*this, SI, RLEKind::ComputeAvailSetMax);
      }

      S.processInstructionWithKind(*this, &*I, RLEKind::ComputeAvailGenKillSet);
    }
  }
}

void RLEContext::processBasicBlocksWithGenKillSet() {
  // Process each basic block with the gen and kill set. Every time the
  // ForwardSetOut of a basic block changes, the optimization is rerun on its
  // successors.
  llvm::SmallVector<SILBasicBlock *, 16> WorkList;
  llvm::DenseSet<SILBasicBlock *> HandledBBs;

  // Push into the worklist in post order so that we can pop from the back
  // and get reverse post order.
  for (SILBasicBlock *BB : PO->getPostOrder()) {
    WorkList.push_back(BB);
    HandledBBs.insert(BB);
  }
  while (!WorkList.empty()) {
    SILBasicBlock *BB = WorkList.pop_back_val();
    HandledBBs.erase(BB);

    // Intersection.
    BlockState &Forwarder = getBlockState(BB);
    // Compute the ForwardSetIn at the beginning of the basic block.
    Forwarder.mergePredecessorAvailSet(*this);

    if (Forwarder.processBasicBlockWithGenKillSet()) {
      for (auto &X : BB->getSuccessors()) {
        // We do not push a basic block into the worklist if it's already
        // in the worklist.
        if (HandledBBs.find(X) != HandledBBs.end())
          continue;
        WorkList.push_back(X);
      }
    }
  }
}

void RLEContext::processBasicBlocksForAvailValue() {
  for (SILBasicBlock *BB : PO->getReversePostOrder()) {
    BlockState &Forwarder = getBlockState(BB);

    // Merge the predecessors. After merging, BlockState now contains
    // lists of available LSLocations and their values that reach the
    // beginning of the basic block along all paths.
    Forwarder.mergePredecessorAvailSetAndValue(*this);

    // Merge duplicate loads, and forward stores to loads. We also update
    // the lists of stores/loads to reflect the end of the basic block.
    Forwarder.processBasicBlockWithKind(*this, RLEKind::ComputeAvailValue);

    // Update the locations with available values. We do not need to update
    // the available BitVectors here as they should have been initialized and
    // stabilized in processBasicBlocksWithGenKillSet.
    Forwarder.updateForwardValOut();
  }
}

void RLEContext::processBasicBlocksForRLE(bool Optimistic) {
  for (SILBasicBlock *BB : PO->getReversePostOrder()) {
    // If we know this is not a one-iteration function, its forward sets have
    // already been computed and converged. If, in addition, this basic block
    // does not even have LoadInsts, there is no point in processing every
    // instruction in the basic block again as no load will be eliminated.
    if (Optimistic && BBWithLoads.find(BB) == BBWithLoads.end())
      continue;

    BlockState &Forwarder = getBlockState(BB);

    // Merge the predecessors. After merging, BlockState now contains
    // lists of available LSLocations and their values that reach the
    // beginning of the basic block along all paths.
    Forwarder.mergePredecessorAvailSetAndValue(*this);

    // Perform the actual redundant load elimination.
    Forwarder.processBasicBlockWithKind(*this, RLEKind::PerformRLE);

    // If this is not a one-iteration data flow, then the forward sets
    // have already been computed.
    if (Optimistic)
      continue;

    // Update the available locations and their values.
    Forwarder.updateForwardSetOut();
    Forwarder.updateForwardValOut();
  }
}

void RLEContext::runIterativeRLE() {
  // Generate the genset and killset for every basic block.
  processBasicBlocksForGenKillSet();

  // Process basic blocks in RPO. After the data flow converges, run the last
  // iteration and perform load forwarding.
  processBasicBlocksWithGenKillSet();

  // We have computed the available value bits; now go through every basic
  // block and compute the forwarding value locally. This is necessary as
  // when we perform the RLE in the last iteration, we must handle loops,
  // i.e. predecessor blocks which have not been processed when a basic
  // block is processed.
  processBasicBlocksForAvailValue();
}

bool RLEContext::run() {
  // We perform redundant load elimination in the following phases.
  //
  // Phase 1. Compute the genset and killset for every basic block.
  //
  // Phase 2. Use an iterative data flow to compute whether there is an
  // available value at a given point; we do not yet care about what the
  // value is.
  //
  // Phase 3. We compute the real forwardable value at a given point.
  //
  // Phase 4. We perform the redundant load elimination.

  // Walk over the function and find all the locations accessed by
  // this function.
  std::pair<int, int> LSCount = std::make_pair(0, 0);
  LSLocation::enumerateLSLocations(*Fn, LocationVault,
                                   LocToBitIndex,
                                   BaseToLocIndex, TE,
                                   LSCount);

  // Check how to optimize this function.
  ProcessKind Kind = getProcessFunctionKind(LSCount.first, LSCount.second);

  // We do not optimize this function at all.
  if (Kind == ProcessKind::ProcessNone)
    return false;

  // Do we run a multi-iteration data flow?
  bool Optimistic = Kind == ProcessKind::ProcessMultipleIterations;

  // This is the set of basic blocks that we actually process.
  // We do not process unreachable blocks; instead we set their liveouts to
  // nil.
  llvm::DenseSet<SILBasicBlock *> BBToProcess;
  for (auto X : PO->getPostOrder())
    BBToProcess.insert(X);

  // For all basic blocks in the function, initialize a BB state. Since we
  // know all the locations accessed in this function, we can resize the bit
  // vector to the appropriate size.
  for (auto &B : *Fn) {
    BBToLocState[&B] = BlockState();
    BBToLocState[&B].init(&B, LocationVault.size(), Optimistic &&
                          BBToProcess.find(&B) != BBToProcess.end());
  }

  if (Optimistic)
    runIterativeRLE();

  // We have the available value bits computed and the local forwarding
  // values. Set up the load forwarding.
  processBasicBlocksForRLE(Optimistic);

  // Finally, perform the redundant load replacements.
  llvm::DenseSet<SILInstruction *> InstsToDelete;
  bool SILChanged = false;
  for (auto &X : BBToLocState) {
    for (auto &F : X.second.getRL()) {
      DEBUG(llvm::dbgs() << "Replacing " << SILValue(F.first) << " with "
                         << F.second);
      SILChanged = true;
      F.first->replaceAllUsesWith(F.second);
      InstsToDelete.insert(F.first);
      ++NumForwardedLoads;
    }
  }

  // Erase the instructions recursively; this way we get rid of a pass
  // dependence on DCE.
  for (auto &X : InstsToDelete) {
    // It is possible that the instruction still has uses, because it could
    // be used as the replacement value, i.e. F.second, for some other RLE
    // pairs.
    //
    // TODO: we should fix this; otherwise we are missing RLE opportunities.
    if (!X->use_empty())
      continue;
    recursivelyDeleteTriviallyDeadInstructions(X, true);
  }
  return SILChanged;
}

//===----------------------------------------------------------------------===//
//                           Top Level Entry Point
//===----------------------------------------------------------------------===//

namespace {

class RedundantLoadElimination : public SILFunctionTransform {

  StringRef getName() override { return "SIL Redundant Load Elimination"; }

  /// The entry point to the transformation.
  void run() override {
    SILFunction *F = getFunction();
    DEBUG(llvm::dbgs() << "*** RLE on function: " << F->getName() << " ***\n");

    auto *AA = PM->getAnalysis<AliasAnalysis>();
    auto *TE = PM->getAnalysis<TypeExpansionAnalysis>();
    auto *PO = PM->getAnalysis<PostOrderAnalysis>()->get(F);

    RLEContext RLE(F, PM, AA, TE, PO);
    if (RLE.run()) {
      invalidateAnalysis(SILAnalysis::InvalidationKind::Instructions);
    }
  }
};

} // end anonymous namespace

SILTransform *swift::createRedundantLoadElimination() {
  return new RedundantLoadElimination();
}