mirror of
https://github.com/apple/swift.git
synced 2025-12-21 12:14:44 +01:00
RLE: better handling of ref_element/tail_addr [immutable]
Rerun RLE, this time cutting off the base address of loads at `ref_element_addr [immutable]` or `ref_tail_addr [immutable]`. This increases the chance of catching loads of immutable COW class properties or elements.
This commit is contained in:
@@ -26,8 +26,6 @@ namespace swift {
|
||||
/// nothing left to strip.
|
||||
SILValue getUnderlyingObject(SILValue V);
|
||||
|
||||
SILValue getUnderlyingObjectStopAtMarkDependence(SILValue V);
|
||||
|
||||
SILValue stripSinglePredecessorArgs(SILValue V);
|
||||
|
||||
/// Return the underlying SILValue after stripping off all casts from the
|
||||
|
||||
@@ -379,21 +379,37 @@ public:
|
||||
static void reduce(LSLocation Base, SILModule *Mod,
|
||||
TypeExpansionContext context, LSLocationList &Locs);
|
||||
|
||||
/// Gets the base address for `v`.
|
||||
/// If `stopAtImmutable` is true, the base address is only calculated up to
|
||||
/// a `ref_element_addr [immutable]` or a `ref_tail_addr [immutable]`.
|
||||
/// Return the base address and true if such an immutable class projection
|
||||
/// is found.
|
||||
static std::pair<SILValue, bool>
|
||||
getBaseAddressOrObject(SILValue v, bool stopAtImmutable);
|
||||
|
||||
/// Enumerate the given Mem LSLocation.
|
||||
static void enumerateLSLocation(TypeExpansionContext context, SILModule *M,
|
||||
/// If `stopAtImmutable` is true, the base address is only calculated up to
|
||||
/// a `ref_element_addr [immutable]` or a `ref_tail_addr [immutable]`.
|
||||
/// Returns true if it's an immutable location.
|
||||
static bool enumerateLSLocation(TypeExpansionContext context, SILModule *M,
|
||||
SILValue Mem,
|
||||
std::vector<LSLocation> &LSLocationVault,
|
||||
LSLocationIndexMap &LocToBit,
|
||||
LSLocationBaseMap &BaseToLoc,
|
||||
TypeExpansionAnalysis *TE);
|
||||
TypeExpansionAnalysis *TE,
|
||||
bool stopAtImmutable);
|
||||
|
||||
/// Enumerate all the locations in the function.
|
||||
/// If `stopAtImmutable` is true, the base addresses are only calculated up to
|
||||
/// a `ref_element_addr [immutable]` or a `ref_tail_addr [immutable]`.
|
||||
static void enumerateLSLocations(SILFunction &F,
|
||||
std::vector<LSLocation> &LSLocationVault,
|
||||
LSLocationIndexMap &LocToBit,
|
||||
LSLocationBaseMap &BaseToLoc,
|
||||
TypeExpansionAnalysis *TE,
|
||||
std::pair<int, int> &LSCount);
|
||||
bool stopAtImmutable,
|
||||
int &numLoads, int &numStores,
|
||||
bool &immutableLoadsFound);
|
||||
};
|
||||
|
||||
static inline llvm::hash_code hash_value(const LSLocation &L) {
|
||||
|
||||
@@ -62,18 +62,6 @@ SILValue swift::getUnderlyingObject(SILValue v) {
|
||||
}
|
||||
}
|
||||
|
||||
SILValue swift::getUnderlyingObjectStopAtMarkDependence(SILValue v) {
|
||||
while (true) {
|
||||
SILValue v2 = stripCastsWithoutMarkDependence(v);
|
||||
v2 = stripAddressProjections(v2);
|
||||
v2 = stripIndexingInsts(v2);
|
||||
v2 = lookThroughOwnershipInsts(v2);
|
||||
if (v2 == v)
|
||||
return v2;
|
||||
v = v2;
|
||||
}
|
||||
}
|
||||
|
||||
/// Return the underlying SILValue after stripping off identity SILArguments if
|
||||
/// we belong to a BB with one predecessor.
|
||||
SILValue swift::stripSinglePredecessorArgs(SILValue V) {
|
||||
|
||||
@@ -1182,14 +1182,17 @@ void DSEContext::runIterativeDSE() {
|
||||
}
|
||||
|
||||
bool DSEContext::run() {
|
||||
std::pair<int, int> LSCount = std::make_pair(0, 0);
|
||||
int numLoads = 0, numStores = 0;
|
||||
bool immutableLoadsFound = false;
|
||||
// Walk over the function and find all the locations accessed by
|
||||
// this function.
|
||||
LSLocation::enumerateLSLocations(*F, LocationVault, LocToBitIndex,
|
||||
BaseToLocIndex, TE, LSCount);
|
||||
BaseToLocIndex, TE,
|
||||
/*stopAtImmutable*/ false,
|
||||
numLoads, numStores, immutableLoadsFound);
|
||||
|
||||
// Check how to optimize this function.
|
||||
ProcessKind Kind = getProcessFunctionKind(LSCount.second);
|
||||
ProcessKind Kind = getProcessFunctionKind(numStores);
|
||||
|
||||
// We do not optimize this function at all.
|
||||
if (Kind == ProcessKind::ProcessNone)
|
||||
|
||||
@@ -486,14 +486,21 @@ private:
|
||||
/// If set, RLE ignores loads from that array type.
|
||||
NominalTypeDecl *ArrayType;
|
||||
|
||||
/// Set to true if loads with a `ref_element_addr [immutable]` or
|
||||
/// `ref_tail_addr [immutable]` base address are found.
|
||||
bool immutableLoadsFound = false;
|
||||
|
||||
/// Only optimize loads with a base address of `ref_element_addr [immutable]` or
|
||||
/// `ref_tail_addr [immutable]`.
|
||||
bool onlyImmutableLoads;
|
||||
|
||||
#ifndef NDEBUG
|
||||
SILPrintContext printCtx;
|
||||
#endif
|
||||
|
||||
public:
|
||||
RLEContext(SILFunction *F, SILPassManager *PM, AliasAnalysis *AA,
|
||||
TypeExpansionAnalysis *TE, PostOrderFunctionInfo *PO,
|
||||
EpilogueARCFunctionInfo *EAFI, bool disableArrayLoads);
|
||||
RLEContext(SILFunction *F, SILPassManager *PM,
|
||||
bool disableArrayLoads, bool onlyImmutableLoads);
|
||||
|
||||
RLEContext(const RLEContext &) = delete;
|
||||
RLEContext(RLEContext &&) = delete;
|
||||
@@ -504,6 +511,8 @@ public:
|
||||
/// Entry point to redundant load elimination.
|
||||
bool run();
|
||||
|
||||
bool shouldOptimizeImmutableLoads() const { return immutableLoadsFound; }
|
||||
|
||||
SILFunction *getFunction() const { return Fn; }
|
||||
|
||||
/// Use a set of ad hoc rules to tell whether we should run a pessimistic
|
||||
@@ -570,6 +579,11 @@ public:
|
||||
LI->getType().getNominalOrBoundGenericNominal() != ArrayType) {
|
||||
return LI;
|
||||
}
|
||||
if (onlyImmutableLoads &&
|
||||
!LSLocation::getBaseAddressOrObject(LI->getOperand(),
|
||||
/*stopAtImmutable*/ true).second) {
|
||||
return nullptr;
|
||||
}
|
||||
}
|
||||
return nullptr;
|
||||
}
|
||||
@@ -1200,14 +1214,17 @@ void BlockState::dump(RLEContext &Ctx) {
|
||||
// RLEContext Implementation
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
RLEContext::RLEContext(SILFunction *F, SILPassManager *PM, AliasAnalysis *AA,
|
||||
TypeExpansionAnalysis *TE, PostOrderFunctionInfo *PO,
|
||||
EpilogueARCFunctionInfo *EAFI, bool disableArrayLoads)
|
||||
: Fn(F), PM(PM), AA(AA), TE(TE), PO(PO), EAFI(EAFI), BBToLocState(F),
|
||||
BBWithLoads(F),
|
||||
RLEContext::RLEContext(SILFunction *F, SILPassManager *PM,
|
||||
bool disableArrayLoads, bool onlyImmutableLoads)
|
||||
: Fn(F), PM(PM), AA(PM->getAnalysis<AliasAnalysis>(F)),
|
||||
TE(PM->getAnalysis<TypeExpansionAnalysis>()),
|
||||
PO(PM->getAnalysis<PostOrderAnalysis>()->get(F)),
|
||||
EAFI(PM->getAnalysis<EpilogueARCAnalysis>()->get(F)),
|
||||
BBToLocState(F), BBWithLoads(F),
|
||||
ArrayType(disableArrayLoads
|
||||
? F->getModule().getASTContext().getArrayDecl()
|
||||
: nullptr)
|
||||
: nullptr),
|
||||
onlyImmutableLoads(onlyImmutableLoads)
|
||||
#ifndef NDEBUG
|
||||
,
|
||||
printCtx(llvm::dbgs(), /*Verbose=*/false, /*Sorted=*/true)
|
||||
@@ -1567,14 +1584,15 @@ bool RLEContext::run() {
|
||||
// Phase 4. we perform the redundant load elimination.
|
||||
// Walk over the function and find all the locations accessed by
|
||||
// this function.
|
||||
std::pair<int, int> LSCount = std::make_pair(0, 0);
|
||||
int numLoads = 0, numStores = 0;
|
||||
LSLocation::enumerateLSLocations(*Fn, LocationVault,
|
||||
LocToBitIndex,
|
||||
BaseToLocIndex, TE,
|
||||
LSCount);
|
||||
/*stopAtImmutable*/ onlyImmutableLoads,
|
||||
numLoads, numStores, immutableLoadsFound);
|
||||
|
||||
// Check how to optimize this function.
|
||||
ProcessKind Kind = getProcessFunctionKind(LSCount.first, LSCount.second);
|
||||
ProcessKind Kind = getProcessFunctionKind(numLoads, numStores);
|
||||
|
||||
// We do not optimize this function at all.
|
||||
if (Kind == ProcessKind::ProcessNone)
|
||||
@@ -1681,15 +1699,21 @@ public:
|
||||
LLVM_DEBUG(llvm::dbgs() << "*** RLE on function: " << F->getName()
|
||||
<< " ***\n");
|
||||
|
||||
auto *AA = PM->getAnalysis<AliasAnalysis>(F);
|
||||
auto *TE = PM->getAnalysis<TypeExpansionAnalysis>();
|
||||
auto *PO = PM->getAnalysis<PostOrderAnalysis>()->get(F);
|
||||
auto *EAFI = PM->getAnalysis<EpilogueARCAnalysis>()->get(F);
|
||||
|
||||
RLEContext RLE(F, PM, AA, TE, PO, EAFI, disableArrayLoads);
|
||||
RLEContext RLE(F, PM, disableArrayLoads,
|
||||
/*onlyImmutableLoads*/ false);
|
||||
if (RLE.run()) {
|
||||
invalidateAnalysis(SILAnalysis::InvalidationKind::Instructions);
|
||||
}
|
||||
if (RLE.shouldOptimizeImmutableLoads()) {
|
||||
/// Re-running RLE while cutting base addresses off at
|
||||
/// `ref_element_addr [immutable]` or `ref_tail_addr [immutable]` can
|
||||
/// expose additional opportunities.
|
||||
RLEContext RLE2(F, PM, disableArrayLoads,
|
||||
/*onlyImmutableLoads*/ true);
|
||||
if (RLE2.run()) {
|
||||
invalidateAnalysis(SILAnalysis::InvalidationKind::Instructions);
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
@@ -245,28 +245,60 @@ void LSLocation::reduce(LSLocation Base, SILModule *M,
|
||||
replaceSubLocations(Base, M, context, Locs, SubLocations);
|
||||
}
|
||||
|
||||
void LSLocation::enumerateLSLocation(TypeExpansionContext context, SILModule *M,
|
||||
std::pair<SILValue, bool>
|
||||
LSLocation::getBaseAddressOrObject(SILValue v, bool stopAtImmutable) {
|
||||
bool isImmutable = false;
|
||||
while (true) {
|
||||
if (auto *rea = dyn_cast<RefElementAddrInst>(v)) {
|
||||
if (rea->isImmutable()) {
|
||||
isImmutable = true;
|
||||
if (stopAtImmutable)
|
||||
return {v, true};
|
||||
}
|
||||
}
|
||||
if (auto *rta = dyn_cast<RefTailAddrInst>(v)) {
|
||||
if (rta->isImmutable()) {
|
||||
isImmutable = true;
|
||||
if (stopAtImmutable)
|
||||
return {v, true};
|
||||
}
|
||||
}
|
||||
SILValue v2 = stripCastsWithoutMarkDependence(v);
|
||||
v2 = stripSinglePredecessorArgs(v2);
|
||||
if (Projection::isAddressProjection(v2))
|
||||
v2 = cast<SingleValueInstruction>(v2)->getOperand(0);
|
||||
v2 = stripIndexingInsts(v2);
|
||||
v2 = lookThroughOwnershipInsts(v2);
|
||||
if (v2 == v)
|
||||
return {v2, isImmutable};
|
||||
v = v2;
|
||||
}
|
||||
}
|
||||
|
||||
bool LSLocation::enumerateLSLocation(TypeExpansionContext context, SILModule *M,
|
||||
SILValue Mem,
|
||||
std::vector<LSLocation> &Locations,
|
||||
LSLocationIndexMap &IndexMap,
|
||||
LSLocationBaseMap &BaseMap,
|
||||
TypeExpansionAnalysis *TypeCache) {
|
||||
TypeExpansionAnalysis *TypeCache,
|
||||
bool stopAtImmutable) {
|
||||
// We have processed this SILValue before.
|
||||
if (BaseMap.find(Mem) != BaseMap.end())
|
||||
return;
|
||||
return false;
|
||||
|
||||
// Construct a Location to represent the memory written by this instruction.
|
||||
// ProjectionPath currently does not handle mark_dependence so stop our
|
||||
// underlying object search at these instructions.
|
||||
// We still get a benefit if we cse mark_dependence instructions and then
|
||||
// merge loads from them.
|
||||
SILValue UO = getUnderlyingObjectStopAtMarkDependence(Mem);
|
||||
auto baseAndImmutable = getBaseAddressOrObject(Mem, stopAtImmutable);
|
||||
SILValue UO = baseAndImmutable.first;
|
||||
LSLocation L(UO, ProjectionPath::getProjectionPath(UO, Mem));
|
||||
|
||||
// If we can't figure out the Base or Projection Path for the memory location,
|
||||
// simply ignore it for now.
|
||||
if (!L.isValid())
|
||||
return;
|
||||
return false;
|
||||
|
||||
// Record the SILValue to location mapping.
|
||||
BaseMap[Mem] = L;
|
||||
@@ -281,6 +313,7 @@ void LSLocation::enumerateLSLocation(TypeExpansionContext context, SILModule *M,
|
||||
IndexMap[Loc] = Locations.size();
|
||||
Locations.push_back(Loc);
|
||||
}
|
||||
// Report whether the base is an immutable class projection: that is the
// pair's flag (`second`), not the base value itself (`first`).
return baseAndImmutable.second;
|
||||
}
|
||||
|
||||
void
|
||||
@@ -289,22 +322,26 @@ LSLocation::enumerateLSLocations(SILFunction &F,
|
||||
LSLocationIndexMap &IndexMap,
|
||||
LSLocationBaseMap &BaseMap,
|
||||
TypeExpansionAnalysis *TypeCache,
|
||||
std::pair<int, int> &LSCount) {
|
||||
bool stopAtImmutable,
|
||||
int &numLoads, int &numStores,
|
||||
bool &immutableLoadsFound) {
|
||||
// Enumerate all locations accessed by the loads or stores.
|
||||
for (auto &B : F) {
|
||||
for (auto &I : B) {
|
||||
if (auto *LI = dyn_cast<LoadInst>(&I)) {
|
||||
enumerateLSLocation(F.getTypeExpansionContext(), &I.getModule(),
|
||||
if (enumerateLSLocation(F.getTypeExpansionContext(), &I.getModule(),
|
||||
LI->getOperand(), Locations, IndexMap, BaseMap,
|
||||
TypeCache);
|
||||
++LSCount.first;
|
||||
TypeCache, stopAtImmutable)) {
|
||||
immutableLoadsFound = true;
|
||||
}
|
||||
++numLoads;
|
||||
continue;
|
||||
}
|
||||
if (auto *SI = dyn_cast<StoreInst>(&I)) {
|
||||
enumerateLSLocation(F.getTypeExpansionContext(), &I.getModule(),
|
||||
SI->getDest(), Locations, IndexMap, BaseMap,
|
||||
TypeCache);
|
||||
++LSCount.second;
|
||||
TypeCache, stopAtImmutable);
|
||||
++numStores;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -163,6 +163,22 @@ bb0(%0 : @owned $AB):
|
||||
return %5 : $Int
|
||||
}
|
||||
|
||||
// CHECK-LABEL: sil [ossa] @forward_load_of_immutable_class_property
|
||||
// CHECK: [[L:%[0-9]+]] = load
|
||||
// CHECK: apply %{{[0-9]+}}([[L]])
|
||||
// CHECK-NOT: load
|
||||
// CHECK: return [[L]]
|
||||
// CHECK-LABEL: } // end sil function 'forward_load_of_immutable_class_property'
|
||||
sil [ossa] @forward_load_of_immutable_class_property : $@convention(thin) (@guaranteed AB) -> Int {
|
||||
bb0(%0 : @guaranteed $AB):
|
||||
%1 = ref_element_addr [immutable] %0 : $AB, #AB.value
|
||||
%2 = load [trivial] %1 : $*Int
|
||||
%3 = function_ref @use_Int : $@convention(thin) (Int) -> ()
|
||||
apply %3(%2) : $@convention(thin) (Int) -> ()
|
||||
%5 = load [trivial] %1 : $*Int
|
||||
return %5 : $Int
|
||||
}
|
||||
|
||||
// CHECK-LABEL: sil hidden [ossa] @load_forward_across_end_cow_mutation :
|
||||
// CHECK-NOT: = load
|
||||
// CHECK-LABEL: } // end sil function 'load_forward_across_end_cow_mutation'
|
||||
|
||||
Reference in New Issue
Block a user