[Remangler] Improve performance by caching hashes.

The deepHash() function gets called repeatedly as we descend the
node tree, which results in O(n^2) behaviour because, for each node
we try substitution on, we traverse that node's entire subtree in
order to calculate its hash.

Fix by adding a hash table for hashes, so that we can look up hashes
we've already computed.

This appears to yield a 26.8% saving in local tests.

rdar://125739630
This commit is contained in:
Alastair Houghton
2024-04-04 16:02:08 +01:00
parent f751131719
commit 5ece45a61f
4 changed files with 129 additions and 43 deletions

View File

@@ -242,7 +242,7 @@ bool Remangler::trySubstitution(Node *node, SubstitutionEntry &entry) {
return true;
// Go ahead and initialize the substitution entry.
entry.setNode(node, /*treatAsIdentifier=*/ false);
entry = entryForNode(node);
int Idx = findSubstitution(entry);
if (Idx < 0)

View File

@@ -59,36 +59,6 @@ bool SubstitutionEntry::identifierEquals(Node *lhs, Node *rhs) {
return true;
}
/// Fold \p node and, recursively, all of its children into the running
/// hash maintained through combineHash().
///
/// When the entry is treated as an identifier, the hash is seeded with
/// Node::Kind::Identifier instead of the node's own kind, and operator nodes
/// contribute only their text with every character passed through
/// translateOperatorChar().
void SubstitutionEntry::deepHash(Node *node) {
  if (!treatAsIdentifier) {
    combineHash(static_cast<size_t>(node->getKind()));
  } else {
    combineHash(static_cast<size_t>(Node::Kind::Identifier));
    assert(node->hasText());

    const auto kind = node->getKind();
    const bool isOperator = kind == Node::Kind::InfixOperator ||
                            kind == Node::Kind::PrefixOperator ||
                            kind == Node::Kind::PostfixOperator;
    if (isOperator) {
      // Operators hash only their translated text; index and children are
      // deliberately not visited on this path.
      for (char ch : node->getText())
        combineHash(static_cast<unsigned char>(translateOperatorChar(ch)));
      return;
    }
  }

  // Payload: an index takes precedence over text.
  if (node->hasIndex()) {
    combineHash(node->getIndex());
  } else if (node->hasText()) {
    for (char ch : node->getText())
      combineHash(static_cast<unsigned char>(ch));
  }

  // Descend into the children in order.
  for (Node *kid : *node)
    deepHash(kid);
}
bool SubstitutionEntry::deepEquals(Node *lhs, Node *rhs) const {
if (lhs->getKind() != rhs->getKind())
return false;
@@ -114,10 +84,108 @@ bool SubstitutionEntry::deepEquals(Node *lhs, Node *rhs) const {
if (!deepEquals(*li, *ri))
return false;
}
return true;
}
/// Fold \p newValue into \p currentHash with a multiply-by-33 step.
///
/// \returns 33 * currentHash + newValue, computed in wrapping size_t
/// arithmetic.
static inline size_t combineHash(size_t currentHash, size_t newValue) {
  // (h << 5) + h is 32*h + h == 33*h in modular unsigned arithmetic.
  const size_t scaled = (currentHash << 5) + currentHash;
  return scaled + newValue;
}
/// Compute the hash for \p node.
///
/// The hash is seeded with the node's kind (or Node::Kind::Identifier when
/// \p treatAsIdentifier is set), then folds in the node's index or text, and
/// finally the hash of each child as returned by entryForNode(). Operator
/// nodes that are treated as identifiers hash only their text, with every
/// character passed through translateOperatorChar().
///
/// \param node              the node to hash.
/// \param treatAsIdentifier hash the node as if it were an identifier.
/// \returns the combined hash value.
size_t RemanglerBase::hashForNode(Node *node,
                                  bool treatAsIdentifier) {
  const size_t seed =
      treatAsIdentifier ? static_cast<size_t>(Node::Kind::Identifier)
                        : static_cast<size_t>(node->getKind());
  size_t result = combineHash(0, seed);

  if (treatAsIdentifier) {
    assert(node->hasText());

    const auto kind = node->getKind();
    const bool isOperator = kind == Node::Kind::InfixOperator ||
                            kind == Node::Kind::PrefixOperator ||
                            kind == Node::Kind::PostfixOperator;
    if (isOperator) {
      // Operators contribute only their translated text; index and children
      // are not consulted on this path.
      for (char ch : node->getText())
        result = combineHash(
            result, static_cast<unsigned char>(translateOperatorChar(ch)));
      return result;
    }
  }

  // Payload: an index takes precedence over text.
  if (node->hasIndex()) {
    result = combineHash(result, node->getIndex());
  } else if (node->hasText()) {
    for (char ch : node->getText())
      result = combineHash(result, static_cast<unsigned char>(ch));
  }

  // Children contribute through entryForNode(), which can return an entry
  // whose hash has already been computed.
  for (Node *kid : *node) {
    const SubstitutionEntry kidEntry = entryForNode(kid, treatAsIdentifier);
    result = combineHash(result, kidEntry.hash());
  }
  return result;
}
/// Rotate a size_t right by \p shift bits.
///
/// The shift amount is reduced modulo the bit width of size_t, so a shift of
/// zero (or any multiple of the width) returns \p value unchanged. Without
/// that reduction the left shift below would be by the full width of the
/// type, which is undefined behaviour.
///
/// \param value the value to rotate.
/// \param shift number of bit positions to rotate right by.
/// \returns \p value rotated right by `shift % (sizeof(size_t) * 8)` bits.
static inline size_t rotate(size_t value, size_t shift) {
  const size_t bits = sizeof(size_t) * 8;
  shift %= bits;
  if (shift == 0)
    return value;
  // Here 0 < shift < bits, so both shift counts are in range.
  return (value >> shift) | (value << (bits - shift));
}
/// Compute a hash value from a node *pointer* (not from the node's
/// contents). Used for look-ups in HashHash. The numbers in here were
/// determined experimentally.
static inline size_t nodeHash(Node *node) {
  // Multiply the pointer value by a magic number.
  const size_t scaled = reinterpret_cast<size_t>(node) * 2043;

  // We rotate by a different amount because the alignment of Node
  // changes depending on the machine's pointer size. Twelve bits is used
  // for 8-byte size_t and for any size not listed explicitly.
  size_t amount = 12;
  if (sizeof(size_t) == 4)
    amount = 11;
  else if (sizeof(size_t) == 16)
    amount = 13;

  return rotate(scaled, amount);
}
/// Build the SubstitutionEntry for \p node.
///
/// HashHash is consulted first, keyed on the node pointer and the
/// \p treatAsIdentifier flag, so that a hash already computed for this pair
/// is reused instead of hashing the node tree recursively again. The table is
/// searched with linear probing, limited to HashHashMaxProbes slots. If every
/// probed slot is taken by some other entry, the hash is computed and the
/// entry is returned without being cached.
///
/// \param node              the node to build an entry for.
/// \param treatAsIdentifier whether the node is hashed as an identifier.
/// \returns an entry referring to \p node and carrying its hash.
SubstitutionEntry RemanglerBase::entryForNode(Node *node,
                                              bool treatAsIdentifier) {
  // Offset the starting slot so the identifier and non-identifier entries
  // for the same node begin probing at different positions.
  const size_t start = nodeHash(node) + (treatAsIdentifier ? 4 : 0);
  const size_t mask = HashHashCapacity - 1;

  // Walk the probe sequence, stopping at the first empty slot or at a slot
  // that already describes this (node, treatAsIdentifier) pair.
  SubstitutionEntry *freeSlot = nullptr;
  for (size_t probe = 0; probe != HashHashMaxProbes; ++probe) {
    SubstitutionEntry &slot = HashHash[(start + probe) & mask];
    if (slot.isEmpty()) {
      freeSlot = &slot;
      break;
    }
    if (slot.matches(node, treatAsIdentifier))
      return slot;
  }

  // Not cached: compute the hash now. This may recurse back into
  // entryForNode() for the children and populate other table slots.
  SubstitutionEntry result;
  result.setNode(node, treatAsIdentifier,
                 hashForNode(node, treatAsIdentifier));

  // Remember the result if the probe sequence found room for it; otherwise
  // the table is full at this hash value and the entry is returned uncached.
  if (freeSlot)
    *freeSlot = result;
  return result;
}
// Find a substitution and return its index.
// Returns -1 if no substitution is found.
int RemanglerBase::findSubstitution(const SubstitutionEntry &entry) {
@@ -356,7 +424,7 @@ bool Remangler::trySubstitution(Node *node, SubstitutionEntry &entry,
return true;
// Go ahead and initialize the substitution entry.
entry.setNode(node, treatAsIdentifier);
entry = entryForNode(node, treatAsIdentifier);
int Idx = findSubstitution(entry);
if (Idx < 0)

View File

@@ -42,10 +42,10 @@ class SubstitutionEntry {
bool treatAsIdentifier = false;
public:
void setNode(Node *node, bool treatAsIdentifier) {
void setNode(Node *node, bool treatAsIdentifier, size_t hash) {
this->treatAsIdentifier = treatAsIdentifier;
TheNode = node;
deepHash(node);
StoredHash = hash;
}
struct Hasher {
@@ -54,6 +54,14 @@ public:
}
};
/// True when this entry does not refer to any node.
bool isEmpty() const { return TheNode == nullptr; }
/// True when this entry refers to exactly \p node (pointer identity) and
/// was recorded with the same \p treatAsIdentifier flag.
bool matches(Node *node, bool treatAsIdentifier) const {
  if (TheNode != node)
    return false;
  return this->treatAsIdentifier == treatAsIdentifier;
}
/// Returns the hash value stored in this entry (StoredHash).
size_t hash() const { return StoredHash; }
private:
friend bool operator==(const SubstitutionEntry &lhs,
const SubstitutionEntry &rhs) {
@@ -69,12 +77,6 @@ private:
static bool identifierEquals(Node *lhs, Node *rhs);
/// Fold \p newValue into StoredHash with a multiply-by-33 step.
void combineHash(size_t newValue) {
  const size_t scaled = StoredHash * 33;
  StoredHash = scaled + newValue;
}
void deepHash(Node *node);
bool deepEquals(Node *lhs, Node *rhs) const;
};
@@ -131,6 +133,13 @@ protected:
// Used to allocate temporary nodes and the output string (in Buffer).
NodeFactory &Factory;
// Recursively calculating the node hashes can be expensive if the node tree
// is deep, so we keep a hash table mapping (Node *, treatAsIdentifier) pairs
// to hashes.
static const size_t HashHashCapacity = 512; // Must be a power of 2
static const size_t HashHashMaxProbes = 8;
SubstitutionEntry HashHash[HashHashCapacity] = {};
// An efficient hash-map implementation in the spirit of llvm's SmallPtrSet:
// The first 16 substitutions are stored in an inline-allocated array to avoid
// malloc calls in the common case.
@@ -148,7 +157,16 @@ protected:
RemanglerBuffer Buffer;
protected:
RemanglerBase(NodeFactory &Factory) : Factory(Factory), Buffer(Factory) { }
RemanglerBase(NodeFactory &Factory)
: Factory(Factory), Buffer(Factory) { }
/// Compute the hash for a node.
size_t hashForNode(Node *node, bool treatAsIdentifier = false);
/// Construct a SubstitutionEntry for a given node.
/// This will look in the HashHash to see if we already know the hash,
/// to avoid having to walk the entire subtree.
SubstitutionEntry entryForNode(Node *node, bool treatAsIdentifier = false);
/// Find a substitution and return its index.
/// Returns -1 if no substitution is found.

View File

@@ -1 +1 @@
Error: (3:340) unable to re-mangle $sBf32__t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t
_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_tN
Error: (3:408) unable to re-mangle $sBf32__t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t
_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_tN