Demangler: further speed improvements.

Avoid using std::string and std::vector in the demangler. Instead use vectors/strings with storage allocated by the NodeFactory’s bump pointer allocator.
This brings another 35% speedup, especially when the Demangle::Context is not reused for subsequent demanglings.
This commit is contained in:
Erik Eckstein
2017-03-03 14:37:26 -08:00
parent 3456d04925
commit f220a3b727
3 changed files with 208 additions and 99 deletions

View File

@@ -207,6 +207,9 @@ public:
// Only to be used by the demangler parsers.
void addChild(NodePointer Child, NodeFactory &Factory);
// Reverses the order of children.
void reverseChildren(size_t StartingAt = 0);
};
/// Returns true if the mangledName starts with the swift mangling prefix.

View File

@@ -35,6 +35,8 @@ using llvm::StringRef;
namespace swift {
namespace Demangle {
class CharVector;
/// The allocator for demangling nodes and other demangling-internal stuff.
///
/// It implements a simple bump-pointer allocator.
@@ -77,14 +79,14 @@ public:
#endif
}
~NodeFactory() {
virtual ~NodeFactory() {
freeSlabs(CurrentSlab);
#ifdef NODE_FACTORY_DEBUGGING
std::cerr << "Delete NodeFactory " << this << "\n";
#endif
}
void clear();
virtual void clear();
/// Allocates an object of type T or an array of objects of type T.
template<typename T> T *Allocate(size_t NumObjects = 1) {
@@ -173,6 +175,12 @@ public:
/// The \p Text string is copied.
NodePointer createNode(Node::Kind K, llvm::StringRef Text);
/// Creates a node of kind \p K with a \p Text payload.
///
/// The \p Text string is already allocated with the Factory and therefore
/// it is _not_ copied.
NodePointer createNode(Node::Kind K, const CharVector &Text);
/// Creates a node of kind \p K with a \p Text payload, which must be a C
/// string literal.
///
@@ -180,6 +188,92 @@ public:
NodePointer createNode(Node::Kind K, const char *Text);
};
/// A vector with a storage managed by a NodeFactory.
///
/// This Vector class only provides the minimal functionality needed by the
/// Demangler. The element buffer is obtained from a NodeFactory; this class
/// has no destructor and never releases the buffer itself.
template<typename T> class Vector {

protected:
  /// Start of the element buffer, or nullptr if no buffer is attached.
  T *Elems = nullptr;
  /// Number of elements currently stored.
  size_t NumElems = 0;
  /// Number of elements the current buffer can hold.
  size_t Capacity = 0;

public:
  typedef T *iterator;

  /// Construct an empty vector without a buffer.
  Vector() { }

  /// Construct a vector with an initial capacity.
  explicit Vector(NodeFactory &Factory, size_t InitialCapacity) {
    init(Factory, InitialCapacity);
  }

  /// Clears the content and re-allocates the buffer with an initial capacity.
  void init(NodeFactory &Factory, size_t InitialCapacity) {
    Elems = Factory.Allocate<T>(InitialCapacity);
    NumElems = 0;
    Capacity = InitialCapacity;
  }

  /// Detaches the vector from its buffer and leaves it empty.
  ///
  /// The buffer itself is not released here. The element count is reset as
  /// well, so that size()/empty() never report elements which would live in
  /// a null buffer.
  void free() {
    NumElems = 0;
    Capacity = 0;
    Elems = nullptr;
  }

  iterator begin() { return Elems; }
  iterator end() { return Elems + NumElems; }

  /// Element access; \p Idx must be smaller than size().
  T &operator[](size_t Idx) {
    assert(Idx < NumElems);
    return Elems[Idx];
  }

  /// Element access; \p Idx must be smaller than size().
  const T &operator[](size_t Idx) const {
    assert(Idx < NumElems);
    return Elems[Idx];
  }

  size_t size() const { return NumElems; }

  bool empty() const { return NumElems == 0; }

  /// Returns the last element. The vector must not be empty.
  T &back() { return (*this)[NumElems - 1]; }

  /// Appends \p NewElem, growing the buffer through \p Factory if it is full.
  void push_back(const T &NewElem, NodeFactory &Factory) {
    if (NumElems >= Capacity)
      Factory.Reallocate(Elems, Capacity, /*Growth*/ 1);
    // Reallocate is expected to have made room for at least one more element.
    assert(NumElems < Capacity);
    Elems[NumElems++] = NewElem;
  }

  /// Removes and returns the last element, or a value-initialized T if the
  /// vector is empty.
  T pop_back_val() {
    if (empty())
      return T();
    T Val = (*this)[NumElems - 1];
    NumElems--;
    return Val;
  }
};
/// A growable character buffer (a string) whose storage comes from a
/// NodeFactory.
///
/// Like Vector, this class only offers what the Demangler needs.
class CharVector : public Vector<char> {
public:
  /// Appends the characters of \p Rhs, growing the buffer via \p Factory if
  /// required.
  void append(StringRef Rhs, NodeFactory &Factory);

  /// Appends the decimal text representation of \p Number, growing the buffer
  /// via \p Factory if required.
  void append(int Number, NodeFactory &Factory);

  /// Returns a view of the current content. No copy is made, so the view
  /// refers directly to this vector's buffer.
  StringRef str() const {
    StringRef Content(Elems, NumElems);
    return Content;
  }
};
/// The demangler.
///
/// It de-mangles a string and it also owns the returned node-tree. This means
@@ -194,22 +288,14 @@ private:
size_t Pos;
};
std::vector<NodeWithPos> NodeStack;
std::vector<NodePointer> Substitutions;
std::vector<unsigned> PendingSubstitutions;
Vector<NodeWithPos> NodeStack;
Vector<NodePointer> Substitutions;
Vector<unsigned> PendingSubstitutions;
static const int MaxNumWords = 26;
StringRef Words[MaxNumWords];
int NumWords = 0;
static NodePointer pop_back_val(std::vector<NodePointer> &NodeVector) {
if (NodeVector.empty())
return nullptr;
NodePointer Val = NodeVector.back();
NodeVector.pop_back();
return Val;
}
bool nextIf(StringRef str) {
if (!Text.substr(Pos).startswith(str)) return false;
Pos += str.size();
@@ -241,16 +327,11 @@ private:
}
void pushNode(NodePointer Nd) {
NodeStack.push_back({ Nd, Pos });
NodeStack.push_back({ Nd, Pos }, *this);
}
NodePointer popNode() {
if (!NodeStack.empty()) {
NodePointer Val = NodeStack.back().Node;
NodeStack.pop_back();
return Val;
}
return nullptr;
return NodeStack.pop_back_val().Node;
}
NodePointer popNode(Node::Kind kind) {
@@ -279,7 +360,7 @@ private:
void addSubstitution(NodePointer Nd) {
if (Nd)
Substitutions.push_back(Nd);
Substitutions.push_back(Nd, *this);
}
NodePointer addChild(NodePointer Parent, NodePointer Child);
@@ -326,7 +407,7 @@ private:
NodePointer popProtocol();
NodePointer demangleBoundGenericType();
NodePointer demangleBoundGenericArgs(NodePointer nominalType,
const std::vector<NodePointer> &TypeLists,
const Vector<NodePointer> &TypeLists,
size_t TypeListIdx);
NodePointer demangleInitializer();
NodePointer demangleImplParamConvention();
@@ -368,6 +449,8 @@ private:
public:
Demangler() {}
void clear() override;
/// Demangle the given symbol and return the parse tree.
///
/// \param MangledName The mangled symbol string, which starts with the

View File

@@ -310,6 +310,11 @@ void Node::addChild(NodePointer Child, Context &Ctx) {
addChild(Child, *Ctx.D);
}
/// Reverses the order of the children in the range [StartingAt, NumChildren).
/// Children before \p StartingAt keep their position.
void Node::reverseChildren(size_t StartingAt) {
  assert(StartingAt <= NumChildren);
  auto RangeBegin = Children + StartingAt;
  auto RangeEnd = Children + NumChildren;
  std::reverse(RangeBegin, RangeEnd);
}
//////////////////////////////////
// NodeFactory member functions //
//////////////////////////////////
@@ -350,18 +355,49 @@ NodePointer NodeFactory::createNode(Node::Kind K, Node::IndexType Index) {
NodePointer NodeFactory::createNode(Node::Kind K, llvm::StringRef Text) {
return new (Allocate<Node>()) Node(K, Text.copy(*this));
}
/// Creates a node of kind \p K whose text payload is the content of \p Text.
/// The characters are not copied: the node refers to the buffer of \p Text.
NodePointer NodeFactory::createNode(Node::Kind K, const CharVector &Text) {
  // Place the node into storage obtained from this factory.
  void *NodeStorage = Allocate<Node>();
  return new (NodeStorage) Node(K, Text.str());
}
NodePointer NodeFactory::createNode(Node::Kind K, const char *Text) {
return new (Allocate<Node>()) Node(K, llvm::StringRef(Text));
}
//////////////////////////////////
// CharVector member functions //
//////////////////////////////////
/// Appends the characters of \p Rhs to the end of this vector.
///
/// The buffer is grown through \p Factory if the new characters do not fit
/// into the current capacity.
void CharVector::append(StringRef Rhs, NodeFactory &Factory) {
  const size_t RhsSize = Rhs.size();
  // Nothing to copy. Returning early also avoids calling memcpy with a null
  // pointer, which happens when this vector has no buffer yet and/or Rhs is a
  // default-constructed StringRef; that is undefined even for a zero size.
  if (RhsSize == 0)
    return;
  if (NumElems + RhsSize > Capacity)
    Factory.Reallocate(Elems, Capacity, /*Growth*/ RhsSize);
  // Check that there is enough room before writing into the buffer.
  assert(NumElems + RhsSize <= Capacity);
  memcpy(Elems + NumElems, Rhs.data(), RhsSize);
  NumElems += RhsSize;
}
/// Appends \p Number in its decimal text representation ("%d").
///
/// The buffer is grown through \p Factory if needed. The terminating NUL
/// written by snprintf is not counted as part of the content.
void CharVector::append(int Number, NodeFactory &Factory) {
  // Upper bound for the printed size of an int, including sign and the
  // terminating NUL: every byte contributes at most 3 decimal digits.
  // The previous bound of 8 was too small for values >= 10000000: snprintf
  // truncated the output but still returned the full length, so NumElems
  // advanced past the characters which were actually written.
  const int MaxIntPrintSize = (int)sizeof(int) * 3 + 2;
  if (NumElems + MaxIntPrintSize > Capacity)
    Factory.Reallocate(Elems, Capacity, /*Growth*/ MaxIntPrintSize);
  int Length = snprintf(Elems + NumElems, MaxIntPrintSize, "%d", Number);
  // A return value >= MaxIntPrintSize would mean that the output was
  // truncated.
  assert(Length > 0 && Length < MaxIntPrintSize);
  NumElems += Length;
}
//////////////////////////////////
// Demangler member functions //
//////////////////////////////////
/// Resets the demangler: detaches all internal vectors from their buffers
/// and then clears the underlying NodeFactory.
void Demangler::clear() {
  // The vectors are detached first, so that none of them still points into
  // factory storage once NodeFactory::clear() has run.
  PendingSubstitutions.free();
  Substitutions.free();
  NodeStack.free();
  NodeFactory::clear();
}
void Demangler::init(StringRef MangledName) {
NodeStack.clear();
Substitutions.clear();
PendingSubstitutions.clear();
NodeStack.init(*this, 16);
Substitutions.init(*this, 16);
PendingSubstitutions.init(*this, 4);
NumWords = 0;
Text = MangledName;
Pos = 0;
@@ -681,7 +717,7 @@ NodePointer Demangler::demangleIdentifier() {
hasWordSubsts = true;
}
}
std::string Identifier;
CharVector Identifier;
do {
while (hasWordSubsts && isLetter(peekChar())) {
char c = nextChar();
@@ -697,7 +733,7 @@ NodePointer Demangler::demangleIdentifier() {
return nullptr;
assert(WordIdx < MaxNumWords);
StringRef Slice = Words[WordIdx];
Identifier.append(Slice.data(), Slice.size());
Identifier.append(Slice, *this);
}
if (nextIf('0'))
break;
@@ -710,10 +746,12 @@ NodePointer Demangler::demangleIdentifier() {
return nullptr;
StringRef Slice = StringRef(Text.data() + Pos, numChars);
if (isPunycoded) {
if (!Punycode::decodePunycodeUTF8(Slice, Identifier))
std::string PunycodedString;
if (!Punycode::decodePunycodeUTF8(Slice, PunycodedString))
return nullptr;
Identifier.append(StringRef(PunycodedString), *this);
} else {
Identifier.append(Slice.data(), Slice.size());
Identifier.append(Slice, *this);
int wordStartPos = -1;
for (int Idx = 0, End = (int)Slice.size(); Idx <= End; ++Idx) {
char c = (Idx < End ? Slice[Idx] : 0);
@@ -744,12 +782,11 @@ NodePointer Demangler::demangleOperatorIdentifier() {
static const char op_char_table[] = "& @/= > <*!|+?%-~ ^ .";
std::string OpStr;
OpStr.reserve(Ident->getText().size());
CharVector OpStr;
for (signed char c : Ident->getText()) {
if (c < 0) {
// Pass through Unicode characters.
OpStr.push_back(c);
OpStr.push_back(c, *this);
continue;
}
if (!isLowerLetter(c))
@@ -757,7 +794,7 @@ NodePointer Demangler::demangleOperatorIdentifier() {
char o = op_char_table[c - 'a'];
if (o == ' ')
return nullptr;
OpStr.push_back(o);
OpStr.push_back(o, *this);
}
switch (nextChar()) {
case 'i': return createNode(Node::Kind::InfixOperator, OpStr);
@@ -828,16 +865,20 @@ NodePointer Demangler::demangleBuiltinType() {
int size = demangleIndex() - 1;
if (size <= 0)
return nullptr;
Ty = createNode(Node::Kind::BuiltinTypeName,
std::move(DemanglerPrinter() << "Builtin.Float" << size).str());
CharVector name;
name.append("Builtin.Float", *this);
name.append(size, *this);
Ty = createNode(Node::Kind::BuiltinTypeName, name);
break;
}
case 'i': {
int size = demangleIndex() - 1;
if (size <= 0)
return nullptr;
Ty = createNode(Node::Kind::BuiltinTypeName,
(DemanglerPrinter() << "Builtin.Int" << size).str());
CharVector name;
name.append("Builtin.Int", *this);
name.append(size, *this);
Ty = createNode(Node::Kind::BuiltinTypeName, name);
break;
}
case 'v': {
@@ -848,9 +889,12 @@ NodePointer Demangler::demangleBuiltinType() {
if (!EltType || EltType->getKind() != Node::Kind::BuiltinTypeName ||
!EltType->getText().startswith("Builtin."))
return nullptr;
Ty = createNode(Node::Kind::BuiltinTypeName,
(DemanglerPrinter() << "Builtin.Vec" << elts << "x" <<
EltType->getText().substr(sizeof("Builtin.") - 1)).str());
CharVector name;
name.append("Builtin.Vec", *this);
name.append(elts, *this);
name.push_back('x', *this);
name.append(EltType->getText().substr(sizeof("Builtin.") - 1), *this);
Ty = createNode(Node::Kind::BuiltinTypeName, name);
break;
}
case 'O':
@@ -936,7 +980,6 @@ NodePointer Demangler::popTuple() {
Node::Kind::NonVariadicTuple);
if (!popNode(Node::Kind::EmptyList)) {
std::vector<NodePointer> Nodes;
bool firstElem = false;
do {
firstElem = (popNode(Node::Kind::FirstElementMarker) != nullptr);
@@ -949,12 +992,10 @@ NodePointer Demangler::popTuple() {
if (!Ty)
return nullptr;
TupleElmt->addChild(Ty, *this);
Nodes.push_back(TupleElmt);
Root->addChild(TupleElmt, *this);
} while (!firstElem);
while (NodePointer TupleElmt = pop_back_val(Nodes)) {
Root->addChild(TupleElmt, *this);
}
Root->reverseChildren();
}
return createType(Root);
}
@@ -963,18 +1004,16 @@ NodePointer Demangler::popTypeList() {
NodePointer Root = createNode(Node::Kind::TypeList);
if (!popNode(Node::Kind::EmptyList)) {
std::vector<NodePointer> Nodes;
bool firstElem = false;
do {
firstElem = (popNode(Node::Kind::FirstElementMarker) != nullptr);
NodePointer Ty = popNode(Node::Kind::Type);
if (!Ty)
return nullptr;
Nodes.push_back(Ty);
} while (!firstElem);
while (NodePointer Ty = pop_back_val(Nodes)) {
Root->addChild(Ty, *this);
}
} while (!firstElem);
Root->reverseChildren();
}
return Root;
}
@@ -987,17 +1026,15 @@ NodePointer Demangler::popProtocol() {
}
NodePointer Demangler::demangleBoundGenericType() {
std::vector<NodePointer> TypeListList;
std::vector<NodePointer> Types;
Vector<NodePointer> TypeListList(*this, 4);
for (;;) {
NodePointer TList = createNode(Node::Kind::TypeList);
TypeListList.push_back(TList);
TypeListList.push_back(TList, *this);
while (NodePointer Ty = popNode(Node::Kind::Type)) {
Types.push_back(Ty);
}
while (NodePointer Ty = pop_back_val(Types)) {
TList->addChild(Ty, *this);
}
TList->reverseChildren();
if (popNode(Node::Kind::EmptyList))
break;
if (!popNode(Node::Kind::FirstElementMarker))
@@ -1008,7 +1045,7 @@ NodePointer Demangler::demangleBoundGenericType() {
}
NodePointer Demangler::demangleBoundGenericArgs(NodePointer Nominal,
const std::vector<NodePointer> &TypeLists,
const Vector<NodePointer> &TypeLists,
size_t TypeListIdx) {
if (!Nominal || Nominal->getNumChildren() < 2)
return nullptr;
@@ -1057,7 +1094,7 @@ NodePointer Demangler::demangleBoundGenericArgs(NodePointer Nominal,
}
NodePointer Demangler::demangleImplParamConvention() {
StringRef attr;
const char *attr = nullptr;
switch (nextChar()) {
case 'i': attr = "@in"; break;
case 'l': attr = "@inout"; break;
@@ -1076,7 +1113,7 @@ NodePointer Demangler::demangleImplParamConvention() {
}
NodePointer Demangler::demangleImplResultConvention(Node::Kind ConvKind) {
StringRef attr;
const char *attr = nullptr;
switch (nextChar()) {
case 'r': attr = "@out"; break;
case 'o': attr = "@owned"; break;
@@ -1192,19 +1229,6 @@ NodePointer Demangler::demangleMetatype() {
}
}
static std::string getArchetypeName(Node::IndexType index,
Node::IndexType depth) {
DemanglerPrinter name;
do {
name << (char)('A' + (index % 26));
index /= 26;
} while (index);
if (depth != 0)
name << depth;
return std::move(name).str();
}
NodePointer Demangler::demangleArchetype() {
switch (nextChar()) {
case 'a': {
@@ -1257,19 +1281,19 @@ NodePointer Demangler::demangleAssociatedTypeSimple(
NodePointer Demangler::demangleAssociatedTypeCompound(
NodePointer GenericParamIdx) {
std::vector<NodePointer> AssocTyNames;
Vector<NodePointer> AssocTyNames(*this, 4);
bool firstElem = false;
do {
firstElem = (popNode(Node::Kind::FirstElementMarker) != nullptr);
NodePointer AssocTyName = popAssocTypeName();
if (!AssocTyName)
return nullptr;
AssocTyNames.push_back(AssocTyName);
AssocTyNames.push_back(AssocTyName, *this);
} while (!firstElem);
NodePointer Base = GenericParamIdx;
while (NodePointer AssocTy = pop_back_val(AssocTyNames)) {
while (NodePointer AssocTy = AssocTyNames.pop_back_val()) {
NodePointer depTy = createNode(Node::Kind::DependentMemberType);
depTy = addChild(depTy, createType(Base));
Base = addChild(depTy, AssocTy);
@@ -1291,11 +1315,17 @@ NodePointer Demangler::popAssocTypeName() {
NodePointer Demangler::getDependentGenericParamType(int depth, int index) {
if (depth < 0 || index < 0)
return nullptr;
DemanglerPrinter PrintName;
PrintName << getArchetypeName(index, depth);
auto paramTy = createNode(Node::Kind::DependentGenericParamType,
std::move(PrintName).str());
CharVector name;
int idxChar = index;
do {
name.push_back((char)('A' + (idxChar % 26)), *this);
idxChar /= 26;
} while (idxChar);
if (depth != 0)
name.append(depth, *this);
auto paramTy = createNode(Node::Kind::DependentGenericParamType, name);
paramTy->addChild(createNode(Node::Kind::Index, depth), *this);
paramTy->addChild(createNode(Node::Kind::Index, index), *this);
return paramTy;
@@ -1428,10 +1458,10 @@ NodePointer Demangler::demangleFunctionSpecialization() {
case FunctionSigSpecializationParamKind::ConstantPropGlobal:
case FunctionSigSpecializationParamKind::ConstantPropString:
case FunctionSigSpecializationParamKind::ClosureProp: {
std::vector<NodePointer> Types;
size_t FixedChildren = Param->getNumChildren();
while (NodePointer Ty = popNode(Node::Kind::Type)) {
assert(ParamKind == FunctionSigSpecializationParamKind::ClosureProp);
Types.push_back(Ty);
Param = addChild(Param, Ty);
}
NodePointer Name = popNode(Node::Kind::Identifier);
if (!Name)
@@ -1445,9 +1475,7 @@ NodePointer Demangler::demangleFunctionSpecialization() {
}
addChild(Param, createNode(
Node::Kind::FunctionSignatureSpecializationParamPayload, Text));
while (NodePointer Ty = pop_back_val(Types)) {
Param = addChild(Param, Ty);
}
Param->reverseChildren(FixedChildren);
break;
}
default:
@@ -1555,9 +1583,9 @@ NodePointer Demangler::addFuncSpecParamNumber(NodePointer Param,
Param->addChild(createNode(
Node::Kind::FunctionSignatureSpecializationParamKind, unsigned(Kind)),
*this);
std::string Str;
CharVector Str;
while (isDigit(peekChar())) {
Str += nextChar();
Str.push_back(nextChar(), *this);
}
if (Str.empty())
return nullptr;
@@ -1857,28 +1885,21 @@ NodePointer Demangler::demangleProtocolListType() {
NodePointer TypeList = createNode(Node::Kind::TypeList);
NodePointer ProtoList = createWithChild(Node::Kind::ProtocolList, TypeList);
if (!popNode(Node::Kind::EmptyList)) {
std::vector<NodePointer> ProtoNames;
bool firstElem = false;
do {
firstElem = (popNode(Node::Kind::FirstElementMarker) != nullptr);
NodePointer Proto = popProtocol();
if (!Proto)
return nullptr;
ProtoNames.push_back(Proto);
TypeList->addChild(Proto, *this);
} while (!firstElem);
while (NodePointer Proto = pop_back_val(ProtoNames)) {
TypeList->addChild(Proto, *this);
}
TypeList->reverseChildren();
}
return createType(ProtoList);
}
NodePointer Demangler::demangleGenericSignature(bool hasParamCounts) {
std::vector<NodePointer> Requirements;
while (NodePointer Req = popNode(isRequirement)) {
Requirements.push_back(Req);
}
NodePointer Sig = createNode(Node::Kind::DependentGenericSignature);
if (hasParamCounts) {
while (!nextIf('l')) {
@@ -1896,9 +1917,11 @@ NodePointer Demangler::demangleGenericSignature(bool hasParamCounts) {
}
if (Sig->getNumChildren() == 0)
return nullptr;
while (NodePointer Req = pop_back_val(Requirements)) {
size_t NumCounts = Sig->getNumChildren();
while (NodePointer Req = popNode(isRequirement)) {
Sig->addChild(Req, *this);
}
Sig->reverseChildren(NumCounts);
return Sig;
}
@@ -1961,7 +1984,7 @@ NodePointer Demangler::demangleGenericRequirement() {
auto c = nextChar();
NodePointer size = nullptr;
NodePointer alignment = nullptr;
StringRef name;
const char *name = nullptr;
if (c == 'U') {
name = "U";
} else if (c == 'R') {