[Syntax] Unify RawSyntax and RawTokenSyntax using union and TrailingObjects

It better matches with SwiftSyntax model.

Using TrailingObjects reduces the number of heap allocation which
gains 18% performance improvement.
This commit is contained in:
Rintaro Ishizaki
2018-01-15 21:13:36 +09:00
parent 4c172037e8
commit 0780c529c4
28 changed files with 557 additions and 627 deletions

View File

@@ -29,12 +29,15 @@
#ifndef SWIFT_SYNTAX_RAWSYNTAX_H
#define SWIFT_SYNTAX_RAWSYNTAX_H
#include "swift/Basic/InlineBitfield.h"
#include "swift/Syntax/References.h"
#include "swift/Syntax/SyntaxKind.h"
#include "swift/Syntax/TokenKinds.h"
#include "swift/Syntax/Trivia.h"
#include "llvm/ADT/IntrusiveRefCntPtr.h"
#include "llvm/ADT/PointerUnion.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/TrailingObjects.h"
#include "llvm/Support/raw_ostream.h"
#include <vector>
@@ -53,7 +56,8 @@ using llvm::StringRef;
#define syntax_assert_child_token(Raw, CursorName, ...) \
({ \
bool __Found = false; \
auto __Token = cast<RawTokenSyntax>(Raw->getChild(Cursor::CursorName)); \
auto __Token = Raw->getChild(Cursor::CursorName); \
assert(__Token->isToken()); \
if (__Token->isPresent()) { \
for (auto Token : {__VA_ARGS__}) { \
if (__Token->getTokenKind() == Token) { \
@@ -61,8 +65,8 @@ using llvm::StringRef;
break; \
} \
} \
assert(__Found && "invalid token supplied for " \
#CursorName ", expected one of {" #__VA_ARGS__ "}"); \
assert(__Found && "invalid token supplied for " #CursorName \
", expected one of {" #__VA_ARGS__ "}"); \
} \
})
#else
@@ -72,18 +76,19 @@ using llvm::StringRef;
#ifndef NDEBUG
#define syntax_assert_child_token_text(Raw, CursorName, TokenKind, ...) \
({ \
bool __Found = false; \
auto __Child = cast<RawTokenSyntax>(Raw->getChild(Cursor::CursorName)); \
if (__Child->isPresent()) { \
bool __Found = false; \
auto __Child = Raw->getChild(Cursor::CursorName); \
assert(__Child->isToken()); \
if (__Child->isPresent()) { \
assert(__Child->getTokenKind() == TokenKind); \
for (auto __Text : {__VA_ARGS__}) { \
if (__Child->getText() == __Text) { \
if (__Child->getTokenText() == __Text) { \
__Found = true; \
break; \
} \
} \
assert(__Found && "invalid text supplied for " \
#CursorName ", expected one of {" #__VA_ARGS__ "}"); \
assert(__Found && "invalid text supplied for " #CursorName \
", expected one of {" #__VA_ARGS__ "}"); \
} \
})
#else
@@ -158,22 +163,6 @@ public:
}
}
/// Use some character as a reference for adding to the absolute position,
/// taking note of newlines, etc.
/// Take care that consecutive call of this function with '\r' and '\n'
/// causes increase of 2 Line but desirable result may be 1 Line.
void addCharacter(char C) {
switch (C) {
case '\n':
case '\r':
addNewlines(1, 1);
break;
default:
addColumns(1);
break;
}
}
/// Get the line number of this position.
uint32_t getLine() const { return Line; }
@@ -218,73 +207,201 @@ struct SyntaxPrintOptions {
/// RawSyntax - the strictly immutable, shared backing nodes for all syntax.
///
/// This is implementation detail - do not expose it in public API.
struct RawSyntax : public llvm::ThreadSafeRefCountedBase<RawSyntax> {
class RawSyntax final
: public llvm::ThreadSafeRefCountedBase<RawSyntax>,
private llvm::TrailingObjects<RawSyntax, RC<RawSyntax>, OwnedString,
TriviaPiece> {
friend TrailingObjects;
using LayoutList = std::vector<RC<RawSyntax>>;
union {
uint64_t Clear;
struct {
/// The kind of syntax this node represents.
unsigned Kind : bitmax(NumSyntaxKindBits, 8);
/// Whether this piece of syntax was actually present in the source.
unsigned Presence : 1;
};
enum { NumRawSyntaxBits = bitmax(NumSyntaxKindBits, 8) + 1 };
/// The kind of syntax this node represents.
const SyntaxKind Kind;
// For "layout" nodes.
struct {
uint64_t : bitmax(NumRawSyntaxBits, 32);
/// Number of children this "layout" node has.
unsigned NumChildren : 32;
};
/// The "layout" of the node - representing the children, or the terms
/// in the production of the grammar.
const LayoutList Layout;
// For "token" nodes.
struct {
uint64_t : bitmax(NumRawSyntaxBits, 16);
/// The kind of token this "token" node represents.
unsigned TokenKind : 16;
/// Number of leading trivia pieces.
unsigned NumLeadingTrivia : 16;
/// Number of trailing trivia pieces.
unsigned NumTrailingTrivia : 16;
};
} Bits;
/// Whether this piece of syntax was actually present in the source.
const SourcePresence Presence;
/// Create a piece of raw syntax.
RawSyntax(const SyntaxKind Kind, const std::vector<RC<RawSyntax>> Layout,
const SourcePresence Presence)
: Kind(Kind), Layout(Layout), Presence(Presence) {}
virtual ~RawSyntax() = default;
/// Returns a raw syntax node of the given Kind, specified Layout,
/// and source presence.
static RC<RawSyntax> make(const SyntaxKind Kind, const LayoutList Layout,
const SourcePresence Presence) {
return RC<RawSyntax>{new RawSyntax{Kind, Layout, Presence}};
size_t numTrailingObjects(OverloadToken<RC<RawSyntax>>) const {
return isToken() ? 0 : Bits.NumChildren;
}
size_t numTrailingObjects(OverloadToken<OwnedString>) const {
return isToken() ? 1 : 0;
}
size_t numTrailingObjects(OverloadToken<TriviaPiece>) const {
return isToken() ? Bits.NumLeadingTrivia + Bits.NumTrailingTrivia : 0;
}
/// Returns a raw syntax node of the given Kind, marked as missing.
static RC<RawSyntax> missing(const SyntaxKind Kind) {
RawSyntax(SyntaxKind Kind, ArrayRef<RC<RawSyntax>> Layout,
SourcePresence Presence);
RawSyntax(tok TokKind, OwnedString Text, SourcePresence Presence,
ArrayRef<TriviaPiece> LeadingTrivia,
ArrayRef<TriviaPiece> TrailingTrivia);
public:
~RawSyntax();
/// \name Factory methods.
/// @{
/// Make a raw "layout" syntax node.
static RC<RawSyntax> make(SyntaxKind Kind, ArrayRef<RC<RawSyntax>> Layout,
SourcePresence Presence);
/// Make a raw "token" syntax node.
static RC<RawSyntax> make(tok TokKind, OwnedString Text,
SourcePresence Presence,
ArrayRef<TriviaPiece> LeadingTrivia,
ArrayRef<TriviaPiece> TrailingTrivia);
/// Make a missing raw "layout" syntax node.
static RC<RawSyntax> missing(SyntaxKind Kind) {
return make(Kind, {}, SourcePresence::Missing);
}
/// Make a missing raw "token" syntax node.
static RC<RawSyntax> missing(tok TokKind, OwnedString Text) {
return make(TokKind, Text, SourcePresence::Missing,
ArrayRef<TriviaPiece>{}, ArrayRef<TriviaPiece>{});
}
/// @}
SourcePresence getPresence() const {
return static_cast<SourcePresence>(Bits.Presence);
}
SyntaxKind getKind() const { return static_cast<SyntaxKind>(Bits.Kind); }
/// Returns true if the node is "missing" in the source (i.e. it was
/// expected (or optional) but not written.
bool isMissing() const { return getPresence() == SourcePresence::Missing; }
/// Returns true if the node is "present" in the source.
bool isPresent() const { return getPresence() == SourcePresence::Present; }
/// Returns true if this raw syntax node is some kind of declaration.
bool isDecl() const { return isDeclKind(getKind()); }
/// Returns true if this raw syntax node is some kind of type syntax.
bool isType() const { return isTypeKind(getKind()); }
/// Returns true if this raw syntax node is some kind of statement.
bool isStmt() const { return isStmtKind(getKind()); }
/// Returns true if this raw syntax node is some kind of expression.
bool isExpr() const { return isExprKind(getKind()); }
/// Returns true if this raw syntax node is some kind of pattern.
bool isPattern() const { return isPatternKind(getKind()); }
/// Return true is this raw syntax node is a unknown node.
bool isUnknown() const { return isUnknownKind(getKind()); }
/// Return true if this raw syntax node is a token.
bool isToken() const { return isTokenKind(getKind()); }
/// \name Getter routines for SyntaxKind::Token.
/// @{
/// Get the kind of the token.
tok getTokenKind() const {
assert(isToken());
return static_cast<tok>(Bits.TokenKind);
}
/// Return the text of the token.
StringRef getTokenText() const {
assert(isToken());
return getTrailingObjects<OwnedString>()->str();
}
/// Return the leading trivia list of the token.
ArrayRef<TriviaPiece> getLeadingTrivia() const {
assert(isToken());
return {getTrailingObjects<TriviaPiece>(), Bits.NumLeadingTrivia};
}
/// Return the trailing trivia list of the token.
ArrayRef<TriviaPiece> getTrailingTrivia() const {
assert(isToken());
return {getTrailingObjects<TriviaPiece>() + Bits.NumLeadingTrivia,
Bits.NumTrailingTrivia};
}
/// Return \c true if this is the given kind of token.
bool isToken(tok K) const { return isToken() && getTokenKind() == K; }
/// @}
/// \name Transform routines for "token" nodes.
/// @{
/// Return a new token like this one, but with the given leading
/// trivia instead.
RC<RawSyntax>
withLeadingTrivia(ArrayRef<TriviaPiece> NewLeadingTrivia) const {
return make(getTokenKind(), getTokenText(), getPresence(),
NewLeadingTrivia, getTrailingTrivia());
}
RC<RawSyntax> withLeadingTrivia(Trivia NewLeadingTrivia) const {
return withLeadingTrivia(NewLeadingTrivia.Pieces);
}
/// Return a new token like this one, but with the given trailing
/// trivia instead.
RC<RawSyntax>
withTrailingTrivia(ArrayRef<TriviaPiece> NewTrailingTrivia) const {
return make(getTokenKind(), getTokenText(), getPresence(),
getLeadingTrivia(), NewTrailingTrivia);
}
RC<RawSyntax> withTrailingTrivia(Trivia NewTrailingTrivia) const {
return withTrailingTrivia(NewTrailingTrivia.Pieces);
}
/// @}
/// \name Getter routines for "layout" nodes.
/// @{
/// Get the child nodes.
ArrayRef<RC<RawSyntax>> getLayout() const {
if (isToken())
return {};
return {getTrailingObjects<RC<RawSyntax>>(), Bits.NumChildren};
}
/// Get a child based on a particular node's "Cursor", indicating
/// the position of the terms in the production of the Swift grammar.
template <typename CursorType> RC<RawSyntax> getChild(CursorType C) const {
return Layout[cursorIndex(C)];
const RC<RawSyntax> &getChild(CursorIndex Index) const {
return getLayout()[Index];
}
/// Returns true if the node is "missing" in the source (i.e. it was
/// expected (or optional) but not written.
bool isMissing() const { return Presence == SourcePresence::Missing; }
/// @}
/// Returns true if the node is "present" in the source.
bool isPresent() const {
return Presence == SourcePresence::Present;
}
/// Returns true if this raw syntax node is some kind of declaration.
bool isDecl() const { return isDeclKind(Kind); }
/// Returns true if this raw syntax node is some kind of type syntax.
bool isType() const { return isTypeKind(Kind); }
/// Returns true if this raw syntax node is some kind of statement.
bool isStmt() const { return isStmtKind(Kind); }
/// Returns true if this raw syntax node is some kind of expression.
bool isExpr() const { return isExprKind(Kind); }
/// Returns true if this raw syntax node is some kind of pattern.
bool isPattern() const { return isPatternKind(Kind); }
/// Return true if this raw syntax node is a token.
bool isToken() const { return isTokenKind(Kind); }
bool isUnknown() const { return isUnknownKind(Kind); }
/// \name Transform routines for "layout" nodes.
/// @{
/// Return a new raw syntax node with the given new layout element appended
/// to the end of the node's layout.
@@ -292,21 +409,18 @@ struct RawSyntax : public llvm::ThreadSafeRefCountedBase<RawSyntax> {
/// Return a new raw syntax node with the given new layout element replacing
/// another at some cursor position.
template <typename CursorType>
RC<RawSyntax>
replaceChild(CursorType C, RC<RawSyntax> NewLayoutElement) const {
LayoutList NewLayout;
replaceChild(CursorIndex Index, RC<RawSyntax> NewLayoutElement) const;
std::copy(Layout.begin(), Layout.begin() + cursorIndex(C),
std::back_inserter(NewLayout));
/// @}
NewLayout.push_back(NewLayoutElement);
std::copy(Layout.begin() + cursorIndex(C) + 1, Layout.end(),
std::back_inserter(NewLayout));
return RawSyntax::make(Kind, NewLayout, Presence);
}
/// Advance the provided AbsolutePosition by the full width of this node.
///
/// If this is token node, returns the AbsolutePosition of the start of the
/// token's nontrivial text. Otherwise, return the position of the first
/// token. If this contains no tokens, return None.
llvm::Optional<AbsolutePosition>
accumulateAbsolutePosition(AbsolutePosition &Pos) const;
/// Print this piece of syntax recursively.
void print(llvm::raw_ostream &OS, SyntaxPrintOptions Opts) const;
@@ -315,7 +429,7 @@ struct RawSyntax : public llvm::ThreadSafeRefCountedBase<RawSyntax> {
void dump() const;
/// Dump this piece of syntax recursively.
void dump(llvm::raw_ostream &OS, unsigned Indent) const;
void dump(llvm::raw_ostream &OS, unsigned Indent = 0) const;
};
} // end namespace syntax