Files
swift-mirror/lib/Syntax/RawSyntax.cpp
Alex Hoppen c1d65de89c [libSyntax] Optimise layout of RawSyntax to be more space efficient
This decreases the size of RawSyntax nodes from 88 to 64 bytes by
- Avoiding some padding by moving RefCount further up
- Limiting the length of tokens and their trivia to 32 bits. We would
  hit this limit with files >4GB but we also hit this limit at other
  places like the TextLength property in the Common bits.
2021-02-10 09:50:12 +01:00

333 lines
10 KiB
C++

//===--- RawSyntax.cpp - Swift Raw Syntax Implementation ------------------===//
//
// This source file is part of the Swift.org open source project
//
// Copyright (c) 2014 - 2017 Apple Inc. and the Swift project authors
// Licensed under Apache License v2.0 with Runtime Library Exception
//
// See https://swift.org/LICENSE.txt for license information
// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
//
//===----------------------------------------------------------------------===//
#include "swift/Syntax/RawSyntax.h"
#include "swift/Basic/ColorUtils.h"
#include "swift/Parse/Lexer.h"
#include "swift/Syntax/SyntaxArena.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cassert>
#include <utility>
using llvm::dyn_cast;
using namespace swift;
using namespace swift::syntax;
namespace {
/// Returns true if \p Kind is an unknown kind, a collection kind, or one of
/// SourceFile, CodeBlockItem, ExpressionStmt and DeclarationStmt.
///
/// RawSyntax::print uses this to decide whether a node's kind tag is printed
/// when SyntaxPrintOptions::PrintTrivialNodeKind is off.
static bool isTrivialSyntaxKind(SyntaxKind Kind) {
  if (isUnknownKind(Kind) || isCollectionKind(Kind))
    return true;

  return Kind == SyntaxKind::SourceFile ||
         Kind == SyntaxKind::CodeBlockItem ||
         Kind == SyntaxKind::ExpressionStmt ||
         Kind == SyntaxKind::DeclarationStmt;
}
/// Writes the tag for \p Kind to \p OS: "<Kind>" when \p Open is true and
/// "</Kind>" otherwise.
///
/// When Opts.Visual is set, an OSColor for GREEN is created on \p OS before
/// the tag is written and destroyed when this function returns.
static void printSyntaxKind(SyntaxKind Kind, llvm::raw_ostream &OS,
                            SyntaxPrintOptions Opts, bool Open) {
  // Owns the colour object (if any) for the duration of the tag output.
  std::unique_ptr<swift::OSColor> TagColor(
      Opts.Visual ? new swift::OSColor(OS, llvm::raw_ostream::GREEN)
                  : nullptr);

  OS << (Open ? "<" : "</");
  dumpSyntaxKind(OS, Kind);
  OS << ">";
}
} // end of anonymous namespace
/// Write a textual name for the token kind \p Kind to \p OS.
///
/// Kinds covered by the TOKEN macro are printed as their enumerator name;
/// tok::NUM_TOKENS is printed as "NUM_TOKENS (unset)".
void swift::dumpTokenKind(llvm::raw_ostream &OS, tok Kind) {
switch (Kind) {
// X-macro: TokenKinds.def is expected to invoke TOKEN(X) for each token kind,
// which generates one case label per kind that prints the stringified
// enumerator name.
#define TOKEN(X) \
case tok::X: \
OS << #X; \
break;
#include "swift/Syntax/TokenKinds.def"
// NUM_TOKENS is handled by hand, outside the generated cases.
case tok::NUM_TOKENS:
OS << "NUM_TOKENS (unset)";
break;
}
}
/// Lex \p TriviaStr and return its pieces as a Trivia.
///
/// Each piece's text is the slice of \p TriviaStr that starts where the
/// previous piece ended and spans the piece's reported length.
Trivia lexTrivia(StringRef TriviaStr) {
  // FIXME: The trivia lexer should directly create TriviaPieces so we don't
  // need the conversion from ParsedTriviaPiece to TriviaPiece

  // Step 1: lex into ParsedTriviaPieces (kind + length only).
  auto ParsedPieces = TriviaLexer::lexTrivia(TriviaStr).Pieces;

  // Step 2: turn every ParsedTriviaPiece into a TriviaPiece that carries the
  // matching slice of the input text.
  Trivia Result;
  size_t Consumed = 0;
  for (auto Parsed : ParsedPieces) {
    const auto PieceLength = Parsed.getLength();
    Result.push_back(TriviaPiece::fromText(
        Parsed.getKind(), TriviaStr.substr(Consumed, PieceLength)));
    Consumed += PieceLength;
  }
  return Result;
}
/// If the \p Str is not allocated in \p Arena, copy it to \p Arena and adjust
/// \p Str to point to the string's copy in \p Arena.
///
/// Empty strings and strings whose data pointer already lies inside \p Arena
/// are left untouched.
void copyToArenaIfNecessary(StringRef &Str, const RC<SyntaxArena> Arena) {
  if (Str.empty()) {
    // Empty strings can live wherever they want. Nothing to do.
    return;
  }
  if (Arena->containsPointer(Str.data())) {
    // String already in arena. Nothing to do.
    return;
  }
  // Copy string to arena. The buffer only holds chars, so it needs no more
  // than char alignment; asking for pointer alignment would just add padding
  // in front of each copied string.
  char *Data = static_cast<char *>(Arena->Allocate(Str.size(), alignof(char)));
  std::copy(Str.begin(), Str.end(), Data);
  Str = StringRef(Data, Str.size());
}
// Id given to the next RawSyntax node that is constructed without an explicit
// node id. Starts at 1; the constructors post-increment it when they take an
// id from it, and raise it past any explicitly supplied id.
// FIXME: If we want thread-safety for tree creation, this needs to be atomic.
unsigned RawSyntax::NextFreeNodeId = 1;
/// Construct a layout (non-token) node of kind \p Kind in place.
///
/// \param Layout     The children; copied into the node's trailing storage.
///                   Null entries are allowed and do not count as sub-nodes.
/// \param TextLength The node's text length; stored as an unsigned value.
/// \param Presence   Whether the node is present or missing in the source.
/// \param Arena      The arena the node lives in; must not be null.
/// \param NodeId     Explicit id for the node. If absent, the next free id is
///                   used. If present, NextFreeNodeId is raised past it.
RawSyntax::RawSyntax(SyntaxKind Kind, ArrayRef<RC<RawSyntax>> Layout,
                     size_t TextLength, SourcePresence Presence,
                     const RC<SyntaxArena> &Arena,
                     llvm::Optional<unsigned> NodeId)
    : RefCount(0), Arena(Arena),
      Bits({{unsigned(TextLength), unsigned(Presence), false}}) {
  assert(Arena && "RawSyntax nodes must always be allocated in an arena");
  assert(Kind != SyntaxKind::Token &&
         "'token' syntax node must be constructed with dedicated constructor");
  // The length is narrowed to unsigned above; catch silent truncation.
  assert(unsigned(TextLength) == TextLength &&
         "TextLength does not fit into the node's length field");

  // Every non-null child contributes itself plus all of its descendants.
  // Iterate by reference so no child's reference count is touched.
  size_t TotalSubNodeCount = 0;
  for (const auto &Child : Layout) {
    if (Child) {
      TotalSubNodeCount += Child->getTotalSubNodeCount() + 1;
    }
  }

  if (NodeId.hasValue()) {
    this->NodeId = NodeId.getValue();
    // Keep automatically assigned ids from colliding with the explicit one.
    NextFreeNodeId = std::max(this->NodeId + 1, NextFreeNodeId);
  } else {
    this->NodeId = NextFreeNodeId++;
  }

  Bits.Layout.NumChildren = Layout.size();
  Bits.Layout.TotalSubNodeCount = TotalSubNodeCount;
  Bits.Layout.Kind = unsigned(Kind);

  // Initialize layout data: copy-construct the children into the
  // uninitialized trailing storage.
  std::uninitialized_copy(Layout.begin(), Layout.end(),
                          getTrailingObjects<RC<RawSyntax>>());
}
/// Construct a token node of kind \p TokKind in place.
///
/// \p LeadingTrivia, \p Text and \p TrailingTrivia are copied into \p Arena
/// (in that order) unless they are empty or already point into it; the node
/// then stores pointers and lengths for the three strings.
///
/// \param TextLength Must be 0 for missing tokens and otherwise the sum of
///                   the three string lengths; stored as an unsigned value.
/// \param Presence   Whether the token is present or missing in the source.
/// \param Arena      The arena the node lives in; must not be null.
/// \param NodeId     Explicit id for the node. If absent, the next free id is
///                   used. If present, NextFreeNodeId is raised past it.
RawSyntax::RawSyntax(tok TokKind, StringRef Text, size_t TextLength,
                     StringRef LeadingTrivia, StringRef TrailingTrivia,
                     SourcePresence Presence, const RC<SyntaxArena> &Arena,
                     llvm::Optional<unsigned> NodeId)
    : RefCount(0), Arena(Arena),
      Bits({{unsigned(TextLength), unsigned(Presence), true}}) {
  assert(Arena && "RawSyntax nodes must always be allocated in an arena");
  // The length is narrowed to unsigned above; catch silent truncation.
  assert(unsigned(TextLength) == TextLength &&
         "TextLength does not fit into the node's length field");

  // After these calls the three StringRefs point into the arena (or are
  // empty), so the raw pointers stored below stay valid with the arena.
  copyToArenaIfNecessary(LeadingTrivia, Arena);
  copyToArenaIfNecessary(Text, Arena);
  copyToArenaIfNecessary(TrailingTrivia, Arena);

  if (Presence == SourcePresence::Missing) {
    assert(TextLength == 0);
  } else {
    assert(TextLength ==
           LeadingTrivia.size() + Text.size() + TrailingTrivia.size());
  }

  if (NodeId.hasValue()) {
    this->NodeId = NodeId.getValue();
    // Keep automatically assigned ids from colliding with the explicit one.
    NextFreeNodeId = std::max(this->NodeId + 1, NextFreeNodeId);
  } else {
    this->NodeId = NextFreeNodeId++;
  }

  Bits.Token.LeadingTrivia = LeadingTrivia.data();
  Bits.Token.TokenText = Text.data();
  Bits.Token.TrailingTrivia = TrailingTrivia.data();
  Bits.Token.LeadingTriviaLength = LeadingTrivia.size();
  Bits.Token.TokenLength = Text.size();
  Bits.Token.TrailingTriviaLength = TrailingTrivia.size();
  Bits.Token.TokenKind = unsigned(TokKind);
}
/// Destroys the child references held in the layout. Token nodes have no
/// layout to destroy.
RawSyntax::~RawSyntax() {
  if (isToken())
    return;

  // The children were constructed in place in the trailing storage, so they
  // are destroyed by explicit destructor calls.
  for (auto &Child : getLayout())
    Child.~RC<RawSyntax>();
}
/// Allocate a layout node in \p Arena and construct it there.
///
/// The allocation is sized to hold the node plus one trailing RC<RawSyntax>
/// per element of \p Layout. All arguments are forwarded to the layout
/// constructor.
RC<RawSyntax> RawSyntax::make(SyntaxKind Kind, ArrayRef<RC<RawSyntax>> Layout,
                              size_t TextLength, SourcePresence Presence,
                              const RC<SyntaxArena> &Arena,
                              llvm::Optional<unsigned> NodeId) {
  assert(Arena && "RawSyntax nodes must always be allocated in an arena");

  const auto ByteCount = totalSizeToAlloc<RC<RawSyntax>>(Layout.size());
  void *Storage = Arena->Allocate(ByteCount, alignof(RawSyntax));

  RawSyntax *Node = new (Storage)
      RawSyntax(Kind, Layout, TextLength, Presence, Arena, NodeId);
  return RC<RawSyntax>(Node);
}
/// Allocate a token node in \p Arena and construct it there.
///
/// The allocation requests zero trailing RC<RawSyntax> objects. All
/// arguments are forwarded to the token constructor.
RC<RawSyntax> RawSyntax::make(tok TokKind, StringRef Text, size_t TextLength,
                              StringRef LeadingTrivia, StringRef TrailingTrivia,
                              SourcePresence Presence,
                              const RC<SyntaxArena> &Arena,
                              llvm::Optional<unsigned> NodeId) {
  assert(Arena && "RawSyntax nodes must always be allocated in an arena");

  const auto ByteCount = totalSizeToAlloc<RC<RawSyntax>>(0);
  void *Storage = Arena->Allocate(ByteCount, alignof(RawSyntax));

  RawSyntax *Token =
      new (Storage) RawSyntax(TokKind, Text, TextLength, LeadingTrivia,
                              TrailingTrivia, Presence, Arena, NodeId);
  return RC<RawSyntax>(Token);
}
/// Lex the node's leading trivia text into its pieces. The text is re-lexed
/// on every call.
Trivia RawSyntax::getLeadingTriviaPieces() const {
  const auto LeadingText = getLeadingTrivia();
  return lexTrivia(LeadingText);
}
/// Lex the node's trailing trivia text into its pieces. The text is re-lexed
/// on every call.
Trivia RawSyntax::getTrailingTriviaPieces() const {
  const auto TrailingText = getTrailingTrivia();
  return lexTrivia(TrailingText);
}
/// Return a new node of the same kind whose layout is this node's layout
/// followed by \p NewLayoutElement.
///
/// This node is not modified. The new node is always created with
/// SourcePresence::Present and its length is computed by makeAndCalcLength.
RC<RawSyntax> RawSyntax::append(RC<RawSyntax> NewLayoutElement) const {
  const auto Layout = getLayout();

  std::vector<RC<RawSyntax>> NewLayout;
  NewLayout.reserve(Layout.size() + 1);
  NewLayout.insert(NewLayout.end(), Layout.begin(), Layout.end());
  // The parameter is a by-value sink: move it into place instead of copying
  // it, which avoids an extra reference-count increment/decrement.
  NewLayout.push_back(std::move(NewLayoutElement));

  return RawSyntax::makeAndCalcLength(getKind(), NewLayout,
                                      SourcePresence::Present);
}
/// Return a new node of the same kind and presence whose layout equals this
/// node's layout with the child at \p Index replaced by \p NewLayoutElement.
///
/// This node is not modified. \p Index must be a valid position in the
/// layout. The new node's length is computed by makeAndCalcLength.
RC<RawSyntax> RawSyntax::replacingChild(CursorIndex Index,
                                        RC<RawSyntax> NewLayoutElement) const {
  const auto Layout = getLayout();
  // Forming Layout.begin() + Index + 1 below is only valid for an in-range
  // index, so reject programmer errors up front.
  assert(Index < Layout.size() && "Index is out of bounds of the layout");

  std::vector<RC<RawSyntax>> NewLayout;
  NewLayout.reserve(Layout.size());
  // Children before the replaced slot.
  NewLayout.insert(NewLayout.end(), Layout.begin(), Layout.begin() + Index);
  // The replacement itself; moved because the parameter is a by-value sink.
  NewLayout.push_back(std::move(NewLayoutElement));
  // Children after the replaced slot.
  NewLayout.insert(NewLayout.end(), Layout.begin() + Index + 1, Layout.end());

  return RawSyntax::makeAndCalcLength(getKind(), NewLayout, getPresence());
}
/// Print the source text represented by this node to \p OS.
///
/// Missing nodes print nothing. Tokens print leading trivia, token text and
/// trailing trivia. Layout nodes print their non-null children in order,
/// wrapped in kind tags when Opts.PrintSyntaxKind is set and either
/// Opts.PrintTrivialNodeKind is set or the kind is not trivial.
void RawSyntax::print(llvm::raw_ostream &OS, SyntaxPrintOptions Opts) const {
  if (isMissing())
    return;

  if (isToken()) {
    OS << getLeadingTrivia() << getTokenText() << getTrailingTrivia();
    return;
  }

  const auto NodeKind = getKind();
  bool EmitKindTags = false;
  if (Opts.PrintSyntaxKind)
    EmitKindTags = Opts.PrintTrivialNodeKind || !isTrivialSyntaxKind(NodeKind);

  if (EmitKindTags)
    printSyntaxKind(NodeKind, OS, Opts, /*Open=*/true);

  for (const auto &Child : getLayout()) {
    if (!Child)
      continue;
    Child->print(OS, Opts);
  }

  if (EmitKindTags)
    printSyntaxKind(NodeKind, OS, Opts, /*Open=*/false);
}
void RawSyntax::dump() const {
dump(llvm::errs(), /*Indent*/ 0);
llvm::errs() << '\n';
}
/// Dump a parenthesised, tree-shaped description of this node to \p OS,
/// starting with \p Indent spaces.
///
/// Tokens list their token kind, each leading trivia piece, the escaped token
/// text and each trailing trivia piece. Layout nodes list their non-null
/// children. Nested entries start on a new line and are indented one more
/// space than this node.
void RawSyntax::dump(llvm::raw_ostream &OS, unsigned Indent) const {
  const unsigned NestedIndent = Indent + 1;

  OS.indent(Indent) << '(';
  dumpSyntaxKind(OS, getKind());

  if (isMissing())
    OS << " [missing] ";

  if (!isToken()) {
    // Layout node: recurse into every child that is present in the layout.
    for (auto &Child : getLayout()) {
      if (Child) {
        OS << "\n";
        Child->dump(OS, NestedIndent);
      }
    }
    OS << ')';
    return;
  }

  // Token node.
  OS << " ";
  dumpTokenKind(OS, getTokenKind());

  for (auto &Leader : getLeadingTriviaPieces()) {
    OS << "\n";
    Leader.dump(OS, NestedIndent);
  }

  OS << "\n";
  OS.indent(NestedIndent) << "(text=\"";
  OS.write_escaped(getTokenText(), /*UseHexEscapes=*/true) << "\")";

  for (auto &Trailer : getTrailingTriviaPieces()) {
    OS << "\n";
    Trailer.dump(OS, NestedIndent);
  }

  OS << ')';
}
/// Add the data identifying a token to the folding-set id \p ID.
///
/// The token kind and the sizes of both trivia strings are added first. The
/// token text is added next, but only for kinds that are not matched by the
/// generated TOKEN_DEFAULT cases. The leading and trailing trivia strings are
/// added last. The order of the Add* calls is part of the resulting id, so it
/// must not be changed.
void RawSyntax::Profile(llvm::FoldingSetNodeID &ID, tok TokKind, StringRef Text,
StringRef LeadingTrivia, StringRef TrailingTrivia) {
ID.AddInteger(unsigned(TokKind));
ID.AddInteger(LeadingTrivia.size());
ID.AddInteger(TrailingTrivia.size());
switch (TokKind) {
// The macros below turn the punctuator, keyword and pound-keyword entries of
// TokenKinds.def into case labels that share the single 'break' after the
// include, so Text is not added for those kinds.
// NOTE(review): presumably because their text is fully determined by the
// kind - confirm against TokenKinds.def.
#define TOKEN_DEFAULT(NAME) case tok::NAME:
#define PUNCTUATOR(NAME, X) TOKEN_DEFAULT(NAME)
#define KEYWORD(KW) TOKEN_DEFAULT(kw_##KW)
#define POUND_KEYWORD(KW) TOKEN_DEFAULT(pound_##KW)
#include "swift/Syntax/TokenKinds.def"
break;
default:
// Every other kind is distinguished by its text as well.
ID.AddString(Text);
break;
}
ID.AddString(LeadingTrivia);
ID.AddString(TrailingTrivia);
}