Files
swift-mirror/lib/Syntax/RawSyntax.cpp
Alex Hoppen e43bad2c71 [libSyntax] Store the token's text in the SyntaxArena
Do the same thing that we are already doing for trivia: Since RawSyntax
nodes always live inside a SyntaxArena, we don't need to tail-allocate
an OwnedString to store the token's text. Instead we can just copy it
to the SyntaxArena. If we copy the entire source buffer to the syntax
arena at the start of parsing, this means that no more copies are
required later on. Plus we also avoid ref-counting the OwnedString which
should also increase performance.
2021-02-10 09:50:12 +01:00

323 lines
10 KiB
C++

//===--- RawSyntax.cpp - Swift Raw Syntax Implementation ------------------===//
//
// This source file is part of the Swift.org open source project
//
// Copyright (c) 2014 - 2017 Apple Inc. and the Swift project authors
// Licensed under Apache License v2.0 with Runtime Library Exception
//
// See https://swift.org/LICENSE.txt for license information
// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
//
//===----------------------------------------------------------------------===//
#include "swift/Syntax/RawSyntax.h"
#include "swift/Basic/ColorUtils.h"
#include "swift/Parse/Lexer.h"
#include "swift/Syntax/SyntaxArena.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
using llvm::dyn_cast;
using namespace swift;
using namespace swift::syntax;
namespace {
/// Returns true if \p Kind is an unknown kind, a collection kind, or one of
/// the kinds enumerated below. RawSyntax::print uses this to decide whether a
/// node's kind tag may be omitted.
static bool isTrivialSyntaxKind(SyntaxKind Kind) {
  // Whole families of kinds are trivial, whatever the concrete kind is.
  const bool IsTrivialFamily = isUnknownKind(Kind) || isCollectionKind(Kind);
  if (IsTrivialFamily)
    return true;

  // Outside of those families only these specific kinds count as trivial.
  return Kind == SyntaxKind::SourceFile ||
         Kind == SyntaxKind::CodeBlockItem ||
         Kind == SyntaxKind::ExpressionStmt ||
         Kind == SyntaxKind::DeclarationStmt;
}
/// Writes the tag for \p Kind to \p OS: "<Kind>" if \p Open is true and
/// "</Kind>" otherwise. If \p Opts.Visual is set, a green OSColor object is
/// kept alive while the tag is written.
static void printSyntaxKind(SyntaxKind Kind, llvm::raw_ostream &OS,
                            SyntaxPrintOptions Opts, bool Open) {
  // Stays null for non-visual output; otherwise it is destroyed when the
  // function returns, i.e. after the whole tag has been written.
  std::unique_ptr<swift::OSColor> TagColor;
  if (Opts.Visual)
    TagColor.reset(new swift::OSColor(OS, llvm::raw_ostream::GREEN));

  OS << (Open ? "<" : "</");
  dumpSyntaxKind(OS, Kind);
  OS << ">";
}
} // end of anonymous namespace
/// Write a textual name for the token kind \p Kind to \p OS.
void swift::dumpTokenKind(llvm::raw_ostream &OS, tok Kind) {
switch (Kind) {
// Generate one case per TOKEN(X) entry of TokenKinds.def. Each generated case
// prints the entry's identifier X verbatim (stringized with #X).
#define TOKEN(X) \
case tok::X: \
OS << #X; \
break;
#include "swift/Syntax/TokenKinds.def"
// NUM_TOKENS is handled by hand, after the cases generated from the .def file.
case tok::NUM_TOKENS:
OS << "NUM_TOKENS (unset)";
break;
}
}
/// Split the trivia string \p TriviaStr into its individual trivia pieces.
Trivia lexTrivia(StringRef TriviaStr) {
  // FIXME: Have the trivia lexer produce TriviaPieces directly so that the
  // ParsedTriviaPiece -> TriviaPiece conversion below becomes unnecessary.
  auto ParsedPieces = TriviaLexer::lexTrivia(TriviaStr).Pieces;

  // Only the kind and the length of each parsed piece are used here, so keep
  // a running start position to slice every piece's text out of TriviaStr.
  Trivia Result;
  size_t Start = 0;
  for (const auto &Parsed : ParsedPieces) {
    const auto Length = Parsed.getLength();
    StringRef PieceText = TriviaStr.substr(Start, Length);
    Result.push_back(TriviaPiece::fromText(Parsed.getKind(), PieceText));
    Start += Length;
  }
  return Result;
}
/// If the \p Str is not allocated in \p Arena, copy it to \p Arena and adjust
/// \p Str to point to the string's copy in \p Arena.
///
/// \param Str   In/out: the string to relocate. Left untouched if it is empty
///              or if \p Arena already contains its data pointer.
/// \param Arena The arena that should own the string's storage. Taken by
///              reference so that calling this helper does not touch the
///              arena's reference count.
void copyToArenaIfNecessary(StringRef &Str, const RC<SyntaxArena> &Arena) {
  // Empty strings can live wherever they want, and strings that already live
  // in the arena don't need to be copied again.
  if (Str.empty() || Arena->containsPointer(Str.data()))
    return;

  // Copy the string to the arena. Character data has no alignment requirement
  // beyond that of char, so don't ask the arena for pointer alignment.
  const size_t Length = Str.size();
  char *Data = static_cast<char *>(Arena->Allocate(Length, alignof(char)));
  std::copy(Str.begin(), Str.end(), Data);
  Str = StringRef(Data, Length);
}
// The id that the next node constructed without an explicit NodeId receives.
// Starts at 1. The constructors in this file either post-increment it or, when
// an explicit id is supplied, raise it to at least one past that id.
// FIXME: If we want thread-safety for tree creation, this needs to be atomic.
unsigned RawSyntax::NextFreeNodeId = 1;
/// Construct a layout (i.e. non-token) node.
///
/// \param Kind       The node's syntax kind; must not be SyntaxKind::Token.
/// \param Layout     The node's children. Entries may be null.
/// \param TextLength The node's text length; stored narrowed to unsigned.
/// \param Presence   Whether the node is present or missing.
/// \param Arena      The arena the node is allocated in; must not be null.
/// \param NodeId     If set, the id to assign to the node. Otherwise the next
///                   free id is used.
RawSyntax::RawSyntax(SyntaxKind Kind, ArrayRef<RC<RawSyntax>> Layout,
                     size_t TextLength, SourcePresence Presence,
                     const RC<SyntaxArena> &Arena,
                     llvm::Optional<unsigned> NodeId)
    : Arena(Arena), Bits({{unsigned(TextLength), unsigned(Presence), false}}),
      RefCount(0) {
  assert(Arena && "RawSyntax nodes must always be allocated in an arena");
  assert(Kind != SyntaxKind::Token &&
         "'token' syntax node must be constructed with dedicated constructor");

  // Every non-null child contributes itself plus all of its own sub nodes.
  // Iterate by reference: copying an RC would retain and release each child
  // for no reason.
  size_t TotalSubNodeCount = 0;
  for (const auto &Child : Layout) {
    if (Child) {
      TotalSubNodeCount += Child->getTotalSubNodeCount() + 1;
    }
  }

  if (NodeId.hasValue()) {
    this->NodeId = NodeId.getValue();
    // Make sure automatically assigned ids never collide with this one.
    NextFreeNodeId = std::max(this->NodeId + 1, NextFreeNodeId);
  } else {
    this->NodeId = NextFreeNodeId++;
  }

  Bits.Layout.NumChildren = Layout.size();
  Bits.Layout.TotalSubNodeCount = TotalSubNodeCount;
  Bits.Layout.Kind = unsigned(Kind);

  // Initialize layout data. The trailing storage is uninitialized at this
  // point, so the child references have to be copy-constructed in place.
  std::uninitialized_copy(Layout.begin(), Layout.end(),
                          getTrailingObjects<RC<RawSyntax>>());
}
/// Construct a token node of kind \p TokKind with the given \p Text and
/// surrounding trivia. Any of the three strings that does not already live in
/// \p Arena is copied into it. If \p NodeId is set it becomes the node's id,
/// otherwise the next free id is used.
RawSyntax::RawSyntax(tok TokKind, StringRef Text, size_t TextLength,
                     StringRef LeadingTrivia, StringRef TrailingTrivia,
                     SourcePresence Presence, const RC<SyntaxArena> &Arena,
                     llvm::Optional<unsigned> NodeId)
    : Arena(Arena), Bits({{unsigned(TextLength), unsigned(Presence), true}}),
      RefCount(0) {
  assert(Arena && "RawSyntax nodes must always be allocated in an arena");

  // Assign the node id first; it is independent of the string handling below.
  if (!NodeId.hasValue()) {
    this->NodeId = NextFreeNodeId++;
  } else {
    this->NodeId = NodeId.getValue();
    // Keep automatically assigned ids from colliding with the explicit one.
    NextFreeNodeId = std::max(this->NodeId + 1, NextFreeNodeId);
  }

  Bits.Token.TokenKind = unsigned(TokKind);

  // Move the strings into the arena (leading trivia, text, trailing trivia,
  // in that order) and store the possibly re-pointed references.
  copyToArenaIfNecessary(LeadingTrivia, Arena);
  Bits.Token.LeadingTrivia = LeadingTrivia;

  copyToArenaIfNecessary(Text, Arena);
  Bits.Token.TokenText = Text;

  copyToArenaIfNecessary(TrailingTrivia, Arena);
  Bits.Token.TrailingTrivia = TrailingTrivia;
}
/// Destroys the child references of a layout node. Token nodes have nothing
/// to release here.
RawSyntax::~RawSyntax() {
  if (isToken())
    return;

  // The layout constructor copy-constructs the child references into the
  // trailing storage, so each of them is destroyed explicitly.
  for (auto &ChildRef : getLayout())
    ChildRef.~RC<RawSyntax>();
}
/// Allocate a layout node in \p Arena and construct it in place from the
/// given kind, children, text length, presence and optional node id.
RC<RawSyntax> RawSyntax::make(SyntaxKind Kind, ArrayRef<RC<RawSyntax>> Layout,
                              size_t TextLength, SourcePresence Presence,
                              const RC<SyntaxArena> &Arena,
                              llvm::Optional<unsigned> NodeId) {
  assert(Arena && "RawSyntax nodes must always be allocated in an arena");

  // Request room for the node plus one trailing RC<RawSyntax> per child.
  const auto AllocSize = totalSizeToAlloc<RC<RawSyntax>>(Layout.size());
  void *Storage = Arena->Allocate(AllocSize, alignof(RawSyntax));

  auto *Node = new (Storage)
      RawSyntax(Kind, Layout, TextLength, Presence, Arena, NodeId);
  return RC<RawSyntax>(Node);
}
/// Allocate a token node in \p Arena and construct it in place from the given
/// token kind, text, trivia, presence and optional node id.
RC<RawSyntax> RawSyntax::make(tok TokKind, StringRef Text, size_t TextLength,
                              StringRef LeadingTrivia, StringRef TrailingTrivia,
                              SourcePresence Presence,
                              const RC<SyntaxArena> &Arena,
                              llvm::Optional<unsigned> NodeId) {
  assert(Arena && "RawSyntax nodes must always be allocated in an arena");

  // Tokens are allocated with zero trailing RC<RawSyntax> slots.
  const auto AllocSize = totalSizeToAlloc<RC<RawSyntax>>(0);
  void *Storage = Arena->Allocate(AllocSize, alignof(RawSyntax));

  auto *Token = new (Storage)
      RawSyntax(TokKind, Text, TextLength, LeadingTrivia, TrailingTrivia,
                Presence, Arena, NodeId);
  return RC<RawSyntax>(Token);
}
/// Lex this node's leading trivia text into its individual trivia pieces.
/// The text is re-lexed on every call.
Trivia RawSyntax::getLeadingTriviaPieces() const {
  StringRef LeadingText = getLeadingTrivia();
  return lexTrivia(LeadingText);
}
/// Lex this node's trailing trivia text into its individual trivia pieces.
/// The text is re-lexed on every call.
Trivia RawSyntax::getTrailingTriviaPieces() const {
  StringRef TrailingText = getTrailingTrivia();
  return lexTrivia(TrailingText);
}
/// Create a new node of the same kind whose layout is this node's layout
/// followed by \p NewLayoutElement. The new node is always marked as present;
/// this node is left unchanged.
RC<RawSyntax> RawSyntax::append(RC<RawSyntax> NewLayoutElement) const {
  const auto OldLayout = getLayout();

  // Reserve the final size up front so that building the layout allocates
  // only once.
  std::vector<RC<RawSyntax>> Extended;
  Extended.reserve(OldLayout.size() + 1);
  Extended.insert(Extended.end(), OldLayout.begin(), OldLayout.end());
  Extended.push_back(NewLayoutElement);

  return RawSyntax::makeAndCalcLength(getKind(), Extended,
                                      SourcePresence::Present);
}
/// Create a new node of the same kind and presence whose layout equals this
/// node's layout, except that the child at \p Index is replaced by
/// \p NewLayoutElement. This node is left unchanged.
///
/// \param Index            Position of the child to replace. Must be a valid
///                         index into this node's layout.
/// \param NewLayoutElement The replacement child.
RC<RawSyntax> RawSyntax::replacingChild(CursorIndex Index,
                                        RC<RawSyntax> NewLayoutElement) const {
  auto Layout = getLayout();
  // Without this check an out-of-range index would form iterators past the
  // end of the layout below, which is undefined behavior.
  assert(static_cast<size_t>(Index) < Layout.size() &&
         "Cannot replace a child that is outside of the node's layout");

  std::vector<RC<RawSyntax>> NewLayout;
  NewLayout.reserve(Layout.size());
  // Children before the replaced one ...
  NewLayout.insert(NewLayout.end(), Layout.begin(), Layout.begin() + Index);
  // ... the replacement itself ...
  NewLayout.push_back(NewLayoutElement);
  // ... and the children after it.
  NewLayout.insert(NewLayout.end(), Layout.begin() + Index + 1, Layout.end());

  return RawSyntax::makeAndCalcLength(getKind(), NewLayout, getPresence());
}
/// Print this node to \p OS. Missing nodes print nothing. Tokens print their
/// leading trivia, text and trailing trivia. Layout nodes print all non-null
/// children in order, optionally wrapped in kind tags as requested by \p Opts.
void RawSyntax::print(llvm::raw_ostream &OS, SyntaxPrintOptions Opts) const {
  if (isMissing())
    return;

  if (isToken()) {
    OS << getLeadingTrivia() << getTokenText() << getTrailingTrivia();
    return;
  }

  const auto Kind = getKind();
  // Kind tags are opt-in, and tags of trivial kinds need a second opt-in.
  const bool ShowTrivialKinds =
      Opts.PrintTrivialNodeKind || !isTrivialSyntaxKind(Kind);
  const bool PrintKind = Opts.PrintSyntaxKind && ShowTrivialKinds;

  if (PrintKind)
    printSyntaxKind(Kind, OS, Opts, /*Open=*/true);

  for (const auto &Element : getLayout()) {
    if (!Element)
      continue;
    Element->print(OS, Opts);
  }

  if (PrintKind)
    printSyntaxKind(Kind, OS, Opts, /*Open=*/false);
}
void RawSyntax::dump() const {
dump(llvm::errs(), /*Indent*/ 0);
llvm::errs() << '\n';
}
/// Dump this node to \p OS as a parenthesized tree, indented by \p Indent
/// spaces. Tokens list their token kind, leading trivia pieces, text and
/// trailing trivia pieces; layout nodes list their non-null children. Nested
/// entries are indented by one additional space.
void RawSyntax::dump(llvm::raw_ostream &OS, unsigned Indent) const {
  const unsigned NestedIndent = Indent + 1;

  OS.indent(Indent);
  OS << '(';
  dumpSyntaxKind(OS, getKind());
  if (isMissing())
    OS << " [missing] ";

  if (!isToken()) {
    // Layout node: every non-null child goes on its own line.
    for (auto &Child : getLayout()) {
      if (Child) {
        OS << "\n";
        Child->dump(OS, NestedIndent);
      }
    }
    OS << ')';
    return;
  }

  // Token node: kind first, then trivia and text on separate lines.
  OS << " ";
  dumpTokenKind(OS, getTokenKind());

  for (auto &Leader : getLeadingTriviaPieces()) {
    OS << "\n";
    Leader.dump(OS, NestedIndent);
  }

  OS << "\n";
  OS.indent(NestedIndent);
  OS << "(text=\"";
  OS.write_escaped(getTokenText(), /*UseHexEscapes=*/true);
  OS << "\")";

  for (auto &Trailer : getTrailingTriviaPieces()) {
    OS << "\n";
    Trailer.dump(OS, NestedIndent);
  }

  OS << ')';
}
/// Add the identifying properties of a token to the folding set id \p ID: its
/// kind, the sizes of both trivia strings, its text (only for some token
/// kinds, see below) and finally the contents of both trivia strings.
void RawSyntax::Profile(llvm::FoldingSetNodeID &ID, tok TokKind, StringRef Text,
StringRef LeadingTrivia, StringRef TrailingTrivia) {
ID.AddInteger(unsigned(TokKind));
ID.AddInteger(LeadingTrivia.size());
ID.AddInteger(TrailingTrivia.size());
switch (TokKind) {
// Turn the punctuator, keyword and pound-keyword entries of TokenKinds.def
// into case labels. All of these cases share the single `break` below, so the
// text of such tokens is not added to the id.
// NOTE(review): presumably because the kind already implies the text for these
// tokens - confirm against TokenKinds.def.
#define TOKEN_DEFAULT(NAME) case tok::NAME:
#define PUNCTUATOR(NAME, X) TOKEN_DEFAULT(NAME)
#define KEYWORD(KW) TOKEN_DEFAULT(kw_##KW)
#define POUND_KEYWORD(KW) TOKEN_DEFAULT(pound_##KW)
#include "swift/Syntax/TokenKinds.def"
break;
default:
// Every other token kind is additionally identified by its text.
ID.AddString(Text);
break;
}
ID.AddString(LeadingTrivia);
ID.AddString(TrailingTrivia);
}