[libSyntax] Add a reference counted version of OwnedString

We cannot use unowned strings for token texts of incrementally parsed
syntax trees since the source buffer that reused nodes refer to will
already have been freed. Always copying the token text whenever an
OwnedString is passed is too expensive. A reference counted copy of the
string allows us to keep the token's string alive across incremental
parses while eliminating unnecessary copies.
This commit is contained in:
Alex Hoppen
2018-08-13 11:52:16 -07:00
parent a03749a743
commit ac512d4341
10 changed files with 132 additions and 273 deletions

View File

@@ -22,117 +22,89 @@
#include "llvm/ADT/IntrusiveRefCntPtr.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/TrailingObjects.h"
using llvm::StringRef;
namespace swift {
enum class StringOwnership {
/// An OwnedString holds a weak reference to the underlying string storage
/// and will never attempt to free it.
Unowned,
/// An OwnedString has its own copy of the underlying string storage and
/// will free the storage upon its destruction.
Copied,
};
/// Holds a string - either statically allocated or dynamically allocated
/// and owned by this type.
class OwnedString {
const char *Data;
size_t Length;
StringOwnership Ownership = StringOwnership::Unowned;
void release() {
if (Ownership == StringOwnership::Copied)
free(const_cast<char *>(Data));
/// An owner that keeps the buffer of a ref counted \c OwnedString alive.
class TextOwner final : public llvm::ThreadSafeRefCountedBase<TextOwner>,
public llvm::TrailingObjects<TextOwner, char> {
TextOwner(StringRef Text) {
std::uninitialized_copy(Text.begin(), Text.end(),
getTrailingObjects<char>());
}
void initialize(const char* Data, size_t Length, StringOwnership Ownership) {
this->Length = Length;
this->Ownership = Ownership;
if (Ownership == StringOwnership::Copied && Data) {
char *substring = static_cast<char *>(malloc(Length + 1));
assert(substring && "expected successful malloc of copy");
memcpy(substring, Data, Length);
substring[Length] = '\0';
this->Data = substring;
}
else
this->Data = Data;
}
OwnedString(const char* Data, size_t Length, StringOwnership Ownership) {
initialize(Data, Length, Ownership);
}
public:
OwnedString(): OwnedString(nullptr, 0, StringOwnership::Unowned) {}
OwnedString(const char *Data, size_t Length):
OwnedString(Data, Length, StringOwnership::Copied) {}
OwnedString(StringRef Str) : OwnedString(Str.data(), Str.size()) {}
OwnedString(const char *Data) : OwnedString(StringRef(Data)) {}
OwnedString(const OwnedString &Other):
OwnedString(Other.Data, Other.Length, Other.Ownership) {}
OwnedString(OwnedString &&Other): Data(Other.Data), Length(Other.Length),
Ownership(Other.Ownership) {
Other.Data = nullptr;
Other.Ownership = StringOwnership::Unowned;
static TextOwner *make(StringRef Text) {
auto size = totalSizeToAlloc<char>(Text.size());
void *data = ::operator new(size);
return new (data) TextOwner(Text);
}
OwnedString& operator=(const OwnedString &Other) {
if (&Other != this) {
release();
initialize(Other.Data, Other.Length, Other.Ownership);
}
return *this;
const char *getText() const { return getTrailingObjects<char>(); }
};
/// The text this owned string represents
StringRef Text;
/// In case of a ref counted string an owner that keeps the buffer \c Text
/// references alive.
llvm::IntrusiveRefCntPtr<TextOwner> OwnedPtr;
/// Designated constructor used by all public constructors and factories.
///
/// \param Text The string this \c OwnedString represents.
/// \param OwnedPtr The owner keeping the buffer behind \p Text alive, or
/// \c nullptr if the string does not own its buffer.
///
/// \p OwnedPtr is taken by value and moved into the member so that handing
/// over an owner does not cost an additional retain/release pair on the
/// thread-safe reference count.
OwnedString(StringRef Text, llvm::IntrusiveRefCntPtr<TextOwner> OwnedPtr)
    : Text(Text), OwnedPtr(std::move(OwnedPtr)) {}
public:
OwnedString() : OwnedString(/*Text=*/StringRef(), /*OwnedPtr=*/nullptr) {}
/// Create a ref counted \c OwnedString that is initialized with the text of
/// the given \c StringRef.
OwnedString(StringRef Str) : OwnedString(makeRefCounted(Str)) {}
/// Create a ref counted \c OwnedString that is initialized with the text of
/// the given buffer.
OwnedString(const char *Str) : OwnedString(StringRef(Str)) {}
/// Create an \c OwnedString that references the given string. The
/// \c OwnedString will not take ownership of that buffer and will assume that
/// the buffer outlives its lifetime.
///
/// No copy of \p Str is made: the result is constructed with a null owner and
/// simply views the caller's buffer.
static OwnedString makeUnowned(StringRef Str) {
return OwnedString(Str, /*OwnedPtr=*/nullptr);
}
OwnedString& operator=(OwnedString &&Other) {
if (&Other != this) {
release();
this->Data = Other.Data;
this->Length = Other.Length;
this->Ownership = Other.Ownership;
Other.Ownership = StringOwnership::Unowned;
Other.Data = nullptr;
/// Create an \c OwnedString that keeps its contents in a reference counted
/// buffer. The contents of \p Str will be copied initially and are allowed to
/// be disposed after the \c OwnedString has been created.
static OwnedString makeRefCounted(StringRef Str) {
if (Str.empty()) {
// Copying an empty string doesn't make sense. Just create an unowned
// string that points to the empty string.
return makeUnowned(Str);
} else {
llvm::IntrusiveRefCntPtr<TextOwner> OwnedPtr(TextOwner::make(Str));
return OwnedString(StringRef(OwnedPtr->getText(), Str.size()),
std::move(OwnedPtr));
}
return *this;
}
OwnedString copy() const {
return OwnedString(Data, Length, StringOwnership::Copied);
}
/// Returns the length of the string in bytes.
size_t size() const {
return Length;
}
size_t size() const { return Text.size(); }
/// Returns true if the length is 0.
bool empty() const {
return Length == 0;
}
bool empty() const { return size() == 0; }
/// Returns a StringRef to the underlying data. No copy is made and no
/// ownership changes take place.
StringRef str() const {
return StringRef { Data, Length };
}
StringRef str() const { return Text; }
bool operator==(const OwnedString &Right) const {
return str() == Right.str();
}
~OwnedString() {
release();
}
};
} // end namespace swift

View File

@@ -404,12 +404,17 @@ public:
return static_cast<tok>(Bits.Token.TokenKind);
}
/// Return the text of the token.
StringRef getTokenText() const {
/// Return the text of the token as an \c OwnedString. Keeping a reference to
/// this string will keep it alive even if the syntax node gets freed.
OwnedString getOwnedTokenText() const {
assert(isToken());
return getTrailingObjects<OwnedString>()->str();
return *getTrailingObjects<OwnedString>();
}
/// Return the text of the token as a reference. The referenced buffer may
/// disappear when the syntax node gets freed.
StringRef getTokenText() const {
  assert(isToken());
  // Read the text directly from the stored OwnedString. Going through
  // getOwnedTokenText() would return a copy of the OwnedString, which for a
  // ref counted string retains and releases the owner just to hand back a
  // StringRef.
  return getTrailingObjects<OwnedString>()->str();
}
/// Return the leading trivia list of the token.
ArrayRef<TriviaPiece> getLeadingTrivia() const {
assert(isToken());
@@ -434,7 +439,7 @@ public:
/// trivia instead.
RC<RawSyntax>
withLeadingTrivia(ArrayRef<TriviaPiece> NewLeadingTrivia) const {
return make(getTokenKind(), getTokenText(), NewLeadingTrivia,
return make(getTokenKind(), getOwnedTokenText(), NewLeadingTrivia,
getTrailingTrivia(), getPresence());
}
@@ -446,7 +451,7 @@ public:
/// trivia instead.
RC<RawSyntax>
withTrailingTrivia(ArrayRef<TriviaPiece> NewTrailingTrivia) const {
return make(getTokenKind(), getTokenText(), getLeadingTrivia(),
return make(getTokenKind(), getOwnedTokenText(), getLeadingTrivia(),
NewTrailingTrivia, getPresence());
}

View File

@@ -162,9 +162,9 @@ template <> struct MappingTraits<swift::RC<swift::RawSyntax>> {
StringRef nodeIdString;
in.mapRequired("id", nodeIdString);
unsigned nodeId = std::atoi(nodeIdString.data());
value =
swift::RawSyntax::make(tokenKind, text, leadingTrivia, trailingTrivia,
presence, /*Arena=*/nullptr, nodeId);
value = swift::RawSyntax::make(
tokenKind, swift::OwnedString::makeRefCounted(text), leadingTrivia,
trailingTrivia, presence, /*Arena=*/nullptr, nodeId);
} else {
swift::SyntaxKind kind;
in.mapRequired("kind", kind);

View File

@@ -454,7 +454,8 @@ struct MappingTraits<swift::syntax::TriviaPiece> {
% else:
StringRef text;
in.mapRequired("value", text);
return swift::syntax::TriviaPiece(kind, text);
return swift::syntax::TriviaPiece(
kind, swift::OwnedString::makeRefCounted(text));
% end
break;
}

View File

@@ -2182,7 +2182,8 @@ void Lexer::lexImpl() {
size_t BOMLen = ContentStart - BufferStart;
assert(BOMLen == 3 && "UTF-8 BOM is 3 bytes");
// Add UTF-8 BOM to LeadingTrivia.
LeadingTrivia.push_back(TriviaPiece::garbageText({CurPtr, BOMLen}));
auto Text = OwnedString::makeRefCounted(StringRef(CurPtr, BOMLen));
LeadingTrivia.push_back(TriviaPiece::garbageText(Text));
CurPtr += BOMLen;
}
NextToken.setAtStartOfLine(true);
@@ -2407,18 +2408,18 @@ Restart:
bool isDocComment = CurPtr[1] == '/';
skipSlashSlashComment(/*EatNewline=*/false);
size_t Length = CurPtr - TriviaStart;
Pieces.push_back(isDocComment
? TriviaPiece::docLineComment({TriviaStart, Length})
: TriviaPiece::lineComment({TriviaStart, Length}));
auto Text = OwnedString::makeRefCounted(StringRef(TriviaStart, Length));
Pieces.push_back(isDocComment ? TriviaPiece::docLineComment(Text)
: TriviaPiece::lineComment(Text));
goto Restart;
} else if (*CurPtr == '*') {
// '/* ... */' comment.
bool isDocComment = CurPtr[1] == '*';
skipSlashStarComment();
size_t Length = CurPtr - TriviaStart;
Pieces.push_back(isDocComment
? TriviaPiece::docBlockComment({TriviaStart, Length})
: TriviaPiece::blockComment({TriviaStart, Length}));
auto Text = OwnedString::makeRefCounted(StringRef(TriviaStart, Length));
Pieces.push_back(isDocComment ? TriviaPiece::docBlockComment(Text)
: TriviaPiece::blockComment(Text));
goto Restart;
}
break;
@@ -2430,7 +2431,8 @@ Restart:
diagnose(TriviaStart, diag::lex_hashbang_not_allowed);
skipHashbang(/*EatNewline=*/false);
size_t Length = CurPtr - TriviaStart;
Pieces.push_back(TriviaPiece::garbageText({TriviaStart, Length}));
auto Text = OwnedString::makeRefCounted(StringRef(TriviaStart, Length));
Pieces.push_back(TriviaPiece::garbageText(Text));
goto Restart;
}
break;
@@ -2439,7 +2441,8 @@ Restart:
if (tryLexConflictMarker(/*EatNewline=*/false)) {
// Conflict marker.
size_t Length = CurPtr - TriviaStart;
Pieces.push_back(TriviaPiece::garbageText({TriviaStart, Length}));
auto Text = OwnedString::makeRefCounted(StringRef(TriviaStart, Length));
Pieces.push_back(TriviaPiece::garbageText(Text));
goto Restart;
}
break;
@@ -2448,7 +2451,8 @@ Restart:
case NulCharacterKind::Embedded: {
diagnoseEmbeddedNul(Diags, CurPtr - 1);
size_t Length = CurPtr - TriviaStart;
Pieces.push_back(TriviaPiece::garbageText({TriviaStart, Length}));
auto Text = OwnedString::makeRefCounted(StringRef(TriviaStart, Length));
Pieces.push_back(TriviaPiece::garbageText(Text));
goto Restart;
}
case NulCharacterKind::CodeCompletion:
@@ -2494,7 +2498,8 @@ Restart:
}
size_t Length = CurPtr - TriviaStart;
Pieces.push_back(TriviaPiece::garbageText({TriviaStart, Length}));
auto Text = OwnedString::makeRefCounted(StringRef(TriviaStart, Length));
Pieces.push_back(TriviaPiece::garbageText(Text));
goto Restart;
}
// Reset the cursor.

View File

@@ -312,15 +312,15 @@ swift::tokenizeWithTrivia(const LangOptions &LangOpts, const SourceManager &SM,
syntax::AbsolutePosition RunningPos;
tokenize(
LangOpts, SM, BufferID, Offset, EndOffset,
Diags,
LangOpts, SM, BufferID, Offset, EndOffset, Diags,
CommentRetentionMode::AttachToNextToken, TriviaRetentionMode::WithTrivia,
/*TokenizeInterpolatedString=*/false,
/*SplitTokens=*/ArrayRef<Token>(),
[&](const Token &Tok, const Trivia &LeadingTrivia,
const Trivia &TrailingTrivia) {
auto Text = OwnedString::makeRefCounted(Tok.getText());
auto ThisToken =
RawSyntax::make(Tok.getKind(), Tok.getText(), LeadingTrivia.Pieces,
RawSyntax::make(Tok.getKind(), Text, LeadingTrivia.Pieces,
TrailingTrivia.Pieces, SourcePresence::Present);
auto ThisTokenPos = ThisToken->accumulateAbsolutePosition(RunningPos);

View File

@@ -163,9 +163,9 @@ void SyntaxParsingContext::addToken(Token &Tok, Trivia &LeadingTrivia,
return;
auto &Arena = getArena();
addRawSyntax(RawSyntax::getToken(Arena, Tok.getKind(), Tok.getText(),
LeadingTrivia.Pieces,
TrailingTrivia.Pieces));
auto Text = OwnedString::makeRefCounted(Tok.getText());
addRawSyntax(RawSyntax::getToken(
Arena, Tok.getKind(), Text, LeadingTrivia.Pieces, TrailingTrivia.Pieces));
}
/// Add Syntax to the parts.
@@ -313,7 +313,7 @@ void finalizeSourceFile(RootContextData &RootData,
}
if (!EOFToken)
EOFToken = RawSyntax::missing(tok::eof, "");
EOFToken = RawSyntax::missing(tok::eof, OwnedString::makeUnowned(""));
auto newRaw = SyntaxFactory::createRaw(
SyntaxKind::SourceFile,
@@ -352,7 +352,8 @@ void SyntaxParsingContext::synthesize(tok Kind, StringRef Text) {
return;
if (Text.empty())
Text = getTokenText(Kind);
getStorage().push_back(RawSyntax::missing(Kind, Text));
auto OwnedText = OwnedString::makeRefCounted(Text);
getStorage().push_back(RawSyntax::missing(Kind, OwnedText));
}
void SyntaxParsingContext::synthesize(SyntaxKind Kind) {

View File

@@ -233,7 +233,8 @@ SyntaxFactory::makeBlank${node.syntax_kind}(SyntaxArena *Arena) {
SyntaxFactory::make${token.name}Keyword(const Trivia &LeadingTrivia,
const Trivia &TrailingTrivia,
SyntaxArena *Arena) {
return makeToken(tok::${token.kind}, "${token.text}",
return makeToken(tok::${token.kind},
OwnedString::makeUnowned("${token.text}"),
LeadingTrivia, TrailingTrivia,
SourcePresence::Present, Arena);
}
@@ -242,7 +243,8 @@ SyntaxFactory::makeBlank${node.syntax_kind}(SyntaxArena *Arena) {
SyntaxFactory::make${token.name}Token(const Trivia &LeadingTrivia,
const Trivia &TrailingTrivia,
SyntaxArena *Arena) {
return makeToken(tok::${token.kind}, "${token.text}",
return makeToken(tok::${token.kind},
OwnedString::makeUnowned("${token.text}"),
LeadingTrivia, TrailingTrivia,
SourcePresence::Present, Arena);
}
@@ -303,30 +305,35 @@ TypeSyntax SyntaxFactory::makeTypeIdentifier(OwnedString TypeName,
TypeSyntax SyntaxFactory::makeAnyTypeIdentifier(const Trivia &LeadingTrivia,
const Trivia &TrailingTrivia,
SyntaxArena *Arena) {
return makeTypeIdentifier("Any", LeadingTrivia, TrailingTrivia, Arena);
return makeTypeIdentifier(OwnedString::makeUnowned("Any"), LeadingTrivia,
TrailingTrivia, Arena);
}
TypeSyntax SyntaxFactory::makeSelfTypeIdentifier(const Trivia &LeadingTrivia,
const Trivia &TrailingTrivia,
SyntaxArena *Arena) {
return makeTypeIdentifier("Self", LeadingTrivia, TrailingTrivia, Arena);
return makeTypeIdentifier(OwnedString::makeUnowned("Self"),
LeadingTrivia, TrailingTrivia, Arena);
}
TokenSyntax SyntaxFactory::makeTypeToken(const Trivia &LeadingTrivia,
const Trivia &TrailingTrivia,
SyntaxArena *Arena) {
return makeIdentifier("Type", LeadingTrivia, TrailingTrivia, Arena);
return makeIdentifier(OwnedString::makeUnowned("Type"),
LeadingTrivia, TrailingTrivia, Arena);
}
TokenSyntax SyntaxFactory::makeProtocolToken(const Trivia &LeadingTrivia,
const Trivia &TrailingTrivia,
SyntaxArena *Arena) {
return makeIdentifier("Protocol", LeadingTrivia, TrailingTrivia, Arena);
return makeIdentifier(OwnedString::makeUnowned("Protocol"),
LeadingTrivia, TrailingTrivia, Arena);
}
TokenSyntax SyntaxFactory::makeEqualityOperator(const Trivia &LeadingTrivia,
const Trivia &TrailingTrivia,
SyntaxArena *Arena) {
return makeToken(tok::oper_binary_spaced, "==", LeadingTrivia, TrailingTrivia,
SourcePresence::Present, Arena);
return makeToken(tok::oper_binary_spaced, OwnedString::makeUnowned("=="),
LeadingTrivia, TrailingTrivia, SourcePresence::Present,
Arena);
}

View File

@@ -18,177 +18,43 @@ using namespace swift;
TEST(OwnedStringTest, char_pointer_empty) {
const char *data = "";
const size_t length = strlen(data);
OwnedString ownedString(data);
OwnedString ownedString = OwnedString::makeUnowned(data);
EXPECT_EQ(length, ownedString.size());
EXPECT_TRUE(ownedString.empty());
OwnedString copy = ownedString.copy();
EXPECT_EQ(length, copy.size());
EXPECT_TRUE(copy.empty());
StringRef str = copy.str();
EXPECT_EQ("", str);
EXPECT_EQ(length, str.size());
EXPECT_EQ(data, ownedString.str().data());
}
TEST(OwnedStringTest, char_pointer_non_empty) {
const char *data = "string";
const size_t length = strlen(data);
OwnedString ownedString(data);
OwnedString ownedString = OwnedString::makeUnowned(data);
EXPECT_EQ(length, ownedString.size());
EXPECT_FALSE(ownedString.empty());
OwnedString copy = ownedString.copy();
EXPECT_EQ(length, copy.size());
EXPECT_FALSE(copy.empty());
StringRef str = copy.str();
EXPECT_EQ("string", str);
EXPECT_EQ(length, strlen(str.data()));
EXPECT_EQ(data, ownedString.str().data());
}
TEST(OwnedStringTest, char_pointer_length_equal) {
const char *data = "string";
TEST(OwnedStringTest, ref_counted_copies_buffer) {
char *data = static_cast<char *>(malloc(6));
memcpy(data, "hello", 6);
size_t length = strlen(data);
OwnedString ownedString(data, length);
EXPECT_EQ(length, ownedString.size());
EXPECT_FALSE(ownedString.empty());
OwnedString ownedString =
OwnedString::makeRefCounted(StringRef(data, length));
OwnedString copy = ownedString.copy();
EXPECT_EQ(length, copy.size());
EXPECT_FALSE(copy.empty());
EXPECT_EQ(ownedString.str(), "hello");
EXPECT_NE(ownedString.str().data(), data);
// Make sure we correctly copied the data and that it is null
// terminated.
StringRef str = copy.str();
EXPECT_EQ("string", str);
EXPECT_EQ(length, strlen(str.data()));
memcpy(data, "world", 6);
// Even if the original buffer changes, the string should stay the same
EXPECT_EQ(ownedString.str(), "hello");
}
TEST(OwnedStringTest, char_pointer_length_nonzero) {
const char *data = "string";
const size_t length = 1;
OwnedString ownedString(data, length);
TEST(OwnedStringTest, ref_counted_assignment) {
OwnedString str = OwnedString::makeRefCounted("hello");
OwnedString copy = str;
EXPECT_EQ(length, ownedString.size());
EXPECT_FALSE(ownedString.empty());
OwnedString copy = ownedString.copy();
EXPECT_EQ(length, copy.size());
EXPECT_FALSE(copy.empty());
// Make sure we correctly copied the data and that it is null
// terminated.
StringRef str = copy.str();
EXPECT_EQ("s", str);
EXPECT_EQ(1UL, strlen(str.data()));
}
TEST(OwnedStringTest, char_pointer_length_zero) {
const char *data = "string";
const size_t length = 0;
OwnedString ownedString(data, length);
EXPECT_EQ(length, ownedString.size());
EXPECT_TRUE(ownedString.empty());
OwnedString copy = ownedString.copy();
EXPECT_EQ(length, copy.size());
EXPECT_TRUE(copy.empty());
}
TEST(OwnedStringTest, copy_original_new_different) {
// Initialize a mutable string.
const char *original = "string";
const size_t length = strlen(original);
char *data = static_cast<char *>(malloc(length + 1));
memcpy(data, original, length);
data[length] = '\0';
// Create an OwnedString.
OwnedString ownedString(data, length);
EXPECT_EQ(length, ownedString.size());
EXPECT_FALSE(ownedString.empty());
// Copy the string
OwnedString copy = ownedString.copy();
EXPECT_EQ(length, copy.size());
EXPECT_FALSE(copy.empty());
// Make sure we correctly copied the data and that it is null
// terminated.
StringRef str = copy.str();
EXPECT_EQ("string", str);
EXPECT_EQ(length, strlen(str.data()));
// Make sure updating the original pointer doesn't affect the copy.
data[0] = 'a';
EXPECT_EQ("string", str);
}
TEST(OwnedStringTest, copy_constructor_original_not_copy) {
// Initialize a mutable string.
const char *original = "string";
const size_t length = strlen(original);
char *data = static_cast<char *>(malloc(length + 1));
memcpy(data, original, length);
data[length] = '\0';
// Create an OwnedString.
OwnedString ownedString(data, length);
EXPECT_EQ(length, ownedString.size());
EXPECT_FALSE(ownedString.empty());
// Copy the string
OwnedString copy = OwnedString(ownedString);
EXPECT_EQ(length, copy.size());
EXPECT_FALSE(copy.empty());
// Make sure we correctly copied the data and that it is null
// terminated.
StringRef str = copy.str();
EXPECT_EQ("string", str);
EXPECT_EQ(length, strlen(str.data()));
// Make sure updating the original pointer doesn't affect the copy.
data[0] = 'a';
EXPECT_EQ("string", str);
}
TEST(OwnedStringTest, copy_constructor_original_copy) {
// Initialize a mutable string.
const char *original = "string";
const size_t length = strlen(original);
char *data = static_cast<char *>(malloc(length + 1));
memcpy(data, original, length);
data[length] = '\0';
// Create an OwnedString.
OwnedString ownedString(data, length);
EXPECT_EQ(length, ownedString.size());
EXPECT_FALSE(ownedString.empty());
// Copy the string
OwnedString copy = OwnedString(ownedString.copy());
EXPECT_EQ(length, copy.size());
EXPECT_FALSE(copy.empty());
// Make sure we correctly copied the data and that it is null
// terminated.
StringRef str = copy.str();
EXPECT_EQ("string", str);
EXPECT_EQ(length, strlen(str.data()));
// Make sure updating the original pointer doesn't affect the copy.
data[0] = 'a';
EXPECT_EQ("string", str);
EXPECT_EQ(str.str().data(), copy.str().data());
}

View File

@@ -29,7 +29,9 @@ def make_missing_child(child):
token = child.main_token()
tok_kind = token.kind if token else "unknown"
tok_text = token.text if token else ""
return 'RawSyntax::missing(tok::%s, "%s")' % (tok_kind, tok_text)
return \
'RawSyntax::missing(tok::%s, OwnedString::makeUnowned("%s"))' % \
(tok_kind, tok_text)
else:
missing_kind = "Unknown" if child.syntax_kind == "Syntax" \
else child.syntax_kind