Files
swift-mirror/unittests/Parse/LexerTests.cpp
Alex Hoppen fe2ae72ad2 [IDE] Rename CodeCompletion to IDEInspection in cases where the code path no longer exclusively applies to code completion
The code completion infrastructure is also being used for cursor info now, so it should no longer be called code completion.

rdar://103251187
2022-12-13 11:41:05 +01:00

748 lines
23 KiB
C++

#include "swift/AST/DiagnosticConsumer.h"
#include "swift/AST/DiagnosticEngine.h"
#include "swift/Basic/Defer.h"
#include "swift/Basic/LangOptions.h"
#include "swift/Basic/SourceManager.h"
#include "swift/Parse/Lexer.h"
#include "swift/Subsystems.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/Process.h"
#include "gtest/gtest.h"
#if __has_include(<sys/mman.h>)
# include <sys/mman.h>
# define HAS_MMAP 1
#else
# define HAS_MMAP 0
#endif
using namespace swift;
using namespace llvm;
// The test fixture.
class LexerTest : public ::testing::Test {
public:
LangOptions LangOpts;
SourceManager SourceMgr;
std::vector<Token> tokenizeAndKeepEOF(unsigned BufferID) {
Lexer L(LangOpts, SourceMgr, BufferID, /*Diags=*/nullptr,
LexerMode::Swift);
std::vector<Token> Tokens;
do {
Tokens.emplace_back();
L.lex(Tokens.back());
} while (Tokens.back().isNot(tok::eof));
return Tokens;
}
std::vector<Token> checkLex(StringRef Source,
ArrayRef<tok> ExpectedTokens,
bool KeepComments = false,
bool KeepEOF = false) {
unsigned BufID = SourceMgr.addMemBufferCopy(Source);
std::vector<Token> Toks;
if (KeepEOF)
Toks = tokenizeAndKeepEOF(BufID);
else
Toks = tokenize(LangOpts, SourceMgr, BufID, 0, 0, /*Diags=*/nullptr, KeepComments);
EXPECT_EQ(ExpectedTokens.size(), Toks.size());
for (unsigned i = 0, e = ExpectedTokens.size(); i != e; ++i) {
EXPECT_EQ(ExpectedTokens[i], Toks[i].getKind()) << "i = " << i;
}
return Toks;
}
SourceLoc getLocForEndOfToken(SourceLoc Loc) {
return Lexer::getLocForEndOfToken(SourceMgr, Loc);
}
};
// With KeepComments disabled, neither the line comment nor the block comment
// shows up in the token stream.
TEST_F(LexerTest, TokenizeSkipComments) {
  StringRef Code = "// Blah\n"
                   "(/*yo*/)";
  const std::vector<tok> Expected = {tok::l_paren, tok::r_paren};
  checkLex(Code, Expected, /*KeepComments=*/false);
}
// With KeepComments enabled, both comments are returned as tok::comment
// tokens with the expected lengths.
TEST_F(LexerTest, TokenizeWithComments) {
  StringRef Code = "// Blah\n"
                   "(/*yo*/)";
  const std::vector<tok> Expected = {tok::comment, tok::l_paren, tok::comment,
                                     tok::r_paren};
  std::vector<Token> Lexed = checkLex(Code, Expected, /*KeepComments=*/true);

  Token &LineComment = Lexed[0];
  Token &BlockComment = Lexed[2];
  EXPECT_EQ(LineComment.getLength(), 8U);
  EXPECT_EQ(BlockComment.getLength(), 6U);
  EXPECT_EQ(getLocForEndOfToken(LineComment.getLoc()),
            LineComment.getLoc().getAdvancedLoc(8));
}
// The trailing EOF token must have a length of zero.
TEST_F(LexerTest, EOFTokenLengthIsZero) {
  StringRef Code = "meow";
  const std::vector<tok> Expected = {tok::identifier, tok::eof};
  std::vector<Token> Lexed = checkLex(Code, Expected,
                                      /*KeepComments=*/true,
                                      /*KeepEOF=*/true);
  Token &EOFTok = Lexed[1];
  EXPECT_EQ(EOFTok.getLength(), 0U);
}
// An unterminated string literal followed by a NUL byte lexes as a single
// unknown token that covers all six bytes of the buffer.
TEST_F(LexerTest, BrokenStringLiteral1) {
  // The explicit length keeps the trailing NUL inside the buffer.
  StringRef Code("\"meow\0", 6);
  const std::vector<tok> Expected = {tok::unknown, tok::eof};
  std::vector<Token> Lexed = checkLex(Code, Expected,
                                      /*KeepComments=*/true,
                                      /*KeepEOF=*/true);
  Token &Broken = Lexed[0];
  Token &EOFTok = Lexed[1];
  EXPECT_EQ(Broken.getLength(), 6U);
  EXPECT_EQ(EOFTok.getLength(), 0U);
}
// An unterminated string literal with an unterminated interpolation and a
// trailing NUL lexes as a single unknown token covering all eight bytes.
TEST_F(LexerTest, BrokenStringLiteral2) {
  // The explicit length keeps the trailing NUL inside the buffer.
  StringRef Code("\"\\(meow\0", 8);
  const std::vector<tok> Expected = {tok::unknown, tok::eof};
  std::vector<Token> Lexed = checkLex(Code, Expected,
                                      /*KeepComments=*/true,
                                      /*KeepEOF=*/true);
  Token &Broken = Lexed[0];
  Token &EOFTok = Lexed[1];
  EXPECT_EQ(Broken.getLength(), 8U);
  EXPECT_EQ(EOFTok.getLength(), 0U);
}
// A terminated string literal that contains a NUL byte still lexes as a
// string literal spanning the whole three-byte buffer.
TEST_F(LexerTest, StringLiteralWithNUL1) {
  StringRef Code("\"\0\"", 3);
  const std::vector<tok> Expected = {tok::string_literal, tok::eof};
  std::vector<Token> Lexed = checkLex(Code, Expected,
                                      /*KeepComments=*/true,
                                      /*KeepEOF=*/true);
  Token &Literal = Lexed[0];
  Token &EOFTok = Lexed[1];
  EXPECT_EQ(Literal.getLength(), 3U);
  EXPECT_EQ(EOFTok.getLength(), 0U);
}
// A leading hashbang line is skipped; the first token is the identifier on
// the following line, at buffer offset 17.
TEST_F(LexerTest, ContentStartHashbangSkip) {
  const char *Code = "#!/usr/bin/swift\naaa";

  LangOptions Opts;
  SourceManager SM;
  const unsigned BufID = SM.addMemBufferCopy(StringRef(Code));
  Lexer Lex(Opts, SM, BufID, /*Diags=*/nullptr, LexerMode::Swift);

  Token First;
  Lex.lex(First);
  ASSERT_EQ(tok::identifier, First.getKind());
  ASSERT_EQ("aaa", First.getText());
  ASSERT_EQ(SM.getLocForOffset(BufID, 17), First.getLoc());
}
// Same as ContentStartHashbangSkip, but the buffer starts with a UTF-8 BOM,
// which shifts the identifier to buffer offset 20.
TEST_F(LexerTest, ContentStartHashbangSkipUTF8BOM) {
  const char *Code = "\xEF\xBB\xBF" "#!/usr/bin/swift\naaa";

  LangOptions Opts;
  SourceManager SM;
  const unsigned BufID = SM.addMemBufferCopy(StringRef(Code));
  Lexer Lex(Opts, SM, BufID, /*Diags=*/nullptr, LexerMode::Swift);

  Token First;
  Lex.lex(First);
  ASSERT_EQ(tok::identifier, First.getKind());
  ASSERT_EQ("aaa", First.getText());
  ASSERT_EQ(SM.getLocForOffset(BufID, 20), First.getLoc());
}
// An operator at the very start of the buffer is lexed as a prefix operator.
TEST_F(LexerTest, ContentStartOperatorLeftBound) {
  const char *Code = "+a";

  LangOptions Opts;
  SourceManager SM;
  const unsigned BufID = SM.addMemBufferCopy(StringRef(Code));
  Lexer Lex(Opts, SM, BufID, /*Diags=*/nullptr, LexerMode::Swift);

  Token First;
  Lex.lex(First);
  ASSERT_EQ(tok::oper_prefix, First.getKind());
  ASSERT_EQ("+", First.getText());
  ASSERT_EQ(SM.getLocForOffset(BufID, 0), First.getLoc());
}
// An operator that directly follows a UTF-8 BOM is still lexed as a prefix
// operator, located at buffer offset 3.
TEST_F(LexerTest, ContentStartOperatorLeftBoundUTF8BOM) {
  const char *Code = "\xEF\xBB\xBF" "+a";

  LangOptions Opts;
  SourceManager SM;
  const unsigned BufID = SM.addMemBufferCopy(StringRef(Code));
  Lexer Lex(Opts, SM, BufID, /*Diags=*/nullptr, LexerMode::Swift);

  Token First;
  Lex.lex(First);
  ASSERT_EQ(tok::oper_prefix, First.getKind());
  ASSERT_EQ("+", First.getText());
  ASSERT_EQ(SM.getLocForOffset(BufID, 3), First.getLoc());
}
// A version-control conflict marker region at the start of the buffer is
// skipped; the first token returned is the identifier after it.
TEST_F(LexerTest, ContentStartConflictMarker) {
  const char *Code = "<<<<<<< HEAD\n"
                     "xxx\n"
                     "=======\n"
                     "yyy\n"
                     ">>>>>>> 12345670\n"
                     "aaa";

  LangOptions Opts;
  SourceManager SM;
  const unsigned BufID = SM.addMemBufferCopy(StringRef(Code));
  Lexer Lex(Opts, SM, BufID, /*Diags=*/nullptr, LexerMode::Swift);

  Token First;
  Lex.lex(First);
  ASSERT_EQ(tok::identifier, First.getKind());
  ASSERT_EQ("aaa", First.getText());
}
// Same as ContentStartConflictMarker, but with a UTF-8 BOM in front of the
// conflict marker.
TEST_F(LexerTest, ContentStartConflictMarkerUTF8BOM) {
  const char *Code = "\xEF\xBB\xBF"
                     "<<<<<<< HEAD\n"
                     "xxx\n"
                     "=======\n"
                     "yyy\n"
                     ">>>>>>> 12345670\n"
                     "aaa";

  LangOptions Opts;
  SourceManager SM;
  const unsigned BufID = SM.addMemBufferCopy(StringRef(Code));
  Lexer Lex(Opts, SM, BufID, /*Diags=*/nullptr, LexerMode::Swift);

  Token First;
  Lex.lex(First);
  ASSERT_EQ(tok::identifier, First.getKind());
  ASSERT_EQ("aaa", First.getText());
}
// The first token of a buffer is flagged as being at the start of a line.
TEST_F(LexerTest, ContentStartTokenIsStartOfLine) {
  const char *Code = "aaa";

  LangOptions Opts;
  SourceManager SM;
  const unsigned BufID = SM.addMemBufferCopy(StringRef(Code));
  Lexer Lex(Opts, SM, BufID, /*Diags=*/nullptr, LexerMode::Swift);

  Token First;
  Lex.lex(First);
  ASSERT_EQ(tok::identifier, First.getKind());
  ASSERT_EQ("aaa", First.getText());
  ASSERT_EQ(SM.getLocForOffset(BufID, 0), First.getLoc());
  ASSERT_TRUE(First.isAtStartOfLine());
}
// The first token after a UTF-8 BOM is still flagged as being at the start of
// a line, and its location is past the three BOM bytes.
TEST_F(LexerTest, ContentStartTokenIsStartOfLineUTF8BOM) {
  const char *Code = "\xEF\xBB\xBF" "aaa";

  LangOptions Opts;
  SourceManager SM;
  const unsigned BufID = SM.addMemBufferCopy(StringRef(Code));
  Lexer Lex(Opts, SM, BufID, /*Diags=*/nullptr, LexerMode::Swift);

  Token First;
  Lex.lex(First);
  ASSERT_EQ(tok::identifier, First.getKind());
  ASSERT_EQ("aaa", First.getText());
  ASSERT_EQ(SM.getLocForOffset(BufID, 3), First.getLoc());
  ASSERT_TRUE(First.isAtStartOfLine());
}
// With CommentRetentionMode::None, comments are neither returned as tokens
// nor attached: every token reports an empty comment range that starts at the
// token's own location.
TEST_F(LexerTest, BOMNoCommentNoTrivia) {
  const char *Code = "\xEF\xBB\xBF" "// comment\naaa //xx \n/* x */";

  LangOptions Opts;
  SourceManager SM;
  const unsigned BufID = SM.addMemBufferCopy(StringRef(Code));
  Lexer Lex(Opts, SM, BufID, /*Diags=*/nullptr, LexerMode::Swift,
            HashbangMode::Disallowed, CommentRetentionMode::None);

  Token Cur;

  // Identifier "aaa".
  Lex.lex(Cur);
  ASSERT_EQ(tok::identifier, Cur.getKind());
  ASSERT_EQ("aaa", Cur.getText());
  ASSERT_EQ(SM.getLocForOffset(BufID, 14), Cur.getLoc());
  ASSERT_EQ(SM.getLocForOffset(BufID, 14), Cur.getCommentRange().getStart());
  ASSERT_EQ(0u, Cur.getCommentRange().getByteLength());

  // End of file.
  Lex.lex(Cur);
  ASSERT_EQ(tok::eof, Cur.getKind());
  ASSERT_EQ(SM.getLocForOffset(BufID, 31), Cur.getLoc());
  ASSERT_EQ(SM.getLocForOffset(BufID, 31), Cur.getCommentRange().getStart());
  ASSERT_EQ(0u, Cur.getCommentRange().getByteLength());
}
// With CommentRetentionMode::ReturnAsTokens, each comment is returned as its
// own tok::comment token, and no token has a non-empty attached comment range.
TEST_F(LexerTest, BOMTokenCommentNoTrivia) {
  const char *Code = "\xEF\xBB\xBF" "// comment\naaa //xx \n/* x */";

  LangOptions Opts;
  SourceManager SM;
  const unsigned BufID = SM.addMemBufferCopy(StringRef(Code));
  Lexer Lex(Opts, SM, BufID, /*Diags=*/nullptr, LexerMode::Swift,
            HashbangMode::Disallowed, CommentRetentionMode::ReturnAsTokens);

  Token Cur;

  // Leading line comment, right after the BOM.
  Lex.lex(Cur);
  ASSERT_EQ(tok::comment, Cur.getKind());
  ASSERT_EQ("// comment\n", Cur.getText());
  ASSERT_EQ(SM.getLocForOffset(BufID, 3), Cur.getLoc());
  ASSERT_EQ(SM.getLocForOffset(BufID, 3), Cur.getCommentRange().getStart());
  ASSERT_EQ(0u, Cur.getCommentRange().getByteLength());

  // Identifier "aaa".
  Lex.lex(Cur);
  ASSERT_EQ(tok::identifier, Cur.getKind());
  ASSERT_EQ("aaa", Cur.getText());
  ASSERT_EQ(SM.getLocForOffset(BufID, 14), Cur.getLoc());
  ASSERT_EQ(SM.getLocForOffset(BufID, 14), Cur.getCommentRange().getStart());
  ASSERT_EQ(0u, Cur.getCommentRange().getByteLength());

  // Trailing line comment on the identifier's line.
  Lex.lex(Cur);
  ASSERT_EQ(tok::comment, Cur.getKind());
  ASSERT_EQ("//xx \n", Cur.getText());
  ASSERT_EQ(SM.getLocForOffset(BufID, 18), Cur.getLoc());
  ASSERT_EQ(SM.getLocForOffset(BufID, 18), Cur.getCommentRange().getStart());
  ASSERT_EQ(0u, Cur.getCommentRange().getByteLength());

  // Block comment on the last line.
  Lex.lex(Cur);
  ASSERT_EQ(tok::comment, Cur.getKind());
  ASSERT_EQ("/* x */", Cur.getText());
  ASSERT_EQ(SM.getLocForOffset(BufID, 24), Cur.getLoc());
  ASSERT_EQ(SM.getLocForOffset(BufID, 24), Cur.getCommentRange().getStart());
  ASSERT_EQ(0u, Cur.getCommentRange().getByteLength());

  // End of file.
  Lex.lex(Cur);
  ASSERT_EQ(tok::eof, Cur.getKind());
  ASSERT_EQ(SM.getLocForOffset(BufID, 31), Cur.getLoc());
  ASSERT_EQ(SM.getLocForOffset(BufID, 31), Cur.getCommentRange().getStart());
  ASSERT_EQ(0u, Cur.getCommentRange().getByteLength());
}
// With CommentRetentionMode::AttachToNextToken, comments are not returned as
// tokens; instead each token's comment range covers the comments before it.
TEST_F(LexerTest, BOMAttachCommentNoTrivia) {
  const char *Code = "\xEF\xBB\xBF" "// comment\naaa //xx \n/* x */";

  LangOptions Opts;
  SourceManager SM;
  const unsigned BufID = SM.addMemBufferCopy(StringRef(Code));
  Lexer Lex(Opts, SM, BufID, /*Diags=*/nullptr, LexerMode::Swift,
            HashbangMode::Disallowed, CommentRetentionMode::AttachToNextToken);

  Token Cur;

  // Identifier "aaa" carries the leading line comment.
  Lex.lex(Cur);
  ASSERT_EQ(tok::identifier, Cur.getKind());
  ASSERT_EQ("aaa", Cur.getText());
  ASSERT_EQ(SM.getLocForOffset(BufID, 14), Cur.getLoc());
  ASSERT_EQ(SM.getLocForOffset(BufID, 3), Cur.getCommentRange().getStart());
  ASSERT_EQ(10u, Cur.getCommentRange().getByteLength());

  // EOF carries the two remaining comments.
  Lex.lex(Cur);
  ASSERT_EQ(tok::eof, Cur.getKind());
  ASSERT_EQ(SM.getLocForOffset(BufID, 31), Cur.getLoc());
  ASSERT_EQ(SM.getLocForOffset(BufID, 18), Cur.getCommentRange().getStart());
  ASSERT_EQ(13u, Cur.getCommentRange().getByteLength());
}
// Saving the lexer state at the beginning of a token and restoring it later
// replays the same tokens from that point on.
TEST_F(LexerTest, RestoreBasic) {
  // 14 bytes, including an embedded NUL between "aaa" and "bbb".
  const char *Code = "aaa \t\0 bbb ccc";

  LangOptions Opts;
  SourceManager SM;
  const unsigned BufID = SM.addMemBufferCopy(StringRef(Code, 14));
  Lexer Lex(Opts, SM, BufID, /*Diags=*/nullptr, LexerMode::Swift);

  Token Cur;

  Lex.lex(Cur);
  ASSERT_EQ(tok::identifier, Cur.getKind());
  ASSERT_EQ("aaa", Cur.getText());
  ASSERT_TRUE(Cur.isAtStartOfLine());

  Lex.lex(Cur);
  ASSERT_EQ(tok::identifier, Cur.getKind());
  ASSERT_EQ("bbb", Cur.getText());
  ASSERT_FALSE(Cur.isAtStartOfLine());

  // Remember the position of "bbb" so it can be replayed below.
  LexerState Saved = Lex.getStateForBeginningOfToken(Cur);

  Lex.lex(Cur);
  ASSERT_EQ(tok::identifier, Cur.getKind());
  ASSERT_EQ("ccc", Cur.getText());
  ASSERT_FALSE(Cur.isAtStartOfLine());

  Lex.lex(Cur);
  ASSERT_EQ(tok::eof, Cur.getKind());

  // Rewind and make sure the same tail is produced again.
  Lex.restoreState(Saved);

  Lex.lex(Cur);
  ASSERT_EQ(tok::identifier, Cur.getKind());
  ASSERT_EQ("bbb", Cur.getText());
  ASSERT_FALSE(Cur.isAtStartOfLine());

  Lex.lex(Cur);
  ASSERT_EQ(tok::identifier, Cur.getKind());
  ASSERT_EQ("ccc", Cur.getText());
  ASSERT_FALSE(Cur.isAtStartOfLine());

  Lex.lex(Cur);
  ASSERT_EQ(tok::eof, Cur.getKind());
}
// Restoring a saved lexer state also restores the "at start of line" flag of
// the token the state was taken at.
TEST_F(LexerTest, RestoreNewlineFlag) {
  // 16 bytes, including an embedded NUL before "bbb".
  const char *Code = "aaa \n \0\tbbb \nccc";

  LangOptions Opts;
  SourceManager SM;
  const unsigned BufID = SM.addMemBufferCopy(StringRef(Code, 16));
  Lexer Lex(Opts, SM, BufID, /*Diags=*/nullptr, LexerMode::Swift);

  Token Cur;

  Lex.lex(Cur);
  ASSERT_EQ(tok::identifier, Cur.getKind());
  ASSERT_EQ("aaa", Cur.getText());
  ASSERT_TRUE(Cur.isAtStartOfLine());

  Lex.lex(Cur);
  ASSERT_EQ(tok::identifier, Cur.getKind());
  ASSERT_EQ("bbb", Cur.getText());
  ASSERT_TRUE(Cur.isAtStartOfLine());

  // Remember the position of "bbb" so it can be replayed below.
  LexerState Saved = Lex.getStateForBeginningOfToken(Cur);

  Lex.lex(Cur);
  ASSERT_EQ(tok::identifier, Cur.getKind());
  ASSERT_EQ("ccc", Cur.getText());
  ASSERT_TRUE(Cur.isAtStartOfLine());

  Lex.lex(Cur);
  ASSERT_EQ(tok::eof, Cur.getKind());

  // Rewind and make sure the same tail, with the same flags, comes back.
  Lex.restoreState(Saved);

  Lex.lex(Cur);
  ASSERT_EQ(tok::identifier, Cur.getKind());
  ASSERT_EQ("bbb", Cur.getText());
  ASSERT_TRUE(Cur.isAtStartOfLine());

  Lex.lex(Cur);
  ASSERT_EQ(tok::identifier, Cur.getKind());
  ASSERT_EQ("ccc", Cur.getText());
  ASSERT_TRUE(Cur.isAtStartOfLine());

  Lex.lex(Cur);
  ASSERT_EQ(tok::eof, Cur.getKind());
}
// When the IDE inspection target is set at offset 6, the lexer produces a
// tok::code_complete token there. A state saved after that token must not
// produce it again when restored.
TEST_F(LexerTest, RestoreStopAtCodeCompletion) {
  // 16 bytes, including an embedded NUL at offset 6.
  const char *Code = "aaa \n \0\tbbb \nccc";

  LangOptions Opts;
  SourceManager SM;
  const unsigned BufID = SM.addMemBufferCopy(StringRef(Code, 16));
  SM.setIDEInspectionTarget(BufID, 6);
  Lexer Lex(Opts, SM, BufID, /*Diags=*/nullptr, LexerMode::Swift);

  Token Cur;

  Lex.lex(Cur);
  ASSERT_EQ(tok::identifier, Cur.getKind());
  ASSERT_EQ("aaa", Cur.getText());
  ASSERT_TRUE(Cur.isAtStartOfLine());

  Lex.lex(Cur);
  ASSERT_EQ(tok::code_complete, Cur.getKind());
  ASSERT_TRUE(Cur.isAtStartOfLine());

  Lex.lex(Cur);
  ASSERT_EQ(tok::identifier, Cur.getKind());
  ASSERT_EQ("bbb", Cur.getText());
  ASSERT_FALSE(Cur.isAtStartOfLine());

  // Remember the position of "bbb", which lies after the inspection target.
  LexerState Saved = Lex.getStateForBeginningOfToken(Cur);

  Lex.lex(Cur);
  ASSERT_EQ(tok::identifier, Cur.getKind());
  ASSERT_EQ("ccc", Cur.getText());
  ASSERT_TRUE(Cur.isAtStartOfLine());

  Lex.lex(Cur);
  ASSERT_EQ(tok::eof, Cur.getKind());

  Lex.restoreState(Saved);

  // Ensure that we don't get tok::code_complete here. We saved the lexer
  // position after it, so we should not be getting it.
  Lex.lex(Cur);
  ASSERT_EQ(tok::identifier, Cur.getKind());
  ASSERT_EQ("bbb", Cur.getText());
  ASSERT_FALSE(Cur.isAtStartOfLine());

  Lex.lex(Cur);
  ASSERT_EQ(tok::identifier, Cur.getKind());
  ASSERT_EQ("ccc", Cur.getText());
  ASSERT_TRUE(Cur.isAtStartOfLine());

  Lex.lex(Cur);
  ASSERT_EQ(tok::eof, Cur.getKind());
}
// Lexer::getLocForStartOfToken maps an arbitrary buffer offset to the
// expected start offset listed alongside it.
TEST_F(LexerTest, getLocForStartOfToken) {
  StringRef Code = "aaa \n \tbbb \"hello\" \"-\\(val)-\"";
  const unsigned BufID = SourceMgr.addMemBufferCopy(Code);

  // Each entry is {queried character offset, expected token start offset}.
  const unsigned OffsetPairs[][2] = {
      {1, 0}, {2, 0}, {3, 3}, {4, 4}, {6, 6}, {9, 7}, {14, 11},
      // interpolated string
      {20, 19}, {23, 23}, {24, 23}, {25, 23}, {26, 26}, {27, 19}};

  for (const auto &Entry : OffsetPairs) {
    const unsigned QueryOffset = Entry[0];
    const unsigned ExpectedOffset = Entry[1];
    ASSERT_EQ(Lexer::getLocForStartOfToken(SourceMgr, BufID, QueryOffset),
              SourceMgr.getLocForOffset(BufID, ExpectedOffset));
  }
}
// Same checks as getLocForStartOfToken, but with a #sourceLocation directive
// inserted after the first line.
TEST_F(LexerTest, getLocForStartOfTokenWithCustomSourceLocation) {
  StringRef Code =
      "aaa \n"
      // This next line is exactly 50 bytes to make it easy to compare with the
      // previous test.
      "#sourceLocation(file: \"custom-50.swuft\", line: 9)\n"
      " \tbbb \"hello\" \"-\\(val)-\"";
  const unsigned BufID = SourceMgr.addMemBufferCopy(Code);

  // Each entry is {queried character offset, expected token start offset}.
  const unsigned OffsetPairs[][2] = {
      {1, 0}, {2, 0}, {3, 3}, {4, 4},
      {56, 56}, {59, 57}, {64, 61},
      // interpolated string
      {70, 69}, {73, 73}, {74, 73}, {75, 73}, {76, 76}, {77, 69}};

  for (const auto &Entry : OffsetPairs) {
    const unsigned QueryOffset = Entry[0];
    const unsigned ExpectedOffset = Entry[1];
    ASSERT_EQ(Lexer::getLocForStartOfToken(SourceMgr, BufID, QueryOffset),
              SourceMgr.getLocForOffset(BufID, ExpectedOffset));
  }
}
// Sub-lexers constructed from a parent lexer and a begin/end state only lex
// the tokens in that range, and can themselves be used as parents.
TEST_F(LexerTest, NestedSubLexers) {
  StringRef Code = "aaa0 bbb1 ccc2 ddd3 eee4 fff5 ggg6";
  const unsigned BufID = SourceMgr.addMemBufferCopy(Code);

  // Drains a lexer, returning every token up to and including tok::eof.
  auto lexAll = [](Lexer &Lex) {
    std::vector<Token> Out;
    for (;;) {
      Out.emplace_back();
      Lex.lex(Out.back());
      if (Out.back().getKind() == tok::eof)
        break;
    }
    return Out;
  };

  Lexer Primary(LangOpts, SourceMgr, BufID, /*Diags=*/nullptr,
                LexerMode::Swift);
  std::vector<Token> TokensPrimary = lexAll(Primary);
  ASSERT_EQ(8U, TokensPrimary.size());
  ASSERT_EQ(tok::eof, TokensPrimary.back().getKind());

  // First level: from the second token up to the last real token.
  Lexer Sub1(Primary, Primary.getStateForBeginningOfToken(TokensPrimary[1]),
             Primary.getStateForBeginningOfToken(
                 TokensPrimary[TokensPrimary.size() - 2]));
  std::vector<Token> TokensSub1 = lexAll(Sub1);
  ASSERT_EQ(6U, TokensSub1.size());
  ASSERT_EQ("bbb1", TokensSub1.front().getText());
  ASSERT_EQ("fff5", TokensSub1[TokensSub1.size() - 2].getText());
  ASSERT_EQ(tok::eof, TokensSub1.back().getKind());
  ASSERT_EQ("ggg6", TokensSub1.back().getText());

  // Second level: the same narrowing applied to the first sub-lexer.
  Lexer Sub2(Sub1, Sub1.getStateForBeginningOfToken(TokensSub1[1]),
             Sub1.getStateForBeginningOfToken(
                 TokensSub1[TokensSub1.size() - 2]));
  std::vector<Token> TokensSub2 = lexAll(Sub2);
  ASSERT_EQ(4U, TokensSub2.size());
  ASSERT_EQ("ccc2", TokensSub2.front().getText());
  ASSERT_EQ("eee4", TokensSub2[TokensSub2.size() - 2].getText());
  ASSERT_EQ(tok::eof, TokensSub2.back().getKind());
  ASSERT_EQ("fff5", TokensSub2.back().getText());
}
// Editor placeholders of the form <#...#> are lexed as single identifier
// tokens, with or without spaces inside the delimiters.
TEST_F(LexerTest, TokenizePlaceholder) {
  StringRef Code = "aa <#one#> bb <# two #>";
  const std::vector<tok> Expected = {tok::identifier, tok::identifier,
                                     tok::identifier, tok::identifier};
  std::vector<Token> Lexed = checkLex(Code, Expected);

  EXPECT_EQ("aa", Lexed[0].getText());
  EXPECT_EQ("<#one#>", Lexed[1].getText());
  EXPECT_EQ("bb", Lexed[2].getText());
  EXPECT_EQ("<# two #>", Lexed[3].getText());
}
// Malformed placeholder-like inputs must not start with an identifier token.
TEST_F(LexerTest, NoPlaceholder) {
  // Kept as a lambda so that a failing ASSERT only abandons the current
  // input and the remaining inputs are still checked.
  auto expectNotIdentifier = [&](StringRef Code) {
    const unsigned BufID = SourceMgr.addMemBufferCopy(Code);
    std::vector<Token> Lexed = tokenize(LangOpts, SourceMgr, BufID, 0, 0,
                                        /*Diags=*/nullptr, false);
    ASSERT_FALSE(Lexed.empty());
    EXPECT_NE(tok::identifier, Lexed[0].getKind());
  };

  const char *const Inputs[] = {"<#", "<#a#", "<#a\n#>", "< #a#>"};
  for (const char *Input : Inputs)
    expectNotIdentifier(Input);
}
// For a nested placeholder only the inner <#aa#> is lexed as a placeholder
// identifier; the outer delimiters become operator and pound tokens.
TEST_F(LexerTest, NestedPlaceholder) {
  StringRef Code = "<#<#aa#>#>";
  const std::vector<tok> Expected = {tok::oper_prefix, tok::pound,
                                     tok::identifier, tok::pound,
                                     tok::oper_postfix};
  std::vector<Token> Lexed = checkLex(Code, Expected);
  EXPECT_EQ("<#aa#>", Lexed[2].getText());
}
// Diagnostic consumer that records every diagnostic it receives as a string
// of the form "<line>, <column>: <message>", where line and column are the
// presumed line and column of the diagnostic's location.
class StringCaptureDiagnosticConsumer : public DiagnosticConsumer {
public:
  // Formats Info into a single string and appends it to `messages`.
  void handleDiagnostic(SourceManager &SM,
                        const swift::DiagnosticInfo &Info) override {
    std::string Message;
    llvm::raw_string_ostream OS(Message);

    // Emit the location prefix and the diagnostic text through the same
    // stream; a separate std::ostringstream is not needed for this.
    auto LC = SM.getPresumedLineAndColumnForLoc(Info.Loc);
    OS << LC.first << ", " << LC.second << ": ";
    DiagnosticEngine::formatDiagnosticText(OS, Info.FormatString,
                                           Info.FormatArgs);

    // str() flushes the stream before exposing the underlying string.
    messages.push_back(OS.str());
  }

  // All diagnostics captured so far, in the order they were received.
  std::vector<std::string> messages;
};
bool containsPrefix(const std::vector<std::string> &strs,
const std::string &prefix) {
for (auto &str : strs) {
if (StringRef(str).startswith(StringRef(prefix))) {
return true;
}
}
return false;
}
// NUL bytes embedded before the first token are diagnosed, with the expected
// line/column prefixes.
TEST_F(LexerTest, DiagnoseEmbeddedNul) {
  const char Source[] = " \0 \0 aaa \0 \0 bbb";
  // Exclude the implicit terminating NUL of the array.
  const size_t SourceLen = sizeof(Source) - 1;

  LangOptions Opts;
  SourceManager SM;
  const unsigned BufID = SM.addMemBufferCopy(StringRef(Source, SourceLen));

  StringCaptureDiagnosticConsumer Captured;
  DiagnosticEngine Diags(SM);
  Diags.addConsumer(Captured);

  Lexer Lex(Opts, SM, BufID, &Diags, LexerMode::Swift,
            HashbangMode::Disallowed, CommentRetentionMode::None);
  Token First;
  Lex.lex(First);

  ASSERT_TRUE(containsPrefix(
      Captured.messages, "1, 2: nul character embedded in middle of file"));
  ASSERT_TRUE(containsPrefix(
      Captured.messages, "1, 4: nul character embedded in middle of file"));
}
// When the lexer is constructed with a start offset of 5, the NUL bytes that
// lie before that offset must not be diagnosed.
TEST_F(LexerTest, DiagnoseEmbeddedNulOffset) {
  const char Source[] = " \0 \0 aaa \0 \0 bbb";
  // Exclude the implicit terminating NUL of the array.
  const size_t SourceLen = sizeof(Source) - 1;

  LangOptions Opts;
  SourceManager SM;
  const unsigned BufID = SM.addMemBufferCopy(StringRef(Source, SourceLen));

  StringCaptureDiagnosticConsumer Captured;
  DiagnosticEngine Diags(SM);
  Diags.addConsumer(Captured);

  Lexer Lex(Opts, SM, BufID, &Diags, LexerMode::Swift,
            HashbangMode::Disallowed, CommentRetentionMode::None,
            /*Offset=*/5, /*EndOffset=*/SourceLen);

  ASSERT_FALSE(containsPrefix(
      Captured.messages, "1, 2: nul character embedded in middle of file"));
  ASSERT_FALSE(containsPrefix(
      Captured.messages, "1, 4: nul character embedded in middle of file"));
}
#if HAS_MMAP
// This test requires mmap because llvm::sys::Memory doesn't support protecting
// pages to have no permissions.
// Checks Lexer::getEncodedStringSegment on inputs that end exactly at the end
// of an accessible page, with the following page mapped PROT_NONE. Each input
// is copied so that its last byte is the last byte of the first page.
TEST_F(LexerTest, EncodedStringSegmentPastTheEnd) {
Expected<size_t> ExptPageSize = llvm::sys::Process::getPageSize();
ASSERT_TRUE(bool(ExptPageSize));
size_t PageSize = *ExptPageSize;
// Map two consecutive pages with no access permissions at all.
void *FirstPage = mmap(/*addr*/nullptr, PageSize * 2, PROT_NONE,
MAP_PRIVATE | MAP_ANON, /*fd*/-1, /*offset*/0);
// NOTE(review): the deferred munmap is registered before the MAP_FAILED
// check below, so it also runs (with its result ignored) if mmap failed.
SWIFT_DEFER { (void)munmap(FirstPage, PageSize * 2); };
ASSERT_NE(FirstPage, MAP_FAILED);
// Make only the first page readable and writable; the second page keeps
// PROT_NONE.
int ProtectResult = mprotect(FirstPage, PageSize, PROT_READ | PROT_WRITE);
ASSERT_EQ(ProtectResult, 0);
// Copies Input to the very end of the first page, runs
// getEncodedStringSegment over exactly those bytes, and compares the result
// with Expected.
auto check = [FirstPage, PageSize](StringRef Input, StringRef Expected) {
// Position the input so that it ends on the page boundary.
char *StartPtr = static_cast<char *>(FirstPage) + PageSize - Input.size();
memcpy(StartPtr, Input.data(), Input.size());
SmallString<64> Buffer;
StringRef Escaped = Lexer::getEncodedStringSegment({StartPtr, Input.size()},
Buffer);
EXPECT_EQ(Escaped, Expected);
};
// A valid escape sequence as the last two bytes is decoded.
check("needs escaping\\r",
"needs escaping\r");
// Input without escapes is returned unchanged.
check("does not need escaping",
"does not need escaping");
// A lone backslash as the final byte is dropped from the result.
check("invalid escape at the end \\",
"invalid escape at the end ");
}
#endif // HAS_MMAP