mirror of
https://github.com/apple/swift.git
synced 2025-12-14 20:36:38 +01:00
[Lexer] Introduce Lexer::getLocForStartOfToken() that returns the location at the start of the token that a given offset points to.
Swift SVN r8281
This commit is contained in:
@@ -110,6 +110,11 @@ public:
|
|||||||
/// \brief Returns the distance in bytes between the given source locations.
|
/// \brief Returns the distance in bytes between the given source locations.
|
||||||
unsigned getByteDistance(SourceLoc Start, SourceLoc End) const;
|
unsigned getByteDistance(SourceLoc Start, SourceLoc End) const;
|
||||||
|
|
||||||
|
/// Returns the SourceLoc for the byte offset in the specified buffer.
|
||||||
|
SourceLoc getLocForOffset(unsigned BufferID, unsigned Offset) const {
|
||||||
|
return getLocForBufferStart(BufferID).getAdvancedLoc(Offset);
|
||||||
|
}
|
||||||
|
|
||||||
std::pair<unsigned, unsigned> getLineAndColumn(SourceLoc Loc,
|
std::pair<unsigned, unsigned> getLineAndColumn(SourceLoc Loc,
|
||||||
int BufferID = -1) const {
|
int BufferID = -1) const {
|
||||||
assert(Loc.isValid());
|
assert(Loc.isValid());
|
||||||
|
|||||||
@@ -192,6 +192,14 @@ public:
|
|||||||
/// \param Loc The source location of the beginning of a token.
|
/// \param Loc The source location of the beginning of a token.
|
||||||
static SourceLoc getLocForEndOfToken(SourceManager &SM, SourceLoc Loc);
|
static SourceLoc getLocForEndOfToken(SourceManager &SM, SourceLoc Loc);
|
||||||
|
|
||||||
|
/// Return the start location of the token that the offset in the given buffer
|
||||||
|
/// points to.
|
||||||
|
///
|
||||||
|
/// If the offset points to whitespace, the source location will point to the
|
||||||
|
/// exact offset.
|
||||||
|
static SourceLoc getLocForStartOfToken(SourceManager &SM, unsigned BufferID,
|
||||||
|
unsigned Offset);
|
||||||
|
|
||||||
/// \brief Determines if the given string is a valid non-operator
|
/// \brief Determines if the given string is a valid non-operator
|
||||||
/// identifier.
|
/// identifier.
|
||||||
static bool isIdentifier(StringRef identifier);
|
static bool isIdentifier(StringRef identifier);
|
||||||
|
|||||||
@@ -1486,3 +1486,88 @@ SourceLoc Lexer::getLocForEndOfToken(SourceManager &SM, SourceLoc Loc) {
|
|||||||
return Loc.getAdvancedLoc(Length);
|
return Loc.getAdvancedLoc(Length);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Lexes part of a buffer and returns the start location of the token that
/// contains the byte at \p Offset.
///
/// A Lexer is constructed over \p BufferStart and \p BufferEnd (byte offsets
/// into the buffer identified by \p BufferID) and tokens are lexed one by one
/// until one of them contains \p Offset. If lexing steps past \p Offset
/// without any token containing it, or the eof token is reached, the location
/// of \p Offset itself is returned.
///
/// If the containing token is a string literal and \p Offset lies inside one
/// of its interpolated expression segments, that segment alone is re-lexed so
/// the token inside the expression is reported instead of the whole literal.
///
/// \param InInterpolatedString true when this call is the re-lex of an
/// interpolated expression segment. A string literal token containing the
/// offset is not expected in that case (asserted).
static SourceLoc getLocForStartOfTokenInBuf(SourceManager &SM,
                                            unsigned BufferID,
                                            unsigned Offset,
                                            unsigned BufferStart,
                                            unsigned BufferEnd,
                                            bool InInterpolatedString) {
  Lexer L(SM, BufferID, /*Diags=*/nullptr, /*InSILMode=*/false,
          /*KeepComments=*/false, BufferStart, BufferEnd);

  // Lex tokens until we find the token that contains the source location.
  Token Tok;
  do {
    L.lex(Tok);

    unsigned TokOffs = SM.getLocOffsetInBuffer(Tok.getLoc(), BufferID);
    if (TokOffs > Offset) {
      // We ended up skipping over the source location entirely, which means
      // that it points into whitespace. We are done here.
      break;
    }

    if (Offset < TokOffs+Tok.getLength()) {
      // Current token encompasses our source location.

      if (Tok.is(tok::string_literal)) {
        assert(!InInterpolatedString);
        SmallVector<Lexer::StringSegment, 4> Segments;
        Lexer::getStringLiteralSegments(Tok, Segments, /*Diags=*/nullptr);
        for (const auto &Seg : Segments) {
          unsigned SegOffs = SM.getLocOffsetInBuffer(Seg.Loc, BufferID);
          unsigned SegEnd = SegOffs+Seg.Length;
          // This segment starts after the offset, so no segment contains it.
          if (SegOffs > Offset)
            break;

          // If the offset is inside an interpolated expr segment, re-lex.
          if (Seg.Kind == Lexer::StringSegment::Expr && Offset < SegEnd)
            return getLocForStartOfTokenInBuf(SM, BufferID, Offset,
                                              /*BufferStart=*/SegOffs,
                                              /*BufferEnd=*/SegEnd,
                                              /*InInterpolatedString=*/true);
        }
      }

      // Not inside an interpolated expression: the token itself is the answer.
      return Tok.getLoc();
    }
  } while (Tok.isNot(tok::eof));

  // We've passed our source location; just return the original source location.
  return SM.getLocForOffset(BufferID, Offset);
}
|
||||||
|
|
||||||
|
/// Finds the start of the token that covers the byte at \p Offset in the
/// buffer identified by \p BufferID.
///
/// A default-constructed SourceLoc is returned when no memory buffer exists
/// for \p BufferID or when \p Offset is greater than the buffer size. When the
/// byte at \p Offset is a newline, carriage return, space or tab, the location
/// of \p Offset itself is returned. Otherwise the text is re-lexed from the
/// beginning of the line containing \p Offset (or from the beginning of the
/// buffer) up to the end of the buffer.
SourceLoc Lexer::getLocForStartOfToken(SourceManager &SM, unsigned BufferID,
                                       unsigned Offset) {
  const llvm::MemoryBuffer *MemBuffer = SM->getMemoryBuffer(BufferID);
  if (!MemBuffer)
    return SourceLoc();

  StringRef Contents = MemBuffer->getBuffer();
  if (Offset > Contents.size())
    return SourceLoc();

  const char *const FirstByte = Contents.data();
  const char *const Target = FirstByte + Offset;

  // NOTE(review): when Offset equals the buffer size this reads the byte just
  // past the contents; presumably the buffer is null-terminated -- confirm.
  switch (*Target) {
  case '\n':
  case '\r':
  case ' ':
  case '\t':
    // Whitespace is never part of a token; report the offset as-is.
    return SM.getLocForOffset(BufferID, Offset);
  default:
    break;
  }

  // Walk backwards to the first byte after the previous line break, stopping
  // at the start of the buffer if there is none. Lexing restarts from there.
  const char *LineStart = Target;
  for (; LineStart != FirstByte; --LineStart) {
    if (*LineStart == '\n' || *LineStart == '\r') {
      ++LineStart;
      break;
    }
  }

  return getLocForStartOfTokenInBuf(SM, BufferID, Offset,
                                    /*BufferStart=*/LineStart - FirstByte,
                                    /*BufferEnd=*/Contents.size(),
                                    /*InInterpolatedString=*/false);
}
|
||||||
|
|||||||
@@ -9,9 +9,8 @@ using namespace llvm;
|
|||||||
|
|
||||||
// The test fixture.
|
// The test fixture.
|
||||||
class LexerTest : public ::testing::Test {
|
class LexerTest : public ::testing::Test {
|
||||||
SourceManager SourceMgr;
|
|
||||||
|
|
||||||
public:
|
public:
|
||||||
|
SourceManager SourceMgr;
|
||||||
|
|
||||||
std::vector<Token> tokenizeAndKeepEOF(unsigned BufferID) {
|
std::vector<Token> tokenizeAndKeepEOF(unsigned BufferID) {
|
||||||
Lexer L(SourceMgr, BufferID, /*Diags=*/nullptr, /*InSILMode=*/false);
|
Lexer L(SourceMgr, BufferID, /*Diags=*/nullptr, /*InSILMode=*/false);
|
||||||
@@ -257,3 +256,20 @@ TEST_F(LexerTest, RestoreStopAtCodeCompletion) {
|
|||||||
ASSERT_EQ(tok::eof, Tok.getKind());
|
ASSERT_EQ(tok::eof, Tok.getKind());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Checks Lexer::getLocForStartOfToken against a table of
/// (queried offset, expected token-start offset) pairs, covering plain
/// identifiers, whitespace, a plain string literal and an interpolated string.
TEST_F(LexerTest, getLocForStartOfToken) {
  // Byte offsets within Source:
  //   0-2   aaa
  //   3-6   ' ', '\n', ' ', '\t'
  //   7-9   bbb
  //   11-17 "hello"
  //   19-28 "-\(val)-"  with 'val' at 23-25 and ')' at 26
  const char *Source = "aaa \n \tbbb \"hello\" \"-\\(val)-\"";

  MemoryBuffer *Buf = MemoryBuffer::getMemBuffer(Source);
  unsigned BufferID = SourceMgr->AddNewSourceBuffer(Buf, llvm::SMLoc());

  // First is character offset, second is its token offset.
  unsigned Offs[][2] =
    { {1, 0}, {2, 0}, {3, 3}, {4, 4}, {6, 6}, {9, 7}, {14, 11},
      // interpolated string
      {20, 19}, {23, 23}, {24, 23}, {25, 23}, {26, 26}, {27, 19} };

  for (auto Pair : Offs) {
    // Expected value first, matching the other assertions in this file.
    ASSERT_EQ(SourceMgr.getLocForOffset(BufferID, Pair[1]),
              Lexer::getLocForStartOfToken(SourceMgr, BufferID, Pair[0]));
  }
}
|
||||||
|
|||||||
Reference in New Issue
Block a user