mirror of
https://github.com/apple/swift.git
synced 2025-12-14 20:36:38 +01:00
[Lexer] Introduce Lexer::getLocForStartOfToken() that returns the location at the start of the token that a given offset points to.
Swift SVN r8281
This commit is contained in:
@@ -110,6 +110,11 @@ public:
|
||||
/// \brief Returns the distance in bytes between the given source locations.
|
||||
unsigned getByteDistance(SourceLoc Start, SourceLoc End) const;
|
||||
|
||||
/// Returns the SourceLoc for the byte offset in the specified buffer.
|
||||
SourceLoc getLocForOffset(unsigned BufferID, unsigned Offset) const {
|
||||
return getLocForBufferStart(BufferID).getAdvancedLoc(Offset);
|
||||
}
|
||||
|
||||
std::pair<unsigned, unsigned> getLineAndColumn(SourceLoc Loc,
|
||||
int BufferID = -1) const {
|
||||
assert(Loc.isValid());
|
||||
|
||||
@@ -192,6 +192,14 @@ public:
|
||||
/// \param Loc The source location of the beginning of a token.
|
||||
static SourceLoc getLocForEndOfToken(SourceManager &SM, SourceLoc Loc);
|
||||
|
||||
/// Return the start location of the token that the offset in the given buffer
|
||||
/// points to.
|
||||
///
|
||||
/// If the offset points to whitespace the source location will point to the
|
||||
/// exact offset.
|
||||
static SourceLoc getLocForStartOfToken(SourceManager &SM, unsigned BufferID,
|
||||
unsigned Offset);
|
||||
|
||||
/// \brief Determines if the given string is a valid non-operator
|
||||
/// identifier.
|
||||
static bool isIdentifier(StringRef identifier);
|
||||
|
||||
@@ -1486,3 +1486,88 @@ SourceLoc Lexer::getLocForEndOfToken(SourceManager &SM, SourceLoc Loc) {
|
||||
return Loc.getAdvancedLoc(Length);
|
||||
}
|
||||
|
||||
/// Finds the start of the token covering \p Offset by lexing buffer
/// \p BufferID with a fresh Lexer constructed over \p BufferStart and
/// \p BufferEnd (byte offsets into the same buffer).
///
/// Tokens are lexed one at a time until one of the following happens:
///  - a token begins after \p Offset: the offset was skipped over, so it points
///    into whitespace and the location of the raw offset is returned;
///  - a token covers \p Offset: its start location is returned, unless it is a
///    string literal and the offset falls inside an interpolated expression
///    segment, in which case that segment alone is re-lexed recursively;
///  - the end of file is reached: the location of the raw offset is returned.
///
/// \param InInterpolatedString True when this call is the recursive re-lex of
/// an interpolated expression segment. Only used to assert that no string
/// literal token is encountered while already inside one.
static SourceLoc getLocForStartOfTokenInBuf(SourceManager &SM,
                                            unsigned BufferID,
                                            unsigned Offset,
                                            unsigned BufferStart,
                                            unsigned BufferEnd,
                                            bool InInterpolatedString) {
  Lexer L(SM, BufferID, nullptr, /*InSILMode=*/false, /*KeepComments=*/false,
          BufferStart, BufferEnd);

  Token Tok;
  do {
    L.lex(Tok);

    const unsigned TokBegin = SM.getLocOffsetInBuffer(Tok.getLoc(), BufferID);

    // The lexer stepped past the offset without any token covering it, so the
    // offset sits in whitespace. Stop and fall back to the raw offset.
    if (Offset < TokBegin)
      break;

    // The offset lies at or beyond the end of this token; try the next one.
    if (Offset >= TokBegin + Tok.getLength())
      continue;

    // From here on the current token is known to contain the offset.
    if (Tok.is(tok::string_literal)) {
      assert(!InInterpolatedString);

      SmallVector<Lexer::StringSegment, 4> Segments;
      Lexer::getStringLiteralSegments(Tok, Segments, /*Diags=*/nullptr);

      for (auto &Seg : Segments) {
        const unsigned SegBegin = SM.getLocOffsetInBuffer(Seg.Loc, BufferID);

        // This segment already starts past the offset; stop scanning and
        // answer with the string literal token itself.
        if (Offset < SegBegin)
          break;

        const unsigned SegLimit = SegBegin + Seg.Length;
        const bool IsExprSegment = Seg.Kind == Lexer::StringSegment::Expr;
        if (!IsExprSegment || Offset >= SegLimit)
          continue;

        // The offset is inside an interpolated expression: re-lex just that
        // segment to find the token within it.
        return getLocForStartOfTokenInBuf(SM, BufferID, Offset,
                                          /*BufferStart=*/SegBegin,
                                          /*BufferEnd=*/SegLimit,
                                          /*InInterpolatedString=*/true);
      }
    }

    return Tok.getLoc();
  } while (Tok.isNot(tok::eof));

  // No token covers the offset; hand back the location of the offset itself.
  return SM.getLocForOffset(BufferID, Offset);
}
|
||||
|
||||
/// Returns the start location of the token that byte \p Offset of buffer
/// \p BufferID points into.
///
/// A default-constructed SourceLoc is returned when the buffer cannot be found
/// or when \p Offset is greater than the buffer size. If the byte at the
/// offset is a newline, carriage return, space or tab, the location of the
/// offset itself is returned. Otherwise the enclosing line is re-lexed from
/// its first byte to locate the token.
SourceLoc Lexer::getLocForStartOfToken(SourceManager &SM, unsigned BufferID,
                                       unsigned Offset) {
  const llvm::MemoryBuffer *MemBuffer = SM->getMemoryBuffer(BufferID);
  if (MemBuffer == nullptr)
    return SourceLoc();

  StringRef Contents = MemBuffer->getBuffer();
  if (Contents.size() < Offset)
    return SourceLoc();

  const char *const Begin = Contents.data();
  const char *const Target = Begin + Offset;

  // Whitespace is never part of a token: answer with the offset itself.
  // NOTE(review): Offset == Contents.size() is accepted above, so this reads
  // the byte one past the contents; presumably the buffer is NUL-terminated
  // -- confirm.
  switch (*Target) {
  case '\n':
  case '\r':
  case ' ':
  case '\t':
    return SM.getLocForOffset(BufferID, Offset);
  default:
    break;
  }

  // Walk backwards to the first byte of the current line (or of the buffer);
  // lexing restarts from there.
  const char *LineStart = Target;
  for (; LineStart != Begin; --LineStart) {
    const char C = *LineStart;
    if (C == '\n' || C == '\r') {
      // Step forward again so lexing begins just after the line terminator.
      ++LineStart;
      break;
    }
  }

  return getLocForStartOfTokenInBuf(SM, BufferID, Offset,
                                    /*BufferStart=*/LineStart - Begin,
                                    /*BufferEnd=*/Contents.size(),
                                    /*InInterpolatedString=*/false);
}
|
||||
|
||||
@@ -9,9 +9,8 @@ using namespace llvm;
|
||||
|
||||
// The test fixture.
|
||||
class LexerTest : public ::testing::Test {
|
||||
SourceManager SourceMgr;
|
||||
|
||||
public:
|
||||
SourceManager SourceMgr;
|
||||
|
||||
std::vector<Token> tokenizeAndKeepEOF(unsigned BufferID) {
|
||||
Lexer L(SourceMgr, BufferID, /*Diags=*/nullptr, /*InSILMode=*/false);
|
||||
@@ -257,3 +256,20 @@ TEST_F(LexerTest, RestoreStopAtCodeCompletion) {
|
||||
ASSERT_EQ(tok::eof, Tok.getKind());
|
||||
}
|
||||
|
||||
// Checks Lexer::getLocForStartOfToken() against a table of byte offsets that
// land in identifiers, whitespace, a plain string literal and an interpolated
// string literal.
TEST_F(LexerTest, getLocForStartOfToken) {
  // Byte layout of the source text (offsets are zero-based):
  //    0-2   aaa
  //    3-6   space, newline, space, tab
  //    7-9   bbb
  //   11-17  "hello"
  //   19-28  "-\(val)-"   ('(' at 22, val at 23-25, ')' at 26)
  const char *Source = "aaa \n \tbbb \"hello\" \"-\\(val)-\"";

  MemoryBuffer *Buf = MemoryBuffer::getMemBuffer(Source);
  unsigned BufferID = SourceMgr->AddNewSourceBuffer(Buf, llvm::SMLoc());

  // First is character offset, second is its token offset.
  unsigned Offs[][2] =
    { {1, 0}, {2, 0}, {3, 3}, {4, 4}, {6, 6}, {9, 7}, {14, 11},
      // interpolated string
      {20, 19}, {23, 23}, {24, 23}, {25, 23}, {26, 26}, {27, 19} };

  for (auto Pair : Offs) {
    // Expected value first, matching the other assertions in this file; the
    // streamed message identifies the failing table entry.
    ASSERT_EQ(SourceMgr.getLocForOffset(BufferID, Pair[1]),
              Lexer::getLocForStartOfToken(SourceMgr, BufferID, Pair[0]))
        << "for byte offset " << Pair[0];
  }
}
|
||||
|
||||
Reference in New Issue
Block a user