[Lexer] Introduce Lexer::getLocForStartOfToken() that returns the location at the start of the token that a given offset points to.

Swift SVN r8281
2025-12-14 20:36:38 +01:00 · 2013-09-16 18:41:16 +00:00
parent e7af4d6c72
commit 5db368ce7b
4 changed files with 116 additions and 2 deletions
--- a/include/swift/Basic/SourceManager.h
+++ b/include/swift/Basic/SourceManager.h
@@ -110,6 +110,11 @@ public:
  /// \brief Returns the distance in bytes between the given source locations.
  ///
  /// \param Start The location the distance is measured from.
  /// \param End The location the distance is measured to.
  ///
  /// NOTE(review): the result is unsigned, so presumably \p End must not
  /// precede \p Start and both must lie in the same buffer -- confirm against
  /// the definition.
  unsigned getByteDistance(SourceLoc Start, SourceLoc End) const;
  /// Translates a byte offset inside a buffer into a SourceLoc.
  ///
  /// \param BufferID The buffer whose start location is used as the base.
  /// \param Offset Number of bytes to advance from the start of that buffer.
  ///
  /// \returns The start location of the buffer advanced by \p Offset.
  SourceLoc getLocForOffset(unsigned BufferID, unsigned Offset) const {
    auto BufferStartLoc = getLocForBufferStart(BufferID);
    return BufferStartLoc.getAdvancedLoc(Offset);
  }
  std::pair<unsigned, unsigned> getLineAndColumn(SourceLoc Loc,
                                                 int BufferID = -1) const {
    assert(Loc.isValid());
--- a/include/swift/Parse/Lexer.h
+++ b/include/swift/Parse/Lexer.h
@@ -192,6 +192,14 @@ public:
  /// \param Loc The source location of the beginning of a token.
  static SourceLoc getLocForEndOfToken(SourceManager &SM, SourceLoc Loc);
  /// Return the start location of the token that the offset in the given buffer
  /// points to.
  ///
  /// If the offset points to whitespace the source location will point to the
  /// exact offset.
  ///
  /// If the offset lies inside an interpolated expression of a string literal,
  /// the result is the start of the token within that expression rather than
  /// the start of the enclosing string literal.
  ///
  /// \param SM The source manager used to look up the buffer and to build the
  /// resulting location.
  /// \param BufferID The ID of the buffer the offset refers to.
  /// \param Offset The byte offset from the start of the buffer.
  ///
  /// \returns The start location of the token at \p Offset, or a
  /// default-constructed SourceLoc if no memory buffer is found for
  /// \p BufferID or \p Offset is greater than the buffer size.
  static SourceLoc getLocForStartOfToken(SourceManager &SM, unsigned BufferID,
                                         unsigned Offset);
  /// \brief Determines if the given string is a valid non-operator
  /// identifier.
  ///
  /// \param identifier The string to test.
  ///
  /// \returns true if \p identifier is a valid non-operator identifier.
  static bool isIdentifier(StringRef identifier);
--- a/lib/Parse/Lexer.cpp
+++ b/lib/Parse/Lexer.cpp
@@ -1486,3 +1486,88 @@ SourceLoc Lexer::getLocForEndOfToken(SourceManager &SM, SourceLoc Loc) {
  return Loc.getAdvancedLoc(Length);
 }
 /// Lexes the part of buffer \p BufferID delimited by \p BufferStart and
 /// \p BufferEnd and returns the start location of the token that covers
 /// \p Offset.
 ///
 /// When the covering token is a string literal and \p Offset falls inside one
 /// of its interpolated expression segments, only that segment is lexed again
 /// (recursively) so that the token inside the expression is reported instead
 /// of the whole literal.
 ///
 /// \param SM Source manager used to translate between locations and offsets.
 /// \param BufferID The buffer being lexed.
 /// \param Offset Byte offset, relative to the buffer start, to look up.
 /// \param BufferStart Offset handed to the Lexer as the start of its range.
 /// \param BufferEnd Offset handed to the Lexer as the end of its range.
 /// \param InInterpolatedString True when this call lexes an interpolated
 /// expression segment; only used to assert that the token found at
 /// \p Offset is not itself a string literal in that case.
 ///
 /// \returns The location of the covering token, or the location of
 /// \p Offset itself when no lexed token covers it.
 static SourceLoc getLocForStartOfTokenInBuf(SourceManager &SM,
                                            unsigned BufferID,
                                            unsigned Offset,
                                            unsigned BufferStart,
                                            unsigned BufferEnd,
                                            bool InInterpolatedString) {
  Lexer RangeLexer(SM, BufferID, nullptr, /*InSILMode=*/false,
                   /*KeepComments=*/false, BufferStart, BufferEnd);

  Token Current;
  while (true) {
    RangeLexer.lex(Current);
    unsigned TokBegin = SM.getLocOffsetInBuffer(Current.getLoc(), BufferID);

    // The token starts after the requested offset: the offset was skipped
    // over, so it points into whitespace and no token covers it.
    if (Offset < TokBegin)
      break;

    // The token ends at or before the requested offset: move on to the next
    // token unless the end of the lexed range has been reached.
    if (Offset >= TokBegin+Current.getLength()) {
      if (Current.is(tok::eof))
        break;
      continue;
    }

    // From here on the current token covers the requested offset.
    if (Current.is(tok::string_literal)) {
      assert(!InInterpolatedString);

      SmallVector<Lexer::StringSegment, 4> Pieces;
      Lexer::getStringLiteralSegments(Current, Pieces, /*Diags=*/nullptr);

      for (auto &Piece : Pieces) {
        unsigned PieceBegin = SM.getLocOffsetInBuffer(Piece.Loc, BufferID);
        // Segments starting past the offset cannot contain it.
        if (PieceBegin > Offset)
          break;

        unsigned PieceEnd = PieceBegin+Piece.Length;
        if (Piece.Kind != Lexer::StringSegment::Expr || Offset >= PieceEnd)
          continue;

        // The offset is inside an interpolated expression: lex just that
        // segment to find the token within it.
        return getLocForStartOfTokenInBuf(SM, BufferID, Offset,
                                          /*BufferStart=*/PieceBegin,
                                          /*BufferEnd=*/PieceEnd,
                                          /*InInterpolatedString=*/true);
      }
    }

    return Current.getLoc();
  }

  // No token covers the offset; hand back the location of the offset itself.
  return SM.getLocForOffset(BufferID, Offset);
 }
 /// Finds the start of the token that byte \p Offset of buffer \p BufferID
 /// belongs to.
 ///
 /// The search does not lex the whole buffer: it walks back from \p Offset to
 /// the start of the line (or of the buffer) and lexes from there to the end
 /// of the buffer.
 ///
 /// \returns A default-constructed SourceLoc if no memory buffer is found for
 /// \p BufferID or \p Offset is greater than the buffer size; the location of
 /// \p Offset itself if the byte there is a space, tab, CR or LF; otherwise
 /// the result of lexing from the start of the line.
 SourceLoc Lexer::getLocForStartOfToken(SourceManager &SM, unsigned BufferID,
                                        unsigned Offset) {
  const llvm::MemoryBuffer *MemBuffer = SM->getMemoryBuffer(BufferID);
  if (!MemBuffer)
    return SourceLoc();

  StringRef Text = MemBuffer->getBuffer();
  if (Offset > Text.size())
    return SourceLoc();

  const char *TextBegin = Text.data();
  const char *Target = TextBegin+Offset;

  // NOTE(review): Offset == Text.size() is accepted, so the byte one past the
  // buffer contents is read here -- presumably the buffer is NUL-terminated;
  // confirm.
  switch (Target[0]) {
  case '\n':
  case '\r':
  case ' ':
  case '\t':
    // Whitespace is not part of a token; report the offset as is.
    return SM.getLocForOffset(BufferID, Offset);
  default:
    break;
  }

  // Walk backwards to the first byte after the previous line break, or to
  // the very beginning of the buffer if there is none.
  const char *LineBegin = Target;
  for (; LineBegin != TextBegin; --LineBegin) {
    if (LineBegin[0] == '\n' || LineBegin[0] == '\r') {
      ++LineBegin;
      break;
    }
  }

  return getLocForStartOfTokenInBuf(SM, BufferID, Offset,
                                    /*BufferStart=*/LineBegin-TextBegin,
                                    /*BufferEnd=*/Text.size(),
                                    /*InInterpolatedString=*/false);
 }
--- a/unittests/Parse/LexerTests.cpp
+++ b/unittests/Parse/LexerTests.cpp
@@ -9,9 +9,8 @@ using namespace llvm;
 // The test fixture.
 class LexerTest : public ::testing::Test {
  SourceManager SourceMgr;
 public:
  SourceManager SourceMgr;
  std::vector<Token> tokenizeAndKeepEOF(unsigned BufferID) {
    Lexer L(SourceMgr, BufferID, /*Diags=*/nullptr, /*InSILMode=*/false);
@@ -257,3 +256,20 @@ TEST_F(LexerTest, RestoreStopAtCodeCompletion) {
  ASSERT_EQ(tok::eof, Tok.getKind());
 }
 // Checks that Lexer::getLocForStartOfToken() maps a byte offset to the start
 // of the token containing it, for identifiers, whitespace, a plain string
 // literal and a string literal with an interpolated expression.
 TEST_F(LexerTest, getLocForStartOfToken) {
  // Byte layout of the source:
  //   0-2   aaa        3-6   whitespace (space, newline, space, tab)
  //   7-9   bbb        10    space
  //   11-17 "hello"    18    space
  //   19-28 "-\(val)-" with the interpolated identifier `val` at 23-25
  const char *Source = "aaa \n \tbbb \"hello\" \"-\\(val)-\"";
  MemoryBuffer *Buf = MemoryBuffer::getMemBuffer(Source);
  unsigned BufferID = SourceMgr->AddNewSourceBuffer(Buf, llvm::SMLoc());
  // First is character offset, second is its token offset.
  unsigned Offs[][2] =
    { {1, 0}, {2, 0}, {3, 3}, {4, 4}, {6, 6}, {9, 7}, {14, 11},
      // interpolated string
      {20, 19}, {23, 23}, {24, 23}, {25, 23}, {26, 26}, {27, 19} };
  for (auto Pair : Offs) {
    // Expected value first, matching the other assertions in this file; the
    // streamed message identifies which table entry failed.
    ASSERT_EQ(SourceMgr.getLocForOffset(BufferID, Pair[1]),
              Lexer::getLocForStartOfToken(SourceMgr, BufferID, Pair[0]))
        << "for character offset " << Pair[0];
  }
 }