mirror of
https://github.com/apple/swift.git
synced 2025-12-21 12:14:44 +01:00
in expr-dot and expr-postfix that can be typechecked without typechecking the beginning of the function body. Swift SVN r6198
292 lines
9.3 KiB
C++
292 lines
9.3 KiB
C++
//===--- Lexer.h - Swift Language Lexer -------------------------*- C++ -*-===//
|
|
//
|
|
// This source file is part of the Swift.org open source project
|
|
//
|
|
// Copyright (c) 2014 - 2015 Apple Inc. and the Swift project authors
|
|
// Licensed under Apache License v2.0 with Runtime Library Exception
|
|
//
|
|
// See http://swift.org/LICENSE.txt for license information
|
|
// See http://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
//
|
|
// This file defines the Lexer interface.
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
#ifndef SWIFT_LEXER_H
|
|
#define SWIFT_LEXER_H
|
|
|
|
#include "Token.h"
|
|
#include "llvm/ADT/SmallVector.h"
|
|
#include "swift/Basic/SourceLoc.h"
|
|
|
|
namespace llvm {
|
|
class SourceMgr;
|
|
}
|
|
|
|
namespace swift {
|
|
class DiagnosticEngine;
|
|
class Identifier;
|
|
class InFlightDiagnostic;
|
|
class ASTContext;
|
|
|
|
template<typename ...T> struct Diag;
|
|
|
|
class Lexer {
|
|
llvm::SourceMgr &SourceMgr;
|
|
DiagnosticEngine *Diags;
|
|
|
|
/// Pointer to the first character of the buffer.
|
|
const char *BufferStart;
|
|
|
|
/// Pointer to one past the end character of the buffer. Because the buffer
|
|
/// is always NUL-terminated, this points to the NUL terminator.
|
|
const char *BufferEnd;
|
|
|
|
/// Pointer to the artificial EOF that is located before BufferEnd. Useful
|
|
/// for lexing subranges of a buffer.
|
|
const char *ArtificialEOF;
|
|
|
|
/// Pointer to the next not consumed character.
|
|
const char *CurPtr;
|
|
|
|
Token NextToken;
|
|
|
|
/// InSILMode - This is true if we're lexing a .sil file instead of a .swift
|
|
/// file. This enables the 'sil' keyword.
|
|
bool InSILMode;
|
|
|
|
/// InSILBody - This is true when we're lexing the body of a SIL declaration
|
|
/// in a SIL file. This enables some context-sensitive lexing.
|
|
bool InSILBody = false;
|
|
|
|
/// \brief Set to true to return comment tokens, instead of skipping them.
|
|
bool KeepComments = false;
|
|
|
|
/// \brief Set to true if we should produce a code completion token when we
|
|
/// hit \c ArtificialEOF.
|
|
bool DoingCodeCompletion = false;
|
|
|
|
Lexer(const Lexer&) = delete;
|
|
void operator=(const Lexer&) = delete;
|
|
|
|
Lexer(llvm::SourceMgr &SourceMgr, llvm::StringRef Buffer,
|
|
DiagnosticEngine *Diags, const char *CurrentPosition,
|
|
bool InSILMode, bool KeepComments);
|
|
|
|
public:
|
|
/// \brief Create a lexer for \p Buffer that starts at the first character of
/// the buffer.
///
/// All arguments are forwarded to the private constructor, with
/// \c Buffer.begin() as the current position.
///
/// \param KeepComments whether comment tokens should be returned; off by
///        default.
Lexer(llvm::StringRef Buffer, llvm::SourceMgr &SourceMgr,
      DiagnosticEngine *Diags, bool InSILMode, bool KeepComments = false)
    : Lexer(SourceMgr, Buffer, Diags, /*CurrentPosition=*/Buffer.begin(),
            InSILMode, KeepComments) {
}
|
|
|
|
/// \brief Snapshot of a lexer position; lexer state can be saved to and
/// restored from objects of this class.
///
/// Client code can only default-construct (which yields an invalid state),
/// copy and assign these objects.  Everything else is private and reserved
/// for the befriended Lexer.
class State {
  friend class Lexer;

  /// Saved buffer position; stays null in a default-constructed state.
  const char *CurPtr = nullptr;

  /// Wrap a concrete buffer position.
  explicit State(const char *CurPtr) : CurPtr(CurPtr) {}

  /// A state is valid once it carries a non-null position.
  bool isValid() const { return CurPtr != nullptr; }

public:
  /// Create an invalid (null) state.
  State() {}
};
|
|
|
|
/// \brief Create a sub-lexer that lexes from the same buffer, but scans
|
|
/// a subrange of the buffer.
|
|
///
|
|
/// \param Parent the parent lexer that scans the whole buffer
|
|
/// \param BeginState start of the subrange
|
|
/// \param EndState end of the subrange
|
|
Lexer(Lexer &Parent, State BeginState, State EndState,
|
|
llvm::SourceMgr &SourceMgr, DiagnosticEngine *Diags, bool InSILMode)
|
|
: Lexer(SourceMgr,
|
|
StringRef(BeginState.CurPtr, Parent.BufferEnd - BeginState.CurPtr),
|
|
Diags, BeginState.CurPtr, InSILMode, Parent.isKeepingComments()) {
|
|
assert(BeginState.CurPtr >= Parent.BufferStart &&
|
|
BeginState.CurPtr <= Parent.BufferEnd &&
|
|
"Begin position out of range");
|
|
// If the parent lexer is doing code completion and the completion position
|
|
// is in this subrange, then we should stop at that point, too.
|
|
if (Parent.DoingCodeCompletion &&
|
|
Parent.ArtificialEOF >= BufferStart &&
|
|
Parent.ArtificialEOF <= BufferEnd) {
|
|
DoingCodeCompletion = true;
|
|
ArtificialEOF = Parent.ArtificialEOF;
|
|
} else
|
|
ArtificialEOF = EndState.CurPtr;
|
|
}
|
|
|
|
/// Report whether this lexer returns comment tokens (the KeepComments flag).
bool isKeepingComments() const {
  return KeepComments;
}
|
|
|
|
/// \brief Put the lexer into code completion mode.
///
/// The artificial EOF is placed \p Offset characters after the start of the
/// buffer and DoingCodeCompletion is switched on.
///
/// \param Offset completion position relative to BufferStart; must not be
///        larger than the buffer length.
void setCodeCompletion(unsigned Offset) {
  // Validate the offset before forming the pointer: pointer arithmetic that
  // leaves the buffer is not something a later comparison can check reliably.
  assert(static_cast<long long>(Offset) <= BufferEnd - BufferStart &&
         "code completion offset is past the end of the buffer");
  ArtificialEOF = BufferStart + Offset;
  DoingCodeCompletion = true;
}
|
|
|
|
/// Return the BufferEnd pointer (one past the last character of the buffer).
const char *getBufferEnd() const {
  return BufferEnd;
}
|
|
|
|
/// Hand out the pending token.
///
/// \p Result receives a copy of NextToken.  Unless that token is tok::eof,
/// lexImpl() is then called (presumably to prepare the following token; its
/// definition is not in this header).  Once the pending token is EOF this
/// function no longer advances, so further calls keep yielding EOF.
void lex(Token &Result) {
  Result = NextToken;
  if (!Result.isNot(tok::eof))
    return;
  lexImpl();
}
|
|
|
|
/// peekNextToken - Return the next token to be returned by Lex without
|
|
/// actually lexing it.
|
|
const Token &peekNextToken() const { return NextToken; }
|
|
|
|
/// \brief Returns the lexer state for the beginning of the given token.
|
|
/// After restoring the state, lexer will return this token and continue from
|
|
/// there.
|
|
State getStateForBeginningOfToken(const Token &Tok) const {
|
|
const char *Ptr = Tok.getText().begin();
|
|
// Skip whitespace backwards until we hit a newline. This is needed to
|
|
// correctly lex the token if it is at the beginning of the line.
|
|
while (Ptr >= BufferStart + 1) {
|
|
char C = Ptr[-1];
|
|
if (C == ' ' || C == '\t' || C == 0) {
|
|
Ptr--;
|
|
continue;
|
|
}
|
|
if (C == '\n' || C == '\r') {
|
|
Ptr--;
|
|
break;
|
|
}
|
|
break;
|
|
}
|
|
return State(Ptr);
|
|
}
|
|
|
|
/// \brief Restore the lexer state to a given one, that can be located either
|
|
/// before or after the current position.
|
|
void restoreState(State S) {
|
|
assert(S.isValid());
|
|
assert(BufferStart <= S.CurPtr && S.CurPtr <= BufferEnd &&
|
|
"state for the wrong buffer");
|
|
CurPtr = S.CurPtr;
|
|
lexImpl();
|
|
}
|
|
|
|
/// \brief Restore the lexer state to a given state that is located before
|
|
/// current position.
|
|
void backtrackToState(State S) {
|
|
assert(S.CurPtr <= CurPtr && "can't backtrack forward");
|
|
restoreState(S);
|
|
}
|
|
|
|
/// Check whether the position \p TokenOffset characters after BufferStart
/// falls into the half-open range [\p Begin, \p End).
///
/// Both states must be valid and \p Begin must not be located after \p End.
///
/// \returns true if the position is at or after \p Begin and strictly before
///          \p End.
bool stateRangeHasCodeCompletionToken(State Begin, State End,
                                      unsigned TokenOffset) {
  assert(Begin.isValid() && End.isValid());
  assert(Begin.CurPtr <= End.CurPtr && "states don't form a range");

  const char *const Target = BufferStart + TokenOffset;
  if (Target < Begin.CurPtr)
    return false;
  return Target < End.CurPtr;
}
|
|
|
|
/// \brief Retrieve the source location that points just past the
|
|
/// end of the token refered to by \c Loc.
|
|
///
|
|
/// \param SM The source manager in which the given source location
|
|
/// resides.
|
|
///
|
|
/// \param Loc The source location of the beginning of a token.
|
|
static SourceLoc getLocForEndOfToken(llvm::SourceMgr &SM, SourceLoc Loc);
|
|
|
|
/// \brief Determines if the given string is a valid non-operator
|
|
/// identifier.
|
|
static bool isIdentifier(llvm::StringRef identifier);
|
|
|
|
/// Return the source location of the first character of the buffer, built
/// from the BufferStart pointer.
SourceLoc getLocForStartOfBuffer() const {
  const llvm::SMLoc Start = llvm::SMLoc::getFromPointer(BufferStart);
  return SourceLoc(Start);
}
|
|
|
|
/// StringSegment - A segment of a (potentially interpolated) string.
|
|
struct StringSegment {
|
|
enum : char { Literal, Expr } Kind;
|
|
/// String data (not quoted). It might not point into the original source
|
|
/// buffer.
|
|
StringRef Data;
|
|
SourceRange Range;
|
|
|
|
static StringSegment getLiteral(StringRef Str, SourceRange Range) {
|
|
StringSegment Result;
|
|
Result.Kind = Literal;
|
|
Result.Data = Str;
|
|
Result.Range = Range;
|
|
return Result;
|
|
}
|
|
|
|
static StringSegment getExpr(StringRef Str, SourceRange Range) {
|
|
StringSegment Result;
|
|
Result.Kind = Expr;
|
|
Result.Data = Str;
|
|
Result.Range = Range;
|
|
return Result;
|
|
}
|
|
};
|
|
|
|
/// getEncodedStringLiteral - Given a string literal token, compute the bytes
|
|
/// that the actual string literal should codegen to along with any
|
|
/// sequences that represent interpolated expressions.
|
|
/// If a copy needs to be made, it will be allocated out of the ASTContext
|
|
/// allocator.
|
|
void getEncodedStringLiteral(const Token &Str, ASTContext &Ctx,
|
|
llvm::SmallVectorImpl<StringSegment> &Segments);
|
|
/// getEncodedCharacterLiteral - Return the UTF32 codepoint for the specified
|
|
/// character literal.
|
|
uint32_t getEncodedCharacterLiteral(const Token &Str);
|
|
|
|
/// Declaration only; the definition is not part of this header.  Takes the
/// location as a raw character pointer and a diagnostic ID of type Diag<>,
/// i.e. one without diagnostic arguments, and returns an InFlightDiagnostic.
InFlightDiagnostic diagnose(const char *Loc, Diag<> ID);
|
|
|
|
/// Wrap a raw character pointer into a SourceLoc by way of
/// llvm::SMLoc::getFromPointer.
static SourceLoc getSourceLoc(const char *Loc) {
  const llvm::SMLoc Raw = llvm::SMLoc::getFromPointer(Loc);
  return SourceLoc(Raw);
}
|
|
|
|
/// getTokenKind - Retrieve the token kind for the given text, which must
|
|
/// fall within the given source buffer.
|
|
tok getTokenKind(StringRef Text);
|
|
|
|
// Declaration only; the definition is not part of this header.
// NOTE(review): this is declared in the public section, whereas the other
// lex* routines (lexNumber, lexIdentifier, ...) are declared private --
// confirm that public access is intended.
void lexHexNumber();
|
|
|
|
/// SILBodyRAII - This helper class is used when parsing a SIL body to inform
|
|
/// the lexer that SIL-specific lexing should be enabled.
|
|
struct SILBodyRAII {
|
|
Lexer &L;
|
|
SILBodyRAII(Lexer &L) : L(L) {
|
|
assert(!L.InSILBody && "Already in a sil body?");
|
|
L.InSILBody = true;
|
|
}
|
|
~SILBodyRAII() {
|
|
assert(L.InSILBody && "Left sil body already?");
|
|
L.InSILBody = false;
|
|
}
|
|
SILBodyRAII(const SILBodyRAII&) = delete;
|
|
void operator=(const SILBodyRAII&) = delete;
|
|
};
|
|
|
|
private:
|
|
void lexImpl();
|
|
void formToken(tok Kind, const char *TokStart);
|
|
|
|
void skipSlashSlashComment();
|
|
void skipSlashStarComment();
|
|
void lexIdentifier();
|
|
void lexDollarIdent();
|
|
void lexOperatorIdentifier();
|
|
void lexNumber();
|
|
|
|
unsigned lexCharacter(const char *&CurPtr,
|
|
bool StopAtDoubleQuote, bool EmitDiagnostics);
|
|
void lexCharacterLiteral();
|
|
void lexStringLiteral();
|
|
};
|
|
|
|
|
|
} // end namespace swift
|
|
|
|
#endif
|