Files
swift-mirror/lib/Parse/Parser.cpp
Jordan Rose 417b5d3982 Merge TranslationUnit into Module, and eliminate the term "translation unit".
This completes the FileUnit refactoring. A module consists of multiple
FileUnits, which provide decls from various file-like sources. I say
"file-like" because the Builtin module is implemented with a single
BuiltinUnit, and imported Clang modules are just a single FileUnit source
within a module.

Most modules, therefore, contain a single file unit; only the main module
will contain multiple source files (and eventually partial AST files).

The term "translation unit" has been scrubbed from the project. To refer
to the context of declarations outside of any other declarations, use
"top-level" or "module scope". To refer to a .swift file or its DeclContext,
use "source file". To refer to a single unit of compilation, use "module",
since the model is that an entire module will be compiled with a single
driver call. (It will still be possible to compile a single source file
through the direct-to-frontend interface, but only in the context of the
whole module.)

Swift SVN r10837
2013-12-05 01:51:15 +00:00

553 lines
17 KiB
C++

//===--- Parser.cpp - Swift Language Parser -------------------------------===//
//
// This source file is part of the Swift.org open source project
//
// Copyright (c) 2014 - 2015 Apple Inc. and the Swift project authors
// Licensed under Apache License v2.0 with Runtime Library Exception
//
// See http://swift.org/LICENSE.txt for license information
// See http://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
//
//===----------------------------------------------------------------------===//
//
// This file implements the Swift parser.
//
//===----------------------------------------------------------------------===//
#include "swift/Parse/Parser.h"
#include "swift/Subsystems.h"
#include "swift/AST/ASTWalker.h"
#include "swift/AST/Diagnostics.h"
#include "swift/AST/PrettyStackTrace.h"
#include "swift/Basic/SourceManager.h"
#include "swift/Parse/Lexer.h"
#include "swift/Parse/CodeCompletionCallbacks.h"
#include "swift/Parse/DelayedParsingCallbacks.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/SaveAndRestore.h"
#include "llvm/ADT/PointerUnion.h"
#include "llvm/ADT/Twine.h"
using namespace swift;
// Out-of-line definition of DelayedParsingCallbacks::anchor(); the body is
// intentionally empty.
// NOTE(review): presumably the LLVM "anchor" idiom that pins the class's
// vtable to this file -- confirm against the class declaration.
void DelayedParsingCallbacks::anchor() {}
namespace {
/// To assist debugging parser crashes, tell us the location of the
/// current token.
class PrettyStackTraceParser : public llvm::PrettyStackTraceEntry {
Parser &P;
public:
PrettyStackTraceParser(Parser &P) : P(P) {}
void print(llvm::raw_ostream &out) const {
out << "With parser at source location: ";
P.Tok.getLoc().print(out, P.Context.SourceMgr);
out << '\n';
}
};
/// A visitor that does delayed parsing of function bodies.
class ParseDelayedFunctionBodies : public ASTWalker {
PersistentParserState &ParserState;
CodeCompletionCallbacksFactory *CodeCompletionFactory;
public:
ParseDelayedFunctionBodies(PersistentParserState &ParserState,
CodeCompletionCallbacksFactory *Factory)
: ParserState(ParserState), CodeCompletionFactory(Factory) {}
bool walkToDeclPre(Decl *D) override {
if (auto AFD = dyn_cast<AbstractFunctionDecl>(D)) {
if (AFD->getBodyKind() != FuncDecl::BodyKind::Unparsed)
return false;
parseFunctionBody(AFD);
return true;
}
return true;
}
private:
void parseFunctionBody(AbstractFunctionDecl *AFD) {
assert(AFD->getBodyKind() == FuncDecl::BodyKind::Unparsed);
SourceFile &SF = *AFD->getDeclContext()->getParentSourceFile();
SourceManager &SourceMgr = SF.getASTContext().SourceMgr;
unsigned BufferID = SourceMgr.findBufferContainingLoc(AFD->getLoc());
Parser TheParser(BufferID, SF, nullptr, &ParserState);
std::unique_ptr<CodeCompletionCallbacks> CodeCompletion;
if (CodeCompletionFactory) {
CodeCompletion.reset(
CodeCompletionFactory->createCodeCompletionCallbacks(TheParser));
TheParser.setCodeCompletionCallbacks(CodeCompletion.get());
}
TheParser.parseAbstractFunctionBodyDelayed(AFD);
if (CodeCompletion)
CodeCompletion->doneParsing();
}
};
/// Parses the delayed declaration recorded in \p ParserState, if there is one.
///
/// A parser is created for the buffer containing the delayed declaration's
/// location. When \p CodeCompletionFactory is non-null, callbacks created by
/// it are installed on the parser and told when parsing is done.
void parseDelayedDecl(PersistentParserState &ParserState,
                      CodeCompletionCallbacksFactory *CodeCompletionFactory) {
  if (!ParserState.hasDelayedDecl())
    return;

  SourceFile &File =
      *ParserState.getDelayedDeclContext()->getParentSourceFile();
  SourceManager &SM = File.getASTContext().SourceMgr;
  unsigned BufID = SM.findBufferContainingLoc(ParserState.getDelayedDeclLoc());
  Parser DeclParser(BufID, File, nullptr, &ParserState);

  std::unique_ptr<CodeCompletionCallbacks> Callbacks;
  if (CodeCompletionFactory) {
    Callbacks.reset(
        CodeCompletionFactory->createCodeCompletionCallbacks(DeclParser));
    DeclParser.setCodeCompletionCallbacks(Callbacks.get());
  }

  // Dispatch on what kind of declaration was delayed.
  using DelayedKind = PersistentParserState::DelayedDeclKind;
  switch (ParserState.getDelayedDeclKind()) {
  case DelayedKind::TopLevelCodeDecl:
    DeclParser.parseTopLevelCodeDeclDelayed();
    break;

  case DelayedKind::Decl:
    DeclParser.parseDeclDelayed();
    break;
  }

  if (Callbacks)
    Callbacks->doneParsing();
}
} // unnamed namespace
/// Parses top-level content from buffer \p BufferID into \p SF.
///
/// \param SF               source file the parser is created for.
/// \param BufferID         ID of the buffer to parse.
/// \param Done             must be non-null; set to whether the parser's
///                         current token is end-of-file once parseTopLevel()
///                         has returned.
/// \param SIL              forwarded to the Parser constructor.
/// \param PersistentState  forwarded to the Parser constructor.
/// \param DelayedParseCB   installed on the parser when non-null.
/// \returns the value returned by Parser::parseTopLevel().
bool swift::parseIntoSourceFile(SourceFile &SF,
                                unsigned BufferID,
                                bool *Done,
                                SILParserState *SIL,
                                PersistentParserState *PersistentState,
                                DelayedParsingCallbacks *DelayedParseCB) {
  Parser TheParser(BufferID, SF, SIL, PersistentState);
  // Registered so that a crash while parsing reports the current location.
  PrettyStackTraceParser CrashInfo(TheParser);

  if (DelayedParseCB)
    TheParser.setDelayedParsingCallbacks(DelayedParseCB);

  const bool TopLevelResult = TheParser.parseTopLevel();
  *Done = TheParser.Tok.is(tok::eof);
  return TopLevelResult;
}
/// Parses everything whose parsing was delayed under \p DC.
///
/// Function bodies are handled by walking \p DC with
/// ParseDelayedFunctionBodies. The delayed declaration stored in
/// \p PersistentState is parsed only when \p CodeCompletionFactory is
/// non-null.
void swift::performDelayedParsing(
    DeclContext *DC, PersistentParserState &PersistentState,
    CodeCompletionCallbacksFactory *CodeCompletionFactory) {
  ParseDelayedFunctionBodies BodyWalker(PersistentState,
                                        CodeCompletionFactory);
  DC->walkContext(BodyWalker);

  if (!CodeCompletionFactory)
    return;

  parseDelayedDecl(PersistentState, CodeCompletionFactory);
}
/// \brief Tokenizes a string literal, taking into account string interpolation.
///
/// The literal is split into segments and the resulting tokens are appended
/// to \p Toks:
///  - a literal segment becomes one tok::string_literal token; the text of
///    the first segment is extended one character to the left and that of the
///    last segment one character to the right, so the quotes are included;
///  - an interpolated-expression segment is tokenized with swift::tokenize()
///    (keeping comments); if it is the first or last segment, an extra
///    one-character tok::string_literal token is emitted for the adjacent
///    quote.
///
/// \param Tok     the token to split; must be a tok::string_literal.
/// \param SM      source manager used to turn locations into buffer offsets.
/// \param BufID   ID of the buffer that contains \p Tok.
/// \param Buffer  buffer whose start pointer the offsets are applied to; the
///                produced token text points into it.
/// \param Toks    vector the produced tokens are appended to.
static void getStringPartTokens(const Token &Tok, SourceManager &SM,
                                int BufID, const llvm::MemoryBuffer *Buffer,
                                std::vector<Token> &Toks) {
  assert(Tok.is(tok::string_literal));

  // Appends one tok::string_literal token spelled by Text.
  auto appendStringToken = [&Toks](StringRef Text) {
    Token NewTok;
    NewTok.setToken(tok::string_literal, Text);
    Toks.push_back(NewTok);
  };

  SmallVector<Lexer::StringSegment, 4> Segments;
  Lexer::getStringLiteralSegments(Tok, Segments, /*Diags=*/nullptr);

  for (unsigned i = 0, e = Segments.size(); i != e; ++i) {
    Lexer::StringSegment &Seg = Segments[i];
    const bool isFirst = i == 0;
    const bool isLast = i == e - 1;

    if (Seg.Kind == Lexer::StringSegment::Literal) {
      SourceLoc Loc = Seg.Loc;
      unsigned Len = Seg.Length;
      if (isFirst) {
        // Include the quote.
        Loc = Loc.getAdvancedLoc(-1);
        ++Len;
      }
      if (isLast) {
        // Include the quote.
        ++Len;
      }
      appendStringToken(StringRef(Buffer->getBufferStart() +
                                      SM.getLocOffsetInBuffer(Loc, BufID),
                                  Len));
      continue;
    }

    assert(Seg.Kind == Lexer::StringSegment::Expr &&
           "new enumerator was introduced ?");
    unsigned Offset = SM.getLocOffsetInBuffer(Seg.Loc, BufID);
    unsigned EndOffset = Offset + Seg.Length;

    if (isFirst) {
      // Add a token for the quote character, which sits two characters
      // before the start of the expression segment.
      appendStringToken(StringRef(Buffer->getBufferStart() + Offset - 2, 1));
    }

    std::vector<Token> NewTokens = swift::tokenize(SM, BufID, Offset,
                                                   EndOffset,
                                                   /*KeepComments=*/true);
    Toks.insert(Toks.end(), NewTokens.begin(), NewTokens.end());

    if (isLast) {
      // Add a token for the quote character right after the segment.
      appendStringToken(StringRef(Buffer->getBufferStart() + EndOffset, 1));
    }
  }
}
/// Lexes a range of buffer \p BufferID into a vector of tokens.
///
/// \param SM        source manager that owns the buffer.
/// \param BufferID  ID of the buffer to lex.
/// \param Offset    start offset handed to the lexer.
/// \param EndOffset end offset handed to the lexer; when both \p Offset and
///                  \p EndOffset are 0 the buffer size is used instead.
/// \param KeepComments  forwarded to the Lexer.
/// \param TokenizeInterpolatedString  when true, every string literal token
///                  is replaced by the tokens getStringPartTokens() produces
///                  for it.
/// \returns the lexed tokens, without the terminating EOF token.
std::vector<Token> swift::tokenize(SourceManager &SM, unsigned BufferID,
                                   unsigned Offset, unsigned EndOffset,
                                   bool KeepComments,
                                   bool TokenizeInterpolatedString) {
  auto *Buffer = SM->getMemoryBuffer(BufferID);
  // An all-zero range is shorthand for "the whole buffer".
  if (Offset == 0 && EndOffset == 0)
    EndOffset = Buffer->getBufferSize();

  Lexer Lex(SM, BufferID, /*Diags=*/nullptr, /*InSILMode=*/false, KeepComments,
            Offset, EndOffset);

  std::vector<Token> Result;
  while (true) {
    Result.emplace_back();
    Lex.lex(Result.back());

    if (Result.back().is(tok::string_literal) && TokenizeInterpolatedString) {
      // Swap the literal for its constituent parts.
      Token Literal = Result.back();
      Result.pop_back();
      getStringPartTokens(Literal, SM, BufferID, Buffer, Result);
    }

    if (Result.back().is(tok::eof))
      break;
  }

  Result.pop_back(); // Remove EOF.
  return Result;
}
//===----------------------------------------------------------------------===//
// Setup and Helper Methods
//===----------------------------------------------------------------------===//
/// Constructs a parser for buffer \p BufferID of source file \p SF.
///
/// A Lexer is allocated for the buffer, in SIL mode when \p SIL is non-null
/// and with comments not kept. When \p PersistentState is null the parser
/// creates and owns a fresh PersistentParserState. If the state hands back a
/// valid parser position located in this buffer, that position is restored.
Parser::Parser(unsigned BufferID, SourceFile &SF, SILParserState *SIL,
               PersistentParserState *PersistentState)
    : SourceMgr(SF.getASTContext().SourceMgr),
      BufferID(BufferID),
      Diags(SF.getASTContext().Diags),
      SF(SF),
      L(new Lexer(SF.getASTContext().SourceMgr, BufferID,
                  &SF.getASTContext().Diags,
                  /*InSILMode=*/SIL != nullptr, /*KeepComments=*/false)),
      SIL(SIL),
      Context(SF.getASTContext()) {
  if (PersistentState) {
    State = PersistentState;
  } else {
    OwnedState.reset(new PersistentParserState());
    State = OwnedState.get();
  }

  // Mark the current token with a sentinel kind so we can tell the lexer has
  // not been primed yet. tok::unknown would not do: the lexer can produce it.
  Tok.setKind(tok::NUM_TOKENS);

  auto SavedPos = State->takeParserPosition();
  const bool ResumeHere =
      SavedPos.isValid() &&
      SourceMgr.findBufferContainingLoc(SavedPos.Loc) == BufferID;
  if (ResumeHere) {
    auto ResumePos = getParserPosition(SavedPos);
    restoreParserPosition(ResumePos);
  }
}
/// Frees the Lexer that the constructor allocated with new.
Parser::~Parser() { delete L; }
/// peekToken - Ask the lexer for its next token without consuming the current
/// one; this is the token consumeToken would install.
const Token &Parser::peekToken() {
  const Token &Upcoming = L->peekNextToken();
  return Upcoming;
}
/// Consumes the current token: lexes the next token into Tok and records the
/// consumed token's location in PreviousLoc.
///
/// Must not be called when the current token is end-of-file.
///
/// \returns the location of the token that was consumed.
SourceLoc Parser::consumeToken() {
  assert(Tok.isNot(tok::eof) && "Lexing past eof!");

  SourceLoc ConsumedLoc = Tok.getLoc();
  L->lex(Tok);
  PreviousLoc = ConsumedLoc;
  return ConsumedLoc;
}
/// Returns the end location, as computed by Lexer::getLocForEndOfToken, of the
/// token starting at PreviousLoc.
SourceLoc Parser::getEndOfPreviousLoc() {
  SourceLoc EndLoc = Lexer::getLocForEndOfToken(SourceMgr, PreviousLoc);
  return EndLoc;
}
/// Consumes the '<' that the current token starts with.
///
/// A one-character token is consumed normally. For a longer token only the
/// leading '<' is dropped: Tok is replaced by the token the lexer reports one
/// character past the old start (PreviousLoc is not updated on this path).
///
/// \returns the location of the '<'.
SourceLoc Parser::consumeStartingLess() {
  assert(startsWithLess(Tok) && "Token does not start with '<'");

  if (Tok.getLength() != 1) {
    // Skip the starting '<' in the existing token.
    SourceLoc LessLoc = Tok.getLoc();
    Tok = L->getTokenAt(LessLoc.getAdvancedLoc(1));
    return LessLoc;
  }

  return consumeToken();
}
/// Consumes the '>' that the current token starts with.
///
/// A one-character token is consumed normally. For a longer token only the
/// leading '>' is dropped: Tok is replaced by the token the lexer reports one
/// character past the old start (PreviousLoc is not updated on this path).
///
/// \returns the location of the '>'.
SourceLoc Parser::consumeStartingGreater() {
  assert(startsWithGreater(Tok) && "Token does not start with '>'");

  if (Tok.getLength() != 1) {
    // Skip the starting '>' in the existing token.
    SourceLoc GreaterLoc = Tok.getLoc();
    Tok = L->getTokenAt(GreaterLoc.getAdvancedLoc(1));
    return GreaterLoc;
  }

  return consumeToken();
}
void Parser::skipSingle() {
switch (Tok.getKind()) {
case tok::l_paren:
consumeToken();
skipUntil(tok::r_paren);
consumeIf(tok::r_paren);
break;
case tok::l_brace:
consumeToken();
skipUntil(tok::r_brace);
consumeIf(tok::r_brace);
break;
case tok::l_square:
consumeToken();
skipUntil(tok::r_square);
consumeIf(tok::r_square);
break;
default:
consumeToken();
break;
}
}
/// Skips tokens (via skipSingle) until the current token is of kind \p T1 or
/// \p T2, or end of file is reached. The stopping token is not consumed.
void Parser::skipUntil(tok T1, tok T2) {
  // Passing tok::unknown for both kinds is a sentinel meaning "don't skip".
  const bool NothingToLookFor = (T1 == tok::unknown) && (T2 == tok::unknown);
  if (NothingToLookFor)
    return;

  for (;;) {
    if (Tok.is(tok::eof) || Tok.is(T1) || Tok.is(T2))
      return;
    skipSingle();
  }
}
/// Skips tokens (via skipSingle) until Tok.isNotAnyOperator() no longer holds
/// for the current token or end of file is reached.
void Parser::skipUntilAnyOperator() {
  for (;;) {
    if (Tok.is(tok::eof))
      return;
    if (!Tok.isNotAnyOperator())
      return;
    skipSingle();
  }
}
/// Skips tokens until a plausible end of a type list is reached.
///
/// The loop stops, without consuming the current token, when that token is:
///  - end of file, '{' or '}';
///  - a keyword other than 'Self' for which isStartOfStmt() or
///    isStartOfDecl() holds;
///  - an operator token for which startsWithGreater() holds.
/// Every other token is skipped with skipSingle().
void Parser::skipUntilGreaterInTypeList() {
while (true) {
switch (Tok.getKind()) {
case tok::eof:
case tok::l_brace:
case tok::r_brace:
return;
// The KEYWORD macro turns each entry of Tokens.def into a case label, so the
// statements below run for keyword tokens.
// NOTE(review): assumes Tokens.def invokes KEYWORD once per keyword and
// cleans up the macro afterwards -- confirm against Tokens.def.
#define KEYWORD(X) case tok::kw_##X:
#include "swift/Parse/Tokens.def"
// 'Self' can appear in types, skip it.
if (Tok.is(tok::kw_Self))
break;
if (isStartOfStmt(Tok) || isStartOfDecl(Tok, peekToken()))
return;
break;
default:
// An operator beginning with '>' ends the skip.
if (Tok.isAnyOperator() && startsWithGreater(Tok))
return;
// Skip '[' ']' '(' ')', because they can appear in types.
break;
}
// Reached only through the 'break's above: skip this token and keep going.
skipSingle();
}
}
/// Skips tokens (via skipSingle) until the current token is end of file, a
/// '}', or the start of a declaration. The stopping token is not consumed.
void Parser::skipUntilDeclRBrace() {
  for (;;) {
    if (Tok.is(tok::eof) || Tok.is(tok::r_brace))
      return;
    if (isStartOfDecl(Tok, peekToken()))
      return;
    skipSingle();
  }
}
/// Skips tokens (via skipSingle) until the current token is of kind \p T1, end
/// of file, a '}', the start of a statement, or the start of a declaration.
/// The stopping token is not consumed.
void Parser::skipUntilDeclStmtRBrace(tok T1) {
  for (;;) {
    if (Tok.is(T1) || Tok.is(tok::eof) || Tok.is(tok::r_brace))
      return;
    if (isStartOfStmt(Tok))
      return;
    if (isStartOfDecl(Tok, peekToken()))
      return;
    skipSingle();
  }
}
/// Skips tokens (via skipSingle) until the current token is of kind \p T1 or
/// \p T2, end of file, a '}', or the start of a declaration. The stopping
/// token is not consumed.
void Parser::skipUntilDeclRBrace(tok T1, tok T2) {
  for (;;) {
    if (Tok.is(T1) || Tok.is(T2))
      return;
    if (Tok.is(tok::eof) || Tok.is(tok::r_brace))
      return;
    if (isStartOfDecl(Tok, peekToken()))
      return;
    skipSingle();
  }
}
//===----------------------------------------------------------------------===//
// Primitive Parsing
//===----------------------------------------------------------------------===//
/// parseIdentifier - Consume an identifier token if one is present.
///
/// \param Result  receives the identifier for the token's text on success.
/// \param Loc     receives the token's location on success.
/// \param D       diagnostic emitted at the current token on failure.
/// \returns false on success; true (nothing consumed) when the current token
/// is not an identifier.
bool Parser::parseIdentifier(Identifier &Result, SourceLoc &Loc,
                             const Diagnostic &D) {
  if (Tok.isNot(tok::identifier)) {
    diagnose(Tok, D);
    return true;
  }

  Result = Context.getIdentifier(Tok.getText());
  Loc = Tok.getLoc();
  consumeToken();
  return false;
}
/// parseAnyIdentifier - Consume an identifier or operator if one is present
/// and store its name in \p Result and its location in \p Loc. Otherwise emit
/// \p D at the current token.
///
/// \returns false on success, true on error (nothing is consumed).
bool Parser::parseAnyIdentifier(Identifier &Result, SourceLoc &Loc,
                                const Diagnostic &D) {
  // Identifiers and operators are accepted as-is. Because an identifier or
  // operator is expected here, a postfix '!' is accepted too and mapped to an
  // operator name.
  const bool IsAcceptable = Tok.is(tok::identifier) || Tok.isAnyOperator() ||
                            Tok.is(tok::exclaim_postfix);
  if (!IsAcceptable) {
    diagnose(Tok, D);
    return true;
  }

  Result = Context.getIdentifier(Tok.getText());
  Loc = Tok.getLoc();
  consumeToken();
  return false;
}
/// parseToken - Expect a token of kind \p K at the current position.
///
/// When it is there, the token is consumed, its location is stored in
/// \p TokLoc and false is returned. Otherwise the diagnostic \p D is emitted
/// at the current token, nothing is consumed and true is returned.
bool Parser::parseToken(tok K, SourceLoc &TokLoc, const Diagnostic &D) {
  if (Tok.isNot(K)) {
    diagnose(Tok, D);
    return true;
  }

  TokLoc = consumeToken(K);
  return false;
}
/// parseMatchingToken - Expect the closing token \p K and store its location
/// in \p TokLoc.
///
/// \p K must be ')', ']' or '}'. On failure \p ErrorDiag is emitted (by
/// parseToken) and then a diagnostic naming the opening delimiter is emitted
/// at \p OtherLoc.
///
/// \returns false on success, true on failure.
bool Parser::parseMatchingToken(tok K, SourceLoc &TokLoc, Diag<> ErrorDiag,
                                SourceLoc OtherLoc) {
  // Choose the diagnostic that points back at the opening delimiter.
  Diag<> OpenerNote;
  switch (K) {
  case tok::r_paren:
    OpenerNote = diag::opening_paren;
    break;
  case tok::r_square:
    OpenerNote = diag::opening_bracket;
    break;
  case tok::r_brace:
    OpenerNote = diag::opening_brace;
    break;
  default:
    llvm_unreachable("unknown matching token!");
    break;
  }

  if (!parseToken(K, TokLoc, ErrorDiag))
    return false;

  diagnose(OtherLoc, OpenerNote);
  return true;
}
/// Parses a separator-delimited list of elements terminated by \p RightK.
///
/// \param RightK       token kind that closes the list.
/// \param LeftLoc      location handed to parseMatchingToken for the
///                     diagnostic about the opening delimiter when the closer
///                     is missing.
/// \param RightLoc     receives the location of the closing token; on failure
///                     it is set to PreviousLoc.
/// \param SeparatorK   separator kind; must be tok::comma or tok::semi.
/// \param OptionalSep  when false, a missing separator between elements is
///                     diagnosed (with an insertion fix-it) and the status is
///                     marked as a parse error.
/// \param AllowSepAfterLast  when true, a separator directly followed by
///                     \p RightK ends the list.
/// \param ErrorDiag    diagnostic passed to parseMatchingToken for a missing
///                     closing token.
/// \param callback     invoked to parse each element; its status is
///                     accumulated into the result.
/// \returns the accumulated status of all element parses and of the list
/// itself.
ParserStatus
Parser::parseList(tok RightK, SourceLoc LeftLoc, SourceLoc &RightLoc,
tok SeparatorK, bool OptionalSep, bool AllowSepAfterLast,
Diag<> ErrorDiag, std::function<ParserStatus()> callback) {
assert(SeparatorK == tok::comma || SeparatorK == tok::semi);
// Empty list: the closing token comes first.
if (Tok.is(RightK)) {
RightLoc = consumeToken(RightK);
return makeParserSuccess();
}
ParserStatus Status;
while (true) {
// Separators appearing where an element is expected are diagnosed with a
// removal fix-it and dropped.
while (Tok.is(SeparatorK)) {
diagnose(Tok, diag::unexpected_separator,
SeparatorK == tok::comma ? "," : ";")
.fixItRemove(SourceRange(Tok.getLoc()));
consumeToken();
}
// Remember where this element starts so lack of progress can be detected
// below.
SourceLoc StartLoc = Tok.getLoc();
Status |= callback();
if (Tok.is(RightK))
break;
// If the lexer stopped with an EOF token whose spelling is ")", then this
// is actually the tuple that is a string literal interpolation context.
// Just accept the ")" and build the tuple as we usually do.
if (Tok.is(tok::eof) && Tok.getText() == ")") {
RightLoc = Tok.getLoc();
return Status;
}
// A separator was consumed: either finish (trailing separator allowed and
// the closer follows) or go on to the next element.
if (consumeIf(SeparatorK)) {
if (AllowSepAfterLast && Tok.is(RightK))
break;
else
continue;
}
// No separator here. Unless separators are optional, diagnose it and suggest
// inserting one right after the previous token.
if (!OptionalSep) {
SourceLoc InsertLoc = Lexer::getLocForEndOfToken(SourceMgr, PreviousLoc);
StringRef Separator = (SeparatorK == tok::comma ? "," : ";");
diagnose(Tok, diag::expected_separator, Separator)
.fixItInsert(InsertLoc, Separator);
Status.setIsParseError();
}
// If we haven't made progress, skip ahead
if (Tok.getLoc() == StartLoc) {
// Stops at RightK, SeparatorK, end of file, '}' or the start of a
// declaration.
skipUntilDeclRBrace(RightK, SeparatorK);
if (Tok.is(RightK))
break;
// Hit end of file while recovering: give up on the list.
if (Tok.is(tok::eof)) {
RightLoc = PreviousLoc;
Status.setIsParseError();
return Status;
}
if (consumeIf(SeparatorK) || OptionalSep)
continue;
break;
}
}
// Every 'break' above lands here: expect the closing token.
if (parseMatchingToken(RightK, RightLoc, ErrorDiag, LeftLoc)) {
Status.setIsParseError();
RightLoc = PreviousLoc;
}
return Status;
}
/// diagnoseRedefinition - Report that \p New redefines \p Prev: a redefinition
/// diagnostic at the new declaration, followed by one that refers back to the
/// original definition.
///
/// \p Prev and \p New must be distinct declarations.
void Parser::diagnoseRedefinition(ValueDecl *Prev, ValueDecl *New) {
  assert(New != Prev && "Cannot conflict with self");

  // First the diagnostic at the offending (new) declaration...
  auto NewLoc = New->getLoc();
  diagnose(NewLoc, diag::decl_redefinition, New->isDefinition());

  // ...then the one pointing back at the earlier declaration.
  auto PrevLoc = Prev->getLoc();
  diagnose(PrevLoc, diag::previous_decldef, Prev->isDefinition(),
           Prev->getName());
}