Files
swift-mirror/lib/Parse/Parser.cpp
Jordan Rose 32130d3cc3 Teach delayed parsing to get its SourceFile from the saved DeclContext.
This makes it possible to delay decls in more than one source file.

Swift SVN r9672
2013-10-25 17:30:43 +00:00

552 lines
17 KiB
C++

//===--- Parser.cpp - Swift Language Parser -------------------------------===//
//
// This source file is part of the Swift.org open source project
//
// Copyright (c) 2014 - 2015 Apple Inc. and the Swift project authors
// Licensed under Apache License v2.0 with Runtime Library Exception
//
// See http://swift.org/LICENSE.txt for license information
// See http://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
//
//===----------------------------------------------------------------------===//
//
// This file implements the Swift parser.
//
//===----------------------------------------------------------------------===//
#include "swift/Parse/Parser.h"
#include "swift/Subsystems.h"
#include "swift/AST/ASTWalker.h"
#include "swift/AST/Diagnostics.h"
#include "swift/AST/PrettyStackTrace.h"
#include "swift/Basic/SourceManager.h"
#include "swift/Parse/Lexer.h"
#include "swift/Parse/CodeCompletionCallbacks.h"
#include "swift/Parse/DelayedParsingCallbacks.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/SaveAndRestore.h"
#include "llvm/ADT/PointerUnion.h"
#include "llvm/ADT/Twine.h"
using namespace swift;
void DelayedParsingCallbacks::anchor() { }
namespace {
/// To assist debugging parser crashes, tell us the location of the
/// current token.
class PrettyStackTraceParser : public llvm::PrettyStackTraceEntry {
Parser &P;
public:
PrettyStackTraceParser(Parser &P) : P(P) {}
void print(llvm::raw_ostream &out) const {
out << "With parser at source location: ";
P.Tok.getLoc().print(out, P.Context.SourceMgr);
out << '\n';
}
};
/// A visitor that does delayed parsing of function bodies.
class ParseDelayedFunctionBodies : public ASTWalker {
PersistentParserState &ParserState;
CodeCompletionCallbacksFactory *CodeCompletionFactory;
public:
ParseDelayedFunctionBodies(PersistentParserState &ParserState,
CodeCompletionCallbacksFactory *Factory)
: ParserState(ParserState), CodeCompletionFactory(Factory) {}
bool walkToDeclPre(Decl *D) override {
if (auto AFD = dyn_cast<AbstractFunctionDecl>(D)) {
if (AFD->getBodyKind() != FuncDecl::BodyKind::Unparsed)
return false;
parseFunctionBody(AFD);
return true;
}
return true;
}
private:
void parseFunctionBody(AbstractFunctionDecl *AFD) {
assert(AFD->getBodyKind() == FuncDecl::BodyKind::Unparsed);
SourceFile &SF = *AFD->getDeclContext()->getParentSourceFile();
SourceManager &SourceMgr = SF.TU.Ctx.SourceMgr;
unsigned BufferID = SourceMgr.findBufferContainingLoc(AFD->getLoc());
Parser TheParser(BufferID, SF, nullptr, &ParserState);
std::unique_ptr<CodeCompletionCallbacks> CodeCompletion;
if (CodeCompletionFactory) {
CodeCompletion.reset(
CodeCompletionFactory->createCodeCompletionCallbacks(TheParser));
TheParser.setCodeCompletionCallbacks(CodeCompletion.get());
}
TheParser.parseAbstractFunctionBodyDelayed(AFD);
if (CodeCompletion)
CodeCompletion->doneParsing();
}
};
void parseDelayedDecl(PersistentParserState &ParserState,
CodeCompletionCallbacksFactory *CodeCompletionFactory) {
if (!ParserState.hasDelayedDecl())
return;
SourceFile &SF = *ParserState.getDelayedDeclContext()->getParentSourceFile();
SourceManager &SourceMgr = SF.TU.Ctx.SourceMgr;
unsigned BufferID =
SourceMgr.findBufferContainingLoc(ParserState.getDelayedDeclLoc());
Parser TheParser(BufferID, SF, nullptr, &ParserState);
std::unique_ptr<CodeCompletionCallbacks> CodeCompletion;
if (CodeCompletionFactory) {
CodeCompletion.reset(
CodeCompletionFactory->createCodeCompletionCallbacks(TheParser));
TheParser.setCodeCompletionCallbacks(CodeCompletion.get());
}
switch (ParserState.getDelayedDeclKind()) {
case PersistentParserState::DelayedDeclKind::TopLevelCodeDecl:
TheParser.parseTopLevelCodeDeclDelayed();
break;
case PersistentParserState::DelayedDeclKind::Decl:
TheParser.parseDeclDelayed();
break;
}
if (CodeCompletion)
CodeCompletion->doneParsing();
}
} // unnamed namespace
bool swift::parseIntoTranslationUnit(SourceFile &SF,
unsigned BufferID,
bool *Done,
SILParserState *SIL,
PersistentParserState *PersistentState,
DelayedParsingCallbacks *DelayedParseCB) {
Parser P(BufferID, SF, SIL, PersistentState);
PrettyStackTraceParser StackTrace(P);
if (DelayedParseCB)
P.setDelayedParsingCallbacks(DelayedParseCB);
bool FoundSideEffects = P.parseTopLevel();
*Done = P.Tok.is(tok::eof);
return FoundSideEffects;
}
void swift::performDelayedParsing(
DeclContext *DC, PersistentParserState &PersistentState,
CodeCompletionCallbacksFactory *CodeCompletionFactory) {
ParseDelayedFunctionBodies Walker(PersistentState,
CodeCompletionFactory);
DC->walkContext(Walker);
if (CodeCompletionFactory)
parseDelayedDecl(PersistentState, CodeCompletionFactory);
}
/// \brief Tokenizes a string literal, taking into account string interpolation.
static void getStringPartTokens(const Token &Tok, SourceManager &SM,
int BufID, const llvm::MemoryBuffer *Buffer,
std::vector<Token> &Toks) {
assert(Tok.is(tok::string_literal));
SmallVector<Lexer::StringSegment, 4> Segments;
Lexer::getStringLiteralSegments(Tok, Segments, /*Diags=*/0);
for (unsigned i = 0, e = Segments.size(); i != e; ++i) {
Lexer::StringSegment &Seg = Segments[i];
bool isFirst = i == 0;
bool isLast = i == e-1;
if (Seg.Kind == Lexer::StringSegment::Literal) {
SourceLoc Loc = Seg.Loc;
unsigned Len = Seg.Length;
if (isFirst) {
// Include the quote.
Loc = Loc.getAdvancedLoc(-1);
++Len;
}
if (isLast) {
// Include the quote.
++Len;
}
StringRef Text(Buffer->getBufferStart() +
SM.getLocOffsetInBuffer(Loc, BufID),
Len);
Token NewTok;
NewTok.setToken(tok::string_literal, Text);
Toks.push_back(NewTok);
} else {
assert(Seg.Kind == Lexer::StringSegment::Expr &&
"new enumerator was introduced ?");
unsigned Offset = SM.getLocOffsetInBuffer(Seg.Loc, BufID);
unsigned EndOffset = Offset + Seg.Length;
if (isFirst) {
// Add a token for the quote character.
StringRef Text(Buffer->getBufferStart() + Offset-2, 1);
Token NewTok;
NewTok.setToken(tok::string_literal, Text);
Toks.push_back(NewTok);
}
std::vector<Token> NewTokens = swift::tokenize(SM, BufID, Offset,
EndOffset,
/*KeepComments=*/true);
Toks.insert(Toks.end(), NewTokens.begin(), NewTokens.end());
if (isLast) {
// Add a token for the quote character.
StringRef Text(Buffer->getBufferStart() + EndOffset, 1);
Token NewTok;
NewTok.setToken(tok::string_literal, Text);
Toks.push_back(NewTok);
}
}
}
}
std::vector<Token> swift::tokenize(SourceManager &SM, unsigned BufferID,
unsigned Offset, unsigned EndOffset,
bool KeepComments,
bool TokenizeInterpolatedString) {
auto *Buffer = SM->getMemoryBuffer(BufferID);
if (Offset == 0 && EndOffset == 0)
EndOffset = Buffer->getBufferSize();
Lexer L(SM, BufferID, /*Diags=*/nullptr, /*InSILMode=*/false, KeepComments,
Offset, EndOffset);
std::vector<Token> Tokens;
do {
Tokens.emplace_back();
L.lex(Tokens.back());
if (Tokens.back().is(tok::string_literal) && TokenizeInterpolatedString) {
Token StrTok = Tokens.back();
Tokens.pop_back();
getStringPartTokens(StrTok, SM, BufferID, Buffer, Tokens);
}
} while (Tokens.back().isNot(tok::eof));
Tokens.pop_back(); // Remove EOF.
return Tokens;
}
//===----------------------------------------------------------------------===//
// Setup and Helper Methods
//===----------------------------------------------------------------------===//
Parser::Parser(unsigned BufferID, SourceFile &SF, SILParserState *SIL,
PersistentParserState *PersistentState)
: SourceMgr(SF.TU.Ctx.SourceMgr),
BufferID(BufferID),
Diags(SF.TU.Ctx.Diags),
SF(SF),
L(new Lexer(SF.TU.Ctx.SourceMgr, BufferID, &SF.TU.Ctx.Diags,
/*InSILMode=*/SIL != nullptr, /*KeepComments=*/false)),
SIL(SIL),
Context(SF.TU.Ctx) {
State = PersistentState;
if (!State) {
OwnedState.reset(new PersistentParserState());
State = OwnedState.get();
}
// Set the token to a sentinel so that we know the lexer isn't primed yet.
// This cannot be tok::unknown, since that is a token the lexer could produce.
Tok.setKind(tok::NUM_TOKENS);
auto ParserPos = State->takeParserPosition();
if (ParserPos.isValid() &&
SourceMgr.findBufferContainingLoc(ParserPos.Loc) == BufferID) {
auto BeginParserPosition = getParserPosition(ParserPos);
restoreParserPosition(BeginParserPosition);
}
}
Parser::~Parser() {
delete L;
}
/// peekToken - Return the next token that will be installed by consumeToken.
const Token &Parser::peekToken() {
return L->peekNextToken();
}
SourceLoc Parser::consumeToken() {
SourceLoc Loc = Tok.getLoc();
assert(Tok.isNot(tok::eof) && "Lexing past eof!");
L->lex(Tok);
PreviousLoc = Loc;
return Loc;
}
SourceLoc Parser::getEndOfPreviousLoc() {
return Lexer::getLocForEndOfToken(SourceMgr, PreviousLoc);
}
SourceLoc Parser::consumeStartingLess() {
assert(startsWithLess(Tok) && "Token does not start with '<'");
if (Tok.getLength() == 1)
return consumeToken();
// Skip the starting '<' in the existing token.
SourceLoc Loc = Tok.getLoc();
Tok = L->getTokenAt(Loc.getAdvancedLoc(1));
return Loc;
}
SourceLoc Parser::consumeStartingGreater() {
assert(startsWithGreater(Tok) && "Token does not start with '>'");
if (Tok.getLength() == 1)
return consumeToken();
// Skip the starting '>' in the existing token.
SourceLoc Loc = Tok.getLoc();
Tok = L->getTokenAt(Loc.getAdvancedLoc(1));
return Loc;
}
void Parser::skipSingle() {
switch (Tok.getKind()) {
case tok::l_paren:
consumeToken();
skipUntil(tok::r_paren);
consumeIf(tok::r_paren);
break;
case tok::l_brace:
consumeToken();
skipUntil(tok::r_brace);
consumeIf(tok::r_brace);
break;
case tok::l_square:
consumeToken();
skipUntil(tok::r_square);
consumeIf(tok::r_square);
break;
default:
consumeToken();
break;
}
}
void Parser::skipUntil(tok T1, tok T2) {
// tok::unknown is a sentinel that means "don't skip".
if (T1 == tok::unknown && T2 == tok::unknown) return;
while (Tok.isNot(tok::eof) && Tok.isNot(T1) && Tok.isNot(T2))
skipSingle();
}
void Parser::skipUntilAnyOperator() {
while (Tok.isNot(tok::eof) && Tok.isNotAnyOperator())
skipSingle();
}
void Parser::skipUntilGreaterInTypeList() {
while (true) {
switch (Tok.getKind()) {
case tok::eof:
case tok::l_brace:
case tok::r_brace:
return;
#define KEYWORD(X) case tok::kw_##X:
#include "swift/Parse/Tokens.def"
// 'Self' can appear in types, skip it.
if (Tok.is(tok::kw_Self))
break;
if (isStartOfStmt(Tok) || isStartOfDecl(Tok, peekToken()))
return;
break;
default:
if (Tok.isAnyOperator() && startsWithGreater(Tok))
return;
// Skip '[' ']' '(' ')', because they can appear in types.
break;
}
skipSingle();
}
}
void Parser::skipUntilDeclRBrace() {
while (Tok.isNot(tok::eof) && Tok.isNot(tok::r_brace) &&
!isStartOfDecl(Tok, peekToken()))
skipSingle();
}
void Parser::skipUntilDeclStmtRBrace(tok T1) {
while (Tok.isNot(T1) && Tok.isNot(tok::eof) && Tok.isNot(tok::r_brace) &&
!isStartOfStmt(Tok) &&
!isStartOfDecl(Tok, peekToken())) {
skipSingle();
}
}
void Parser::skipUntilDeclRBrace(tok T1, tok T2) {
while (Tok.isNot(T1) && Tok.isNot(T2) &&
Tok.isNot(tok::eof) && Tok.isNot(tok::r_brace) &&
!isStartOfDecl(Tok, peekToken())) {
skipSingle();
}
}
//===----------------------------------------------------------------------===//
// Primitive Parsing
//===----------------------------------------------------------------------===//
bool Parser::parseIdentifier(Identifier &Result, SourceLoc &Loc,
const Diagnostic &D) {
switch (Tok.getKind()) {
case tok::identifier:
Result = Context.getIdentifier(Tok.getText());
Loc = Tok.getLoc();
consumeToken();
return false;
default:
diagnose(Tok, D);
return true;
}
}
/// parseAnyIdentifier - Consume an identifier or operator if present and return
/// its name in Result. Otherwise, emit an error and return true.
bool Parser::parseAnyIdentifier(Identifier &Result, SourceLoc &Loc,
const Diagnostic &D) {
if (Tok.is(tok::identifier) || Tok.isAnyOperator()) {
Result = Context.getIdentifier(Tok.getText());
Loc = Tok.getLoc();
consumeToken();
return false;
}
// When we know we're supposed to get an identifier or operator, map the
// postfix '!' to an operator name.
if (Tok.is(tok::exclaim_postfix)) {
Result = Context.getIdentifier(Tok.getText());
Loc = Tok.getLoc();
consumeToken();
return false;
}
diagnose(Tok, D);
return true;
}
/// parseToken - The parser expects that 'K' is next in the input. If so, it is
/// consumed and false is returned.
///
/// If the input is malformed, this emits the specified error diagnostic.
bool Parser::parseToken(tok K, SourceLoc &TokLoc, const Diagnostic &D) {
if (Tok.is(K)) {
TokLoc = consumeToken(K);
return false;
}
diagnose(Tok, D);
return true;
}
/// parseMatchingToken - Parse the specified expected token and return its
/// location on success. On failure, emit the specified error diagnostic, and a
/// note at the specified note location.
bool Parser::parseMatchingToken(tok K, SourceLoc &TokLoc, Diag<> ErrorDiag,
SourceLoc OtherLoc) {
Diag<> OtherNote;
switch (K) {
case tok::r_paren: OtherNote = diag::opening_paren; break;
case tok::r_square: OtherNote = diag::opening_bracket; break;
case tok::r_brace: OtherNote = diag::opening_brace; break;
default: llvm_unreachable("unknown matching token!"); break;
}
if (parseToken(K, TokLoc, ErrorDiag)) {
diagnose(OtherLoc, OtherNote);
return true;
}
return false;
}
ParserStatus
Parser::parseList(tok RightK, SourceLoc LeftLoc, SourceLoc &RightLoc,
tok SeparatorK, bool OptionalSep, bool AllowSepAfterLast,
Diag<> ErrorDiag, std::function<ParserStatus()> callback) {
assert(SeparatorK == tok::comma || SeparatorK == tok::semi);
if (Tok.is(RightK)) {
RightLoc = consumeToken(RightK);
return makeParserSuccess();
}
ParserStatus Status;
while (true) {
while (Tok.is(SeparatorK)) {
diagnose(Tok, diag::unexpected_separator,
SeparatorK == tok::comma ? "," : ";")
.fixItRemove(SourceRange(Tok.getLoc()));
consumeToken();
}
SourceLoc StartLoc = Tok.getLoc();
Status |= callback();
if (Tok.is(RightK))
break;
// If the lexer stopped with an EOF token whose spelling is ")", then this
// is actually the tuple that is a string literal interpolation context.
// Just accept the ")" and build the tuple as we usually do.
if (Tok.is(tok::eof) && Tok.getText() == ")") {
RightLoc = Tok.getLoc();
return Status;
}
if (consumeIf(SeparatorK)) {
if (AllowSepAfterLast && Tok.is(RightK))
break;
else
continue;
}
if (!OptionalSep) {
SourceLoc InsertLoc = Lexer::getLocForEndOfToken(SourceMgr, PreviousLoc);
StringRef Separator = (SeparatorK == tok::comma ? "," : ";");
diagnose(Tok, diag::expected_separator, Separator)
.fixItInsert(InsertLoc, Separator);
Status.setIsParseError();
}
// If we haven't made progress, skip ahead
if (Tok.getLoc() == StartLoc) {
skipUntilDeclRBrace(RightK, SeparatorK);
if (Tok.is(RightK))
break;
if (Tok.is(tok::eof)) {
RightLoc = PreviousLoc;
Status.setIsParseError();
return Status;
}
if (consumeIf(SeparatorK) || OptionalSep)
continue;
break;
}
}
if (parseMatchingToken(RightK, RightLoc, ErrorDiag, LeftLoc)) {
Status.setIsParseError();
RightLoc = PreviousLoc;
}
return Status;
}
/// diagnoseRedefinition - Diagnose a redefinition error, with a note
/// referring back to the original definition.
void Parser::diagnoseRedefinition(ValueDecl *Prev, ValueDecl *New) {
assert(New != Prev && "Cannot conflict with self");
diagnose(New->getLoc(), diag::decl_redefinition, New->isDefinition());
diagnose(Prev->getLoc(), diag::previous_decldef, Prev->isDefinition(),
Prev->getName());
}