Files
swift-mirror/lib/Parse/ParseExpr.cpp
Chris Lattner 41a924e723 synch up the grammar in the parser and langref, and make the
expression section of langref make more sense.



Swift SVN r571
2011-08-19 23:10:13 +00:00

565 lines
17 KiB
C++

//===--- ParseExpr.cpp - Swift Language Parser for Expressions ------------===//
//
// This source file is part of the Swift.org open source project
//
// Copyright (c) 2014 - 2015 Apple Inc. and the Swift project authors
// Licensed under Apache License v2.0 with Runtime Library Exception
//
// See http://swift.org/LICENSE.txt for license information
// See http://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
//
//===----------------------------------------------------------------------===//
//
// Expression Parsing and AST Building
//
//===----------------------------------------------------------------------===//
#include "Parser.h"
#include "ParseResult.h"
#include "Scope.h"
#include "swift/AST/Decl.h"
#include "swift/AST/Expr.h"
#include "swift/AST/Types.h"
#include "swift/AST/ASTContext.h"
#include "llvm/ADT/Twine.h"
using namespace swift;
/// isStartOfExpr - Return true if Tok, with Next as the token after it, is
/// something this parser accepts as the beginning of an expression.
///
/// A plain tok::l_paren is deliberately absent from the accepted set; only the
/// spaced form tok::l_paren_space counts here.
bool Parser::isStartOfExpr(const Token &Tok, const Token &Next) {
  // Tokens that begin an expression no matter what follows them.
  if (Tok.is(tok::numeric_constant) || Tok.is(tok::dollarident) ||
      Tok.is(tok::identifier) || Tok.is(tok::colon) ||
      Tok.is(tok::l_paren_space) || Tok.is(tok::oper))
    return true;

  // The only remaining candidate is the 'func' keyword.
  if (!Tok.is(tok::kw_func))
    return false;

  // "func(" and "func{" are func expressions.  "func x" is a func declaration.
  return Next.is(tok::l_paren) || Next.is(tok::l_paren_space) ||
         Next.is(tok::l_brace);
}
/// parseSingleExpr
///
/// Parse an expression in a context that requires exactly one expression.
/// Message is forwarded to parseExpr as the diagnostic text to use.
///
/// If the first expression fails (ParseResult's boolean test is true), a
/// failing result is returned immediately.  Otherwise, when the next token
/// could start yet another expression, one "expected a singular expression"
/// error is emitted and the trailing expressions are parsed and thrown away.
/// The result of the first expression is what gets returned.
ParseResult<Expr> Parser::parseSingleExpr(const char *Message) {
  ParseResult<Expr> Result = parseExpr(Message);
  if (Result) return true;

  // The common case: nothing expression-like follows.
  if (!isStartOfExpr(Tok, peekToken()))
    return Result;

  // Kill all the following expressions.  This is not necessarily
  // good for certain kinds of recovery.
  error(Tok.getLoc(), "expected a singular expression");
  for (;;) {
    ParseResult<Expr> Discarded = parseExpr(Message);
    if (Discarded)
      break;
    if (!isStartOfExpr(Tok, peekToken()))
      break;
  }

  return Result;
}
/// parseExpr
///   expr:
///     expr-unary
///     expr-unary operator expr
///
/// The sequencing here is not structural, i.e. binary operators are
/// not inherently right-associative.
///
/// Operands and operator references are collected into one flat list.  A lone
/// operand is returned as-is; anything longer is wrapped in a SequenceExpr.
/// A parse error in any operand aborts immediately; a semantic error lets
/// parsing continue but makes the whole expression a semantic error.
ParseResult<Expr> Parser::parseExpr(const char *Message) {
  SmallVector<Expr*, 8> Elements;
  bool SawSemaError = false;

  for (;;) {
    // Parse the next operand.
    ParseResult<Expr> Operand = parseExprUnary(Message);
    if (Operand.isParseError())
      return true;

    if (!Operand.isSemaError())
      Elements.push_back(Operand.get());
    else
      SawSemaError = true;

    // Without a following operator the sequence is complete.
    if (!Tok.is(tok::oper))
      break;

    // Parse the operator.  If this ever gains the ability to fail, we
    // probably need to do something to keep the SequenceExpr in a
    // valid state.
    Elements.push_back(parseExprOperator());

    // The caller's message is only valid for the first subexpr.
    Message = "expected expression after operator";
  }

  // Semantic errors anywhere in the sequence fail the whole expression.
  if (SawSemaError)
    return ParseResult<Expr>::getSemaError();

  assert(!Elements.empty());

  // A single operand means no operators were seen: no sequence needed.
  if (Elements.size() == 1)
    return Elements[0];

  // Copy the elements into storage owned by the ASTContext.
  Expr **Copied =
    Context.AllocateCopy<Expr*>(Elements.begin(), Elements.end());
  return new (Context) SequenceExpr(Copied, Elements.size());
}
/// parseExprUnary
///
///   expr-unary:
///     expr-postfix
///     operator expr-unary
///
/// Only the expr-postfix production is handled at the moment; the prefix
/// operator form is not parsed here yet.
ParseResult<Expr> Parser::parseExprUnary(const char *Message) {
  // TODO: implement the 'operator expr-unary' production.
  ParseResult<Expr> Postfix = parseExprPostfix(Message);
  return Postfix;
}
/// parseExprPostfix
///
///   expr-literal:
///     numeric_constant
///
///   expr-primary:
///     expr-literal
///     expr-identifier
///     ':' identifier
///     expr-paren
///     expr-func
///
///   expr-dot:
///     expr-postfix '.' identifier
///     expr-postfix '.' dollarident
///
///   expr-subscript:
///     expr-postfix '[' expr ']'
///
///   expr-call:
///     expr-postfix expr-paren
///
///   expr-postfix:
///     expr-primary
///     expr-dot
///     expr-subscript
///     expr-call
///
/// Parses one primary expression selected by the current token, then folds
/// any number of '.name', call and '[expr]' suffixes onto it.  Message is the
/// diagnostic used when the current token cannot start a primary expression;
/// when it is null "expected expression" is used instead.
///
/// A parse error anywhere returns a parse-error result.  A semantic error is
/// remembered in the result while suffixes keep being consumed.
ParseResult<Expr> Parser::parseExprPostfix(const char *Message) {
  ParseResult<Expr> Result;
  switch (Tok.getKind()) {
  case tok::numeric_constant:
    Result = parseExprNumericConstant();
    break;

  case tok::dollarident: // $1
    Result = parseExprDollarIdentifier();
    break;

  case tok::identifier:  // foo   and  foo::bar
    Result = parseExprIdentifier();
    break;

  case tok::colon: {     // :foo
    SMLoc ColonLoc = consumeToken(tok::colon);
    Identifier Name;
    SMLoc NameLoc = Tok.getLoc();
    if (parseIdentifier(Name, "expected identifier after ':' expression"))
      return true;

    // Handle :foo by just making an AST node.
    Result = new (Context) UnresolvedMemberExpr(ColonLoc, NameLoc, Name);
    break;
  }

  // A spaced left parenthesis can generally start a tuple expression.
  // What it can't do is start a call.
  case tok::l_paren:
  case tok::l_paren_space:
    Result = parseExprParen();
    break;

  case tok::kw_func:
    Result = parseExprFunc();
    break;

  default:
    error(Tok.getLoc(), Message ? Message : "expected expression");
    return true;
  }

  // If we had a parse error, don't attempt to parse suffixes.  Do keep going if
  // we had semantic errors though.
  if (Result.isParseError())
    return true;

  // Handle suffix expressions.
  while (1) {
    // Remember where the suffix starts; used as the '.' location below and as
    // the '[' location in the unmatched-bracket note.
    SMLoc TokLoc = Tok.getLoc();

    // Check for a .foo suffix.
    if (consumeIf(tok::period)) {
      if (Tok.isNot(tok::identifier) && Tok.isNot(tok::dollarident)) {
        error(Tok.getLoc(), "expected field name");
        return true;
      }

      // Only build the node when the base is usable; the field name token is
      // consumed either way so parsing stays in sync.
      if (!Result.isSemaError()) {
        Identifier Name = Context.getIdentifier(Tok.getText());
        Result = new (Context) UnresolvedDotExpr(Result.get(), TokLoc, Name,
                                                 Tok.getLoc());
      }
      if (Tok.is(tok::identifier))
        consumeToken(tok::identifier);
      else
        consumeToken(tok::dollarident);
      continue;
    }

    // Check for a () suffix, which indicates a call.
    // Note that this cannot be a l_paren_space.
    if (Tok.is(tok::l_paren)) {
      ParseResult<Expr> Arg = parseExprParen();
      if (Arg.isParseError())
        return true;
      if (Arg.isSemaError())
        Result = ParseResult<Expr>::getSemaError();
      else if (!Result.isSemaError())
        Result = new (Context) CallExpr(Result.get(), Arg.get(), Type());
      continue;
    }

    // Check for a [expr] suffix.
    if (consumeIf(tok::l_square)) {
      ParseResult<Expr> Idx;
      if ((Idx = parseSingleExpr("expected expression parsing array index")))
        return true;

      if (parseToken(tok::r_square, "expected ']'")) {
        note(TokLoc, "to match this '['");
        return true;
      }

      // FIXME: Implement.  When neither Result nor Idx is a semantic error
      // this should build a subscript node and store it in Result, like the
      // cases above.  Until then the index is parsed and dropped and the base
      // expression is passed through unchanged.

      // A subscript is itself an expr-postfix, so keep looking for further
      // suffixes (a[0].b, a[0](x), a[0][1]) instead of stopping here.
      continue;
    }

    break;
  }

  return Result;
}
/// parseExprNumericConstant - Consume the current numeric_constant token and
/// build an IntegerLiteralExpr from its spelling.
///
/// If the spelling cannot be converted by StringRef::getAsInteger into an
/// unsigned long long, an error is emitted and a semantic-error result is
/// returned; the token has been consumed in either case.
ParseResult<Expr> Parser::parseExprNumericConstant() {
  // Grab the spelling before the token is consumed.
  StringRef Spelling = Tok.getText();
  SMLoc LiteralLoc = consumeToken(tok::numeric_constant);

  // The integer literal must fit in 64-bits.  The converted value itself is
  // not used; the literal node keeps the original text.
  unsigned long long Converted;
  bool ConversionFailed = Spelling.getAsInteger(0, Converted);
  if (ConversionFailed) {
    error(LiteralLoc,
          "invalid immediate for integer literal, value too large");
    return ParseResult<Expr>::getSemaError();
  }

  // The type of an integer literal is always "integer_literal_type", which
  // should be defined by the library.
  Identifier LiteralTyName = Context.getIdentifier("integer_literal_type");
  Type LiteralTy = ScopeInfo.lookupOrInsertTypeName(LiteralTyName, LiteralLoc);

  return new (Context) IntegerLiteralExpr(Spelling, LiteralLoc, LiteralTy);
}
/// parseExprDollarIdentifier - Parse an anonymous closure argument reference.
///
///   expr-identifier:
///     dollarident
///
/// The token must be '$' followed by one or more decimal digits (e.g. $0).
/// The token is always consumed.  A bare '$', a name with non-digit
/// characters, or a number that getAsInteger cannot store in an unsigned
/// produces an error and a semantic-error result; otherwise an
/// AnonClosureArgExpr carrying the argument number is returned.
ParseResult<Expr> Parser::parseExprDollarIdentifier() {
  // Grab the spelling before the token is consumed.
  StringRef Name = Tok.getText();
  SMLoc Loc = consumeToken(tok::dollarident);
  assert(Name[0] == '$' && "Not a dollarident");

  // Everything after the '$' must be an ASCII decimal digit.  The characters
  // are compared directly rather than through isdigit(): isdigit() only
  // promises a nonzero value for digits, so and-ing its raw result into a
  // bool can wrongly clear the flag, and it must not be given a negative char.
  bool AllNumeric = true;
  for (unsigned i = 1, e = Name.size(); i != e; ++i)
    AllNumeric &= (Name[i] >= '0' && Name[i] <= '9');

  if (Name.size() == 1 || !AllNumeric) {
    error(Loc, "invalid identifier, expected expression");
    return ParseResult<Expr>::getSemaError();
  }

  // Convert the digits to the argument index.
  unsigned ArgNo = 0;
  if (Name.substr(1).getAsInteger(10, ArgNo)) {
    error(Loc, "invalid name in $ expression");
    return ParseResult<Expr>::getSemaError();
  }

  return new (Context) AnonClosureArgExpr(ArgNo, Loc);
}
/// parseExprOperator - Parse an operator reference expression.  These
/// are not "proper" expressions; they can only appear interlaced in
/// SequenceExprs.
///
/// The current token must be tok::oper (asserted).  The operator's name is
/// read with parseIdentifier and turned into a reference expression by
/// actOnIdentifierExpr, positioned at the operator token.
Expr *Parser::parseExprOperator() {
  assert(Tok.is(tok::oper));

  // Record the location before the token is consumed.
  SMLoc OperLoc = Tok.getLoc();

  // The result of parseIdentifier is not checked here; the assert above has
  // already established the token kind.
  Identifier OperName;
  parseIdentifier(OperName, "");

  return actOnIdentifierExpr(OperName, OperLoc);
}
/// parseExprIdentifier - Parse an identifier expression:
///
///   expr-identifier:
///     identifier
///     identifier '::' identifier
///
/// The current token must be an identifier (asserted).  The plain form is
/// handed to actOnIdentifierExpr.  The scoped form looks the first name up as
/// a type name and produces an UnresolvedScopedIdentifierExpr; a missing
/// identifier after '::' is a parse error.
ParseResult<Expr> Parser::parseExprIdentifier() {
  assert(Tok.is(tok::identifier));

  SMLoc FirstLoc = Tok.getLoc();
  Identifier FirstName;
  parseIdentifier(FirstName, "");

  if (Tok.is(tok::coloncolon)) {
    SMLoc ColonColonLoc = consumeToken(tok::coloncolon);

    SMLoc MemberLoc = Tok.getLoc();
    Identifier MemberName;
    if (parseIdentifier(MemberName,
                        "expected identifier after '" + FirstName.str() +
                        "::' expression"))
      return true;

    // Note: this is very simplistic support for scoped name lookup, extend
    // when needed.
    TypeAliasDecl *ScopeDecl =
      ScopeInfo.lookupOrInsertTypeNameDecl(FirstName, FirstLoc);
    return new (Context) UnresolvedScopedIdentifierExpr(ScopeDecl, FirstLoc,
                                                        ColonColonLoc,
                                                        MemberLoc, MemberName);
  }

  // No '::' follows: this is an ordinary identifier reference.
  return actOnIdentifierExpr(FirstName, FirstLoc);
}
/// actOnIdentifierExpr - Build the expression for a reference to the name
/// Text at location Loc.  If ScopeInfo finds a value declaration for the name
/// a DeclRefExpr to it is created; otherwise the reference is left as an
/// UnresolvedDeclRefExpr carrying the name.
Expr *Parser::actOnIdentifierExpr(Identifier Text, SMLoc Loc) {
  if (ValueDecl *Found = ScopeInfo.lookupValueName(Text))
    return new (Context) DeclRefExpr(Found, Loc);

  return new (Context) UnresolvedDeclRefExpr(Text, Loc);
}
/// parseExprParen - Parse a tuple expression.
///
///   expr-paren:
///     '(' ')'
///     '(' expr-paren-element (',' expr-paren-element)* ')'
///
///   expr-paren-element:
///     ('.' identifier '=')? expr
///
/// The current token is consumed unconditionally as the opening parenthesis.
/// Syntax errors yield a parse error; a semantic error in any element yields
/// a semantic error once the closing ')' has been consumed.  Otherwise a
/// TupleExpr is built, flagged as a grouping paren when it holds exactly one
/// unnamed element.
ParseResult<Expr> Parser::parseExprParen() {
  SMLoc OpenLoc = consumeToken();

  SmallVector<Expr*, 8> Elements;
  // Stays empty until the first named element is seen; from then on it holds
  // one entry per element.
  SmallVector<Identifier, 8> ElementNames;
  bool SawSemaError = false;

  if (Tok.isNot(tok::r_paren)) {
    do {
      // Check to see if there is a field specifier: '.' identifier '='.
      Identifier Label;
      if (consumeIf(tok::period)) {
        if (parseIdentifier(Label,
                          "expected field specifier name in tuple expression"))
          return true;
        if (parseToken(tok::equal, "expected '=' in tuple expression"))
          return true;
      }

      if (ElementNames.empty()) {
        // First named element: back-fill empty names for the elements
        // already collected, then record this one.
        if (Label.get()) {
          ElementNames.resize(Elements.size());
          ElementNames.push_back(Label);
        }
      } else {
        ElementNames.push_back(Label);
      }

      ParseResult<Expr> Element =
        parseSingleExpr("expected expression in parentheses");
      if (Element)
        return true;

      if (!Element.isSemaError())
        Elements.push_back(Element.get());
      else
        SawSemaError = true;
    } while (consumeIf(tok::comma));
  }

  SMLoc CloseLoc = Tok.getLoc();
  if (parseToken(tok::r_paren, "expected ')' in parenthesis expression")) {
    note(OpenLoc, "to match this opening '('");
    return true;
  }

  if (SawSemaError)
    return ParseResult<Expr>::getSemaError();

  // Copy the elements, and the names if any were given, into storage owned by
  // the ASTContext.
  Expr **ElementsCopy =
    Context.AllocateCopy<Expr*>(Elements.data(),
                                Elements.data()+Elements.size());

  Identifier *NamesCopy = 0;
  if (!ElementNames.empty())
    NamesCopy =
      Context.AllocateCopy<Identifier>(ElementNames.data(),
                                       ElementNames.data()+Elements.size());

  // A single element without a name is just a parenthesized expression.
  bool IsGrouping =
    Elements.size() == 1 &&
    (ElementNames.empty() || ElementNames[0].empty());

  return new (Context) TupleExpr(OpenLoc, ElementsCopy, NamesCopy,
                                 Elements.size(), CloseLoc, IsGrouping);
}
/// parseExprFunc - Parse a func expression.
///
///   expr-func:
///     'func' type? stmt-brace
///
/// The type must start with '(' if present.
///
/// When the type is omitted the empty tuple type is used.  A type that is not
/// spelled as a function type is wrapped into a function type returning the
/// empty tuple.  The body is parsed inside a fresh Scope in which
/// actOnFuncExprStart has been run for the function type.
ParseResult<Expr> Parser::parseExprFunc() {
  SMLoc FuncLoc = consumeToken(tok::kw_func);

  Type FnTy;
  if (Tok.is(tok::l_brace)) {
    // No type written: the body follows directly.
    FnTy = TupleType::getEmpty(Context);
  } else {
    bool AtParen = Tok.is(tok::l_paren) || Tok.is(tok::l_paren_space);
    if (!AtParen) {
      error(Tok.getLoc(), "expected '(' in func expression argument list");
      return true;
    }
    if (parseType(FnTy))
      return true;
  }

  // If the parsed type is not spelled as a function type (i.e., has no '->' in
  // it), then it is implicitly a function that returns ().
  if (!isa<FunctionType>(FnTy.getPointer()))
    FnTy = FunctionType::get(FnTy, TupleType::getEmpty(Context), Context);

  // The arguments to the func are defined in their own scope.  The scope must
  // be opened before the FuncExpr is started.
  Scope FuncBodyScope(this);
  FuncExpr *Func = actOnFuncExprStart(FuncLoc, FnTy);

  // Then parse the body.
  ParseResult<BraceStmt> Body = parseStmtBrace("expected '{' in func expression");
  if (Body)
    return true;
  if (Body.isSemaError())
    return ParseResult<Expr>::getSemaError();

  Func->Body = Body.get();
  return Func;
}
/// FuncTypePiece - Tells AddFuncArgumentsToScope which part of a function
/// type it is currently visiting.  This affects how the decls are processed
/// and created.
enum class FuncTypePiece {
  /// Looking at the initial function type itself.
  Function,

  /// Looking at the input to the function type.
  Input,

  /// Looking at the output to the function type.
  Output
};
/// AddFuncArgumentsToScope - Walk the type specified for a Func object (which
/// is known to be a FunctionType on the outer level) creating and adding named
/// arguments to the current scope.  This causes redefinition errors to be
/// emitted.
///
/// Ty is the type being visited and Mode says which part of the function type
/// it is.  AccessPath is scratch state holding the index path from the
/// outermost function type down to Ty; it is restored to its incoming contents
/// before returning.  Every ArgDecl created is located at FuncLoc, appended to
/// ArgDecls and added to P's current scope.
static void AddFuncArgumentsToScope(Type Ty,
                                    SmallVectorImpl<unsigned> &AccessPath,
                                    FuncTypePiece Mode,
                                    SMLoc FuncLoc,
                                    SmallVectorImpl<ArgDecl*> &ArgDecls,
                                    Parser &P) {
  // Handle the function case first.
  if (Mode == FuncTypePiece::Function) {
    FunctionType *FT = cast<FunctionType>(Ty.getPointer());

    // Path component 0 is the input, 1 is the result.
    AccessPath.push_back(0);
    AddFuncArgumentsToScope(FT->Input, AccessPath, FuncTypePiece::Input,
                            FuncLoc, ArgDecls, P);

    AccessPath.back() = 1;

    // If this is a->b->c then we treat b as an input, not (b->c) as an output.
    if (isa<FunctionType>(FT->Result.getPointer()))
      AddFuncArgumentsToScope(FT->Result, AccessPath,
                              FuncTypePiece::Function, FuncLoc, ArgDecls, P);
    else
      AddFuncArgumentsToScope(FT->Result, AccessPath,
                              FuncTypePiece::Output, FuncLoc, ArgDecls, P);
    AccessPath.pop_back();
    return;
  }

  // Otherwise, we're looking at an input or output to the func.  The only type
  // we currently dive into is the humble tuple, which can be recursive.  This
  // should dive in syntactically.
  //
  // Note that we really *do* want dyn_cast here, not getAs, because we do not
  // want to look through type aliases or other sugar, we want to see what the
  // user wrote in the func declaration.
  TupleType *TT = dyn_cast<TupleType>(Ty.getPointer());
  if (TT == 0) return;

  AccessPath.push_back(0);

  // For tuples, recursively processes their elements (to handle cases like:
  //    (x : (a : int, b : int), y : int) -> ...
  // and create decls for any named elements.
  for (unsigned i = 0, e = TT->Fields.size(); i != e; ++i) {
    // Record which tuple element is being visited.  This used to store the
    // constant 1 for every element, which made the path wrong for all
    // elements except the second.
    AccessPath.back() = i;
    AddFuncArgumentsToScope(TT->Fields[i].Ty, AccessPath, Mode, FuncLoc,
                            ArgDecls, P);

    // If this field is named, create the argument decl for it.
    Identifier Name = TT->Fields[i].Name;
    // Ignore unnamed fields.
    if (Name.get() == 0) continue;

    // Create the argument decl for this named argument.
    ArgDecl *AD = new (P.Context) ArgDecl(FuncLoc, Name, TT->Fields[i].Ty);
    ArgDecls.push_back(AD);

    // Eventually we should mark the input/outputs as readonly vs writeonly.
    //bool isInput = Mode == FuncTypePiece::Input;

    P.ScopeInfo.addToScope(AD);
  }

  AccessPath.pop_back();
}
/// actOnFuncExprStart - Create the FuncExpr for a func expression whose type
/// is FuncTy and whose 'func' keyword is at FuncLoc.  The named arguments
/// found in FuncTy are added to the current scope by AddFuncArgumentsToScope,
/// and a copy of the resulting ArgDecl list, allocated through the
/// ASTContext, is stored on the returned FuncExpr.
FuncExpr *Parser::actOnFuncExprStart(SMLoc FuncLoc, Type FuncTy) {
  // Scratch path used by the walk; it is not needed afterwards.
  SmallVector<unsigned, 8> Path;
  SmallVector<ArgDecl*, 8> NamedArgs;

  AddFuncArgumentsToScope(FuncTy, Path, FuncTypePiece::Function,
                          FuncLoc, NamedArgs, *this);

  ArrayRef<ArgDecl*> ArgsRef = NamedArgs;
  return new (Context) FuncExpr(FuncLoc, FuncTy,
                                Context.AllocateCopy(ArgsRef));
}