Fix <rdar://14296004> [QoI] Poor diagnostic/recovery when two operators (e.g., == and -) are adjacted without spaces.

This is a frequently reported and surprising issue where lack of whitespace leads to
rejecting common code like "X*-4".  Fix this by diagnosing it specifically as a lack
of whitespace problem, including a fixit to insert the missing whitespace (to transform
it into "X * -4".  This even handles the cases where there are multiple valid (single)
splits possible by emitting a series of notes.
This commit is contained in:
Chris Lattner
2015-12-16 13:18:59 -08:00
parent d221401713
commit e28c2e2c6e
5 changed files with 189 additions and 16 deletions

View File

@@ -420,16 +420,28 @@ ERROR(ambiguous_module_type,sema_nb,none,
ERROR(use_nonmatching_operator,sema_nb,none,
"%0 is not a %select{binary|prefix unary|postfix unary}1 operator",
(Identifier, unsigned))
ERROR(unspaced_operators_fixit,sema_nb,none,
"missing whitespace between %0 and %1 operators",
(Identifier, Identifier, bool))
ERROR(unspaced_operators,sema_nb,none,
"ambiguous missing whitespace between unary and binary operators", ())
NOTE(unspaced_operators_candidate,sema_nb,none,
"could be %select{binary|postfix}2 %0 and %select{prefix|binary}2 %1",
(Identifier, Identifier, bool))
ERROR(use_unresolved_identifier,sema_nb,none,
"use of unresolved %select{identifier|operator}1 %0", (Identifier, bool))
ERROR(use_undeclared_type,sema_nb,none,
"use of undeclared type %0", (Identifier))
ERROR(use_undeclared_type_did_you_mean,sema_nb,none,
"use of undeclared type %0; did you mean to use '%1'?", (Identifier, StringRef))
"use of undeclared type %0; did you mean to use '%1'?", (Identifier, StringRef))
NOTE(note_remapped_type,sema_nb,none,
"did you mean to use '%0'?", (StringRef))
"did you mean to use '%0'?", (StringRef))
ERROR(identifier_init_failure,sema_nb,none,
"could not infer type for %0", (Identifier))
"could not infer type for %0", (Identifier))
ERROR(pattern_used_in_type,sema_nb,none,
"%0 used within its own type", (Identifier))
NOTE(note_module_as_type,sema_nb,none,

View File

@@ -240,6 +240,15 @@ public:
restoreState(S);
}
/// \brief Retrieve the Token referred to by \c Loc.
///
/// \param SM The source manager in which the given source location
/// resides.
///
/// \param Loc The source location of the beginning of a token.
static Token getTokenAtLocation(const SourceManager &SM, SourceLoc Loc);
/// \brief Retrieve the source location that points just past the
/// end of the token referred to by \c Loc.
///
@@ -299,6 +308,10 @@ public:
/// reserved word.
static tok kindOfIdentifier(StringRef Str, bool InSILMode);
/// \brief Determines if the given string is a valid operator identifier,
/// without escaping characters.
static bool isOperator(StringRef string);
SourceLoc getLocForStartOfBuffer() const {
return SourceLoc(llvm::SMLoc::getFromPointer(BufferStart));
}

View File

@@ -523,6 +523,18 @@ bool Lexer::isIdentifier(StringRef string) {
return p == end;
}
/// \brief Determines if the given string is a valid operator identifier,
/// without escaping characters.
bool Lexer::isOperator(StringRef string) {
if (string.empty()) return false;
char const *p = string.data(), *end = string.end();
if (!advanceIfValidStartOfOperator(p, end))
return false;
while (p < end && advanceIfValidContinuationOfOperator(p, end));
return p == end;
}
tok Lexer::kindOfIdentifier(StringRef Str, bool InSILMode) {
tok Kind = llvm::StringSwitch<tok>(Str)
#define KEYWORD(kw) \
@@ -1664,15 +1676,15 @@ Restart:
}
}
SourceLoc Lexer::getLocForEndOfToken(const SourceManager &SM, SourceLoc Loc) {
Token Lexer::getTokenAtLocation(const SourceManager &SM, SourceLoc Loc) {
// Don't try to do anything with an invalid location.
if (!Loc.isValid())
return Loc;
return Token();
// Figure out which buffer contains this location.
int BufferID = SM.findBufferContainingLoc(Loc);
if (BufferID < 0)
return SourceLoc();
return Token();
// Use fake language options; language options only affect validity
// and the exact token produced.
@@ -1685,10 +1697,14 @@ SourceLoc Lexer::getLocForEndOfToken(const SourceManager &SM, SourceLoc Loc) {
Lexer L(FakeLangOpts, SM, BufferID, nullptr, /*InSILMode=*/ false,
CommentRetentionMode::ReturnAsTokens);
L.restoreState(State(Loc));
unsigned Length = L.peekNextToken().getLength();
return Loc.getAdvancedLoc(Length);
return L.peekNextToken();
}
SourceLoc Lexer::getLocForEndOfToken(const SourceManager &SM, SourceLoc Loc) {
return Loc.getAdvancedLoc(getTokenAtLocation(SM, Loc).getLength());
}
static SourceLoc getLocForStartOfTokenInBuf(SourceManager &SM,
unsigned BufferID,
unsigned Offset,

View File

@@ -281,6 +281,132 @@ static bool matchesDeclRefKind(ValueDecl *value, DeclRefKind refKind) {
llvm_unreachable("bad declaration reference kind");
}
static bool containsDeclRefKind(LookupResult &lookupResult,
DeclRefKind refKind) {
for (auto candidate : lookupResult) {
ValueDecl *D = candidate.Decl;
if (!D || !D->hasType())
continue;
if (matchesDeclRefKind(D, refKind))
return true;
}
return false;
}
/// Emit a diagnostic with a fixit hint for an invalid binary operator, showing
/// how to split it according to splitCandidate.
static void diagnoseOperatorSplit(UnresolvedDeclRefExpr *UDRE,
std::pair<unsigned, bool> splitCandidate,
Diag<Identifier, Identifier, bool> diagID,
TypeChecker &TC) {
unsigned splitLoc = splitCandidate.first;
bool isBinOpFirst = splitCandidate.second;
StringRef nameStr = UDRE->getName().str();
auto startStr = nameStr.substr(0, splitLoc);
auto endStr = nameStr.drop_front(splitLoc);
// One valid split found, it is almost certainly the right answer.
auto diag = TC.diagnose(UDRE->getLoc(), diagID,
TC.Context.getIdentifier(startStr),
TC.Context.getIdentifier(endStr), isBinOpFirst);
// Highlight the whole operator.
diag.highlight(UDRE->getLoc());
// Insert whitespace on the left if the binop is at the start, or to the
// right if it is end.
if (isBinOpFirst)
diag.fixItInsert(UDRE->getLoc(), " ");
else
diag.fixItInsertAfter(UDRE->getLoc(), " ");
// Insert a space between the operators.
diag.fixItInsert(UDRE->getLoc().getAdvancedLoc(splitLoc), " ");
}
/// If we failed lookup of a binary operator, check to see it to see if
/// it is a binary operator juxtaposed with a unary operator (x*-4) that
/// needs whitespace. If so, emit specific diagnostics for it and return true,
/// otherwise return false.
static bool diagnoseJuxtaposedBinOp(UnresolvedDeclRefExpr *UDRE,
DeclContext *DC,
TypeChecker &TC) {
Identifier name = UDRE->getName();
StringRef nameStr = name.str();
if (!name.isOperator() || nameStr.size() < 2 ||
UDRE->getRefKind() != DeclRefKind::BinaryOperator)
return false;
// Relex the token, to decide whether it has whitespace around it or not. If
// it does, it isn't likely to be a case where a space was forgotten.
auto tok = Lexer::getTokenAtLocation(TC.Context.SourceMgr, UDRE->getLoc());
if (tok.getKind() != tok::oper_binary_unspaced)
return false;
// Okay, we have a failed lookup of a multicharacter unspaced binary operator.
// Check to see if lookup succeeds if a prefix or postfix operator is split
// off, and record the matches found. The bool indicated is false if the
// first half of the split is the unary operator (x!*4) or true if it is the
// binary operator (x*+4).
std::vector<std::pair<unsigned, bool>> WorkableSplits;
// Check all the potential splits.
for (unsigned splitLoc = 1, e = nameStr.size(); splitLoc != e; ++splitLoc) {
// For it to be a valid split, the start and end section must be valid
// operators, spliting a unicode code point isn't kosher.
auto startStr = nameStr.substr(0, splitLoc);
auto endStr = nameStr.drop_front(splitLoc);
if (!Lexer::isOperator(startStr) || !Lexer::isOperator(endStr))
continue;
auto startName = TC.Context.getIdentifier(startStr);
auto endName = TC.Context.getIdentifier(endStr);
// Perform name lookup for the first and second pieces. If either fail to
// be found, then it isn't a valid split.
NameLookupOptions LookupOptions = defaultUnqualifiedLookupOptions;
if (isa<AbstractFunctionDecl>(DC))
LookupOptions |= NameLookupFlags::KnownPrivate;
auto startLookup = TC.lookupUnqualified(DC, startName, UDRE->getLoc(),
LookupOptions);
if (!startLookup) continue;
auto endLookup = TC.lookupUnqualified(DC, endName, UDRE->getLoc(),
LookupOptions);
if (!endLookup) continue;
// Look to see if the candidates found could possibly match.
if (containsDeclRefKind(startLookup, DeclRefKind::PostfixOperator) &&
containsDeclRefKind(endLookup, DeclRefKind::BinaryOperator))
WorkableSplits.push_back({ splitLoc, false });
if (containsDeclRefKind(startLookup, DeclRefKind::BinaryOperator) &&
containsDeclRefKind(endLookup, DeclRefKind::PrefixOperator))
WorkableSplits.push_back({ splitLoc, true });
}
switch (WorkableSplits.size()) {
case 0:
// No splits found, can't produce this diagnostic.
return false;
case 1:
// One candidate: produce an error with a fixit on it.
diagnoseOperatorSplit(UDRE, WorkableSplits[0],
diag::unspaced_operators_fixit, TC);
return true;
default:
// Otherwise, we have to produce a series of notes listing the various
// options.
TC.diagnose(UDRE->getLoc(), diag::unspaced_operators)
.highlight(UDRE->getLoc());
for (auto candidateSplit : WorkableSplits)
diagnoseOperatorSplit(UDRE, candidateSplit,
diag::unspaced_operators_candidate, TC);
return true;
}
}
/// Bind an UnresolvedDeclRefExpr by performing name lookup and
/// returning the resultant expression. Context is the DeclContext used
/// for the lookup.
@@ -297,9 +423,14 @@ resolveDeclRefExpr(UnresolvedDeclRefExpr *UDRE, DeclContext *DC) {
auto Lookup = lookupUnqualified(DC, Name, Loc, LookupOptions);
if (!Lookup) {
diagnose(Loc, diag::use_unresolved_identifier, Name,
UDRE->getName().isOperator())
.highlight(Loc);
// If we failed lookup of a binary operator, check to see it to see if
// it is a binary operator juxtaposed with a unary operator (x*-4) that
// needs whitespace. If so, emit specific diagnostics for it.
if (!diagnoseJuxtaposedBinOp(UDRE, DC, *this)) {
diagnose(Loc, diag::use_unresolved_identifier, Name,
UDRE->getName().isOperator())
.highlight(Loc);
}
return new (Context) ErrorExpr(Loc);
}

View File

@@ -172,10 +172,9 @@ func ??= <T>(inout result : T?, rhs : Int) { // ok
_ = n*-4 // expected-error {{use of unresolved operator '*-'}}
if n==-1 {} // expected-error {{use of unresolved operator '==-'}}
// <rdar://problem/14296004> [QoI] Poor diagnostic/recovery when two operators (e.g., == and -) are adjacted without spaces.
_ = n*-4 // expected-error {{missing whitespace between '*' and '-' operators}} {{6-6= }} {{7-7= }}
if n==-1 {} // expected-error {{missing whitespace between '==' and '-' operators}} {{5-5= }} {{7-7= }}
prefix operator {}
prefix func(a : Int) -> Int { return a }
@@ -184,8 +183,10 @@ postfix func☃⃠(a : Int) -> Int { return a }
_ = n n // Ok.
_ = n n // Ok.
_ = nn // expected-error {{use of unresolved operator ''}}
_ = n n // expected-error {{use of unresolved operator ''}}
_ = nn // expected-error {{ambiguous missing whitespace between unary and binary operators}}
// expected-note @-1 {{could be binary '' and prefix ''}} {{12-12= }} {{18-18= }}
// expected-note @-2 {{could be postfix '' and binary ''}} {{6-6= }} {{12-12= }}