mirror of
https://github.com/apple/swift.git
synced 2025-12-14 20:36:38 +01:00
Fix <rdar://14296004> [QoI] Poor diagnostic/recovery when two operators (e.g., == and -) are adjacted without spaces.
This is a frequently reported and surprising issue where lack of whitespace leads to rejecting common code like "X*-4". Fix this by diagnosing it specifically as a lack of whitespace problem, including a fixit to insert the missing whitespace (to transform it into "X * -4". This even handles the cases where there are multiple valid (single) splits possible by emitting a series of notes.
This commit is contained in:
@@ -420,16 +420,28 @@ ERROR(ambiguous_module_type,sema_nb,none,
|
||||
ERROR(use_nonmatching_operator,sema_nb,none,
|
||||
"%0 is not a %select{binary|prefix unary|postfix unary}1 operator",
|
||||
(Identifier, unsigned))
|
||||
|
||||
ERROR(unspaced_operators_fixit,sema_nb,none,
|
||||
"missing whitespace between %0 and %1 operators",
|
||||
(Identifier, Identifier, bool))
|
||||
ERROR(unspaced_operators,sema_nb,none,
|
||||
"ambiguous missing whitespace between unary and binary operators", ())
|
||||
NOTE(unspaced_operators_candidate,sema_nb,none,
|
||||
"could be %select{binary|postfix}2 %0 and %select{prefix|binary}2 %1",
|
||||
(Identifier, Identifier, bool))
|
||||
|
||||
|
||||
|
||||
ERROR(use_unresolved_identifier,sema_nb,none,
|
||||
"use of unresolved %select{identifier|operator}1 %0", (Identifier, bool))
|
||||
ERROR(use_undeclared_type,sema_nb,none,
|
||||
"use of undeclared type %0", (Identifier))
|
||||
ERROR(use_undeclared_type_did_you_mean,sema_nb,none,
|
||||
"use of undeclared type %0; did you mean to use '%1'?", (Identifier, StringRef))
|
||||
"use of undeclared type %0; did you mean to use '%1'?", (Identifier, StringRef))
|
||||
NOTE(note_remapped_type,sema_nb,none,
|
||||
"did you mean to use '%0'?", (StringRef))
|
||||
"did you mean to use '%0'?", (StringRef))
|
||||
ERROR(identifier_init_failure,sema_nb,none,
|
||||
"could not infer type for %0", (Identifier))
|
||||
"could not infer type for %0", (Identifier))
|
||||
ERROR(pattern_used_in_type,sema_nb,none,
|
||||
"%0 used within its own type", (Identifier))
|
||||
NOTE(note_module_as_type,sema_nb,none,
|
||||
|
||||
@@ -240,6 +240,15 @@ public:
|
||||
restoreState(S);
|
||||
}
|
||||
|
||||
/// \brief Retrieve the Token referred to by \c Loc.
|
||||
///
|
||||
/// \param SM The source manager in which the given source location
|
||||
/// resides.
|
||||
///
|
||||
/// \param Loc The source location of the beginning of a token.
|
||||
static Token getTokenAtLocation(const SourceManager &SM, SourceLoc Loc);
|
||||
|
||||
|
||||
/// \brief Retrieve the source location that points just past the
|
||||
/// end of the token referred to by \c Loc.
|
||||
///
|
||||
@@ -299,6 +308,10 @@ public:
|
||||
/// reserved word.
|
||||
static tok kindOfIdentifier(StringRef Str, bool InSILMode);
|
||||
|
||||
/// \brief Determines if the given string is a valid operator identifier,
|
||||
/// without escaping characters.
|
||||
static bool isOperator(StringRef string);
|
||||
|
||||
SourceLoc getLocForStartOfBuffer() const {
|
||||
return SourceLoc(llvm::SMLoc::getFromPointer(BufferStart));
|
||||
}
|
||||
|
||||
@@ -523,6 +523,18 @@ bool Lexer::isIdentifier(StringRef string) {
|
||||
return p == end;
|
||||
}
|
||||
|
||||
/// \brief Determines if the given string is a valid operator identifier,
|
||||
/// without escaping characters.
|
||||
bool Lexer::isOperator(StringRef string) {
|
||||
if (string.empty()) return false;
|
||||
char const *p = string.data(), *end = string.end();
|
||||
if (!advanceIfValidStartOfOperator(p, end))
|
||||
return false;
|
||||
while (p < end && advanceIfValidContinuationOfOperator(p, end));
|
||||
return p == end;
|
||||
}
|
||||
|
||||
|
||||
tok Lexer::kindOfIdentifier(StringRef Str, bool InSILMode) {
|
||||
tok Kind = llvm::StringSwitch<tok>(Str)
|
||||
#define KEYWORD(kw) \
|
||||
@@ -1664,15 +1676,15 @@ Restart:
|
||||
}
|
||||
}
|
||||
|
||||
SourceLoc Lexer::getLocForEndOfToken(const SourceManager &SM, SourceLoc Loc) {
|
||||
Token Lexer::getTokenAtLocation(const SourceManager &SM, SourceLoc Loc) {
|
||||
// Don't try to do anything with an invalid location.
|
||||
if (!Loc.isValid())
|
||||
return Loc;
|
||||
return Token();
|
||||
|
||||
// Figure out which buffer contains this location.
|
||||
int BufferID = SM.findBufferContainingLoc(Loc);
|
||||
if (BufferID < 0)
|
||||
return SourceLoc();
|
||||
return Token();
|
||||
|
||||
// Use fake language options; language options only affect validity
|
||||
// and the exact token produced.
|
||||
@@ -1685,10 +1697,14 @@ SourceLoc Lexer::getLocForEndOfToken(const SourceManager &SM, SourceLoc Loc) {
|
||||
Lexer L(FakeLangOpts, SM, BufferID, nullptr, /*InSILMode=*/ false,
|
||||
CommentRetentionMode::ReturnAsTokens);
|
||||
L.restoreState(State(Loc));
|
||||
unsigned Length = L.peekNextToken().getLength();
|
||||
return Loc.getAdvancedLoc(Length);
|
||||
return L.peekNextToken();
|
||||
}
|
||||
|
||||
SourceLoc Lexer::getLocForEndOfToken(const SourceManager &SM, SourceLoc Loc) {
|
||||
return Loc.getAdvancedLoc(getTokenAtLocation(SM, Loc).getLength());
|
||||
}
|
||||
|
||||
|
||||
static SourceLoc getLocForStartOfTokenInBuf(SourceManager &SM,
|
||||
unsigned BufferID,
|
||||
unsigned Offset,
|
||||
|
||||
@@ -281,6 +281,132 @@ static bool matchesDeclRefKind(ValueDecl *value, DeclRefKind refKind) {
|
||||
llvm_unreachable("bad declaration reference kind");
|
||||
}
|
||||
|
||||
static bool containsDeclRefKind(LookupResult &lookupResult,
|
||||
DeclRefKind refKind) {
|
||||
for (auto candidate : lookupResult) {
|
||||
ValueDecl *D = candidate.Decl;
|
||||
if (!D || !D->hasType())
|
||||
continue;
|
||||
if (matchesDeclRefKind(D, refKind))
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
/// Emit a diagnostic with a fixit hint for an invalid binary operator, showing
|
||||
/// how to split it according to splitCandidate.
|
||||
static void diagnoseOperatorSplit(UnresolvedDeclRefExpr *UDRE,
|
||||
std::pair<unsigned, bool> splitCandidate,
|
||||
Diag<Identifier, Identifier, bool> diagID,
|
||||
TypeChecker &TC) {
|
||||
|
||||
unsigned splitLoc = splitCandidate.first;
|
||||
bool isBinOpFirst = splitCandidate.second;
|
||||
StringRef nameStr = UDRE->getName().str();
|
||||
auto startStr = nameStr.substr(0, splitLoc);
|
||||
auto endStr = nameStr.drop_front(splitLoc);
|
||||
|
||||
// One valid split found, it is almost certainly the right answer.
|
||||
auto diag = TC.diagnose(UDRE->getLoc(), diagID,
|
||||
TC.Context.getIdentifier(startStr),
|
||||
TC.Context.getIdentifier(endStr), isBinOpFirst);
|
||||
// Highlight the whole operator.
|
||||
diag.highlight(UDRE->getLoc());
|
||||
// Insert whitespace on the left if the binop is at the start, or to the
|
||||
// right if it is end.
|
||||
if (isBinOpFirst)
|
||||
diag.fixItInsert(UDRE->getLoc(), " ");
|
||||
else
|
||||
diag.fixItInsertAfter(UDRE->getLoc(), " ");
|
||||
|
||||
// Insert a space between the operators.
|
||||
diag.fixItInsert(UDRE->getLoc().getAdvancedLoc(splitLoc), " ");
|
||||
}
|
||||
|
||||
/// If we failed lookup of a binary operator, check to see it to see if
|
||||
/// it is a binary operator juxtaposed with a unary operator (x*-4) that
|
||||
/// needs whitespace. If so, emit specific diagnostics for it and return true,
|
||||
/// otherwise return false.
|
||||
static bool diagnoseJuxtaposedBinOp(UnresolvedDeclRefExpr *UDRE,
|
||||
DeclContext *DC,
|
||||
TypeChecker &TC) {
|
||||
Identifier name = UDRE->getName();
|
||||
StringRef nameStr = name.str();
|
||||
if (!name.isOperator() || nameStr.size() < 2 ||
|
||||
UDRE->getRefKind() != DeclRefKind::BinaryOperator)
|
||||
return false;
|
||||
|
||||
// Relex the token, to decide whether it has whitespace around it or not. If
|
||||
// it does, it isn't likely to be a case where a space was forgotten.
|
||||
auto tok = Lexer::getTokenAtLocation(TC.Context.SourceMgr, UDRE->getLoc());
|
||||
if (tok.getKind() != tok::oper_binary_unspaced)
|
||||
return false;
|
||||
|
||||
// Okay, we have a failed lookup of a multicharacter unspaced binary operator.
|
||||
// Check to see if lookup succeeds if a prefix or postfix operator is split
|
||||
// off, and record the matches found. The bool indicated is false if the
|
||||
// first half of the split is the unary operator (x!*4) or true if it is the
|
||||
// binary operator (x*+4).
|
||||
std::vector<std::pair<unsigned, bool>> WorkableSplits;
|
||||
|
||||
// Check all the potential splits.
|
||||
for (unsigned splitLoc = 1, e = nameStr.size(); splitLoc != e; ++splitLoc) {
|
||||
// For it to be a valid split, the start and end section must be valid
|
||||
// operators, spliting a unicode code point isn't kosher.
|
||||
auto startStr = nameStr.substr(0, splitLoc);
|
||||
auto endStr = nameStr.drop_front(splitLoc);
|
||||
if (!Lexer::isOperator(startStr) || !Lexer::isOperator(endStr))
|
||||
continue;
|
||||
|
||||
auto startName = TC.Context.getIdentifier(startStr);
|
||||
auto endName = TC.Context.getIdentifier(endStr);
|
||||
|
||||
// Perform name lookup for the first and second pieces. If either fail to
|
||||
// be found, then it isn't a valid split.
|
||||
NameLookupOptions LookupOptions = defaultUnqualifiedLookupOptions;
|
||||
if (isa<AbstractFunctionDecl>(DC))
|
||||
LookupOptions |= NameLookupFlags::KnownPrivate;
|
||||
auto startLookup = TC.lookupUnqualified(DC, startName, UDRE->getLoc(),
|
||||
LookupOptions);
|
||||
if (!startLookup) continue;
|
||||
auto endLookup = TC.lookupUnqualified(DC, endName, UDRE->getLoc(),
|
||||
LookupOptions);
|
||||
if (!endLookup) continue;
|
||||
|
||||
// Look to see if the candidates found could possibly match.
|
||||
if (containsDeclRefKind(startLookup, DeclRefKind::PostfixOperator) &&
|
||||
containsDeclRefKind(endLookup, DeclRefKind::BinaryOperator))
|
||||
WorkableSplits.push_back({ splitLoc, false });
|
||||
|
||||
if (containsDeclRefKind(startLookup, DeclRefKind::BinaryOperator) &&
|
||||
containsDeclRefKind(endLookup, DeclRefKind::PrefixOperator))
|
||||
WorkableSplits.push_back({ splitLoc, true });
|
||||
}
|
||||
|
||||
switch (WorkableSplits.size()) {
|
||||
case 0:
|
||||
// No splits found, can't produce this diagnostic.
|
||||
return false;
|
||||
case 1:
|
||||
// One candidate: produce an error with a fixit on it.
|
||||
diagnoseOperatorSplit(UDRE, WorkableSplits[0],
|
||||
diag::unspaced_operators_fixit, TC);
|
||||
return true;
|
||||
|
||||
default:
|
||||
// Otherwise, we have to produce a series of notes listing the various
|
||||
// options.
|
||||
TC.diagnose(UDRE->getLoc(), diag::unspaced_operators)
|
||||
.highlight(UDRE->getLoc());
|
||||
|
||||
for (auto candidateSplit : WorkableSplits)
|
||||
diagnoseOperatorSplit(UDRE, candidateSplit,
|
||||
diag::unspaced_operators_candidate, TC);
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/// Bind an UnresolvedDeclRefExpr by performing name lookup and
|
||||
/// returning the resultant expression. Context is the DeclContext used
|
||||
/// for the lookup.
|
||||
@@ -297,9 +423,14 @@ resolveDeclRefExpr(UnresolvedDeclRefExpr *UDRE, DeclContext *DC) {
|
||||
auto Lookup = lookupUnqualified(DC, Name, Loc, LookupOptions);
|
||||
|
||||
if (!Lookup) {
|
||||
diagnose(Loc, diag::use_unresolved_identifier, Name,
|
||||
UDRE->getName().isOperator())
|
||||
.highlight(Loc);
|
||||
// If we failed lookup of a binary operator, check to see it to see if
|
||||
// it is a binary operator juxtaposed with a unary operator (x*-4) that
|
||||
// needs whitespace. If so, emit specific diagnostics for it.
|
||||
if (!diagnoseJuxtaposedBinOp(UDRE, DC, *this)) {
|
||||
diagnose(Loc, diag::use_unresolved_identifier, Name,
|
||||
UDRE->getName().isOperator())
|
||||
.highlight(Loc);
|
||||
}
|
||||
return new (Context) ErrorExpr(Loc);
|
||||
}
|
||||
|
||||
|
||||
@@ -172,10 +172,9 @@ func ??= <T>(inout result : T?, rhs : Int) { // ok
|
||||
|
||||
|
||||
|
||||
|
||||
_ = n*-4 // expected-error {{use of unresolved operator '*-'}}
|
||||
|
||||
if n==-1 {} // expected-error {{use of unresolved operator '==-'}}
|
||||
// <rdar://problem/14296004> [QoI] Poor diagnostic/recovery when two operators (e.g., == and -) are adjacted without spaces.
|
||||
_ = n*-4 // expected-error {{missing whitespace between '*' and '-' operators}} {{6-6= }} {{7-7= }}
|
||||
if n==-1 {} // expected-error {{missing whitespace between '==' and '-' operators}} {{5-5= }} {{7-7= }}
|
||||
|
||||
prefix operator ☃⃠ {}
|
||||
prefix func☃⃠(a : Int) -> Int { return a }
|
||||
@@ -184,8 +183,10 @@ postfix func☃⃠(a : Int) -> Int { return a }
|
||||
|
||||
_ = n☃⃠ ☃⃠ n // Ok.
|
||||
_ = n ☃⃠ ☃⃠n // Ok.
|
||||
_ = n☃⃠☃⃠n // expected-error {{use of unresolved operator '☃⃠☃⃠'}}
|
||||
_ = n ☃⃠☃⃠ n // expected-error {{use of unresolved operator '☃⃠☃⃠'}}
|
||||
_ = n☃⃠☃⃠n // expected-error {{ambiguous missing whitespace between unary and binary operators}}
|
||||
// expected-note @-1 {{could be binary '☃⃠' and prefix '☃⃠'}} {{12-12= }} {{18-18= }}
|
||||
// expected-note @-2 {{could be postfix '☃⃠' and binary '☃⃠'}} {{6-6= }} {{12-12= }}
|
||||
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user