Fix <rdar://14296004> [QoI] Poor diagnostic/recovery when two operators (e.g., == and -) are adjacted without spaces.

This is a frequently reported and surprising issue where lack of whitespace leads to rejecting common code like "X*-4". Fix this by diagnosing it specifically as a lack of whitespace problem, including a fixit to insert the missing whitespace (to transform it into "X * -4". This even handles the cases where there are multiple valid (single) splits possible by emitting a series of notes.
2025-12-14 20:36:38 +01:00 · 2015-12-16 13:18:59 -08:00
parent d221401713
commit e28c2e2c6e
5 changed files with 189 additions and 16 deletions
--- a/include/swift/AST/DiagnosticsSema.def
+++ b/include/swift/AST/DiagnosticsSema.def
@@ -420,16 +420,28 @@ ERROR(ambiguous_module_type,sema_nb,none,
 ERROR(use_nonmatching_operator,sema_nb,none,
      "%0 is not a %select{binary|prefix unary|postfix unary}1 operator",
      (Identifier, unsigned))
+
+ERROR(unspaced_operators_fixit,sema_nb,none,
+      "missing whitespace between %0 and %1 operators",
+      (Identifier, Identifier, bool))
+ERROR(unspaced_operators,sema_nb,none,
+      "ambiguous missing whitespace between unary and binary operators", ())
+NOTE(unspaced_operators_candidate,sema_nb,none,
+     "could be %select{binary|postfix}2 %0 and %select{prefix|binary}2 %1",
+     (Identifier, Identifier, bool))
+
+
+
 ERROR(use_unresolved_identifier,sema_nb,none,
      "use of unresolved %select{identifier|operator}1 %0", (Identifier, bool))
 ERROR(use_undeclared_type,sema_nb,none,
      "use of undeclared type %0", (Identifier))
 ERROR(use_undeclared_type_did_you_mean,sema_nb,none,
-"use of undeclared type %0; did you mean to use '%1'?", (Identifier, StringRef))
+      "use of undeclared type %0; did you mean to use '%1'?", (Identifier, StringRef))
 NOTE(note_remapped_type,sema_nb,none,
-  "did you mean to use '%0'?", (StringRef))
+     "did you mean to use '%0'?", (StringRef))
 ERROR(identifier_init_failure,sema_nb,none,
-  "could not infer type for %0", (Identifier))
+      "could not infer type for %0", (Identifier))
 ERROR(pattern_used_in_type,sema_nb,none,
      "%0 used within its own type", (Identifier))
 NOTE(note_module_as_type,sema_nb,none,
--- a/include/swift/Parse/Lexer.h
+++ b/include/swift/Parse/Lexer.h
@@ -240,6 +240,15 @@ public:
    restoreState(S);
  }

+  /// \brief Retrieve the Token referred to by \c Loc.
+  ///
+  /// \param SM The source manager in which the given source location
+  /// resides.
+  ///
+  /// \param Loc The source location of the beginning of a token.
+  static Token getTokenAtLocation(const SourceManager &SM, SourceLoc Loc);
+
+
  /// \brief Retrieve the source location that points just past the
  /// end of the token referred to by \c Loc.
  ///
@@ -299,6 +308,10 @@ public:
  /// reserved word.
  static tok kindOfIdentifier(StringRef Str, bool InSILMode);

+  /// \brief Determines if the given string is a valid operator identifier,
+  /// without escaping characters.
+  static bool isOperator(StringRef string);
+
  SourceLoc getLocForStartOfBuffer() const {
    return SourceLoc(llvm::SMLoc::getFromPointer(BufferStart));
  }
--- a/lib/Parse/Lexer.cpp
+++ b/lib/Parse/Lexer.cpp
@@ -523,6 +523,18 @@ bool Lexer::isIdentifier(StringRef string) {
  return p == end;
 }

+/// \brief Determines if the given string is a valid operator identifier,
+/// without escaping characters.
+bool Lexer::isOperator(StringRef string) {
+  if (string.empty()) return false;
+  char const *p = string.data(), *end = string.end();
+  if (!advanceIfValidStartOfOperator(p, end))
+    return false;
+  while (p < end && advanceIfValidContinuationOfOperator(p, end));
+  return p == end;
+}
+
+
 tok Lexer::kindOfIdentifier(StringRef Str, bool InSILMode) {
  tok Kind = llvm::StringSwitch<tok>(Str)
 #define KEYWORD(kw) \
@@ -1664,15 +1676,15 @@ Restart:
  }
 }

-SourceLoc Lexer::getLocForEndOfToken(const SourceManager &SM, SourceLoc Loc) {
+Token Lexer::getTokenAtLocation(const SourceManager &SM, SourceLoc Loc) {
  // Don't try to do anything with an invalid location.
  if (!Loc.isValid())
-    return Loc;
+    return Token();

  // Figure out which buffer contains this location.
  int BufferID = SM.findBufferContainingLoc(Loc);
  if (BufferID < 0)
-    return SourceLoc();
+    return Token();
  
  // Use fake language options; language options only affect validity
  // and the exact token produced.
@@ -1685,10 +1697,14 @@ SourceLoc Lexer::getLocForEndOfToken(const SourceManager &SM, SourceLoc Loc) {
  Lexer L(FakeLangOpts, SM, BufferID, nullptr, /*InSILMode=*/ false,
          CommentRetentionMode::ReturnAsTokens);
  L.restoreState(State(Loc));
-  unsigned Length = L.peekNextToken().getLength();
-  return Loc.getAdvancedLoc(Length);
+  return L.peekNextToken();
 }

+SourceLoc Lexer::getLocForEndOfToken(const SourceManager &SM, SourceLoc Loc) {
+  return Loc.getAdvancedLoc(getTokenAtLocation(SM, Loc).getLength());
+}
+
+
 static SourceLoc getLocForStartOfTokenInBuf(SourceManager &SM,
                                            unsigned BufferID,
                                            unsigned Offset,
--- a/lib/Sema/TypeCheckConstraints.cpp
+++ b/lib/Sema/TypeCheckConstraints.cpp
@@ -281,6 +281,132 @@ static bool matchesDeclRefKind(ValueDecl *value, DeclRefKind refKind) {
  llvm_unreachable("bad declaration reference kind");
 }

+static bool containsDeclRefKind(LookupResult &lookupResult,
+                                DeclRefKind refKind) {
+  for (auto candidate : lookupResult) {
+    ValueDecl *D = candidate.Decl;
+    if (!D || !D->hasType())
+      continue;
+    if (matchesDeclRefKind(D, refKind))
+      return true;
+  }
+  return false;
+}
+
+/// Emit a diagnostic with a fixit hint for an invalid binary operator, showing
+/// how to split it according to splitCandidate.
+static void diagnoseOperatorSplit(UnresolvedDeclRefExpr *UDRE,
+                                  std::pair<unsigned, bool> splitCandidate,
+                                  Diag<Identifier, Identifier, bool> diagID,
+                                  TypeChecker &TC) {
+
+  unsigned splitLoc = splitCandidate.first;
+  bool isBinOpFirst = splitCandidate.second;
+  StringRef nameStr = UDRE->getName().str();
+  auto startStr = nameStr.substr(0, splitLoc);
+  auto endStr = nameStr.drop_front(splitLoc);
+
+  // One valid split found, it is almost certainly the right answer.
+  auto diag = TC.diagnose(UDRE->getLoc(), diagID,
+                          TC.Context.getIdentifier(startStr),
+                          TC.Context.getIdentifier(endStr), isBinOpFirst);
+  // Highlight the whole operator.
+  diag.highlight(UDRE->getLoc());
+  // Insert whitespace on the left if the binop is at the start, or to the
+  // right if it is end.
+  if (isBinOpFirst)
+    diag.fixItInsert(UDRE->getLoc(), " ");
+  else
+    diag.fixItInsertAfter(UDRE->getLoc(), " ");
+
+  // Insert a space between the operators.
+  diag.fixItInsert(UDRE->getLoc().getAdvancedLoc(splitLoc), " ");
+}
+
+/// If we failed lookup of a binary operator, check to see it to see if
+/// it is a binary operator juxtaposed with a unary operator (x*-4) that
+/// needs whitespace.  If so, emit specific diagnostics for it and return true,
+/// otherwise return false.
+static bool diagnoseJuxtaposedBinOp(UnresolvedDeclRefExpr *UDRE,
+                                    DeclContext *DC,
+                                    TypeChecker &TC) {
+  Identifier name = UDRE->getName();
+  StringRef nameStr = name.str();
+  if (!name.isOperator() || nameStr.size() < 2 ||
+      UDRE->getRefKind() != DeclRefKind::BinaryOperator)
+    return false;
+
+  // Relex the token, to decide whether it has whitespace around it or not.  If
+  // it does, it isn't likely to be a case where a space was forgotten.
+  auto tok = Lexer::getTokenAtLocation(TC.Context.SourceMgr, UDRE->getLoc());
+  if (tok.getKind() != tok::oper_binary_unspaced)
+    return false;
+
+  // Okay, we have a failed lookup of a multicharacter unspaced binary operator.
+  // Check to see if lookup succeeds if a prefix or postfix operator is split
+  // off, and record the matches found.  The bool indicated is false if the
+  // first half of the split is the unary operator (x!*4) or true if it is the
+  // binary operator (x*+4).
+  std::vector<std::pair<unsigned, bool>> WorkableSplits;
+
+  // Check all the potential splits.
+  for (unsigned splitLoc = 1, e = nameStr.size(); splitLoc != e; ++splitLoc) {
+    // For it to be a valid split, the start and end section must be valid
+    // operators, spliting a unicode code point isn't kosher.
+    auto startStr = nameStr.substr(0, splitLoc);
+    auto endStr = nameStr.drop_front(splitLoc);
+    if (!Lexer::isOperator(startStr) || !Lexer::isOperator(endStr))
+      continue;
+
+    auto startName = TC.Context.getIdentifier(startStr);
+    auto endName = TC.Context.getIdentifier(endStr);
+
+    // Perform name lookup for the first and second pieces.  If either fail to
+    // be found, then it isn't a valid split.
+    NameLookupOptions LookupOptions = defaultUnqualifiedLookupOptions;
+    if (isa<AbstractFunctionDecl>(DC))
+      LookupOptions |= NameLookupFlags::KnownPrivate;
+    auto startLookup = TC.lookupUnqualified(DC, startName, UDRE->getLoc(),
+                                       LookupOptions);
+    if (!startLookup) continue;
+    auto endLookup = TC.lookupUnqualified(DC, endName, UDRE->getLoc(),
+                                          LookupOptions);
+    if (!endLookup) continue;
+
+    // Look to see if the candidates found could possibly match.
+    if (containsDeclRefKind(startLookup, DeclRefKind::PostfixOperator) &&
+        containsDeclRefKind(endLookup, DeclRefKind::BinaryOperator))
+      WorkableSplits.push_back({ splitLoc, false });
+
+    if (containsDeclRefKind(startLookup, DeclRefKind::BinaryOperator) &&
+        containsDeclRefKind(endLookup, DeclRefKind::PrefixOperator))
+      WorkableSplits.push_back({ splitLoc, true });
+  }
+
+  switch (WorkableSplits.size()) {
+  case 0:
+    // No splits found, can't produce this diagnostic.
+    return false;
+  case 1:
+    // One candidate: produce an error with a fixit on it.
+    diagnoseOperatorSplit(UDRE, WorkableSplits[0],
+                          diag::unspaced_operators_fixit, TC);
+    return true;
+
+  default:
+    // Otherwise, we have to produce a series of notes listing the various
+    // options.
+    TC.diagnose(UDRE->getLoc(), diag::unspaced_operators)
+      .highlight(UDRE->getLoc());
+
+    for (auto candidateSplit : WorkableSplits)
+      diagnoseOperatorSplit(UDRE, candidateSplit,
+                            diag::unspaced_operators_candidate, TC);
+    return true;
+  }
+}
+
+
 /// Bind an UnresolvedDeclRefExpr by performing name lookup and
 /// returning the resultant expression.  Context is the DeclContext used
 /// for the lookup.
@@ -297,9 +423,14 @@ resolveDeclRefExpr(UnresolvedDeclRefExpr *UDRE, DeclContext *DC) {
  auto Lookup = lookupUnqualified(DC, Name, Loc, LookupOptions);

  if (!Lookup) {
-    diagnose(Loc, diag::use_unresolved_identifier, Name,
-             UDRE->getName().isOperator())
-      .highlight(Loc);
+    // If we failed lookup of a binary operator, check to see it to see if
+    // it is a binary operator juxtaposed with a unary operator (x*-4) that
+    // needs whitespace.  If so, emit specific diagnostics for it.
+    if (!diagnoseJuxtaposedBinOp(UDRE, DC, *this)) {
+      diagnose(Loc, diag::use_unresolved_identifier, Name,
+               UDRE->getName().isOperator())
+        .highlight(Loc);
+    }
    return new (Context) ErrorExpr(Loc);
  }

--- a/test/decl/operators.swift
+++ b/test/decl/operators.swift
@@ -172,10 +172,9 @@ func ??= <T>(inout result : T?, rhs : Int) {  // ok



-
-_ = n*-4       // expected-error {{use of unresolved operator '*-'}}
-
-if n==-1 {}    // expected-error {{use of unresolved operator '==-'}}
+// <rdar://problem/14296004> [QoI] Poor diagnostic/recovery when two operators (e.g., == and -) are adjacted without spaces.
+_ = n*-4       // expected-error {{missing whitespace between '*' and '-' operators}} {{6-6= }} {{7-7= }}
+if n==-1 {}    // expected-error {{missing whitespace between '==' and '-' operators}} {{5-5= }} {{7-7= }}

 prefix operator ☃⃠ {}
 prefix func☃⃠(a : Int) -> Int { return a }
@@ -184,8 +183,10 @@ postfix func☃⃠(a : Int) -> Int { return a }

 _ = n☃⃠ ☃⃠ n   // Ok.
 _ = n ☃⃠ ☃⃠n   // Ok.
-_ = n☃⃠☃⃠n     // expected-error {{use of unresolved operator '☃⃠☃⃠'}}
 _ = n ☃⃠☃⃠ n   // expected-error {{use of unresolved operator '☃⃠☃⃠'}}
+_ = n☃⃠☃⃠n     // expected-error {{ambiguous missing whitespace between unary and binary operators}}
+// expected-note @-1 {{could be binary '☃⃠' and prefix '☃⃠'}} {{12-12= }} {{18-18= }}
+// expected-note @-2 {{could be postfix '☃⃠' and binary '☃⃠'}} {{6-6= }} {{12-12= }}