Replace u8 string literal prefixes with SWIFT_UTF8 macro.

In C++20, `u8` literals have type `char8_t` (or `const char8_t[N]` for string literals) instead of `char`, and these can't be implicitly converted to their `char` counterparts. The `SWIFT_UTF8` macro mitigates the difference and allows the same code to compile under C++14/17 modes and C++20, preserving the `char` type while ensuring that the text is encoded as UTF-8.
2025-12-14 20:36:38 +01:00 · 2023-08-02 16:26:56 -04:00
parent 05919751b4
commit 300a952ede
5 changed files with 43 additions and 15 deletions
--- a/include/swift/Basic/Compiler.h
+++ b/include/swift/Basic/Compiler.h
@@ -13,6 +13,8 @@
 #ifndef SWIFT_BASIC_COMPILER_H
 #define SWIFT_BASIC_COMPILER_H

+#include <stddef.h>
+
 #if defined(_MSC_VER) && !defined(__clang__)
 #define SWIFT_COMPILER_IS_MSVC 1
 #else
@@ -190,4 +192,21 @@
 #define ENUM_EXTENSIBILITY_ATTR(arg)
 #endif

+// The 'u8' string literal prefix creates `char` types on C++14/17 but
+// `char8_t` types on C++20, which don't implicitly convert to `char`. Wrap
+// Unicode literals in `SWIFT_UTF8("...")` so they are encoded as UTF-8 yet
+// always yield `char` types in either mode. Uses the global `size_t` since
+// only <stddef.h> is included, which need not declare `std::size_t`.
+#if defined(__cplusplus)
+#if defined(__cpp_char8_t)
+inline constexpr char operator""_swift_u8(char8_t c) { return c; }
+inline const char *operator""_swift_u8(const char8_t *p, size_t) {
+  return reinterpret_cast<const char *>(p);
+}
+#define SWIFT_UTF8(literal) u8##literal##_swift_u8
+#else  // !defined(__cpp_char8_t)
+#define SWIFT_UTF8(literal) u8##literal
+#endif // defined(__cpp_char8_t)
+#endif // defined(__cplusplus)
+
 #endif // SWIFT_BASIC_COMPILER_H
--- a/lib/AST/Type.cpp
+++ b/lib/AST/Type.cpp
@@ -36,6 +36,7 @@
 #include "swift/AST/SubstitutionMap.h"
 #include "swift/AST/TypeLoc.h"
 #include "swift/AST/TypeRepr.h"
+#include "swift/Basic/Compiler.h"
 #include "clang/AST/Type.h"
 #include "llvm/ADT/APFloat.h"
 #include "llvm/ADT/SmallPtrSet.h"
@@ -2037,8 +2038,8 @@ Identifier GenericTypeParamType::getName() const {
  llvm::SmallString<10> nameBuf;
  llvm::raw_svector_ostream os(nameBuf);

-  static const char *tau = u8"\u03C4_";
-  
+  static const char *tau = SWIFT_UTF8("\u03C4_");
+
  os << tau << getDepth() << '_' << getIndex();
  Identifier name = C.getIdentifier(os.str());
  names.insert({depthIndex, name});
--- a/lib/Basic/Unicode.cpp
+++ b/lib/Basic/Unicode.cpp
@@ -11,6 +11,7 @@
 //===----------------------------------------------------------------------===//

 #include "swift/Basic/Unicode.h"
+#include "swift/Basic/Compiler.h"
 #include "llvm/ADT/SmallString.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/Support/ConvertUTF.h"
@@ -135,7 +136,7 @@ std::string swift::unicode::sanitizeUTF8(StringRef Text) {
  Builder.reserve(Text.size());
  const llvm::UTF8* Data = reinterpret_cast<const llvm::UTF8*>(Text.begin());
  const llvm::UTF8* End = reinterpret_cast<const llvm::UTF8*>(Text.end());
-  StringRef Replacement = u8"\ufffd";
+  StringRef Replacement = SWIFT_UTF8("\ufffd");
  while (Data < End) {
    auto Step = llvm::getNumBytesForUTF8(*Data);
    if (Data + Step > End) {
--- a/lib/IRGen/IRGenDebugInfo.cpp
+++ b/lib/IRGen/IRGenDebugInfo.cpp
@@ -28,6 +28,7 @@
 #include "swift/AST/ModuleLoader.h"
 #include "swift/AST/Pattern.h"
 #include "swift/AST/TypeDifferenceVisitor.h"
+#include "swift/Basic/Compiler.h"
 #include "swift/Basic/Dwarf.h"
 #include "swift/Basic/SourceManager.h"
 #include "swift/Basic/Version.h"
@@ -3034,7 +3035,7 @@ void IRGenDebugInfoImpl::emitTypeMetadata(IRGenFunction &IGF,
    return;

  llvm::SmallString<8> Buf;
-  static const char *Tau = u8"\u03C4";
+  static const char *Tau = SWIFT_UTF8("\u03C4");
  llvm::raw_svector_ostream OS(Buf);
  OS << '$' << Tau << '_' << Depth << '_' << Index;
  uint64_t PtrWidthInBits = CI.getTargetInfo().getPointerWidth(0);
--- a/unittests/IDE/FuzzyStringMatcherTest.cpp
+++ b/unittests/IDE/FuzzyStringMatcherTest.cpp
@@ -11,6 +11,7 @@
 //===----------------------------------------------------------------------===//

 #include "swift/IDE/FuzzyStringMatcher.h"
+#include "swift/Basic/Compiler.h"
 #include "gtest/gtest.h"

 using FuzzyStringMatcher = swift::ide::FuzzyStringMatcher;
@@ -53,26 +54,31 @@ TEST(FuzzyStringMatcher, SingleCharacterMatching) {

 TEST(FuzzyStringMatcher, UnicodeMatching) {
  // Single code point matching.
-  EXPECT_TRUE(FuzzyStringMatcher(u8"\u2602a\U0002000Bz")
-                  .matchesCandidate(u8"\u2602A\U0002000BZ"));
+  EXPECT_TRUE(FuzzyStringMatcher(SWIFT_UTF8("\u2602a\U0002000Bz"))
+                  .matchesCandidate(SWIFT_UTF8("\u2602A\U0002000BZ")));

  // Same-order combining marks.
-  EXPECT_TRUE(FuzzyStringMatcher(u8"a\u0323\u0307")
-                  .matchesCandidate(u8"A\u0323\u0307"));
+  EXPECT_TRUE(FuzzyStringMatcher(SWIFT_UTF8("a\u0323\u0307"))
+                  .matchesCandidate(SWIFT_UTF8("A\u0323\u0307")));

  // FIXME: Canonical equivalence. These should be the same.
-  EXPECT_FALSE(FuzzyStringMatcher(u8"a\u0307\u0323")
-                   .matchesCandidate(u8"A\u0323\u0307"));
-  EXPECT_FALSE(FuzzyStringMatcher(u8"a\u00C5").matchesCandidate(u8"A\u030A"));
+  EXPECT_FALSE(FuzzyStringMatcher(SWIFT_UTF8("a\u0307\u0323"))
+                   .matchesCandidate(SWIFT_UTF8("A\u0323\u0307")));
+  EXPECT_FALSE(FuzzyStringMatcher(SWIFT_UTF8("a\u00C5"))
+                   .matchesCandidate(SWIFT_UTF8("A\u030A")));

  // FIXME: Compatibility equivalence.  It would be good to make these the same
  // too, since we're fuzzy matching.
-  EXPECT_FALSE(FuzzyStringMatcher(u8"fi").matchesCandidate(u8"\uFB01"));
-  EXPECT_FALSE(FuzzyStringMatcher(u8"25").matchesCandidate(u8"2\u2075"));
+  EXPECT_FALSE(FuzzyStringMatcher(SWIFT_UTF8("fi"))
+                   .matchesCandidate(SWIFT_UTF8("\uFB01")));
+  EXPECT_FALSE(FuzzyStringMatcher(SWIFT_UTF8("25"))
+                   .matchesCandidate(SWIFT_UTF8("2\u2075")));

  // FIXME: Case-insensitivity in non-ASCII characters.
-  EXPECT_FALSE(FuzzyStringMatcher(u8"\u00E0").matchesCandidate(u8"\u00C0"));
-  EXPECT_FALSE(FuzzyStringMatcher(u8"ss").matchesCandidate(u8"\u00DF"));
+  EXPECT_FALSE(FuzzyStringMatcher(SWIFT_UTF8("\u00E0"))
+                   .matchesCandidate(SWIFT_UTF8("\u00C0")));
+  EXPECT_FALSE(FuzzyStringMatcher(SWIFT_UTF8("ss"))
+                   .matchesCandidate(SWIFT_UTF8("\u00DF")));
 }

 TEST(FuzzyStringMatcher, BasicScoring) {