Replace u8 string literal prefixes with SWIFT_UTF8 macro.

In C++20, `u8` literals create values of type `char8_t` instead of
`char`, and `char8_t` strings can't be implicitly converted to `char`
strings. This macro
mitigates the difference and allows the same code to compile under
C++14/17 modes and C++20, preserving the `char` type while ensuring
that the text is interpreted as UTF-8.
This commit is contained in:
Tony Allevato
2023-08-02 16:26:56 -04:00
parent 05919751b4
commit 300a952ede
5 changed files with 43 additions and 15 deletions

View File

@@ -13,6 +13,8 @@
#ifndef SWIFT_BASIC_COMPILER_H
#define SWIFT_BASIC_COMPILER_H
#include <stddef.h>
#if defined(_MSC_VER) && !defined(__clang__)
#define SWIFT_COMPILER_IS_MSVC 1
#else
@@ -190,4 +192,21 @@
#define ENUM_EXTENSIBILITY_ATTR(arg)
#endif
// The 'u8' literal prefix produces `char`-based types in C++14/17 but
// `char8_t`-based types in C++20, and a `char8_t` string does not implicitly
// convert to `const char *`. To keep the same code compiling in both modes,
// wrap Unicode literals in `SWIFT_UTF8("...")`: the compiler still encodes
// the text as UTF-8, but the resulting expression always has a `char`-based
// type.
//
// Note: when `char8_t` is available the string form evaluates to a
// `const char *` instead of a `const char[N]` array, so `sizeof` and
// array-reference binding differ between the two expansions.
#if defined(__cplusplus)
#if defined(__cpp_char8_t)
/// Converts a `u8'x'` character literal back to a plain `char`.
inline constexpr char operator""_swift_u8(char8_t c) {
  return static_cast<char>(c);
}
/// Reinterprets a `u8"..."` string literal as a `const char *`. The length
/// passed by the compiler is not needed, so the parameter is unnamed.
// `size_t` is deliberately unqualified: this header includes <stddef.h>,
// which is only guaranteed to declare the name in the global namespace.
inline const char *operator""_swift_u8(const char8_t *p, size_t) {
  return reinterpret_cast<const char *>(p);
}
#define SWIFT_UTF8(literal) u8##literal##_swift_u8
#else // !defined(__cpp_char8_t)
#define SWIFT_UTF8(literal) u8##literal
#endif // defined(__cpp_char8_t)
#endif // defined(__cplusplus)
#endif // SWIFT_BASIC_COMPILER_H

View File

@@ -36,6 +36,7 @@
#include "swift/AST/SubstitutionMap.h"
#include "swift/AST/TypeLoc.h"
#include "swift/AST/TypeRepr.h"
#include "swift/Basic/Compiler.h"
#include "clang/AST/Type.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/SmallPtrSet.h"
@@ -2037,8 +2038,8 @@ Identifier GenericTypeParamType::getName() const {
llvm::SmallString<10> nameBuf;
llvm::raw_svector_ostream os(nameBuf);
static const char *tau = u8"\u03C4_";
static const char *tau = SWIFT_UTF8("\u03C4_");
os << tau << getDepth() << '_' << getIndex();
Identifier name = C.getIdentifier(os.str());
names.insert({depthIndex, name});

View File

@@ -11,6 +11,7 @@
//===----------------------------------------------------------------------===//
#include "swift/Basic/Unicode.h"
#include "swift/Basic/Compiler.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Support/ConvertUTF.h"
@@ -135,7 +136,7 @@ std::string swift::unicode::sanitizeUTF8(StringRef Text) {
Builder.reserve(Text.size());
const llvm::UTF8* Data = reinterpret_cast<const llvm::UTF8*>(Text.begin());
const llvm::UTF8* End = reinterpret_cast<const llvm::UTF8*>(Text.end());
StringRef Replacement = u8"\ufffd";
StringRef Replacement = SWIFT_UTF8("\ufffd");
while (Data < End) {
auto Step = llvm::getNumBytesForUTF8(*Data);
if (Data + Step > End) {

View File

@@ -28,6 +28,7 @@
#include "swift/AST/ModuleLoader.h"
#include "swift/AST/Pattern.h"
#include "swift/AST/TypeDifferenceVisitor.h"
#include "swift/Basic/Compiler.h"
#include "swift/Basic/Dwarf.h"
#include "swift/Basic/SourceManager.h"
#include "swift/Basic/Version.h"
@@ -3034,7 +3035,7 @@ void IRGenDebugInfoImpl::emitTypeMetadata(IRGenFunction &IGF,
return;
llvm::SmallString<8> Buf;
static const char *Tau = u8"\u03C4";
static const char *Tau = SWIFT_UTF8("\u03C4");
llvm::raw_svector_ostream OS(Buf);
OS << '$' << Tau << '_' << Depth << '_' << Index;
uint64_t PtrWidthInBits = CI.getTargetInfo().getPointerWidth(0);

View File

@@ -11,6 +11,7 @@
//===----------------------------------------------------------------------===//
#include "swift/IDE/FuzzyStringMatcher.h"
#include "swift/Basic/Compiler.h"
#include "gtest/gtest.h"
using FuzzyStringMatcher = swift::ide::FuzzyStringMatcher;
@@ -53,26 +54,31 @@ TEST(FuzzyStringMatcher, SingleCharacterMatching) {
// Exercises FuzzyStringMatcher::matchesCandidate with non-ASCII patterns and
// candidates: exact code-point matches, combining-mark ordering, and several
// equivalence/case-folding cases that are currently expected NOT to match
// (see the FIXMEs).
// NOTE(review): each expectation appears twice below, once with a raw
// u8"..." literal and once wrapped in SWIFT_UTF8(...). This looks like the
// removed and added sides of a diff shown together — confirm against the
// real file before relying on the duplicated lines.
TEST(FuzzyStringMatcher, UnicodeMatching) {
// Single code point matching.
EXPECT_TRUE(FuzzyStringMatcher(u8"\u2602a\U0002000Bz")
.matchesCandidate(u8"\u2602A\U0002000BZ"));
EXPECT_TRUE(FuzzyStringMatcher(SWIFT_UTF8("\u2602a\U0002000Bz"))
.matchesCandidate(SWIFT_UTF8("\u2602A\U0002000BZ")));
// Same-order combining marks.
EXPECT_TRUE(FuzzyStringMatcher(u8"a\u0323\u0307")
.matchesCandidate(u8"A\u0323\u0307"));
EXPECT_TRUE(FuzzyStringMatcher(SWIFT_UTF8("a\u0323\u0307"))
.matchesCandidate(SWIFT_UTF8("A\u0323\u0307")));
// FIXME: Canonical equivalence. These should be the same.
EXPECT_FALSE(FuzzyStringMatcher(u8"a\u0307\u0323")
.matchesCandidate(u8"A\u0323\u0307"));
EXPECT_FALSE(FuzzyStringMatcher(u8"a\u00C5").matchesCandidate(u8"A\u030A"));
EXPECT_FALSE(FuzzyStringMatcher(SWIFT_UTF8("a\u0307\u0323"))
.matchesCandidate(SWIFT_UTF8("A\u0323\u0307")));
EXPECT_FALSE(FuzzyStringMatcher(SWIFT_UTF8("a\u00C5"))
.matchesCandidate(SWIFT_UTF8("A\u030A")));
// FIXME: Compatibility equivalence. It would be good to make these the same
// too, since we're fuzzy matching.
EXPECT_FALSE(FuzzyStringMatcher(u8"fi").matchesCandidate(u8"\uFB01"));
EXPECT_FALSE(FuzzyStringMatcher(u8"25").matchesCandidate(u8"2\u2075"));
EXPECT_FALSE(FuzzyStringMatcher(SWIFT_UTF8("fi"))
.matchesCandidate(SWIFT_UTF8("\uFB01")));
EXPECT_FALSE(FuzzyStringMatcher(SWIFT_UTF8("25"))
.matchesCandidate(SWIFT_UTF8("2\u2075")));
// FIXME: Case-insensitivity in non-ASCII characters.
EXPECT_FALSE(FuzzyStringMatcher(u8"\u00E0").matchesCandidate(u8"\u00C0"));
EXPECT_FALSE(FuzzyStringMatcher(u8"ss").matchesCandidate(u8"\u00DF"));
EXPECT_FALSE(FuzzyStringMatcher(SWIFT_UTF8("\u00E0"))
.matchesCandidate(SWIFT_UTF8("\u00C0")));
EXPECT_FALSE(FuzzyStringMatcher(SWIFT_UTF8("ss"))
.matchesCandidate(SWIFT_UTF8("\u00DF")));
}
TEST(FuzzyStringMatcher, BasicScoring) {