[PrintAsClang] [NFC] Refactor decl sorting

Factor ModuleContentsWriter’s declaration sorting out into a separate helper function, and additionally rework that function into a series of abstract comparisons supported by various helper functions. This declutters the function and makes the high-level logic it implements much more clear, at the cost of hiding much of the control flow inside a macro.

This also makes a very small change to the handling of generic signature comparisons: declarations without a generic signature will be factored into comparisons by comparing an empty string.
This commit is contained in:
Becca Royal-Gordon
2025-02-17 17:03:04 -08:00
parent da07ff577c
commit de63f47224

View File

@@ -117,6 +117,252 @@ public:
}
};
namespace {
namespace compare_detail {
enum : int {
Ascending = -1,
Equivalent = 0,
Descending = 1,
};
static StringRef getNameString(const Decl *D) {
if (auto VD = dyn_cast<ValueDecl>(D))
return VD->getBaseName().userFacingName();
if (auto ED = dyn_cast<ExtensionDecl>(D)) {
auto baseClass = ED->getSelfClassDecl();
if (!baseClass)
return ED->getExtendedNominal()->getName().str();
return baseClass->getName().str();
}
llvm_unreachable("unknown top-level ObjC decl");
}
static std::string getLocString(const Decl *afd) {
std::string res;
llvm::raw_string_ostream os(res);
afd->getLoc().print(os, afd->getASTContext().SourceMgr);
return std::move(os.str());
}
static std::string getMangledNameString(const Decl *D) {
auto VD = dyn_cast<ValueDecl>(D);
if (!VD && isa<ExtensionDecl>(D))
VD = cast<ExtensionDecl>(D)->getExtendedNominal();
if (!VD)
return std::string();
Mangle::ASTMangler mangler(VD->getASTContext());
return mangler.mangleAnyDecl(VD, /*prefix=*/true,
/*respectOriginallyDefinedIn=*/true);
}
static std::string getTypeString(const ValueDecl *VD) {
return VD->getInterfaceType().getString();
}
static std::string getGenericSignatureString(const Decl *VD) {
if (auto gc = VD->getAsGenericContext())
return gc->getGenericSignature().getAsString();
return "";
}
enum class TypeMatch {
Disfavored,
Neutral,
Favored
};
/// Implements a type check where one declaration must belong to \p FavoredType
/// and the other must belong to \p DisfavoredType , with \p FavoredType
/// sorting later (printing first).
template <typename FavoredType, typename DisfavoredType>
TypeMatch areTypes(Decl *D) {
if (isa<FavoredType>(D))
return TypeMatch::Favored;
if (isa<DisfavoredType>(D))
return TypeMatch::Disfavored;
return TypeMatch::Neutral;
}
/// Implements a type check where one declaration must belong to \p FavoredType
/// and the other must not.
template <typename FavoredType>
TypeMatch isType(Decl *D) {
if (isa<FavoredType>(D))
return TypeMatch::Favored;
return TypeMatch::Disfavored;
}
static int reverseCompare(TypeMatch lhs, TypeMatch rhs) {
if (rhs == TypeMatch::Disfavored && lhs == TypeMatch::Favored)
return Descending;
if (lhs == TypeMatch::Disfavored && rhs == TypeMatch::Favored)
return Ascending;
return Equivalent;
}
static int reverseCompare(bool lhs, bool rhs) {
if (!rhs && lhs)
return Descending;
if (rhs && !lhs)
return Ascending;
return Equivalent;
}
template<typename T,
typename std::enable_if<std::is_integral<T>::value>::type * = nullptr>
static int reverseCompare(const T &lhs, const T &rhs) {
if (lhs != rhs)
return lhs < rhs ? Descending : Ascending;
return Equivalent;
}
static int reverseCompare(StringRef lhs, StringRef rhs) {
return rhs.compare(lhs);
}
static int lastDitchSort(Decl *lhs, Decl *rhs, bool suppressDiagnostic) {
int result = reverseCompare(reinterpret_cast<uintptr_t>(lhs),
reinterpret_cast<uintptr_t>(rhs));
// Sorting with yourself shouldn't happen (but implement consistent behavior
// if this assert is disabled).
ASSERT(result != Equivalent && "sorting should not compare decl to itself");
// Warn that this isn't stable across different compilations.
if (!suppressDiagnostic) {
auto earlier = (result == Ascending) ? lhs : rhs;
auto later = (result == Ascending) ? rhs : lhs;
earlier->diagnose(diag::objc_header_sorting_arbitrary, earlier, later);
later->diagnose(diag::objc_header_sorting_arbitrary_other, earlier, later);
earlier->diagnose(diag::objc_header_sorting_arbitrary_please_report);
}
return result;
}
}
/// Comparator for use with \c llvm::array_pod_sort() . This sorts decls into
/// reverse order since they will be pushed onto a stack.
static int reverseCompareDecls(Decl * const *lhs, Decl * const *rhs) {
using namespace compare_detail;
assert(*lhs != *rhs && "duplicate top-level decl");
/// Run the LHS and RHS expressions through an appropriate overload of
/// `compare_detail::reverseCompare()`, returning if they are unequal or
/// continuing if they are equal.
#define COMPARE(LHS, RHS) do {\
int result = reverseCompare((LHS), (RHS)); \
if (result != 0) \
return result; \
} while (0)
// When we visit a function, we might also generate a thunk that calls into the
// implementation of structs/enums to get the opaque pointers. To avoid
// referencing these methods before we see the definition for the generated
// classes, we want to visit function definitions last.
COMPARE((areTypes<NominalTypeDecl, AbstractFunctionDecl>(*lhs)),
(areTypes<NominalTypeDecl, AbstractFunctionDecl>(*rhs)));
// Sort by names.
COMPARE(getNameString(*lhs), getNameString(*rhs));
// Two overloaded functions can have the same name when emitting C++.
if (isa<AbstractFunctionDecl>(*rhs) && isa<AbstractFunctionDecl>(*lhs)) {
// Sort top level functions with the same C++ name by their location to
// have stable sorting that depends on users source but not on the
// compiler invocation.
// FIXME: This is pretty suspect; PrintAsClang sometimes operates on
// serialized modules which don't have SourceLocs, so this sort
// rule may be applied in some steps of a build but not others.
if ((*rhs)->getLoc().isValid() && (*lhs)->getLoc().isValid()) {
COMPARE(getLocString(*lhs), getLocString(*rhs));
}
}
// A function and a global variable can have the same name in C++,
// even when the variable might not actually be emitted by the emitter.
// In that case, order the function before the variable.
COMPARE((areTypes<AbstractFunctionDecl, VarDecl>(*lhs)),
(areTypes<AbstractFunctionDecl, VarDecl>(*rhs)));
// Prefer value decls to extensions.
COMPARE(isType<ValueDecl>(*lhs), isType<ValueDecl>(*rhs));
// Last-ditch ValueDecl tiebreaker: Compare mangled names. This captures
// *tons* of context and detail missed by the previous checks, but the
// resulting sort makes little sense to humans.
// FIXME: It'd be nice to share the mangler or even memoize mangled names,
// but we'd have to stop using `llvm::array_pod_sort()` so that we
// could capture some outside state.
COMPARE(getMangledNameString(*lhs), getMangledNameString(*rhs));
// Mangled names ought to distinguish all value decls, leaving only
// extensions of the same nominal type beyond this point.
if (!isa<ExtensionDecl>(*lhs) || !isa<ExtensionDecl>(*rhs))
return lastDitchSort(*lhs, *rhs, /*suppressDiagnostic=*/false);
// Break ties in extensions by putting smaller extensions last (in reverse
// order).
auto lhsMembers = cast<ExtensionDecl>(*lhs)->getAllMembers();
auto rhsMembers = cast<ExtensionDecl>(*rhs)->getAllMembers();
COMPARE(lhsMembers.size(), rhsMembers.size());
// Or the extension with fewer protocols.
auto lhsProtos = cast<ExtensionDecl>(*lhs)->getLocalProtocols();
auto rhsProtos = cast<ExtensionDecl>(*rhs)->getLocalProtocols();
COMPARE(lhsProtos.size(), rhsProtos.size());
// If that fails, arbitrarily pick the extension whose protocols are
// alphabetically first.
{
auto mismatch =
std::mismatch(lhsProtos.begin(), lhsProtos.end(), rhsProtos.begin(),
[] (const ProtocolDecl *nextLHSProto,
const ProtocolDecl *nextRHSProto) {
return nextLHSProto->getName() == nextRHSProto->getName();
});
if (mismatch.first != lhsProtos.end()) {
COMPARE(getNameString(*mismatch.first), getNameString(*mismatch.second));
}
}
// Still nothing? Fine, we'll look for a difference between the members.
{
for (auto pair : llvm::zip_equal(lhsMembers, rhsMembers)) {
auto *lhsMember = dyn_cast<ValueDecl>(std::get<0>(pair)),
*rhsMember = dyn_cast<ValueDecl>(std::get<1>(pair));
if (!lhsMember && !rhsMember)
continue;
COMPARE((bool)lhsMember, (bool)rhsMember);
ASSERT(lhsMember && rhsMember);
COMPARE(getNameString(lhsMember), getNameString(rhsMember));
COMPARE(getTypeString(lhsMember), getTypeString(rhsMember));
COMPARE(getGenericSignatureString(lhsMember),
getGenericSignatureString(rhsMember));
}
}
// Tough customer. Maybe they have different generic signatures?
COMPARE(getGenericSignatureString(*lhs), getGenericSignatureString(*rhs));
// Nothing, sadly.
bool bothEmpty = lhsMembers.empty() && rhsMembers.empty()
&& lhsProtos.empty() && rhsProtos.empty();
return lastDitchSort(*lhs, *rhs, /*suppressDiagnostic=*/bothEmpty);
#undef COMPARE
}
}
class ModuleWriter {
enum class EmissionState { NotYetDefined = 0, DefinitionRequested, Defined };
@@ -659,211 +905,7 @@ public:
}
// REVERSE sort the decls, since we are going to copy them onto a stack.
llvm::array_pod_sort(decls.begin(), decls.end(),
[](Decl * const *lhs, Decl * const *rhs) -> int {
enum : int {
Ascending = -1,
Equivalent = 0,
Descending = 1,
};
assert(*lhs != *rhs && "duplicate top-level decl");
auto getSortName = [](const Decl *D) -> StringRef {
if (auto VD = dyn_cast<ValueDecl>(D))
return VD->getBaseName().userFacingName();
if (auto ED = dyn_cast<ExtensionDecl>(D)) {
auto baseClass = ED->getSelfClassDecl();
if (!baseClass)
return ED->getExtendedNominal()->getName().str();
return baseClass->getName().str();
}
llvm_unreachable("unknown top-level ObjC decl");
};
// When we visit a function, we might also generate a thunk that calls into the
// implementation of structs/enums to get the opaque pointers. To avoid
// referencing these methods before we see the definition for the generated
// classes, we want to visit function definitions last.
if (isa<AbstractFunctionDecl>(*rhs) && isa<NominalTypeDecl>(*lhs))
return Descending;
if (isa<AbstractFunctionDecl>(*lhs) && isa<NominalTypeDecl>(*rhs))
return Ascending;
// Sort by names.
int result = getSortName(*rhs).compare(getSortName(*lhs));
if (result != 0)
return result;
// Two overloaded functions can have the same name when emitting C++.
if (isa<AbstractFunctionDecl>(*rhs) && isa<AbstractFunctionDecl>(*lhs)) {
// Sort top level functions with the same C++ name by their location to
// have stable sorting that depends on users source but not on the
// compiler invocation.
// FIXME: This is pretty suspect; PrintAsClang sometimes operates on
// serialized modules which don't have SourceLocs, so this sort
// rule may be applied in some steps of a build but not others.
if ((*rhs)->getLoc().isValid() && (*lhs)->getLoc().isValid()) {
auto getLocText = [](const Decl *afd) {
std::string res;
llvm::raw_string_ostream os(res);
afd->getLoc().print(os, afd->getASTContext().SourceMgr);
return std::move(os.str());
};
result = getLocText(*rhs).compare(getLocText(*lhs));
if (result != 0)
return result;
}
}
// A function and a global variable can have the same name in C++,
// even when the variable might not actually be emitted by the emitter.
// In that case, order the function before the variable.
if (isa<AbstractFunctionDecl>(*rhs) && isa<VarDecl>(*lhs))
return Descending;
if (isa<AbstractFunctionDecl>(*lhs) && isa<VarDecl>(*rhs))
return Ascending;
// Prefer value decls to extensions.
if (isa<ValueDecl>(*lhs) && !isa<ValueDecl>(*rhs))
return Descending;
if (!isa<ValueDecl>(*lhs) && isa<ValueDecl>(*rhs))
return Ascending;
// Last-ditch ValueDecl tiebreaker: Compare mangled names. This captures
// *tons* of context and detail missed by the previous checks, but the
// resulting sort makes little sense to humans.
// FIXME: It'd be nice to share the mangler or even memoize mangled names,
// but we'd have to stop using `llvm::array_pod_sort()` so that we
// could capture some outside state.
Mangle::ASTMangler mangler((*lhs)->getASTContext());
auto getMangledName = [&](const Decl *D) {
auto VD = dyn_cast<ValueDecl>(D);
if (!VD && isa<ExtensionDecl>(D))
VD = cast<ExtensionDecl>(D)->getExtendedNominal();
if (!VD)
return std::string();
return mangler.mangleAnyDecl(VD, /*prefix=*/true,
/*respectOriginallyDefinedIn=*/true);
};
result = getMangledName(*rhs).compare(getMangledName(*lhs));
if (result != 0)
return result;
auto lastDitchSort = [&](bool suppressDiagnostic) -> int {
// With no other criteria, we'll sort by memory address.
if (*lhs < *rhs)
result = Ascending;
else if (*lhs > *rhs)
result = Descending;
else {
// Sorting with yourself shouldn't happen (but implement consistent
// behavior if this assert is disabled).
ASSERT(*lhs != *rhs && "sorting should not compare decl to itself");
result = Equivalent;
}
// Warn that this isn't stable across different compilations.
if (!suppressDiagnostic) {
auto earlier = (result == Ascending) ? *lhs : *rhs;
auto later = (result == Ascending) ? *rhs : *lhs;
earlier->diagnose(diag::objc_header_sorting_arbitrary,
earlier, later);
later->diagnose(diag::objc_header_sorting_arbitrary_other,
earlier, later);
earlier->diagnose(diag::objc_header_sorting_arbitrary_please_report);
}
return result;
};
// Mangled names ought to distinguish all value decls, leaving only
// extensions of the same nominal type beyond this point.
if (!isa<ExtensionDecl>(*lhs) || !isa<ExtensionDecl>(*rhs))
return lastDitchSort(/*suppressDiagnostic=*/false);
// Break ties in extensions by putting smaller extensions last (in reverse
// order).
auto lhsMembers = cast<ExtensionDecl>(*lhs)->getAllMembers();
auto rhsMembers = cast<ExtensionDecl>(*rhs)->getAllMembers();
if (lhsMembers.size() != rhsMembers.size())
return lhsMembers.size() < rhsMembers.size() ? Descending : Ascending;
// Or the extension with fewer protocols.
auto lhsProtos = cast<ExtensionDecl>(*lhs)->getLocalProtocols();
auto rhsProtos = cast<ExtensionDecl>(*rhs)->getLocalProtocols();
if (lhsProtos.size() != rhsProtos.size())
return lhsProtos.size() < rhsProtos.size() ? Descending : Ascending;
// If that fails, arbitrarily pick the extension whose protocols are
// alphabetically first.
{
auto mismatch =
std::mismatch(lhsProtos.begin(), lhsProtos.end(), rhsProtos.begin(),
[] (const ProtocolDecl *nextLHSProto,
const ProtocolDecl *nextRHSProto) {
return nextLHSProto->getName() == nextRHSProto->getName();
});
if (mismatch.first != lhsProtos.end()) {
StringRef lhsProtoName = (*mismatch.first)->getName().str();
result = lhsProtoName.compare((*mismatch.second)->getName().str());
if (result != 0)
return result;
}
}
// Still nothing? Fine, we'll look for a difference between the members.
{
for (auto pair : llvm::zip_equal(lhsMembers, rhsMembers)) {
auto *lhsMember = dyn_cast<ValueDecl>(std::get<0>(pair)),
*rhsMember = dyn_cast<ValueDecl>(std::get<1>(pair));
if (!rhsMember && lhsMember)
return Descending;
if (!lhsMember && rhsMember)
return Ascending;
if (!lhsMember && !rhsMember)
continue;
result = rhsMember->getName().compare(lhsMember->getName());
if (result != 0)
return result;
result = rhsMember->getInterfaceType().getString().compare(
lhsMember->getInterfaceType().getString());
if (result != 0)
return result;
auto lhsGeneric = lhsMember->getAsGenericContext(),
rhsGeneric = rhsMember->getAsGenericContext();
if (lhsGeneric && rhsGeneric) {
result = rhsGeneric->getGenericSignature().getAsString().compare(
lhsGeneric->getGenericSignature().getAsString());
if (result != 0)
return result;
}
}
}
// Tough customer. Maybe they have different generic signatures?
{
auto lhsSig = cast<ExtensionDecl>(*lhs)->getGenericSignature()
.getAsString();
auto rhsSig = cast<ExtensionDecl>(*rhs)->getGenericSignature()
.getAsString();
result = rhsSig.compare(lhsSig);
if (result != 0)
return result;
}
// Nothing, sadly.
bool areEmptyExtensions = lhsMembers.size() == 0
&& rhsMembers.size() == 0
&& lhsProtos.size() == 0
&& rhsProtos.size() == 0;
return lastDitchSort(/*suppressDiagnostic=*/areEmptyExtensions);
});
llvm::array_pod_sort(decls.begin(), decls.end(), &reverseCompareDecls);
assert(declsToWrite.empty());
declsToWrite.assign(decls.begin(), decls.end());