//===--- StringExtras.cpp - String Utilities ------------------------------===// // // This source file is part of the Swift.org open source project // // Copyright (c) 2014 - 2017 Apple Inc. and the Swift project authors // Licensed under Apache License v2.0 with Runtime Library Exception // // See https://swift.org/LICENSE.txt for license information // See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors // //===----------------------------------------------------------------------===// // // This file implements utilities for working with words and camelCase // names. // //===----------------------------------------------------------------------===// #include "swift/Basic/Assertions.h" #include "swift/Basic/StringExtras.h" #include "clang/Basic/CharInfo.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/StringSwitch.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/Path.h" #include "llvm/Support/raw_ostream.h" #include #include using namespace swift; using namespace camel_case; using llvm::StringRef; bool swift::canBeArgumentLabel(StringRef identifier) { return llvm::StringSwitch(identifier) .Case("inout", false) .Case("$", false) .Default(true); } bool swift::canBeMemberName(StringRef identifier) { return llvm::StringSwitch(identifier) .Case("init", false) .Case("Protocol", false) .Case("self", false) .Case("Type", false) .Default(true); } bool swift::isPreposition(StringRef word) { #define PREPOSITION(Word) \ if (word.equals_insensitive(#Word)) \ return true; #include "PartsOfSpeech.def" return false; } PartOfSpeech swift::getPartOfSpeech(StringRef word) { // FIXME: This implementation is woefully inefficient. 
#define PREPOSITION(Word) \ if (word.equals_insensitive(#Word)) \ return PartOfSpeech::Preposition; #define VERB(Word) \ if (word.equals_insensitive(#Word)) \ return PartOfSpeech::Verb; #include "PartsOfSpeech.def" // Identify gerunds, which always end in "ing". if (word.ends_with("ing") && word.size() > 4) { StringRef possibleVerb = word.drop_back(3); // If what remains is a verb, we have a gerund. if (getPartOfSpeech(possibleVerb) == PartOfSpeech::Verb) return PartOfSpeech::Gerund; // Try adding an "e" and look for that as a verb. if (possibleVerb.back() != 'e') { SmallString<16> possibleVerbWithE; possibleVerbWithE += possibleVerb; possibleVerbWithE += 'e'; if (getPartOfSpeech(possibleVerbWithE) == PartOfSpeech::Verb) return PartOfSpeech::Gerund; } // If there is a repeated letter at the back, drop that second // instance of that letter and try again. unsigned count = possibleVerb.size(); if (possibleVerb[count-1] == possibleVerb[count-2] && getPartOfSpeech(possibleVerb.drop_back()) == PartOfSpeech::Verb) return PartOfSpeech::Gerund; } // "auto" tends to be used as a verb prefix. if (startsWithIgnoreFirstCase(word, "auto") && word.size() > 4) { if (getPartOfSpeech(word.substr(4)) == PartOfSpeech::Verb) return PartOfSpeech::Verb; } // "re" can prefix a verb. if (startsWithIgnoreFirstCase(word, "re") && word.size() > 2) { if (getPartOfSpeech(word.substr(2)) == PartOfSpeech::Verb) return PartOfSpeech::Verb; } // "de" can prefix a verb. if (startsWithIgnoreFirstCase(word, "de") && word.size() > 2) { if (getPartOfSpeech(word.substr(2)) == PartOfSpeech::Verb) return PartOfSpeech::Verb; } return PartOfSpeech::Unknown; } /// Whether the given word is a plural s static bool isPluralSuffix(StringRef word) { return word == "s" || word == "es" || word == "ies"; } void WordIterator::computeNextPosition() const { assert(Position < String.size() && "Already at end of string"); unsigned i = Position, n = String.size(); // Treat _ as a word on its own. Don't coalesce. 
if (String[i] == '_') { NextPosition = i + 1; NextPositionValid = true; return; } // Skip over any uppercase letters at the beginning of the word. while (i < n && clang::isUppercase(String[i])) ++i; // If there was more than one uppercase letter, this is an // acronym. if (i - Position > 1) { // If we hit the end of the string, that's it. Otherwise, this // word ends before the last uppercase letter if the next word is alphabetic // (URL_Loader) or after the last uppercase letter if it's not (UTF_8). // Collect the lowercase letters up to the next word. unsigned endOfNext = i; while (endOfNext < n && clang::isLowercase(String[endOfNext])) ++endOfNext; // If the next word is a plural suffix, add it on. if (i == n || (isPluralSuffix(String.slice(i, endOfNext)) && String.slice(i-1, endOfNext) != "Is")) NextPosition = endOfNext; else if (clang::isLowercase(String[i])) NextPosition = i-1; else NextPosition = i; NextPositionValid = true; return; } // Skip non-uppercase letters. while (i < n && !clang::isUppercase(String[i]) && String[i] != '_') ++i; NextPosition = i; NextPositionValid = true; } void WordIterator::computePrevPosition() const { assert(Position > 0 && "Already at beginning of string"); unsigned i = Position; // While we see non-uppercase letters, keep moving back. while (i > 0 && !clang::isUppercase(String[i-1]) && String[i-1] != '_') --i; // If what we found is a plural suffix, keep going. unsigned effectiveEndPosition = Position; if (i > 0 && isPluralSuffix(String.slice(i, Position))) { effectiveEndPosition = i; while (i > 0 && !clang::isUppercase(String[i-1]) && String[i-1] != '_') --i; } // If we found any lowercase letters, this was a normal camel case // word (not an acronym). if (i < effectiveEndPosition) { // If we hit the beginning of the string, that's it. Otherwise, this // word starts with an uppercase letter if the next word is alphabetic // (URL_Loader) or after the last uppercase letter if it's not (UTF_8). 
PrevPosition = i; if (i != 0 && clang::isLowercase(String[i]) && String[i-1] != '_') --PrevPosition; PrevPositionValid = true; return; } // Treat _ as a word on its own. Don't coalesce. if (String[i-1] == '_') { PrevPosition = i - 1; PrevPositionValid = true; return; } // There were no lowercase letters, so this is an acronym. Keep // skipping uppercase letters. while (i > 0 && clang::isUppercase(String[i-1])) --i; PrevPosition = i; PrevPositionValid = true; } bool camel_case::Words::hasWordStartingAt(unsigned targetPosition) const { // Iterate over the words until we see one at or past targetPosition. // FIXME: Is there a faster way to do this by looking at the characters around // the position? for (auto i = begin(); i != end() && i.getPosition() <= targetPosition; i++) { if (i.getPosition() == targetPosition) return true; } return false; } StringRef camel_case::getFirstWord(StringRef string) { if (string.empty()) return ""; return *WordIterator(string, 0); } StringRef camel_case::getLastWord(StringRef string) { if (string.empty()) return ""; return *--WordIterator(string, string.size()); } bool camel_case::sameWordIgnoreFirstCase(StringRef word1, StringRef word2) { if (word1.size() != word2.size()) return false; if (clang::toLowercase(word1[0]) != clang::toLowercase(word2[0])) return false; return word1.substr(1) == word2.substr(1); } bool camel_case::startsWithIgnoreFirstCase(StringRef word1, StringRef word2) { if (word1.size() < word2.size()) return false; if (clang::toLowercase(word1[0]) != clang::toLowercase(word2[0])) return false; return word1.substr(1, word2.size() - 1) == word2.substr(1); } bool camel_case::hasWordSuffix(StringRef haystack, StringRef needle) { // Is it even possible for one to be a suffix of the other? if (needle.empty() || haystack.size() <= needle.size()) return false; // Does haystack have a word boundary at the right position? 
auto targetPosition = haystack.size() - needle.size(); if (!Words(haystack).hasWordStartingAt(targetPosition)) return false; StringRef suffix = haystack.substr(targetPosition); // Fast path: Without potentially copying the strings, do they match? if (sameWordIgnoreFirstCase(suffix, needle)) return true; // Flatten out leading initialisms. Do they match? SmallString<32> suffixScratch, needleScratch; auto suffixFlat = toLowercaseInitialisms(suffix, suffixScratch); auto needleFlat = toLowercaseInitialisms(needle, needleScratch); return suffixFlat == needleFlat; } StringRef camel_case::toLowercaseWord(StringRef string, SmallVectorImpl &scratch) { if (string.empty()) return string; // Already lowercase. if (!clang::isUppercase(string[0])) return string; // Acronym doesn't get lowercased. if (string.size() > 1 && clang::isUppercase(string[1])) return string; // Lowercase the first letter, append the rest. scratch.clear(); scratch.push_back(clang::toLowercase(string[0])); scratch.append(string.begin() + 1, string.end()); return StringRef(scratch.data(), scratch.size()); } StringRef camel_case::toSentencecase(StringRef string, SmallVectorImpl &scratch) { if (string.empty()) return string; // Can't be uppercased. if (!clang::isLowercase(string[0])) return string; // Uppercase the first letter, append the rest. scratch.clear(); scratch.push_back(clang::toUppercase(string[0])); scratch.append(string.begin() + 1, string.end()); return StringRef(scratch.data(), scratch.size()); } StringRef camel_case::dropPrefix(StringRef string) { unsigned firstLower = 0, n = string.size(); if (n < 4) return string; for (; firstLower < n; ++firstLower) { if (!clang::isUppercase(string[firstLower])) break; } if (firstLower == n) return string; if (firstLower >= 3 && firstLower <= 4) return string.substr(firstLower - 1); return string; } StringRef camel_case::appendSentenceCase(SmallVectorImpl &buffer, StringRef string) { // Trivial case: empty string. 
if (string.empty()) return StringRef(buffer.data(), buffer.size()); // Uppercase the first letter, append the rest. buffer.push_back(clang::toUppercase(string[0])); buffer.append(string.begin() + 1, string.end()); return StringRef(buffer.data(), buffer.size()); } size_t camel_case::findWord(StringRef string, StringRef word) { assert(!word.empty()); assert(clang::isUppercase(word[0])); // Scan forward until we find the word as a complete word. size_t startingIndex = 0; while (true) { size_t index = string.find(word, startingIndex); if (index == StringRef::npos) return StringRef::npos; // If any of the following checks fail, we want to start searching // past the end of the match. (This assumes that the word doesn't // end with a prefix of itself, e.g. "LikableLike".) startingIndex = index + word.size(); // We assume that we don't have to check if the match starts a new // word in the string. // If we find the word, check whether it's a valid match. StringRef suffix = string.substr(index); if (!suffix.empty() && clang::isLowercase(suffix[0])) continue; return index; } } /// Skip a type suffix that can be dropped. static std::optional skipTypeSuffix(StringRef typeName) { if (typeName.empty()) return std::nullopt; auto lastWord = camel_case::getLastWord(typeName); // "Type" suffix. if (lastWord == "Type" && typeName.size() > 4) { return typeName.drop_back(4); } // "Ref" suffix. if (lastWord == "Ref" && typeName.size() > 3) { return typeName.drop_back(3); } // "Mask" suffix. if (lastWord == "Mask" && typeName.size() > 4) { return typeName.drop_back(4); } // \d+D for dimensionality. if (typeName.back() == 'D' && typeName.size() > 1) { unsigned firstDigit = typeName.size() - 1; while (firstDigit > 0) { if (!isdigit(typeName[firstDigit-1])) break; --firstDigit; } if (firstDigit < typeName.size()-1) { return typeName.substr(0, firstDigit); } } // _t. 
if (typeName.size() > 2 && typeName.ends_with("_t")) { return typeName.drop_back(2); } return std::nullopt; } /// Match a word within a name to a word within a type. static bool matchNameWordToTypeWord(StringRef nameWord, StringRef typeWord) { // If the name word is longer, there's no match. if (nameWord.size() > typeWord.size()) return false; // If the name word is shorter, try for a partial match. if (nameWord.size() < typeWord.size()) { // We can match the suffix of the type so long as everything preceding the // match is neither a lowercase letter nor a '_'. This ignores type // prefixes for acronyms, e.g., the 'NS' in 'NSURL'. if (typeWord.ends_with_insensitive(nameWord) && !clang::isLowercase(typeWord[typeWord.size() - nameWord.size()])) { // Check that everything preceding the match is neither a lowercase letter // nor a '_'. for (unsigned i = 0, n = nameWord.size(); i != n; ++i) { if (clang::isLowercase(typeWord[i]) || typeWord[i] == '_') return false; } return true; } // We can match a prefix so long as everything following the match is // a number. if (typeWord.starts_with_insensitive(nameWord)) { for (unsigned i = nameWord.size(), n = typeWord.size(); i != n; ++i) { if (!clang::isDigit(typeWord[i])) return false; } return true; } return false; } // Check for an exact match. return nameWord.equals_insensitive(typeWord); } /// Match the beginning of the name to the given type name. StringRef swift::matchLeadingTypeName(StringRef name, OmissionTypeName typeName) { // Match the camelCase beginning of the name to the // ending of the type name. auto nameWords = camel_case::getWords(name); auto typeWords = camel_case::getWords(typeName.Name); auto nameWordIter = nameWords.begin(); auto typeWordRevIter = typeWords.rbegin(); // Find the last instance of the first word in the name within // the words in the type name. 
typeWordRevIter = std::find_if( typeWordRevIter, typeWords.rend(), [nameWordIter](StringRef word) { return matchNameWordToTypeWord(*nameWordIter, word); }); // If we didn't find the first word in the name at all, we're // done. if (typeWordRevIter == typeWords.rend()) return name; // Now, match from the first word up until the end of the type name. std::advance(nameWordIter, 1); WordIterator typeMismatch = typeWords.end(), nameMismatch = nameWords.end(); std::tie(typeMismatch, nameMismatch) = std::mismatch(typeWordRevIter.base(), typeWords.end(), nameWordIter, nameWords.end(), matchNameWordToTypeWord); // If we didn't reach the end of the type name, don't match. if (typeMismatch != typeWords.end()) return name; // Chop of the beginning of the name. return nameMismatch.getRestOfStr(); } const char *swift::copyCString(StringRef string, llvm::BumpPtrAllocator &Allocator) { if (string.empty()) return ""; char *memory = Allocator.Allocate(string.size() + 1); memcpy(memory, string.data(), string.size()); memory[string.size()] = '\0'; return memory; } void InheritedNameSet::add(StringRef name) { Names.insert(name); } bool InheritedNameSet::contains(StringRef name) const { auto set = this; do { if (set->Names.count(name) > 0) return true; set = set->Parent; } while (set); return false; } /// Wrapper for camel_case::toLowercaseWord that uses string scratch space. StringRef camel_case::toLowercaseWord(StringRef string, StringScratchSpace &scratch){ llvm::SmallString<32> scratchStr; StringRef result = toLowercaseWord(string, scratchStr); if (string == result) return string; return scratch.copyString(result); } /// Omit needless words from the beginning of a name. static StringRef omitNeedlessWordsFromPrefix(StringRef name, OmissionTypeName type) { if (type.empty()) return name; // Match the result type to the beginning of the name. 
StringRef newName = matchLeadingTypeName(name, type); if (newName == name) return name; auto firstWord = camel_case::getFirstWord(newName); // If we have a preposition, we can chop off type information at the // beginning of the name. if (getPartOfSpeech(firstWord) == PartOfSpeech::Preposition && newName.size() > firstWord.size()) { // If the preposition was "by" and is followed by a gerund, also remove // "by". if (firstWord == "By") { StringRef nextWord = camel_case::getFirstWord( newName.substr(firstWord.size())); if (nextWord.ends_with("ing")) { return newName.substr(firstWord.size()); } } return newName; } return name; } /// Identify certain vacuous names to which we do not want to reduce any name. static bool isVacuousName(StringRef name) { return camel_case::sameWordIgnoreFirstCase(name, "get") || camel_case::sameWordIgnoreFirstCase(name, "for") || camel_case::sameWordIgnoreFirstCase(name, "set") || camel_case::sameWordIgnoreFirstCase(name, "using") || camel_case::sameWordIgnoreFirstCase(name, "with"); } /// Determine whether the given text matches a property name. static bool textMatchesPropertyName(StringRef text, const InheritedNameSet *allPropertyNames) { if (!allPropertyNames) return false; SmallString<16> localScratch; auto name = camel_case::toLowercaseWord(text, localScratch); // A property with exactly this name. if (allPropertyNames->contains(name)) return true; // From here on, we'll be working with scratch space. if (name.data() != localScratch.data()) localScratch = name; if (localScratch.back() == 'y') { // If the last letter is a 'y', try 'ies'. localScratch.pop_back(); localScratch += "ies"; if (allPropertyNames->contains(localScratch)) return true; } else { // Otherwise, add an 's' and try again. localScratch += 's'; if (allPropertyNames->contains(localScratch)) return true; // Alternatively, try to add 'es'. 
localScratch.pop_back(); localScratch += "es"; if (allPropertyNames->contains(localScratch)) return true; } return false; } namespace { /// Describes the role that a particular name has within a /// signature, which can affect how we omit needless words. enum class NameRole { /// The base name of a function or method. BaseName, /// The first parameter of a function or method. FirstParameter, // Subsequent parameters in a function or method. SubsequentParameter, // The name of a property. Property, // A partial name; used internally. Partial, }; } // end anonymous namespace static StringRef omitTrailingTypeNameWithSpecialCases(StringRef name, OmissionTypeName typeName, NameRole role, const InheritedNameSet *allPropertyNames); /// Returns the iterator pointing to the first word in \p name that starts the /// match for \p typeName (anchored at the end of \p name). /// /// If there is no match, returns the end WordIterator for \p name. static Words::iterator matchTypeNameFromBackWithSpecialCases( StringRef name, OmissionTypeName typeName, const InheritedNameSet *allPropertyNames) { // Get the camel-case words in the name and type name. auto nameWords = camel_case::getWords(name); auto typeWords = camel_case::getWords(typeName.Name); // Match the last words in the type name to the last words in the // name. auto nameWordRevIter = nameWords.rbegin(), nameWordRevIterBegin = nameWordRevIter, nameWordRevIterEnd = nameWords.rend(); auto typeWordRevIter = typeWords.rbegin(), typeWordRevIterEnd = typeWords.rend(); while (nameWordRevIter != nameWordRevIterEnd && typeWordRevIter != typeWordRevIterEnd) { // If the names match, continue. auto nameWord = *nameWordRevIter; if (matchNameWordToTypeWord(nameWord, *typeWordRevIter)) { ++nameWordRevIter; ++typeWordRevIter; continue; } // Special case: "Indexes" and "Indices" in the name match // "IndexSet" in the type. 
if ((matchNameWordToTypeWord(nameWord, "Indexes") || matchNameWordToTypeWord(nameWord, "Indices")) && *typeWordRevIter == "Set") { auto nextTypeWordRevIter = typeWordRevIter; ++nextTypeWordRevIter; if (nextTypeWordRevIter != typeWordRevIterEnd && matchNameWordToTypeWord("Index", *nextTypeWordRevIter)) { ++nameWordRevIter; typeWordRevIter = nextTypeWordRevIter; ++typeWordRevIter; continue; } } // Special case: "Index" in the name matches "Int" or "Integer" in the type. if (matchNameWordToTypeWord(nameWord, "Index") && (matchNameWordToTypeWord("Int", *typeWordRevIter) || matchNameWordToTypeWord("Integer", *typeWordRevIter))) { ++nameWordRevIter; ++typeWordRevIter; continue; } // Special case: "ObjectValue" in the name matches "Object" in the // type. if (matchNameWordToTypeWord("Object", *typeWordRevIter) && matchNameWordToTypeWord(nameWord, "Value")) { auto nextNameWordRevIter = std::next(nameWordRevIter); if (nextNameWordRevIter != nameWordRevIterEnd && matchNameWordToTypeWord(*nextNameWordRevIter, "Object")) { nameWordRevIter = nextNameWordRevIter; ++nameWordRevIter; ++typeWordRevIter; continue; } } // Special case: if the word in the name ends in 's', and we have // a collection element type, see if this is a plural. if (!typeName.CollectionElement.empty() && nameWord.size() > 2 && nameWord.back() == 's') { // Check s. auto shortenedNameWord = nameWordRevIter.base().getPriorStr().drop_back(); auto newShortenedNameWord = omitTrailingTypeNameWithSpecialCases( shortenedNameWord, typeName.CollectionElement, NameRole::Partial, allPropertyNames); if (shortenedNameWord != newShortenedNameWord) { unsigned targetSize = newShortenedNameWord.size(); auto newIter = std::make_reverse_iterator(WordIterator(name, targetSize)); #ifndef NDEBUG while (nameWordRevIter.base().getPosition() > targetSize) ++nameWordRevIter; assert(nameWordRevIter == newIter); #else nameWordRevIter = newIter; #endif continue; } } // If this is a skippable suffix, skip it and keep looking. 
if (nameWordRevIter == nameWordRevIterBegin) { if (auto withoutSuffix = skipTypeSuffix(typeName.Name)) { typeName.Name = *withoutSuffix; typeWords = camel_case::getWords(typeName.Name); typeWordRevIter = typeWords.rbegin(); typeWordRevIterEnd = typeWords.rend(); continue; } } break; } return nameWordRevIter.base(); } static StringRef omitSelfTypeFromBaseName(StringRef name, OmissionTypeName typeName, const InheritedNameSet *allPropertyNames, StringScratchSpace &scratch) { // If we have no name or no type name, there is nothing to do. if (name.empty() || typeName.empty()) return name; // Deliberately drop the collection element from typeName. typeName.CollectionElement = StringRef(); auto nameWords = camel_case::getWords(name); std::optional> matchingRange; // Search backwards for the type name, whether anchored at the end or not. for (auto nameReverseIter = nameWords.rbegin(); nameReverseIter != nameWords.rend(); ++nameReverseIter) { StringRef matchName = nameReverseIter.base().getPriorStr(); auto matchIter = matchTypeNameFromBackWithSpecialCases(matchName, typeName, allPropertyNames); auto matchIterInFullName = WordIterator(name, matchIter.getPosition()); if (matchIterInFullName != nameReverseIter.base()) { matchingRange = llvm::make_range(matchIterInFullName, nameReverseIter.base()); break; } // Note: This behavior fell out of a previous implementation of // omit-needless-words, even though it probably wasn't intentional. At this // point, though, it could be source-breaking to change it. while (auto withoutSuffix = skipTypeSuffix(typeName.Name)) typeName.Name = *withoutSuffix; } // If we matched nothing, or if the type name was all the way at the start // of the base name, don't strip anything. if (!matchingRange || matchingRange->begin() == nameWords.begin()) return name; assert(matchingRange->begin() != matchingRange->end() && "should not have been considered a match"); // Don't strip just "Error" at the end of a base name. // FIXME: Is this still relevant? 
if (matchingRange->end() == nameWords.end() && std::next(matchingRange->begin()) == nameWords.end() && *matchingRange->begin() == "Error") { return name; } // Only strip a type name that follows a verb. switch (getPartOfSpeech(*std::prev(matchingRange->begin()))) { case PartOfSpeech::Verb: break; case PartOfSpeech::Preposition: case PartOfSpeech::Gerund: case PartOfSpeech::Unknown: return name; } // Splice together the parts before and after the matched // type. For example, if we matched "ViewController" in // "dismissViewControllerAnimated", stitch together // "dismissAnimated". // Don't prune redundant type information from the base name if // there is a corresponding property (either singular or plural). StringRef removedText = name.substr(matchingRange->begin().getPosition(), matchingRange->end().getPosition()); if (textMatchesPropertyName(removedText, allPropertyNames)) return name; SmallString<16> newName = matchingRange->begin().getPriorStr(); newName += matchingRange->end().getRestOfStr(); // If we ended up with something that can't be a member name, do nothing. if (!canBeMemberName(newName)) return name; // If we ended up with a vacuous name like "get" or "set", do nothing. if (isVacuousName(newName)) return name; // We're done. return scratch.copyString(newName); } static StringRef omitTrailingTypeNameWithSpecialCases(StringRef name, OmissionTypeName typeName, NameRole role, const InheritedNameSet *allPropertyNames) { // If we have no name or no type name, there is nothing to do. if (name.empty() || typeName.empty()) return name; auto nameWords = camel_case::getWords(name); Words::iterator matchIter = matchTypeNameFromBackWithSpecialCases(name, typeName, allPropertyNames); if (matchIter == nameWords.end()) return name; // Handle complete name matches. if (matchIter == nameWords.begin()) { // If we're doing a partial match or we have an initial // parameter, return the empty string. 
if (role == NameRole::Partial || role == NameRole::FirstParameter) return ""; // Leave the name alone. return name; } // Don't strip just "Error". if (std::next(matchIter) == nameWords.end() && *matchIter == "Error") return name; switch (role) { case NameRole::Property: // Always strip off type information. break; case NameRole::FirstParameter: case NameRole::Partial: case NameRole::SubsequentParameter: { // Classify the part of speech of the word before the type information we // would strip off. We only want to strip it if the previous word is a // preposition, verb, or gerund. auto previousWordIter = std::prev(matchIter); if (getPartOfSpeech(*previousWordIter) == PartOfSpeech::Unknown) return name; break; } case NameRole::BaseName: { // Classify the part of speech of the word before the type // information we would strip off. auto previousWordIter = std::prev(matchIter); switch (getPartOfSpeech(*previousWordIter)) { case PartOfSpeech::Preposition: // If there's nothing preceding the preposition, don't strip anything. if (previousWordIter == nameWords.begin()) return name; break; case PartOfSpeech::Verb: case PartOfSpeech::Gerund: // Don't prune redundant type information from the base name if // there is a corresponding property (either singular or plural). if (textMatchesPropertyName(matchIter.getRestOfStr(), allPropertyNames)) return name; break; case PartOfSpeech::Unknown: // Assume it's a noun or adjective; don't strip anything. return name; } break; } } // Strip off the part of the name that is redundant with // type information. StringRef newName = matchIter.getPriorStr(); switch (role) { case NameRole::BaseName: case NameRole::Property: // If we ended up with something that can't be a member name, do nothing. if (!canBeMemberName(newName)) return name; // If we ended up with a vacuous name like "get" or "set", do nothing. 
if (isVacuousName(newName)) return name; break; case NameRole::SubsequentParameter: case NameRole::FirstParameter: case NameRole::Partial: break; } // We're done. return newName; } StringRef camel_case::toLowercaseInitialisms(StringRef string, StringScratchSpace &scratch) { llvm::SmallString<32> scratchStr; StringRef result = toLowercaseInitialisms(string, scratchStr); if (string == result) return string; return scratch.copyString(result); } StringRef camel_case::toLowercaseInitialisms(StringRef string, SmallVectorImpl &scratch) { if (string.empty()) return string; // Already lowercase. if (!clang::isUppercase(string[0])) return string; // Lowercase until we hit the an uppercase letter followed by a // non-uppercase letter. scratch.clear(); scratch.reserve(string.size()); for (unsigned i = 0, n = string.size(); i != n; ++i) { // If the next character is not uppercase, stop. if (i < n - 1 && !clang::isUppercase(string[i+1])) { // If the next non-uppercase character was not a letter, we seem // to have a plural, or we're at the beginning, we should still // lowercase the character we're on. if (i == 0 || !clang::isLetter(string[i+1]) || isPluralSuffix(camel_case::getFirstWord(string.substr(i+1)))) { scratch.push_back(clang::toLowercase(string[i])); ++i; } scratch.append(string.substr(i).begin(), string.substr(i).end()); break; } scratch.push_back(clang::toLowercase(string[i])); } return {scratch.data(), scratch.size()}; } /// Determine whether the given word occurring before the given /// preposition results in a conflict that suppresses preposition /// splitting. static bool wordConflictsBeforePreposition(StringRef word, StringRef preposition) { if (camel_case::sameWordIgnoreFirstCase(preposition, "in") && camel_case::sameWordIgnoreFirstCase(word, "plug")) return true; return false; } /// Determine whether the given word occurring after the given /// preposition results in a conflict that suppresses preposition /// splitting. 
static bool wordConflictsAfterPreposition(StringRef word,
                                          StringRef preposition) {
  // Small local predicates keep the table of conflicts below readable.
  auto prepositionIs = [&](StringRef candidate) {
    return camel_case::sameWordIgnoreFirstCase(preposition, candidate);
  };
  auto wordIs = [&](StringRef candidate) {
    return camel_case::sameWordIgnoreFirstCase(word, candidate);
  };

  // Each row pairs a preposition with the following words it must not be
  // split from.
  return (prepositionIs("with") && (wordIs("error") || wordIs("no"))) ||
         (prepositionIs("to") && (wordIs("visible") || wordIs("backing"))) ||
         (prepositionIs("from") && wordIs("backing"));
}

/// When splitting based on a preposition, whether we should place the
/// preposition on the argument label (vs. on the base name).
static bool shouldPlacePrepositionOnArgLabel(StringRef beforePreposition,
                                             StringRef preposition,
                                             StringRef afterPreposition) {
  // X/Y/Z often used as coordinates and should be the labels.
  const bool followedByCoordinate = afterPreposition == "X" ||
                                    afterPreposition == "Y" ||
                                    afterPreposition == "Z";

  // In every other case the preposition goes on the argument label.
  return !followedByCoordinate;
}

/// Determine whether the word preceding the preposition is part of an
/// "extended" preposition, such as "compatible with".
static bool priorWordExtendsPreposition(StringRef preceding, StringRef preposition) { // compatible with if (camel_case::sameWordIgnoreFirstCase(preceding, "compatible") && camel_case::sameWordIgnoreFirstCase(preposition, "with")) return true; // best matching if (camel_case::sameWordIgnoreFirstCase(preceding, "best") && camel_case::sameWordIgnoreFirstCase(preposition, "matching")) return true; // according to if (camel_case::sameWordIgnoreFirstCase(preceding, "according") && camel_case::sameWordIgnoreFirstCase(preposition, "to")) return true; // bound by if (camel_case::sameWordIgnoreFirstCase(preceding, "bound") && camel_case::sameWordIgnoreFirstCase(preposition, "by")) return true; // separated by if (camel_case::sameWordIgnoreFirstCase(preceding, "separated") && camel_case::sameWordIgnoreFirstCase(preposition, "by")) return true; return false; } /// Determine whether the preposition in a split is "vacuous", and /// should be removed. static bool isVacuousPreposition(StringRef beforePreposition, StringRef preposition, StringRef afterPreposition, const OmissionTypeName ¶mType) { // Only consider "with" or "using" to be potentially vacuous. if (!camel_case::sameWordIgnoreFirstCase(preposition, "with") && !camel_case::sameWordIgnoreFirstCase(preposition, "using")) return false; // If the preposition is "with" followed by "zone", never consider // it vacuous. if (camel_case::sameWordIgnoreFirstCase(preposition, "with") && camel_case::sameWordIgnoreFirstCase( camel_case::getFirstWord(afterPreposition), "zone")) return false; // If the parameter has a default argument, it's vacuous. if (paramType.hasDefaultArgument()) return true; // If the parameter is of function type, it's vacuous. if (paramType.isFunction()) return true; return false; } namespace { using ReverseWordIterator = std::reverse_iterator; } // end anonymous namespace /// Find the last preposition in the given word. 
static ReverseWordIterator findLastPreposition(ReverseWordIterator first, ReverseWordIterator last, bool recursive = false) { // Find the last preposition. auto result = std::find_if(first, last, [](StringRef word) { return getPartOfSpeech(word) == PartOfSpeech::Preposition; }); // If the preposition is "of", look for a previous preposition. if (!recursive && result != last && camel_case::sameWordIgnoreFirstCase(*result, "of")) { auto prevPreposition = findLastPreposition(std::next(result), last, /*recursive=*/true); if (prevPreposition != last && !camel_case::sameWordIgnoreFirstCase(*prevPreposition, "of") && !camel_case::sameWordIgnoreFirstCase(*prevPreposition, "for")) return prevPreposition; } return result; } /// Split the base name after the last preposition, if there is one. static bool splitBaseNameAfterLastPreposition( StringRef &baseName, StringRef &argName, const OmissionTypeName ¶mType) { // Scan backwards for a preposition. auto nameWords = camel_case::getWords(baseName); auto nameWordRevIterBegin = nameWords.rbegin(), nameWordRevIterEnd = nameWords.rend(); // Find the last preposition. auto nameWordRevIter = findLastPreposition(nameWordRevIterBegin, nameWordRevIterEnd); if (nameWordRevIter == nameWordRevIterEnd) return false; // We found a split point. auto preposition = *nameWordRevIter; // If we have a conflict with the word before the preposition, don't // split. if (std::next(nameWordRevIter) != nameWordRevIterEnd && wordConflictsBeforePreposition(*std::next(nameWordRevIter), preposition)) return false; // If we have a conflict with the word after the preposition, don't // split. if (nameWordRevIter != nameWordRevIterBegin && wordConflictsAfterPreposition(*std::prev(nameWordRevIter), preposition)) return false; // If the word preceding the preposition extends the preposition, it // will never be dropped. 
if (std::next(nameWordRevIter) != nameWordRevIterEnd && priorWordExtendsPreposition(*std::next(nameWordRevIter), preposition)) { ++nameWordRevIter; preposition = StringRef((*nameWordRevIter).begin(), preposition.size() + (*nameWordRevIter).size()); } // Determine whether we should drop the preposition. StringRef beforePreposition(baseName.begin(), preposition.begin() - baseName.begin()); StringRef afterPreposition(preposition.end(), baseName.end() - preposition.end()); bool dropPreposition = isVacuousPreposition(beforePreposition, preposition, afterPreposition, paramType); // By default, put the preposition on the argument label. bool prepositionOnArgLabel = shouldPlacePrepositionOnArgLabel(beforePreposition, preposition, afterPreposition); if (prepositionOnArgLabel) ++nameWordRevIter; unsigned startOfArgumentLabel = nameWordRevIter.base().getPosition(); unsigned endOfBaseName = startOfArgumentLabel; // If we're supposed to drop the preposition, do so. if (dropPreposition) { if (prepositionOnArgLabel) startOfArgumentLabel += preposition.size(); else { endOfBaseName -= preposition.size(); } } if (endOfBaseName == 0) return false; // If the base name is vacuous or is a keyword and there are two or // fewer words in the base name, don't split. auto newBaseName = baseName.substr(0, endOfBaseName); { auto newWords = camel_case::getWords(newBaseName); auto newWordsIter = newWords.begin(); bool isKeyword = !canBeMemberName(*newWordsIter); bool isVacuous = isVacuousName(*newWordsIter); if (isKeyword || isVacuous) { // Just one word? ++newWordsIter; if (newWordsIter == newWords.end()) return false; // Or two words, if it's vacuous. ++newWordsIter; if (newWordsIter == newWords.end() && isVacuous) return false; // Okay: there is enough in the base name. } } // Update the argument label and base name. argName = baseName.substr(startOfArgumentLabel); baseName = newBaseName; return true; } /// Split the base name, if it makes sense. 
static bool splitBaseName(StringRef &baseName, StringRef &argName, const OmissionTypeName ¶mType, StringRef paramName) { // If there is already an argument label, do nothing. if (!argName.empty()) return false; // Try splitting a Boolean "Animated". if (paramType.isBoolean() && camel_case::getLastWord(baseName) == "Animated") { baseName = baseName.drop_back(strlen("Animated")); argName = "animated"; return true; } // Don't split anything that starts with "set". if (camel_case::getFirstWord(baseName) == "set") return false; // Don't split a method that looks like an action (with a "sender" // of type AnyObject). if (paramName == "sender" && camel_case::getLastWord(paramType.Name) == "Object") return false; // Try splitting after the last preposition. if (splitBaseNameAfterLastPreposition(baseName, argName, paramType)) return true; return false; } bool swift::omitNeedlessWords( StringRef &baseName, MutableArrayRef argNames, StringRef firstParamName, OmissionTypeName givenResultType, OmissionTypeName contextType, ArrayRef paramTypes, bool returnsSelf, bool isProperty, const InheritedNameSet *allPropertyNames, std::optional completionHandlerIndex, std::optional completionHandlerName, StringScratchSpace &scratch) { bool anyChanges = false; OmissionTypeName resultType = returnsSelf ? contextType : givenResultType; /// Local function that lowercases all of the base names and /// argument names before returning. auto lowercaseAcronymsForReturn = [&] { StringRef newBaseName = toLowercaseInitialisms(baseName, scratch); if (baseName.data() != newBaseName.data()) { baseName = newBaseName; anyChanges = true; } for (StringRef &argName : argNames) { StringRef newArgName = toLowercaseInitialisms(argName, scratch); if (argName.data() != newArgName.data()) { argName = newArgName; anyChanges = true; } } return anyChanges; }; // If the result type matches the context, remove the context type from the // prefix of the name. 
bool resultTypeMatchesContext = (resultType == contextType); if (resultTypeMatchesContext) { StringRef newBaseName = omitNeedlessWordsFromPrefix(baseName, contextType); if (newBaseName != baseName) { baseName = newBaseName; anyChanges = true; } } // Strip the context type from the base name of a method. if (!isProperty) { StringRef newBaseName = omitSelfTypeFromBaseName(baseName, contextType, allPropertyNames, scratch); if (newBaseName != baseName) { baseName = newBaseName; anyChanges = true; } } if (paramTypes.empty()) { if (resultTypeMatchesContext) { StringRef newBaseName = omitTrailingTypeNameWithSpecialCases( baseName, resultType, NameRole::Property, allPropertyNames); if (newBaseName != baseName) { baseName = newBaseName; anyChanges = true; } } return lowercaseAcronymsForReturn(); } if (camel_case::getFirstWord(baseName) == "set") { StringRef newBaseName = omitTrailingTypeNameWithSpecialCases( baseName, contextType, NameRole::Property, allPropertyNames); if (newBaseName != baseName) { baseName = newBaseName; anyChanges = true; } } // If the base name of a method imported as "async" starts with the word // "get", drop the "get". bool isAsync = completionHandlerIndex.has_value(); if (isAsync && camel_case::getFirstWord(baseName) == "get" && baseName.size() > 3) { baseName = baseName.substr(3); anyChanges = true; } // If needed, split the base name. if (!argNames.empty() && splitBaseName(baseName, argNames[0], paramTypes[0], firstParamName)) anyChanges = true; // If this is an asynchronous function where the completion handler is // the first parameter, strip off WithCompletion(Handler) from the base name. if (isAsync && *completionHandlerIndex == 0) { if (auto newBaseName = stripWithCompletionHandlerSuffix(baseName)) { baseName = *newBaseName; anyChanges = true; } } // For a method imported as "async", drop the "Asynchronously" suffix from // the base name. It is redundant with 'async'. 
const StringRef asynchronously = "Asynchronously"; if (isAsync && camel_case::getLastWord(baseName) == asynchronously && baseName.size() > asynchronously.size()) { baseName = baseName.drop_back(asynchronously.size()); anyChanges = true; } // If this is an asynchronous function where the completion handler is // past the first parameter the corresponding name has some additional // information prior to the completion-handled suffix, append that // additional text to the base name. if (isAsync && *completionHandlerIndex >= 1 && completionHandlerName) { if (auto extraParamText = stripWithCompletionHandlerSuffix( *completionHandlerName)) { SmallString<32> newBaseName; newBaseName += baseName; appendSentenceCase(newBaseName, *extraParamText); baseName = scratch.copyString(newBaseName); anyChanges = true; } } // Omit needless words based on parameter types. for (unsigned i = 0, n = argNames.size(); i != n; ++i) { // If there is no corresponding parameter, there is nothing to // omit. if (i >= paramTypes.size()) continue; // Omit needless words based on the type of the parameter. NameRole role = i > 0 ? NameRole::SubsequentParameter : argNames[0].empty() ? NameRole::BaseName : baseName == "init" ? NameRole::SubsequentParameter : paramTypes[0].hasDefaultArgument() ? NameRole::SubsequentParameter : NameRole::FirstParameter; // Omit needless words from the name. StringRef name = role == NameRole::BaseName ? baseName : argNames[i]; StringRef newName = omitTrailingTypeNameWithSpecialCases( name, paramTypes[i], role, role == NameRole::BaseName ? allPropertyNames : nullptr); if (name == newName) continue; // Record this change. 
anyChanges = true; if (role == NameRole::BaseName) { baseName = newName; } else { argNames[i] = newName; } } return lowercaseAcronymsForReturn(); } std::optional swift::stripWithCompletionHandlerSuffix(StringRef name) { if (name.ends_with("WithCompletionHandler")) { return name.drop_back(strlen("WithCompletionHandler")); } if (name.ends_with("WithCompletion")) { return name.drop_back(strlen("WithCompletion")); } if (name.ends_with("WithCompletionBlock")) { return name.drop_back(strlen("WithCompletionBlock")); } if (name.ends_with("WithBlock")) { return name.drop_back(strlen("WithBlock")); } if (name.ends_with("WithReplyTo")) { return name.drop_back(strlen("WithReplyTo")); } if (name.ends_with("WithReply")) { return name.drop_back(strlen("WithReply")); } return std::nullopt; } void swift::writeEscaped(llvm::StringRef Str, llvm::raw_ostream &OS) { for (unsigned i = 0, e = Str.size(); i != e; ++i) { unsigned char c = Str[i]; switch (c) { case '\\': OS << '\\' << '\\'; break; case '\t': OS << '\\' << 't'; break; case '\n': OS << '\\' << 'n'; break; case '"': OS << '\\' << '"'; break; default: OS << c; break; } } } bool swift::pathStartsWith(StringRef prefix, StringRef path) { auto prefixIt = llvm::sys::path::begin(prefix), prefixEnd = llvm::sys::path::end(prefix); for (auto pathIt = llvm::sys::path::begin(path), pathEnd = llvm::sys::path::end(path); prefixIt != prefixEnd && pathIt != pathEnd; ++prefixIt, ++pathIt) { if (*prefixIt != *pathIt) return false; } return prefixIt == prefixEnd; }