Files
swift-mirror/lib/Basic/StringExtras.cpp
Doug Gregor 050b324593 [Omit needless words] Split before last preposition in most cases.
Splitting *before* the last preposition tends to keep the
prepositional phrase together. Only leave the preposition on the base
name in rare cases where we would end up with weird argument labels
(e.g., prefer "moveTo(x:y:)" to "move(toX:y:)").

Also, refine our heuristics for when we can remove the preposition
entirely.
2016-02-10 14:20:44 -08:00

1160 lines
38 KiB
C++

//===--- StringExtras.cpp - String Utilities ------------------------------===//
//
// This source file is part of the Swift.org open source project
//
// Copyright (c) 2014 - 2016 Apple Inc. and the Swift project authors
// Licensed under Apache License v2.0 with Runtime Library Exception
//
// See http://swift.org/LICENSE.txt for license information
// See http://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
//
//===----------------------------------------------------------------------===//
//
// This file implements utilities for working with words and camelCase
// names.
//
//===----------------------------------------------------------------------===//
#include "swift/Basic/Fallthrough.h"
#include "swift/Basic/StringExtras.h"
#include "clang/Basic/CharInfo.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringSwitch.h"
#include <algorithm>
using namespace swift;
using namespace camel_case;
/// Determine whether the given identifier may be used as an argument label.
///
/// Every identifier is accepted except "var", "let" and "inout".
bool swift::canBeArgumentLabel(StringRef identifier) {
  const bool isRejected =
      identifier == "var" || identifier == "let" || identifier == "inout";
  return !isRejected;
}
/// Classify the given word as a directional preposition, a non-directional
/// preposition, or not a preposition at all (PK_None).
///
/// Each entry of the word list is compared against \c word ignoring case.
PrepositionKind swift::getPrepositionKind(StringRef word) {
// A DIRECTIONAL_PREPOSITION(Word) entry becomes a case-insensitive comparison
// that returns PK_Directional when it matches.
#define DIRECTIONAL_PREPOSITION(Word) \
if (word.equals_lower(#Word)) \
return PK_Directional;
// A PREPOSITION(Word) entry becomes a case-insensitive comparison that
// returns PK_Nondirectional when it matches.
#define PREPOSITION(Word) \
if (word.equals_lower(#Word)) \
return PK_Nondirectional;
// Expand the word list into a chain of the comparisons defined above.
// NOTE(review): presumably the .def file supplies defaults for any entry
// macros not defined here and #undefs them afterwards -- confirm.
#include "PartsOfSpeech.def"
// No entry matched.
return PK_None;
}
/// Classify the given word as a preposition, verb or gerund, or report
/// PartOfSpeech::Unknown when none of the heuristics below applies.
PartOfSpeech swift::getPartOfSpeech(StringRef word) {
  // FIXME: This implementation is woefully inefficient.
  // Compare the word (ignoring case) against every listed preposition and verb.
#define PREPOSITION(Word) \
  if (word.equals_lower(#Word)) \
    return PartOfSpeech::Preposition;
#define VERB(Word) \
  if (word.equals_lower(#Word)) \
    return PartOfSpeech::Verb;
#include "PartsOfSpeech.def"

  // Shorthand for the recursive "is this a verb?" query used by every
  // heuristic below.
  const auto isVerb = [](StringRef candidate) {
    return getPartOfSpeech(candidate) == PartOfSpeech::Verb;
  };

  // Gerunds always end in "ing"; see whether what precedes it is a verb.
  if (word.size() > 4 && word.endswith("ing")) {
    const StringRef stem = word.drop_back(3);

    // The stem itself is a verb.
    if (isVerb(stem))
      return PartOfSpeech::Gerund;

    // The stem becomes a verb once an 'e' is put back on the end.
    if (stem.back() != 'e') {
      SmallString<16> stemWithE;
      stemWithE += stem;
      stemWithE += 'e';
      if (isVerb(stemWithE))
        return PartOfSpeech::Gerund;
    }

    // The stem ends in a doubled letter; drop the second copy and retry.
    const unsigned stemLength = stem.size();
    const bool endsWithDoubledLetter =
        stem[stemLength - 1] == stem[stemLength - 2];
    if (endsWithDoubledLetter && isVerb(stem.drop_back(1)))
      return PartOfSpeech::Gerund;
  }

  // "auto" tends to be used as a verb prefix.
  if (word.size() > 4 && word.startswith("auto") && isVerb(word.substr(4)))
    return PartOfSpeech::Verb;

  // "re" can prefix a verb.
  if (word.size() > 2 && word.startswith("re") && isVerb(word.substr(2)))
    return PartOfSpeech::Verb;

  return PartOfSpeech::Unknown;
}
/// Whether the given word is one of the plural suffixes "s", "es" or "ies".
static bool isPluralSuffix(StringRef word) {
  return llvm::StringSwitch<bool>(word)
      .Case("s", true)
      .Case("es", true)
      .Case("ies", true)
      .Default(false);
}
/// Compute and cache (in NextPosition / NextPositionValid) the index one past
/// the end of the word that starts at Position.
void WordIterator::computeNextPosition() const {
  assert(Position < String.size() && "Already at end of string");

  const unsigned length = String.size();
  unsigned cursor = Position;

  // An underscore is always a one-character word of its own.
  if (String[cursor] == '_') {
    NextPosition = cursor + 1;
    NextPositionValid = true;
    return;
  }

  // Consume the run of uppercase letters that opens the word.
  while (cursor < length && clang::isUppercase(String[cursor]))
    ++cursor;

  // At most one uppercase letter: an ordinary word that extends up to the
  // next uppercase letter or underscore.
  const unsigned uppercaseRun = cursor - Position;
  if (uppercaseRun <= 1) {
    while (cursor < length && String[cursor] != '_' &&
           !clang::isUppercase(String[cursor]))
      ++cursor;
    NextPosition = cursor;
    NextPositionValid = true;
    return;
  }

  // Several uppercase letters: an acronym. Find the lowercase letters that
  // directly follow it.
  unsigned lowercaseEnd = cursor;
  while (lowercaseEnd < length && clang::isLowercase(String[lowercaseEnd]))
    ++lowercaseEnd;

  if (cursor == length ||
      isPluralSuffix(String.slice(cursor, lowercaseEnd))) {
    // The acronym runs to the end of the string, or is followed by a plural
    // suffix which is kept as part of it.
    NextPosition = lowercaseEnd;
  } else if (clang::isLowercase(String[cursor])) {
    // The final uppercase letter begins the next (lowercase) word.
    NextPosition = cursor - 1;
  } else {
    // Whatever follows is not a letter of the next word; end the acronym here.
    NextPosition = cursor;
  }
  NextPositionValid = true;
}
/// Compute and cache (in PrevPosition / PrevPositionValid) the index at which
/// the word ending at Position starts.
///
/// Requires Position > 0.
void WordIterator::computePrevPosition() const {
assert(Position > 0 && "Already at beginning of string");
unsigned i = Position;
// While we see characters that are neither uppercase letters nor '_',
// keep moving back.
while (i > 0 && !clang::isUppercase(String[i-1]) && String[i-1] != '_')
--i;
// If what we found is a plural suffix, treat the text before it as the end
// of the word and keep going.
// NOTE: skippedPluralSuffix is assigned here but never read in this function.
bool skippedPluralSuffix = false;
unsigned effectiveEndPosition = Position;
if (i > 0 && isPluralSuffix(String.slice(i, Position))) {
skippedPluralSuffix = true;
effectiveEndPosition = i;
while (i > 0 && !clang::isUppercase(String[i-1]) && String[i-1] != '_')
--i;
}
// If we moved back over any characters before the effective end, this was
// a normal camel case word (not an acronym).
if (i < effectiveEndPosition) {
// The word starts at i. When i is not the beginning of the string, the
// character at i is lowercase, and the character before it is not '_',
// step back once more so that the preceding (uppercase) letter is
// included in the word.
PrevPosition = i;
if (i != 0 && clang::isLowercase(String[i]) && String[i-1] != '_')
--PrevPosition;
PrevPositionValid = true;
return;
}
// Treat _ as a word on its own. Don't coalesce.
if (String[i-1] == '_') {
PrevPosition = i - 1;
PrevPositionValid = true;
return;
}
// There were no lowercase letters, so this is an acronym. Keep
// skipping uppercase letters.
while (i > 0 && clang::isUppercase(String[i-1]))
--i;
PrevPosition = i;
PrevPositionValid = true;
}
/// Retrieve the first camelCase word of the given string, or the empty
/// string when the input is empty.
StringRef camel_case::getFirstWord(StringRef string) {
  if (string.empty())
    return "";

  WordIterator firstWord(string, 0);
  return *firstWord;
}
/// Retrieve the last camelCase word of the given string, or the empty
/// string when the input is empty.
StringRef camel_case::getLastWord(StringRef string) {
  if (string.empty())
    return "";

  // Start at the end of the string and step back by one word.
  WordIterator lastWord(string, string.size());
  --lastWord;
  return *lastWord;
}
/// Determine whether the two words are identical apart from the case of
/// their first character.
///
/// Two empty words compare equal.
bool camel_case::sameWordIgnoreFirstCase(StringRef word1, StringRef word2) {
  if (word1.size() != word2.size())
    return false;

  // Both words are empty: they are trivially the same, and there is no first
  // character to index.
  if (word1.empty())
    return true;

  if (clang::toLowercase(word1[0]) != clang::toLowercase(word2[0]))
    return false;

  return word1.substr(1) == word2.substr(1);
}
/// Determine whether \c word1 starts with \c word2, ignoring the case of the
/// first character.
///
/// An empty \c word2 is a prefix of every word.
bool camel_case::startsWithIgnoreFirstCase(StringRef word1, StringRef word2) {
  if (word1.size() < word2.size())
    return false;

  // The empty prefix always matches. Handling it here also avoids indexing
  // the first character of an empty string and the unsigned underflow of
  // word2.size() - 1 below.
  if (word2.empty())
    return true;

  if (clang::toLowercase(word1[0]) != clang::toLowercase(word2[0]))
    return false;

  return word1.substr(1, word2.size() - 1) == word2.substr(1);
}
/// Lowercase the first letter of the given word, building the result in
/// \c scratch.
///
/// The input is returned unchanged when it is empty, does not start with an
/// uppercase letter, or starts with two uppercase letters (an acronym).
StringRef camel_case::toLowercaseWord(StringRef string,
                                      SmallVectorImpl<char> &scratch) {
  const bool startsUppercase =
      !string.empty() && clang::isUppercase(string[0]);
  const bool isAcronym =
      startsUppercase && string.size() > 1 && clang::isUppercase(string[1]);
  if (!startsUppercase || isAcronym)
    return string;

  // Build "<lowercased first letter><rest of the word>" in the scratch buffer.
  scratch.clear();
  scratch.push_back(clang::toLowercase(string[0]));
  scratch.append(string.begin() + 1, string.end());
  return StringRef(scratch.data(), scratch.size());
}
/// Uppercase the first letter of the given string, building the result in
/// \c scratch.
///
/// The input is returned unchanged when it is empty or does not start with a
/// lowercase letter.
StringRef camel_case::toSentencecase(StringRef string,
                                     SmallVectorImpl<char> &scratch) {
  const bool canUppercase = !string.empty() && clang::isLowercase(string[0]);
  if (!canUppercase)
    return string;

  // Build "<uppercased first letter><rest of the string>" in the scratch
  // buffer.
  scratch.clear();
  scratch.push_back(clang::toUppercase(string[0]));
  scratch.append(string.begin() + 1, string.end());
  return StringRef(scratch.data(), scratch.size());
}
/// Drop a leading uppercase prefix from the given string.
///
/// When the string has at least four characters, is not entirely uppercase,
/// and opens with a run of exactly three or four uppercase letters, everything
/// before the last letter of that run is removed. Otherwise the string is
/// returned unchanged.
StringRef camel_case::dropPrefix(StringRef string) {
  const unsigned length = string.size();
  if (length < 4)
    return string;

  // Measure the leading run of uppercase letters.
  unsigned uppercaseRun = 0;
  while (uppercaseRun < length && clang::isUppercase(string[uppercaseRun]))
    ++uppercaseRun;

  const bool hasDroppablePrefix =
      uppercaseRun != length && (uppercaseRun == 3 || uppercaseRun == 4);
  return hasDroppablePrefix ? string.substr(uppercaseRun - 1) : string;
}
/// Append the given string to \c buffer with its first character uppercased,
/// and return the buffer's full contents.
///
/// An empty string leaves the buffer untouched.
StringRef camel_case::appendSentenceCase(SmallVectorImpl<char> &buffer,
                                         StringRef string) {
  if (!string.empty()) {
    // Uppercase the first letter, then copy the remainder verbatim.
    buffer.push_back(clang::toUppercase(string[0]));
    buffer.append(string.begin() + 1, string.end());
  }

  return StringRef(buffer.data(), buffer.size());
}
/// Find the first occurrence of \c word within \c string that ends a word,
/// i.e. that is not immediately followed by a lowercase letter.
///
/// \param word A non-empty word that begins with an uppercase letter.
/// \returns The index of the match, or StringRef::npos if there is none.
size_t camel_case::findWord(StringRef string, StringRef word) {
  assert(!word.empty());
  assert(clang::isUppercase(word[0]));

  // Scan forward until we find the word as a complete word.
  size_t startingIndex = 0;
  while (true) {
    size_t index = string.find(word, startingIndex);
    if (index == StringRef::npos)
      return StringRef::npos;

    // If any of the following checks fail, we want to start searching
    // past the end of the match.  (This assumes that the word doesn't
    // end with a prefix of itself, e.g. "LikeableLike".)
    startingIndex = index + word.size();

    // We assume that we don't have to check if the match starts a new
    // word in the string.

    // The match is only valid if the word is not continued by a lowercase
    // letter. Look at the text *after* the match: the first character of the
    // match itself is word[0], which is always uppercase.
    StringRef suffix = string.substr(startingIndex);
    if (!suffix.empty() && clang::isLowercase(suffix[0]))
      continue;

    return index;
  }
}
/// Determine whether the given identifier is a keyword.
///
/// The identifier is compared (exactly) against every KEYWORD entry of
/// Tokens.def.
static bool isKeyword(StringRef identifier) {
return llvm::StringSwitch<bool>(identifier)
// Every KEYWORD(kw) entry contributes one case that maps "kw" to true.
#define KEYWORD(kw) .Case(#kw, true)
// SIL_KEYWORD entries expand to nothing, so they are not treated as keywords.
#define SIL_KEYWORD(kw)
// NOTE(review): presumably Tokens.def supplies defaults for the remaining
// token macros and #undefs the ones defined above -- confirm.
#include "swift/Parse/Tokens.def"
// Anything not listed is not a keyword.
.Default(false);
}
/// Skip a type suffix that can be dropped.
///
/// Recognized suffixes are a trailing "Type" or "Ref" word, a dimensionality
/// marker (one or more digits followed by 'D'), and "_t".
///
/// \returns The type name without the suffix, or None if there is no
/// droppable suffix.
static Optional<StringRef> skipTypeSuffix(StringRef typeName) {
  if (typeName.empty()) return None;

  auto lastWord = camel_case::getLastWord(typeName);

  // "Type" suffix.
  if (lastWord == "Type" && typeName.size() > 4) {
    return typeName.drop_back(4);
  }

  // "Ref" suffix.
  if (lastWord == "Ref" && typeName.size() > 3) {
    return typeName.drop_back(3);
  }

  // \d+D for dimensionality.
  if (typeName.back() == 'D' && typeName.size() > 1) {
    // Walk back over the digits that precede the 'D'. clang::isDigit is
    // used (as elsewhere in this file) rather than isdigit, whose behavior
    // is undefined for negative char values.
    unsigned firstDigit = typeName.size() - 1;
    while (firstDigit > 0 && clang::isDigit(typeName[firstDigit-1]))
      --firstDigit;

    // Only strip when at least one digit was found.
    if (firstDigit < typeName.size()-1) {
      return typeName.substr(0, firstDigit);
    }
  }

  // _t.
  if (typeName.size() > 2 && typeName.endswith("_t")) {
    return typeName.drop_back(2);
  }

  return None;
}
/// Match a word within a name to a word within a type.
///
/// Words of equal length match when they are equal ignoring case. A shorter
/// name word matches either as a suffix of the type word whose preceding
/// characters contain no lowercase letter or '_', or as a prefix of the type
/// word that is followed only by digits. A longer name word never matches.
static bool matchNameWordToTypeWord(StringRef nameWord, StringRef typeWord) {
  // If the name word is longer, there's no match.
  if (nameWord.size() > typeWord.size()) return false;

  // If the name word is shorter, try for a partial match.
  if (nameWord.size() < typeWord.size()) {
    // We can match the suffix of the type so long as everything preceding the
    // match is neither a lowercase letter nor a '_'. This ignores type
    // prefixes for acronyms, e.g., the 'NS' in 'NSURL'.
    const unsigned prefixLength = typeWord.size() - nameWord.size();
    if (typeWord.endswith_lower(nameWord) &&
        !clang::isLowercase(typeWord[prefixLength])) {
      // Check that everything preceding the match is neither a lowercase letter
      // nor a '_'. The range to inspect is the prefix before the match, so
      // its length is the difference between the two word lengths.
      for (unsigned i = 0; i != prefixLength; ++i) {
        if (clang::isLowercase(typeWord[i]) || typeWord[i] == '_') return false;
      }

      return true;
    }

    // We can match a prefix so long as everything following the match is
    // a number.
    if (typeWord.startswith_lower(nameWord)) {
      for (unsigned i = nameWord.size(), n = typeWord.size(); i != n; ++i) {
        if (!clang::isDigit(typeWord[i])) return false;
      }

      return true;
    }

    return false;
  }

  // Check for an exact match.
  return nameWord.equals_lower(typeWord);
}
/// Match the beginning of the name to the given type name.
///
/// \returns The part of \c name that follows the matched type words, or
/// \c name itself when the leading words of the name do not match the
/// trailing words of the type name.
StringRef swift::matchLeadingTypeName(StringRef name,
                                      OmissionTypeName typeName) {
  // An empty name has no first word to look for; dereferencing the word
  // iterator below would read past the end.
  if (name.empty())
    return name;

  // Match the camelCase beginning of the name to the
  // ending of the type name.
  auto nameWords = camel_case::getWords(name);
  auto typeWords = camel_case::getWords(typeName.Name);
  auto nameWordIter = nameWords.begin(),
    nameWordIterEnd = nameWords.end();
  auto typeWordRevIter = typeWords.rbegin(),
    typeWordRevIterEnd = typeWords.rend();

  // Find the last instance of the first word in the name within
  // the words in the type name.
  while (typeWordRevIter != typeWordRevIterEnd &&
         !matchNameWordToTypeWord(*nameWordIter, *typeWordRevIter)) {
    ++typeWordRevIter;
  }

  // If we didn't find the first word in the name at all, we're
  // done.
  if (typeWordRevIter == typeWordRevIterEnd)
    return name;

  // Now, match from the first word up until the end of the type name.
  auto typeWordIter = typeWordRevIter.base(),
    typeWordIterEnd = typeWords.end();
  ++nameWordIter;

  while (typeWordIter != typeWordIterEnd &&
         nameWordIter != nameWordIterEnd &&
         matchNameWordToTypeWord(*nameWordIter, *typeWordIter)) {
    ++typeWordIter;
    ++nameWordIter;
  }

  // If we didn't reach the end of the type name, don't match.
  if (typeWordIter != typeWordIterEnd)
    return name;

  // Chop off the beginning of the name.
  return name.substr(nameWordIter.getPosition());
}
/// Copy the given string into memory obtained from this scratch space's
/// allocator and return a reference to the copy.
StringRef StringScratchSpace::copyString(StringRef string) {
  void *memory = Allocator.Allocate(string.size(), alignof(char));

  // An empty StringRef may carry a null data pointer, and passing a null
  // source to memcpy is undefined even for a zero length.
  if (!string.empty())
    memcpy(memory, string.data(), string.size());

  return StringRef(static_cast<char *>(memory), string.size());
}
/// Record the given name in this set's own collection of names.
void InheritedNameSet::add(StringRef name) {
  (void)Names.insert(name);
}
/// Determine whether the given name was added to this set or to any set
/// reachable by following the Parent links.
bool InheritedNameSet::contains(StringRef name) const {
  for (auto current = this; current; current = current->Parent) {
    if (current->Names.count(name) > 0)
      return true;
  }

  return false;
}
/// Variant of camel_case::toLowercaseWord whose result lives in string
/// scratch space.
///
/// When lowercasing changes nothing the input itself is returned and nothing
/// is copied.
StringRef camel_case::toLowercaseWord(StringRef string,
                                      StringScratchSpace &scratch){
  // Lowercase into a local buffer first; only persist an actual change.
  llvm::SmallString<32> localBuffer;
  const StringRef lowered = toLowercaseWord(string, localBuffer);
  return lowered == string ? string : scratch.copyString(lowered);
}
/// Omit needless words from the beginning of a name.
///
/// The leading type information is removed only when what follows it starts
/// with a preposition and contains more than that preposition. A leading
/// "By" is removed as well when the word after it is a gerund. In every other
/// case the name is returned unchanged.
static StringRef omitNeedlessWordsFromPrefix(StringRef name,
                                             OmissionTypeName type,
                                             StringScratchSpace &scratch){
  if (type.empty())
    return name;

  // Strip the words at the start of the name that match the type.
  const StringRef remainder = matchLeadingTypeName(name, type);
  if (remainder == name)
    return name;

  // Only proceed when the remainder is "<preposition><something more>".
  const StringRef leadingWord = camel_case::getFirstWord(remainder);
  const bool startsWithPreposition =
      getPartOfSpeech(leadingWord) == PartOfSpeech::Preposition;
  if (!startsWithPreposition || remainder.size() <= leadingWord.size())
    return name;

  // "By" followed by a gerund: drop the "By" too.
  if (leadingWord == "By") {
    const StringRef afterBy = remainder.substr(leadingWord.size());
    if (getPartOfSpeech(camel_case::getFirstWord(afterBy)) ==
            PartOfSpeech::Gerund)
      return toLowercaseWord(afterBy, scratch);
  }

  return toLowercaseWord(remainder, scratch);
}
/// Identify certain vacuous names ("get", "for", "set", "using", "with",
/// ignoring the case of the first letter) to which we do not want to reduce
/// any name.
static bool isVacuousName(StringRef name) {
  static const char *const vacuousWords[] = {
    "get", "for", "set", "using", "with"
  };

  for (const char *vacuousWord : vacuousWords) {
    if (camel_case::sameWordIgnoreFirstCase(name, vacuousWord))
      return true;
  }

  return false;
}
/// Omit words at the end of \c name that merely restate \c typeName.
///
/// The trailing words of the name are matched, last word first, against the
/// trailing words of the type name. How much of the matched text is removed
/// depends on \c role and on the part of speech of the word preceding the
/// match.
///
/// \param name The name to shorten.
/// \param typeName The type whose name is matched against the end of \c name.
/// \param role The role \c name plays.
/// \param allPropertyNames When non-null and \c role is NameRole::BaseName,
/// type information is kept if removing it would strand text that names a
/// known property (in singular or plural form).
/// \param scratch Storage for any newly formed string.
///
/// \returns The shortened name, or the original name when nothing should be
/// removed or when the result would be vacuous or (for base names and
/// properties) a keyword.
static StringRef omitNeedlessWords(StringRef name,
                                   OmissionTypeName typeName,
                                   NameRole role,
                                   const InheritedNameSet *allPropertyNames,
                                   StringScratchSpace &scratch) {
  // If we have no name or no type name, there is nothing to do.
  if (name.empty() || typeName.empty()) return name;

  // Get the camel-case words in the name and type name.
  auto nameWords = camel_case::getWords(name);
  auto typeWords = camel_case::getWords(typeName.Name);

  // Match the last words in the type name to the last words in the
  // name.
  auto nameWordRevIter = nameWords.rbegin(),
    nameWordRevIterBegin = nameWordRevIter,
    firstMatchingNameWordRevIter = nameWordRevIter,
    nameWordRevIterEnd = nameWords.rend();
  auto typeWordRevIter = typeWords.rbegin(),
    typeWordRevIterEnd = typeWords.rend();

  // Record the first match: remember where in the name it happened.
  bool anyMatches = false;
  auto matched = [&] {
    if (anyMatches) return;

    anyMatches = true;
    firstMatchingNameWordRevIter = nameWordRevIter;
  };

  while (nameWordRevIter != nameWordRevIterEnd &&
         typeWordRevIter != typeWordRevIterEnd) {
    // If the names match, continue.
    auto nameWord = *nameWordRevIter;
    if (matchNameWordToTypeWord(nameWord, *typeWordRevIter)) {
      matched();
      ++nameWordRevIter;
      ++typeWordRevIter;
      continue;
    }

    // Special case: "Indexes" and "Indices" in the name match
    // "IndexSet" in the type.
    if ((matchNameWordToTypeWord(nameWord, "Indexes") ||
         matchNameWordToTypeWord(nameWord, "Indices")) &&
        *typeWordRevIter == "Set") {
      auto nextTypeWordRevIter = typeWordRevIter;
      ++nextTypeWordRevIter;
      if (nextTypeWordRevIter != typeWordRevIterEnd &&
          matchNameWordToTypeWord("Index", *nextTypeWordRevIter)) {
        matched();
        ++nameWordRevIter;
        typeWordRevIter = nextTypeWordRevIter;
        ++typeWordRevIter;
        continue;
      }
    }

    // Special case: "Index" in the name matches "Int" or "Integer" in the type.
    if (matchNameWordToTypeWord(nameWord, "Index") &&
        (matchNameWordToTypeWord("Int", *typeWordRevIter) ||
         matchNameWordToTypeWord("Integer", *typeWordRevIter))) {
      matched();
      ++nameWordRevIter;
      ++typeWordRevIter;
      continue;
    }

    // Special case: if the word in the name ends in 's', and we have
    // a collection element type, see if this is a plural.
    if (!typeName.CollectionElement.empty() && nameWord.size() > 2 &&
        nameWord.back() == 's' && role != NameRole::BaseNameSelf) {
      // Check <element name>s.
      auto shortenedNameWord
        = name.substr(0, nameWordRevIter.base().getPosition()-1);
      auto newShortenedNameWord
        = omitNeedlessWords(shortenedNameWord, typeName.CollectionElement,
                            NameRole::Partial, allPropertyNames, scratch);

      // Check <element name>es: drop the trailing 'e' and try again.
      if (shortenedNameWord == newShortenedNameWord &&
          shortenedNameWord.back() == 'e') {
        // StringRef::drop_back returns the shortened string rather than
        // modifying the receiver, so the result has to be assigned back.
        shortenedNameWord = shortenedNameWord.drop_back();
        newShortenedNameWord =
          omitNeedlessWords(shortenedNameWord, typeName.CollectionElement,
                            NameRole::Partial, allPropertyNames, scratch);
      }

      if (shortenedNameWord != newShortenedNameWord) {
        matched();

        // Skip every name word that lies beyond what survived the
        // element-type match.
        unsigned targetSize = newShortenedNameWord.size();
        while (nameWordRevIter.base().getPosition() > targetSize)
          ++nameWordRevIter;
        continue;
      }
    }

    // If this is a skippable suffix, skip it and keep looking.
    if (nameWordRevIter == nameWordRevIterBegin) {
      if (auto withoutSuffix = skipTypeSuffix(typeName.Name)) {
        typeName.Name = *withoutSuffix;
        typeWords = camel_case::getWords(typeName.Name);
        typeWordRevIter = typeWords.rbegin();
        typeWordRevIterEnd = typeWords.rend();
        continue;
      }
    }

    // If we're matching the base name of a method against the type of
    // 'Self', and we haven't matched anything yet, skip over words in
    // the name.
    if (role == NameRole::BaseNameSelf && !anyMatches) {
      ++nameWordRevIter;
      continue;
    }

    break;
  }

  StringRef origName = name;

  // If we matched anything above, update the name appropriately.
  if (anyMatches) {
    // Handle complete name matches.
    if (nameWordRevIter == nameWordRevIterEnd) {
      // If we're doing a partial match or we have an initial
      // parameter, return the empty string.
      if (role == NameRole::Partial || role == NameRole::FirstParameter)
        return "";

      // Leave the name alone.
      return name;
    }

    // Don't strip just "Error".
    if (nameWordRevIter != nameWordRevIterBegin) {
      auto nameWordPrev = std::prev(nameWordRevIter);
      if (nameWordPrev == nameWordRevIterBegin && *nameWordPrev == "Error")
        return name;
    }

    switch (role) {
    case NameRole::Property:
      // Always strip off type information.
      name = name.substr(0, nameWordRevIter.base().getPosition());
      break;

    case NameRole::BaseNameSelf:
      switch (getPartOfSpeech(*nameWordRevIter)) {
      case PartOfSpeech::Verb: {
        // Splice together the parts before and after the matched
        // type. For example, if we matched "ViewController" in
        // "dismissViewControllerAnimated", stitch together
        // "dismissAnimated".
        SmallString<16> newName =
          name.substr(0, nameWordRevIter.base().getPosition());
        newName
          += name.substr(firstMatchingNameWordRevIter.base().getPosition());
        name = scratch.copyString(newName);
        break;
      }

      case PartOfSpeech::Preposition:
      case PartOfSpeech::Gerund:
      case PartOfSpeech::Unknown:
        return name;
      }
      break;

    case NameRole::BaseName:
    case NameRole::FirstParameter:
    case NameRole::Partial:
    case NameRole::SubsequentParameter:
      // Classify the part of speech of the word before the type
      // information we would strip off.
      switch (getPartOfSpeech(*nameWordRevIter)) {
      case PartOfSpeech::Preposition:
        if (role == NameRole::BaseName) {
          // Strip off the part of the name that is redundant with
          // type information, so long as there's something preceding the
          // preposition.
          if (std::next(nameWordRevIter) != nameWordRevIterEnd)
            name = name.substr(0, nameWordRevIter.base().getPosition());
          break;
        }

        SWIFT_FALLTHROUGH;

      case PartOfSpeech::Verb:
      case PartOfSpeech::Gerund:
        // Don't prune redundant type information from the base name if
        // there is a corresponding property (either singular or plural).
        if (allPropertyNames && role == NameRole::BaseName) {
          SmallString<16> localScratch;
          auto removedText = name.substr(nameWordRevIter.base().getPosition());
          auto removedName = camel_case::toLowercaseWord(removedText,
                                                         localScratch);

          // A property with exactly this name.
          if (allPropertyNames->contains(removedName)) return name;

          // From here on, we'll be working with scratch space.
          if (removedName.data() != localScratch.data())
            localScratch = removedName;

          if (localScratch.back() == 'y') {
            // If the last letter is a 'y', try 'ies'.
            localScratch.pop_back();
            localScratch += "ies";
            if (allPropertyNames->contains(localScratch)) return name;
          } else {
            // Otherwise, add an 's' and try again.
            localScratch += 's';
            if (allPropertyNames->contains(localScratch)) return name;

            // Alternatively, try to add 'es'.
            localScratch.pop_back();
            localScratch += "es";
            if (allPropertyNames->contains(localScratch)) return name;
          }
        }

        // Strip off the part of the name that is redundant with
        // type information.
        name = name.substr(0, nameWordRevIter.base().getPosition());
        break;

      case PartOfSpeech::Unknown:
        // Assume it's a noun or adjective; don't strip anything.
        break;
      }
      break;
    }
  }

  // If we ended up with a vacuous name like "get" or "set", do nothing.
  if (isVacuousName(name))
    return origName;

  switch (role) {
  case NameRole::BaseName:
  case NameRole::BaseNameSelf:
  case NameRole::Property:
    // If we ended up with a keyword for a property name or base name,
    // do nothing.
    if (isKeyword(name))
      return origName;
    break;

  case NameRole::SubsequentParameter:
  case NameRole::FirstParameter:
  case NameRole::Partial:
    break;
  }

  // We're done.
  return name;
}
/// Lowercase the run of uppercase letters at the start of the given string,
/// returning a copy held in \c scratch.
///
/// The input itself is returned when it is empty or does not start with an
/// uppercase letter.
StringRef camel_case::toLowercaseInitialisms(StringRef string,
                                             StringScratchSpace &scratch) {
  if (string.empty() || !clang::isUppercase(string[0]))
    return string;

  const unsigned length = string.size();
  llvm::SmallString<32> lowered;
  lowered.push_back(clang::toLowercase(string[0]));

  // Lowercase characters one at a time until the character after the current
  // one is no longer uppercase.
  unsigned index = 1;
  while (index != length) {
    const bool nextIsNotUppercase =
        index < length - 1 && !clang::isUppercase(string[index + 1]);

    if (!nextIsNotUppercase) {
      lowered.push_back(clang::toLowercase(string[index]));
      ++index;
      continue;
    }

    // The current character is still lowercased when the next character is
    // not a letter at all, or when the following word is a plural suffix.
    // Otherwise the current character is left as it is.
    const StringRef following = string.substr(index + 1);
    if (!clang::isLetter(string[index + 1]) ||
        isPluralSuffix(camel_case::getFirstWord(following))) {
      lowered.push_back(clang::toLowercase(string[index]));
      ++index;
    }

    // Everything from here on is copied unchanged.
    lowered.append(string.substr(index));
    break;
  }

  return scratch.copyString(lowered);
}
/// Determine whether the given word occurring before the given
/// preposition results in a conflict that suppresses preposition
/// splitting.
///
/// The conflicting pairs are "compatible with" and "kind of".
static bool wordConflictsBeforePreposition(StringRef word,
                                           StringRef preposition) {
  using camel_case::sameWordIgnoreFirstCase;

  if (sameWordIgnoreFirstCase(preposition, "with"))
    return sameWordIgnoreFirstCase(word, "compatible");

  if (sameWordIgnoreFirstCase(preposition, "of"))
    return sameWordIgnoreFirstCase(word, "kind");

  return false;
}
/// Determine whether the given word occurring after the given
/// preposition results in a conflict that suppresses preposition
/// splitting.
///
/// The conflicting pairs are "with error", "with no" and "to visible".
static bool wordConflictsAfterPreposition(StringRef word,
                                          StringRef preposition) {
  using camel_case::sameWordIgnoreFirstCase;

  if (sameWordIgnoreFirstCase(preposition, "with"))
    return sameWordIgnoreFirstCase(word, "error") ||
           sameWordIgnoreFirstCase(word, "no");

  if (sameWordIgnoreFirstCase(preposition, "to"))
    return sameWordIgnoreFirstCase(word, "visible");

  return false;
}
/// When splitting based on a preposition, whether we should place the
/// preposition on the argument label (vs. on the base name).
///
/// Only the text after the preposition is consulted: when it is exactly
/// "X", "Y" or "Z" the preposition is not placed on the argument label.
static bool shouldPlacePrepositionOnArgLabel(StringRef beforePreposition,
                                             StringRef preposition,
                                             StringRef afterPreposition) {
  // X/Y/Z are often used as coordinates and should be the labels themselves.
  const bool isCoordinate = afterPreposition == "X" ||
                            afterPreposition == "Y" ||
                            afterPreposition == "Z";
  return !isCoordinate;
}
/// Determine whether the preposition in a split is "vacuous", and
/// should be removed.
///
/// Only "with" and "using" can be vacuous.
static bool isVacuousPreposition(StringRef beforePreposition,
                                 StringRef preposition,
                                 StringRef afterPreposition,
                                 const OmissionTypeName &paramType) {
  using camel_case::sameWordIgnoreFirstCase;

  const bool isWith = sameWordIgnoreFirstCase(preposition, "with");
  if (!isWith && !sameWordIgnoreFirstCase(preposition, "using"))
    return false;

  // "with" has a few special cases keyed on the words that follow it.
  if (isWith) {
    const StringRef firstFollowing = camel_case::getFirstWord(afterPreposition);

    // "with coder" / "with zone": not vacuous.
    if (sameWordIgnoreFirstCase(firstFollowing, "coder") ||
        sameWordIgnoreFirstCase(firstFollowing, "zone"))
      return false;

    // An argument label whose last word looks like a past participle (ends
    // in "-ed"): not vacuous.
    if (camel_case::getLastWord(afterPreposition).endswith("ed"))
      return false;

    // "with delegate" / "with frame": vacuous.
    if (sameWordIgnoreFirstCase(firstFollowing, "delegate") ||
        sameWordIgnoreFirstCase(firstFollowing, "frame"))
      return true;
  }

  // Otherwise the preposition is vacuous when the parameter has a default
  // argument, when it is of function type, or when the name starts with a
  // verb.
  return paramType.hasDefaultArgument() ||
         paramType.isFunction() ||
         getPartOfSpeech(camel_case::getFirstWord(beforePreposition)) ==
             PartOfSpeech::Verb;
}
/// Split the base name at its last preposition, if there is one, moving the
/// trailing part into the argument label.
///
/// \param baseName On success, updated to the shortened base name.
/// \param argName On success, updated to the argument label carved out of
/// the original base name.
/// \param paramType The type of the parameter, consulted when deciding
/// whether the preposition is vacuous and should be dropped.
///
/// \returns true if the name was split; false (leaving both names untouched)
/// otherwise.
static bool splitBaseNameAfterLastPreposition(
StringRef &baseName,
StringRef &argName,
const OmissionTypeName &paramType) {
// Scan backwards for a preposition.
auto nameWords = camel_case::getWords(baseName);
auto nameWordRevIter = nameWords.rbegin(),
nameWordRevIterBegin = nameWordRevIter,
nameWordRevIterEnd = nameWords.rend();
bool done = false;
while (nameWordRevIter != nameWordRevIterEnd && !done) {
switch (getPartOfSpeech(*nameWordRevIter)) {
case PartOfSpeech::Preposition:
// Found it; the iterator stays on the preposition.
done = true;
break;
case PartOfSpeech::Verb:
case PartOfSpeech::Gerund:
// A verb or gerund seen before reaching any preposition: don't split.
return false;
case PartOfSpeech::Unknown:
// Keep scanning towards the front of the name.
++nameWordRevIter;
break;
}
}
// If we ran out of words, there's nothing to split.
if (!done) return false;
// We found a split point.
auto preposition = *nameWordRevIter;
// If we have a conflict with the word before the preposition, don't
// split.
if (std::next(nameWordRevIter) != nameWordRevIterEnd &&
wordConflictsBeforePreposition(*std::next(nameWordRevIter), preposition))
return false;
// If we have a conflict with the word after the preposition, don't
// split.
if (nameWordRevIter != nameWordRevIterBegin &&
wordConflictsAfterPreposition(*std::prev(nameWordRevIter), preposition))
return false;
// Determine whether we should drop the preposition. The text before and
// after the preposition is carved out of baseName by pointer arithmetic.
StringRef beforePreposition(baseName.begin(),
preposition.begin() - baseName.begin());
StringRef afterPreposition(preposition.end(),
baseName.end() - preposition.end());
bool dropPreposition = isVacuousPreposition(beforePreposition,
preposition,
afterPreposition,
paramType);
// By default, put the preposition on the argument label.
bool prepositionOnArgLabel =
shouldPlacePrepositionOnArgLabel(beforePreposition, preposition,
afterPreposition);
// NOTE(review): this assumes std::reverse_iterator-style semantics, where
// base() refers to the position just after the word the reverse iterator
// designates; advancing past the preposition then makes base() the start of
// the preposition -- confirm against the word iterator's definition.
if (prepositionOnArgLabel)
++nameWordRevIter;
unsigned startOfArgumentLabel = nameWordRevIter.base().getPosition();
unsigned endOfBaseName = startOfArgumentLabel;
// If we're supposed to drop the preposition, do so: remove it from
// whichever side (argument label or base name) it was placed on.
if (dropPreposition) {
if (prepositionOnArgLabel)
startOfArgumentLabel += preposition.size();
else {
endOfBaseName -= preposition.size();
}
}
// Never leave an empty base name.
if (endOfBaseName == 0) return false;
// If the base name is vacuous and there are two or fewer words in
// the base name, don't split.
auto newBaseName = baseName.substr(0, endOfBaseName);
{
auto newWords = camel_case::getWords(newBaseName);
auto newWordsIter = newWords.begin();
if (isVacuousName(*newWordsIter)) {
// Just one word?
++newWordsIter;
if (newWordsIter == newWords.end()) return false;
// Or two words?
++newWordsIter;
if (newWordsIter == newWords.end()) return false;
// Okay: there is enough in the base name.
}
}
// Update the argument label and base name.
argName = baseName.substr(startOfArgumentLabel);
baseName = newBaseName;
return true;
}
/// Split the base name, if it makes sense.
///
/// \returns true when \c baseName and \c argName were updated, false when
/// nothing was split.
static bool splitBaseName(StringRef &baseName, StringRef &argName,
                          const OmissionTypeName &paramType,
                          StringRef paramName) {
  // An existing argument label is never replaced.
  if (!argName.empty())
    return false;

  // A Boolean parameter with a base name ending in "Animated": move that
  // word into the label.
  const StringRef animatedWord = "Animated";
  if (paramType.isBoolean() &&
      camel_case::getLastWord(baseName) == animatedWord) {
    baseName = baseName.substr(0, baseName.size() - animatedWord.size());
    argName = "animated";
    return true;
  }

  // Names that start with "set" are left alone.
  const bool startsWithSet = camel_case::getFirstWord(baseName) == "set";
  if (startsWithSet)
    return false;

  // So are methods that look like an action (with a "sender" of type
  // AnyObject).
  const bool looksLikeAction =
      paramName == "sender" &&
      camel_case::getLastWord(paramType.Name) == "Object";
  if (looksLikeAction)
    return false;

  // Otherwise, the outcome is whatever the preposition-based split decides.
  return splitBaseNameAfterLastPreposition(baseName, argName, paramType);
}
/// Omit needless words from a declaration's base name and argument labels,
/// updating \c baseName and \c argNames in place.
///
/// \returns true if the base name or any argument label was changed.
bool swift::omitNeedlessWords(StringRef &baseName,
                              MutableArrayRef<StringRef> argNames,
                              StringRef firstParamName,
                              OmissionTypeName resultType,
                              OmissionTypeName contextType,
                              ArrayRef<OmissionTypeName> paramTypes,
                              bool returnsSelf,
                              bool isProperty,
                              const InheritedNameSet *allPropertyNames,
                              StringScratchSpace &scratch) {
  bool changed = false;

  // Store `replacement` into `target` (and note the change) when the two
  // strings differ.
  auto adopt = [&changed](StringRef &target, StringRef replacement) {
    if (replacement != target) {
      target = replacement;
      changed = true;
    }
  };

  // Final step on every exit path: lowercase the leading initialisms of the
  // base name and of every argument label, then report whether anything
  // changed. A rewritten string is recognized by its different storage.
  auto finishByLoweringInitialisms = [&]() -> bool {
    auto lowerInPlace = [&](StringRef &text) {
      StringRef lowered = toLowercaseInitialisms(text, scratch);
      if (text.data() != lowered.data()) {
        text = lowered;
        changed = true;
      }
    };

    lowerInPlace(baseName);
    for (StringRef &label : argNames)
      lowerInPlace(label);
    return changed;
  };

  // When the result type matches the context, remove the context type from
  // the front of the name.
  const bool resultTypeMatchesContext =
      returnsSelf || (resultType == contextType);
  if (resultTypeMatchesContext)
    adopt(baseName,
          omitNeedlessWordsFromPrefix(baseName, contextType, scratch));

  // For methods, strip the context type out of the base name.
  if (!isProperty)
    adopt(baseName,
          ::omitNeedlessWords(baseName, contextType, NameRole::BaseNameSelf,
                              nullptr, scratch));

  // Without parameters only the base name can be shortened, and it is
  // treated like a property name.
  if (paramTypes.empty()) {
    if (resultTypeMatchesContext)
      adopt(baseName,
            ::omitNeedlessWords(baseName,
                                returnsSelf ? contextType : resultType,
                                NameRole::Property,
                                allPropertyNames,
                                scratch));

    return finishByLoweringInitialisms();
  }

  // If needed, split the base name to form the first argument label.
  if (!argNames.empty() &&
      splitBaseName(baseName, argNames[0], paramTypes[0], firstParamName))
    changed = true;

  // Omit needless words based on parameter types. Labels without a
  // corresponding parameter type are left alone.
  const unsigned pairedCount =
      std::min<size_t>(argNames.size(), paramTypes.size());
  for (unsigned index = 0; index != pairedCount; ++index) {
    // Decide which role this label plays. An empty first label means the
    // base name itself gets matched against the first parameter's type.
    NameRole role;
    if (index > 0)
      role = NameRole::SubsequentParameter;
    else if (argNames[0].empty())
      role = NameRole::BaseName;
    else if (baseName == "init")
      role = NameRole::SubsequentParameter;
    else
      role = NameRole::FirstParameter;

    const bool editsBaseName = role == NameRole::BaseName;
    StringRef &target = editsBaseName ? baseName : argNames[index];

    // Property names are only consulted when the base name is being edited.
    adopt(target,
          ::omitNeedlessWords(target, paramTypes[index], role,
                              editsBaseName ? allPropertyNames : nullptr,
                              scratch));
  }

  return finishByLoweringInitialisms();
}