//===--- StringExtras.cpp - String Utilities ------------------------------===// // // This source file is part of the Swift.org open source project // // Copyright (c) 2014 - 2015 Apple Inc. and the Swift project authors // Licensed under Apache License v2.0 with Runtime Library Exception // // See http://swift.org/LICENSE.txt for license information // See http://swift.org/CONTRIBUTORS.txt for the list of Swift project authors // //===----------------------------------------------------------------------===// // // This file implements utilities for working with words and camelCase // names. // //===----------------------------------------------------------------------===// #include "swift/Basic/StringExtras.h" #include "clang/Basic/CharInfo.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringSwitch.h" #include using namespace swift; using namespace camel_case; PrepositionKind swift::getPrepositionKind(StringRef word) { #define DIRECTIONAL_PREPOSITION(Word) \ if (word.equals_lower(#Word)) \ return PK_Directional; #define PREPOSITION(Word) \ if (word.equals_lower(#Word)) \ return PK_Nondirectional; #include "PartsOfSpeech.def" return PK_None; } PartOfSpeech swift::getPartOfSpeech(StringRef word) { // FIXME: This implementation is woefully inefficient. #define PREPOSITION(Word) \ if (word.equals_lower(#Word)) \ return PartOfSpeech::Preposition; #define VERB(Word) \ if (word.equals_lower(#Word)) \ return PartOfSpeech::Verb; #include "PartsOfSpeech.def" // Identify gerunds, which always end in "ing". if (word.endswith("ing") && word.size() > 4) { StringRef possibleVerb = word.substr(0, word.size()-3); // If what remains is a verb, we have a gerund. if (getPartOfSpeech(possibleVerb) == PartOfSpeech::Verb) return PartOfSpeech::Gerund; // Try adding an "e" and look for that as a verb. if (possibleVerb.back() != 'e') { SmallString<16> possibleVerbWithE; possibleVerbWithE += possibleVerb; possibleVerbWithE += 'e'; if (getPartOfSpeech(possibleVerbWithE) == PartOfSpeech::Verb) return PartOfSpeech::Gerund; } // If there is a repeated letter at the back, drop that second // instance of that letter and try again. unsigned count = possibleVerb.size(); if (possibleVerb[count-1] == possibleVerb[count-2] && getPartOfSpeech(possibleVerb.substr(0, count-1)) == PartOfSpeech::Verb) return PartOfSpeech::Gerund; } return PartOfSpeech::Unknown; } void WordIterator::computeNextPosition() const { assert(Position < String.size() && "Already at end of string"); unsigned i = Position, n = String.size(); // Treat _ as a word on its own. Don't coalesce. if (String[i] == '_') { NextPosition = i + 1; NextPositionValid = true; return; } // Skip over any uppercase letters at the beginning of the word. while (i < n && clang::isUppercase(String[i])) ++i; // If there was more than one uppercase letter, this is an // acronym. if (i - Position > 1) { // If we hit the end of the string, that's it. Otherwise, this // word ends before the last uppercase letter if the next word is alphabetic // (URL_Loader) or after the last uppercase letter if it's not (UTF_8). NextPosition = (i == n || !clang::isLowercase(String[i])) ? i : i-1; NextPositionValid = true; return; } // Skip non-uppercase letters. while (i < n && !clang::isUppercase(String[i]) && String[i] != '_') ++i; NextPosition = i; NextPositionValid = true; } void WordIterator::computePrevPosition() const { assert(Position > 0 && "Already at beginning of string"); unsigned i = Position; // While we see non-uppercase letters, keep moving back. while (i > 0 && !clang::isUppercase(String[i-1]) && String[i-1] != '_') --i; // If we found any lowercase letters, this was a normal camel case // word (not an acronym). if (i < Position) { // If we hit the beginning of the string, that's it. Otherwise, this // word starts with an uppercase letter if the next word is alphabetic // (URL_Loader) or after the last uppercase letter if it's not (UTF_8). PrevPosition = i; if (i != 0 && clang::isLowercase(String[i]) && String[i-1] != '_') --PrevPosition; PrevPositionValid = true; return; } // Treat _ as a word on its own. Don't coalesce. if (String[i-1] == '_') { PrevPosition = i - 1; PrevPositionValid = true; return; } // There were no lowercase letters, so this is an acronym. Keep // skipping uppercase letters. while (i > 0 && clang::isUppercase(String[i-1])) --i; PrevPosition = i; PrevPositionValid = true; } StringRef camel_case::getFirstWord(StringRef string) { if (string.empty()) return ""; return *WordIterator(string, 0); } StringRef camel_case::getLastWord(StringRef string) { if (string.empty()) return ""; return *--WordIterator(string, string.size()); } bool camel_case::sameWordIgnoreFirstCase(StringRef word1, StringRef word2) { if (word1.size() != word2.size()) return false; if (clang::toLowercase(word1[0]) != clang::toLowercase(word2[0])) return false; return word1.substr(1) == word2.substr(1); } bool camel_case::startsWithIgnoreFirstCase(StringRef word1, StringRef word2) { if (word1.size() < word2.size()) return false; if (clang::toLowercase(word1[0]) != clang::toLowercase(word2[0])) return false; return word1.substr(1) == word2.substr(1, word1.size() - 1); } StringRef camel_case::toLowercaseWord(StringRef string, SmallVectorImpl &scratch) { if (string.empty()) return string; // Already lowercase. if (!clang::isUppercase(string[0])) return string; // Acronym doesn't get lowercased. if (string.size() > 1 && clang::isUppercase(string[1])) return string; // Lowercase the first letter, append the rest. scratch.clear(); scratch.push_back(clang::toLowercase(string[0])); scratch.append(string.begin() + 1, string.end()); return StringRef(scratch.data(), scratch.size()); } StringRef camel_case::toSentencecase(StringRef string, SmallVectorImpl &scratch) { if (string.empty()) return string; // Can't be uppercased. if (!clang::isLowercase(string[0])) return string; // Uppercase the first letter, append the rest. scratch.clear(); scratch.push_back(clang::toUppercase(string[0])); scratch.append(string.begin() + 1, string.end()); return StringRef(scratch.data(), scratch.size()); } StringRef camel_case::dropPrefix(StringRef string) { unsigned firstLower = 0, n = string.size(); if (n < 4) return string; for (; firstLower < n; ++firstLower) { if (!clang::isUppercase(string[firstLower])) break; } if (firstLower == n) return string; if (firstLower >= 3 && firstLower <= 4) return string.substr(firstLower - 1); return string; } StringRef camel_case::appendSentenceCase(SmallVectorImpl &buffer, StringRef string) { // Trivial case: empty string. if (string.empty()) return StringRef(buffer.data(), buffer.size()); // Uppercase the first letter, append the rest. buffer.push_back(clang::toUppercase(string[0])); buffer.append(string.begin() + 1, string.end()); return StringRef(buffer.data(), buffer.size()); } size_t camel_case::findWord(StringRef string, StringRef word) { assert(!word.empty()); assert(clang::isUppercase(word[0])); // Scan forward until we find the word as a complete word. size_t startingIndex = 0; while (true) { size_t index = string.find(word, startingIndex); if (index == StringRef::npos) return StringRef::npos; // If any of the following checks fail, we want to start searching // past the end of the match. (This assumes that the word doesn't // end with a prefix of itself, e.g. "LikeableLike".) startingIndex = index + word.size(); // We assume that we don't have to check if the match starts a new // word in the string. // If we find the word, check whether it's a valid match. StringRef suffix = string.substr(index); if (!suffix.empty() && clang::isLowercase(suffix[0])) continue; return index; } } /// Determine whether the given identifier is a keyword. static bool isKeyword(StringRef identifier) { return llvm::StringSwitch(identifier) #define KEYWORD(kw) .Case(#kw, true) #define SIL_KEYWORD(kw) #include "swift/Parse/Tokens.def" .Default(false); } /// Skip a type suffix that can be dropped. static Optional skipTypeSuffix(StringRef typeName) { if (typeName.empty()) return None; // "Ptr" and "Ref" are common suffixes we can ignore. if (typeName.endswith("Ref") || typeName.endswith("Ptr")) { return typeName.drop_back(3); } /// \d+D for dimensionality. if (typeName.back() == 'D' && typeName.size() > 1) { unsigned firstDigit = typeName.size() - 1; while (firstDigit > 0) { if (!isdigit(typeName[firstDigit-1])) break; --firstDigit; } if (firstDigit < typeName.size()-1) { return typeName.substr(0, firstDigit); } } return None; } StringRef swift::omitNeedlessWords(StringRef name, OmissionTypeName typeName, NameRole role) { if (name.empty() || typeName.empty()) return name; // Get the camel-case words in the name and type name. auto nameWords = camel_case::getWords(name); auto typeWords = camel_case::getWords(typeName.Name); // Match the last words in the type name to the last words in the // name. auto nameWordRevIter = nameWords.rbegin(), nameWordRevIterBegin = nameWordRevIter, nameWordRevIterEnd = nameWords.rend(); auto typeWordRevIter = typeWords.rbegin(), typeWordRevIterEnd = typeWords.rend(); bool anyMatches = false; while (nameWordRevIter != nameWordRevIterEnd && typeWordRevIter != typeWordRevIterEnd) { // If the names match, continue. auto nameWord = *nameWordRevIter; if (camel_case::sameWordIgnoreFirstCase(nameWord, *typeWordRevIter)) { anyMatches = true; ++nameWordRevIter; ++typeWordRevIter; continue; } // Special case: "Indexes" and "Indices" in the name match // "IndexSet" in the type. if ((camel_case::sameWordIgnoreFirstCase(nameWord, "Indexes") || camel_case::sameWordIgnoreFirstCase(nameWord, "Indices")) && *typeWordRevIter == "Set") { auto nextTypeWordRevIter = typeWordRevIter; ++nextTypeWordRevIter; if (nextTypeWordRevIter != typeWordRevIterEnd && camel_case::sameWordIgnoreFirstCase(*nextTypeWordRevIter, "Index")) { anyMatches = true; ++nameWordRevIter; typeWordRevIter = nextTypeWordRevIter; ++typeWordRevIter; continue; } } // Special case: "Index" in the name matches "Int" or "Integer" in the type. if (camel_case::sameWordIgnoreFirstCase(nameWord, "Index") && (camel_case::sameWordIgnoreFirstCase(*typeWordRevIter, "Int") || camel_case::sameWordIgnoreFirstCase(*typeWordRevIter, "Integer"))) { anyMatches = true; ++nameWordRevIter; ++typeWordRevIter; continue; } // Special case: if the word in the name ends in 's', and we have // a collection element type, see if this is a plural. if (!typeName.CollectionElement.empty() && nameWord.size() > 2 && nameWord.back() == 's') { // Check s. auto shortenedNameWord = name.substr(0, nameWordRevIter.base().getPosition()-1); auto newShortenedNameWord = omitNeedlessWords(shortenedNameWord, typeName.CollectionElement, NameRole::Partial); if (shortenedNameWord != newShortenedNameWord) { anyMatches = true; unsigned targetSize = newShortenedNameWord.size(); while (nameWordRevIter.base().getPosition() > targetSize) ++nameWordRevIter; continue; } } // If this is a skippable suffix, skip it and keep looking. if (nameWordRevIter == nameWordRevIterBegin) { if (auto withoutSuffix = skipTypeSuffix(typeName.Name)) { typeName.Name = *withoutSuffix; typeWords = camel_case::getWords(typeName.Name); typeWordRevIter = typeWords.rbegin(); typeWordRevIterEnd = typeWords.rend(); continue; } } break; } // If nothing matched, there is nothing to omit. if (!anyMatches) return name; // Handle complete name matches. if (nameWordRevIter == nameWordRevIterEnd) { // If we're doing a partial match, return the empty string. if (role == NameRole::Partial) return ""; // Leave the name alone. return name; } if (role != NameRole::Property) { // Classify the part of speech of the word before the type // information we would strip off. switch (getPartOfSpeech(*nameWordRevIter)) { case PartOfSpeech::Preposition: case PartOfSpeech::Verb: case PartOfSpeech::Gerund: // Okay to strip off the part of the name that is redundant with // type information. break; case PartOfSpeech::Unknown: // Assume it's a noun or adjective; don't strip anything. return name; } } // Go back to the last matching word and chop off the name at that // point. StringRef newName = name.substr(0, nameWordRevIter.base().getPosition()); // If we ended up with a name like "get" or "set", do nothing. if (newName == "get" || newName == "set") return name; // If we ended up with a keyword for a property name or base name, // do nothing. switch (role) { case NameRole::BaseName: case NameRole::Property: if (isKeyword(newName)) return name; break; case NameRole::FirstParameter: case NameRole::SubsequentParameter: case NameRole::Partial: break; } // We're done. return newName; } bool swift::omitNeedlessWords(StringRef &baseName, MutableArrayRef argNames, OmissionTypeName resultType, OmissionTypeName contextType, ArrayRef paramTypes, bool returnsSelf) { // For zero-parameter methods that return 'Self' or a result type // that matches the declaration context, omit needless words from // the base name. if (paramTypes.empty()) { if (returnsSelf || (resultType == contextType)) { OmissionTypeName typeName = returnsSelf ? contextType : resultType; if (typeName.empty()) return false; StringRef oldBaseName = baseName; baseName = omitNeedlessWords(baseName, typeName, NameRole::Property); return baseName != oldBaseName; } return false; } // Omit needless words based on parameter types. bool anyChanges = false; for (unsigned i = 0, n = argNames.size(); i != n; ++i) { // If there is no corresponding parameter, there is nothing to // omit. if (i >= paramTypes.size()) continue; // Omit needless words based on the type of the parameter. NameRole role = i > 0 ? NameRole::SubsequentParameter : argNames[0].empty() ? NameRole::BaseName : NameRole::FirstParameter; StringRef name = role == NameRole::BaseName ? baseName : argNames[i]; StringRef newName = omitNeedlessWords(name, paramTypes[i], role); if (name == newName) continue; anyChanges = true; // Record this change. if (role == NameRole::BaseName) { baseName = newName; } else { argNames[i] = newName; } } return anyChanges; }