//===--- StringExtras.cpp - String Utilities ------------------------------===// // // This source file is part of the Swift.org open source project // // Copyright (c) 2014 - 2015 Apple Inc. and the Swift project authors // Licensed under Apache License v2.0 with Runtime Library Exception // // See http://swift.org/LICENSE.txt for license information // See http://swift.org/CONTRIBUTORS.txt for the list of Swift project authors // //===----------------------------------------------------------------------===// // // This file implements utilities for working with words and camelCase // names. // //===----------------------------------------------------------------------===// #include "swift/Basic/StringExtras.h" #include "swift/Basic/Optional.h" #include "clang/Basic/CharInfo.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/SmallVector.h" #include using namespace swift; using namespace camel_case; PrepositionKind swift::getPrepositionKind(StringRef word) { #define DIRECTIONAL_PREPOSITION(Word) \ if (word.equals_lower(#Word)) \ return PK_Directional; #define PREPOSITION(Word) \ if (word.equals_lower(#Word)) \ return PK_Nondirectional; #include "Prepositions.def" return PK_None; } void WordIterator::computeNextPosition() const { assert(Position < String.size() && "Already at end of string"); // Skip over any uppercase letters at the beginning of the word. unsigned i = Position, n = String.size(); while (i < n && clang::isUppercase(String[i])) ++i; // If there was more than one uppercase letter, this is an // acronym. if (i - Position > 1) { // If we hit the end of the string, that's it. Otherwise, this // word ends at the last uppercase letter, so that the next word // starts with the last uppercase letter. NextPosition = i == n? i : i-1; NextPositionValid = true; return; } // Skip non-uppercase letters. while (i < n && !clang::isUppercase(String[i])) ++i; NextPosition = i; NextPositionValid = true; } void WordIterator::computePrevPosition() const { assert(Position > 0 && "Already at beginning of string"); // While we see non-uppercase letters, keep moving back. unsigned i = Position; while (i > 0 && !clang::isUppercase(String[i-1])) --i; // If we found any lowercase letters, this was a normal camel case // word (not an acronym). if (i < Position) { // If we hit the beginning of the string, that's it. Otherwise, // this word starts at the uppercase letter that terminated the // search above. PrevPosition = i == 0 ? 0 : i-1; PrevPositionValid = true; return; } // There were no lowercase letters, so this is an acronym. Keep // skipping uppercase letters. while (i > 0 && clang::isUppercase(String[i-1])) --i; PrevPosition = i; PrevPositionValid = true; } StringRef camel_case::getFirstWord(StringRef string) { if (string.empty()) return ""; return *WordIterator(string, 0); } StringRef camel_case::getLastWord(StringRef string) { if (string.empty()) return ""; return *--WordIterator(string, string.size()); } bool camel_case::sameWordIgnoreFirstCase(StringRef word1, StringRef word2) { if (word1.size() != word2.size()) return false; if (clang::toLowercase(word1[0]) != clang::toLowercase(word2[0])) return false; return word1.substr(1) == word2.substr(1); } bool camel_case::startsWithIgnoreFirstCase(StringRef word1, StringRef word2) { if (word1.size() < word2.size()) return false; if (clang::toLowercase(word1[0]) != clang::toLowercase(word2[0])) return false; return word1.substr(1) == word2.substr(1, word1.size() - 1); } StringRef camel_case::toLowercaseWord(StringRef string, SmallVectorImpl &scratch) { if (string.empty()) return string; // Already lowercase. if (!clang::isUppercase(string[0])) return string; // Acronym doesn't get lowercased. if (string.size() > 1 && clang::isUppercase(string[1])) return string; // Lowercase the first letter, append the rest. scratch.clear(); scratch.push_back(clang::toLowercase(string[0])); scratch.append(string.begin() + 1, string.end()); return StringRef(scratch.data(), scratch.size()); } StringRef camel_case::toSentencecase(StringRef string, SmallVectorImpl &scratch) { if (string.empty()) return string; // Can't be uppercased. if (!clang::isLowercase(string[0])) return string; // Uppercase the first letter, append the rest. scratch.clear(); scratch.push_back(clang::toUppercase(string[0])); scratch.append(string.begin() + 1, string.end()); return StringRef(scratch.data(), scratch.size()); } StringRef camel_case::dropPrefix(StringRef string) { unsigned firstLower = 0, n = string.size(); if (n < 4) return string; for (; firstLower < n; ++firstLower) { if (!clang::isUppercase(string[firstLower])) break; } if (firstLower == n) return string; if (firstLower >= 3 && firstLower <= 4) return string.substr(firstLower - 1); return string; } Words::reverse_iterator MultiWordMap::match(Words::reverse_iterator first, Words::reverse_iterator last) const{ assert(first != last && "Empty sequence cannot be matched"); // If the current word isn't at the start of the sequence, we're done. llvm::SmallString<32> scratch; auto known = Data.find(toLowercaseWord(*first, scratch)); if (known == Data.end()) return first; /// The best result (if we found anything) and the length of the match. Optional> bestResult; for (const auto &multiword : known->second) { // Match the words in the sequence to the words in this multiword. auto matchNext = first; ++matchNext; bool matched = true; for (const auto &word : multiword) { if (matchNext == last || !sameWordIgnoreFirstCase(word, *matchNext++)) { matched = false; break; } } if (!matched) continue; // If this is the first result, or the previous result was a shorter match, // we just found a better result. if (!bestResult || bestResult->second < multiword.size()) { bestResult = { std::prev(matchNext), multiword.size() }; } } return bestResult ? bestResult->first : first; }