//===--- StringExtras.cpp - String Utilities ------------------------------===// // // This source file is part of the Swift.org open source project // // Copyright (c) 2014 - 2015 Apple Inc. and the Swift project authors // Licensed under Apache License v2.0 with Runtime Library Exception // // See http://swift.org/LICENSE.txt for license information // See http://swift.org/CONTRIBUTORS.txt for the list of Swift project authors // //===----------------------------------------------------------------------===// // // This file implements utilities for working with words and camelCase // names. // //===----------------------------------------------------------------------===// #include "swift/Basic/StringExtras.h" #include "clang/Basic/CharInfo.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/SmallVector.h" #include using namespace swift; using namespace camel_case; PrepositionKind swift::getPrepositionKind(StringRef word) { #define DIRECTIONAL_PREPOSITION(Word) \ if (word.equals_lower(#Word)) \ return PK_Directional; #define PREPOSITION(Word) \ if (word.equals_lower(#Word)) \ return PK_Nondirectional; #include "Prepositions.def" return PK_None; } void WordIterator::computeNextPosition() const { assert(Position < String.size() && "Already at end of string"); unsigned i = Position, n = String.size(); // Treat _ as a word on its own. Don't coalesce. if (String[i] == '_') { NextPosition = i + 1; NextPositionValid = true; return; } // Skip over any uppercase letters at the beginning of the word. while (i < n && clang::isUppercase(String[i])) ++i; // If there was more than one uppercase letter, this is an // acronym. if (i - Position > 1) { // If we hit the end of the string, that's it. Otherwise, this // word ends before the last uppercase letter if the next word is alphabetic // (URL_Loader) or after the last uppercase letter if it's not (UTF_8). NextPosition = (i == n || !clang::isLowercase(String[i])) ? i : i-1; NextPositionValid = true; return; } // Skip non-uppercase letters. while (i < n && !clang::isUppercase(String[i]) && String[i] != '_') ++i; NextPosition = i; NextPositionValid = true; } void WordIterator::computePrevPosition() const { assert(Position > 0 && "Already at beginning of string"); unsigned i = Position; // While we see non-uppercase letters, keep moving back. while (i > 0 && !clang::isUppercase(String[i-1]) && String[i-1] != '_') --i; // If we found any lowercase letters, this was a normal camel case // word (not an acronym). if (i < Position) { // If we hit the beginning of the string, that's it. Otherwise, this // word starts with an uppercase letter if the next word is alphabetic // (URL_Loader) or after the last uppercase letter if it's not (UTF_8). PrevPosition = i; if (i != 0 && clang::isLowercase(String[i]) && String[i-1] != '_') --PrevPosition; PrevPositionValid = true; return; } // Treat _ as a word on its own. Don't coalesce. if (String[i-1] == '_') { PrevPosition = i - 1; PrevPositionValid = true; return; } // There were no lowercase letters, so this is an acronym. Keep // skipping uppercase letters. while (i > 0 && clang::isUppercase(String[i-1])) --i; PrevPosition = i; PrevPositionValid = true; } StringRef camel_case::getFirstWord(StringRef string) { if (string.empty()) return ""; return *WordIterator(string, 0); } StringRef camel_case::getLastWord(StringRef string) { if (string.empty()) return ""; return *--WordIterator(string, string.size()); } bool camel_case::sameWordIgnoreFirstCase(StringRef word1, StringRef word2) { if (word1.size() != word2.size()) return false; if (clang::toLowercase(word1[0]) != clang::toLowercase(word2[0])) return false; return word1.substr(1) == word2.substr(1); } bool camel_case::startsWithIgnoreFirstCase(StringRef word1, StringRef word2) { if (word1.size() < word2.size()) return false; if (clang::toLowercase(word1[0]) != clang::toLowercase(word2[0])) return false; return word1.substr(1) == word2.substr(1, word1.size() - 1); } StringRef camel_case::toLowercaseWord(StringRef string, SmallVectorImpl &scratch) { if (string.empty()) return string; // Already lowercase. if (!clang::isUppercase(string[0])) return string; // Acronym doesn't get lowercased. if (string.size() > 1 && clang::isUppercase(string[1])) return string; // Lowercase the first letter, append the rest. scratch.clear(); scratch.push_back(clang::toLowercase(string[0])); scratch.append(string.begin() + 1, string.end()); return StringRef(scratch.data(), scratch.size()); } StringRef camel_case::toSentencecase(StringRef string, SmallVectorImpl &scratch) { if (string.empty()) return string; // Can't be uppercased. if (!clang::isLowercase(string[0])) return string; // Uppercase the first letter, append the rest. scratch.clear(); scratch.push_back(clang::toUppercase(string[0])); scratch.append(string.begin() + 1, string.end()); return StringRef(scratch.data(), scratch.size()); } StringRef camel_case::dropPrefix(StringRef string) { unsigned firstLower = 0, n = string.size(); if (n < 4) return string; for (; firstLower < n; ++firstLower) { if (!clang::isUppercase(string[firstLower])) break; } if (firstLower == n) return string; if (firstLower >= 3 && firstLower <= 4) return string.substr(firstLower - 1); return string; } StringRef camel_case::appendSentenceCase(SmallVectorImpl &buffer, StringRef string) { // Trivial case: empty string. if (string.empty()) return StringRef(buffer.data(), buffer.size()); // Uppercase the first letter, append the rest. buffer.push_back(clang::toUppercase(string[0])); buffer.append(string.begin() + 1, string.end()); return StringRef(buffer.data(), buffer.size()); } size_t camel_case::findWord(StringRef string, StringRef word) { assert(!word.empty()); assert(clang::isUppercase(word[0])); // Scan forward until we find the word as a complete word. size_t startingIndex = 0; while (true) { size_t index = string.find(word, startingIndex); if (index == StringRef::npos) return StringRef::npos; // If any of the following checks fail, we want to start searching // past the end of the match. (This assumes that the word doesn't // end with a prefix of itself, e.g. "LikeableLike".) startingIndex = index + word.size(); // We assume that we don't have to check if the match starts a new // word in the string. // If we find the word, check whether it's a valid match. StringRef suffix = string.substr(index); if (!suffix.empty() && clang::isLowercase(suffix[0])) continue; return index; } }