Files
swift-mirror/lib/Basic/StringExtras.cpp
John McCall e880aac1e0 Add a convenience routine for searching for a complete
word within a camelCase identifier.

Basically StringRef::find but requiring the next character
to not be lowercase.

Swift SVN r17658
2014-05-08 00:51:25 +00:00

234 lines
6.7 KiB
C++

//===--- StringExtras.cpp - String Utilities ------------------------------===//
//
// This source file is part of the Swift.org open source project
//
// Copyright (c) 2014 - 2015 Apple Inc. and the Swift project authors
// Licensed under Apache License v2.0 with Runtime Library Exception
//
// See http://swift.org/LICENSE.txt for license information
// See http://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
//
//===----------------------------------------------------------------------===//
//
// This file implements utilities for working with words and camelCase
// names.
//
//===----------------------------------------------------------------------===//
#include "swift/Basic/StringExtras.h"
#include "swift/Basic/Optional.h"
#include "clang/Basic/CharInfo.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/SmallVector.h"
#include <algorithm>
using namespace swift;
using namespace camel_case;
// Classifies a word as a directional preposition, a non-directional
// preposition, or not a preposition at all.
//
// The list of known prepositions is supplied by Prepositions.def. Each entry
// in that file is expanded by one of the two macros defined below into a
// comparison of `word` against the entry's spelling using
// StringRef::equals_lower (a case-insensitive comparison), returning as soon
// as one matches.
PrepositionKind swift::getPrepositionKind(StringRef word) {
// An entry declared as DIRECTIONAL_PREPOSITION yields PK_Directional.
#define DIRECTIONAL_PREPOSITION(Word) \
if (word.equals_lower(#Word)) \
return PK_Directional;
// An entry declared as PREPOSITION yields PK_Nondirectional.
#define PREPOSITION(Word) \
if (word.equals_lower(#Word)) \
return PK_Nondirectional;
// NOTE(review): neither macro is #undef'd in this function; presumably
// Prepositions.def cleans up after itself -- confirm.
#include "Prepositions.def"
// No entry matched: the word is not a known preposition.
return PK_None;
}
// Returns true if `word` is one of the linking verbs listed in
// LinkingVerbs.def.
//
// Each entry in that file is expanded by LINKING_VERB into a comparison of
// `word` against the entry's spelling using StringRef::equals_lower (a
// case-insensitive comparison), returning true on the first match.
bool swift::isLinkingVerb(StringRef word) {
// NOTE(review): the macro is not #undef'd in this function; presumably
// LinkingVerbs.def cleans up after itself -- confirm.
#define LINKING_VERB(Word) \
if (word.equals_lower(#Word)) \
return true;
#include "LinkingVerbs.def"
// No entry matched.
return false;
}
// Computes the index one past the end of the word that starts at Position
// and caches it in NextPosition, marking NextPositionValid.
//
// A word is either a single optional uppercase letter followed by a run of
// non-uppercase characters, or an acronym made of two or more uppercase
// letters.
void WordIterator::computeNextPosition() const {
  assert(Position < String.size() && "Already at end of string");

  const unsigned length = String.size();

  // Measure the run of uppercase letters that opens this word.
  unsigned cursor = Position;
  for (; cursor < length; ++cursor) {
    if (!clang::isUppercase(String[cursor]))
      break;
  }

  const bool isAcronym = cursor - Position > 1;
  if (isAcronym) {
    // When a lowercase letter follows the uppercase run, the final uppercase
    // letter belongs to the next word, so the acronym stops one character
    // early. Otherwise (end of string, or a non-letter follows) the acronym
    // covers the whole run.
    const bool lowercaseFollows =
        cursor != length && clang::isLowercase(String[cursor]);
    NextPosition = lowercaseFollows ? cursor - 1 : cursor;
  } else {
    // Ordinary word: extend through everything up to the next uppercase
    // letter or the end of the string.
    for (; cursor < length; ++cursor) {
      if (clang::isUppercase(String[cursor]))
        break;
    }
    NextPosition = cursor;
  }

  NextPositionValid = true;
}
// Computes the index at which the word ending at Position begins and caches
// it in PrevPosition, marking PrevPositionValid.
void WordIterator::computePrevPosition() const {
  assert(Position > 0 && "Already at beginning of string");

  // Walk backwards across everything that is not an uppercase letter.
  unsigned start = Position;
  for (; start > 0; --start) {
    if (clang::isUppercase(String[start - 1]))
      break;
  }

  if (start == Position) {
    // Nothing was skipped, so the character just before Position is
    // uppercase: we are at the tail of an acronym. Consume the entire
    // uppercase run.
    while (start > 0 && clang::isUppercase(String[start - 1]))
      --start;
    PrevPosition = start;
  } else {
    // Ordinary camel-case word. If the scan stopped at an uppercase letter
    // and the first skipped character is a lowercase letter, that uppercase
    // letter starts the word. If we reached the beginning of the string, or
    // the first skipped character is not a lowercase letter, the word starts
    // where the scan stopped.
    const bool capitalStartsWord =
        start != 0 && clang::isLowercase(String[start]);
    PrevPosition = capitalStartsWord ? start - 1 : start;
  }

  PrevPositionValid = true;
}
// Returns the first camelCase word of `string`, or an empty string when
// `string` itself is empty.
StringRef camel_case::getFirstWord(StringRef string) {
  if (!string.empty()) {
    // An iterator placed at offset 0 dereferences to the leading word.
    WordIterator first(string, 0);
    return *first;
  }

  return "";
}
// Returns the last camelCase word of `string`, or an empty string when
// `string` itself is empty.
StringRef camel_case::getLastWord(StringRef string) {
  if (!string.empty()) {
    // Start at the end of the string and step back one word.
    WordIterator last(string, string.size());
    --last;
    return *last;
  }

  return "";
}
// Returns true if `word1` and `word2` are the same word, treating the case of
// the first character as insignificant. All remaining characters must match
// exactly.
//
// Two empty words compare equal.
bool camel_case::sameWordIgnoreFirstCase(StringRef word1, StringRef word2) {
  if (word1.size() != word2.size())
    return false;

  // Both words are empty (the sizes are equal); there is no first character
  // to inspect, and indexing [0] would be out of bounds.
  if (word1.empty())
    return true;

  if (clang::toLowercase(word1[0]) != clang::toLowercase(word2[0]))
    return false;

  return word1.substr(1) == word2.substr(1);
}
// Returns true if `word1` begins with `word2`, treating the case of the first
// character as insignificant. All remaining characters of `word2` must match
// the corresponding characters of `word1` exactly.
//
// Every string starts with the empty string, so an empty `word2` yields true.
bool camel_case::startsWithIgnoreFirstCase(StringRef word1, StringRef word2) {
  if (word1.size() < word2.size())
    return false;

  // Nothing to match; also avoids indexing word2[0] out of bounds and the
  // unsigned underflow of word2.size() - 1 below.
  if (word2.empty())
    return true;

  if (clang::toLowercase(word1[0]) != clang::toLowercase(word2[0]))
    return false;

  // Compare the remainder of word2 against the equally long slice of word1.
  // Truncating word1 (not word2) is what makes this a prefix test rather
  // than an equality test.
  return word1.substr(1, word2.size() - 1) == word2.substr(1);
}
// Returns `string` with its first letter lowercased.
//
// The input is returned untouched when it is empty, when it does not start
// with an uppercase letter, or when its first two characters are both
// uppercase (an acronym). Otherwise the result is built in `scratch`, whose
// previous contents are discarded, and the returned StringRef refers to
// `scratch`'s storage.
StringRef camel_case::toLowercaseWord(StringRef string,
                                      SmallVectorImpl<char> &scratch) {
  const bool startsUppercase =
      !string.empty() && clang::isUppercase(string[0]);
  if (!startsUppercase)
    return string;

  const bool looksLikeAcronym =
      string.size() > 1 && clang::isUppercase(string[1]);
  if (looksLikeAcronym)
    return string;

  // Rebuild the word: lowercased first letter followed by the original tail.
  scratch.clear();
  scratch.push_back(clang::toLowercase(string[0]));
  scratch.append(string.begin() + 1, string.end());
  return StringRef(scratch.data(), scratch.size());
}
// Returns `string` with its first letter uppercased.
//
// The input is returned untouched when it is empty or does not start with a
// lowercase letter. Otherwise the result is built in `scratch`, whose
// previous contents are discarded, and the returned StringRef refers to
// `scratch`'s storage.
StringRef camel_case::toSentencecase(StringRef string,
                                     SmallVectorImpl<char> &scratch) {
  const bool startsLowercase =
      !string.empty() && clang::isLowercase(string[0]);
  if (!startsLowercase)
    return string;

  // Rebuild the word: uppercased first letter followed by the original tail.
  scratch.clear();
  scratch.push_back(clang::toUppercase(string[0]));
  scratch.append(string.begin() + 1, string.end());
  return StringRef(scratch.data(), scratch.size());
}
// Strips a short all-uppercase prefix from `string`.
//
// If `string` has at least four characters and opens with a run of exactly
// three or four uppercase letters followed by some other character, the
// result starts at the last letter of that run (which is kept as the first
// letter of the remaining name). In every other case `string` is returned
// unchanged.
StringRef camel_case::dropPrefix(StringRef string) {
  const unsigned length = string.size();
  if (length < 4)
    return string;

  // Count the uppercase letters at the front of the string.
  unsigned upperRun = 0;
  while (upperRun < length && clang::isUppercase(string[upperRun]))
    ++upperRun;

  // An entirely uppercase string has no prefix to drop, and only runs of
  // three or four uppercase letters are treated as a prefix.
  const bool hasPrefix =
      upperRun != length && (upperRun == 3 || upperRun == 4);
  return hasPrefix ? string.substr(upperRun - 1) : string;
}
// Appends `string` to `buffer` with its first character uppercased, and
// returns a StringRef covering the entire contents of `buffer` afterwards.
// An empty `string` leaves `buffer` unchanged.
StringRef camel_case::appendSentenceCase(SmallVectorImpl<char> &buffer,
                                         StringRef string) {
  if (!string.empty()) {
    // Uppercased first character, then the rest of the string verbatim.
    buffer.push_back(clang::toUppercase(string[0]));
    buffer.append(string.begin() + 1, string.end());
  }

  return StringRef(buffer.data(), buffer.size());
}
// Searches `string` for `word` as a complete camelCase word and returns the
// index of the first such occurrence, or StringRef::npos if there is none.
//
// This behaves like StringRef::find, except that a match is rejected when the
// character immediately following it is a lowercase letter (the match would
// then be only the beginning of a longer word).
//
// `word` must be non-empty and must start with an uppercase letter.
size_t camel_case::findWord(StringRef string, StringRef word) {
  assert(!word.empty());
  assert(clang::isUppercase(word[0]));

  // Scan forward until we find the word as a complete word.
  size_t startingIndex = 0;
  while (true) {
    size_t index = string.find(word, startingIndex);
    if (index == StringRef::npos)
      return StringRef::npos;

    // If any of the following checks fail, we want to start searching
    // past the end of the match. (This assumes that the word doesn't
    // end with a prefix of itself, e.g. "LikeableLike".)
    startingIndex = index + word.size();

    // We assume that we don't have to check if the match starts a new
    // word in the string.

    // The match is only a complete word if whatever follows it does not
    // continue the word with a lowercase letter. Look at the text *after*
    // the match; inspecting the text at `index` would just re-read word[0],
    // which is uppercase by precondition, so no match would ever be rejected.
    StringRef suffix = string.substr(startingIndex);
    if (!suffix.empty() && clang::isLowercase(suffix[0]))
      continue;

    return index;
  }
}