Files
swift-mirror/lib/Basic/StringExtras.cpp

191 lines
5.7 KiB
C++

//===--- StringExtras.cpp - String Utilities ------------------------------===//
//
// This source file is part of the Swift.org open source project
//
// Copyright (c) 2014 - 2015 Apple Inc. and the Swift project authors
// Licensed under Apache License v2.0 with Runtime Library Exception
//
// See http://swift.org/LICENSE.txt for license information
// See http://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
//
//===----------------------------------------------------------------------===//
//
// This file implements utilities for working with words and camelCase
// names.
//
//===----------------------------------------------------------------------===//
#include "swift/Basic/StringExtras.h"
#include "swift/Basic/Optional.h"
#include "clang/Basic/CharInfo.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/SmallVector.h"
#include <algorithm>
using namespace swift;
using namespace camel_case;
/// Classify \p word as a directional preposition, a non-directional
/// preposition, or neither.
///
/// Every candidate is compared with StringRef::equals_lower, so the letter
/// case of \p word does not affect the result.
///
/// \returns PK_Directional or PK_Nondirectional for the first expanded entry
/// that matches \p word, and PK_None when nothing matches.
PrepositionKind swift::getPrepositionKind(StringRef word) {
// Each macro invocation pulled in from Prepositions.def expands to one
// "if (...) return ...;" statement; the macro argument is stringized (#Word)
// to form the literal that \p word is compared against.
// NOTE(review): presumably Prepositions.def consists solely of invocations of
// these two macros and #undefs them itself -- confirm against that file.
#define DIRECTIONAL_PREPOSITION(Word) \
if (word.equals_lower(#Word)) \
return PK_Directional;
#define PREPOSITION(Word) \
if (word.equals_lower(#Word)) \
return PK_Nondirectional;
#include "Prepositions.def"
// None of the expanded comparisons matched.
return PK_None;
}
/// Find where the word beginning at Position ends, store that offset in
/// NextPosition, and mark NextPositionValid.
///
/// A word is either an acronym (a run of two or more uppercase letters) or an
/// optional single uppercase letter followed by non-uppercase characters.
void WordIterator::computeNextPosition() const {
  assert(Position < String.size() && "Already at end of string");

  const unsigned length = String.size();
  unsigned cursor = Position;

  // Consume the run of uppercase letters that opens the word.
  while (cursor < length && clang::isUppercase(String[cursor]))
    ++cursor;

  if (cursor - Position > 1) {
    // Two or more uppercase letters form an acronym. Unless the run reaches
    // the end of the string, its final uppercase letter belongs to the word
    // that follows, so step back onto it.
    if (cursor != length)
      --cursor;
  } else {
    // Ordinary word: extend it up to (but not including) the next uppercase
    // letter, or to the end of the string.
    while (cursor < length && !clang::isUppercase(String[cursor]))
      ++cursor;
  }

  NextPosition = cursor;
  NextPositionValid = true;
}
/// Find where the word ending at Position begins, store that offset in
/// PrevPosition, and mark PrevPositionValid.
///
/// This is the backwards counterpart of computeNextPosition(): the preceding
/// word is either a run of uppercase letters (an acronym) or a stretch of
/// non-uppercase characters together with the uppercase letter in front of it.
void WordIterator::computePrevPosition() const {
  assert(Position > 0 && "Already at beginning of string");

  unsigned cursor = Position;

  // Walk backwards over the trailing non-uppercase characters.
  while (cursor > 0 && !clang::isUppercase(String[cursor - 1]))
    --cursor;

  if (cursor == Position) {
    // Nothing was skipped, so the character just before Position is
    // uppercase: the preceding word is an acronym. Walk back over the whole
    // uppercase run.
    while (cursor > 0 && clang::isUppercase(String[cursor - 1]))
      --cursor;
  } else if (cursor != 0) {
    // A normal camel-case word: it starts at the uppercase letter that
    // stopped the scan above (when there is one).
    --cursor;
  }

  PrevPosition = cursor;
  PrevPositionValid = true;
}
/// Return the first camelCase word of \p string, or an empty string when
/// \p string is itself empty.
StringRef camel_case::getFirstWord(StringRef string) {
  if (!string.empty()) {
    // An iterator placed at offset 0 dereferences to the leading word.
    WordIterator leading(string, 0);
    return *leading;
  }

  return "";
}
/// Return the last camelCase word of \p string, or an empty string when
/// \p string is itself empty.
StringRef camel_case::getLastWord(StringRef string) {
  if (!string.empty()) {
    // Start one past the end and step back a single word.
    WordIterator trailing(string, string.size());
    --trailing;
    return *trailing;
  }

  return "";
}
/// Determine whether \p word1 and \p word2 are the same word, ignoring the
/// case of the first character only.
///
/// The remaining characters (everything after the first) must match exactly,
/// including case.
///
/// \returns true when the words have equal length, their first characters are
/// equal after lowercasing, and their tails are identical. Two empty words
/// compare as the same.
bool camel_case::sameWordIgnoreFirstCase(StringRef word1, StringRef word2) {
  if (word1.size() != word2.size())
    return false;

  // Both words have the same length here; if that length is zero there is no
  // first character to inspect, and indexing [0] would be out of bounds.
  if (word1.empty())
    return true;

  if (clang::toLowercase(word1[0]) != clang::toLowercase(word2[0]))
    return false;

  return word1.substr(1) == word2.substr(1);
}
/// Lowercase the first character of \p string unless the string looks like
/// an acronym.
///
/// \param string The word to convert.
/// \param scratch Storage used for the converted word when a change is
/// needed; it is cleared and overwritten in that case.
///
/// \returns \p string unchanged when it is empty, does not start with an
/// uppercase letter, or starts with two uppercase letters; otherwise a
/// StringRef into \p scratch holding the converted word.
StringRef camel_case::toLowercaseWord(StringRef string,
                                      SmallVectorImpl<char> &scratch) {
  // Nothing to do for an empty string or one that does not start uppercase.
  const bool startsUppercase =
      !string.empty() && clang::isUppercase(string[0]);
  if (!startsUppercase)
    return string;

  // Two leading uppercase letters indicate an acronym, which is left alone.
  const bool looksLikeAcronym =
      string.size() > 1 && clang::isUppercase(string[1]);
  if (looksLikeAcronym)
    return string;

  // Build "<lowercased first letter><rest of the word>" in the scratch buffer.
  scratch.clear();
  scratch.push_back(clang::toLowercase(string[0]));
  const StringRef tail = string.substr(1);
  scratch.append(tail.begin(), tail.end());
  return StringRef(scratch.data(), scratch.size());
}
/// Uppercase the first character of \p string when it is a lowercase letter.
///
/// \param string The text to convert.
/// \param scratch Storage used for the converted text when a change is
/// needed; it is cleared and overwritten in that case.
///
/// \returns \p string unchanged when it is empty or does not start with a
/// lowercase letter; otherwise a StringRef into \p scratch holding the
/// converted text.
StringRef camel_case::toSentencecase(StringRef string,
                                     SmallVectorImpl<char> &scratch) {
  // Only a leading lowercase letter can be uppercased.
  const bool startsLowercase =
      !string.empty() && clang::isLowercase(string[0]);
  if (!startsLowercase)
    return string;

  // Build "<uppercased first letter><rest of the text>" in the scratch buffer.
  scratch.clear();
  scratch.push_back(clang::toUppercase(string[0]));
  const StringRef tail = string.substr(1);
  scratch.append(tail.begin(), tail.end());
  return StringRef(scratch.data(), scratch.size());
}
/// Find the longest known multiword that begins at \p first.
///
/// The first word is lowercased with toLowercaseWord() and looked up in Data;
/// each entry found there lists the words that must follow it, which are
/// compared with sameWordIgnoreFirstCase().
///
/// \param first Iterator to the word at which a multiword may start. Must not
/// equal \p last.
/// \param last End of the word sequence.
///
/// \returns An iterator to the final word of the longest matching multiword,
/// or \p first when no multiword matches. When several matches share the
/// greatest length, the one encountered first is kept.
Words::reverse_iterator MultiWordMap::match(Words::reverse_iterator first,
                                            Words::reverse_iterator last) const {
  assert(first != last && "Empty sequence cannot be matched");

  // Without an entry for the first word, no multiword can start here.
  llvm::SmallString<32> scratch;
  auto entry = Data.find(toLowercaseWord(*first, scratch));
  if (entry == Data.end())
    return first;

  /// The best candidate so far: the position of its final word and the
  /// number of trailing words it matched.
  Optional<std::pair<Words::reverse_iterator, unsigned>> longest;
  for (const auto &candidate : entry->second) {
    // Compare the words after \p first against the words of this candidate.
    auto cursor = first;
    ++cursor;

    bool allMatched = true;
    for (const auto &expected : candidate) {
      if (cursor != last && sameWordIgnoreFirstCase(expected, *cursor)) {
        ++cursor;
        continue;
      }

      // Ran out of input or hit a mismatch.
      allMatched = false;
      break;
    }

    // Keep this candidate only when it matched completely and is strictly
    // longer than what we already have. cursor is one past the final matched
    // word, hence std::prev.
    if (allMatched && (!longest || longest->second < candidate.size())) {
      longest = { std::prev(cursor), candidate.size() };
    }
  }

  return longest ? longest->first : first;
}