Add some utilities for working with camelCase names.

Swift SVN r15802
This commit is contained in:
Doug Gregor
2014-04-02 15:18:32 +00:00
parent f4c018d3f7
commit d6a173fead
7 changed files with 354 additions and 16 deletions

View File

@@ -0,0 +1,188 @@
//===--- StringExtras.h - String Utilities ----------------------*- C++ -*-===//
//
// This source file is part of the Swift.org open source project
//
// Copyright (c) 2014 - 2015 Apple Inc. and the Swift project authors
// Licensed under Apache License v2.0 with Runtime Library Exception
//
// See http://swift.org/LICENSE.txt for license information
// See http://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
//
//===----------------------------------------------------------------------===//
//
// This file provides utilities for working with English words and
// camelCase names.
//
//===----------------------------------------------------------------------===//
#ifndef SWIFT_BASIC_STRINGEXTRAS_HPP
#define SWIFT_BASIC_STRINGEXTRAS_HPP
#include "swift/Basic/LLVM.h"
#include "llvm/ADT/StringRef.h"
#include <iterator>
namespace swift {
/// Describes the kind of preposition a word is.
///
/// Values of this type are produced by getPrepositionKind(), which
/// classifies a word against the entries of Prepositions.def.
enum PrepositionKind {
/// The word is not a preposition. This enumerator has the value 0, so
/// a PrepositionKind used directly as a condition is false exactly when
/// no preposition was recognized.
PK_None = 0,
/// The word matched an entry declared with DIRECTIONAL_PREPOSITION.
PK_Directional,
/// The word matched an entry declared with PREPOSITION.
PK_Nondirectional
};
/// Determine what kind of preposition the given word is, if any,
/// ignoring case.
///
/// \param word the single word to classify.
///
/// \returns PK_None when the word is not a known preposition, otherwise
/// the kind of preposition it is.
PrepositionKind getPrepositionKind(StringRef word);
namespace camel_case {
class WordIterator;
/// A bidirectional iterator that walks through the words in a camelCase
/// string.
///
/// Dereferencing yields the current word as a \c StringRef slice of the
/// underlying string. The boundaries of the neighboring words are computed
/// lazily and cached, so that stepping forward and then immediately back
/// (or vice versa) does not rescan the string.
///
/// Note that this is not technically a conforming bidirectional iterator,
/// because its reference type is not a true reference. But it quacks like
/// a duck.
class WordIterator {
  /// The string whose words are being visited.
  StringRef String;

  /// Index into \c String at which the current word starts.
  unsigned Position;

  /// Cached index at which the word after the current one starts. Only
  /// meaningful while \c NextPositionValid is set.
  mutable unsigned NextPosition : 31;
  mutable unsigned NextPositionValid : 1;

  /// Cached index at which the word before the current one starts. Only
  /// meaningful while \c PrevPositionValid is set.
  mutable unsigned PrevPosition : 31;
  mutable unsigned PrevPositionValid : 1;

  /// Fill in \c NextPosition by scanning forward from \c Position.
  void computeNextPosition() const;

  /// Fill in \c PrevPosition by scanning backward from \c Position.
  void computePrevPosition() const;

  /// Proxy used for the arrow operator of the word iterator.
  ///
  /// operator* returns a \c StringRef by value, so the proxy stores that
  /// value to give operator-> an object whose address it can hand out.
  class ArrowProxy {
    StringRef String;

  public:
    explicit ArrowProxy(StringRef string) : String(string) { }

    const StringRef *operator->() const {
      return &String;
    }
  };

public:
  typedef StringRef value_type;
  typedef StringRef reference;
  typedef ArrowProxy pointer;
  typedef int difference_type;
  typedef std::bidirectional_iterator_tag iterator_category;

  /// Create an iterator over \p string whose current word starts at
  /// index \p position.
  ///
  /// \param string the camelCase string to walk.
  /// \param position the index at which the current word starts; use
  /// \c string.size() for the past-the-end iterator.
  WordIterator(StringRef string, unsigned position)
    : String(string), Position(position),
      // Give the cached positions a definite value. The iterator is copied
      // by the postfix operators and by std::reverse_iterator, and copying
      // a never-written bit-field would read an indeterminate value.
      NextPosition(0), NextPositionValid(false),
      PrevPosition(0), PrevPositionValid(false)
  {
    assert(position <= string.size() &&
           "word iterator positioned past the end of the string");
  }

  /// Retrieve the current word. Must not be called on the past-the-end
  /// iterator.
  StringRef operator*() const {
    if (!NextPositionValid)
      computeNextPosition();

    return String.slice(Position, NextPosition);
  }

  ArrowProxy operator->() const {
    return ArrowProxy(**this);
  }

  /// Advance to the next word.
  WordIterator &operator++() {
    if (!NextPositionValid)
      computeNextPosition();

    // Save the previous position, so an immediate step back is free.
    PrevPosition = Position;
    PrevPositionValid = true;

    // Move to the next position.
    Position = NextPosition;

    // We don't know what lies ahead.
    NextPositionValid = false;
    return *this;
  }

  WordIterator operator++(int) {
    WordIterator tmp(*this);
    ++(*this);
    return tmp;
  }

  /// Step back to the previous word.
  WordIterator &operator--() {
    if (!PrevPositionValid)
      computePrevPosition();

    // Save the next position, so an immediate step forward is free.
    NextPosition = Position;
    NextPositionValid = true;

    // Move to the previous position.
    Position = PrevPosition;

    // We don't know what lies behind.
    PrevPositionValid = false;
    return *this;
  }

  WordIterator operator--(int) {
    WordIterator tmp(*this);
    --(*this);
    return tmp;
  }

  /// Two iterators over the same string are equal when they sit at the
  /// same position; the cached neighbor positions do not participate.
  friend bool operator==(const WordIterator &x, const WordIterator &y) {
    assert(x.String.data() == y.String.data() &&
           x.String.size() == y.String.size() &&
           "comparing word iterators from different strings");

    return x.Position == y.Position;
  }

  friend bool operator!=(const WordIterator &x, const WordIterator &y) {
    return !(x == y);
  }
};
/// Find the first camelCase word in the given string.
StringRef getFirstWord(StringRef string);
/// Find the last camelCase word in the given string.
StringRef getLastWord(StringRef string);
/// A wrapper that treats a string as a container of camelCase words.
class Words {
StringRef String;
public:
typedef WordIterator iterator;
typedef WordIterator const_iterator;
typedef std::reverse_iterator<WordIterator> reverse_iterator;
typedef std::reverse_iterator<WordIterator> const_reverse_iterator;
explicit Words(StringRef string) : String(string) { }
bool empty() const { return String.empty(); }
iterator begin() const { return WordIterator(String, 0); }
iterator end() const { return WordIterator(String, String.size()); }
reverse_iterator rbegin() const { return reverse_iterator(end()); }
reverse_iterator rend() const { return reverse_iterator(begin()); }
};
/// Build a \c Words view over the camelCase words of the given string.
inline Words getWords(StringRef string) {
  Words words(string);
  return words;
}
/// Find the last preposition in the given camelCase string.
///
/// \returns a pair containing the starting index of the last
/// preposition as well as the kind of preposition found.
std::pair<unsigned, PrepositionKind> findLastPreposition(StringRef string);
} // end namespace camel_case
}
#endif // SWIFT_BASIC_STRINGEXTRAS_HPP

View File

@@ -9,6 +9,7 @@ add_swift_library(swiftBasic
Punycode.cpp
QuotedString.cpp
SourceLoc.cpp
StringExtras.cpp
TaskQueue.cpp
ThreadSafeRefCounted.cpp
Version.cpp

103
lib/Basic/StringExtras.cpp Normal file
View File

@@ -0,0 +1,103 @@
//===--- StringExtras.cpp - String Utilities ------------------------------===//
//
// This source file is part of the Swift.org open source project
//
// Copyright (c) 2014 - 2015 Apple Inc. and the Swift project authors
// Licensed under Apache License v2.0 with Runtime Library Exception
//
// See http://swift.org/LICENSE.txt for license information
// See http://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
//
//===----------------------------------------------------------------------===//
//
// This file implements utilities for working with words and camelCase
// names.
//
//===----------------------------------------------------------------------===//
#include "swift/Basic/StringExtras.h"
#include "clang/Basic/CharInfo.h"
using namespace swift;
using namespace camel_case;
// Classify a word as a directional preposition, a non-directional
// preposition, or not a preposition at all.
//
// The checks are generated by including Prepositions.def with the two
// macros below defined. That file is not visible here; presumably it
// invokes one of the macros per known preposition -- confirm against the
// .def file. The first generated check that matches returns.
PrepositionKind swift::getPrepositionKind(StringRef word) {
// Each entry becomes a comparison of the word against the stringized
// macro argument (#Word) using equals_lower.
#define DIRECTIONAL_PREPOSITION(Word) \
if (word.equals_lower(#Word)) \
return PK_Directional;
#define PREPOSITION(Word) \
if (word.equals_lower(#Word)) \
return PK_Nondirectional;
#include "Prepositions.def"
// None of the generated checks matched.
return PK_None;
}
/// Scan forward from the current position to find where the next word
/// starts, and cache the answer in NextPosition.
void WordIterator::computeNextPosition() const {
  assert(Position < String.size() && "Already at end of string");

  const unsigned length = String.size();

  // Measure the run of uppercase letters that opens the current word.
  unsigned cursor = Position;
  for (; cursor < length && clang::isUppercase(String[cursor]); ++cursor) {
  }

  const bool startsWithAcronym = cursor - Position > 1;
  if (startsWithAcronym) {
    // An acronym that runs to the end of the string owns every remaining
    // character. Otherwise its final uppercase letter actually begins the
    // following word, so give that letter back.
    if (cursor != length)
      --cursor;
  } else {
    // Ordinary word: it extends up to (not including) the next uppercase
    // letter, or to the end of the string.
    for (; cursor < length && !clang::isUppercase(String[cursor]); ++cursor) {
    }
  }

  NextPosition = cursor;
  NextPositionValid = true;
}
/// Scan backward from the current position to find where the preceding
/// word starts, and cache the answer in PrevPosition.
void WordIterator::computePrevPosition() const {
  assert(Position > 0 && "Already at beginning of string");

  // Walk back over the trailing non-uppercase characters, if any.
  unsigned cursor = Position;
  for (; cursor > 0 && !clang::isUppercase(String[cursor - 1]); --cursor) {
  }

  const bool sawNonUppercase = cursor != Position;
  if (sawNonUppercase) {
    // A normal camelCase word (not an acronym). Unless the scan reached
    // the beginning of the string, the word starts at the uppercase
    // letter that stopped the scan, one character further back.
    if (cursor > 0)
      --cursor;
  } else {
    // Nothing but uppercase directly behind us, so the previous word is
    // an acronym: swallow the whole uppercase run.
    for (; cursor > 0 && clang::isUppercase(String[cursor - 1]); --cursor) {
    }
  }

  PrevPosition = cursor;
  PrevPositionValid = true;
}
/// Return the leading camelCase word of the string, or the empty string
/// when there is nothing to split.
StringRef camel_case::getFirstWord(StringRef string) {
  if (!string.empty()) {
    WordIterator first(string, 0);
    return *first;
  }

  return "";
}
/// Return the trailing camelCase word of the string, or the empty string
/// when there is nothing to split.
StringRef camel_case::getLastWord(StringRef string) {
  if (!string.empty()) {
    // Start past the end and step back once to land on the final word.
    WordIterator last(string, string.size());
    --last;
    return *last;
  }

  return "";
}

View File

@@ -24,6 +24,7 @@
#include "swift/AST/NameLookup.h"
#include "swift/AST/Types.h"
#include "swift/Basic/Range.h"
#include "swift/Basic/StringExtras.h"
#include "swift/ClangImporter/ClangImporterOptions.h"
#include "clang/AST/ASTContext.h"
#include "clang/Basic/CharInfo.h"
@@ -598,19 +599,6 @@ splitSelectorPieceAt(StringRef selector, unsigned index,
return { selector.substr(0, index), StringRef(buffer.data(), buffer.size()) };
}
/// Determine whether the given word (which should have its first
/// letter already capitalized) is a preposition.
///
/// The stored boolean indicates whether the preposition has
/// direction.
///
/// \returns true for a word listed in Prepositions.def via
/// DIRECTIONAL_PREPOSITION, false for a word listed via PREPOSITION, and
/// Nothing when the word matches no case of the switch.
static Optional<bool> isPreposition(StringRef word) {
return llvm::StringSwitch<Optional<bool>>(word)
// Each entry of Prepositions.def expands to one .Case of the switch.
#define DIRECTIONAL_PREPOSITION(Word) .Case(#Word, true)
#define PREPOSITION(Word) .Case(#Word, false)
#include "Prepositions.def"
.Default(Nothing);
}
std::pair<StringRef, StringRef>
ClangImporter::Implementation::splitFirstSelectorPiece(
StringRef selector,
@@ -648,7 +636,7 @@ ClangImporter::Implementation::splitFirstSelectorPiece(
break;
// If this word is a preposition, split here.
if (auto isPrep = isPreposition(
if (auto prepKind = getPrepositionKind(
selector.substr(wordStart, wordEnd - wordStart))) {
unsigned splitLocation;
switch (SplitPrepositions) {
@@ -664,7 +652,7 @@ ClangImporter::Implementation::splitFirstSelectorPiece(
break;
case SelectorSplitKind::DirectionalPreposition:
splitLocation = *isPrep ? wordStart : wordEnd;
splitLocation = prepKind == PK_Directional ? wordStart : wordEnd;
break;
}

View File

@@ -1,9 +1,11 @@
add_swift_unittest(SwiftBasicTests
SourceManager.cpp
TreeScopedHashTableTests.cpp
StringExtrasTest.cpp
SuccessorMapTest.cpp
)
target_link_libraries(SwiftBasicTests
swiftBasic
clangBasic
)

View File

@@ -0,0 +1,56 @@
#include "swift/Basic/StringExtras.h"
#include "gtest/gtest.h"
#include <algorithm>
using namespace swift;
// Walks "URLByPrependingHTTPToURL" forward and backward, checking both the
// word boundaries and the iterator's cached fast paths.
TEST(CamelCaseWordsTest, Iteration) {
  auto camelWords = camel_case::getWords("URLByPrependingHTTPToURL");

  // The string splits into six words, whether counted front-to-back...
  EXPECT_EQ(6, std::distance(camelWords.begin(), camelWords.end()));

  // ...or back-to-front.
  EXPECT_EQ(6, std::distance(camelWords.rbegin(), camelWords.rend()));

  // The first word is the leading acronym.
  auto cursor = camelWords.begin();
  EXPECT_EQ(*cursor, "URL");

  // One step forward.
  EXPECT_EQ(*++cursor, "By");

  // Straight back again: served from the cached previous position.
  EXPECT_EQ(*--cursor, "URL");

  // Straight forward again: served from the cached next position.
  EXPECT_EQ(*++cursor, "By");

  // Another step forward.
  EXPECT_EQ(*++cursor, "Prepending");

  // Two steps back: the second one has no cache and must rescan.
  --cursor;
  EXPECT_EQ(*--cursor, "URL");

  // Visit everything that remains, in order.
  EXPECT_EQ(*++cursor, "By");
  EXPECT_EQ(*++cursor, "Prepending");
  EXPECT_EQ(*++cursor, "HTTP");
  EXPECT_EQ(*++cursor, "To");
  EXPECT_EQ(*++cursor, "URL");

  // One more step lands on the end iterator.
  ++cursor;
  EXPECT_EQ(cursor, camelWords.end());
}