//===--- StringExtras.h - String Utilities ----------------------*- C++ -*-===// // // This source file is part of the Swift.org open source project // // Copyright (c) 2014 - 2017 Apple Inc. and the Swift project authors // Licensed under Apache License v2.0 with Runtime Library Exception // // See https://swift.org/LICENSE.txt for license information // See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors // //===----------------------------------------------------------------------===// // // This file provides utilities for working with English words and // camelCase names. // //===----------------------------------------------------------------------===// #ifndef SWIFT_BASIC_STRINGEXTRAS_H #define SWIFT_BASIC_STRINGEXTRAS_H #include "swift/Basic/LLVM.h" #include "swift/Basic/OptionSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringMap.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/StringSet.h" #include "llvm/Support/Allocator.h" #include #include namespace swift { /// Determine whether the given string can be an argument label. /// /// \seealso Token::canBeArgumentLabel() bool canBeArgumentLabel(StringRef identifier); /// Determine whether the given string can be the name of a member. bool canBeMemberName(StringRef identifier); /// Describes the kind of preposition a word is. enum PrepositionKind { PK_None = 0, PK_Directional, PK_Nondirectional }; /// Determine what kind of preposition the given word is, if any, /// ignoring case. PrepositionKind getPrepositionKind(StringRef word); /// Describes the part of speech of a particular word. enum class PartOfSpeech { Unknown, Preposition, Verb, Gerund, }; /// Determine the part of speech for the given word. PartOfSpeech getPartOfSpeech(StringRef word); /// Scratch space used for returning a set of StringRefs. class StringScratchSpace { llvm::BumpPtrAllocator Allocator; public: StringRef copyString(StringRef string); }; namespace camel_case { class WordIterator; /// A bidirectional iterator that walks through the words in a camelCase /// string. /// /// Note that this iterator is not technically conforming bidirectional /// iterator, because it's reference type is not a true reference. But it /// quacks like a duck. class WordIterator { StringRef String; unsigned Position; mutable unsigned NextPosition : 31; mutable unsigned NextPositionValid : 1; mutable unsigned PrevPosition : 31; mutable unsigned PrevPositionValid : 1; void computeNextPosition() const; void computePrevPosition() const; /// Proxy used for the arrow operator of the word iterator. class ArrowProxy { StringRef String; public: explicit ArrowProxy(StringRef string) : String(string) { } const StringRef *operator->() const { return &String; } }; public: typedef StringRef value_type; typedef StringRef reference; typedef ArrowProxy pointer; typedef int difference_type; typedef std::bidirectional_iterator_tag iterator_category; WordIterator(StringRef string, unsigned position) : String(string), Position(position) { assert(!string.empty()); NextPositionValid = false; PrevPositionValid = false; } StringRef operator*() const { if (!NextPositionValid) computeNextPosition(); return String.slice(Position, NextPosition); } ArrowProxy operator->() const { return ArrowProxy(**this); } WordIterator &operator++() { if (!NextPositionValid) computeNextPosition(); // Save the previous position. PrevPosition = Position; PrevPositionValid = true; // Move to the next position. Position = NextPosition; // We don't know what lies ahead. NextPositionValid = false; return *this; } WordIterator operator++(int) { WordIterator tmp(*this); ++(*this); return tmp; } WordIterator &operator--() { if (!PrevPositionValid) computePrevPosition(); // Save the next position. NextPosition = Position; NextPositionValid = true; // Move to the previous position. Position = PrevPosition; // We don't know what lies behind. PrevPositionValid = false; return *this; } WordIterator operator--(int) { WordIterator tmp(*this); --(*this); return tmp; } friend bool operator==(const WordIterator &x, const WordIterator &y) { assert(x.String.data() == y.String.data() && x.String.size() == y.String.size() && "comparing word iterators from different strings"); return x.Position == y.Position; } friend bool operator!=(const WordIterator &x, const WordIterator &y) { return !(x == y); } /// Retrieve the position of this iterator within the underlying /// string. unsigned getPosition() const { return Position; } /// Retrieve the string up until this iterator StringRef getPriorStr() const { return String.slice(0, Position); } /// Retrieve the rest of the string (including this position) StringRef getRestOfStr() const { return String.slice(Position, String.size()); } }; /// Find the first camelCase word in the given string. StringRef getFirstWord(StringRef string); /// Find the last camelCase word in the given string. StringRef getLastWord(StringRef string); /// A wrapper that treats a string as a container of camelCase words. class Words { StringRef String; public: typedef WordIterator iterator; typedef WordIterator const_iterator; typedef std::reverse_iterator reverse_iterator; typedef std::reverse_iterator const_reverse_iterator; explicit Words(StringRef string) : String(string) { } bool empty() const { return String.empty(); } iterator begin() const { return WordIterator(String, 0); } iterator end() const { return WordIterator(String, String.size()); } reverse_iterator rbegin() const { return reverse_iterator(end()); } reverse_iterator rend() const { return reverse_iterator(begin()); } }; /// Retrieve the camelCase words in the given string. inline Words getWords(StringRef string) { return Words(string); } /// Check whether the two words are the same, ignoring the case of the /// first letter. bool sameWordIgnoreFirstCase(StringRef word1, StringRef word2); /// Check whether the first word starts with the second word, ignoring the /// case of the first letter. bool startsWithIgnoreFirstCase(StringRef word1, StringRef word2); /// Lowercase the first word within the given camelCase string. /// /// \param string The string to lowercase. /// \param scratch Scratch buffer used to form the resulting string. /// /// \returns the string with the first word lowercased. When the /// first word is an acronym, the string will be returned /// unchanged. StringRef toLowercaseWord(StringRef string, SmallVectorImpl &scratch); /// Lowercase the first word within the given camelCase string. /// /// \param string The string to lowercase. /// \param scratch Scratch buffer used to form the resulting string. /// /// \returns the string with the first word lowercased. When the /// first word is an acronym, the string will be returned /// unchanged. StringRef toLowercaseWord(StringRef string, StringScratchSpace &scratch); /// Lowercase the first word within the given camelCase string. /// /// \param string The string to lowercase. /// \param scratch Scratch buffer used to form the resulting string. /// /// \returns the string with the first word lowercased, including /// initialisms. StringRef toLowercaseInitialisms(StringRef string, StringScratchSpace &scratch); /// Lowercase the first word within the given camelCase string. /// /// \param string The string to lowercase. /// \param scratch Scratch buffer used to form the resulting string. /// /// \returns the string with the first word lowercased, including /// initialisms. StringRef toLowercaseInitialisms(StringRef string, SmallVectorImpl &scratch); /// Sentence-case the given camelCase string by turning the first /// letter into an uppercase letter. /// /// \param string The string to sentence-case. /// \param scratch Scratch buffer used to form the resulting string. /// /// \returns the string in sentence case. StringRef toSentencecase(StringRef string, SmallVectorImpl &scratch); /// Drop the class prefix (i..e, a 2-3 character acronym) from the front /// of the given string. /// /// \param string The string whose prefix will be dropped. /// /// \returns the result of dropping the prefix from \p string, or the /// whole string if it has no prefix. StringRef dropPrefix(StringRef string); /// Append the given string to the given buffer, sentence-casing the string /// so that the result reads as separate camelCase words. /// /// \param buffer The buffer to append to. /// \param string The new string to append, which will be sentence-cased. /// /// \returns the contents of the buffer after appending. StringRef appendSentenceCase(SmallVectorImpl &buffer, StringRef string); /// Search the given camelCase string for the first occurrence of /// the second string as a complete word. /// /// \param string The string to search. /// \param word The string to search for; must be a single Title word /// \returns the index of the start of the match, or String::npos if /// it was not found size_t findWord(StringRef string, StringRef word); } // end namespace camel_case /// Describes the role that a particular name has within a /// signature, which can affect how we omit needless words. enum class NameRole { /// The base name of a function or method. BaseName, /// The base name of a method where the omission type name is the /// 'self' type. BaseNameSelf, /// The first parameter of a function or method. FirstParameter, // Subsequent parameters in a function or method. SubsequentParameter, // The name of a property. Property, // A partial name; used internally. Partial, }; /// Flags used by \c OmissionTypeName to describe the input type. enum class OmissionTypeFlags { /// Whether the parameter with this type has a default argument. DefaultArgument = 0x01, /// Whether this parameter is of some Boolean type. Boolean = 0x02, /// Whether this parameter is of some function/block type. Function = 0x04, }; /// Options that described omitted types. typedef OptionSet OmissionTypeOptions; /// Describes the name of a type as is used for omitting needless /// words. struct OmissionTypeName { /// The name of the type. StringRef Name; /// For a collection type, the name of the element type. StringRef CollectionElement; /// Options that describe this type. OmissionTypeOptions Options; /// Construct a type name. OmissionTypeName(StringRef name = StringRef(), OmissionTypeOptions options = None, StringRef collectionElement = StringRef()) : Name(name), CollectionElement(collectionElement), Options(options) { } /// Construct a type name. OmissionTypeName(const char * name, OmissionTypeOptions options = None, StringRef collectionElement = StringRef()) : Name(name), CollectionElement(collectionElement), Options(options) { } /// Produce a new type name for omission with a default argument. OmissionTypeName withDefaultArgument(bool defaultArgument = true) { OmissionTypeName result(*this); if (defaultArgument) result.Options |= OmissionTypeFlags::DefaultArgument; else result.Options -= OmissionTypeFlags::DefaultArgument; return result; } /// Determine whether the parameter corresponding to this type has a default /// argument. bool hasDefaultArgument() const { return Options.contains(OmissionTypeFlags::DefaultArgument); } /// Whether this type is a Boolean type. bool isBoolean() const { return Options.contains(OmissionTypeFlags::Boolean); } /// Whether this type is a function/block type. bool isFunction() const { return Options.contains(OmissionTypeFlags::Function); } /// Determine whether the type name is empty. bool empty() const { return Name.empty(); } friend bool operator==(const OmissionTypeName &lhs, const OmissionTypeName &rhs) { return lhs.Name == rhs.Name && (lhs.CollectionElement.empty() || rhs.CollectionElement.empty() || lhs.CollectionElement == rhs.CollectionElement); } friend bool operator!=(const OmissionTypeName &lhs, const OmissionTypeName &rhs) { return !(lhs == rhs); } }; /// Match the given type name at the beginning of the given name, /// returning the remainder of the name. /// /// For example, matching "stringByAppendingString" to the type "NSString" /// would produce "ByAppendingString". StringRef matchLeadingTypeName(StringRef name, OmissionTypeName typeName); /// Describes a set of names with an inheritance relationship. class InheritedNameSet { const InheritedNameSet *Parent; llvm::StringSet<> Names; public: /// Construct a new inherited name set with the given parent. explicit InheritedNameSet(const InheritedNameSet *parent) : Parent(parent) { } // Add a new name to the set. void add(StringRef name); /// Determine whether this set includes the given name. bool contains(StringRef name) const; }; /// Omit needless words for a declaration. /// /// \param baseName The base name of the declaration. This value may be /// changed if any words are removed. /// /// \param argNames The names of the arguments to the function, or empty if /// the declaration is not a function. The values in this array may be changed if any words are removed. /// /// \param firstParamName The name of the first parameter. /// /// \param resultType The name of the result type. /// /// \param contextType The name of the type of the enclosing context, /// e.g., the class name. /// /// \param paramTypes The names of the parameter types for the /// function, or empty if the declaration is not a function. /// /// \param returnsSelf Whether the result of the declaration is 'Self' /// (in Swift) or 'instancetype' (in Objective-C). /// /// \param isProperty Whether this is the name of a property. /// /// \param allPropertyNames The set of property names in the enclosing context. /// /// \param scratch Scratch space that will be used for modifications beyond /// just chopping names. /// /// \returns true if any words were omitted, false otherwise. bool omitNeedlessWords(StringRef &baseName, MutableArrayRef argNames, StringRef firstParamName, OmissionTypeName resultType, OmissionTypeName contextType, ArrayRef paramTypes, bool returnsSelf, bool isProperty, const InheritedNameSet *allPropertyNames, StringScratchSpace &scratch); } // end namespace swift #endif // SWIFT_BASIC_STRINGEXTRAS_H