mirror of
https://github.com/apple/swift.git
synced 2025-12-14 20:36:38 +01:00
libBasic: implement extended grapheme cluster segmentation algorithm
This is only for the frontend, not for stdlib. The implementation is very slow; optimizing it is the next step. rdar://16755123 rdar://16013860 Swift SVN r18928
This commit is contained in:
@@ -28,6 +28,53 @@ static inline bool isSingleExtendedGraphemeCluster(StringRef S) {
|
||||
return First == S;
|
||||
}
|
||||
|
||||
/// Values of the Grapheme_Cluster_Break property.
///
/// The numeric value of each enumerator is significant: it is used both as a
/// row index into ExtendedGraphemeClusterNoBoundaryRulesMatrix and as a bit
/// position within a row, so the values must stay dense and below 16.
enum class GraphemeClusterBreakProperty : uint8_t {
  Other = 0,
  CR = 1,
  LF = 2,
  Control = 3,
  Extend = 4,
  Regional_Indicator = 5,
  Prepend = 6,
  SpacingMark = 7,
  L = 8,
  V = 9,
  T = 10,
  LV = 11,
  LVT = 12,
};
|
||||
|
||||
/// Matrix encoding the extended grapheme cluster boundary rules.
///
/// Each element is one row, selected by the Grapheme_Cluster_Break property
/// value of the first code point. Within a row, the bit whose position
/// (counting from the least significant bit) equals the property value of the
/// second code point is set when a boundary is prohibited between the two
/// code points.
extern uint16_t ExtendedGraphemeClusterNoBoundaryRulesMatrix[];
|
||||
|
||||
/// Returns the value of the Grapheme_Cluster_Break property for a given code
|
||||
/// point.
|
||||
GraphemeClusterBreakProperty getGraphemeClusterBreakProperty(uint32_t C);
|
||||
|
||||
/// Returns true if there is always an extended grapheme cluster boundary
|
||||
/// after a code point with a given property value. Use only for optimization,
|
||||
/// to skip calculating Grapheme_Cluster_Break property for the second code
|
||||
/// point.
|
||||
static inline bool
|
||||
isExtendedGraphemeClusterBoundaryAfter(GraphemeClusterBreakProperty GCB1) {
|
||||
auto RuleRow =
|
||||
ExtendedGraphemeClusterNoBoundaryRulesMatrix[static_cast<unsigned>(GCB1)];
|
||||
return RuleRow == 0;
|
||||
}
|
||||
|
||||
/// Determine if there is an extended grapheme cluster boundary between code
|
||||
/// points with given Grapheme_Cluster_Break property values.
|
||||
static inline bool
|
||||
isExtendedGraphemeClusterBoundary(GraphemeClusterBreakProperty GCB1,
|
||||
GraphemeClusterBreakProperty GCB2) {
|
||||
auto RuleRow =
|
||||
ExtendedGraphemeClusterNoBoundaryRulesMatrix[static_cast<unsigned>(GCB1)];
|
||||
return !(RuleRow & (1 << static_cast<unsigned>(GCB2)));
|
||||
}
|
||||
|
||||
} // namespace unicode
|
||||
} // namespace swift
|
||||
|
||||
|
||||
Reference in New Issue
Block a user