libBasic: implement extended grapheme cluster segmentation algorithm

This is only for the frontend, not for stdlib.  The implementation is very
slow; optimizing it is the next step.

rdar://16755123 rdar://16013860


Swift SVN r18928
This commit is contained in:
Dmitri Hrybenko
2014-06-16 14:20:43 +00:00
parent e310fd7f0b
commit 7704e19b7d
11 changed files with 2071 additions and 9 deletions

View File

@@ -28,6 +28,53 @@ static inline bool isSingleExtendedGraphemeCluster(StringRef S) {
return First == S;
}
/// Values of the Grapheme_Cluster_Break property of a code point.
///
/// The numeric value of each enumerator is significant: it is used as the
/// row index into ExtendedGraphemeClusterNoBoundaryRulesMatrix and as the bit
/// position within a row, so the values are spelled out explicitly and must
/// stay below the bit width of a row.
enum class GraphemeClusterBreakProperty : uint8_t {
  Other = 0,
  CR = 1,
  LF = 2,
  Control = 3,
  Extend = 4,
  Regional_Indicator = 5,
  Prepend = 6,
  SpacingMark = 7,
  L = 8,
  V = 9,
  T = 10,
  LV = 11,
  LVT = 12,
};
/// Extended grapheme cluster boundary rules, represented as a matrix.
///
/// Each array element is one row, selected by the numeric value of the
/// Grapheme_Cluster_Break property of the first code point. Within a row,
/// bit N (counting from the least significant bit) corresponds to the second
/// code point having the property value whose numeric value is N. A set bit
/// means that a boundary is prohibited between the two code points.
///
/// Only the declaration appears here; the table's contents are defined
/// elsewhere.
extern uint16_t ExtendedGraphemeClusterNoBoundaryRulesMatrix[];
/// Returns the value of the Grapheme_Cluster_Break property for a given code
/// point.
///
/// \param C the code point to look up.
GraphemeClusterBreakProperty getGraphemeClusterBreakProperty(uint32_t C);
/// Reports whether a code point with property value \p GCB1 is always
/// followed by an extended grapheme cluster boundary, whatever comes next.
///
/// Intended purely as an optimization: when this returns true, the caller
/// can skip computing the Grapheme_Cluster_Break property of the second code
/// point.
///
/// \returns true when the rules row for \p GCB1 has no bits set.
static inline bool
isExtendedGraphemeClusterBoundaryAfter(GraphemeClusterBreakProperty GCB1) {
  const unsigned RowIndex = static_cast<unsigned>(GCB1);
  // An all-zero row prohibits no boundary, so one always follows.
  return ExtendedGraphemeClusterNoBoundaryRulesMatrix[RowIndex] == 0;
}
/// Decides whether an extended grapheme cluster boundary lies between two
/// adjacent code points, given their Grapheme_Cluster_Break property values.
///
/// \param GCB1 property value of the first code point; selects the row.
/// \param GCB2 property value of the second code point; selects the bit
///        within that row.
/// \returns true if the selected bit is clear (a boundary is allowed), false
///          if it is set (a boundary is prohibited).
static inline bool
isExtendedGraphemeClusterBoundary(GraphemeClusterBreakProperty GCB1,
                                  GraphemeClusterBreakProperty GCB2) {
  const unsigned FirstIndex = static_cast<unsigned>(GCB1);
  const unsigned SecondBit = static_cast<unsigned>(GCB2);
  const unsigned NoBoundaryBits =
      ExtendedGraphemeClusterNoBoundaryRulesMatrix[FirstIndex];
  const bool BoundaryProhibited = ((NoBoundaryBits >> SecondBit) & 1u) != 0;
  return !BoundaryProhibited;
}
} // namespace unicode
} // namespace swift