Implement the Indic grapheme breaking rules

This commit is contained in:
Alejandro Alonso
2022-01-05 16:18:54 -08:00
parent a0693c4649
commit 4a451829f8
7 changed files with 495 additions and 98 deletions

View File

@@ -62,6 +62,9 @@ __swift_uint32_t _swift_stdlib_getComposition(__swift_uint32_t x,
SWIFT_RUNTIME_STDLIB_INTERNAL
__swift_uint8_t _swift_stdlib_getGraphemeBreakProperty(__swift_uint32_t scalar);
SWIFT_RUNTIME_STDLIB_INTERNAL
__swift_bool _swift_stdlib_isLinkingConsonant(__swift_uint32_t scalar);
//===----------------------------------------------------------------------===//
// Unicode.Scalar.Properties
//===----------------------------------------------------------------------===//

View File

@@ -10,6 +10,8 @@
//
//===----------------------------------------------------------------------===//
import SwiftShims
/// ASCII carriage return. CR and LF are common special cases in grapheme
/// breaking logic, so each gets a named constant.
private var _CR: UInt8 { 0x0d }

/// ASCII line feed; the counterpart of `_CR`.
private var _LF: UInt8 { 0x0a }
@@ -175,13 +177,56 @@ extension _StringGuts {
}
}
extension Unicode.Scalar {
  /// Whether the runtime's `_swift_stdlib_isLinkingConsonant` lookup reports
  /// this scalar as a linking consonant.
  fileprivate var _isLinkingConsonant: Bool {
    return _swift_stdlib_isLinkingConsonant(value)
  }

  /// Whether this scalar is the virama of one of the scripts listed in the
  /// case below.
  fileprivate var _isVirama: Bool {
    switch value {
    case 0x94D, // Devanagari
         0x9CD, // Bengali
         0xACD, // Gujarati
         0xB4D, // Oriya
         0xC4D, // Telugu
         0xD4D: // Malayalam
      return true

    default:
      return false
    }
  }
}
internal struct _GraphemeBreakingState {
// When we're looking through an indic sequence, one of the requirements is
// that there is at LEAST 1 Virama present between two linking consonants.
// This value helps ensure that when we ultimately need to decide whether or
// not to break that we've at least seen 1 when walking.
var hasSeenVirama = false
// When walking forwards in a string, we need to know whether or not we've
// entered an emoji sequence to be able to eventually break after all of the
// emoji's various extenders and zero width joiners. This bit allows us to
// keep track of whether or not we're still in an emoji sequence when deciding
// to break.
var isInEmojiSequence: Bool = false
var isInEmojiSequence = false
// Similar to emoji sequences, we need to know not to break an Indic grapheme
// sequence. This sequence is (potentially) composed of many scalars and isn't
// as trivial as comparing two grapheme properties.
var isInIndicSequence = false
// When walking forward in a string, we need to not break on emoji flag
// sequences. Emoji flag sequences are composed of 2 regional indicators, so
@@ -190,7 +235,7 @@ internal struct _GraphemeBreakingState {
// is another regional indicator, we reach the same decision rule, but in this
// case we actually need to break because there's a boundary between emoji
// flag sequences.
var shouldBreakRI: Bool = false
var shouldBreakRI = false
}
extension _StringGuts {
@@ -288,8 +333,12 @@ extension _StringGuts {
// continue treating the current grapheme cluster as an emoji sequence.
var enterEmojiSequence = false
// Very similar to emoji sequences, but for Indic grapheme sequences.
var enterIndicSequence = false
defer {
state.isInEmojiSequence = enterEmojiSequence
state.isInIndicSequence = enterIndicSequence
}
switch (x, y) {
@@ -338,6 +387,26 @@ extension _StringGuts {
enterEmojiSequence = true
}
// If we're currently in an indic sequence (or if our lhs is a linking
// consonant), then this check and everything underneath ensures that
// we continue being in one and may check if this extend is a Virama.
if state.isInIndicSequence || scalar1._isLinkingConsonant {
if y == .extend {
let extendNormData = Unicode._NormData(scalar2, fastUpperbound: 0x300)
// If our extend's CCC is 0, then this rule does not apply.
guard extendNormData.ccc != 0 else {
return false
}
}
enterIndicSequence = true
if scalar2._isVirama {
state.hasSeenVirama = true
}
}
return false
// GB9a
@@ -370,6 +439,32 @@ extension _StringGuts {
// GB999
default:
// GB9c
if state.isInIndicSequence, state.hasSeenVirama, scalar2._isLinkingConsonant {
state.hasSeenVirama = false
return false
}
// Handle GB9c when walking backwards.
if isBackwards {
switch (x, scalar2._isLinkingConsonant) {
case (.extend, true):
let extendNormData = Unicode._NormData(scalar1, fastUpperbound: 0x300)
guard extendNormData.ccc != 0 else {
return true
}
return !checkIfInIndicSequence(index)
case (.zwj, true):
return !checkIfInIndicSequence(index)
default:
return true
}
}
return true
}
}
@@ -417,9 +512,7 @@ extension _StringGuts {
// | = We found our starting .extendedPictographic letting us
// know that we are in an emoji sequence so our initial
// break question is answered as NO.
internal func checkIfInEmojiSequence(
_ index: Int
) -> Bool {
internal func checkIfInEmojiSequence(_ index: Int) -> Bool {
var emojiIdx = String.Index(_encodedOffset: index)
guard emojiIdx != startIndex else {
@@ -448,6 +541,90 @@ extension _StringGuts {
return false
}
// When walking backwards, it's impossible to know whether we break when we
// see our first ((.extend|.zwj), .linkingConsonant) without walking
// further backwards. This walks the string backwards enough until we figure
// out whether or not to break this indic sequence. For example:
//
// Scalar view #1:
//
//     [.virama, .extend, .linkingConsonant]
//                       ^
//                       | = To be able to know whether or not to break these
//                           two, we need to walk backwards to determine if
//                           this is a legitimate indic sequence.
//      ^
//      | = The scalar sequence ends without a starting linking consonant,
//          so this is in fact not an indic sequence, so we can break the two.
//
// Scalar view #2:
//
//     [.linkingConsonant, .virama, .extend, .linkingConsonant]
//                                          ^
//                                          | = Same as above
//                         ^
//                         | = This is a virama, so we at least have seen
//                             1 to be able to return true if we see a
//                             linking consonant later.
//      ^
//      | = Is a linking consonant and we've seen a virama, so this is a
//          legitimate indic sequence, so do NOT break the initial question.
/// Walks backwards from the scalar just before `index` and reports whether
/// the scalars leading up to it form an Indic sequence: a linking consonant
/// followed only by extends (with non-zero CCC) and ZWJs, at least one of
/// which is a virama.
///
/// - Parameter index: Encoded offset of the position being asked about.
/// - Returns: `true` if a linking consonant is reached after a virama has
///   been seen; `false` in every other case, including when the start of the
///   string is reached first.
internal func checkIfInIndicSequence(_ index: Int) -> Bool {
  var cursor = String.Index(_encodedOffset: index)

  // Nothing precedes the start of the string, so there is nothing to join to.
  if cursor == startIndex {
    return false
  }

  let scalarView = String.UnicodeScalarView(self)
  scalarView.formIndex(before: &cursor)

  // The scalar directly before `index` may itself be the virama. Only its
  // virama-ness is inspected here; the loop below starts one scalar earlier.
  var sawVirama = false
  let nearest = scalarView[cursor]
  if nearest._isVirama {
    sawVirama = true
  }

  while cursor != startIndex {
    scalarView.formIndex(before: &cursor)

    let current = scalarView[cursor]
    let property = Unicode._GraphemeBreakProperty(from: current)

    // Reaching a linking consonant settles the question: the sequence is
    // legitimate only if a virama was seen on the way here.
    if current._isLinkingConsonant {
      return sawVirama
    }

    switch property {
    case .extend:
      // An extend whose CCC is 0 cannot take part in the sequence.
      let normData = Unicode._NormData(current, fastUpperbound: 0x300)
      if normData.ccc == 0 {
        return false
      }

      if current._isVirama {
        sawVirama = true
      }

    case .zwj:
      // ZWJs are transparent; keep walking.
      break

    default:
      // Any other scalar ends the walk without a linking consonant.
      return false
    }
  }

  // Ran out of string before finding a linking consonant.
  return false
}
// When walking backwards, it's impossible to know whether we break when we
// see our first (.regionalIndicator, .regionalIndicator) without walking
// further backwards. This walks the string backwards enough until we figure

View File

@@ -20,7 +20,7 @@
#define GRAPHEME_BREAK_DATA_COUNT 621
static __swift_uint32_t _swift_stdlib_graphemeBreakProperties[621] = {
static const __swift_uint32_t _swift_stdlib_graphemeBreakProperties[621] = {
0x3E00000, 0x400007F, 0x800000A9, 0xAD, 0x800000AE, 0x2DE00300, 0x20C00483, 0x25800591,
0x200005BF, 0x202005C1, 0x202005C4, 0x200005C7, 0x40A00600, 0x21400610, 0x61C, 0x2280064B,
0x20000670, 0x20C006D6, 0x400006DD, 0x20A006DF, 0x202006E7, 0x206006EA, 0x4000070F, 0x20000711,
@@ -101,4 +101,41 @@ static __swift_uint32_t _swift_stdlib_graphemeBreakProperties[621] = {
0xB701F947, 0x3EE0000, 0x2BEE0020, 0xFEE0080, 0x3DEE0100,
};
// Rank table that accompanies `_swift_stdlib_linkingConsonant`. The generator
// (`emitLinkingConsonant`) writes one entry per 64-bit word of the quick look
// bit arrays, followed by one entry per word of every chunk bit array. Each
// entry is the running count of set bits in the words that precede it within
// its own group, so every group starts again at 0x0.
// This is generated data: regenerate it rather than editing it by hand.
static const __swift_uint16_t _swift_stdlib_linkingConsonant_ranks[165] = {
0x0, 0xE, 0xE, 0x12, 0x13, 0x0, 0x0, 0x0, 0x25, 0x35, 0x0, 0x20, 0x25, 0x46, 0x4B, 0x0, 0x17,
0x23, 0x3D, 0x44, 0x0, 0xA, 0x24, 0x31, 0x4B, 0x0, 0x1, 0x27, 0x27, 0x49, 0x0, 0xF, 0x2E, 0x3A,
0x57, 0x0, 0x0, 0x1F, 0x2C, 0x2C, 0x0, 0x21, 0x2D, 0x3C, 0x3C, 0x0, 0x0, 0x0, 0xD, 0x2C, 0x0,
0x2D, 0x30, 0x30, 0x30, 0x0, 0x0, 0x0, 0x1E, 0x31, 0x0, 0x2C, 0x2C, 0x63, 0x72, 0x0, 0x0, 0x0,
0x29, 0x2F, 0x0, 0x26, 0x4A, 0x51, 0x51, 0x0, 0x18, 0x39, 0x54, 0x68, 0x0, 0x18, 0x2F, 0x53, 0x64,
0x0, 0x23, 0x39, 0x69, 0x72, 0x0, 0x0, 0x0, 0xE, 0x18, 0x0, 0x0, 0xF, 0x25, 0x25, 0x0, 0x25, 0x26,
0x49, 0x49, 0x0, 0x19, 0x37, 0x59, 0x61, 0x0, 0xC, 0x24, 0x52, 0x5D, 0x0, 0x8, 0x8, 0x8, 0x2A,
0x0, 0x0, 0x21, 0x21, 0x21, 0x0, 0x2, 0x21, 0x23, 0x43, 0x0, 0x16, 0x22, 0x3D, 0x44, 0x0, 0x0,
0x0, 0x22, 0x22, 0x0, 0x0, 0x0, 0x22, 0x22, 0x0, 0x22, 0x28, 0x4B, 0x73, 0x0, 0x0, 0x21, 0x21,
0x3F, 0x0, 0x0, 0x25, 0x39, 0x43, 0x0, 0x12, 0x12, 0x12, 0x12,
};
// Bit arrays consulted by `_swift_stdlib_isLinkingConsonant`. As written by
// the generator (`emitLinkingConsonant`), element 0 holds the number of quick
// look words that follow (0x5 here). The quick look words come next, with one
// bit per fixed-size chunk of the scalar space, and after them come the bit
// array words of every chunk that contains at least one linking consonant.
// This is generated data: regenerate it rather than editing it by hand.
static const __swift_uint64_t _swift_stdlib_linkingConsonant[166] = {
0x5, 0x7E0FF00, 0x0, 0x3C0000000, 0x400000000000000, 0x5BFF, 0x0, 0x0, 0x3FFFFFFFFE00000,
0xFF000000FF000000, 0x0, 0x3C5FDFFFFE0, 0x30000B000, 0x36DFDFFFFE0, 0x5E00, 0xFFE0, 0x3EDFDFF,
0xFFE0000002000000, 0xB000000003EDFDFF, 0xD620000000020000, 0xC718, 0x3FF, 0xFDFFFFE000000000,
0x700000003FF, 0xFDFFFFE000000000, 0x3EF, 0x40000000, 0x7FFFFFFFFE00000, 0x0, 0x2FFBFFFFFC000000,
0x7F, 0xFFFE000000000000, 0x7FFFFFFF, 0xF7D6000000000000, 0x7FAFFFFF, 0xF000, 0x0,
0xFFFFFEFF00000000, 0x1FFF, 0x0, 0x0, 0x1FFFFFFFF0000, 0xC0623C0300008000, 0x4003FFE1, 0x0, 0x0,
0x0, 0x0, 0xFFF8000000000000, 0xFFF80003FFF88003, 0x3, 0xFFFFFFFF0001DFF8, 0x7, 0x0, 0x0, 0x0,
0x0, 0x0, 0x7FFFFFFE0000, 0x7FFFF00000000, 0x0, 0xFFFFFFFFFFF, 0x0, 0xFFFFFFFF007FFFFF, 0x181FFF,
0x0, 0x0, 0x0, 0x1FE0000FFFFFFFF8, 0xFC00000000000000, 0xFFFF, 0xFFFFFFFF3800C001,
0xFFFFFFFF0000000F, 0xE0000000000F, 0x0, 0x0, 0xFFFFF78000000000, 0x3FFFFFFF00000007,
0xFFFC00000005FE3C, 0xFFFFF, 0x0, 0x3FFFFFC000000, 0x7FFFFF, 0xFFFFFFFF8E000000,
0xFF9F000000000007, 0x7C00, 0x1FFFFFFFFC0, 0xC40EFFFF00000000, 0xFFFFFFFFFFFF, 0x7FC00000000, 0x0,
0x0, 0x0, 0x3FFF000000000000, 0x7FD, 0x0, 0x0, 0xFEEF000100000000, 0x3FFFFF, 0x0, 0x0,
0xFFFFFFFFF80000, 0x20000000000000, 0xFFFFFFFFE000, 0x0, 0xFF80, 0x900000007FFFFF, 0x7FFFFFFE0,
0x7FFFFFFFE, 0xFF00000000000000, 0xFFFB, 0xFFF, 0xBFFFBD7000000000, 0x7FFFFFFFFC0001FF,
0xFFE0000000000000, 0xFDFF, 0x3ED, 0x0, 0x0, 0xFFFFFFFFC0000000, 0x1F, 0x0, 0xFFFFFFFF8000, 0x0,
0x0, 0x0, 0xC000000000000000, 0x7FFFFFFF, 0xC000000000000000, 0xFFFFFFFF, 0x0, 0xFFFFFC0000000000,
0x10007FF, 0x7FFFFFF00000000, 0x7F00000000, 0x0, 0x0, 0x0, 0xFFFFFFFFC000000, 0x0, 0x0, 0x0, 0x0,
0xFFFFFF6FF000, 0x0, 0x0, 0xFFFFFFFFC0000000, 0xF800000000000001, 0x7FFFFFFFF, 0xFFFFFFFFFF000,
0x0, 0x0, 0x7FFFFFFFC0000000, 0x0, 0xFFFFFFFC, 0x0, 0x0, 0x1FFFFFFFFF000, 0xFFFFF00000000000,
0x3FF, 0x0, 0x3FFFF, 0x0, 0x0, 0x0, 0x0,
};
#endif // #ifndef GRAPHEME_DATA_H

View File

@@ -12,6 +12,7 @@
#include "Common/GraphemeData.h"
#include "../SwiftShims/UnicodeData.h"
#include <limits>
SWIFT_RUNTIME_STDLIB_INTERNAL
__swift_uint8_t _swift_stdlib_getGraphemeBreakProperty(__swift_uint32_t scalar) {
@@ -57,3 +58,16 @@ __swift_uint8_t _swift_stdlib_getGraphemeBreakProperty(__swift_uint32_t scalar)
// property). Return the max value here to indicate .any.
return 0xFF;
}
// Reports whether `scalar` is present in the linking consonant bit arrays.
// The bit array lookup signals "not present" by returning the maximum
// `__swift_intptr_t` value; any other result counts as a hit.
SWIFT_RUNTIME_STDLIB_INTERNAL
__swift_bool _swift_stdlib_isLinkingConsonant(__swift_uint32_t scalar) {
  const auto notFound = std::numeric_limits<__swift_intptr_t>::max();

  const auto bitArrayIdx = _swift_stdlib_getScalarBitArrayIdx(
      scalar,
      _swift_stdlib_linkingConsonant,
      _swift_stdlib_linkingConsonant_ranks);

  return bitArrayIdx != notFound;
}

View File

@@ -0,0 +1,179 @@
//===----------------------------------------------------------------------===//
//
// This source file is part of the Swift.org open source project
//
// Copyright (c) 2022 Apple Inc. and the Swift project authors
// Licensed under Apache License v2.0 with Runtime Library Exception
//
// See https://swift.org/LICENSE.txt for license information
// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
//
//===----------------------------------------------------------------------===//
import GenUtils
/// Extracts the linking consonant scalar ranges from the text of
/// `IndicSyllabicCategory.txt`.
///
/// A data line contributes only when its property is exactly `Consonant` and
/// the line mentions one of the scripts the Indic grapheme breaking rule
/// applies to (Bengali, Devanagari, Gujarati, Oriya, Telugu, Malayalam).
///
/// - Parameter data: The full contents of the data file.
/// - Returns: The matching scalar ranges, merged by `flatten`.
func getLinkingConsonant(
  from data: String
) -> [ClosedRange<UInt32>] {
  // This rule only applies to the following scripts. The names are matched
  // in upper case against the whole data line (presumably the character name
  // in the trailing comment; verify against the data file).
  let scripts = [
    "Bengali", "Devanagari", "Gujarati", "Oriya", "Telugu", "Malayalam"
  ].map { $0.uppercased() }

  var unflattened: [(ClosedRange<UInt32>, String)] = []

  for line in data.split(separator: "\n") {
    // Skip comments
    guard !line.hasPrefix("#") else {
      continue
    }

    // A data line is "<scalars> ; <property> # <comment>". Anything without
    // both fields (e.g. a whitespace-only line) carries no data.
    let components = line.split(separator: ";")
    guard components.count >= 2 else {
      continue
    }

    // Get the property first because it may be one we don't care about.
    guard let rawProperty = components[1].split(separator: "#").first else {
      continue
    }
    let property = rawProperty.filter { !$0.isWhitespace }

    // Only scalars whose category is 'Consonant' can be linking consonants.
    guard property == "Consonant" else {
      continue
    }

    // Ensure the scalars belong to one of the supported scripts. This has to
    // skip the whole line, so it cannot be a `continue` inside a nested loop
    // over the script names.
    guard scripts.contains(where: { line.contains($0) }) else {
      continue
    }

    // "XXXX..YYYY" is a range and "XXXX" is a single scalar; splitting on "."
    // yields two pieces or one, so first/last covers both forms.
    let bounds = components[0]
      .filter { !$0.isWhitespace }
      .split(separator: ".")

    guard
      let first = bounds.first,
      let last = bounds.last,
      let lower = UInt32(first, radix: 16),
      let upper = UInt32(last, radix: 16),
      lower <= upper
    else {
      fatalError("Malformed scalar range in IndicSyllabicCategory data: \(line)")
    }

    unflattened.append((lower ... upper, "Consonant"))
  }

  return flatten(unflattened).map { $0.0 }
}
// Emits the two C tables used to look up linking consonants:
// `_swift_stdlib_linkingConsonant_ranks` and `_swift_stdlib_linkingConsonant`.
//
// The scalar space (0x110000 values) is divided into 64 * 64 equal chunks. A
// "quick look" level holds one bit per chunk, set when the chunk contains at
// least one scalar from `data`. Every such chunk then gets its own bit array
// with one bit per scalar in the chunk. The ranks table stores, for each
// emitted 64-bit word, the number of set bits in the words before it within
// the same group (the quick look group, or a single chunk).
//
// - data: the scalar ranges to mark as linking consonants.
// - result: the generated source text; both tables are appended to it.
func emitLinkingConsonant(
_ data: [ClosedRange<UInt32>],
into result: inout String
) {
// Quick look level: 64 bit arrays of 64 bits (8 bytes) each = 512 bytes.
var bitArrays: [BitArray] = .init(repeating: .init(size: 64), count: 64)
// Number of scalars per chunk: 0x110000 / 4096 = 272.
let chunkSize = 0x110000 / 64 / 64
// Indices of the chunks that contain at least one scalar from `data`, in
// ascending order.
var chunks: [Int] = []
for i in 0 ..< 64 * 64 {
let lower = i * chunkSize
let upper = lower + chunkSize - 1
// Chunk i maps to bit (i % 64) of quick look bit array (i / 64).
let idx = i / 64
let bit = i % 64
for scalar in lower ... upper {
if data.contains(where: { $0.contains(UInt32(scalar)) }) {
chunks.append(i)
bitArrays[idx][bit] = true
// One hit is enough to mark the chunk; move on to the next chunk.
break
}
}
}
// Remove the trailing all-zero quick look bit arrays: reverse, drop the
// leading zeros, reverse back.
// NOTE(review): the original comment here said this shrinks the quick look
// "down to 96 bytes from 512 bytes"; confirm that figure against the emitted
// table, whose leading size word gives the actual count.
var reducedBA = Array(bitArrays.reversed())
reducedBA = Array(reducedBA.drop {
$0.words == [0x0]
})
bitArrays = reducedBA.reversed()
// Keep a record of every rank for all the bitarrays.
var ranks: [UInt16] = []
// Record our quick look ranks: entry i is the total number of set bits in
// quick look bit arrays 0 ..< i, so entry 0 is always 0.
var lastRank: UInt16 = 0
for (i, _) in bitArrays.enumerated() {
guard i != 0 else {
ranks.append(0)
continue
}
var rank = UInt16(bitArrays[i - 1].words[0].nonzeroBitCount)
rank += lastRank
ranks.append(rank)
lastRank = rank
}
// Insert our quick look size at the beginning. This happens after the ranks
// above were recorded, so the size word has no rank entry of its own.
var size = BitArray(size: 64)
size.words = [UInt64(bitArrays.count)]
bitArrays.insert(size, at: 0)
// Second level: one bit array per marked chunk, with one bit per scalar.
for chunk in chunks {
var chunkBA = BitArray(size: chunkSize)
let lower = chunk * chunkSize
let upper = lower + chunkSize
for scalar in lower ..< upper {
if data.contains(where: { $0.contains(UInt32(scalar)) }) {
// The bit position is the scalar's offset within its chunk.
chunkBA[scalar % chunkSize] = true
}
}
// Append our chunk bit array's rank. The running count restarts at 0 for
// every chunk (this `lastRank` shadows the quick look one above).
var lastRank: UInt16 = 0
for (i, _) in chunkBA.words.enumerated() {
guard i != 0 else {
ranks.append(0)
continue
}
var rank = UInt16(chunkBA.words[i - 1].nonzeroBitCount)
rank += lastRank
ranks.append(rank)
lastRank = rank
}
// Split the chunk into single-word bit arrays so that every emitted element
// is exactly one 64-bit value.
bitArrays += chunkBA.words.map {
var ba = BitArray(size: 64)
ba.words = [$0]
return ba
}
}
emitCollection(
ranks,
name: "_swift_stdlib_linkingConsonant_ranks",
into: &result
)
emitCollection(
bitArrays,
name: "_swift_stdlib_linkingConsonant",
type: "__swift_uint64_t",
into: &result
) {
// Every element was built above as a single-word bit array.
assert($0.words.count == 1)
return "0x\(String($0.words[0], radix: 16, uppercase: true))"
}
}

View File

@@ -48,37 +48,6 @@ extension Unicode {
}
}
// Merges adjacent scalar ranges that carry the same grapheme break property.
// The input is first ordered by lower bound; a range is then folded into the
// previous entry when both have the same property and the range starts
// exactly one past the previous entry's upper bound. E.g:
//
// 0x0 ... 0xA = .control
// 0xB ... 0xB = .control
// 0xC ... 0x1F = .control
//
// becomes:
//
// 0x0 ... 0x1F = .control
func flatten(
  _ unflattened: [(ClosedRange<UInt32>, Unicode.GraphemeBreakProperty)]
) -> [(ClosedRange<UInt32>, Unicode.GraphemeBreakProperty)] {
  let ordered = unflattened.sorted { $0.0.lowerBound < $1.0.lowerBound }
  var merged: [(ClosedRange<UInt32>, Unicode.GraphemeBreakProperty)] = []

  for (range, property) in ordered {
    if let (previousRange, previousProperty) = merged.last,
       previousProperty == property,
       range.lowerBound == previousRange.upperBound + 1 {
      // Contiguous with the previous entry: widen it instead of appending.
      merged[merged.count - 1].0 = previousRange.lowerBound ... range.upperBound
    } else {
      merged.append((range, property))
    }
  }

  return merged
}
// Given a path to one of the Unicode data files, reads it and returns the
// unflattened list of scalar & grapheme break property.
//
@@ -150,7 +119,9 @@ func emit(
into result: inout String
) {
result += """
static __swift_uint32_t _swift_stdlib_graphemeBreakProperties[\(data.count)] = {
#define GRAPHEME_BREAK_DATA_COUNT \(data.count)
static const __swift_uint32_t _swift_stdlib_graphemeBreakProperties[\(data.count)] = {
"""
@@ -181,69 +152,20 @@ func emit(
value |= 1 << 31
}
return "0x\(String(value, radix: 16))"
return "0x\(String(value, radix: 16, uppercase: true))"
}
result += "\n};\n\n"
}
// Writes the stdlib internal routine for binary searching the grapheme array.
//
// Appends to `result` the C++ source of `_swift_stdlib_getGraphemeBreakProperty`,
// which binary searches `_swift_stdlib_graphemeBreakProperties` and returns
// the matching entry's enum value, or 0xFF when no entry covers the scalar.
//
// - dataCount: number of entries in the emitted array; it is interpolated
//   into the generated code as the initial upper search bound.
// - result: the generated source text being built up.
//
// NOTE(review): the `};` line right after the midpoint computation inside the
// string literal looks out of place in the emitted C++; confirm against the
// checked-in generator before relying on this text.
func emitAccessor(
_ dataCount: Int,
into result: inout String
) {
result += """
SWIFT_RUNTIME_STDLIB_INTERNAL
__swift_uint8_t _swift_stdlib_getGraphemeBreakProperty(__swift_uint32_t scalar) {
auto low = 0;
auto high = \(dataCount) - 1;
while (high >= low) {
auto idx = low + (high - low) / 2;
};
auto entry = _swift_stdlib_graphemeBreakProperties[idx];
// Shift the enum and range count out of the value.
auto lower = (entry << 11) >> 11;
// Shift the enum out first, then shift out the scalar value.
auto upper = lower + ((entry << 3) >> 24);
// Shift everything out.
auto enumValue = (__swift_uint8_t)(entry >> 29);
// Special case: extendedPictographic who used an extra bit for the range.
if (enumValue == 5) {
upper = lower + ((entry << 2) >> 23);
}
if (scalar >= lower && scalar <= upper) {
return enumValue;
}
if (scalar > upper) {
low = idx + 1;
continue;
}
if (scalar < lower) {
high = idx - 1;
continue;
}
}
// If we made it out here, then our scalar was not found in the grapheme
// array (this occurs when a scalar doesn't map to any grapheme break
// property). Return the max value here to indicate .any.
return 0xFF;
}
"""
}
// Main entry point into the grapheme break property generator.
func generateGraphemeBreakProperty() {
var result = readFile("Input/UnicodeGrapheme.cpp")
var result = readFile("Input/GraphemeData.h")
let baseData = getGraphemeBreakPropertyData(
for: "Data/GraphemeBreakProperty.txt"
@@ -268,9 +190,20 @@ func generateGraphemeBreakProperty() {
emit(data, into: &result)
emitAccessor(data.count, into: &result)
// Handle the CLDR grapheme breaking rules:
write(result, to: "Output/UnicodeGrapheme.cpp")
let indicSyllabicCategory = readFile("Data/IndicSyllabicCategory.txt")
let consonants = getLinkingConsonant(from: indicSyllabicCategory)
emitLinkingConsonant(consonants, into: &result)
result += """
#endif // #ifndef GRAPHEME_DATA_H
"""
write(result, to: "Output/Common/GraphemeData.h")
}
generateGraphemeBreakProperty()

View File

@@ -2280,4 +2280,58 @@ StringTests.test("NormalizationCheck/Opaque")
#endif
}
/// Walks `string` backwards from `endIndex` to `startIndex` one `Character`
/// at a time and expects the number of steps taken to equal `count`.
///
/// - Parameters:
///   - count: The expected number of `Character`s.
///   - string: The string to walk.
func expectBidirectionalCount(_ count: Int, _ string: String) {
  var steps = 0
  var index = string.endIndex

  while index != string.startIndex {
    string.formIndex(before: &index)
    steps += 1
  }

  // Expected value first, measured value second, matching the other
  // `expectEqual` calls in this test so a failure labels the values correctly.
  expectEqual(count, steps)
}
// Checks the Indic grapheme breaking rule in both directions: for every
// string, the forward `count` and the backward walk done by
// `expectBidirectionalCount` must agree on the number of Characters.
// Code points used (names per the Unicode charts): U+0915 KA, U+0924 TA and
// U+092F YA are Devanagari consonants, U+094D is the Devanagari virama,
// U+093C is the Devanagari nukta, U+200D is ZERO WIDTH JOINER.
StringTests.test("GraphemeBreaking.Indic Sequences") {
// Two consonants with no virama between them: they break.
let test1 = "\u{0915}\u{0924}" // 2
expectEqual(2, test1.count)
expectBidirectionalCount(2, test1)
// Consonant, virama, consonant: a single cluster.
let test2 = "\u{0915}\u{094D}\u{0924}" // 1
expectEqual(1, test2.count)
expectBidirectionalCount(1, test2)
// More than one virama between the consonants still joins them.
let test3 = "\u{0915}\u{094D}\u{094D}\u{0924}" // 1
expectEqual(1, test3.count)
expectBidirectionalCount(1, test3)
// A ZWJ after the virama does not interrupt the sequence.
let test4 = "\u{0915}\u{094D}\u{200D}\u{0924}" // 1
expectEqual(1, test4.count)
expectBidirectionalCount(1, test4)
// Nukta and ZWJ before the virama.
let test5 = "\u{0915}\u{093C}\u{200D}\u{094D}\u{0924}" // 1
expectEqual(1, test5.count)
expectBidirectionalCount(1, test5)
// Nukta before the virama, ZWJ after it.
let test6 = "\u{0915}\u{093C}\u{094D}\u{200D}\u{0924}" // 1
expectEqual(1, test6.count)
expectBidirectionalCount(1, test6)
// Three consonants chained by two viramas.
let test7 = "\u{0915}\u{094D}\u{0924}\u{094D}\u{092F}" // 1
expectEqual(1, test7.count)
expectBidirectionalCount(1, test7)
// The virama is followed by Latin "a" (U+0061), not a linking consonant.
let test8 = "\u{0915}\u{094D}\u{0061}" // 2
expectEqual(2, test8.count)
expectBidirectionalCount(2, test8)
// The sequence starts with Latin "a", not a linking consonant.
let test9 = "\u{0061}\u{094D}\u{0924}" // 2
expectEqual(2, test9.count)
expectBidirectionalCount(2, test9)
// Same as above, starting with "?" (U+003F).
let test10 = "\u{003F}\u{094D}\u{0924}" // 2
expectEqual(2, test10.count)
expectBidirectionalCount(2, test10)
}
runAllTests()