Files
swift-mirror/stdlib/public/core/StringNormalization.swift
Alejandro Alonso 98aaa157ec Implement native normalization for String
use >/< instead of !=

fix some bugs

fix
2021-09-29 14:20:21 -07:00

67 lines
2.2 KiB
Swift

//===--- StringNormalization.swift ----------------------------------------===//
//
// This source file is part of the Swift.org open source project
//
// Copyright (c) 2014 - 2018 Apple Inc. and the Swift project authors
// Licensed under Apache License v2.0 with Runtime Library Exception
//
// See https://swift.org/LICENSE.txt for license information
// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
//
//===----------------------------------------------------------------------===//
import SwiftShims
extension Unicode.Scalar {
  // Terminology used below:
  //
  // Normalization boundary - a position in a string such that the text to its
  // left and the text to its right can each be normalized on their own;
  // concatenating the two results yields exactly what normalizing the whole
  // string at once would have produced.
  //
  // Normalization segment - the run of code units lying between two adjacent
  // normalization boundaries, with no boundary inside it. Normalizing a
  // segment may make it longer or shorter, and may even split it into new
  // sub-segments.

  // Quick check for whether this scalar starts an NFC segment.
  internal var _isNFCStarter: Bool {
    // Fast path: every scalar below U+0300 is NFC_QC and has a boundary
    // before it, so no table lookup is needed.
    guard value >= 0x300 else {
      return true
    }

    // Look up the packed normalization data for this scalar. The canonical
    // combining class is read from the bits above the low three, and the NFC
    // quick check passes when the bits selected by the mask 0x6 are all clear.
    let packed = _swift_stdlib_getNormData(value)
    let combiningClass = packed >> 3
    let quickCheckBits = packed & 0x6

    // A scalar is an NFC starter only if it has a CCC of 0 (a normalization
    // boundary precedes it) and it is also NFC_QC.
    return combiningClass == 0 && quickCheckBits == 0
  }
}
extension UnsafeBufferPointer where Element == UInt8 {
  // Reports whether a normalization boundary lies immediately before the
  // code unit at `index` in this UTF-8 buffer.
  //
  // `index` must not point at a UTF-8 continuation byte; this is checked with
  // an internal invariant rather than handled gracefully.
  internal func hasNormalizationBoundary(before index: Int) -> Bool {
    // The very start and the very end of the buffer are always boundaries.
    guard index != 0 && index != count else {
      return true
    }

    let leadByte = self[_unchecked: index]
    _internalInvariant(!UTF8.isContinuation(leadByte))

    // Fast path for scalars below U+0300: the UTF-8 encoding of U+0300 begins
    // with the byte 0xCC, so any smaller leading byte encodes a lower scalar.
    guard leadByte >= 0xCC else {
      return true
    }

    // Slow path: decode the scalar that starts here and ask it directly.
    let scalar = _decodeScalar(self, startingAt: index).0
    return scalar._isNFCStarter
  }

  // Reports whether `index` falls on a Unicode scalar boundary, i.e. it is
  // either the end of the buffer or the position of a byte that is not a
  // UTF-8 continuation byte.
  internal func isOnUnicodeScalarBoundary(_ index: Int) -> Bool {
    if index >= count {
      // The only index at or past the end that callers may pass is `count`.
      _internalInvariant(index == count)
      return true
    }

    let byte = self[index]
    return !UTF8.isContinuation(byte)
  }
}