//===--- StringNormalization.swift ----------------------------------------===// // // This source file is part of the Swift.org open source project // // Copyright (c) 2014 - 2018 Apple Inc. and the Swift project authors // Licensed under Apache License v2.0 with Runtime Library Exception // // See https://swift.org/LICENSE.txt for license information // See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors // //===----------------------------------------------------------------------===// import SwiftShims // A namespace for various heuristics // internal enum _Normalization { // ICU's NFC unorm2 instance internal static var _nfcNormalizer: OpaquePointer = { var err = __swift_stdlib_U_ZERO_ERROR let normalizer = __swift_stdlib_unorm2_getNFCInstance(&err) guard err.isSuccess else { // This shouldn't be possible unless some deep (unrecoverable) system // invariants are violated fatalError("Unable to talk to ICU") } return normalizer }() // Whether this buffer of code units satisfies the quickCheck=YES property for // normality checking under NFC. // // ICU provides a quickCheck, which may yield "YES", "NO", or "MAYBE". YES // means that the string was determined to definitely be normal under NFC. In // practice, the majority of Strings have this property. Checking for YES is // considerably faster than trying to distinguish between NO and MAYBE. internal static func _prenormalQuickCheckYes( _ buffer: UnsafeBufferPointer ) -> Bool { var err = __swift_stdlib_U_ZERO_ERROR let length = __swift_stdlib_unorm2_spanQuickCheckYes( _Normalization._nfcNormalizer, buffer.baseAddress._unsafelyUnwrappedUnchecked, Int32(buffer.count), &err) guard err.isSuccess else { // This shouldn't be possible unless some deep (unrecoverable) system // invariants are violated fatalError("Unable to talk to ICU") } return length == buffer.count } internal static func _prenormalQuickCheckYes( _ string: _UnmanagedString ) -> Bool { var err = __swift_stdlib_U_ZERO_ERROR let length = __swift_stdlib_unorm2_spanQuickCheckYes( _Normalization._nfcNormalizer, string.start, Int32(string.count), &err) guard err.isSuccess else { // This shouldn't be possible unless some deep (unrecoverable) system // invariants are violated fatalError("Unable to talk to ICU") } return length == string.count } } extension UnicodeScalar { // Normalization boundary - a place in a string where everything left of the // boundary can be normalized independently from everything right of the // boundary. The concatenation of each result is the same as if the entire // string had been normalized as a whole. // // Normalization segment - a sequence of code units between two normalization // boundaries (without any boundaries in the middle). Note that normalization // segments can, as a process of normalization, expand, contract, and even // produce new sub-segments. // Whether this scalar value always has a normalization boundary before it. internal var _hasNormalizationBoundaryBefore: Bool { _sanityCheck(Int32(exactly: self.value) != nil, "top bit shouldn't be set") let value = Int32(bitPattern: self.value) return 0 != __swift_stdlib_unorm2_hasBoundaryBefore( _Normalization._nfcNormalizer, value) } // Whether the supported version of Unicode has assigned a code point to this // value. internal var _isDefined: Bool { return __swift_stdlib_u_isdefined(Int32(self.value)) != 0 } // A property tracked in ICU regarding the scalar's potential non-normality; // this is equivalent to whether quickCheck=NO. A subset of such scalars may // expand under NFC normalization, and a subset of those may expand into // multiple segments. internal var _hasFullCompExclusion: Bool { _sanityCheck(Int32(exactly: self.value) != nil, "top bit shouldn't be set") let value = Int32(bitPattern: self.value) let prop = __swift_stdlib_UCHAR_FULL_COMPOSITION_EXCLUSION return __swift_stdlib_u_hasBinaryProperty(value, prop) != 0 } } extension _Normalization { // When normalized in NFC, some segments may expand in size (e.g. some non-BMP // musical notes). This expansion is capped by the maximum expansion factor of // the normal form. For NFC, that is 3x. internal static let _maxNFCExpansionFactor = 3 // A small output buffer to use for normalizing a single normalization // segment. Fits all but pathological arbitrary-length segments (i.e. zalgo- // segments) internal typealias _SegmentOutputBuffer = _FixedArray16 }