mirror of
https://github.com/apple/swift.git
synced 2025-12-21 12:14:44 +01:00
[stdlib]Un-revert string comparison (#14694)
Restore (un-revert) string comparison, with fixes. More exhaustive testing of opaque strings, which consistently reproduces prior sporadic failure. Shims fixups. Some test tweaking.
This commit is contained in:
committed by
Michael Ilseman
parent
76af5c5b16
commit
0661de22a2
120
stdlib/public/core/StringNormalization.swift
Normal file
120
stdlib/public/core/StringNormalization.swift
Normal file
@@ -0,0 +1,120 @@
|
||||
//===--- StringNormalization.swift ----------------------------------------===//
|
||||
//
|
||||
// This source file is part of the Swift.org open source project
|
||||
//
|
||||
// Copyright (c) 2014 - 2018 Apple Inc. and the Swift project authors
|
||||
// Licensed under Apache License v2.0 with Runtime Library Exception
|
||||
//
|
||||
// See https://swift.org/LICENSE.txt for license information
|
||||
// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
import SwiftShims
|
||||
|
||||
// A namespace for various heuristics used when checking and performing NFC
// normalization through ICU.
//
internal enum _Normalization {
  // ICU's NFC unorm2 instance.
  //
  // The initializer closure traps if ICU reports an error while handing out
  // the instance.
  internal static var _nfcNormalizer: OpaquePointer = {
    var err = __swift_stdlib_U_ZERO_ERROR
    let normalizer = __swift_stdlib_unorm2_getNFCInstance(&err)
    guard err.isSuccess else {
      // This shouldn't be possible unless some deep (unrecoverable) system
      // invariants are violated
      fatalError("Unable to talk to ICU")
    }
    return normalizer
  }()

  // Whether this buffer of code units satisfies the quickCheck=YES property for
  // normality checking under NFC.
  //
  // ICU provides a quickCheck, which may yield "YES", "NO", or "MAYBE". YES
  // means that the string was determined to definitely be normal under NFC. In
  // practice, the majority of Strings have this property. Checking for YES is
  // considerably faster than trying to distinguish between NO and MAYBE.
  //
  // A buffer without a base address holds no code units, so it is reported as
  // normal without consulting ICU (and without unwrapping a nil pointer).
  internal static func _prenormalQuickCheckYes(
    _ buffer: UnsafeBufferPointer<UInt16>
  ) -> Bool {
    guard let start = buffer.baseAddress else { return true }
    return _spanQuickCheckYesCoversAll(start, count: buffer.count)
  }

  // Same check as the buffer-based overload above, for the code units of an
  // unmanaged UTF-16 string.
  internal static func _prenormalQuickCheckYes(
    _ string: _UnmanagedString<UInt16>
  ) -> Bool {
    return _spanQuickCheckYesCoversAll(string.start, count: string.count)
  }

  // Shared implementation of the `_prenormalQuickCheckYes` overloads.
  //
  // Asks ICU for the length of the leading span of the `count` code units at
  // `start` that is quickCheck=YES, and returns whether that span covers the
  // entire input. Traps if ICU reports an error, or if `count` does not fit
  // in the `Int32` length that is passed to ICU.
  private static func _spanQuickCheckYesCoversAll(
    _ start: UnsafePointer<UInt16>, count: Int
  ) -> Bool {
    var err = __swift_stdlib_U_ZERO_ERROR
    let length = __swift_stdlib_unorm2_spanQuickCheckYes(
      _Normalization._nfcNormalizer,
      start,
      Int32(count),
      &err)

    guard err.isSuccess else {
      // This shouldn't be possible unless some deep (unrecoverable) system
      // invariants are violated
      fatalError("Unable to talk to ICU")
    }
    return length == count
  }
}
|
||||
|
||||
extension UnicodeScalar {
  // Terminology used by the properties below:
  //
  // Normalization boundary - a position in a string such that the text to its
  // left and the text to its right can each be normalized on their own, and
  // concatenating the two results gives the same output as normalizing the
  // whole string at once.
  //
  // Normalization segment - a run of code units lying between two
  // normalization boundaries, with no boundary inside it. While being
  // normalized, a segment can expand, contract, and even produce new
  // sub-segments.

  // Whether there is always a normalization boundary before this scalar
  // value, as reported by ICU's NFC normalizer.
  internal var _hasNormalizationBoundaryBefore: Bool {
    _sanityCheck(Int32(exactly: value) != nil, "top bit shouldn't be set")
    let boundaryFlag = __swift_stdlib_unorm2_hasBoundaryBefore(
      _Normalization._nfcNormalizer,
      Int32(bitPattern: value))
    return boundaryFlag != 0
  }

  // Whether the supported version of Unicode has assigned a code point to
  // this value.
  internal var _isDefined: Bool {
    let definedFlag = __swift_stdlib_u_isdefined(Int32(value))
    return definedFlag != 0
  }

  // Whether ICU records the full-composition-exclusion property for this
  // scalar, i.e. its potential non-normality; this is equivalent to whether
  // quickCheck=NO. A subset of such scalars may expand under NFC
  // normalization, and a subset of those may expand into multiple segments.
  internal var _hasFullCompExclusion: Bool {
    _sanityCheck(Int32(exactly: value) != nil, "top bit shouldn't be set")
    return 0 != __swift_stdlib_u_hasBinaryProperty(
      Int32(bitPattern: value),
      __swift_stdlib_UCHAR_FULL_COMPOSITION_EXCLUSION)
  }
}
|
||||
|
||||
extension _Normalization {
  // Normalizing to NFC can make a segment grow (some non-BMP musical notes,
  // for example). Each normal form caps that growth with a maximum expansion
  // factor; for NFC the cap is 3x.
  internal static let _maxNFCExpansionFactor: Int = 3

  // Small output buffer used when normalizing one normalization segment. It
  // fits every segment except pathological, arbitrarily long ones (i.e.
  // zalgo-segments).
  internal typealias _SegmentOutputBuffer = _FixedArray16<UInt16>
}
|
||||
Reference in New Issue
Block a user