mirror of
https://github.com/apple/swift.git
synced 2025-12-14 20:36:38 +01:00
- String hashing is not inlinable, so it can use _Hasher._core operations directly. Remove custom buffering. - Speed up ASCII hashing by as much as 5.5x by feeding the storage buffer directly into hasher in a single go. - For other strings, just feed the UTF-8 encoding of the normalized string to the hasher; don't switch to UTF-16 at the first non-ASCII scalar. (Doing that would make the hash encoding of some string sequences ambiguous, leading to artificial collisions.) - Add a single unconditional terminator byte, 0xFF. It's not a valid UTF-8 code unit, so it won't ever occur within a normalized string encoding.
151 lines
4.1 KiB
Swift
151 lines
4.1 KiB
Swift
//===----------------------------------------------------------------------===//
|
|
//
|
|
// This source file is part of the Swift.org open source project
|
|
//
|
|
// Copyright (c) 2014 - 2017 Apple Inc. and the Swift project authors
|
|
// Licensed under Apache License v2.0 with Runtime Library Exception
|
|
//
|
|
// See https://swift.org/LICENSE.txt for license information
|
|
// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
import SwiftShims
|
|
|
|
extension _UnmanagedString where CodeUnit == UInt8 {
  /// Feeds this string's storage into `core` with a single
  /// `combine(bytes:)` call.
  ///
  /// No terminator is written here; this method only forwards `rawBuffer`.
  ///
  /// - Parameter core: The hasher core that receives the bytes.
  internal func hashASCII(into core: inout _Hasher.Core) {
    let storageBytes = rawBuffer
    core.combine(bytes: storageBytes)
  }
}
|
|
|
|
extension BidirectionalCollection where Element == UInt16, SubSequence == Self {
  /// Feeds the UTF-16 code units of this collection into `core` as bytes.
  ///
  /// Leading code units that are ASCII and for which
  /// `hasNormalizationBoundary(after:)` returns `true` are combined one byte
  /// at a time. At the first code unit that fails either check, the remainder
  /// of the collection (starting at that code unit) is run through
  /// `_NormalizedCodeUnitIterator`, parsed into scalars, transcoded to UTF-8,
  /// and combined; the method then returns.
  ///
  /// - Parameter core: The hasher core that receives the bytes.
  internal func hashUTF16(into core: inout _Hasher.Core) {
    for position in self.indices {
      let unit = self[position]
      let unitIsASCII = unit <= 0x7F
      let boundaryFollows = self.hasNormalizationBoundary(after: position)

      guard unitIsASCII && boundaryFollows else {
        // Slow path: hash everything from `position` to the end via the
        // normalizing iterator, then stop -- the tail has been fully consumed.
        let normalizedTail = _NormalizedCodeUnitIterator(
          self[position..<endIndex])
        let scalars = Unicode._ParsingIterator(
          codeUnits: normalizedTail,
          parser: Unicode.UTF16.ForwardParser()
        )
        for scalar in scalars {
          let utf8 = Unicode.UTF8.transcode(
            scalar, from: Unicode.UTF16.self
          ).unsafelyUnwrapped // never fails
          let (bytes, count) = utf8._bytes
          core.combine(bytes: bytes, count: count)
        }
        return
      }

      // Fast path: the code unit is combined as a single byte.
      core.combine(UInt8(truncatingIfNeeded: unit))
    }
  }
}
|
|
|
|
extension _UnmanagedString where CodeUnit == UInt8 {
  /// Hashes this ASCII string into `hasher`, followed by a terminator byte.
  ///
  /// - Parameter hasher: The hasher whose `_core` receives the string bytes
  ///   and the trailing `0xFF`.
  internal func hash(into hasher: inout _Hasher) {
    // 0xFF is not a valid UTF-8 code unit, so it marks the end of the
    // string's contribution to the hash.
    let terminator: UInt8 = 0xFF
    hashASCII(into: &hasher._core)
    hasher._core.combine(terminator)
  }
}
|
|
|
|
extension _UnmanagedString where CodeUnit == UInt16 {
  /// Hashes this UTF-16 string into `hasher`, followed by a terminator byte.
  ///
  /// - Parameter hasher: The hasher whose `_core` receives the output of
  ///   `hashUTF16(into:)` and the trailing `0xFF`.
  internal func hash(into hasher: inout _Hasher) {
    // 0xFF is not a valid UTF-8 code unit, so it marks the end of the
    // string's contribution to the hash.
    let terminator: UInt8 = 0xFF
    hashUTF16(into: &hasher._core)
    hasher._core.combine(terminator)
  }
}
|
|
|
|
extension _UnmanagedOpaqueString {
  /// Hashes this opaque string into `hasher`, followed by a terminator byte.
  ///
  /// - Parameter hasher: The hasher whose `_core` receives the output of
  ///   `hashUTF16(into:)` and the trailing `0xFF`.
  internal func hash(into hasher: inout _Hasher) {
    // 0xFF is not a valid UTF-8 code unit, so it marks the end of the
    // string's contribution to the hash.
    let terminator: UInt8 = 0xFF
    hashUTF16(into: &hasher._core)
    hasher._core.combine(terminator)
  }
}
|
|
|
|
extension _SmallUTF8String {
  /// Hashes this small string into `hasher`.
  ///
  /// On i386 and arm this calls `unsupportedOn32bit()`. Elsewhere, an ASCII
  /// string is hashed through its unmanaged ASCII view and any other string
  /// through its unmanaged UTF-16 view.
  ///
  /// - Parameter hasher: The hasher that receives the string's contents.
  internal func hash(into hasher: inout _Hasher) {
#if arch(i386) || arch(arm)
    unsupportedOn32bit()
#else
    if isASCII {
      self.withUnmanagedASCII { ascii in ascii.hash(into: &hasher) }
    } else {
      self.withUnmanagedUTF16 { utf16 in utf16.hash(into: &hasher) }
    }
#endif // 64-bit
  }
}
|
|
|
|
extension _StringGuts {
  /// Hashes the entire string into `hasher`, dispatching on the storage
  /// representation: small, opaque, ASCII, or UTF-16 (checked in that order).
  ///
  /// - Parameter hasher: The hasher that receives the string's contents.
  @effects(releasenone) // FIXME: Is this valid in the opaque case?
  @usableFromInline
  internal func _hash(into hasher: inout _Hasher) {
    guard !_isSmall else {
      _smallUTF8String.hash(into: &hasher)
      return
    }

    // Registered only for the non-small representations, as in the
    // small-string early exit above.
    defer { _fixLifetime(self) }

    if _slowPath(_isOpaque) {
      _asOpaque().hash(into: &hasher)
    } else if isASCII {
      _unmanagedASCIIView.hash(into: &hasher)
    } else {
      _unmanagedUTF16View.hash(into: &hasher)
    }
  }

  /// Hashes the slice of the string selected by `range` into `hasher`,
  /// dispatching on the storage representation: small, opaque, ASCII, or
  /// UTF-16 (checked in that order).
  ///
  /// - Parameters:
  ///   - range: The range used to subscript the underlying view before
  ///     hashing.
  ///   - hasher: The hasher that receives the slice's contents.
  @effects(releasenone) // FIXME: Is this valid in the opaque case?
  @usableFromInline
  internal func _hash(_ range: Range<Int>, into hasher: inout _Hasher) {
    guard !_isSmall else {
      _smallUTF8String[range].hash(into: &hasher)
      return
    }

    // Registered only for the non-small representations, as in the
    // small-string early exit above.
    defer { _fixLifetime(self) }

    if _slowPath(_isOpaque) {
      _asOpaque()[range].hash(into: &hasher)
    } else if isASCII {
      _unmanagedASCIIView[range].hash(into: &hasher)
    } else {
      _unmanagedUTF16View[range].hash(into: &hasher)
    }
  }
}
|
|
|
|
extension String : Hashable {
  /// The hash value of this string.
  ///
  /// A hash value may differ from one execution of your program to the
  /// next. Do not persist hash values for use in a later execution.
  @inlinable
  public var hashValue: Int {
    get {
      return _hashValue(for: self)
    }
  }

  /// Feeds this string into `hasher` by forwarding to the string guts.
  ///
  /// - Parameter hasher: The hasher that receives the string's contents.
  @inlinable
  public func _hash(into hasher: inout _Hasher) {
    self._guts._hash(into: &hasher)
  }
}
|
|
|
|
extension StringProtocol {
  /// The hash value of this string-like value, computed by
  /// `_hashValue(for:)`.
  @inlinable
  public var hashValue : Int {
    get {
      return _hashValue(for: self)
    }
  }

  /// Feeds this value into `hasher` by hashing the `_encodedOffsetRange`
  /// portion of `_wholeString`'s guts.
  ///
  /// - Parameter hasher: The hasher that receives the contents.
  @inlinable
  public func _hash(into hasher: inout _Hasher) {
    let offsets = _encodedOffsetRange
    _wholeString._guts._hash(offsets, into: &hasher)
  }
}
|