Files
swift-mirror/stdlib/core/Hashing.swift
Dmitri Hrybenko 3a04e0809f stdlib: add a function to squeeze a number in a given range from a hash value
This function mixes the bits in the hash value, which improves Dictionary
performance for keys with bad hashes.

PrecommitBenchmark changes with greater than 7% difference:

``````````Dictionary2`,```1456.00`,```1508.00`,```1502.00`,````624.00`,````607.00`,````592.00`,`864.00`,``145.9%
``````````Dictionary3`,```1379.00`,```1439.00`,```1408.00`,````585.00`,````567.00`,````552.00`,`827.00`,``149.8%
````````````Histogram`,````850.00`,````849.00`,````851.00`,```1053.00`,```1049.00`,```1048.00`,`199.00`,``-19.0%
````````````````Prims`,```1999.00`,```2005.00`,```2018.00`,```1734.00`,```1689.00`,```1701.00`,`310.00`,```18.4%
``````````StrSplitter`,```2365.00`,```2334.00`,```2316.00`,```1979.00`,```1997.00`,```2000.00`,`337.00`,```17.0%
```````````````TwoSum`,```1551.00`,```1568.00`,```1556.00`,```1771.00`,```1741.00`,```1716.00`,`165.00`,```-9.6%

Regressions are in benchmarks that use `Int` as dictionary key: we are just
doing more work than previously (hashing an `Int` was an identity function).

rdar://17962402


Swift SVN r21142
2014-08-12 12:02:26 +00:00

153 lines
5.3 KiB
Swift

//===----------------------------------------------------------------------===//
//
// This source file is part of the Swift.org open source project
//
// Copyright (c) 2014 - 2015 Apple Inc. and the Swift project authors
// Licensed under Apache License v2.0 with Runtime Library Exception
//
// See http://swift.org/LICENSE.txt for license information
// See http://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
//
//===----------------------------------------------------------------------===//
//
// This file implements helpers for constructing non-cryptographic hash
// functions.
//
// This code was ported from LLVM's ADT/Hashing.h.
//
// Currently the algorithm is based on CityHash, but this is an implementation
// detail. Even more, there are facilities to mix in a per-execution seed to
// ensure that hash values differ between executions.
//
struct _HashingDetail {
static var fixedSeedOverride: UInt64 = 0
@transparent
static func getExecutionSeed() -> UInt64 {
// FIXME: This needs to be a per-execution seed. This is just a placeholder
// implementation.
let seed: UInt64 = 0xff51afd7ed558ccd
return _HashingDetail.fixedSeedOverride == 0 ? seed : fixedSeedOverride
}
@transparent
static func hash16Bytes(low: UInt64, _ high: UInt64) -> UInt64 {
// Murmur-inspired hashing.
let mul: UInt64 = 0x9ddfea08eb382d69
var a: UInt64 = (low ^ high) &* mul
a ^= (a >> 47)
var b: UInt64 = (high ^ a) &* mul
b ^= (b >> 47)
b = b &* mul
return b
}
}
//
// API functions.
//
//
// _mix*() functions all have type (T) -> T. These functions don't compress
// their inputs and just exhibit avalance effect.
//
@transparent
func _mixUInt32(value: UInt32) -> UInt32 {
// Zero-extend to 64 bits, hash, select 32 bits from the hash.
//
// NOTE: this differs from LLVM's implementation, which selects the lower
// 32 bits. According to the statistical tests, the 3 lowest bits have
// weaker avalanche properties.
let extendedValue = UInt64(value)
let extendedResult = _mixUInt64(extendedValue)
return UInt32((extendedResult >> 3) & 0xffff_ffff)
}
@transparent
func _mixInt32(value: Int32) -> Int32 {
return Int32(bitPattern: _mixUInt32(UInt32(bitPattern: value)))
}
@transparent
func _mixUInt64(value: UInt64) -> UInt64 {
// Similar to hash_4to8_bytes but using a seed instead of length.
let seed: UInt64 = _HashingDetail.getExecutionSeed()
let low: UInt64 = value & 0xffff_ffff
let high: UInt64 = value >> 32
return _HashingDetail.hash16Bytes(seed + (low << 3), high);
}
@transparent
func _mixInt64(value: Int64) -> Int64 {
return Int64(bitPattern: _mixUInt64(UInt64(bitPattern: value)))
}
@transparent
func _mixUInt(value: UInt) -> UInt {
#if arch(i386) || arch(arm)
return UInt(_mixUInt32(UInt32(value)))
#elseif arch(x86_64) || arch(arm64)
return UInt(_mixUInt64(UInt64(value)))
#endif
}
@transparent
func _mixInt(value: Int) -> Int {
#if arch(i386) || arch(arm)
return Int(_mixInt32(Int32(value)))
#elseif arch(x86_64) || arch(arm64)
return Int(_mixInt64(Int64(value)))
#endif
}
/// Given a hash value, returns an integer value within the given range that
/// corresponds to a hash value.
///
/// This function is superior to computing the remainder of `hashValue` by
/// the range length. Some types have bad hash functions; sometimes simple
/// patterns in data sets create patterns in hash values and applying the
/// remainder operation just throws away even more information and invites
/// even more hash collisions. This effect is especially bad if the length
/// of the required range is a power of two -- applying the remainder
/// operation just throws away high bits of the hash (which would not be
/// a problem if the hash was known to be good). This function mixes the
/// bits in the hash value to compensate for such cases.
///
/// Of course, this function is a compressing function, and applying it to a
/// hash value does not change anything fundamentally: collisions are still
/// possible, and it does not prevent malicious users from constructing data
/// sets that will exhibit pathological collisions.
func _squeezeHashValue(hashValue: Int, resultRange: Range<Int>) -> Int {
// Length of a Range<Int> does not fit into an Int, but fits into an UInt.
// An efficient way to compute the length is to rely on two's complement
// arithmetic.
let resultCardinality =
UInt(bitPattern: resultRange.endIndex &- resultRange.startIndex)
// Calculate the result as `UInt` to handle the case when
// `resultCardinality >= Int.max`.
let unsignedResult =
_squeezeHashValue(hashValue, UInt(0)..<resultCardinality)
// We perform the unchecked arithmetic on `UInt` (instead of doing
// straightforward computations on `Int`) in order to handle the following
// tricky case: `startIndex` is negative, and `resultCardinality >= Int.max`.
// We can not convert the latter to `Int`.
return
Int(bitPattern:
UInt(bitPattern: resultRange.startIndex) &+ unsignedResult)
}
func _squeezeHashValue(hashValue: Int, resultRange: Range<UInt>) -> UInt {
let mixedHashValue = UInt(bitPattern: _mixInt(hashValue))
let resultCardinality: UInt = resultRange.endIndex - resultRange.startIndex
if _isPowerOf2(resultCardinality) {
return mixedHashValue & (resultCardinality - 1)
}
return resultRange.startIndex + (mixedHashValue % resultCardinality)
}