Files
swift-mirror/stdlib/public/core/StringBreadcrumbs.swift
Michael Ilseman 0ca42e9ef7 [string] Shrink storage class sizes.
* Don't allocate breadcrumbs pointer if under threshold
* Increase breadcrumbs threshold
* Linear 16-byte bucketing until 128 bytes, malloc_size after
* Allow cap less than _SmallString.capacity (bridging non-ASCII)

This change decreases the amount of heap usage for moderate-length
strings (< 64 UTF-8 code units in length) and increases the amount of
spare code unit capacity available (less growth needed).

Average improvements for moderate-length strings:

* 64-bit: on average, 8 bytes saved and 4 bytes of extra capacity
* 32-bit: on average, 4 bytes saved and 6 bytes of extra capacity

Additionally, on 32-bit, large-length strings also gain an average of
6 bytes of extra spare capacity.

Details:

On 64-bit, half of moderate-length allocations will save 16 bytes
while the other half get an extra 8 bytes of spare capacity.

On 32-bit, a quarter of moderate-length allocations will save 16
bytes, and the rest get an extra 4 bytes of spare
capacity. Additionally, 32-bit string's storage class now claims its
full allocation, which is its birthright. Prior to this change, we'd
have on average 1.5 bytes of spare capacity, and now we have 7.5 bytes
of spare capacity.

Breadcrumbs threshold is increased from the super-conservative 32 to
the pretty-conservative 64. Some speed improvements are incorporated
in this change, but more are in flight. Even without those eventual
improvements, this is a worthwhile change (ASCII is still fast-pathed
and irrelevant to breadcrumbing).

For a complex real-world workload, this amounts to around a 5%
improvement to transient heap usage due to all strings and a 4%
improvement to peak heap usage due to all strings. For moderate-length
strings specifically, this gives around 11% improvement to both.
2020-03-05 16:10:23 -08:00

111 lines
3.5 KiB
Swift

//===----------------------------------------------------------------------===//
//
// This source file is part of the Swift.org open source project
//
// Copyright (c) 2014 - 2017 Apple Inc. and the Swift project authors
// Licensed under Apache License v2.0 with Runtime Library Exception
//
// See https://swift.org/LICENSE.txt for license information
// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
//
//===----------------------------------------------------------------------===//
// @opaque
internal final class _StringBreadcrumbs {
  // Distance, in UTF-16 code units, between two consecutive breadcrumbs.
  internal static var breadcrumbStride: Int { 64 }

  // Number of UTF-16 code units counted while scanning the string during
  // initialization.
  internal var utf16Length: Int

  // The UTF-16 view index found at every `breadcrumbStride`-th offset
  // (0, stride, 2 * stride, ...). `endIndex` is included when the length is an
  // exact multiple of the stride. Left empty for an empty string.
  //
  // TODO: does this need to be a pair?.... Can we be smaller than Int?
  internal var crumbs: [String.Index]

  // Walks `str`'s UTF-16 view once from start to end, recording one index per
  // stride and counting the code units seen along the way.
  //
  // TODO: Does this need to be inout, unique, or how will we be enforcing
  // atomicity?
  internal init(_ str: String) {
    guard !str.isEmpty else {
      // Nothing to scan: no crumbs are recorded, not even one for offset zero.
      self.crumbs = []
      self.utf16Length = 0
      return
    }

    let spacing = _StringBreadcrumbs.breadcrumbStride

    // Rough pre-sizing only.
    // NOTE(review): presumably `_guts.count` is the UTF-8 code unit count,
    // which would make `count / 3` a lower bound on the UTF-16 length --
    // confirm against _StringGuts.
    var recorded: [String.Index] = []
    recorded.reserveCapacity((str._guts.count / 3) / spacing)

    // TODO(String performance): More efficient implementation of initial scan.
    // We'll also want to benchmark this initial scan in order to track changes.
    let codeUnits = str.utf16
    let end = codeUnits.endIndex
    var cursor = codeUnits.startIndex
    var visited = 0
    while cursor != end {
      // Kept as `%`; the original left `isMultiple(of:)` commented out here.
      if visited % spacing == 0 {
        recorded.append(cursor)
      }
      visited = visited &+ 1
      cursor = codeUnits.index(after: cursor)
    }

    // Corner case: index(_:offsetBy:) can produce the endIndex, so it gets a
    // crumb of its own when the length lands exactly on a stride boundary.
    if visited % spacing == 0 {
      recorded.append(end)
    }

    self.crumbs = recorded
    self.utf16Length = visited
    _internalInvariant(self.crumbs.count == 1 + (self.utf16Length / spacing))

    _invariantCheck(for: str)
  }
}
extension _StringBreadcrumbs {
  // Instance-level shorthand for `_StringBreadcrumbs.breadcrumbStride`.
  internal var stride: Int {
    @inline(__always) get { _StringBreadcrumbs.breadcrumbStride }
  }

  // Look up the breadcrumb at or before the given offset. Returns that crumb's
  // index together with how many more code units remain to reach `offset`.
  internal func getBreadcrumb(
    forOffset offset: Int
  ) -> (lowerBound: String.Index, remaining: Int) {
    return (lowerBound: crumbs[offset / stride], remaining: offset % stride)
  }

  // Look up the last breadcrumb that is not past the given index. Returns that
  // crumb's index together with the offset it was recorded at (its position in
  // `crumbs` times the stride).
  internal func getBreadcrumb(
    forIndex idx: String.Index
  ) -> (lowerBound: String.Index, offset: Int) {
    // Initial search window over positions in `crumbs`.
    // NOTE(review): the `/ 3` presumably reflects a UTF-8 encoded offset being
    // at most three times the corresponding UTF-16 offset -- confirm.
    var low = idx._encodedOffset / 3 / stride
    var high = Swift.min(1 + (idx._encodedOffset / stride), crumbs.count)
    _internalInvariant(crumbs[low] <= idx)
    _internalInvariant(high == crumbs.count || crumbs[high] >= idx)

    // Binary search: `low` always names a crumb <= idx; `high` is either
    // one-past-the-end or names a crumb beyond the answer.
    while (high &- low) > 1 {
      let probe = low + ((high &- low) / 2)
      if crumbs[probe] > idx {
        high = probe
      } else {
        low = probe
      }
    }

    let found = crumbs[low]
    _internalInvariant(found <= idx)
    _internalInvariant(low == crumbs.count-1 || crumbs[low+1] > idx)

    return (lowerBound: found, offset: low &* stride)
  }

  // Consistency check between the cached `utf16Length` and `str` itself. It
  // compiles to an empty, always-inlined function unless
  // INTERNAL_CHECKS_ENABLED is set.
  #if INTERNAL_CHECKS_ENABLED
  @nonobjc @inline(never) @_effects(releasenone)
  internal func _invariantCheck(for str: String) {
    _internalInvariant(
      self.utf16Length == str.utf16._distance(
        from: str.startIndex, to: str.endIndex),
      "Stale breadcrumbs")
  }
  #else
  @nonobjc @inline(__always) internal func _invariantCheck(for str: String) {}
  #endif // INTERNAL_CHECKS_ENABLED
}