Improve RangeSet initialization performance (#75089)

When initializing a range set with a group of overlapping, identical,
or empty ranges, the initializer can exhibit poor performance due
to removing the unneeded ranges during processing. This change uses
a partitioning scheme instead, only removing the unnecessary ranges
at the end of initialization.
This commit is contained in:
Nate Cook
2024-07-12 15:01:38 -05:00
committed by GitHub
parent 8bb6b30288
commit 846a861660
2 changed files with 89 additions and 16 deletions

View File

@@ -37,25 +37,57 @@ extension RangeSet {
_storage.sort {
$0.lowerBound < $1.lowerBound
}
var i = 0
while i < _storage.count {
let current = _storage[i]
if i > 0 {
let previous = _storage[i - 1]
if previous.upperBound >= current.lowerBound {
let newUpper = Swift.max(previous.upperBound, current.upperBound)
_storage[i - 1] = previous.lowerBound ..< newUpper
_storage.remove(at: i)
continue
}
}
if current.isEmpty {
_storage.remove(at: i)
// Find the index of the first non-empty range. If all ranges are empty,
// the result is empty.
guard let firstNonEmpty = _storage.firstIndex(where: { $0.isEmpty == false }) else {
_storage = []
return
}
// Swap that non-empty range to be first. (This and the swap in the loop
// might be no-ops, if no empty or overlapping ranges have been found.)
_storage.swapAt(0, firstNonEmpty)
// That single range is now a valid range set, so we set up three sections
// of the storage array:
//
// 1: a processed, valid range set (0...lastValid)
// 2: ranges to discard (lastValid + 1 ..< current)
// 3: unprocessed ranges (current ..< _storage.count)
//
// Section 2 is made up of ranges that are either empty or that overlap
// with the ranges in section 1. By waiting to remove these ranges until
// we've processed the entire array, we avoid needing to constantly
// reshuffle the elements during processing.
var lastValid = 0
var current = firstNonEmpty + 1
while current < _storage.count {
defer { current += 1 }
// Skip over empty ranges.
if _storage[current].isEmpty { continue }
// If the last valid range overlaps with the current range, extend the
// last valid range to cover the current.
if _storage[lastValid].upperBound >= _storage[current].lowerBound {
let newUpper = Swift.max(
_storage[lastValid].upperBound,
_storage[current].upperBound)
_storage[lastValid] = Range(
uncheckedBounds: (_storage[lastValid].lowerBound, newUpper))
} else {
i += 1
// Otherwise, this is a valid new range to add to the range set:
// swap it into place at the end of the valid section.
lastValid += 1
_storage.swapAt(current, lastValid)
}
}
// Now that we've processed the whole array, remove anything left after
// the valid section.
_storage.removeSubrange((lastValid + 1) ..< _storage.count)
}
}
}

View File

@@ -42,6 +42,47 @@ if #available(SwiftStdlib 6.0, *) {
}
return set
}
RangeSetTests.test("initialization") {
  // Deterministic cases: empty ranges must be eliminated, and identical,
  // overlapping, or adjacent ranges must be coalesced into a single range.
  do {
    let empty = RangeSet(Array(repeating: 0..<0, count: 100))
    expectTrue(empty.isEmpty)

    let repeated = RangeSet(Array(repeating: 0..<3, count: 100))
    expectEqual(repeated, [0..<3])

    let singleAfterEmpty = RangeSet(Array(repeating: 0..<0, count: 100) + [0..<3])
    expectEqual(singleAfterEmpty, [0..<3])

    let contiguousRanges = (0..<100).map { $0 ..< ($0 + 1) }
    expectEqual(RangeSet(contiguousRanges), [0..<100])

    // The shuffled order is different on every run, so attach it to the
    // failure message; otherwise a failing ordering cannot be reproduced.
    let shuffledRanges = contiguousRanges.shuffled()
    expectEqual(
      RangeSet(shuffledRanges), [0..<100],
      "initialized from shuffled ranges: \(shuffledRanges)")
  }

  // The `buildRandomRangeSet()` function builds a range set via additions
  // and removals. `randomRanges()` instead creates an array of potentially
  // empty or overlapping ranges that can be used to initialize a range set.
  func randomRanges() -> [Range<Int>] {
    (0..<100).map { _ in
      let low = Int.random(in: 0...100)
      // A count of zero produces an empty range.
      let count = Int.random(in: 0...20)
      return low ..< (low + count)
    }
  }

  for _ in 0..<1000 {
    let ranges = randomRanges()
    let set = RangeSet(ranges)

    // Manually construct a range set for comparison by inserting the same
    // ranges one at a time.
    var comparison = RangeSet<Int>()
    for r in ranges {
      comparison.insert(contentsOf: r)
    }

    // The input is random, so report it on failure to make the failing case
    // reproducible.
    expectEqual(set, comparison, "initialized from ranges: \(ranges)")
  }
}
RangeSetTests.test("contains") {
expectFalse(source.contains(0))