//===--- DiffingMyers.swift -----------------------------------------------===// // // This source file is part of the Swift.org open source project // // Copyright (c) 2014 - 2019 Apple Inc. and the Swift project authors // Licensed under Apache License v2.0 with Runtime Library Exception // // See https://swift.org/LICENSE.txt for license information // See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors // //===----------------------------------------------------------------------===// import TestsUtils // The DiffingMyers test benchmarks Swift's performance running the algorithm // described in Myers (1986). The Diffing benchmark tracks the performance // of `Collection.difference(from:to:)`. public let benchmarks = BenchmarkInfo( name: "Diffing.Myers.Similar", runFunction: run_Myers, tags: [.algorithm], setUpFunction: { blackHole((loremIpsum, unabridgedLorem)) }) let loremIpsum = Array("Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua.") let unabridgedLorem = Array("Lorem ipsum, quia dolor sit amet consectetur adipisci[ng] velit, sed quia non-numquam [do] eius modi tempora inci[di]dunt, ut labore et dolore magnam aliqua.") @inline(never) public func run_Myers(n: Int) { if #available(macOS 10.15, iOS 13, tvOS 13, watchOS 6, *) { for _ in 1...n { blackHole(myers(from: unabridgedLorem, to: loremIpsum, using: ==)) } } } // _V is a rudimentary type made to represent the rows of the triangular matrix type used by the Myer's algorithm // // This type is basically an array that only supports indexes in the set `stride(from: -d, through: d, by: 2)` where `d` is the depth of this row in the matrix // `d` is always known at allocation-time, and is used to preallocate the structure. fileprivate struct _V { private var a: [Int] // The way negative indexes are implemented is by interleaving them in the empty slots between the valid positive indexes @inline(__always) private static func transform(_ index: Int) -> Int { // -3, -1, 1, 3 -> 3, 1, 0, 2 -> 0...3 // -2, 0, 2 -> 2, 0, 1 -> 0...2 return (index <= 0 ? -index : index &- 1) } init(maxIndex largest: Int) { a = [Int](repeating: 0, count: largest + 1) } subscript(index: Int) -> Int { get { return a[_V.transform(index)] } set(newValue) { a[_V.transform(index)] = newValue } } } @available(macOS 10.15, iOS 13, tvOS 13, watchOS 6, *) fileprivate func myers( from old: C, to new: D, using cmp: (C.Element, D.Element) -> Bool ) -> CollectionDifference where C : BidirectionalCollection, D : BidirectionalCollection, C.Element == D.Element { // Core implementation of the algorithm described at http://www.xmailserver.org/diff2.pdf // Variable names match those used in the paper as closely as possible func _descent(from a: UnsafeBufferPointer, to b: UnsafeBufferPointer) -> [_V] { let n = a.count let m = b.count let max = n + m var result = [_V]() var v = _V(maxIndex: 1) v[1] = 0 var x = 0 var y = 0 iterator: for d in 0...max { let prev_v = v result.append(v) v = _V(maxIndex: d) // The code in this loop is _very_ hot—the loop bounds increases in terms // of the iterator of the outer loop! for k in stride(from: -d, through: d, by: 2) { if k == -d { x = prev_v[k &+ 1] } else { let km = prev_v[k &- 1] if k != d { let kp = prev_v[k &+ 1] if km < kp { x = kp } else { x = km &+ 1 } } else { x = km &+ 1 } } y = x &- k while x < n && y < m { if !cmp(a[x], b[y]) { break; } x &+= 1 y &+= 1 } v[k] = x if x >= n && y >= m { break iterator } } if x >= n && y >= m { break } } return result } // Backtrack through the trace generated by the Myers descent to produce the changes that make up the diff func _formChanges( from a: UnsafeBufferPointer, to b: UnsafeBufferPointer, using trace: [_V] ) -> [CollectionDifference.Change] { var changes = [CollectionDifference.Change]() var x = a.count var y = b.count for d in stride(from: trace.count &- 1, to: 0, by: -1) { let v = trace[d] let k = x &- y let prev_k = (k == -d || (k != d && v[k &- 1] < v[k &+ 1])) ? k &+ 1 : k &- 1 let prev_x = v[prev_k] let prev_y = prev_x &- prev_k while x > prev_x && y > prev_y { // No change at this position. x &-= 1 y &-= 1 } assert((x == prev_x && y > prev_y) || (y == prev_y && x > prev_x)) if y != prev_y { changes.append(.insert(offset: prev_y, element: b[prev_y], associatedWith: nil)) } else { changes.append(.remove(offset: prev_x, element: a[prev_x], associatedWith: nil)) } x = prev_x y = prev_y } return changes } /* Splatting the collections into contiguous storage has two advantages: * * 1) Subscript access is much faster * 2) Subscript index becomes Int, matching the iterator types in the algorithm * * Combined, these effects dramatically improves performance when * collections differ significantly, without unduly degrading runtime when * the parameters are very similar. * * In terms of memory use, the linear cost of creating a ContiguousArray (when * necessary) is significantly less than the worst-case n² memory use of the * descent algorithm. */ func _withContiguousStorage( for values: C, _ body: (UnsafeBufferPointer) throws -> R ) rethrows -> R { if let result = try values.withContiguousStorageIfAvailable(body) { return result } let array = ContiguousArray(values) return try array.withUnsafeBufferPointer(body) } return _withContiguousStorage(for: old) { a in return _withContiguousStorage(for: new) { b in return CollectionDifference(_formChanges(from: a, to: b, using:_descent(from: a, to: b)))! } } }