swift-mirror/benchmark/single-source/DiffingMyers.swift

//===--- DiffingMyers.swift -----------------------------------------------===//
//
// This source file is part of the Swift.org open source project
//
// Copyright (c) 2014 - 2019 Apple Inc. and the Swift project authors
// Licensed under Apache License v2.0 with Runtime Library Exception
//
// See https://swift.org/LICENSE.txt for license information
// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
//
//===----------------------------------------------------------------------===//

import TestsUtils

// The DiffingMyers test benchmarks Swift's performance running the algorithm
// described in Myers (1986). The Diffing benchmark tracks the performance
// of `Collection.difference(from:to:)`.

public let benchmarks =
  BenchmarkInfo(
    name: "Diffing.Myers.Similar",
    runFunction: run_Myers,
    tags: [.algorithm],
    setUpFunction: { blackHole((loremIpsum, unabridgedLorem)) })


let loremIpsum = Array("Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua.")
let unabridgedLorem = Array("Lorem ipsum, quia dolor sit amet consectetur adipisci[ng] velit, sed quia non-numquam [do] eius modi tempora inci[di]dunt, ut labore et dolore magnam aliqua.")

@inline(never)
public func run_Myers(n: Int) {
  if #available(macOS 10.15, iOS 13, tvOS 13, watchOS 6, *) {
    for _ in 1...n {
      blackHole(myers(from: unabridgedLorem, to: loremIpsum, using: ==))
    }
  }
}

// _V is a rudimentary type made to represent the rows of the triangular matrix type used by the Myer's algorithm
//
// This type is basically an array that only supports indexes in the set `stride(from: -d, through: d, by: 2)` where `d` is the depth of this row in the matrix
// `d` is always known at allocation-time, and is used to preallocate the structure.
fileprivate struct _V {

  private var a: [Int]

  // The way negative indexes are implemented is by interleaving them in the empty slots between the valid positive indexes
  @inline(__always) private static func transform(_ index: Int) -> Int {
    // -3, -1, 1, 3 -> 3, 1, 0, 2 -> 0...3
    // -2, 0, 2 -> 2, 0, 1 -> 0...2
    return (index <= 0 ? -index : index &- 1)
  }

  init(maxIndex largest: Int) {
    a = [Int](repeating: 0, count: largest + 1)
  }

  subscript(index: Int) -> Int {
    get {
      return a[_V.transform(index)]
    }
    set(newValue) {
      a[_V.transform(index)] = newValue
    }
  }
}

@available(macOS 10.15, iOS 13, tvOS 13, watchOS 6, *)
fileprivate func myers<C,D>(
  from old: C, to new: D,
  using cmp: (C.Element, D.Element) -> Bool
) -> CollectionDifference<C.Element>
  where
    C : BidirectionalCollection,
    D : BidirectionalCollection,
    C.Element == D.Element
{

  // Core implementation of the algorithm described at http://www.xmailserver.org/diff2.pdf
  // Variable names match those used in the paper as closely as possible
  func _descent(from a: UnsafeBufferPointer<C.Element>, to b: UnsafeBufferPointer<D.Element>) -> [_V] {
    let n = a.count
    let m = b.count
    let max = n + m

    var result = [_V]()
    var v = _V(maxIndex: 1)
    v[1] = 0

    var x = 0
    var y = 0
    iterator: for d in 0...max {
      let prev_v = v
      result.append(v)
      v = _V(maxIndex: d)

      // The code in this loop is _very_ hot—the loop bounds increases in terms
      // of the iterator of the outer loop!
      for k in stride(from: -d, through: d, by: 2) {
        if k == -d {
          x = prev_v[k &+ 1]
        } else {
          let km = prev_v[k &- 1]

          if k != d {
            let kp = prev_v[k &+ 1]
            if km < kp {
              x = kp
            } else {
              x = km &+ 1
            }
          } else {
            x = km &+ 1
          }
        }
        y = x &- k

        while x < n && y < m {
          if !cmp(a[x], b[y]) {
            break;
          }
          x &+= 1
          y &+= 1
        }

        v[k] = x

        if x >= n && y >= m {
          break iterator
        }
      }
      if x >= n && y >= m {
        break
      }
    }

    return result
  }

  // Backtrack through the trace generated by the Myers descent to produce the changes that make up the diff
  func _formChanges(
    from a: UnsafeBufferPointer<C.Element>,
    to b: UnsafeBufferPointer<C.Element>,
    using trace: [_V]
  ) -> [CollectionDifference<C.Element>.Change] {
    var changes = [CollectionDifference<C.Element>.Change]()

    var x = a.count
    var y = b.count
    for d in stride(from: trace.count &- 1, to: 0, by: -1) {
      let v = trace[d]
      let k = x &- y
      let prev_k = (k == -d || (k != d && v[k &- 1] < v[k &+ 1])) ? k &+ 1 : k &- 1
      let prev_x = v[prev_k]
      let prev_y = prev_x &- prev_k

      while x > prev_x && y > prev_y {
        // No change at this position.
        x &-= 1
        y &-= 1
      }

      assert((x == prev_x && y > prev_y) || (y == prev_y && x > prev_x))
      if y != prev_y {
        changes.append(.insert(offset: prev_y, element: b[prev_y], associatedWith: nil))
      } else {
        changes.append(.remove(offset: prev_x, element: a[prev_x], associatedWith: nil))
      }

      x = prev_x
      y = prev_y
    }

    return changes
  }

  /* Splatting the collections into contiguous storage has two advantages:
   *
   *   1) Subscript access is much faster
   *   2) Subscript index becomes Int, matching the iterator types in the algorithm
   *
   * Combined, these effects dramatically improves performance when
   * collections differ significantly, without unduly degrading runtime when
   * the parameters are very similar.
   *
   * In terms of memory use, the linear cost of creating a ContiguousArray (when
   * necessary) is significantly less than the worst-case n² memory use of the
   * descent algorithm.
   */
  func _withContiguousStorage<Col : Collection, R>(
    for values: Col,
    _ body: (UnsafeBufferPointer<Col.Element>) throws -> R
  ) rethrows -> R {
    if let result = try values.withContiguousStorageIfAvailable(body) { return result }
    let array = ContiguousArray(values)
    return try array.withUnsafeBufferPointer(body)
  }

  return _withContiguousStorage(for: old) { a in
    return _withContiguousStorage(for: new) { b in
      return CollectionDifference(_formChanges(from: a, to: b, using:_descent(from: a, to: b)))!
    }
  }
}