//===--- RedundantLoadElimination.swift ------------------------------------==//
//
// This source file is part of the Swift.org open source project
//
// Copyright (c) 2014 - 2023 Apple Inc. and the Swift project authors
// Licensed under Apache License v2.0 with Runtime Library Exception
//
// See https://swift.org/LICENSE.txt for license information
// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
//
//===----------------------------------------------------------------------===//

import SIL

/// Replaces redundant load instructions with already available values.
///
/// A load is redundant if the loaded value is already available at that point.
/// This can be via a preceding store to the same address:
///
///     store %1 to %addr
///     ...               // no writes to %addr
///     %2 = load %addr
/// ->
///     store %1 to %addr
///     ...               // no writes to %addr
///     // replace uses of %2 with the available value %1
///
/// or a preceding load from the same address:
///
///     %1 = load %addr
///     ...               // no writes to %addr
///     %2 = load %addr
/// ->
///     %1 = load %addr
///     ...               // no writes to %addr
///     // replace uses of %2 with the available value %1
///
/// In case of a partial redundant load, the load is split so that some of the new
/// individual loads can be eliminated in the next round of the optimization:
///
///     %fa1 = struct_element_addr %addr, #field1
///     store %1 to %fa1
///     ...               // no writes to %fa1
///     %2 = load %addr   // partially redundant
/// ->
///     %fa1 = struct_extract %addr, #field1
///     store %1 to %fa1
///     ...               // no writes to %fa1
///     %fa1 = struct_element_addr %addr, #field1
///     %f1 = load %fa1                              // this load is redundant now
///     %fa2 = struct_element_addr %addr, #field2
///     %f2 = load %fa2
///     %2 = struct (%f1, %f2)
///
/// The algorithm is a data flow analysis which starts at the original load and searches
/// for preceding stores or loads by following the control flow in backward direction.
/// The preceding stores and loads provide the "available values" with which the original
/// load can be replaced.
///
/// If the function is in OSSA, redundant loads are replaced in a way that no additional
/// copies of the loaded value are introduced. If this is not possible, the redundant load
/// is not replaced.
///
let redundantLoadElimination = FunctionPass(name: "redundant-load-elimination") {
    (function: Function, context: FunctionPassContext) in
  eliminateRedundantLoads(in: function, ignoreArrays: false, context)
}

// Early RLE does not touch loads from Arrays. This is important because later array optimizations,
// like ABCOpt, get confused if an array load in a loop is converted to a pattern with a phi argument.
let earlyRedundantLoadElimination = FunctionPass(name: "early-redundant-load-elimination") {
    (function: Function, context: FunctionPassContext) in
  eliminateRedundantLoads(in: function, ignoreArrays: true, context)
}

private func eliminateRedundantLoads(in function: Function, ignoreArrays: Bool, _ context: FunctionPassContext) {

  // Avoid quadratic complexity by limiting the number of visited instructions.
  // This limit is sufficient for most "real-world" functions, by far.
  var complexityBudget = 50_000

  for block in function.blocks.reversed() {

    // We cannot use for-in iteration here because if the load is split, the new
    // individual loads are inserted right before and they would be ignored by a for-in iteration.
    var inst = block.instructions.reversed().first
    while let i = inst {
      defer { inst = i.previous }

      if let load = inst as? LoadInst {
        if !context.continueWithNextSubpassRun(for: load) {
          return
        }
        if ignoreArrays,
           let nominal = load.type.nominal,
           nominal == context.swiftArrayDecl
        {
          continue
        }
        // Check if the type can be expanded without a significant increase to
        // code size.
        // We block redundant load elimination because it might increase
        // register pressure for large values. Furthermore, this pass also
        // splits values into its projections (e.g
        // shrinkMemoryLifetimeAndSplit).
        if !load.type.shouldExpand(context) {
           continue
        }
        tryEliminate(load: load, complexityBudget: &complexityBudget, context)
      }
    }
  }
}

private func tryEliminate(load: LoadInst, complexityBudget: inout Int, _ context: FunctionPassContext) {
  switch load.isRedundant(complexityBudget: &complexityBudget, context) {
  case .notRedundant:
    break
  case .redundant(let availableValues):
    replace(load: load, with: availableValues, context)
  case .maybePartiallyRedundant(let subPath):
    // Check if the a partial load would really be redundant to avoid unnecessary splitting.
    switch load.isRedundant(at: subPath, complexityBudget: &complexityBudget, context) {
      case .notRedundant, .maybePartiallyRedundant:
        break
      case .redundant:
        // The new individual loads are inserted right before the current load and
        // will be optimized in the following loop iterations.
        load.trySplit(context)
    }
  }
}

private extension LoadInst {

  enum DataflowResult {
    case notRedundant
    case redundant([AvailableValue])
    case maybePartiallyRedundant(AccessPath)

    init(notRedundantWith subPath: AccessPath?) {
      if let subPath = subPath {
        self = .maybePartiallyRedundant(subPath)
      } else {
        self = .notRedundant
      }
    }
  }

  func isRedundant(complexityBudget: inout Int, _ context: FunctionPassContext) -> DataflowResult {
    return isRedundant(at: address.constantAccessPath, complexityBudget: &complexityBudget, context)
  }

  func isRedundant(at accessPath: AccessPath, complexityBudget: inout Int, _ context: FunctionPassContext) -> DataflowResult {
    var scanner = InstructionScanner(load: self, accessPath: accessPath, context.aliasAnalysis)

    switch scanner.scan(instructions: ReverseInstructionList(first: self.previous),
                        in: parentBlock,
                        complexityBudget: &complexityBudget)
    {
    case .overwritten:
      return DataflowResult(notRedundantWith: scanner.potentiallyRedundantSubpath)
    case .available:
      return .redundant(scanner.availableValues)
    case .transparent:
      return self.isRedundantInPredecessorBlocks(scanner: &scanner, complexityBudget: &complexityBudget, context)
    }
  }

  private func isRedundantInPredecessorBlocks(
    scanner: inout InstructionScanner,
    complexityBudget: inout Int,
    _ context: FunctionPassContext
  ) -> DataflowResult {

    var liverange = Liverange(endBlock: self.parentBlock, context)
    defer { liverange.deinitialize() }
    liverange.pushPredecessors(of: self.parentBlock)

    while let block = liverange.pop() {
      switch scanner.scan(instructions: block.instructions.reversed(),
                          in: block,
                          complexityBudget: &complexityBudget)
      {
      case .overwritten:
        return DataflowResult(notRedundantWith: scanner.potentiallyRedundantSubpath)
      case .available:
        liverange.add(beginBlock: block)
      case .transparent:
        liverange.pushPredecessors(of: block)
      }
    }
    if !self.canReplaceWithoutInsertingCopies(liverange: liverange, context) {
      return DataflowResult(notRedundantWith: scanner.potentiallyRedundantSubpath)
    }
    return .redundant(scanner.availableValues)
  }

  func canReplaceWithoutInsertingCopies(liverange: Liverange,_ context: FunctionPassContext) -> Bool {
    switch self.loadOwnership {
    case .trivial, .unqualified:
      return true

    case .copy, .take:
      let deadEndBlocks = context.deadEndBlocks

      // The liverange of the value has an "exit", i.e. a path which doesn't lead to the load,
      // it means that we would have to insert a destroy on that exit to satisfy ownership rules.
      // But an inserted destroy also means that we would need to insert copies of the value which
      // were not there originally. For example:
      //
      //     store %1 to [init] %addr
      //     cond_br bb1, bb2
      //   bb1:
      //     %2 = load [take] %addr
      //   bb2:                      // liverange exit
      //
      // TODO: we could extend OSSA to transfer ownership to support liverange exits without copying. E.g.:
      //
      //     %b = store_and_borrow %1 to [init] %addr   // %b is borrowed from %addr
      //     cond_br bb1, bb2
      //   bb1:
      //     %o = borrowed_to_owned %b take_ownership_from %addr
      //     // replace %2 with %o
      //   bb2:
      //     end_borrow %b
      //
      if liverange.hasExits(deadEndBlocks) {
        return false
      }

      // Handle a corner case: if the load is in an infinite loop, the liverange doesn't have an exit,
      // but we still would need to insert a copy. For example:
      //
      //     store %1 to [init] %addr
      //     br bb1
      //   bb1:
      //     %2 = load [copy] %addr   // would need to insert a copy here
      //    br bb1                    // no exit from the liverange
      //
      // For simplicity, we don't handle this in OSSA.
      if deadEndBlocks.isDeadEnd(parentBlock) {
        return false
      }
      return true
    }
  }
}

private func replace(load: LoadInst, with availableValues: [AvailableValue], _ context: FunctionPassContext) {
  var ssaUpdater = SSAUpdater(function: load.parentFunction,
                              type: load.type, ownership: load.ownership, context)

  for availableValue in availableValues {
    let block = availableValue.instruction.parentBlock
    let availableValue = provideValue(for: load, from: availableValue, context)
    ssaUpdater.addAvailableValue(availableValue, in: block)
  }

  let newValue: Value
  if availableValues.count == 1 {
    // A single available value means that this available value is located _before_ the load. E.g.:
    //
    //     store %1 to %addr   // a single available value
    //     ...
    //     %2 = load %addr     // The load
    //
    newValue = ssaUpdater.getValue(atEndOf: load.parentBlock)
  } else {
    // In case of multiple available values, if an available value is defined in the same basic block
    // as the load, this available is located _after_ the load. E.g.:
    //
    //     store %1 to %addr   // an available value
    //     br bb1
    //   bb1:
    //     %2 = load %addr     // The load
    //     store %3 to %addr   // another available value
    //     cond_br bb1, bb2
    //
    newValue = ssaUpdater.getValue(inMiddleOf: load.parentBlock)
  }
  load.replace(with: newValue, context)
}

private func provideValue(
  for load: LoadInst,
  from availableValue: AvailableValue,
  _ context: FunctionPassContext
) -> Value {
  let projectionPath = availableValue.address.constantAccessPath.getMaterializableProjection(to: load.address.constantAccessPath)!

  switch load.loadOwnership {
  case .unqualified:
    return availableValue.value.createProjection(path: projectionPath,
                                                 builder: availableValue.getBuilderForProjections(context))
  case .copy, .trivial:
    // Note: even if the load is trivial, the available value may be projected out of a non-trivial value.
    return availableValue.value.createProjectionAndCopy(path: projectionPath,
                                                        builder: availableValue.getBuilderForProjections(context))
  case .take:
    if projectionPath.isEmpty {
      return shrinkMemoryLifetime(from: load, to: availableValue, context)
    } else {
      return shrinkMemoryLifetimeAndSplit(from: load, to: availableValue, projectionPath: projectionPath, context)
    }
  }
}

/// In case of a `load [take]` shrink lifetime of the value in memory back to the `availableValue`
/// and return the (possibly projected) available value. For example:
///
///     store %1 to [assign] %addr
///     ...
///     %2 = load [take] %addr
/// ->
///     destroy_addr %addr
///     ...
///     // replace %2 with %1
///
private func shrinkMemoryLifetime(from load: LoadInst, to availableValue: AvailableValue, _ context: FunctionPassContext) -> Value {
  switch availableValue {
  case .viaLoad(let availableLoad):
    assert(availableLoad.loadOwnership == .copy)
    let builder = Builder(after: availableLoad, context)
    availableLoad.set(ownership: .take, context)
    return builder.createCopyValue(operand: availableLoad)
  case .viaStore(let availableStore):
    let builder = Builder(after: availableStore, context)
    let valueToAdd = availableStore.source
    switch availableStore.storeOwnership {
    case .assign:
      builder.createDestroyAddr(address: availableStore.destination)
      context.erase(instruction: availableStore)
    case .initialize,
         // It can be the case that e non-payload case is stored as trivial enum and the enum is loaded as [take], e.g.
         //   %1 = enum $Optional<Class>, #Optional.none
         //   store %1 to [trivial] %addr : $*Optional<Class>
         //   %2 = load [take] %addr : $*Optional<Class>
         .trivial:
      context.erase(instruction: availableStore)
    case .unqualified:
      fatalError("unqualified store in ossa function?")
    }
    return valueToAdd
  }
}

/// Like `shrinkMemoryLifetime`, but the available value must be projected.
/// In this case we cannot just shrink the lifetime and reuse the available value.
/// Therefore, we split the available load or store and load the projected available value.
/// The inserted load can be optimized with the split value in the next iteration.
///
///     store %1 to [assign] %addr
///     ...
///     %2 = struct_element_addr %addr, #field1
///     %3 = load [take] %2
/// ->
///     %f1 = struct_extract %1, #field1
///     %fa1 = struct_element_addr %addr, #field1
///     store %f1 to [assign] %fa1
///     %f2 = struct_extract %1, #field2
///     %fa2 = struct_element_addr %addr, #field2
///     store %f2 to [assign] %fa2
///     %1 = load [take] %fa1         // will be combined with `store %f1 to [assign] %fa1` in the next iteration
///     ...
///     // replace %3 with %1
///
private func shrinkMemoryLifetimeAndSplit(from load: LoadInst, to availableValue: AvailableValue, projectionPath: SmallProjectionPath, _ context: FunctionPassContext) -> Value {
  switch availableValue {
  case .viaLoad(let availableLoad):
    assert(availableLoad.loadOwnership == .copy)
    let builder = Builder(after: availableLoad, context)
    let addr = availableLoad.address.createAddressProjection(path: projectionPath, builder: builder)
    let valueToAdd = builder.createLoad(fromAddress: addr, ownership: .take)
    availableLoad.trySplit(context)
    return valueToAdd
  case .viaStore(let availableStore):
    let builder = Builder(after: availableStore, context)
    let addr = availableStore.destination.createAddressProjection(path: projectionPath, builder: builder)
    let valueToAdd = builder.createLoad(fromAddress: addr, ownership: .take)
    availableStore.trySplit(context)
    return valueToAdd
  }
}

/// Either a `load` or `store` which is preceding the original load and provides the loaded value.
private enum AvailableValue {
  case viaLoad(LoadInst)
  case viaStore(StoreInst)

  var value: Value {
    switch self {
    case .viaLoad(let load):   return load
    case .viaStore(let store): return store.source
    }
  }

  var address: Value {
    switch self {
    case .viaLoad(let load):   return load.address
    case .viaStore(let store): return store.destination
    }
  }

  var instruction: Instruction {
    switch self {
    case .viaLoad(let load):   return load
    case .viaStore(let store): return store
    }
  }

  func getBuilderForProjections(_ context: FunctionPassContext) -> Builder {
    switch self {
    case .viaLoad(let load):   return Builder(after: load, context)
    case .viaStore(let store): return Builder(before: store, context)
    }
  }
}

private struct InstructionScanner {
  private let load: LoadInst
  private let accessPath: AccessPath
  private let storageDefBlock: BasicBlock?
  private let aliasAnalysis: AliasAnalysis

  private(set) var potentiallyRedundantSubpath: AccessPath? = nil
  private(set) var availableValues = Array<AvailableValue>()

  init(load: LoadInst, accessPath: AccessPath, _ aliasAnalysis: AliasAnalysis) {
    self.load = load
    self.accessPath = accessPath
    self.storageDefBlock = accessPath.base.reference?.referenceRoot.parentBlock
    self.aliasAnalysis = aliasAnalysis
  }

  enum ScanResult {
    case overwritten
    case available
    case transparent
  }

  mutating func scan(instructions: ReverseInstructionList,
                     in block: BasicBlock,
                     complexityBudget: inout Int) -> ScanResult
  {
    for inst in instructions {
      complexityBudget -= 1
      if complexityBudget <= 0 {
        return .overwritten
      }

      switch visit(instruction: inst) {
        case .available:   return .available
        case .overwritten: return .overwritten
        case .transparent: break
      }
    }

    // Abort if we find the storage definition of the access in case of a loop, e.g.
    //
    //   bb1:
    //     %storage_root = apply
    //     %2 = ref_element_addr %storage_root
    //     %3 = load %2
    //     cond_br %c, bb1, bb2
    //
    // The storage root is different in each loop iteration. Therefore the load in a
    // successive loop iteration does not load from the same address as in the previous iteration.
    if let storageDefBlock = storageDefBlock,
       block == storageDefBlock {
      return .overwritten
    }
    if block.predecessors.isEmpty {
      // We reached the function entry without finding an available value.
      return .overwritten
    }
    return .transparent
  }

  private mutating func visit(instruction: Instruction) -> ScanResult {
    switch instruction {
    case is FixLifetimeInst, is EndAccessInst, is EndBorrowInst:
      // Those scope-ending instructions are only irrelevant if the preceding load is not changed.
      // If it is changed from `load [copy]` -> `load [take]` the memory effects of those scope-ending
      // instructions prevent that the `load [take]` will illegally mutate memory which is protected
      // from mutation by the scope.
      if load.loadOwnership != .take {
        return .transparent
      }
    case let precedingLoad as LoadInst:
      if precedingLoad == load {
        // We need to stop the data flow analysis when we visit the original load again.
        // This happens if the load is in a loop.
        return .available
      }
      let precedingLoadPath = precedingLoad.address.constantAccessPath
      if precedingLoadPath.getMaterializableProjection(to: accessPath) != nil {
        availableValues.append(.viaLoad(precedingLoad))
        return .available
      }
      if accessPath.getMaterializableProjection(to: precedingLoadPath) != nil,
         potentiallyRedundantSubpath == nil {
        potentiallyRedundantSubpath = precedingLoadPath
      }
      if load.loadOwnership != .take {
        return .transparent
      }

    case let precedingStore as StoreInst:
      if precedingStore.source is Undef {
        return .overwritten
      }
      let precedingStorePath = precedingStore.destination.constantAccessPath
      if precedingStorePath.getMaterializableProjection(to: accessPath) != nil {
        availableValues.append(.viaStore(precedingStore))
        return .available
      }
      if accessPath.getMaterializableProjection(to: precedingStorePath) != nil,
         potentiallyRedundantSubpath == nil {
        potentiallyRedundantSubpath = precedingStorePath
      }

    default:
      break
    }
    if load.loadOwnership == .take {
      // In case of `take`, don't allow reading instructions in the liverange.
      // Otherwise we cannot shrink the memory liverange afterwards.
      if instruction.mayReadOrWrite(address: load.address, aliasAnalysis) {
        return .overwritten
      }
    } else {
      if instruction.mayWrite(toAddress: load.address, aliasAnalysis) {
        return .overwritten
      }
    }
    return .transparent
  }
}

/// Represents the liverange (in terms of basic blocks) of the loaded value.
///
/// In contrast to a BlockRange, this liverange has multiple begin blocks (containing the
/// available values) and a single end block (containing the original load). For example:
///
///   bb1:
///     store %1 to %addr   // begin block
///     br bb3
///   bb2:
///     store %2 to %addr   // begin block
///     br bb3
///   bb3:
///     %3 = load %addr     // end block
///
private struct Liverange {
  private var worklist: BasicBlockWorklist
  private var containingBlocks: Stack<BasicBlock> // doesn't include the end-block
  private var beginBlocks: BasicBlockSet
  private let endBlock: BasicBlock

  init(endBlock: BasicBlock, _ context: FunctionPassContext) {
    self.worklist = BasicBlockWorklist(context)
    self.containingBlocks = Stack(context)
    self.beginBlocks = BasicBlockSet(context)
    self.endBlock = endBlock
    pushPredecessors(of: endBlock)
  }

  mutating func deinitialize() {
    worklist.deinitialize()
    containingBlocks.deinitialize()
    beginBlocks.deinitialize()
  }

  mutating func pushPredecessors(of block: BasicBlock) {
    worklist.pushIfNotVisited(contentsOf: block.predecessors)
    containingBlocks.append(contentsOf: block.predecessors)
  }

  mutating func pop() -> BasicBlock? { worklist.pop() }

  mutating func add(beginBlock: BasicBlock) {
    beginBlocks.insert(beginBlock)
  }

  /// Returns true if there is some path from a begin block to a function exit which doesn't
  /// go through the end-block. For example:
  ///
  ///     store %1 to %addr   // begin
  ///     cond_br bb1, bb2
  ///   bb1:
  ///     %2 = load %addr     // end
  ///   bb2:
  ///     ...                 // exit
  ///
  func hasExits(_ deadEndBlocks: DeadEndBlocksAnalysis) -> Bool {
    for block in containingBlocks {
      for succ in block.successors {
        if succ != endBlock,
           (!worklist.hasBeenPushed(succ) || beginBlocks.contains(succ)),
           !deadEndBlocks.isDeadEnd(succ) {
          return true
        }
      }
    }
    return false
  }
}