swift-mirror/lib/SILOptimizer/Mandatory/PredictableMemOpt.cpp

//===--- PredictableMemOpt.cpp - Perform predictable memory optzns --------===//
//
// This source file is part of the Swift.org open source project
//
// Copyright (c) 2014 - 2017 Apple Inc. and the Swift project authors
// Licensed under Apache License v2.0 with Runtime Library Exception
//
// See https://swift.org/LICENSE.txt for license information
// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
//
//===----------------------------------------------------------------------===//

#define DEBUG_TYPE "predictable-memopt"

#include "swift/SILOptimizer/PassManager/Passes.h"
#include "DIMemoryUseCollector.h"
#include "swift/SIL/SILBuilder.h"
#include "swift/SILOptimizer/Utils/Local.h"
#include "swift/SILOptimizer/PassManager/Transforms.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"

using namespace swift;

STATISTIC(NumLoadPromoted, "Number of loads promoted");
STATISTIC(NumDestroyAddrPromoted, "Number of destroy_addrs promoted");
STATISTIC(NumAllocRemoved, "Number of allocations completely removed");

//===----------------------------------------------------------------------===//
// Subelement Analysis Implementation
//===----------------------------------------------------------------------===//

// We can only analyze components of structs whose storage is fully accessible
// from Swift.
static StructDecl *
getFullyReferenceableStruct(SILType Ty) {
  auto SD = Ty.getStructOrBoundGenericStruct();
  if (!SD || SD->hasUnreferenceableStorage())
    return nullptr;
  return SD;
}

static unsigned getNumSubElements(SILType T, SILModule &M) {

  if (auto TT = T.getAs<TupleType>()) {
    unsigned NumElements = 0;
    for (auto index : indices(TT.getElementTypes()))
      NumElements += getNumSubElements(T.getTupleElementType(index), M);
    return NumElements;
  }

  if (auto *SD = getFullyReferenceableStruct(T)) {
    unsigned NumElements = 0;
    for (auto *D : SD->getStoredProperties())
      NumElements += getNumSubElements(T.getFieldType(D, M), M);
    return NumElements;
  }

  // If this isn't a tuple or struct, it is a single element.
  return 1;
}

/// getAccessPathRoot - Given an address, dive through any tuple/struct element
/// addresses to get the underlying value.
static SILValue getAccessPathRoot(SILValue Pointer) {
  while (1) {
    if (auto *TEAI = dyn_cast<TupleElementAddrInst>(Pointer))
      Pointer = TEAI->getOperand();
    else if (auto SEAI = dyn_cast<StructElementAddrInst>(Pointer))
      Pointer = SEAI->getOperand();
    else if (auto BAI = dyn_cast<BeginAccessInst>(Pointer))
      Pointer = BAI->getSource();
    else
      return Pointer;
  }
}

/// Compute the subelement number indicated by the specified pointer (which is
/// derived from the root by a series of tuple/struct element addresses) by
/// treating the type as a linearized namespace with sequential elements.  For
/// example, given:
///
///   root = alloc { a: { c: i64, d: i64 }, b: (i64, i64) }
///   tmp1 = struct_element_addr root, 1
///   tmp2 = tuple_element_addr tmp1, 0
///
/// This will return a subelement number of 2.
///
/// If this pointer is to within an existential projection, it returns ~0U.
///
static unsigned computeSubelement(SILValue Pointer,
                                  SingleValueInstruction *RootInst) {
  unsigned SubEltNumber = 0;
  SILModule &M = RootInst->getModule();

  while (1) {
    // If we got to the root, we're done.
    if (RootInst == Pointer)
      return SubEltNumber;

    if (auto *PBI = dyn_cast<ProjectBoxInst>(Pointer)) {
      Pointer = PBI->getOperand();
      continue;
    }

    if (auto *BAI = dyn_cast<BeginAccessInst>(Pointer)) {
      Pointer = BAI->getSource();
      continue;
    }

    if (auto *TEAI = dyn_cast<TupleElementAddrInst>(Pointer)) {
      SILType TT = TEAI->getOperand()->getType();

      // Keep track of what subelement is being referenced.
      for (unsigned i = 0, e = TEAI->getFieldNo(); i != e; ++i) {
        SubEltNumber += getNumSubElements(TT.getTupleElementType(i), M);
      }
      Pointer = TEAI->getOperand();
      continue;
    }

    if (auto *SEAI = dyn_cast<StructElementAddrInst>(Pointer)) {
      SILType ST = SEAI->getOperand()->getType();

      // Keep track of what subelement is being referenced.
      StructDecl *SD = SEAI->getStructDecl();
      for (auto *D : SD->getStoredProperties()) {
        if (D == SEAI->getField()) break;
        SubEltNumber += getNumSubElements(ST.getFieldType(D, M), M);
      }

      Pointer = SEAI->getOperand();
      continue;
    }


    assert(isa<InitExistentialAddrInst>(Pointer) &&
           "Unknown access path instruction");
    // Cannot promote loads and stores from within an existential projection.
    return ~0U;
  }
}


/// Given an aggregate value and an access path, extract the value indicated by
/// the path.
static SILValue extractSubElement(SILValue Val, unsigned SubElementNumber,
                                  SILBuilder &B, SILLocation Loc) {
  SILType ValTy = Val->getType();

  // Extract tuple elements.
  if (auto TT = ValTy.getAs<TupleType>()) {
    for (unsigned EltNo : indices(TT.getElementTypes())) {
      // Keep track of what subelement is being referenced.
      SILType EltTy = ValTy.getTupleElementType(EltNo);
      unsigned NumSubElt = getNumSubElements(EltTy, B.getModule());
      if (SubElementNumber < NumSubElt) {
        Val = B.emitTupleExtract(Loc, Val, EltNo, EltTy);
        return extractSubElement(Val, SubElementNumber, B, Loc);
      }

      SubElementNumber -= NumSubElt;
    }

    llvm_unreachable("Didn't find field");
  }

  // Extract struct elements.
  if (auto *SD = getFullyReferenceableStruct(ValTy)) {
    for (auto *D : SD->getStoredProperties()) {
      auto fieldType = ValTy.getFieldType(D, B.getModule());
      unsigned NumSubElt = getNumSubElements(fieldType, B.getModule());

      if (SubElementNumber < NumSubElt) {
        Val = B.emitStructExtract(Loc, Val, D);
        return extractSubElement(Val, SubElementNumber, B, Loc);
      }

      SubElementNumber -= NumSubElt;

    }
    llvm_unreachable("Didn't find field");
  }

  // Otherwise, we're down to a scalar.
  assert(SubElementNumber == 0 && "Miscalculation indexing subelements");
  return Val;
}

//===----------------------------------------------------------------------===//
//                          Allocation Optimization
//===----------------------------------------------------------------------===//

namespace {
  /// AllocOptimize - This performs load promotion and deletes synthesized
  /// allocations if all loads can be removed.
  class AllocOptimize {
    SILModule &Module;

    /// TheMemory - This is either an alloc_box or alloc_stack instruction.
    AllocationInst *TheMemory;

    /// This is the SILType of the memory object.
    SILType MemoryType;

    /// The number of primitive subelements across all elements of this memory
    /// value.
    unsigned NumMemorySubElements;

    SmallVectorImpl<DIMemoryUse> &Uses;
    SmallVectorImpl<SILInstruction*> &Releases;

    llvm::SmallPtrSet<SILBasicBlock*, 32> HasLocalDefinition;

    /// This is a map of uses that are not loads (i.e., they are Stores,
    /// InOutUses, and Escapes), to their entry in Uses.
    llvm::SmallDenseMap<SILInstruction*, unsigned, 16> NonLoadUses;

    /// Does this value escape anywhere in the function.
    bool HasAnyEscape = false;

  public:
    AllocOptimize(AllocationInst *TheMemory,
                  SmallVectorImpl<DIMemoryUse> &Uses,
                  SmallVectorImpl<SILInstruction*> &Releases);

    bool doIt();

  private:

    bool promoteLoad(SILInstruction *Inst);
    bool promoteDestroyAddr(DestroyAddrInst *DAI);

    // Load promotion.
    bool hasEscapedAt(SILInstruction *I);
    void updateAvailableValues(SILInstruction *Inst,
                               llvm::SmallBitVector &RequiredElts,
                         SmallVectorImpl<std::pair<SILValue, unsigned>> &Result,
                               llvm::SmallBitVector &ConflictingValues);
    void computeAvailableValues(SILInstruction *StartingFrom,
                                llvm::SmallBitVector &RequiredElts,
                        SmallVectorImpl<std::pair<SILValue, unsigned>> &Result);
    void computeAvailableValuesFrom(SILBasicBlock::iterator StartingFrom,
                                    SILBasicBlock *BB,
                                    llvm::SmallBitVector &RequiredElts,
                        SmallVectorImpl<std::pair<SILValue, unsigned>> &Result,
  llvm::SmallDenseMap<SILBasicBlock*, llvm::SmallBitVector, 32> &VisitedBlocks,
                                    llvm::SmallBitVector &ConflictingValues);

    void explodeCopyAddr(CopyAddrInst *CAI);

    bool tryToRemoveDeadAllocation();
  };
} // end anonymous namespace


AllocOptimize::AllocOptimize(AllocationInst *TheMemory,
                             SmallVectorImpl<DIMemoryUse> &Uses,
                             SmallVectorImpl<SILInstruction*> &Releases)
: Module(TheMemory->getModule()), TheMemory(TheMemory), Uses(Uses),
  Releases(Releases) {

  // Compute the type of the memory object.
  if (auto *ABI = dyn_cast<AllocBoxInst>(TheMemory)) {
    assert(ABI->getBoxType()->getLayout()->getFields().size() == 1
           && "optimizing multi-field boxes not implemented");
    MemoryType = ABI->getBoxType()->getFieldType(ABI->getModule(), 0);
  } else {
    assert(isa<AllocStackInst>(TheMemory));
    MemoryType = cast<AllocStackInst>(TheMemory)->getElementType();
  }

  NumMemorySubElements = getNumSubElements(MemoryType, Module);

  // The first step of processing an element is to collect information about the
  // element into data structures we use later.
  for (unsigned ui = 0, e = Uses.size(); ui != e; ++ui) {
    auto &Use = Uses[ui];
    assert(Use.Inst && "No instruction identified?");

    // Keep track of all the uses that aren't loads.
    if (Use.Kind == DIUseKind::Load)
      continue;

    NonLoadUses[Use.Inst] = ui;

    HasLocalDefinition.insert(Use.Inst->getParent());

    if (Use.Kind == DIUseKind::Escape) {
      // Determine which blocks the value can escape from.  We aren't allowed to
      // promote loads in blocks reachable from an escape point.
      HasAnyEscape = true;
    }
  }

  // If isn't really a use, but we account for the alloc_box/mark_uninitialized
  // as a use so we see it in our dataflow walks.
  NonLoadUses[TheMemory] = ~0U;
  HasLocalDefinition.insert(TheMemory->getParent());
}


/// hasEscapedAt - Return true if the box has escaped at the specified
/// instruction.  We are not allowed to do load promotion in an escape region.
bool AllocOptimize::hasEscapedAt(SILInstruction *I) {
  // FIXME: This is not an aggressive implementation.  :)

  // TODO: At some point, we should special case closures that just *read* from
  // the escaped value (by looking at the body of the closure).  They should not
  // prevent load promotion, and will allow promoting values like X in regions
  // dominated by "... && X != 0".
  return HasAnyEscape;
}


/// The specified instruction is a non-load access of the element being
/// promoted.  See if it provides a value or refines the demanded element mask
/// used for load promotion.
void AllocOptimize::
updateAvailableValues(SILInstruction *Inst, llvm::SmallBitVector &RequiredElts,
                      SmallVectorImpl<std::pair<SILValue, unsigned>> &Result,
                      llvm::SmallBitVector &ConflictingValues) {
  // Handle store and assign.
  if (isa<StoreInst>(Inst) || isa<AssignInst>(Inst)) {
    unsigned StartSubElt = computeSubelement(Inst->getOperand(1), TheMemory);
    assert(StartSubElt != ~0U && "Store within enum projection not handled");
    SILType ValTy = Inst->getOperand(0)->getType();

    for (unsigned i = 0, e = getNumSubElements(ValTy, Module); i != e; ++i) {
      // If this element is not required, don't fill it in.
      if (!RequiredElts[StartSubElt+i]) continue;

      // If there is no result computed for this subelement, record it.  If
      // there already is a result, check it for conflict.  If there is no
      // conflict, then we're ok.
      auto &Entry = Result[StartSubElt+i];
      if (Entry.first == SILValue())
        Entry = { Inst->getOperand(0), i };
      else if (Entry.first != Inst->getOperand(0) || Entry.second != i)
        ConflictingValues[StartSubElt+i] = true;

      // This element is now provided.
      RequiredElts[StartSubElt+i] = false;
    }

    return;
  }

  // If we get here with a copy_addr, it must be storing into the element. Check
  // to see if any loaded subelements are being used, and if so, explode the
  // copy_addr to its individual pieces.
  if (auto *CAI = dyn_cast<CopyAddrInst>(Inst)) {
    unsigned StartSubElt = computeSubelement(Inst->getOperand(1), TheMemory);
    assert(StartSubElt != ~0U && "Store within enum projection not handled");
    SILType ValTy = Inst->getOperand(1)->getType();

    bool AnyRequired = false;
    for (unsigned i = 0, e = getNumSubElements(ValTy, Module); i != e; ++i) {
      // If this element is not required, don't fill it in.
      AnyRequired = RequiredElts[StartSubElt+i];
      if (AnyRequired) break;
    }

    // If this is a copy addr that doesn't intersect the loaded subelements,
    // just continue with an unmodified load mask.
    if (!AnyRequired)
      return;

    // If the copyaddr is of a non-loadable type, we can't promote it.  Just
    // consider it to be a clobber.
    if (CAI->getOperand(0)->getType().isLoadable(Module)) {
      // Otherwise, some part of the copy_addr's value is demanded by a load, so
      // we need to explode it to its component pieces.  This only expands one
      // level of the copyaddr.
      explodeCopyAddr(CAI);

      // The copy_addr doesn't provide any values, but we've arranged for our
      // iterators to visit the newly generated instructions, which do.
      return;
    }
  }


  // TODO: inout apply's should only clobber pieces passed in.

  // Otherwise, this is some unknown instruction, conservatively assume that all
  // values are clobbered.
  RequiredElts.clear();
  ConflictingValues = llvm::SmallBitVector(Result.size(), true);
  return;
}


/// Try to find available values of a set of subelements of the current value,
/// starting right before the specified instruction.
///
/// The bitvector indicates which subelements we're interested in, and result
/// captures the available value (plus an indicator of which subelement of that
/// value is needed).
///
void AllocOptimize::
computeAvailableValues(SILInstruction *StartingFrom,
                       llvm::SmallBitVector &RequiredElts,
                       SmallVectorImpl<std::pair<SILValue, unsigned>> &Result) {
  llvm::SmallDenseMap<SILBasicBlock*, llvm::SmallBitVector, 32> VisitedBlocks;
  llvm::SmallBitVector ConflictingValues(Result.size());

  computeAvailableValuesFrom(StartingFrom->getIterator(),
                             StartingFrom->getParent(), RequiredElts, Result,
                             VisitedBlocks, ConflictingValues);

  // If we have any conflicting values, explicitly mask them out of the result,
  // so we don't pick one arbitrary available value.
  if (!ConflictingValues.none())
    for (unsigned i = 0, e = Result.size(); i != e; ++i)
      if (ConflictingValues[i])
        Result[i] = { SILValue(), 0U };

  return;
}

void AllocOptimize::
computeAvailableValuesFrom(SILBasicBlock::iterator StartingFrom,
                           SILBasicBlock *BB,
                           llvm::SmallBitVector &RequiredElts,
                         SmallVectorImpl<std::pair<SILValue, unsigned>> &Result,
   llvm::SmallDenseMap<SILBasicBlock*, llvm::SmallBitVector, 32> &VisitedBlocks,
                           llvm::SmallBitVector &ConflictingValues) {
  assert(!RequiredElts.none() && "Scanning with a goal of finding nothing?");

  // If there is a potential modification in the current block, scan the block
  // to see if the store or escape is before or after the load.  If it is
  // before, check to see if it produces the value we are looking for.
  if (HasLocalDefinition.count(BB)) {
    for (SILBasicBlock::iterator BBI = StartingFrom; BBI != BB->begin();) {
      SILInstruction *TheInst = &*std::prev(BBI);

      // If this instruction is unrelated to the element, ignore it.
      if (!NonLoadUses.count(TheInst)) {
        --BBI;
        continue;
      }

      // Given an interesting instruction, incorporate it into the set of
      // results, and filter down the list of demanded subelements that we still
      // need.
      updateAvailableValues(TheInst, RequiredElts, Result, ConflictingValues);

      // If this satisfied all of the demanded values, we're done.
      if (RequiredElts.none())
        return;

      // Otherwise, keep scanning the block.  If the instruction we were looking
      // at just got exploded, don't skip the next instruction.
      if (&*std::prev(BBI) == TheInst)
        --BBI;
    }
  }


  // Otherwise, we need to scan up the CFG looking for available values.
  for (auto PI = BB->pred_begin(), E = BB->pred_end(); PI != E; ++PI) {
    SILBasicBlock *PredBB = *PI;

    // If the predecessor block has already been visited (potentially due to a
    // cycle in the CFG), don't revisit it.  We can do this safely because we
    // are optimistically assuming that all incoming elements in a cycle will be
    // the same.  If we ever detect a conflicting element, we record it and do
    // not look at the result.
    auto Entry = VisitedBlocks.insert({PredBB, RequiredElts});
    if (!Entry.second) {
      // If we are revisiting a block and asking for different required elements
      // then anything that isn't agreeing is in conflict.
      const auto &PrevRequired = Entry.first->second;
      if (PrevRequired != RequiredElts) {
        ConflictingValues |= (PrevRequired ^ RequiredElts);

        RequiredElts &= ~ConflictingValues;
        if (RequiredElts.none())
          return;
      }
      continue;
    }

    // Make sure to pass in the same set of required elements for each pred.
    llvm::SmallBitVector Elts = RequiredElts;
    computeAvailableValuesFrom(PredBB->end(), PredBB, Elts, Result,
                               VisitedBlocks, ConflictingValues);

    // If we have any conflicting values, don't bother searching for them.
    RequiredElts &= ~ConflictingValues;
    if (RequiredElts.none())
      return;
  }
}


static bool anyMissing(unsigned StartSubElt, unsigned NumSubElts,
                       ArrayRef<std::pair<SILValue, unsigned>> &Values) {
  while (NumSubElts) {
    if (!Values[StartSubElt].first) return true;
    ++StartSubElt;
    --NumSubElts;
  }
  return false;
}


/// AggregateAvailableValues - Given a bunch of primitive subelement values,
/// build out the right aggregate type (LoadTy) by emitting tuple and struct
/// instructions as necessary.
static SILValue aggregateAvailableValues(
    SILInstruction *Inst, SILType LoadTy, SILValue Address,
    ArrayRef<std::pair<SILValue, unsigned>> AvailableValues,
    unsigned FirstElt) {
  assert(LoadTy.isObject());
  SILModule &M = Inst->getModule();

  // Check to see if the requested value is fully available, as an aggregate.
  // This is a super-common case for single-element structs, but is also a
  // general answer for arbitrary structs and tuples as well.
  if (FirstElt < AvailableValues.size()) {  // #Elements may be zero.
    SILValue FirstVal = AvailableValues[FirstElt].first;
    if (FirstVal && AvailableValues[FirstElt].second == 0 &&
        FirstVal->getType() == LoadTy) {
      // If the first element of this value is available, check any extra ones
      // before declaring success.
      bool AllMatch = true;
      for (unsigned i = 0, e = getNumSubElements(LoadTy, M); i != e; ++i)
        if (AvailableValues[FirstElt+i].first != FirstVal ||
            AvailableValues[FirstElt+i].second != i) {
          AllMatch = false;
          break;
        }

      if (AllMatch)
        return FirstVal;
    }
  }


  SILBuilderWithScope B(Inst);

  if (TupleType *TT = LoadTy.getAs<TupleType>()) {
    SmallVector<SILValue, 4> ResultElts;

    for (unsigned EltNo : indices(TT->getElements())) {
      SILType EltTy = LoadTy.getTupleElementType(EltNo);
      unsigned NumSubElt = getNumSubElements(EltTy, M);

      // If we are missing any of the available values in this struct element,
      // compute an address to load from.
      SILValue EltAddr;
      if (anyMissing(FirstElt, NumSubElt, AvailableValues))
        EltAddr = B.createTupleElementAddr(Inst->getLoc(), Address, EltNo,
                                           EltTy.getAddressType());

      ResultElts.push_back(aggregateAvailableValues(Inst, EltTy, EltAddr,
                                                    AvailableValues, FirstElt));
      FirstElt += NumSubElt;
    }

    return B.createTuple(Inst->getLoc(), LoadTy, ResultElts);
  }

  // Extract struct elements from fully referenceable structs.
  if (auto *SD = getFullyReferenceableStruct(LoadTy)) {
    SmallVector<SILValue, 4> ResultElts;

    for (auto *FD : SD->getStoredProperties()) {
      SILType EltTy = LoadTy.getFieldType(FD, M);
      unsigned NumSubElt = getNumSubElements(EltTy, M);

      // If we are missing any of the available values in this struct element,
      // compute an address to load from.
      SILValue EltAddr;
      if (anyMissing(FirstElt, NumSubElt, AvailableValues))
        EltAddr = B.createStructElementAddr(Inst->getLoc(), Address, FD,
                                            EltTy.getAddressType());

      ResultElts.push_back(aggregateAvailableValues(Inst, EltTy, EltAddr,
                                                    AvailableValues, FirstElt));
      FirstElt += NumSubElt;
    }
    return B.createStruct(Inst->getLoc(), LoadTy, ResultElts);
  }

  // Otherwise, we have a simple primitive.  If the value is available, use it,
  // otherwise emit a load of the value.
  auto Val = AvailableValues[FirstElt];
  if (!Val.first)
    return B.createLoad(Inst->getLoc(), Address,
                        LoadOwnershipQualifier::Unqualified);

  SILValue EltVal = extractSubElement(Val.first, Val.second, B, Inst->getLoc());
  // It must be the same type as LoadTy if available.
  assert(EltVal->getType() == LoadTy &&
         "Subelement types mismatch");
  return EltVal;
}


/// At this point, we know that this element satisfies the definitive init
/// requirements, so we can try to promote loads to enable SSA-based dataflow
/// analysis.  We know that accesses to this element only access this element,
/// cross element accesses have been scalarized.
///
/// This returns true if the load has been removed from the program.
///
bool AllocOptimize::promoteLoad(SILInstruction *Inst) {
  // Note that we intentionally don't support forwarding of weak pointers,
  // because the underlying value may drop be deallocated at any time.  We would
  // have to prove that something in this function is holding the weak value
  // live across the promoted region and that isn't desired for a stable
  // diagnostics pass this like one.

  // We only handle load and copy_addr right now.
  SILValue src;
  if (auto CAI = dyn_cast<CopyAddrInst>(Inst)) {
    // If this is a CopyAddr, verify that the element type is loadable.  If not,
    // we can't explode to a load.
    src = CAI->getSrc();
    if (!src->getType().isLoadable(Module))
      return false;
  } else if (auto load = dyn_cast<LoadInst>(Inst)) {
    src = load->getOperand();
  } else {
    return false;
  }

  // If the box has escaped at this instruction, we can't safely promote the
  // load.
  if (hasEscapedAt(Inst))
    return false;

  SILType LoadTy = src->getType().getObjectType();

  // If this is a load/copy_addr from a struct field that we want to promote,
  // compute the access path down to the field so we can determine precise
  // def/use behavior.
  unsigned FirstElt = computeSubelement(src, TheMemory);

  // If this is a load from within an enum projection, we can't promote it since
  // we don't track subelements in a type that could be changing.
  if (FirstElt == ~0U)
    return false;

  unsigned NumLoadSubElements = getNumSubElements(LoadTy, Module);

  // Set up the bitvector of elements being demanded by the load.
  llvm::SmallBitVector RequiredElts(NumMemorySubElements);
  RequiredElts.set(FirstElt, FirstElt+NumLoadSubElements);

  SmallVector<std::pair<SILValue, unsigned>, 8> AvailableValues;
  AvailableValues.resize(NumMemorySubElements);

  // Find out if we have any available values.  If no bits are demanded, we
  // trivially succeed. This can happen when there is a load of an empty struct.
  if (NumLoadSubElements != 0) {
    computeAvailableValues(Inst, RequiredElts, AvailableValues);

    // If there are no values available at this load point, then we fail to
    // promote this load and there is nothing to do.
    bool AnyAvailable = false;
    for (unsigned i = FirstElt, e = i+NumLoadSubElements; i != e; ++i)
      if (AvailableValues[i].first) {
        AnyAvailable = true;
        break;
      }

    if (!AnyAvailable)
      return false;
  }

  // Ok, we have some available values.  If we have a copy_addr, explode it now,
  // exposing the load operation within it.  Subsequent optimization passes will
  // see the load and propagate the available values into it.
  if (auto *CAI = dyn_cast<CopyAddrInst>(Inst)) {
    explodeCopyAddr(CAI);

    // This is removing the copy_addr, but explodeCopyAddr takes care of
    // removing the instruction from Uses for us, so we return false.
    return false;
  }

  // Aggregate together all of the subelements into something that has the same
  // type as the load did, and emit smaller) loads for any subelements that were
  // not available.
  auto Load = cast<LoadInst>(Inst);
  auto NewVal = aggregateAvailableValues(Load, LoadTy, Load->getOperand(),
                                         AvailableValues, FirstElt);

  ++NumLoadPromoted;

  // Simply replace the load.
  DEBUG(llvm::dbgs() << "  *** Promoting load: " << *Load << "\n");
  DEBUG(llvm::dbgs() << "      To value: " << *NewVal << "\n");

  Load->replaceAllUsesWith(NewVal);
  SILValue Addr = Load->getOperand();
  Load->eraseFromParent();
  if (auto *AddrI = Addr->getDefiningInstruction())
    recursivelyDeleteTriviallyDeadInstructions(AddrI);
  return true;
}

/// promoteDestroyAddr - DestroyAddr is a composed operation merging
/// load+strong_release.  If the implicit load's value is available, explode it.
///
/// Note that we handle the general case of a destroy_addr of a piece of the
/// memory object, not just destroy_addrs of the entire thing.
///
bool AllocOptimize::promoteDestroyAddr(DestroyAddrInst *DAI) {
  SILValue Address = DAI->getOperand();

  // We cannot promote destroys of address-only types, because we can't expose
  // the load.
  SILType LoadTy = Address->getType().getObjectType();
  if (LoadTy.isAddressOnly(Module))
    return false;

  // If the box has escaped at this instruction, we can't safely promote the
  // load.
  if (hasEscapedAt(DAI))
    return false;

  // Compute the access path down to the field so we can determine precise
  // def/use behavior.
  unsigned FirstElt = computeSubelement(Address, TheMemory);
  assert(FirstElt != ~0U && "destroy within enum projection is not valid");
  unsigned NumLoadSubElements = getNumSubElements(LoadTy, Module);

  // Set up the bitvector of elements being demanded by the load.
  llvm::SmallBitVector RequiredElts(NumMemorySubElements);
  RequiredElts.set(FirstElt, FirstElt+NumLoadSubElements);

  SmallVector<std::pair<SILValue, unsigned>, 8> AvailableValues;
  AvailableValues.resize(NumMemorySubElements);

  // Find out if we have any available values.  If no bits are demanded, we
  // trivially succeed. This can happen when there is a load of an empty struct.
  if (NumLoadSubElements != 0) {
    computeAvailableValues(DAI, RequiredElts, AvailableValues);

    // If some value is not available at this load point, then we fail.
    for (unsigned i = FirstElt, e = FirstElt+NumLoadSubElements; i != e; ++i)
      if (!AvailableValues[i].first)
        return false;
  }

  // Aggregate together all of the subelements into something that has the same
  // type as the load did, and emit smaller) loads for any subelements that were
  // not available.
  auto NewVal =
      aggregateAvailableValues(DAI, LoadTy, Address, AvailableValues, FirstElt);

  ++NumDestroyAddrPromoted;

  DEBUG(llvm::dbgs() << "  *** Promoting destroy_addr: " << *DAI << "\n");
  DEBUG(llvm::dbgs() << "      To value: " << *NewVal << "\n");

  SILBuilderWithScope(DAI).emitDestroyValueOperation(DAI->getLoc(), NewVal);
  DAI->eraseFromParent();
  return true;
}


/// Explode a copy_addr instruction of a loadable type into lower level
/// operations like loads, stores, retains, releases, retain_value, etc.
void AllocOptimize::explodeCopyAddr(CopyAddrInst *CAI) {
  DEBUG(llvm::dbgs() << "  -- Exploding copy_addr: " << *CAI << "\n");

  SILType ValTy = CAI->getDest()->getType().getObjectType();
  auto &TL = Module.getTypeLowering(ValTy);

  // Keep track of the new instructions emitted.
  SmallVector<SILInstruction*, 4> NewInsts;
  SILBuilder B(CAI, &NewInsts);
  B.setCurrentDebugScope(CAI->getDebugScope());

  // Use type lowering to lower the copyaddr into a load sequence + store
  // sequence appropriate for the type.
  SILValue StoredValue = TL.emitLoadOfCopy(B, CAI->getLoc(), CAI->getSrc(),
                                           CAI->isTakeOfSrc());

  TL.emitStoreOfCopy(B, CAI->getLoc(), StoredValue, CAI->getDest(),
                     CAI->isInitializationOfDest());

  // Update our internal state for this being gone.
  NonLoadUses.erase(CAI);

  // Remove the copy_addr from Uses.  A single copy_addr can appear multiple
  // times if the source and dest are to elements within a single aggregate, but
  // we only want to pick up the CopyAddrKind from the store.
  DIMemoryUse LoadUse, StoreUse;
  for (auto &Use : Uses) {
    if (Use.Inst != CAI) continue;

    if (Use.Kind == DIUseKind::Load) {
      assert(LoadUse.isInvalid());
      LoadUse = Use;
    } else {
      assert(StoreUse.isInvalid());
      StoreUse = Use;
    }

    Use.Inst = nullptr;

    // Keep scanning in case the copy_addr appears multiple times.
  }

  assert((LoadUse.isValid() || StoreUse.isValid()) &&
         "we should have a load or a store, possibly both");
  assert(StoreUse.isInvalid() || StoreUse.Kind == Assign ||
         StoreUse.Kind == PartialStore || StoreUse.Kind == Initialization);

  // Now that we've emitted a bunch of instructions, including a load and store
  // but also including other stuff, update the internal state of
  // LifetimeChecker to reflect them.

  // Update the instructions that touch the memory.  NewInst can grow as this
  // iterates, so we can't use a foreach loop.
  for (auto *NewInst : NewInsts) {
    switch (NewInst->getKind()) {
    default:
      NewInst->dump();
      llvm_unreachable("Unknown instruction generated by copy_addr lowering");

    case SILInstructionKind::StoreInst:
      // If it is a store to the memory object (as oppose to a store to
      // something else), track it as an access.
      if (StoreUse.isValid()) {
        StoreUse.Inst = NewInst;
        NonLoadUses[NewInst] = Uses.size();
        Uses.push_back(StoreUse);
      }
      continue;

    case SILInstructionKind::LoadInst:
      // If it is a load from the memory object (as oppose to a load from
      // something else), track it as an access.  We need to explicitly check to
      // see if the load accesses "TheMemory" because it could either be a load
      // for the copy_addr source, or it could be a load corresponding to the
      // "assign" operation on the destination of the copyaddr.
      if (LoadUse.isValid() &&
          getAccessPathRoot(NewInst->getOperand(0)) == TheMemory) {
        LoadUse.Inst = NewInst;
        Uses.push_back(LoadUse);
      }
      continue;

    case SILInstructionKind::RetainValueInst:
    case SILInstructionKind::StrongRetainInst:
    case SILInstructionKind::StrongReleaseInst:
    case SILInstructionKind::UnownedRetainInst:
    case SILInstructionKind::UnownedReleaseInst:
    case SILInstructionKind::ReleaseValueInst:   // Destroy overwritten value
      // These are ignored.
      continue;
    }
  }

  // Next, remove the copy_addr itself.
  CAI->eraseFromParent();
}

/// tryToRemoveDeadAllocation - If the allocation is an autogenerated allocation
/// that is only stored to (after load promotion) then remove it completely.
bool AllocOptimize::tryToRemoveDeadAllocation() {
  assert((isa<AllocBoxInst>(TheMemory) || isa<AllocStackInst>(TheMemory)) &&
         "Unhandled allocation case");

  // We don't want to remove allocations that are required for useful debug
  // information at -O0.  As such, we only remove allocations if:
  //
  // 1. They are in a transparent function.
  // 2. They are in a normal function, but didn't come from a VarDecl, or came
  //    from one that was autogenerated or inlined from a transparent function.
  SILLocation Loc = TheMemory->getLoc();
  if (!TheMemory->getFunction()->isTransparent() &&
      Loc.getAsASTNode<VarDecl>() && !Loc.isAutoGenerated() &&
      !Loc.is<MandatoryInlinedLocation>())
    return false;

  // Check the uses list to see if there are any non-store uses left over after
  // load promotion and other things DI does.
  for (auto &U : Uses) {
    // Ignore removed instructions.
    if (U.Inst == nullptr) continue;

    switch (U.Kind) {
    case DIUseKind::SelfInit:
    case DIUseKind::SuperInit:
      llvm_unreachable("Can't happen on allocations");
    case DIUseKind::Assign:
    case DIUseKind::PartialStore:
    case DIUseKind::InitOrAssign:
      break;    // These don't prevent removal.
    case DIUseKind::Initialization:
      if (!isa<ApplyInst>(U.Inst) &&
          // A copy_addr that is not a take affects the retain count
          // of the source.
          (!isa<CopyAddrInst>(U.Inst) ||
           cast<CopyAddrInst>(U.Inst)->isTakeOfSrc()))
        break;
      // FALL THROUGH.
     LLVM_FALLTHROUGH;
    case DIUseKind::Load:
    case DIUseKind::IndirectIn:
    case DIUseKind::InOutUse:
    case DIUseKind::Escape:
      DEBUG(llvm::dbgs() << "*** Failed to remove autogenerated alloc: "
            "kept alive by: " << *U.Inst);
      return false;   // These do prevent removal.
    }
  }

  // If the memory object has non-trivial type, then removing the deallocation
  // will drop any releases.  Check that there is nothing preventing removal.
  if (!MemoryType.isTrivial(Module)) {
    for (auto *R : Releases) {
      if (R == nullptr || isa<DeallocStackInst>(R) || isa<DeallocBoxInst>(R))
        continue;

      DEBUG(llvm::dbgs() << "*** Failed to remove autogenerated alloc: "
            "kept alive by release: " << *R);
      return false;
    }
  }

  DEBUG(llvm::dbgs() << "*** Removing autogenerated alloc_stack: "<<*TheMemory);

  // If it is safe to remove, do it.  Recursively remove all instructions
  // hanging off the allocation instruction, then return success.  Let the
  // caller remove the allocation itself to avoid iterator invalidation.
  eraseUsesOfInstruction(TheMemory);

  return true;
}

/// doIt - returns true on error.
bool AllocOptimize::doIt() {
  bool Changed = false;

  // Don't try to optimize incomplete aggregates.
  if (MemoryType.aggregateHasUnreferenceableStorage())
    return false;

  // If we've successfully checked all of the definitive initialization
  // requirements, try to promote loads.  This can explode copy_addrs, so the
  // use list may change size.
  for (unsigned i = 0; i != Uses.size(); ++i) {
    auto &Use = Uses[i];
    // Ignore entries for instructions that got expanded along the way.
    if (Use.Inst && Use.Kind == DIUseKind::Load) {
      if (promoteLoad(Use.Inst)) {
        Uses[i].Inst = nullptr;  // remove entry if load got deleted.
        Changed = true;
      }
    }
  }

  // destroy_addr(p) is strong_release(load(p)), try to promote it too.
  for (unsigned i = 0; i != Releases.size(); ++i) {
    if (auto *DAI = dyn_cast_or_null<DestroyAddrInst>(Releases[i]))
      if (promoteDestroyAddr(DAI)) {
        // remove entry if destroy_addr got deleted.
        Releases[i] = nullptr;
        Changed = true;
      }
  }

  // If this is an allocation, try to remove it completely.
  Changed |= tryToRemoveDeadAllocation();

  return Changed;
 }


static bool optimizeMemoryAllocations(SILFunction &Fn) {
  bool Changed = false;
  for (auto &BB : Fn) {
    auto I = BB.begin(), E = BB.end();
    while (I != E) {
      SILInstruction *Inst = &*I;
      if (!isa<AllocBoxInst>(Inst) && !isa<AllocStackInst>(Inst)) {
        ++I;
        continue;
      }
      auto Alloc = cast<AllocationInst>(Inst);

      DEBUG(llvm::dbgs() << "*** DI Optimize looking at: " << *Alloc << "\n");
      DIMemoryObjectInfo MemInfo(Alloc);

      // Set up the datastructure used to collect the uses of the allocation.
      SmallVector<DIMemoryUse, 16> Uses;
      SmallVector<SILInstruction*, 4> Releases;

      // Walk the use list of the pointer, collecting them.
      collectDIElementUsesFrom(MemInfo, Uses, Releases);

      Changed |= AllocOptimize(Alloc, Uses, Releases).doIt();

      // Carefully move iterator to avoid invalidation problems.
      ++I;
      if (Alloc->use_empty()) {
        Alloc->eraseFromParent();
        ++NumAllocRemoved;
        Changed = true;
      }
    }
  }
  return Changed;
}

namespace {
class PredictableMemoryOptimizations : public SILFunctionTransform {

  /// The entry point to the transformation.
  void run() override {
    if (optimizeMemoryAllocations(*getFunction()))
      invalidateAnalysis(SILAnalysis::InvalidationKind::FunctionBody);
  }

};
} // end anonymous namespace


SILTransform *swift::createPredictableMemoryOptimizations() {
  return new PredictableMemoryOptimizations();
}