//===--- ForEachLoopUnrolling.cpp - Unroll loops over array literals ----- ===// // // This source file is part of the Swift.org open source project // // Copyright (c) 2014 - 2020 Apple Inc. and the Swift project authors // Licensed under Apache License v2.0 with Runtime Library Exception // // See https://swift.org/LICENSE.txt for license information // See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors // //===----------------------------------------------------------------------===// // This pass unrolls Sequence.forEach calls invoked on an array created // from an array literal. See below for an overview of the algorithm. This is // a function-level transformation pass that operates on ownership SIL and // so must be applied before ownership is stripped. // // Algorithm overview: // // 1. Iterate over the body of the function and analyze calls to the array // initializer annotated "array.uninitialized_intrinsic". This is done in // `run` method. // // 2. For every "array.uninitialized_intrinsic" initializer call, try // extracting the elements with which the array is initialized, and classify // the uses of the array into incidental uses, forEach calls and uses that // can write into the array. If any of the following conditions hold, give // up and look for the next initializer call: // - The elements stored into the array cannot be extracted. // - The array can be modified after initialization. // - There are no forEach calls on the array. // - The array is too large to unroll forEach calls. This check uses // SILModule::UnrollThreshold parameter. // If none of the above conditions hold, procede to unrolling every forEach // call on the array (Step 3). Step 2 is implemented in the function: // `tryUnrollForEachCallsOverArrayLiteral`. // // 3. Given a forEach call, unroll it by applying the "body closure" passed to // the call to every element of the array. There are three important // details to this unrolling. Firstly, the SILValue of the element stored // into the array through the store instruction (identified in step 2) may // not be valid at the point where the unrolling must happen (i.e., where the // forEach is called). Therefore, we need to copy_value the elements at the // point where they are stored and ensure that the copies are alive until // they are used by the unrolled code. The implementation actually // makes the copies valid for the entire lifetime of the array. // // Secondly, the body closure uses @in_guaranteed convention for the // parameter. Therefore, an alloc_stack is created before the unrolled code // begins to hold the elements, and is destroyed once the unrolled code ends. // // Thirdly, the body closure throws. Hence, it has to be try_applied. This // means that we need to chain the try_applies in such a way that when the // try_apply on the element e_i completes normally it jumps to the try_apply // of e_i+1. When the try_apply (of any element) throws, it must go to the // error block of the original forEach call. // // All of this is implemented by the function `unrollForEach`. // // 4. Delete the forEach calls that were unrolled and clean up dead code // resulting due to that. // // This transformation is illustrated below on a small example: // // Input: // %initResult = apply %arrayUninitialized(...) // (%array, %storage_ptr) = destructure_tuple %initResult // store %elem1 at array index 1 // store %elem2 at array index 2 // .. // try_apply %forEach(%body, %array) normal bb1, error bb2 // bb1(%res : $()): // .. // bb2(%error : @owned $Error): // ... // bb3: // destroy_value %array // // Output: // // %initResult = apply %arrayUninitialized(...) // (%array, %storage_ptr) = destructure_tuple %initResult // %elem1copy = copy_value %elem1 <-- // store %elem1 at array index 1 // %elem2copy = copy_value %elem2 <-- // store %elem2 at array index 2 // .. // alloc_stack %stack // %elem1borrow = begin_borrow %elem1copy // store_borrow %elem1borrow to %stack // try_apply %body(%stack) normal normalbb1, error errbb1 // // errbb1(%error : @owned $Error): // end_borrow %elem1borrow // br bb2(%error) // // normalbb1(%res : $()): // end_borrow %elem1borrow // %elem2borrow = begin_borrow %elem2copy // store_borrow %elem2borrow to %stack // try_apply %body(%stack) normal bb1, error errbb2 // // errbb2(%error : @owned $Error): // end_borrow %elem2borrow // br bb2(%error) // // bb1(%res : $()): // end_borrow %elem2borrow // dealloc_stack %stack // ... // bb2(%error : @owned $Error): // ... // dealloc_stack %stack // bb3: // destroy_value %elem1copy // destroy_value %elem2copy // destroy_value %array #include "swift/AST/ASTContext.h" #include "swift/AST/Expr.h" #include "swift/AST/Module.h" #include "swift/AST/SemanticAttrs.h" #include "swift/AST/SubstitutionMap.h" #include "swift/Basic/Assertions.h" #include "swift/Basic/OptimizationMode.h" #include "swift/SIL/BasicBlockUtils.h" #include "swift/SIL/CFG.h" #include "swift/SIL/DebugUtils.h" #include "swift/SIL/InstructionUtils.h" #include "swift/SIL/OwnershipUtils.h" #include "swift/SIL/SILBasicBlock.h" #include "swift/SIL/SILBuilder.h" #include "swift/SIL/SILConstants.h" #include "swift/SIL/SILFunction.h" #include "swift/SIL/SILInstruction.h" #include "swift/SIL/SILModule.h" #include "swift/SIL/TypeLowering.h" #include "swift/SILOptimizer/Analysis/ArraySemantic.h" #include "swift/SILOptimizer/PassManager/Passes.h" #include "swift/SILOptimizer/PassManager/Transforms.h" #include "swift/SILOptimizer/Utils/CFGOptUtils.h" #include "swift/SILOptimizer/Utils/InstOptUtils.h" #include "swift/SILOptimizer/Utils/ValueLifetime.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallVector.h" using namespace swift; namespace { /// A class for processing and storing information about an array /// such as the values with which it is initialized and the kind of /// users it has. class ArrayInfo { /// The array value returned by the _allocateUninitialized call. SILValue arrayValue; /// A map from an array index to the store instruction that initializes that /// index. llvm::DenseMap elementStoreMap; /// List of Sequence.forEach calls invoked on the array. llvm::SmallSetVector forEachCalls; /// Indicates whether the array could be modified after initialization. Note /// that this not include modifications to the elements of the array. When /// set, this will prevent the forEach loops from getting unrolled. bool mayBeWritten = false; /// Instructions that destroy the array. These must be destroy_value /// instructions either of the \c arrayValue or the copy_values of that. SmallVector destroys; /// Classify uses of the array into forEach uses, read-only uses etc. and set /// the fields of this instance appropriately. This function will recursively /// classify the uses of borrows and copy-values of the array as well. void classifyUsesOfArray(SILValue arrayValue); public: ArrayInfo() {} /// Given an apply instruction \c apply, try to initialize this ArrayInfo /// with it. This would succeed iff the apply instruction starts an /// initialization pattern that is auto-generated by the compiler for array /// literals. Return true on success and false on failure. bool tryInitialize(ApplyInst *apply); /// Return the SILValue of the array that is initialized. SILValue getArrayValue() { assert(arrayValue); return arrayValue; } /// Return true iff the array could be modified after initialization. bool mayBeModified() { assert(arrayValue); return mayBeWritten; } /// Return the forEach uses identified for the array. ArrayRef getForEachUses() { assert(arrayValue); return ArrayRef(forEachCalls.begin(), forEachCalls.end()); } /// Return the number of elements in the array. uint64_t getNumElements() { assert(arrayValue); return elementStoreMap.size(); } /// Return the store instruction that initializes the given \c index /// of the array. StoreInst *getElementStore(uint64_t index) { assert(arrayValue); return elementStoreMap[index]; } /// Return the SIL type of the elements stored in the array. /// \pre the array must be non-empty. SILType getElementSILType() { assert(getNumElements() > 0 && "cannot call this on empty arrays"); return elementStoreMap[0]->getSrc()->getType(); } /// Add the destroy_value instructions that represents the last use of the /// array to the parameter \c lastDestroys. The \c lastDestroys /// added by this function are guaranteed to come after all uses of the /// \c arrayValue and copy_values of the \c arrayValue (along all /// "non-dead-end" blocks). void getLastDestroys(SmallVectorImpl &lastDestroys); }; /// Given a Array-typed SIL value \c array and an instruction \c user that uses /// the array, check whether this use actually represents a call to _fixLifetime /// function. This would be the case if \c user is a store_borrow instruction /// that stores into an alloc_stack which is passed to a fixLifetime /// instruction. That is, if the following pattern holds: /// /// %stack = alloc_stack /// user: store_borrow %array to %stack /// fixLifetime %stack /// \returns the fixLifetime instruction if this is a fixLifetime use of the /// array, nullptr otherwise. static FixLifetimeInst *isFixLifetimeUseOfArray(SILInstruction *user, SILValue array) { StoreBorrowInst *storeUser = dyn_cast(user); if (!storeUser || storeUser->getSrc() != array) return nullptr; AllocStackInst *alloc = dyn_cast(storeUser->getDest()); if (!alloc) return nullptr; auto fixLifetimeUsers = storeUser->getUsersOfType(); if (fixLifetimeUsers.empty()) return nullptr; auto firstUser = fixLifetimeUsers.begin(); FixLifetimeInst *result = *firstUser; // We need to have a unique result. if (++firstUser != fixLifetimeUsers.end()) return nullptr; return result; } /// Given an array-typed SIL value \c array and an instruction \c user that uses /// the array, check whether this use actually represents a call to the /// Sequence.forEach function. This would be case if \c user is a store_borrow /// instruction that stores into an alloc_stack which is passed to a try-apply /// of the Sequence.forEach function. That is, if the following pattern holds: /// /// %stack = alloc_stack /// user: store_borrow %array to %stack /// try_apply %forEachCall(%closure, %array) /// \returns the try-apply instruction invoking the forEach function if this is /// a forEach use of the array, nullptr otherwise. static TryApplyInst *isForEachUseOfArray(SILInstruction *user, SILValue array) { StoreBorrowInst *storeUser = dyn_cast(user); if (!storeUser || storeUser->getSrc() != array) return nullptr; AllocStackInst *alloc = dyn_cast(storeUser->getDest()); if (!alloc) return nullptr; auto applyUsers = storeUser->getUsersOfType(); if (applyUsers.empty()) return nullptr; auto firstUser = applyUsers.begin(); TryApplyInst *apply = *firstUser; // We need to have a unique result. if (++firstUser != applyUsers.end()) return nullptr; SILFunction *callee = apply->getCalleeFunction(); if (!callee || !callee->hasSemanticsAttr(semantics::SEQUENCE_FOR_EACH)) return nullptr; return apply; } void ArrayInfo::classifyUsesOfArray(SILValue arrayValue) { for (Operand *operand : arrayValue->getUses()) { auto *user = operand->getUser(); if (isIncidentalUse(user)) continue; // Ignore this user if it is a call to _fixLifetime. Note that this use // will not be subsumed by InstructionUtils::isIncidentalUse check made // above as the array would be passed indirectly. if (isFixLifetimeUseOfArray(user, arrayValue)) continue; if (auto mdi = MarkDependenceInstruction(user)) { if (mdi.getBase() == arrayValue) continue; } // Check if this is a forEach call on the array. if (TryApplyInst *forEachCall = isForEachUseOfArray(user, arrayValue)) { forEachCalls.insert(forEachCall); continue; } // Recursively classify begin_borrow, copy_value, and move_value uses. if (BeginBorrowInst *beginBorrow = dyn_cast(user)) { classifyUsesOfArray(beginBorrow); continue; } if (CopyValueInst *copyValue = dyn_cast(user)) { classifyUsesOfArray(copyValue); continue; } if (MoveValueInst *moveValue = dyn_cast(user)) { classifyUsesOfArray(moveValue); continue; } if (DestroyValueInst *destroyValue = dyn_cast(user)) { destroys.push_back(destroyValue); continue; } // Set mayBeWritten to true if the user could potentially modify the array. // Note that the array elements are allowed to be modified as long // as the array itself is not modified (which is possible with reference // types). ArraySemanticsCall arrayOp(user); if (arrayOp.doesNotChangeArray()) continue; if (arrayOp.getKind() == swift::ArrayCallKind::kArrayFinalizeIntrinsic) { classifyUsesOfArray((ApplyInst *)arrayOp); continue; } mayBeWritten = true; } } bool ArrayInfo::tryInitialize(ApplyInst *apply) { ArraySemanticsCall arrayAllocateUninitCall( apply, semantics::ARRAY_UNINITIALIZED_INTRINSIC); if (!arrayAllocateUninitCall) return false; arrayValue = arrayAllocateUninitCall.getArrayValue(); if (!arrayValue) return false; if (!arrayAllocateUninitCall.mapInitializationStores(elementStoreMap)) return false; // Collect information about uses of the array value. classifyUsesOfArray(arrayValue); return true; } void ArrayInfo::getLastDestroys( SmallVectorImpl &lastDestroys) { assert(arrayValue); // Collect the frontier instructions of the field \c destroys, which stores // destroy_value instructions of the \c arrayValue as well as of its // copy_values, using ValueLifetimeAnalysis. The frontier is a list of // instructions that mark the exits of the flow of control from the // \c destroys. ValueLifetimeAnalysis lifetimeAnalysis(arrayValue->getDefiningInstruction(), destroys); ValueLifetimeAnalysis::Frontier frontier; lifetimeAnalysis.computeFrontier(frontier, ValueLifetimeAnalysis::DontModifyCFG); for (SILInstruction *frontierInst : frontier) { // Skip frontier instructions at the start of a basic block as they do not // follow a destroy_value of the array. Note that the goal is to collect // the last destroys, which must always immediately precede a frontier // instruction as it marks the end of the use of the array. if (frontierInst == &frontierInst->getParent()->front()) continue; SILInstruction *inst = &*(--frontierInst->getIterator()); // This must be a destroy instruction. Moreover it must also belong to \c // destroys. DestroyValueInst *lastDestroy = dyn_cast(inst); assert(lastDestroy); lastDestroys.push_back(cast(lastDestroy)); } } /// Delete the forEach calls from the SIL that contains it. This function will /// not clean up any resulting dead instructions. static void removeForEachCall(TryApplyInst *forEachCall, InstructionDeleter &deleter) { auto *sbi = cast(forEachCall->getArgument(1)); auto *asi = cast(sbi->getDest()); // The allocStack will be used in the forEach call and also in a store // instruction and a dealloc_stack instruction. Force delete all of them. deleter.recursivelyForceDeleteUsersAndFixLifetimes(asi); } /// Unroll the \c forEachCall on an array, using the information in /// \c ArrayInfo. Once unrolled, \c forEachCall will be deleted /// from the containing function. This function implements the following /// transformation. /// - If the array stores non-trivial elements, for every element stored /// into the array, insert a copy_value of the element just before it /// is stored. This is necessary so that an @owned SILValue for the /// element is available at the point where the forEach is used. /// /// - Create an alloc_stack A at the point of the forEach call. This is /// necessary to indirectly pass the elements of the array. /// /// For every element e_i at the index i of the array, do the following: /// - create a new basic block b_i if i > 0. Let b_0 denote the basic block /// that contains the forEach call. /// - store_borrow the element e_i into the alloc_stack A. Note that we /// can use the owned copy of e_i created in the previous step. /// - try_apply the forEach's body closure on the alloc_stack A. /// If i is not the last index, jump to b_i+1 in the normal case of the /// try_apply. If i is the last index of the array jump to the normal /// target of the forEach call. Jump to a new error block: err_i in the /// error case. Make err_i jump to the error target of the original /// forEach call. /// /// - Dealloc the alloc_stack along the normal and error targets of the /// forEach calls. /// /// - Destroy all the copies of the elements (if it is non-trivial) just /// before the array's lifetime ends. static void unrollForEach(ArrayInfo &arrayInfo, TryApplyInst *forEachCall, InstructionDeleter &deleter) { if (arrayInfo.getNumElements() == 0) { // If this is an empty array, delete the forEach entirely. removeForEachCall(forEachCall, deleter); return; } SILFunction *fun = forEachCall->getFunction(); SILLocation forEachLoc = forEachCall->getLoc(); SILValue forEachBodyClosure = forEachCall->getArgument(0); SILType arrayElementType = arrayInfo.getElementSILType(); SILFunctionType *bodyClosureType = forEachBodyClosure->getType().getAs(); SILParameterInfo bodyParameterInfo = bodyClosureType->getParameters()[0]; // The forEachBodyClosure must use @in_guaranteed convention for passing // arguments. assert(bodyParameterInfo.getConvention() == ParameterConvention::Indirect_In_Guaranteed && "forEach body closure is expected to take @in_guaranteed argument"); // Copy the elements stored into the array. This is necessary as we need to // extend the lifetime of the stored elements at least until the forEach call, // which will now be unrolled. The following code inserts a copy_value of the // elements just before the point where they are stored into the array, and // a corresponding destroy_value at the end of the lifetime of the array. In // other words, the lifetime of the element copies are made to match the // lifetime of the array. Even though copies can be destroyed sooner after // they are used by the unrolled code, doing so may introduce more destroys // than needed as the unrolled code have many branches (due to try applies) // all of which joins later into a single path eventually. SmallVector elementCopies; for (uint64_t i = 0; i < arrayInfo.getNumElements(); ++i) { StoreInst *elementStore = arrayInfo.getElementStore(i); // Insert the copy just before the store of the element into the array. SILValue copy = SILBuilderWithScope(elementStore) .emitCopyValueOperation(elementStore->getLoc(), elementStore->getSrc()); elementCopies.push_back(copy); } // Destroy the copy_value of the elements before the last destroys of the // array. It is important that the copied elements are destroyed before the // array is destroyed. This enables other optimizations. SmallVector lastDestroys; arrayInfo.getLastDestroys(lastDestroys); for (DestroyValueInst *destroy : lastDestroys) { SILBuilderWithScope destroyBuilder(destroy); for (SILValue element : elementCopies) { destroyBuilder.emitDestroyValueOperation(destroy->getLoc(), element); } } // Create alloc_stack for passing the array elements indirectly. SILValue allocStack = SILBuilderWithScope(forEachCall) .createAllocStack(forEachLoc, arrayElementType); // Extract the Error and normal targets of the forEach call. Both these // targets must be taking a phi argument. SILBasicBlock *normalBB = forEachCall->getNormalBB(); SILBasicBlock *errorBB = forEachCall->getErrorBB(); assert(normalBB->getSILPhiArguments().size() == 1); SILPhiArgument *normalArgument = normalBB->getSILPhiArguments()[0]; SILPhiArgument *errorArgument = nullptr; if (errorBB->getSILPhiArguments().size() == 1) errorArgument = errorBB->getSILPhiArguments()[0]; // A generator for creating a basic block for use as the target of the // "normal" branch of a try_apply. auto normalTargetGenerator = [&](SILBasicBlock *insertionBlock) { SILBasicBlock *newBB = fun->createBasicBlockBefore(insertionBlock); newBB->createPhiArgument(normalArgument->getType(), normalArgument->getOwnershipKind()); return newBB; }; // A generator for creating a basic block for use as the target of the // "error" branch of a try_apply. The error block created here will always // jump to the error target of the original forEach. auto errorTargetGenerator = [&](SILBasicBlock *insertionBlock, SILValue borrowedElem, SILValue storeBorrow) { SILBasicBlock *newErrorBB = fun->createBasicBlockBefore(insertionBlock); SILValue argument; if (errorArgument) { argument = newErrorBB->createPhiArgument( errorArgument->getType(), errorArgument->getOwnershipKind()); } // Make the errorBB jump to the error target of the original forEach. SILBuilderWithScope builder(newErrorBB, forEachCall); if (storeBorrow) { builder.createEndBorrow(forEachLoc, storeBorrow); } if (borrowedElem) { builder.createEndBorrow(forEachLoc, borrowedElem); } if (argument) builder.createBranch(forEachLoc, errorBB, argument); else builder.createBranch(forEachLoc, errorBB); return newErrorBB; }; // The basic block to jump to in the normal case of the try_apply in each // unrolling. SILBasicBlock *nextNormalBB = normalBB; // Iterate through the array indices in the reverse order and do the // following: // - create a new basic block b_i if i > 0. Let b_0 denote the basic block // that contains the forEach call. // - store_borrow the owned copy of the element e_i into the `allocStack`. // - try_apply the forEach's body closure on the `allocStack`. The normal // target of the try_apply is b_i+1 if i is not the last index, otherwise // it is `normalBB`. (The normal target is captured by `nextNormalBB`.) // Jump to a new error block: err_i in the error case. Note that all // error blocks jump to the error target of the original forEach call. for (uint64_t num = arrayInfo.getNumElements(); num > 0; --num) { SILValue elementCopy = elementCopies[num - 1]; // Creating the next normal block ends the borrow scope for borrowedElem // from the previous iteration. SILBasicBlock *currentBB = num > 1 ? normalTargetGenerator(nextNormalBB) : forEachCall->getParentBlock(); SILBuilderWithScope unrollBuilder(currentBB, forEachCall); SILBuilderWithScope normalBuilder(&nextNormalBB->front(), forEachCall); SILValue borrowedElem; SILValue addr; if (arrayElementType.isTrivial(*fun)) { unrollBuilder.createStore(forEachLoc, elementCopy, allocStack, StoreOwnershipQualifier::Trivial); addr = allocStack; } else { // Borrow the elementCopy and store it in the allocStack. Note that the // element's copy is guaranteed to be alive until the array is alive. // Therefore it is okay to use a borrow here. borrowedElem = unrollBuilder.createBeginBorrow(forEachLoc, elementCopy); addr = unrollBuilder.createStoreBorrow(forEachLoc, borrowedElem, allocStack); normalBuilder.createEndBorrow(forEachLoc, addr); normalBuilder.createEndBorrow(forEachLoc, borrowedElem); } SILBasicBlock *errorTarget = errorTargetGenerator(nextNormalBB, borrowedElem, isa(addr) ? addr : SILValue()); // Note that the substitution map must be empty as we are not supporting // elements of address-only type. All elements in the array are guaranteed // to be loadable. TODO: generalize this to address-only types. unrollBuilder.createTryApply(forEachLoc, forEachBodyClosure, SubstitutionMap(), addr, nextNormalBB, errorTarget); if (nextNormalBB == normalBB) { // Dealloc the stack in the normalBB and also in errorBB. Note that every // try_apply created during the unrolling must pass through these blocks. normalBuilder.createDeallocStack(forEachLoc, allocStack); } nextNormalBB = currentBB; } // Dealloc the stack in the normalBB and also in errorBB. Note that every // try_apply created during the unrolling must pass through these blocks. SILBuilderWithScope(&errorBB->front()) .createDeallocStack(forEachLoc, allocStack); // Remove the forEach call as it has now been unrolled. removeForEachCall(forEachCall, deleter); } /// Determine whether the cost of unrolling the forEach is within the /// \c unrollThreshold. static bool canUnrollForEachOfArray(ArrayInfo arrayInfo, SILModule &module) { const uint64_t unrollThreshold = module.getOptions().UnrollThreshold; // The cost of unrolling a forEach loop is mostly just two instructions per // array element: one to store the element into an alloc_stack and another to // invoke the forEach body closure with the element. Note that the copy_value // of the element and the basic blocks created for the try-applies of the // body closure are not counted, as these should likely get optimized away. const uint64_t cost = 2; return arrayInfo.getNumElements() * cost <= unrollThreshold; } static bool tryUnrollForEachCallsOverArrayLiteral(ApplyInst *apply, InstructionDeleter &deleter) { ArrayInfo arrayInfo; if (!arrayInfo.tryInitialize(apply)) return false; // Bail out, if the array could be modified after initialization. if (arrayInfo.mayBeModified()) return false; // If there are no forEach loops to unroll, return. ArrayRef forEachCalls = arrayInfo.getForEachUses(); if (forEachCalls.empty()) return false; // If the array is too large to unroll, bail out. if (!canUnrollForEachOfArray(arrayInfo, apply->getParent()->getModule())) return false; for (TryApplyInst *forEachCall : forEachCalls) unrollForEach(arrayInfo, forEachCall, deleter); return true; } class ForEachLoopUnroller : public SILFunctionTransform { ~ForEachLoopUnroller() override {} void run() override { SILFunction &fun = *getFunction(); bool changed = false; if (!fun.hasOwnership()) return; InstructionDeleter deleter; for (SILBasicBlock &bb : fun) { for (auto instIter = bb.begin(); instIter != bb.end();) { SILInstruction *inst = &*instIter; ApplyInst *apply = dyn_cast(inst); if (!apply) { ++instIter; continue; } // Note that the following operation may delete a forEach call but // would not delete this apply instruction, which is an array // initializer. Therefore, the iterator should be valid here. changed |= tryUnrollForEachCallsOverArrayLiteral(apply, deleter); ++instIter; } } if (changed) { deleter.cleanupDeadInstructions(); PM->invalidateAnalysis(&fun, SILAnalysis::InvalidationKind::FunctionBody); } } }; } // end anonymous namespace SILTransform *swift::createForEachLoopUnroll() { return new ForEachLoopUnroller(); }