//===--- Passes.cpp - Reference Counting Optimizations --------------------===//
//
// This source file is part of the Swift.org open source project
//
// Copyright (c) 2014 - 2015 Apple Inc. and the Swift project authors
// Licensed under Apache License v2.0 with Runtime Library Exception
//
// See http://swift.org/LICENSE.txt for license information
// See http://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
//
//===----------------------------------------------------------------------===//
//
// This file implements optimizations for reference counting, object
// allocation, and other runtime entrypoints.
//
//===----------------------------------------------------------------------===//

#define DEBUG_TYPE "swift-optimize"
#include "swift/OptimizeARC/Passes.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Module.h"
#include "llvm/Pass.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Transforms/Utils/SSAUpdater.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/ADT/TinyPtrVector.h"
#include "llvm/ADT/Triple.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/CommandLine.h"
using namespace llvm;
using swift::SwiftAliasAnalysis;
using swift::SwiftARCOpt;
using swift::SwiftARCExpandPass;

STATISTIC(NumNoopDeleted, "Number of no-op swift calls eliminated");
STATISTIC(NumRetainReleasePairs,
          "Number of swift retain/release pairs eliminated");
STATISTIC(NumObjCRetainReleasePairs,
          "Number of objc retain/release pairs eliminated");
STATISTIC(NumAllocateReleasePairs,
          "Number of swift allocate/release pairs eliminated");
STATISTIC(NumStoreOnlyObjectsEliminated,
          "Number of swift stored-only objects eliminated");
STATISTIC(NumReturnThreeTailCallsFormed,
          "Number of swift_retainAndReturnThree tail calls formed");

//===----------------------------------------------------------------------===//
//                            Utility Functions
//===----------------------------------------------------------------------===//

enum RT_Kind {
  /// An instruction with this classification is known to not access (read or
  /// write) memory.
  RT_NoMemoryAccessed,

  /// SwiftHeapObject *swift_retain(SwiftHeapObject *object)
  RT_Retain,

  /// void swift_retain_noresult(SwiftHeapObject *object)
  RT_RetainNoResult,

  /// (i64,i64,i64) swift_retainAndReturnThree(SwiftHeapObject *obj, i64,i64,i64)
  RT_RetainAndReturnThree,

  /// void swift_release(SwiftHeapObject *object)
  RT_Release,

  /// SwiftHeapObject *swift_allocObject(SwiftHeapMetadata *metadata,
  ///                                    size_t size, size_t alignment)
  RT_AllocObject,

  /// void objc_release(%objc_object* %P)
  RT_ObjCRelease,

  /// %objc_object* objc_retain(%objc_object* %P)
  RT_ObjCRetain,

  /// This is not a runtime function that we support.  Maybe it is not a call,
  /// or is a call to something we don't care about.
  RT_Unknown,
};

/// classifyInstruction - Take a look at the specified instruction and classify
/// it into what kind of runtime entrypoint it is, if any.
static RT_Kind classifyInstruction(const Instruction &I) {
  if (!I.mayReadOrWriteMemory())
    return RT_NoMemoryAccessed;

  // Non-calls or calls to indirect functions are unknown.
  const CallInst *CI = dyn_cast<CallInst>(&I);
  if (CI == 0) return RT_Unknown;
  Function *F = CI->getCalledFunction();
  if (F == 0) return RT_Unknown;

  return StringSwitch<RT_Kind>(F->getName())
    .Case("swift_retain", RT_Retain)
    .Case("swift_retain_noresult", RT_RetainNoResult)
    .Case("swift_release", RT_Release)
    .Case("swift_allocObject", RT_AllocObject)
    .Case("swift_retainAndReturnThree", RT_RetainAndReturnThree)
    .Case("objc_release", RT_ObjCRelease)
    .Case("objc_retain", RT_ObjCRetain)
    .Default(RT_Unknown);
}
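
// For illustration (IR sketch, not from the original source; value names are
// hypothetical), the classifier maps instructions purely by callee name:
//   call void @swift_release(%swift.refcounted* %obj)  ; -> RT_Release
//   %r = call i8* @objc_retain(i8* %p)                 ; -> RT_ObjCRetain
//   %s = add i64 %x, 1                                 ; -> RT_NoMemoryAccessed
//   call void @some_unknown_fn()                       ; -> RT_Unknown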

/// getRetain - Return a callable function for swift_retain.  F is the function
/// being operated on, ObjectPtrTy is an instance of the object pointer type to
/// use, and Cache is a null-initialized place to make subsequent requests
/// faster.
static Constant *getRetain(Function &F, Type *ObjectPtrTy, Constant *&Cache) {
  if (Cache) return Cache;

  auto AttrList = AttributeSet::get(F.getContext(), AttributeSet::FunctionIndex,
                                    Attribute::NoUnwind);
  Module *M = F.getParent();
  return Cache = M->getOrInsertFunction("swift_retain", AttrList,
                                        ObjectPtrTy, ObjectPtrTy, NULL);
}

/// getRetainNoResult - Return a callable function for swift_retain_noresult.
/// F is the function being operated on, ObjectPtrTy is an instance of the
/// object pointer type to use, and Cache is a null-initialized place to make
/// subsequent requests faster.
static Constant *getRetainNoResult(Function &F, Type *ObjectPtrTy,
                                   Constant *&Cache) {
  if (Cache) return Cache;

  auto AttrList = AttributeSet::get(F.getContext(), 1, Attribute::NoCapture);
  AttrList = AttrList.addAttribute(F.getContext(),
                                   AttributeSet::FunctionIndex,
                                   Attribute::NoUnwind);
  Module *M = F.getParent();
  return Cache = M->getOrInsertFunction("swift_retain_noresult", AttrList,
                                        Type::getVoidTy(F.getContext()),
                                        ObjectPtrTy, NULL);
}

/// getRetainAndReturnThree - Return a callable function for
/// swift_retainAndReturnThree.  F is the function being operated on,
/// ObjectPtrTy is an instance of the object pointer type to use, and Cache is
/// a null-initialized place to make subsequent requests faster.
static Constant *getRetainAndReturnThree(Function &F, Type *ObjectPtrTy,
                                         Constant *&Cache) {
  if (Cache) return Cache;

  auto AttrList = AttributeSet::get(F.getContext(), AttributeSet::FunctionIndex,
                                    Attribute::NoUnwind);
  Module *M = F.getParent();

  Type *Int64Ty = Type::getInt64Ty(F.getContext());
  Type *RetTy = StructType::get(Int64Ty, Int64Ty, Int64Ty, NULL);
  return Cache = M->getOrInsertFunction("swift_retainAndReturnThree",
                                        AttrList, RetTy, ObjectPtrTy,
                                        Int64Ty, Int64Ty, Int64Ty, NULL);
}

//===----------------------------------------------------------------------===//
//                           SwiftAliasAnalysis
//===----------------------------------------------------------------------===//

namespace llvm {
  void initializeSwiftAliasAnalysisPass(PassRegistry&);
}

// Register this pass...
char SwiftAliasAnalysis::ID = 0;
INITIALIZE_AG_PASS(SwiftAliasAnalysis, AliasAnalysis, "swift-aa",
                   "Swift Alias Analysis", false, true, false)

AliasAnalysis::ModRefResult
SwiftAliasAnalysis::getModRefInfo(ImmutableCallSite CS, const Location &Loc) {
  // We know the mod-ref behavior of various runtime functions.
  switch (classifyInstruction(*CS.getInstruction())) {
  case RT_AllocObject:
  case RT_NoMemoryAccessed:
  case RT_Retain:
  case RT_RetainNoResult:
  case RT_RetainAndReturnThree:
  case RT_ObjCRetain:
    // These entrypoints don't modify any compiler-visible state.
    return NoModRef;
  case RT_Release:
  case RT_ObjCRelease:
  case RT_Unknown:
    break;
  }

  return AliasAnalysis::getModRefInfo(CS, Loc);
}
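
// Reporting NoModRef for the retain and allocation entrypoints is what lets
// the rest of the mid-level optimizer see through them.  An illustrative IR
// sketch (hypothetical names, not from the original source): with this alias
// analysis in place, GVN can forward %a to %b because the intervening retain
// is known not to write memory:
//   %a = load i64* %p
//   call void @swift_retain_noresult(%swift.refcounted* %obj)
//   %b = load i64* %p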

//===----------------------------------------------------------------------===//
//                       Input Function Canonicalizer
//===----------------------------------------------------------------------===//

/// updateCallValueUses - We have something like this:
///   %z = ptrtoint %swift.refcounted* %2 to i64
///   %3 = call { i64, i64, i64 }
///            @swift_retainAndReturnThree(..., i64 %x, i64 %1, i64 %z)
///   %a = extractvalue { i64, i64, i64 } %3, 0
///   %b = extractvalue { i64, i64, i64 } %3, 1
///   %c = extractvalue { i64, i64, i64 } %3, 2
///   %a1 = inttoptr i64 %a to i8*
///   %c1 = inttoptr i64 %c to %swift.refcounted*
///
/// This function is invoked three times (once each for the three arg/retvalues
/// that need to be replaced) and tries a best effort to patch up things to
/// avoid all the casts.  "CI" coming into here is the call to
/// swift_retainAndReturnThree or to an extract that returns all three words.
///
static void updateCallValueUses(CallInst &CI, unsigned EltNo) {
  Value *Op = CI.getArgOperand(1+EltNo);

  for (auto UI = CI.user_begin(), E = CI.user_end(); UI != E; ++UI) {
    ExtractValueInst *Extract = dyn_cast<ExtractValueInst>(*UI);

    // Make sure this extract is relevant to EltNo.
    if (Extract == 0 || Extract->getNumIndices() != 1 ||
        Extract->getIndices()[0] != EltNo)
      continue;

    // Both the input and result should be i64's.
    assert(Extract->getType() == Op->getType() && "Should have i64's here");

    for (auto UI2 = Extract->user_begin(), E = Extract->user_end();
         UI2 != E; ) {
      IntToPtrInst *ExtractUser = dyn_cast<IntToPtrInst>(*UI2++);
      PtrToIntInst *OpCast = dyn_cast<PtrToIntInst>(Op);
      if (ExtractUser && OpCast &&
          OpCast->getOperand(0)->getType() == ExtractUser->getType()) {
        ExtractUser->replaceAllUsesWith(OpCast->getOperand(0));
        ExtractUser->eraseFromParent();
      }
    }

    // Stitch up anything other than the ptrtoint -> inttoptr.
    Extract->replaceAllUsesWith(Op);

    // Zap the dead ExtractValue's.
    RecursivelyDeleteTriviallyDeadInstructions(Extract);
    return;
  }
}

/// canonicalizeInputFunction - Functions like swift_retain return an
/// argument as a low-level performance optimization.  This makes it difficult
/// to reason about pointer equality though, so undo it as an initial
/// canonicalization step.  After this step, all swift_retain's have been
/// replaced with swift_retain_noresult.
///
/// This also does some trivial peep-hole optimizations as we go.
static bool canonicalizeInputFunction(Function &F) {
  Constant *RetainNoResultCache = 0;

  bool Changed = false;
  for (auto &BB : F)
    for (auto I = BB.begin(); I != BB.end(); ) {
      Instruction &Inst = *I++;
      switch (classifyInstruction(Inst)) {
      case RT_Unknown:
      case RT_AllocObject:
      case RT_NoMemoryAccessed:
        break;
      case RT_RetainNoResult: {
        CallInst &CI = cast<CallInst>(Inst);
        Value *ArgVal = CI.getArgOperand(0);
        // retain_noresult(null) is a no-op.
        if (isa<ConstantPointerNull>(ArgVal)) {
          CI.eraseFromParent();
          Changed = true;
          ++NumNoopDeleted;
          continue;
        }
        break;
      }
      case RT_Retain: {
        // If any x = swift_retain(y)'s got here, canonicalize them into:
        //   x = y; swift_retain_noresult(y).
        // This is important even though the front-end doesn't generate them,
        // because inlined functions can be ARC optimized, and thus may contain
        // swift_retain calls.
        CallInst &CI = cast<CallInst>(Inst);
        Value *ArgVal = CI.getArgOperand(0);

        // Rewrite uses of the result to use the argument.
        if (!CI.use_empty())
          Inst.replaceAllUsesWith(ArgVal);

        // Insert a call to swift_retain_noresult to replace this and reset the
        // iterator so that we visit it next.
        I = CallInst::Create(getRetainNoResult(F, ArgVal->getType(),
                                               RetainNoResultCache),
                             ArgVal, "", &CI);
        CI.eraseFromParent();
        Changed = true;
        break;
      }
      case RT_Release: {
        CallInst &CI = cast<CallInst>(Inst);
        // swift_release(null) is a noop, zap it.
        Value *ArgVal = CI.getArgOperand(0);
        if (isa<ConstantPointerNull>(ArgVal)) {
          CI.eraseFromParent();
          Changed = true;
          ++NumNoopDeleted;
          continue;
        }
        break;
      }
      case RT_RetainAndReturnThree: {
        // (a,b,c) = swift_retainAndReturnThree(obj, d,e,f)
        //   -> swift_retain_noresult(obj)
        //   -> (a,b,c) = (d,e,f)
        //
        // The important case of doing this is when this function has been
        // inlined into another function.  In this case, there is no return
        // anymore.
        CallInst &CI = cast<CallInst>(Inst);
        IRBuilder<> B(&CI);
        Type *HeapObjectTy = CI.getArgOperand(0)->getType();

        // Reprocess starting at the new swift_retain_noresult.
        I = B.CreateCall(getRetainNoResult(F, HeapObjectTy,
                                           RetainNoResultCache),
                         CI.getArgOperand(0));

        // See if we can eliminate all of the extractvalue's that are hanging
        // off the swift_retainAndReturnThree.  This is important to eliminate
        // casts that will block optimizations and generally results in better
        // IR.  Note that this is just a best-effort attempt though.
        updateCallValueUses(CI, 0);
        updateCallValueUses(CI, 1);
        updateCallValueUses(CI, 2);

        // If our best-effort wasn't good enough, fall back to generating
        // terrible but correct code.
        if (!CI.use_empty()) {
          Value *V = UndefValue::get(CI.getType());
          V = B.CreateInsertValue(V, CI.getArgOperand(1), 0U);
          V = B.CreateInsertValue(V, CI.getArgOperand(2), 1U);
          V = B.CreateInsertValue(V, CI.getArgOperand(3), 2U);
          CI.replaceAllUsesWith(V);
        }
        CI.eraseFromParent();
        Changed = true;
        break;
      }
      case RT_ObjCRelease: {
        CallInst &CI = cast<CallInst>(Inst);
        Value *ArgVal = CI.getArgOperand(0);
        // objc_release(null) is a noop, zap it.
        if (isa<ConstantPointerNull>(ArgVal)) {
          CI.eraseFromParent();
          Changed = true;
          ++NumNoopDeleted;
          continue;
        }
        break;
      }
      case RT_ObjCRetain: {
        // Canonicalize objc_retain so that nothing uses its result.
        CallInst &CI = cast<CallInst>(Inst);
        Value *ArgVal = CI.getArgOperand(0);
        if (!CI.use_empty()) {
          CI.replaceAllUsesWith(ArgVal);
          Changed = true;
        }
        // objc_retain(null) is a noop, delete it.
        if (isa<ConstantPointerNull>(ArgVal)) {
          CI.eraseFromParent();
          Changed = true;
          ++NumNoopDeleted;
          continue;
        }
        break;
      }
      }
    }
  return Changed;
}
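
// An illustrative before/after for the canonicalization above (IR sketch,
// hypothetical names):
//   %x = call %swift.refcounted* @swift_retain(%swift.refcounted* %y)
//   call void @use(%swift.refcounted* %x)
// becomes:
//   call void @swift_retain_noresult(%swift.refcounted* %y)
//   call void @use(%swift.refcounted* %y)
// so that later passes can reason about pointer equality between %x and %y.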

//===----------------------------------------------------------------------===//
//                            Release() Motion
//===----------------------------------------------------------------------===//

/// performLocalReleaseMotion - Scan backwards from the specified release,
/// moving it earlier in the function if possible, over instructions that do
/// not access the released object.  If we get to a retain or allocation of the
/// object, zap both.
static bool performLocalReleaseMotion(CallInst &Release, BasicBlock &BB) {
  // FIXME: Call classifier should identify the object for us.  Too bad C++
  // doesn't have nice Swift-style enums.
  Value *ReleasedObject = Release.getArgOperand(0);

  BasicBlock::iterator BBI = &Release;

  // Scan until we get to the top of the block.
  while (BBI != BB.begin()) {
    --BBI;

    // Don't analyze PHI nodes.  We can't move retains before them and they
    // aren't "interesting".
    if (isa<PHINode>(BBI) ||
        // If we found the instruction that defines the value we're releasing,
        // don't push the release past it.
        &*BBI == ReleasedObject) {
      ++BBI;
      goto OutOfLoop;
    }

    switch (classifyInstruction(*BBI)) {
    case RT_Retain: // Canonicalized away, shouldn't exist.
    case RT_RetainAndReturnThree:
      assert(0 && "these entrypoints should be canonicalized away");
    case RT_NoMemoryAccessed:
      // Skip over random instructions that don't touch memory.  They don't
      // need protection by retain/release.
      continue;

    case RT_Release: {
      // If we get to a release, we can generally ignore it and scan past it.
      // However, if we get to a release of obviously the same object, we stop
      // scanning here because it should have already been moved as early as
      // possible, so there is no reason to move its friend to the same place.
      //
      // NOTE: If this occurs frequently, maybe we can have a release(Obj, N)
      // API to drop multiple retain counts at once.
      CallInst &ThisRelease = cast<CallInst>(*BBI);
      Value *ThisReleasedObject = ThisRelease.getArgOperand(0);
      if (ThisReleasedObject == ReleasedObject) {
        //Release.dump(); ThisRelease.dump(); BB.getParent()->dump();
        ++BBI;
        goto OutOfLoop;
      }
      continue;
    }

    case RT_RetainNoResult: {  // swift_retain_noresult(obj)
      CallInst &Retain = cast<CallInst>(*BBI);
      Value *RetainedObject = Retain.getArgOperand(0);

      // If the retain and release are to obviously pointer-equal objects, then
      // we can delete both of them.  We have proven that they do not protect
      // anything of value.
      if (RetainedObject == ReleasedObject) {
        Retain.eraseFromParent();
        Release.eraseFromParent();
        ++NumRetainReleasePairs;
        return true;
      }

      // Otherwise, this is a retain of an object that is not statically known
      // to be the same object.  It may still be dynamically the same object
      // though.  In this case, we can't move the release past it.
      // TODO: Strengthen analysis.
      //Release.dump(); BB.getParent()->dump();
      ++BBI;
      goto OutOfLoop;
    }

    case RT_AllocObject: {  // %obj = swift_alloc(...)
      CallInst &Allocation = cast<CallInst>(*BBI);

      // If this is an allocation of an unrelated object, just ignore it.
      // TODO: This is not safe without proving the object being released is
      // not related to the allocated object.  Consider something silly like
      // this:
      //   A = allocate()
      //   B = bitcast A to object
      //   release(B)
      if (ReleasedObject != &Allocation) {
        // Release.dump(); BB.getParent()->dump();
        ++BBI;
        goto OutOfLoop;
      }

      // If this is a release right after an allocation of the object, then we
      // can zap both.
      Allocation.replaceAllUsesWith(UndefValue::get(Allocation.getType()));
      Allocation.eraseFromParent();
      Release.eraseFromParent();
      ++NumAllocateReleasePairs;
      return true;
    }

    case RT_Unknown:
    case RT_ObjCRelease:
    case RT_ObjCRetain:
      // BBI->dump();
      // Otherwise, we get to something unknown/unhandled.  Bail out for now.
      ++BBI;
      goto OutOfLoop;
    }
  }
OutOfLoop:

  // If we got to the top of the block (and if the instruction didn't start
  // there), move the release to the top of the block.
  // TODO: This is where we'd plug in some global algorithms someday.
  if (&*BBI != &Release) {
    Release.moveBefore(BBI);
    return true;
  }

  return false;
}
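
// Illustrative effect of release motion (IR sketch, hypothetical names): the
// release is hoisted over code that cannot access %obj, and when it reaches a
// retain of the same pointer the pair is deleted:
//   call void @swift_retain_noresult(%swift.refcounted* %obj)
//   %v = add i64 %x, %y             ; no memory access, safe to hoist over
//   call void @swift_release(%swift.refcounted* %obj)
// optimizes to:
//   %v = add i64 %x, %y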

//===----------------------------------------------------------------------===//
//                             Retain() Motion
//===----------------------------------------------------------------------===//

/// performLocalRetainMotion - Scan forward from the specified retain, moving
/// it later in the function if possible, over instructions that provably can't
/// release the object.  If we get to a release of the object, zap both.
///
/// NOTE: this handles both objc_retain and swift_retain_noresult.
///
static bool performLocalRetainMotion(CallInst &Retain, BasicBlock &BB) {
  // FIXME: Call classifier should identify the object for us.  Too bad C++
  // doesn't have nice Swift-style enums.
  Value *RetainedObject = Retain.getArgOperand(0);

  BasicBlock::iterator BBI = &Retain, BBE = BB.getTerminator();

  bool isObjCRetain = Retain.getCalledFunction()->getName() == "objc_retain";

  bool MadeProgress = false;

  // Scan until we get to the end of the block.
  for (++BBI; BBI != BBE; ++BBI) {
    Instruction &CurInst = *BBI;

    // Classify the instruction.  This switch does a "break" when the
    // instruction can be skipped and is interesting, and a "continue" when it
    // is a retain of the same pointer.
    switch (classifyInstruction(CurInst)) {
    case RT_Retain: // Canonicalized away, shouldn't exist.
    case RT_RetainAndReturnThree:
      assert(0 && "these entrypoints should be canonicalized away");
    case RT_NoMemoryAccessed:
    case RT_AllocObject:
      // Skip over random instructions that don't touch memory.  They don't
      // need protection by retain/release.
      break;

    case RT_RetainNoResult: {  // swift_retain_noresult(obj)
      //CallInst &ThisRetain = cast<CallInst>(CurInst);
      //Value *ThisRetainedObject = ThisRetain.getArgOperand(0);

      // If we see a retain of the same object, we can skip over it, but we
      // can't count it as progress.  Just pushing a retain(x) past a retain(y)
      // doesn't change the program.
      continue;
    }

    case RT_Release: {
      // If we get to a release that is provably to this object, then we can
      // zap it and the retain.
      CallInst &ThisRelease = cast<CallInst>(CurInst);
      Value *ThisReleasedObject = ThisRelease.getArgOperand(0);
      if (!isObjCRetain && ThisReleasedObject == RetainedObject) {
        Retain.eraseFromParent();
        ThisRelease.eraseFromParent();
        ++NumRetainReleasePairs;
        return true;
      }

      // Otherwise, if this is some other pointer, we can only ignore it if we
      // can prove that the two objects don't alias.
      // Retain.dump(); ThisRelease.dump(); BB.getParent()->dump();
      goto OutOfLoop;
    }

    case RT_ObjCRelease: {
      // If we get to an objc_release that is provably to this object, then we
      // can zap it and the objc_retain.
      CallInst &ThisRelease = cast<CallInst>(CurInst);
      Value *ThisReleasedObject = ThisRelease.getArgOperand(0);
      if (isObjCRetain && ThisReleasedObject == RetainedObject) {
        Retain.eraseFromParent();
        ThisRelease.eraseFromParent();
        ++NumObjCRetainReleasePairs;
        return true;
      }

      // Otherwise, if this is some other pointer, we can only ignore it if we
      // can prove that the two objects don't alias.
      // Retain.dump(); ThisRelease.dump(); BB.getParent()->dump();
      goto OutOfLoop;
    }

    case RT_Unknown:
    case RT_ObjCRetain:
      // Load, store, memcpy etc can't do a release.
      if (isa<LoadInst>(CurInst) || isa<StoreInst>(CurInst) ||
          isa<MemIntrinsic>(CurInst))
        break;

      // CurInst->dump(); BBI->dump();
      // Otherwise, we get to something unknown/unhandled.  Bail out for now.
      goto OutOfLoop;
    }

    // If the switch did a break, we made some progress moving this retain.
    MadeProgress = true;
  }
OutOfLoop:

  // If we were able to move the retain down, move it now.
  // TODO: This is where we'd plug in some global algorithms someday.
  if (MadeProgress) {
    Retain.moveBefore(BBI);
    return true;
  }

  return false;
}
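
// Illustrative effect of retain motion (IR sketch, hypothetical names): the
// retain is sunk past instructions that provably can't release the object, and
// when it meets a matching release the pair is removed:
//   call void @swift_retain_noresult(%swift.refcounted* %obj)
//   store i64 %x, i64* %p           ; stores can't release an object
//   call void @swift_release(%swift.refcounted* %obj)
// optimizes to:
//   store i64 %x, i64* %p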

//===----------------------------------------------------------------------===//
//                      Store-Only Object Elimination
//===----------------------------------------------------------------------===//

/// DtorKind - Classification for destructor semantics.
enum class DtorKind {
  /// NoSideEffects - The destructor does nothing, or just touches the local
  /// object in a non-observable way after it is destroyed.
  NoSideEffects,

  /// NoEscape - The destructor potentially has some side effects, but the
  /// address of the destroyed object never escapes (in the LLVM IR sense).
  NoEscape,

  /// Unknown - Something potentially crazy is going on here.
  Unknown
};
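
// The destructor analysis below keys off the heap-metadata global passed to
// swift_allocObject.  As an illustrative sketch (the code below relies only on
// slot #0 of the metadata struct holding the destructor function; the rest of
// the layout shown here is hypothetical):
//   @metadata = internal constant { void (%swift.refcounted*)*, ... }
//               { void (%swift.refcounted*)* @objectDtor, ... }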

/// analyzeDestructor - Given the heap.metadata argument to swift_allocObject,
/// take a look at the destructor and try to decide if it has side effects or
/// any other bad effects that can prevent it from being optimized.
static DtorKind analyzeDestructor(Value *P) {
  // If we have a null pointer for the metadata info, the dtor has no side
  // effects.  Actually, the final release would crash.  This is really only
  // useful for writing testcases.
  if (isa<ConstantPointerNull>(P->stripPointerCasts()))
    return DtorKind::NoSideEffects;

  // We have to have a known heap metadata value; reject dynamically computed
  // ones, or globals that may be overridden.
  GlobalVariable *GV = dyn_cast<GlobalVariable>(P->stripPointerCasts());
  if (GV == 0 || GV->mayBeOverridden())
    return DtorKind::Unknown;

  ConstantStruct *CS = dyn_cast_or_null<ConstantStruct>(GV->getInitializer());
  if (CS == 0 || CS->getNumOperands() == 0)
    return DtorKind::Unknown;

  // FIXME: Would like to abstract the dtor slot (#0) out from this to
  // somewhere unified.
  enum { DTorSlotOfHeapMetadata = 0 };
  Function *DtorFn =
    dyn_cast<Function>(CS->getOperand(DTorSlotOfHeapMetadata));
  if (DtorFn == 0 || DtorFn->mayBeOverridden() || DtorFn->hasExternalLinkage())
    return DtorKind::Unknown;

  // Okay, we have a body, and we can trust it.  If the function is marked
  // readonly, then we know it can't have any interesting side effects, so we
  // don't need to analyze it at all.
  if (DtorFn->onlyReadsMemory())
    return DtorKind::NoSideEffects;

  // The first argument is the object being destroyed.
  assert(DtorFn->arg_size() == 1 && !DtorFn->isVarArg() &&
         "expected a single object argument to destructors");
  Value *ThisObject = DtorFn->arg_begin();

  // Scan the body of the function, looking for anything scary.
  for (BasicBlock &BB : *DtorFn) {
    for (Instruction &I : BB) {
      // Note that the destructor may not be in any particular canonical form.
      switch (classifyInstruction(I)) {
      case RT_NoMemoryAccessed:
      case RT_AllocObject:
        // Skip over random instructions that don't touch memory in the caller.
        continue;

      case RT_Retain:                // x = swift_retain(y)
      case RT_RetainAndReturnThree:  // swift_retainAndReturnThree(obj, a,b,c)
      case RT_RetainNoResult: {      // swift_retain_noresult(obj)
        // Ignore retains of the "self" object, no resurrection is possible.
        Value *ThisRetainedObject = cast<CallInst>(I).getArgOperand(0);
        if (ThisRetainedObject->stripPointerCasts() ==
            ThisObject->stripPointerCasts())
          continue;
        // Otherwise, we may be retaining something scary.
        break;
      }

      case RT_Release: {
        // If we get to a release that is provably to this object, then we can
        // ignore it.
        Value *ThisReleasedObject = cast<CallInst>(I).getArgOperand(0);

        if (ThisReleasedObject->stripPointerCasts() ==
            ThisObject->stripPointerCasts())
          continue;
        // Otherwise, we may be releasing something scary.
        break;
      }

      case RT_ObjCRelease:
      case RT_ObjCRetain:
        // Objective-C retain and release can have arbitrary side effects.
        break;

      case RT_Unknown:
        // Ignore all instructions with no side effects.
        if (!I.mayHaveSideEffects()) continue;

        // store, memcpy, memmove *to* the object can be dropped.
        if (StoreInst *SI = dyn_cast<StoreInst>(&I)) {
          if (SI->getPointerOperand()->stripInBoundsOffsets() == ThisObject)
            continue;
        }

        if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(&I)) {
          if (MI->getDest()->stripInBoundsOffsets() == ThisObject)
            continue;
        }

        // Otherwise, we can't remove the deallocation completely.
        break;
      }

      // Okay, the function has some side effects; if it doesn't capture the
      // object argument, at least that is something.
      return DtorFn->doesNotCapture(0) ? DtorKind::NoEscape : DtorKind::Unknown;
    }
  }

  // If we didn't find any side effects, we win.
  return DtorKind::NoSideEffects;
}
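
// Illustrative destructor that analyzeDestructor would classify as
// NoSideEffects (IR sketch, hypothetical names): it only stores into the
// dying object, which is unobservable once the object is freed:
//   define internal void @objectDtor(%swift.refcounted* %this) {
//     %p = bitcast %swift.refcounted* %this to i64*
//     store i64 0, i64* %p
//     ret void
//   }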

/// performStoreOnlyObjectElimination - Scan the graph of uses of the specified
/// object allocation.  If the object does not escape and is only stored to
/// (this happens because GVN and other optimizations forward-substitute all
/// stores to the object, eliminating all loads from it), then zap the object
/// and all accesses related to it.
static bool performStoreOnlyObjectElimination(CallInst &Allocation,
                                              BasicBlock::iterator &BBI) {
  DtorKind DtorInfo = analyzeDestructor(Allocation.getArgOperand(0));

  // We can't delete the object if its destructor has side effects.
  if (DtorInfo != DtorKind::NoSideEffects)
    return false;

  // Do a depth first search exploring all of the uses of the object pointer,
  // following through casts, pointer adjustments etc.  If we find any loads or
  // any escape sites of the object, we give up.  If we succeed in walking the
  // entire graph of uses, we can remove the resultant set.
  SmallSetVector<Instruction*, 16> InvolvedInstructions;
  SmallVector<Instruction*, 16> Worklist;
  Worklist.push_back(&Allocation);

  // Stores - Keep track of all of the store instructions we see.
  SmallVector<StoreInst*, 16> Stores;

  while (!Worklist.empty()) {
    Instruction *I = Worklist.pop_back_val();

    // Insert the instruction into our InvolvedInstructions set.  If we have
    // already seen it, then don't reprocess all of the uses.
    if (!InvolvedInstructions.insert(I)) continue;

    // Okay, this is the first time we've seen this instruction, proceed.
    switch (classifyInstruction(*I)) {
    case RT_Retain:
    case RT_RetainAndReturnThree:
      assert(0 && "These should be canonicalized away");

    case RT_AllocObject:
      // If this is a different swift_allocObject than we started with, then
      // there is some computation feeding into a size or alignment computation
      // that we have to keep... unless we can delete *that* entire object as
      // well.
      break;

    // If no memory is accessed, then something is being done with the
    // pointer: maybe it is bitcast or GEP'd.  Since there are no side effects,
    // it is perfectly fine to delete this instruction if all uses of the
    // instruction are also eliminable.
    case RT_NoMemoryAccessed:
      if (I->mayHaveSideEffects() || isa<TerminatorInst>(I))
        return false;
      break;

    // It is perfectly fine to eliminate various retains and releases of this
    // object: we are zapping all accesses or none.
    case RT_Release:
    case RT_RetainNoResult:
      break;

    // If this is an unknown instruction, we have more interesting things to
    // consider.
    case RT_Unknown:
    case RT_ObjCRelease:
    case RT_ObjCRetain:
      // Otherwise, this really is some unhandled instruction.  Bail out.
      return false;
    }

    // Okay, if we got here, the instruction can be eaten so-long as all of its
    // uses can be.  Scan through the uses and add them to the worklist for
    // recursive processing.
    for (auto UI = I->user_begin(), E = I->user_end(); UI != E; ++UI) {
      Instruction *User = cast<Instruction>(*UI);

      // Handle stores as a special case here: we want to make sure that the
      // object is being stored *to*, not itself being stored (which would be
      // an escape point).  Since stores themselves don't have any uses, we can
      // short-cut the classification scheme above.
      if (StoreInst *SI = dyn_cast<StoreInst>(User)) {
        // If this is a store *to* the object, we can zap it.
        if (UI.getOperandNo() == StoreInst::getPointerOperandIndex()) {
          InvolvedInstructions.insert(SI);
          continue;
        }

        // Otherwise, using the object as a source (or size) is an escape.
        return false;
      }
      if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(User)) {
        // If this is a memset/memcpy/memmove *to* the object, we can zap it.
        if (UI.getOperandNo() == 0) {
          InvolvedInstructions.insert(MI);
          continue;
        }

        // Otherwise, using the object as a source (or size) is an escape.
        return false;
      }

      // Otherwise, normal instructions just go on the worklist for processing.
      Worklist.push_back(User);
    }
  }

  // Ok, we succeeded!  This means we can zap all of the instructions that use
  // the object.  One thing we have to be careful of is to make sure that we
  // don't invalidate "BBI" (the iterator the outer walk of the optimization
  // pass is using, which indicates the next instruction to process).  This
  // would happen if we deleted the instruction it is pointing to.  Advance the
  // iterator if that would happen.
  while (InvolvedInstructions.count(BBI))
    ++BBI;

  // Zap all of the instructions.
  for (auto I : InvolvedInstructions) {
    if (!I->use_empty())
      I->replaceAllUsesWith(UndefValue::get(I->getType()));
    I->eraseFromParent();
  }

  ++NumStoreOnlyObjectsEliminated;
  return true;
}

/// performGeneralOptimizations - This does a forward scan over basic blocks,
/// looking for interesting local optimizations that can be done.
static bool performGeneralOptimizations(Function &F) {
  bool Changed = false;

  // TODO: This is a really trivial local algorithm.  It could be much better.
  for (BasicBlock &BB : F) {
    for (BasicBlock::iterator BBI = BB.begin(), E = BB.end(); BBI != E; ) {
      // Preincrement the iterator to avoid invalidation and other trouble.
      Instruction &I = *BBI++;

      // Do various optimizations based on the instruction we find.
      switch (classifyInstruction(I)) {
      default: break;
      case RT_AllocObject:
        Changed |= performStoreOnlyObjectElimination(cast<CallInst>(I), BBI);
        break;
      case RT_Release:
        Changed |= performLocalReleaseMotion(cast<CallInst>(I), BB);
        break;
      case RT_RetainNoResult:
      case RT_ObjCRetain: {
        // Retain motion is a forward pass over the block.  Make sure we don't
        // invalidate our iterators by parking it on the instruction before I.
        BasicBlock::iterator Safe = &I;
        Safe = Safe != BB.begin() ? std::prev(Safe) : BB.end();
        if (performLocalRetainMotion(cast<CallInst>(I), BB)) {
          // If we zapped or moved the retain, reset the iterator on the
          // instruction *newly* after the prev instruction.
          BBI = Safe != BB.end() ? std::next(Safe) : BB.begin();
          Changed = true;
        }
        break;
      }
      }
    }
  }
  return Changed;
}

//===----------------------------------------------------------------------===//
//                            SwiftARCOpt Pass
//===----------------------------------------------------------------------===//

namespace llvm {
  void initializeSwiftARCOptPass(PassRegistry&);
}

char SwiftARCOpt::ID = 0;
INITIALIZE_PASS_BEGIN(SwiftARCOpt,
                      "swift-arc-optimize", "Swift ARC optimization",
                      false, false)
INITIALIZE_PASS_DEPENDENCY(SwiftAliasAnalysis)
INITIALIZE_PASS_END(SwiftARCOpt,
                    "swift-arc-optimize", "Swift ARC optimization",
                    false, false)

// Optimization passes.
llvm::FunctionPass *swift::createSwiftARCOptPass() {
  initializeSwiftARCOptPass(*llvm::PassRegistry::getPassRegistry());
  return new SwiftARCOpt();
}

SwiftARCOpt::SwiftARCOpt() : FunctionPass(ID) {
}

void SwiftARCOpt::getAnalysisUsage(llvm::AnalysisUsage &AU) const {
  AU.addRequired<SwiftAliasAnalysis>();
  AU.setPreservesCFG();
}

bool SwiftARCOpt::runOnFunction(Function &F) {
  bool Changed = false;

  // First thing: canonicalize swift_retain and similar calls so that nothing
  // uses their result.  This exposes the copy that the function does to the
  // optimizer.
  Changed |= canonicalizeInputFunction(F);

  // Next, do a pass with a couple of optimizations:
  // 1) release() motion, eliminating retain/release pairs when it turns out
  //    that a pair is not protecting anything that accesses the guarded heap
  //    object.
  // 2) deletion of stored-only objects - objects that are allocated and
  //    potentially retained and released, but are only stored to and don't
  //    escape.
  Changed |= performGeneralOptimizations(F);

  return Changed;
}
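
// As a usage sketch (assuming these passes are linked into an opt-style tool;
// the pass names come from the INITIALIZE_PASS registrations above):
//   opt -swift-aa -swift-arc-optimize input.ll -S -o output.ll
// Inside a pipeline built in C++, the equivalent is:
//   PM.add(swift::createSwiftARCOptPass());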

//===----------------------------------------------------------------------===//
//                        Return Argument Optimizer
//===----------------------------------------------------------------------===//

/// optimizeReturn3 - Look to see if we can optimize "ret (a,b,c)" - where one
/// of the three values was retained right before the return - into a
/// swift_retainAndReturnThree call.  This is particularly common when
/// returning a string or array slice.
static bool optimizeReturn3(ReturnInst *TheReturn) {
  // Ignore ret void.
  if (TheReturn->getNumOperands() == 0) return false;

  // See if this is a return of three things.
  Value *RetVal = TheReturn->getOperand(0);
  StructType *RetSTy = dyn_cast<StructType>(RetVal->getType());
  if (RetSTy == 0 || RetSTy->getNumElements() != 3) return false;

  // See if we can find scalars that feed into the return instruction.  If
  // not, bail out.
  Value *RetVals[3];
  for (unsigned i = 0; i != 3; ++i) {
    RetVals[i] = FindInsertedValue(RetVal, i);
    if (RetVals[i] == 0) return false;

    // If the scalar isn't int64 or pointer type, we can't transform it.
    if (!isa<PointerType>(RetVals[i]->getType()) &&
        !RetVals[i]->getType()->isIntegerTy(64))
      return false;
  }

  // The ARC optimizer will push the retains to be immediately before the
  // return, past any insertvalues.  We tolerate other non-memory instructions
  // though, in case other optimizations have moved them around.  Collect all
  // the retain candidates.
  SmallDenseMap<Value*, CallInst*, 8> RetainedPointers;
  for (BasicBlock::iterator BBI = TheReturn,
       E = TheReturn->getParent()->begin(); BBI != E; ) {
    Instruction &I = *--BBI;
    switch (classifyInstruction(I)) {
    case RT_Retain: {
      // Collect retained pointers.  If a pointer is multiply retained, it
      // doesn't matter which one we acquire.
      CallInst &TheRetain = cast<CallInst>(I);
      RetainedPointers[TheRetain.getArgOperand(0)] = &TheRetain;
      break;
    }
    case RT_NoMemoryAccessed:
      // If the instruction doesn't access memory, ignore it.
      break;
    default:
      // Otherwise, break out of the for loop.
      BBI = E;
      break;
    }
  }

  // If there are no retain candidates, we can't form a return3.
  if (RetainedPointers.empty())
    return false;

  // Check to see if any of the values returned is retained.  If so, we can
  // form a return3, which makes the retain a tail call.
  CallInst *TheRetain = 0;
  for (unsigned i = 0; i != 3; ++i) {
    // If the return value is also retained, we found our retain.
    TheRetain = RetainedPointers[RetVals[i]];
    if (TheRetain) break;

    // If we're returning the result of a known retain, then we can also
    // handle it.
    if (CallInst *CI = dyn_cast<CallInst>(RetVals[i]))
      if (classifyInstruction(*CI) == RT_Retain) {
        TheRetain = RetainedPointers[CI->getArgOperand(0)];
        if (TheRetain) break;
      }
  }

  // If none of the three values was retained, we can't form a return3.
  if (TheRetain == 0)
    return false;

  // Okay, there is, which means we can perform the transformation.  Get the
  // argument to swift_retain (the result will be zapped when we zap the call)
  // as the object to retain (of %swift.refcounted* type).
  Value *RetainedObject = TheRetain->getArgOperand(0);

  // Insert any new instructions before the return.
  IRBuilder<> B(TheReturn);
  Type *Int64Ty = B.getInt64Ty();

  // The swift_retainAndReturnThree function takes the three values as i64's;
  // cast each element to i64 if needed.
  for (Value *&Elt : RetVals) {
    if (isa<PointerType>(Elt->getType()))
      Elt = B.CreatePtrToInt(Elt, Int64Ty);
  }

  // Call swift_retainAndReturnThree with our pointer to retain and the three
  // i64's.
  Function &F = *TheReturn->getParent()->getParent();
  Constant *Cache = 0; // Not utilized.
  Value *LibCall = getRetainAndReturnThree(F, RetainedObject->getType(), Cache);
  CallInst *NR = B.CreateCall4(LibCall, RetainedObject,
                               RetVals[0], RetVals[1], RetVals[2]);
  NR->setTailCall(true);

  // The return type of the libcall is (i64,i64,i64).  Since at least one of
  // the values is a pointer (we retained it, after all!), we have to unpack
  // the elements, bitcast at least that one, and then repack to the proper
  // type expected by the ret instruction.
  for (unsigned i = 0; i != 3; ++i) {
    RetVals[i] = B.CreateExtractValue(NR, i);
    if (RetVals[i]->getType() != RetSTy->getElementType(i))
      RetVals[i] = B.CreateIntToPtr(RetVals[i], RetSTy->getElementType(i));
  }

  // Repack into an aggregate that can be returned.
  Value *RV = UndefValue::get(RetVal->getType());
  for (unsigned i = 0; i != 3; ++i)
    RV = B.CreateInsertValue(RV, RetVals[i], i);

  // Return the right thing and zap any instruction tree of inserts that
  // existed just to feed the old return.
  TheReturn->setOperand(0, RV);
  RecursivelyDeleteTriviallyDeadInstructions(RetVal);

  // Zap the retain that we're subsuming and we're done!
  if (!TheRetain->use_empty())
    TheRetain->replaceAllUsesWith(RetainedObject);
  TheRetain->eraseFromParent();
  ++NumReturnThreeTailCallsFormed;
  return true;
}
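
// Illustrative before/after for the transformation above (IR sketch,
// hypothetical names).  Before, the retain and the return are separate:
//   %p = call %swift.refcounted* @swift_retain(%swift.refcounted* %obj)
//   ...
//   ret { i64, i64, i64 } %ret
// After, the retain is folded into a tail call that returns all three words
// (with ptrtoint/inttoptr casts and an insertvalue repack as needed):
//   %t = tail call { i64, i64, i64 }
//        @swift_retainAndReturnThree(%swift.refcounted* %obj,
//                                    i64 %a, i64 %b, i64 %c)
//   ...
//   ret { i64, i64, i64 } %r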

//===----------------------------------------------------------------------===//
//                          SwiftARCExpandPass Pass
//===----------------------------------------------------------------------===//

/// performARCExpansion - This implements the very late (just before code
/// generation) lowering processes that we do to expose low level performance
/// optimizations and take advantage of special features of the ABI.  These
/// expansion steps can foil the general mid-level optimizer, so they are done
/// very, very late.
///
/// Expansions include:
///   - Lowering retain calls to swift_retain (which returns the retained
///     argument) to lower register pressure.
///   - Forming calls to swift_retainAndReturnThree when the last thing in a
///     function is to retain one of its result values, and when it returns
///     exactly three values.
///
/// Coming into this function, we assume that the code is in canonical form:
/// none of these calls have any uses of their return values.
bool SwiftARCExpandPass::runOnFunction(Function &F) {
  Constant *RetainCache = nullptr;
  bool Changed = false;

  SmallVector<ReturnInst*, 8> Returns;

  // Since all of the calls are canonicalized, we know that we can just walk
  // through the function and collect the interesting heap object definitions
  // by getting the argument to these functions.
  DenseMap<Value*, TinyPtrVector<Instruction*>> DefsOfValue;

  // Keep track of which order we see values in since iteration over a
  // densemap isn't in a deterministic order, and isn't efficient anyway.
  SmallVector<Value*, 16> DefOrder;

  // Do a first pass over the function, collecting all interesting definitions.
  // In this pass, we rewrite any intra-block uses that we can, since the
  // SSAUpdater doesn't handle them.
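  //
  // For example (illustrative IR, hypothetical names), within a single block
  //   call void @swift_retain_noresult(%swift.refcounted* %x)
  //   call void @use(%swift.refcounted* %x)
  // is rewritten here to
  //   %x1 = tail call %swift.refcounted* @swift_retain(%swift.refcounted* %x)
  //   call void @use(%swift.refcounted* %x1)
  // which keeps the value live in the register that the retain returns.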
  DenseMap<Value*, Value*> LocalUpdates;
  for (BasicBlock &BB : F) {
    for (auto II = BB.begin(), E = BB.end(); II != E; ) {
      // Preincrement the iterator to avoid iteration issues in the loop.
      Instruction &Inst = *II++;

      switch (classifyInstruction(Inst)) {
      case RT_Retain: assert(0 && "This should be canonicalized away!");
      case RT_RetainAndReturnThree:
      case RT_RetainNoResult: {
        Value *ArgVal = cast<CallInst>(Inst).getArgOperand(0);

        // First step: rewrite swift_retain_noresult to swift_retain, exposing
        // the result value.
        CallInst &CI =
          *CallInst::Create(getRetain(F, ArgVal->getType(), RetainCache),
                            ArgVal, "", &Inst);
        CI.setTailCall(true);
        Inst.eraseFromParent();

        if (!isa<Instruction>(ArgVal) && !isa<Argument>(ArgVal))
          continue;

        TinyPtrVector<Instruction*> &GlobalEntry = DefsOfValue[ArgVal];

        // If this is the first definition of a value for the argument that
        // we've seen, keep track of it in DefOrder.
        if (GlobalEntry.empty())
          DefOrder.push_back(ArgVal);

        // Check to see if there is already an entry for this basic block.  If
        // there is another local entry, switch to using the local value and
        // remove the previous value from the GlobalEntry.
        Value *&LocalEntry = LocalUpdates[ArgVal];
        if (LocalEntry) {
          Changed = true;
          CI.setArgOperand(0, LocalEntry);
          assert(GlobalEntry.back() == LocalEntry && "Local/Global mismatch?");
          GlobalEntry.pop_back();
        }
        LocalEntry = &CI;
        GlobalEntry.push_back(&CI);
        continue;
      }
      case RT_Unknown:
      case RT_Release:
      case RT_AllocObject:
      case RT_NoMemoryAccessed:
      case RT_ObjCRelease:
      case RT_ObjCRetain: // TODO: Could chain together objc_retains.
        // Remember returns in the first pass.
        if (ReturnInst *RI = dyn_cast<ReturnInst>(&Inst))
          Returns.push_back(RI);
        // Just remap any uses in the value.
        break;
      }

      // Check to see if there are any uses of a value in the LocalUpdates
      // map.  If so, remap it now to the locally defined version.
      for (unsigned i = 0, e = Inst.getNumOperands(); i != e; ++i)
        if (Value *V = LocalUpdates.lookup(Inst.getOperand(i))) {
          Changed = true;
          Inst.setOperand(i, V);
        }
    }
    LocalUpdates.clear();
  }

  // Now that we've collected all of the interesting heap object values that
  // are passed into argument-returning functions, rewrite uses of these
  // pointers with optimized lifetime-shortened versions of them.
  for (Value *Ptr : DefOrder) {
    // If Ptr is an instruction, remember its block.  If not, use the entry
    // block as its block (it must be an argument, constant, etc).
    BasicBlock *PtrBlock;
    if (Instruction *PI = dyn_cast<Instruction>(Ptr))
      PtrBlock = PI->getParent();
    else
      PtrBlock = &F.getEntryBlock();

    TinyPtrVector<Instruction*> &Defs = DefsOfValue[Ptr];
    // This is the same problem as SSA construction, so we just use LLVM's
    // SSAUpdater, with each retain as a definition of the virtual value.
    SSAUpdater Updater;
    Updater.Initialize(Ptr->getType(), Ptr->getName());

    // Set the return value of each of these calls as a definition of the
    // virtual value.
    for (auto D : Defs)
      Updater.AddAvailableValue(D->getParent(), D);

    // If we didn't add a definition for Ptr's block, then Ptr itself is
    // available in its block.
    if (!Updater.HasValueForBlock(PtrBlock))
      Updater.AddAvailableValue(PtrBlock, Ptr);

    // Rewrite uses of Ptr to their optimized forms.
    for (auto UI = Ptr->user_begin(), E = Ptr->user_end(); UI != E; ) {
      // Make sure to increment the use iterator before potentially rewriting
      // it.
      Use &U = UI.getUse();
      ++UI;

      // If the use is in the same block that defines it and the User is not a
      // PHI node, then this is a local use that shouldn't be rewritten.
      Instruction *User = cast<Instruction>(U.getUser());
      if (User->getParent() == PtrBlock && !isa<PHINode>(User))
        continue;

      // Otherwise, change it if profitable!
      Updater.RewriteUse(U);
      if (U.get() != Ptr)
        Changed = true;
    }
  }

  // Scan through all the returns to see if there are any that can be
  // optimized.
  // FIXME: The swift_retainAndReturnThree runtime call is currently
  // implemented only on x86_64.
  // FIXME: The optimizeReturn3() implementation assumes a 64-bit target.
  if (llvm::Triple(F.getParent()->getTargetTriple()).getArchName() == "x86_64")
    for (ReturnInst *RI : Returns)
      Changed |= optimizeReturn3(RI);

  return Changed;
}

namespace llvm {
  void initializeSwiftARCExpandPassPass(PassRegistry&);
}

char SwiftARCExpandPass::ID = 0;
INITIALIZE_PASS(SwiftARCExpandPass,
                "swift-arc-expand", "Swift ARC expansion",
                false, false)

llvm::FunctionPass *swift::createSwiftARCExpandPass() {
  initializeSwiftARCExpandPassPass(*llvm::PassRegistry::getPassRegistry());
  return new SwiftARCExpandPass();
}