swift-mirror/lib/OptimizeARC/Passes.cpp
Nadav Rotem 96cf01ba82 swift_release may cause side effects when the destructor is called.
GVN uses the swift alias analysis, so we can't declare swift_release as having no side effects.

rdar://16443423

Swift SVN r16030
2014-04-07 23:54:16 +00:00


//===--- Passes.cpp - Reference Counting Optimizations --------------------===//
//
// This source file is part of the Swift.org open source project
//
// Copyright (c) 2014 - 2015 Apple Inc. and the Swift project authors
// Licensed under Apache License v2.0 with Runtime Library Exception
//
// See http://swift.org/LICENSE.txt for license information
// See http://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
//
//===----------------------------------------------------------------------===//
//
// This file implements optimizations for reference counting, object
// allocation, and other runtime entrypoints.
//
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "swift-optimize"
#include "swift/OptimizeARC/Passes.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Module.h"
#include "llvm/Pass.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Transforms/Utils/SSAUpdater.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/ADT/TinyPtrVector.h"
#include "llvm/ADT/Triple.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/CommandLine.h"
using namespace llvm;
using swift::SwiftAliasAnalysis;
using swift::SwiftARCOpt;
using swift::SwiftARCExpandPass;
STATISTIC(NumNoopDeleted,
"Number of no-op swift calls eliminated");
STATISTIC(NumRetainReleasePairs,
"Number of swift retain/release pairs eliminated");
STATISTIC(NumObjCRetainReleasePairs,
"Number of objc retain/release pairs eliminated");
STATISTIC(NumAllocateReleasePairs,
"Number of swift allocate/release pairs eliminated");
STATISTIC(NumStoreOnlyObjectsEliminated,
"Number of swift stored-only objects eliminated");
STATISTIC(NumReturnThreeTailCallsFormed,
"Number of swift_retainAndReturnThree tail calls formed");
//===----------------------------------------------------------------------===//
// Utility Functions
//===----------------------------------------------------------------------===//
enum RT_Kind {
/// An instruction with this classification is known to not access (read or
/// write) memory.
RT_NoMemoryAccessed,
/// SwiftHeapObject *swift_retain(SwiftHeapObject *object)
RT_Retain,
/// void swift_retain_noresult(SwiftHeapObject *object)
RT_RetainNoResult,
/// (i64,i64,i64) swift_retainAndReturnThree(SwiftHeapObject *obj, i64,i64,i64)
RT_RetainAndReturnThree,
/// void swift_release(SwiftHeapObject *object)
RT_Release,
/// SwiftHeapObject *swift_allocObject(SwiftHeapMetadata *metadata,
/// size_t size, size_t alignment)
RT_AllocObject,
/// void objc_release(%objc_object* %P)
RT_ObjCRelease,
/// %objc_object* objc_retain(%objc_object* %P)
RT_ObjCRetain,
/// This is not a runtime function that we support. Maybe it is not a call,
/// or is a call to something we don't care about.
RT_Unknown,
};
/// classifyInstruction - Take a look at the specified instruction and classify
/// it into what kind of runtime entrypoint it is, if any.
static RT_Kind classifyInstruction(const Instruction &I) {
if (!I.mayReadOrWriteMemory())
return RT_NoMemoryAccessed;
// Non-calls or calls to indirect functions are unknown.
const CallInst *CI = dyn_cast<CallInst>(&I);
if (CI == 0) return RT_Unknown;
Function *F = CI->getCalledFunction();
if (F == 0) return RT_Unknown;
return StringSwitch<RT_Kind>(F->getName())
.Case("swift_retain", RT_Retain)
.Case("swift_retain_noresult", RT_RetainNoResult)
.Case("swift_release", RT_Release)
.Case("swift_allocObject", RT_AllocObject)
.Case("swift_retainAndReturnThree", RT_RetainAndReturnThree)
.Case("objc_release", RT_ObjCRelease)
.Case("objc_retain", RT_ObjCRetain)
.Default(RT_Unknown);
}
/// getRetain - Return a callable function for swift_retain. F is the function
/// being operated on, ObjectPtrTy is an instance of the object pointer type to
/// use, and Cache is a null-initialized place to make subsequent requests
/// faster.
static Constant *getRetain(Function &F, Type *ObjectPtrTy, Constant *&Cache) {
if (Cache) return Cache;
auto AttrList = AttributeSet::get(F.getContext(),
AttributeSet::FunctionIndex,
Attribute::NoUnwind);
Module *M = F.getParent();
return Cache = M->getOrInsertFunction("swift_retain", AttrList,
ObjectPtrTy, ObjectPtrTy, NULL);
}
/// getRetainNoResult - Return a callable function for swift_retain_noresult.
/// F is the function being operated on, ObjectPtrTy is an instance of the
/// object pointer type to use, and Cache is a null-initialized place to make
/// subsequent requests faster.
static Constant *getRetainNoResult(Function &F, Type *ObjectPtrTy,
Constant *&Cache) {
if (Cache) return Cache;
auto AttrList = AttributeSet::get(F.getContext(), 1, Attribute::NoCapture);
AttrList = AttrList.addAttribute(F.getContext(),
AttributeSet::FunctionIndex,
Attribute::NoUnwind);
Module *M = F.getParent();
return Cache = M->getOrInsertFunction("swift_retain_noresult", AttrList,
Type::getVoidTy(F.getContext()),
ObjectPtrTy, NULL);
}
/// getRetainAndReturnThree - Return a callable function for
/// swift_retainAndReturnThree. F is the function being operated on,
/// ObjectPtrTy is an instance of the object pointer type to use, and Cache is a
/// null-initialized place to make subsequent requests faster.
static Constant *getRetainAndReturnThree(Function &F, Type *ObjectPtrTy,
Constant *&Cache) {
if (Cache) return Cache;
auto AttrList = AttributeSet::get(F.getContext(),
AttributeSet::FunctionIndex,
Attribute::NoUnwind);
Module *M = F.getParent();
Type *Int64Ty = Type::getInt64Ty(F.getContext());
Type *RetTy = StructType::get(Int64Ty, Int64Ty, Int64Ty, NULL);
return Cache = M->getOrInsertFunction("swift_retainAndReturnThree", AttrList,
RetTy, ObjectPtrTy,
Int64Ty, Int64Ty, Int64Ty, NULL);
}
//===----------------------------------------------------------------------===//
// SwiftAliasAnalysis
//===----------------------------------------------------------------------===//
namespace llvm {
void initializeSwiftAliasAnalysisPass(PassRegistry&);
}
// Register this pass...
char SwiftAliasAnalysis::ID = 0;
INITIALIZE_AG_PASS(SwiftAliasAnalysis, AliasAnalysis, "swift-aa",
"Swift Alias Analysis", false, true, false)
AliasAnalysis::ModRefResult
SwiftAliasAnalysis::getModRefInfo(ImmutableCallSite CS, const Location &Loc) {
// We know the mod-ref behavior of various runtime functions.
switch (classifyInstruction(*CS.getInstruction())) {
case RT_AllocObject:
case RT_NoMemoryAccessed:
case RT_Retain:
case RT_RetainNoResult:
case RT_RetainAndReturnThree:
case RT_ObjCRetain:
// These entrypoints don't modify any compiler-visible state.
return NoModRef;
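// Note: swift_release can run a user-written destructor with arbitrary side
// effects, and GVN consults this alias analysis, so releases must take the
// conservative path below rather than being treated as NoModRef.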
case RT_Release:
case RT_ObjCRelease:
case RT_Unknown:
break;
}
return AliasAnalysis::getModRefInfo(CS, Loc);
}
//===----------------------------------------------------------------------===//
// Input Function Canonicalizer
//===----------------------------------------------------------------------===//
/// updateCallValueUses - We have something like this:
/// %z = ptrtoint %swift.refcounted* %2 to i64
/// %3 = call { i64, i64, i64 }
/// @swift_retainAndReturnThree(..., i64 %x, i64 %1, i64 %z)
/// %a = extractvalue { i64, i64, i64 } %3, 0
/// %b = extractvalue { i64, i64, i64 } %3, 1
/// %c = extractvalue { i64, i64, i64 } %3, 2
/// %a1 = inttoptr i64 %a to i8*
/// %c1 = inttoptr i64 %c to %swift.refcounted*
///
/// This function is invoked three times (once for each of the three arg/return
/// values that need to be replaced) and makes a best-effort attempt to patch
/// things up to avoid all the casts. "CI" coming into here is the call to
/// swift_retainAndReturnThree or to an extract that returns all three words.
///
static void updateCallValueUses(CallInst &CI, unsigned EltNo) {
Value *Op = CI.getArgOperand(1+EltNo);
for (auto UI = CI.user_begin(), E = CI.user_end(); UI != E; ++UI) {
ExtractValueInst *Extract = dyn_cast<ExtractValueInst>(*UI);
// Make sure this extract is relevant to EltNo.
if (Extract == 0 || Extract->getNumIndices() != 1 ||
Extract->getIndices()[0] != EltNo)
continue;
// Both the input and result should be i64's.
assert(Extract->getType() == Op->getType() && "Should have i64's here");
for (auto UI2 = Extract->user_begin(), E = Extract->user_end(); UI2 != E; ){
IntToPtrInst *ExtractUser = dyn_cast<IntToPtrInst>(*UI2++);
PtrToIntInst *OpCast = dyn_cast<PtrToIntInst>(Op);
if (ExtractUser && OpCast &&
OpCast->getOperand(0)->getType() == ExtractUser->getType()) {
ExtractUser->replaceAllUsesWith(OpCast->getOperand(0));
ExtractUser->eraseFromParent();
}
}
// Stitch up anything other than the ptrtoint -> inttoptr.
Extract->replaceAllUsesWith(Op);
// Zap the dead ExtractValue's.
RecursivelyDeleteTriviallyDeadInstructions(Extract);
return;
}
}
/// canonicalizeInputFunction - Functions like swift_retain return an
/// argument as a low-level performance optimization. This makes it difficult
/// to reason about pointer equality though, so undo it as an initial
/// canonicalization step. After this step, all swift_retain's have been
/// replaced with swift_retain_noresult.
///
/// This also does some trivial peep-hole optimizations as we go.
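///
/// For example (an illustrative IR sketch; %x and %y are hypothetical values,
/// not taken from real front-end output):
///
///   %y = call %swift.refcounted* @swift_retain(%swift.refcounted* %x)
///   ... uses of %y ...
///
/// becomes:
///
///   call void @swift_retain_noresult(%swift.refcounted* %x)
///   ... uses of %x ...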
static bool canonicalizeInputFunction(Function &F) {
Constant *RetainNoResultCache = 0;
bool Changed = false;
for (auto &BB : F)
for (auto I = BB.begin(); I != BB.end(); ) {
Instruction &Inst = *I++;
switch (classifyInstruction(Inst)) {
case RT_Unknown:
case RT_AllocObject:
case RT_NoMemoryAccessed:
break;
case RT_RetainNoResult: {
CallInst &CI = cast<CallInst>(Inst);
Value *ArgVal = CI.getArgOperand(0);
// retain_noresult(null) is a no-op.
if (isa<ConstantPointerNull>(ArgVal)) {
CI.eraseFromParent();
Changed = true;
++NumNoopDeleted;
continue;
}
break;
}
case RT_Retain: {
// If any x = swift_retain(y)'s got here, canonicalize them into:
// x = y; swift_retain_noresult(y).
// This is important even though the front-end doesn't generate them,
// because inlined functions can be ARC optimized, and thus may contain
// swift_retain calls.
CallInst &CI = cast<CallInst>(Inst);
Value *ArgVal = CI.getArgOperand(0);
// Rewrite uses of the result to use the argument.
if (!CI.use_empty())
Inst.replaceAllUsesWith(ArgVal);
// Insert a call to swift_retain_noresult to replace this and reset the
// iterator so that we visit it next.
I = CallInst::Create(getRetainNoResult(F, ArgVal->getType(),
RetainNoResultCache),
ArgVal, "", &CI);
CI.eraseFromParent();
Changed = true;
break;
}
case RT_Release: {
CallInst &CI = cast<CallInst>(Inst);
// swift_release(null) is a noop, zap it.
Value *ArgVal = CI.getArgOperand(0);
if (isa<ConstantPointerNull>(ArgVal)) {
CI.eraseFromParent();
Changed = true;
++NumNoopDeleted;
continue;
}
break;
}
case RT_RetainAndReturnThree: {
// (a,b,c) = swift_retainAndReturnThree(obj, d,e,f)
// -> swift_retain_noresult(obj)
// -> (a,b,c) = (d,e,f)
//
// The important case for doing this is when this function has been inlined
// into another function. In this case, there is no return anymore.
CallInst &CI = cast<CallInst>(Inst);
IRBuilder<> B(&CI);
Type *HeapObjectTy = CI.getArgOperand(0)->getType();
// Reprocess starting at the new swift_retain_noresult.
I = B.CreateCall(getRetainNoResult(F, HeapObjectTy, RetainNoResultCache),
CI.getArgOperand(0));
// See if we can eliminate all of the extractvalue's that are hanging off
// the swift_retainAndReturnThree. This is important to eliminate casts
// that will block optimizations and generally results in better IR. Note
// that this is just a best-effort attempt though.
updateCallValueUses(CI, 0);
updateCallValueUses(CI, 1);
updateCallValueUses(CI, 2);
// If our best-effort wasn't good enough, fall back to generating terrible
// but correct code.
if (!CI.use_empty()) {
Value *V = UndefValue::get(CI.getType());
V = B.CreateInsertValue(V, CI.getArgOperand(1), 0U);
V = B.CreateInsertValue(V, CI.getArgOperand(2), 1U);
V = B.CreateInsertValue(V, CI.getArgOperand(3), 2U);
CI.replaceAllUsesWith(V);
}
CI.eraseFromParent();
Changed = true;
break;
}
case RT_ObjCRelease: {
CallInst &CI = cast<CallInst>(Inst);
Value *ArgVal = CI.getArgOperand(0);
// objc_release(null) is a noop, zap it.
if (isa<ConstantPointerNull>(ArgVal)) {
CI.eraseFromParent();
Changed = true;
++NumNoopDeleted;
continue;
}
break;
}
case RT_ObjCRetain: {
// Canonicalize objc_retain so that nothing uses its result.
CallInst &CI = cast<CallInst>(Inst);
Value *ArgVal = CI.getArgOperand(0);
if (!CI.use_empty()) {
CI.replaceAllUsesWith(ArgVal);
Changed = true;
}
// objc_retain(null) is a noop, delete it.
if (isa<ConstantPointerNull>(ArgVal)) {
CI.eraseFromParent();
Changed = true;
++NumNoopDeleted;
continue;
}
break;
}
}
}
return Changed;
}
//===----------------------------------------------------------------------===//
// Release() Motion
//===----------------------------------------------------------------------===//
/// performLocalReleaseMotion - Scan backwards from the specified release,
/// moving it earlier in the function if possible, over instructions that do not
/// access the released object. If we get to a retain or allocation of the
/// object, zap both.
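///
/// For example (an illustrative sketch; %obj is a hypothetical value):
///
///   call void @swift_retain_noresult(%swift.refcounted* %obj)
///   ... instructions that don't touch %obj ...
///   call void @swift_release(%swift.refcounted* %obj)
///
/// The release is moved up until it reaches the retain, at which point both
/// calls are deleted as a pair that protects nothing.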
static bool performLocalReleaseMotion(CallInst &Release, BasicBlock &BB) {
// FIXME: Call classifier should identify the object for us. Too bad C++
// doesn't have nice Swift-style enums.
Value *ReleasedObject = Release.getArgOperand(0);
BasicBlock::iterator BBI = &Release;
// Scan until we get to the top of the block.
while (BBI != BB.begin()) {
--BBI;
// Don't analyze PHI nodes. We can't move releases before them and they
// aren't "interesting".
if (isa<PHINode>(BBI) ||
// If we found the instruction that defines the value we're releasing,
// don't push the release past it.
&*BBI == ReleasedObject) {
++BBI;
goto OutOfLoop;
}
switch (classifyInstruction(*BBI)) {
case RT_Retain: // Canonicalized away, shouldn't exist.
case RT_RetainAndReturnThree:
assert(0 && "these entrypoints should be canonicalized away");
case RT_NoMemoryAccessed:
// Skip over random instructions that don't touch memory. They don't need
// protection by retain/release.
continue;
case RT_Release: {
// If we get to a release, we can generally ignore it and scan past it.
// However, if we get to a release of obviously the same object, we stop
// scanning here because it should already have been moved as early as
// possible, so there is no reason to move its friend to the same place.
//
// NOTE: If this occurs frequently, maybe we can have a release(Obj, N)
// API to drop multiple retain counts at once.
CallInst &ThisRelease = cast<CallInst>(*BBI);
Value *ThisReleasedObject = ThisRelease.getArgOperand(0);
if (ThisReleasedObject == ReleasedObject) {
//Release.dump(); ThisRelease.dump(); BB.getParent()->dump();
++BBI;
goto OutOfLoop;
}
continue;
}
case RT_RetainNoResult: { // swift_retain_noresult(obj)
CallInst &Retain = cast<CallInst>(*BBI);
Value *RetainedObject = Retain.getArgOperand(0);
// If the retain and release are to obviously pointer-equal objects, then
// we can delete both of them. We have proven that they do not protect
// anything of value.
if (RetainedObject == ReleasedObject) {
Retain.eraseFromParent();
Release.eraseFromParent();
++NumRetainReleasePairs;
return true;
}
// Otherwise, this is a retain of an object that is not statically known
// to be the same object. It may still be dynamically the same object
// though. In this case, we can't move the release past it.
// TODO: Strengthen analysis.
//Release.dump(); ThisRelease.dump(); BB.getParent()->dump();
++BBI;
goto OutOfLoop;
}
case RT_AllocObject: { // %obj = swift_alloc(...)
CallInst &Allocation = cast<CallInst>(*BBI);
// If this is an allocation of an unrelated object, just ignore it.
// TODO: This is not safe without proving the object being released is not
// related to the allocated object. Consider something silly like this:
// A = allocate()
// B = bitcast A to object
// release(B)
if (ReleasedObject != &Allocation) {
// Release.dump(); BB.getParent()->dump();
++BBI;
goto OutOfLoop;
}
// If this is a release right after an allocation of the object, then we
// can zap both.
Allocation.replaceAllUsesWith(UndefValue::get(Allocation.getType()));
Allocation.eraseFromParent();
Release.eraseFromParent();
++NumAllocateReleasePairs;
return true;
}
case RT_Unknown:
case RT_ObjCRelease:
case RT_ObjCRetain:
// BBI->dump();
// Otherwise, we get to something unknown/unhandled. Bail out for now.
++BBI;
goto OutOfLoop;
}
}
OutOfLoop:
// If we got to the top of the block (and the release didn't already start
// there), move the release to the top of the block.
// TODO: This is where we'd plug in some global algorithms someday.
if (&*BBI != &Release) {
Release.moveBefore(BBI);
return true;
}
return false;
}
//===----------------------------------------------------------------------===//
// Retain() Motion
//===----------------------------------------------------------------------===//
/// performLocalRetainMotion - Scan forward from the specified retain, moving it
/// later in the function if possible, over instructions that provably can't
/// release the object. If we get to a release of the object, zap both.
///
/// NOTE: this handles both objc_retain and swift_retain_noresult.
///
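/// For example (an illustrative sketch; %obj is a hypothetical value):
///
///   call void @swift_retain_noresult(%swift.refcounted* %obj)
///   ... loads, stores, and other instructions that provably can't release ...
///   call void @swift_release(%swift.refcounted* %obj)
///
/// The retain is moved down until it reaches the release, and the pair is
/// deleted.
///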
static bool performLocalRetainMotion(CallInst &Retain, BasicBlock &BB) {
// FIXME: Call classifier should identify the object for us. Too bad C++
// doesn't have nice Swift-style enums.
Value *RetainedObject = Retain.getArgOperand(0);
BasicBlock::iterator BBI = &Retain, BBE = BB.getTerminator();
bool isObjCRetain = Retain.getCalledFunction()->getName() == "objc_retain";
bool MadeProgress = false;
// Scan until we get to the end of the block.
for (++BBI; BBI != BBE; ++BBI) {
Instruction &CurInst = *BBI;
// Classify the instruction. This switch does a "break" when the instruction
// can be skipped and is interesting, and a "continue" when it is a retain
// of the same pointer.
switch (classifyInstruction(CurInst)) {
case RT_Retain: // Canonicalized away, shouldn't exist.
case RT_RetainAndReturnThree:
assert(0 && "these entrypoints should be canonicalized away");
case RT_NoMemoryAccessed:
case RT_AllocObject:
// Skip over random instructions that don't touch memory. They don't need
// protection by retain/release.
break;
case RT_RetainNoResult: { // swift_retain_noresult(obj)
//CallInst &ThisRetain = cast<CallInst>(CurInst);
//Value *ThisRetainedObject = ThisRetain.getArgOperand(0);
// If we see a retain of the same object, we can skip over it, but we
// can't count it as progress. Just pushing a retain(x) past a retain(y)
// doesn't change the program.
continue;
}
case RT_Release: {
// If we get to a release that is provably to this object, then we can zap
// it and the retain.
CallInst &ThisRelease = cast<CallInst>(CurInst);
Value *ThisReleasedObject = ThisRelease.getArgOperand(0);
if (!isObjCRetain && ThisReleasedObject == RetainedObject) {
Retain.eraseFromParent();
ThisRelease.eraseFromParent();
++NumRetainReleasePairs;
return true;
}
// Otherwise, if this is some other pointer, we can only ignore it if we
// can prove that the two objects don't alias.
// Retain.dump(); ThisRelease.dump(); BB.getParent()->dump();
goto OutOfLoop;
}
case RT_ObjCRelease: {
// If we get to an objc_release that is provably to this object, then we
// can zap it and the objc_retain.
CallInst &ThisRelease = cast<CallInst>(CurInst);
Value *ThisReleasedObject = ThisRelease.getArgOperand(0);
if (isObjCRetain && ThisReleasedObject == RetainedObject) {
Retain.eraseFromParent();
ThisRelease.eraseFromParent();
++NumObjCRetainReleasePairs;
return true;
}
// Otherwise, if this is some other pointer, we can only ignore it if we
// can prove that the two objects don't alias.
// Retain.dump(); ThisRelease.dump(); BB.getParent()->dump();
goto OutOfLoop;
}
case RT_Unknown:
case RT_ObjCRetain:
// Loads, stores, memcpy, etc. can't do a release.
if (isa<LoadInst>(CurInst) || isa<StoreInst>(CurInst) ||
isa<MemIntrinsic>(CurInst))
break;
// CurInst->dump(); BBI->dump();
// Otherwise, we get to something unknown/unhandled. Bail out for now.
goto OutOfLoop;
}
// If the switch did a break, we made some progress moving this retain.
MadeProgress = true;
}
OutOfLoop:
// If we were able to move the retain down, move it now.
// TODO: This is where we'd plug in some global algorithms someday.
if (MadeProgress) {
Retain.moveBefore(BBI);
return true;
}
return false;
}
//===----------------------------------------------------------------------===//
// Store-Only Object Elimination
//===----------------------------------------------------------------------===//
/// DT_Kind - Classification for destructor semantics.
enum class DtorKind {
/// NoSideEffects - The destructor does nothing, or just touches the local
/// object in a non-observable way after it is destroyed.
NoSideEffects,
/// NoEscape - The destructor potentially has some side effects, but the
/// address of the destroyed object never escapes (in the LLVM IR sense).
NoEscape,
/// Unknown - Something potentially crazy is going on here.
Unknown
};
/// analyzeDestructor - Given the heap.metadata argument to swift_allocObject,
/// take a look a the destructor and try to decide if it has side effects or any
/// other bad effects that can prevent it from being optimized.
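///
/// For reference, the shape this code expects (an illustrative sketch; the
/// names and any fields past the dtor slot are hypothetical):
///
///   @metadata = internal constant { void (%swift.refcounted*)*, ... }
///                        { void (%swift.refcounted*)* @objectDtor, ... }
///
/// where @objectDtor is the destructor analyzed below.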
static DtorKind analyzeDestructor(Value *P) {
// If we have a null pointer for the metadata info, the dtor has no side
// effects. Actually, the final release would crash. This is really only
// useful for writing testcases.
if (isa<ConstantPointerNull>(P->stripPointerCasts()))
return DtorKind::NoSideEffects;
// We have to have a known heap metadata value; reject dynamically computed
// ones, or globals that may be overridden.
GlobalVariable *GV = dyn_cast<GlobalVariable>(P->stripPointerCasts());
if (GV == 0 || GV->mayBeOverridden()) return DtorKind::Unknown;
ConstantStruct *CS = dyn_cast_or_null<ConstantStruct>(GV->getInitializer());
if (CS == 0 || CS->getNumOperands() == 0) return DtorKind::Unknown;
// FIXME: Would like to abstract the dtor slot (#0) out from this to somewhere
// unified.
enum { DTorSlotOfHeapMetadata = 0 };
Function *DtorFn = dyn_cast<Function>(CS->getOperand(DTorSlotOfHeapMetadata));
if (DtorFn == 0 || DtorFn->mayBeOverridden() || DtorFn->hasExternalLinkage())
return DtorKind::Unknown;
// Okay, we have a body, and we can trust it. If the function is marked
// readonly, then we know it can't have any interesting side effects, so we
// don't need to analyze it at all.
if (DtorFn->onlyReadsMemory())
return DtorKind::NoSideEffects;
// The first argument is the object being destroyed.
assert(DtorFn->arg_size() == 1 && !DtorFn->isVarArg() &&
"expected a single object argument to destructors");
Value *ThisObject = DtorFn->arg_begin();
// Scan the body of the function, looking for anything scary.
for (BasicBlock &BB : *DtorFn) {
for (Instruction &I : BB) {
// Note that the destructor may not be in any particular canonical form.
switch (classifyInstruction(I)) {
case RT_NoMemoryAccessed:
case RT_AllocObject:
// Skip over random instructions that don't touch memory in the caller.
continue;
case RT_Retain: // x = swift_retain(y)
case RT_RetainAndReturnThree: // swift_retainAndReturnThree(obj,a,b,c)
case RT_RetainNoResult: { // swift_retain_noresult(obj)
// Ignore retains of the "self" object; no resurrection is possible.
Value *ThisRetainedObject = cast<CallInst>(I).getArgOperand(0);
if (ThisRetainedObject->stripPointerCasts() ==
ThisObject->stripPointerCasts())
continue;
// Otherwise, we may be retaining something scary.
break;
}
case RT_Release: {
// If we get to a release that is provably to this object, then we can
// ignore it.
Value *ThisReleasedObject = cast<CallInst>(I).getArgOperand(0);
if (ThisReleasedObject->stripPointerCasts() ==
ThisObject->stripPointerCasts())
continue;
// Otherwise, we may be releasing something scary.
break;
}
case RT_ObjCRelease:
case RT_ObjCRetain:
// Objective-C retain and release can have arbitrary side effects.
break;
case RT_Unknown:
// Ignore all instructions with no side effects.
if (!I.mayHaveSideEffects()) continue;
// store, memcpy, memmove *to* the object can be dropped.
if (StoreInst *SI = dyn_cast<StoreInst>(&I)) {
if (SI->getPointerOperand()->stripInBoundsOffsets() == ThisObject)
continue;
}
if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(&I)) {
if (MI->getDest()->stripInBoundsOffsets() == ThisObject)
continue;
}
// Otherwise, we can't remove the deallocation completely.
break;
}
// Okay, the function has some side effects; if it doesn't capture the
// object argument, at least that is something.
return DtorFn->doesNotCapture(0) ? DtorKind::NoEscape : DtorKind::Unknown;
}
}
// If we didn't find any side effects, we win.
return DtorKind::NoSideEffects;
}
/// performStoreOnlyObjectElimination - Scan the graph of uses of the specified
/// object allocation. If the object does not escape and is only stored to
/// (this happens because GVN and other optimizations forward-substitute the
/// stored values, eliminating all loads from the object), then zap the object
/// and all accesses related to it.
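///
/// For example (an illustrative sketch; the value names are hypothetical):
///
///   %obj = call %swift.refcounted* @swift_allocObject(...)
///   %field = getelementptr ... %obj ...
///   store i64 %x, i64* %field
///   call void @swift_release(%swift.refcounted* %obj)
///
/// Nothing ever loads from or escapes %obj, so the allocation, the store, and
/// the release can all be deleted.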
static bool performStoreOnlyObjectElimination(CallInst &Allocation,
BasicBlock::iterator &BBI) {
DtorKind DtorInfo = analyzeDestructor(Allocation.getArgOperand(0));
// We can't delete the object if its destructor has side effects.
if (DtorInfo != DtorKind::NoSideEffects)
return false;
// Do a depth first search exploring all of the uses of the object pointer,
// following through casts, pointer adjustments etc. If we find any loads or
// any escape sites of the object, we give up. If we succeed in walking the
// entire graph of uses, we can remove the resultant set.
SmallSetVector<Instruction*, 16> InvolvedInstructions;
SmallVector<Instruction*, 16> Worklist;
Worklist.push_back(&Allocation);
// Stores - Keep track of all of the store instructions we see.
SmallVector<StoreInst*, 16> Stores;
while (!Worklist.empty()) {
Instruction *I = Worklist.pop_back_val();
// Insert the instruction into our InvolvedInstructions set. If we have
// already seen it, then don't reprocess all of the uses.
if (!InvolvedInstructions.insert(I)) continue;
// Okay, this is the first time we've seen this instruction, proceed.
switch (classifyInstruction(*I)) {
case RT_Retain:
case RT_RetainAndReturnThree:
assert(0 && "These should be canonicalized away");
case RT_AllocObject:
// If this is a different swift_allocObject than we started with, then
// there is some computation feeding into a size or alignment computation
// that we have to keep... unless we can delete *that* entire object as
// well.
break;
// If no memory is accessed, then something is being done with the
// pointer: maybe it is bitcast or GEP'd. Since there are no side effects,
// it is perfectly fine to delete this instruction if all uses of the
// instruction are also eliminable.
case RT_NoMemoryAccessed:
if (I->mayHaveSideEffects() || isa<TerminatorInst>(I))
return false;
break;
// It is perfectly fine to eliminate various retains and releases of this
// object: we are zapping all accesses or none.
case RT_Release:
case RT_RetainNoResult:
break;
// If this is an unknown instruction, we have more interesting things to
// consider.
case RT_Unknown:
case RT_ObjCRelease:
case RT_ObjCRetain:
// Otherwise, this really is some unhandled instruction. Bail out.
return false;
}
// Okay, if we got here, the instruction can be eaten so long as all of its
// uses can be. Scan through the uses and add them to the worklist for
// recursive processing.
for (auto UI = I->user_begin(), E = I->user_end(); UI != E; ++UI) {
Instruction *User = cast<Instruction>(*UI);
// Handle stores as a special case here: we want to make sure that the
// object is being stored *to*, not itself being stored (which would be an
// escape point). Since stores themselves don't have any uses, we can
// short-cut the classification scheme above.
if (StoreInst *SI = dyn_cast<StoreInst>(User)) {
// If this is a store *to* the object, we can zap it.
if (UI.getOperandNo() == StoreInst::getPointerOperandIndex()) {
InvolvedInstructions.insert(SI);
continue;
}
// Otherwise, using the object as a source (or size) is an escape.
return false;
}
if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(User)) {
// If this is a memset/memcpy/memmove *to* the object, we can zap it.
if (UI.getOperandNo() == 0) {
InvolvedInstructions.insert(MI);
continue;
}
// Otherwise, using the object as a source (or size) is an escape.
return false;
}
// Otherwise, normal instructions just go on the worklist for processing.
Worklist.push_back(User);
}
}
// Ok, we succeeded! This means we can zap all of the instructions that use
// the object. One thing we have to be careful of is to make sure that we
// don't invalidate "BBI" (the iterator the outer walk of the optimization
// pass is using, which indicates the next instruction to process). This would
// happen if we delete the instruction it is pointing to. Advance the
// iterator if that would happen.
while (InvolvedInstructions.count(BBI))
++BBI;
// Zap all of the instructions.
for (auto I : InvolvedInstructions) {
if (!I->use_empty())
I->replaceAllUsesWith(UndefValue::get(I->getType()));
I->eraseFromParent();
}
++NumStoreOnlyObjectsEliminated;
return true;
}
/// performGeneralOptimizations - This does a forward scan over basic blocks,
/// looking for interesting local optimizations that can be done.
static bool performGeneralOptimizations(Function &F) {
bool Changed = false;
// TODO: This is a really trivial local algorithm. It could be much better.
for (BasicBlock &BB : F) {
for (BasicBlock::iterator BBI = BB.begin(), E = BB.end(); BBI != E; ) {
// Preincrement the iterator to avoid invalidation and other trouble.
Instruction &I = *BBI++;
// Do various optimizations based on the instruction we find.
switch (classifyInstruction(I)) {
default: break;
case RT_AllocObject:
Changed |= performStoreOnlyObjectElimination(cast<CallInst>(I), BBI);
break;
case RT_Release:
Changed |= performLocalReleaseMotion(cast<CallInst>(I), BB);
break;
case RT_RetainNoResult:
case RT_ObjCRetain: {
// Retain motion is a forward pass over the block. Make sure we don't
// invalidate our iterators by parking it on the instruction before I.
BasicBlock::iterator Safe = &I;
Safe = Safe != BB.begin() ? std::prev(Safe) : BB.end();
if (performLocalRetainMotion(cast<CallInst>(I), BB)) {
// If we zapped or moved the retain, reset the iterator on the
// instruction *newly* after the prev instruction.
BBI = Safe != BB.end() ? std::next(Safe) : BB.begin();
Changed = true;
}
break;
}
}
}
}
return Changed;
}
//===----------------------------------------------------------------------===//
// SwiftARCOpt Pass
//===----------------------------------------------------------------------===//
namespace llvm {
void initializeSwiftARCOptPass(PassRegistry&);
}
char SwiftARCOpt::ID = 0;
INITIALIZE_PASS_BEGIN(SwiftARCOpt,
"swift-arc-optimize", "Swift ARC optimization",
false, false)
INITIALIZE_PASS_DEPENDENCY(SwiftAliasAnalysis)
INITIALIZE_PASS_END(SwiftARCOpt,
"swift-arc-optimize", "Swift ARC optimization",
false, false)
// Optimization passes.
llvm::FunctionPass *swift::createSwiftARCOptPass() {
initializeSwiftARCOptPass(*llvm::PassRegistry::getPassRegistry());
return new SwiftARCOpt();
}
SwiftARCOpt::SwiftARCOpt() : FunctionPass(ID) {
}
void SwiftARCOpt::getAnalysisUsage(llvm::AnalysisUsage &AU) const {
AU.addRequired<SwiftAliasAnalysis>();
AU.setPreservesCFG();
}
bool SwiftARCOpt::runOnFunction(Function &F) {
bool Changed = false;
// First thing: canonicalize swift_retain and similar calls so that nothing
// uses their result. This exposes the copy that the function does to the
// optimizer.
Changed |= canonicalizeInputFunction(F);
// Next, do a pass with a couple of optimizations:
// 1) release() motion, eliminating retain/release pairs when it turns out
// that a pair is not protecting anything that accesses the guarded heap
// object.
// 2) deletion of stored-only objects - objects that are allocated and
// potentially retained and released, but are only stored to and don't
// escape.
Changed |= performGeneralOptimizations(F);
return Changed;
}
//===----------------------------------------------------------------------===//
// Return Argument Optimizer
//===----------------------------------------------------------------------===//
/// optimizeReturn3 - Look to see if we can optimize "ret (a,b,c)" - where one
/// of the three values was retained right before the return, into a
/// swift_retainAndReturnThree call. This is particularly common when returning
/// a string or array slice.
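///
/// For example (an illustrative sketch; the value names are hypothetical):
///
///   %obj2 = call %swift.refcounted* @swift_retain(%swift.refcounted* %obj)
///   ... insertvalues building a { i8*, i64, %swift.refcounted* } aggregate ...
///   ret { i8*, i64, %swift.refcounted* } %ret
///
/// becomes a tail call to swift_retainAndReturnThree that carries the retain
/// and the three returned words as i64s, with casts to repack the result.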
static bool optimizeReturn3(ReturnInst *TheReturn) {
// Ignore ret void.
if (TheReturn->getNumOperands() == 0) return false;
// See if this is a return of three things.
Value *RetVal = TheReturn->getOperand(0);
StructType *RetSTy = dyn_cast<StructType>(RetVal->getType());
if (RetSTy == 0 || RetSTy->getNumElements() != 3) return false;
// See if we can find scalars that feed into the return instruction. If not,
// bail out.
Value *RetVals[3];
for (unsigned i = 0; i != 3; ++i) {
RetVals[i] = FindInsertedValue(RetVal, i);
if (RetVals[i] == 0) return false;
// If the scalar isn't int64 or pointer type, we can't transform it.
if (!isa<PointerType>(RetVals[i]->getType()) &&
!RetVals[i]->getType()->isIntegerTy(64))
return false;
}
// The ARC optimizer will push the retains to be immediately before the
// return, past any insertvalues. We tolerate other non-memory instructions
// though, in case other optimizations have moved them around. Collect all
// the retain candidates.
SmallDenseMap<Value*, CallInst*, 8> RetainedPointers;
for (BasicBlock::iterator BBI = TheReturn,E = TheReturn->getParent()->begin();
BBI != E; ) {
Instruction &I = *--BBI;
switch (classifyInstruction(I)) {
case RT_Retain: {
// Collect retained pointers. If a pointer is multiply retained, it
// doesn't matter which one we acquire.
CallInst &TheRetain = cast<CallInst>(I);
RetainedPointers[TheRetain.getArgOperand(0)] = &TheRetain;
break;
}
case RT_NoMemoryAccessed:
// If the instruction doesn't access memory, ignore it.
break;
default:
// Otherwise, break out of the for loop.
BBI = E;
break;
}
}
// If there are no retain candidates, we can't form a return3.
if (RetainedPointers.empty())
return false;
// Check to see if any of the values returned is retained. If so, we can form
// a return3, which makes the retain a tail call.
CallInst *TheRetain = 0;
for (unsigned i = 0; i != 3; ++i) {
// If the return value is also retained, we found our retain.
TheRetain = RetainedPointers[RetVals[i]];
if (TheRetain) break;
// If we're returning the result of a known retain, then we can also handle
// it.
if (CallInst *CI = dyn_cast<CallInst>(RetVals[i]))
if (classifyInstruction(*CI) == RT_Retain) {
TheRetain = RetainedPointers[CI->getArgOperand(0)];
if (TheRetain) break;
}
}
// If none of the three values was retained, we can't form a return3.
if (TheRetain == 0)
return false;
// Okay, one of them is, which means we can perform the transformation. Get the
// argument to swift_retain (the result will be zapped when we zap the call)
// as the object to retain (of %swift.refcounted* type).
Value *RetainedObject = TheRetain->getArgOperand(0);
// Insert any new instructions before the return.
IRBuilder<> B(TheReturn);
Type *Int64Ty = B.getInt64Ty();
// The swift_retainAndReturnThree function takes the three arguments as i64,
// so cast each element to i64 if needed.
for (Value *&Elt : RetVals) {
if (isa<PointerType>(Elt->getType()))
Elt = B.CreatePtrToInt(Elt, Int64Ty);
}
// Call swift_retainAndReturnThree with our pointer to retain and the three
// i64's.
Function &F = *TheReturn->getParent()->getParent();
Constant *Cache = 0; // Not utilized.
Value *LibCall = getRetainAndReturnThree(F,RetainedObject->getType(),Cache);
CallInst *NR = B.CreateCall4(LibCall, RetainedObject, RetVals[0],RetVals[1],
RetVals[2]);
NR->setTailCall(true);
// The return type of the libcall is (i64,i64,i64). Since at least one of
// the returned values is a pointer (we retained it, after all!), we have to
// unpack the elements, cast that one back to a pointer, and then repack to
// the proper type expected by the ret instruction.
for (unsigned i = 0; i != 3; ++i) {
RetVals[i] = B.CreateExtractValue(NR, i);
if (RetVals[i]->getType() != RetSTy->getElementType(i))
RetVals[i] = B.CreateIntToPtr(RetVals[i], RetSTy->getElementType(i));
}
// Repack into an aggregate that can be returned.
Value *RV = UndefValue::get(RetVal->getType());
for (unsigned i = 0; i != 3; ++i)
RV = B.CreateInsertValue(RV, RetVals[i], i);
// Return the right thing and zap any instruction tree of inserts that
// existed just to feed the old return.
TheReturn->setOperand(0, RV);
RecursivelyDeleteTriviallyDeadInstructions(RetVal);
// Zap the retain that we're subsuming and we're done!
if (!TheRetain->use_empty())
TheRetain->replaceAllUsesWith(RetainedObject);
TheRetain->eraseFromParent();
++NumReturnThreeTailCallsFormed;
return true;
}
//===----------------------------------------------------------------------===//
// SwiftARCExpandPass
//===----------------------------------------------------------------------===//
/// performARCExpansion - This implements the very late (just before code
/// generation) lowering processes that we do to expose low level performance
/// optimizations and take advantage of special features of the ABI. These
/// expansion steps can foil the general mid-level optimizer, so they are done
/// very, very, late.
///
/// Expansions include:
/// - Lowering retain calls to swift_retain (which return the retained
/// argument) to lower register pressure.
/// - Forming calls to swift_retainAndReturnThree when the last thing in a
/// function is to retain one of its result values, and when it returns
/// exactly three values.
///
/// Coming into this function, we assume that the code is in canonical form:
/// none of these calls have any uses of their return values.
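///
/// For example (an illustrative sketch; %x and %y are hypothetical values):
///
///   call void @swift_retain_noresult(%swift.refcounted* %x)
///   ... uses of %x ...
///
/// is rewritten to:
///
///   %y = call %swift.refcounted* @swift_retain(%swift.refcounted* %x)
///   ... uses of %y ...
///
/// so the value live across the call can be taken from the call's result,
/// lowering register pressure.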
bool SwiftARCExpandPass::runOnFunction(Function &F) {
Constant *RetainCache = nullptr;
bool Changed = false;
SmallVector<ReturnInst*, 8> Returns;
// Since all of the calls are canonicalized, we know that we can just walk
// through the function and collect the interesting heap object definitions by
// getting the argument to these functions.
DenseMap<Value*, TinyPtrVector<Instruction*>> DefsOfValue;
// Keep track of the order in which we see values, since iteration over a
// DenseMap isn't in a deterministic order and isn't efficient anyway.
SmallVector<Value*, 16> DefOrder;
// Do a first pass over the function, collecting all interesting definitions.
// In this pass, we rewrite any intra-block uses that we can, since the
// SSAUpdater doesn't handle them.
DenseMap<Value*, Value*> LocalUpdates;
for (BasicBlock &BB : F) {
for (auto II = BB.begin(), E = BB.end(); II != E; ) {
// Preincrement iterator to avoid iteration issues in the loop.
Instruction &Inst = *II++;
switch (classifyInstruction(Inst)) {
case RT_Retain: assert(0 && "This should be canonicalized away!");
case RT_RetainAndReturnThree:
case RT_RetainNoResult: {
Value *ArgVal = cast<CallInst>(Inst).getArgOperand(0);
// First step: rewrite swift_retain_noresult to swift_retain, exposing
// the result value.
CallInst &CI =
*CallInst::Create(getRetain(F, ArgVal->getType(), RetainCache),
ArgVal, "", &Inst);
CI.setTailCall(true);
Inst.eraseFromParent();
if (!isa<Instruction>(ArgVal))
continue;
TinyPtrVector<Instruction*> &GlobalEntry = DefsOfValue[ArgVal];
// If this is the first definition of a value for the argument that
// we've seen, keep track of it in DefOrder.
if (GlobalEntry.empty())
DefOrder.push_back(ArgVal);
// Check to see if there is already an entry for this basic block. If
// there is another local entry, switch to using the local value and
// remove the previous value from the GlobalEntry.
Value *&LocalEntry = LocalUpdates[ArgVal];
if (LocalEntry) {
Changed = true;
CI.setArgOperand(0, LocalEntry);
assert(GlobalEntry.back() == LocalEntry && "Local/Global mismatch?");
GlobalEntry.pop_back();
}
LocalEntry = &CI;
GlobalEntry.push_back(&CI);
continue;
}
case RT_Unknown:
case RT_Release:
case RT_AllocObject:
case RT_NoMemoryAccessed:
case RT_ObjCRelease:
case RT_ObjCRetain: // TODO: Could chain together objc_retains.
// Remember returns in the first pass.
if (ReturnInst *RI = dyn_cast<ReturnInst>(&Inst))
Returns.push_back(RI);
// Just remap any uses in the instruction below.
break;
}
// Check to see if there are any uses of a value in the LocalUpdates
// map. If so, remap it now to the locally defined version.
for (unsigned i = 0, e = Inst.getNumOperands(); i != e; ++i)
if (Value *V = LocalUpdates.lookup(Inst.getOperand(i))) {
Changed = true;
Inst.setOperand(i, V);
}
}
LocalUpdates.clear();
}
// Now that we've collected all of the interesting heap object values that are
// passed into argument-returning functions, rewrite uses of these pointers
// with optimized, lifetime-shortened versions of them.
for (Value *Ptr : DefOrder) {
// If Ptr is an instruction, remember its block. If not, use the entry
// block as its block (it must be an argument, constant, etc).
BasicBlock *PtrBlock;
if (Instruction *PI = dyn_cast<Instruction>(Ptr))
PtrBlock = PI->getParent();
else
PtrBlock = &F.getEntryBlock();
TinyPtrVector<Instruction*> &Defs = DefsOfValue[Ptr];
// This is the same problem as SSA construction, so we just use LLVM's
// SSAUpdater, with each retain as a definition of the virtual value.
SSAUpdater Updater;
Updater.Initialize(Ptr->getType(), Ptr->getName());
// Set the return value of each of these calls as a definition of the
// virtual value.
for (auto D : Defs)
Updater.AddAvailableValue(D->getParent(), D);
// If we didn't add a definition for Ptr's block, then Ptr itself is
// available in its block.
if (!Updater.HasValueForBlock(PtrBlock))
Updater.AddAvailableValue(PtrBlock, Ptr);
// Rewrite uses of Ptr to their optimized forms.
for (auto UI = Ptr->user_begin(), E = Ptr->user_end(); UI != E; ) {
// Make sure to increment the use iterator before potentially rewriting
// it.
Use &U = UI.getUse();
++UI;
// If the use is in the same block that defines it and the User is not a
// PHI node, then this is a local use that shouldn't be rewritten.
Instruction *User = cast<Instruction>(U.getUser());
if (User->getParent() == PtrBlock && !isa<PHINode>(User))
continue;
// Otherwise, change it if profitable!
Updater.RewriteUse(U);
if (U.get() != Ptr)
Changed = true;
}
}
// Scan through all the returns to see if there are any that can be optimized.
// FIXME: swift_retainAndReturnThree runtime call
// is currently implemented only on x86_64.
// FIXME: optimizeReturn3() implementation assumes 64-bit
if (llvm::Triple(F.getParent()->getTargetTriple()).getArchName() == "x86_64")
for (ReturnInst *RI : Returns)
Changed |= optimizeReturn3(RI);
return Changed;
}
namespace llvm {
void initializeSwiftARCExpandPassPass(PassRegistry&);
}
char SwiftARCExpandPass::ID = 0;
INITIALIZE_PASS(SwiftARCExpandPass,
"swift-arc-expand", "Swift ARC expansion", false, false)
llvm::FunctionPass *swift::createSwiftARCExpandPass() {
initializeSwiftARCExpandPassPass(*llvm::PassRegistry::getPassRegistry());
return new SwiftARCExpandPass();
}