mirror of https://github.com/apple/swift.git (synced 2025-12-14 20:36:38 +01:00)
GVN uses the swift alias analysis so we can't declare swift_release as having no side effects. rdar://16443423 Swift SVN r16030
1301 lines · 48 KiB · C++
//===--- Passes.cpp - Reference Counting Optimizations --------------------===//
//
// This source file is part of the Swift.org open source project
//
// Copyright (c) 2014 - 2015 Apple Inc. and the Swift project authors
// Licensed under Apache License v2.0 with Runtime Library Exception
//
// See http://swift.org/LICENSE.txt for license information
// See http://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
//
//===----------------------------------------------------------------------===//
//
// This file implements optimizations for reference counting, object
// allocation, and other runtime entrypoints.
//
//===----------------------------------------------------------------------===//

#define DEBUG_TYPE "swift-optimize"
#include "swift/OptimizeARC/Passes.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Module.h"
#include "llvm/Pass.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Transforms/Utils/SSAUpdater.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/ADT/TinyPtrVector.h"
#include "llvm/ADT/Triple.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/CommandLine.h"

using namespace llvm;
using swift::SwiftAliasAnalysis;
using swift::SwiftARCOpt;
using swift::SwiftARCExpandPass;

STATISTIC(NumNoopDeleted,
          "Number of no-op swift calls eliminated");
STATISTIC(NumRetainReleasePairs,
          "Number of swift retain/release pairs eliminated");
STATISTIC(NumObjCRetainReleasePairs,
          "Number of objc retain/release pairs eliminated");
STATISTIC(NumAllocateReleasePairs,
          "Number of swift allocate/release pairs eliminated");
STATISTIC(NumStoreOnlyObjectsEliminated,
          "Number of swift stored-only objects eliminated");
STATISTIC(NumReturnThreeTailCallsFormed,
          "Number of swift_retainAndReturnThree tail calls formed");

//===----------------------------------------------------------------------===//
// Utility Functions
//===----------------------------------------------------------------------===//

enum RT_Kind {
  /// An instruction with this classification is known to not access (read or
  /// write) memory.
  RT_NoMemoryAccessed,

  /// SwiftHeapObject *swift_retain(SwiftHeapObject *object)
  RT_Retain,

  // void swift_retain_noresult(SwiftHeapObject *object)
  RT_RetainNoResult,

  // (i64,i64,i64) swift_retainAndReturnThree(SwiftHeapObject *obj, i64,i64,i64)
  RT_RetainAndReturnThree,

  /// void swift_release(SwiftHeapObject *object)
  RT_Release,

  /// SwiftHeapObject *swift_allocObject(SwiftHeapMetadata *metadata,
  ///                                    size_t size, size_t alignment)
  RT_AllocObject,

  /// void objc_release(%objc_object* %P)
  RT_ObjCRelease,
  /// %objc_object* objc_retain(%objc_object* %P)
  RT_ObjCRetain,

  /// This is not a runtime function that we support. Maybe it is not a call,
  /// or is a call to something we don't care about.
  RT_Unknown,
};

/// classifyInstruction - Take a look at the specified instruction and classify
/// it into what kind of runtime entrypoint it is, if any.
static RT_Kind classifyInstruction(const Instruction &I) {
  if (!I.mayReadOrWriteMemory())
    return RT_NoMemoryAccessed;

  // Non-calls or calls to indirect functions are unknown.
  const CallInst *CI = dyn_cast<CallInst>(&I);
  if (CI == 0) return RT_Unknown;
  Function *F = CI->getCalledFunction();
  if (F == 0) return RT_Unknown;

  return StringSwitch<RT_Kind>(F->getName())
    .Case("swift_retain", RT_Retain)
    .Case("swift_retain_noresult", RT_RetainNoResult)
    .Case("swift_release", RT_Release)
    .Case("swift_allocObject", RT_AllocObject)
    .Case("swift_retainAndReturnThree", RT_RetainAndReturnThree)
    .Case("objc_release", RT_ObjCRelease)
    .Case("objc_retain", RT_ObjCRetain)
    .Default(RT_Unknown);
}

/// getRetain - Return a callable function for swift_retain. F is the function
/// being operated on, ObjectPtrTy is an instance of the object pointer type to
/// use, and Cache is a null-initialized place to make subsequent requests
/// faster.
static Constant *getRetain(Function &F, Type *ObjectPtrTy, Constant *&Cache) {
  if (Cache) return Cache;

  auto AttrList = AttributeSet::get(F.getContext(),
                                    AttributeSet::FunctionIndex,
                                    Attribute::NoUnwind);

  Module *M = F.getParent();
  return Cache = M->getOrInsertFunction("swift_retain", AttrList,
                                        ObjectPtrTy, ObjectPtrTy, NULL);
}

/// getRetainNoResult - Return a callable function for swift_retain_noresult.
/// F is the function being operated on, ObjectPtrTy is an instance of the
/// object pointer type to use, and Cache is a null-initialized place to make
/// subsequent requests faster.
static Constant *getRetainNoResult(Function &F, Type *ObjectPtrTy,
                                   Constant *&Cache) {
  if (Cache) return Cache;

  auto AttrList = AttributeSet::get(F.getContext(), 1, Attribute::NoCapture);
  AttrList = AttrList.addAttribute(F.getContext(),
                                   AttributeSet::FunctionIndex,
                                   Attribute::NoUnwind);
  Module *M = F.getParent();
  return Cache = M->getOrInsertFunction("swift_retain_noresult", AttrList,
                                        Type::getVoidTy(F.getContext()),
                                        ObjectPtrTy, NULL);
}

/// getRetainAndReturnThree - Return a callable function for
/// swift_retainAndReturnThree. F is the function being operated on,
/// ObjectPtrTy is an instance of the object pointer type to use, and Cache is a
/// null-initialized place to make subsequent requests faster.
static Constant *getRetainAndReturnThree(Function &F, Type *ObjectPtrTy,
                                         Constant *&Cache) {
  if (Cache) return Cache;

  auto AttrList = AttributeSet::get(F.getContext(),
                                    AttributeSet::FunctionIndex,
                                    Attribute::NoUnwind);
  Module *M = F.getParent();

  Type *Int64Ty = Type::getInt64Ty(F.getContext());
  Type *RetTy = StructType::get(Int64Ty, Int64Ty, Int64Ty, NULL);

  return Cache = M->getOrInsertFunction("swift_retainAndReturnThree", AttrList,
                                        RetTy, ObjectPtrTy,
                                        Int64Ty, Int64Ty, Int64Ty, NULL);
}

//===----------------------------------------------------------------------===//
// SwiftAliasAnalysis
//===----------------------------------------------------------------------===//

namespace llvm {
void initializeSwiftAliasAnalysisPass(PassRegistry&);
}

// Register this pass...
char SwiftAliasAnalysis::ID = 0;
INITIALIZE_AG_PASS(SwiftAliasAnalysis, AliasAnalysis, "swift-aa",
                   "Swift Alias Analysis", false, true, false)


AliasAnalysis::ModRefResult
SwiftAliasAnalysis::getModRefInfo(ImmutableCallSite CS, const Location &Loc) {
  // We know the mod-ref behavior of various runtime functions.
  switch (classifyInstruction(*CS.getInstruction())) {
  case RT_AllocObject:
  case RT_NoMemoryAccessed:
  case RT_Retain:
  case RT_RetainNoResult:
  case RT_RetainAndReturnThree:
  case RT_ObjCRetain:
    // These entrypoints don't modify any compiler-visible state.
    return NoModRef;
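
  // Note (added for clarity; not part of the original comments): swift_release
  // and objc_release may run a destructor with arbitrary side effects, so they
  // cannot be reported as NoModRef. Clients such as GVN rely on this alias
  // analysis and must see releases as potentially modifying memory
  // (rdar://16443423), so these cases fall through to the conservative default
  // query below.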
  case RT_Release:
  case RT_ObjCRelease:
  case RT_Unknown:
    break;
  }

  return AliasAnalysis::getModRefInfo(CS, Loc);
}


//===----------------------------------------------------------------------===//
// Input Function Canonicalizer
//===----------------------------------------------------------------------===//

/// updateCallValueUses - We have something like this:
///    %z = ptrtoint %swift.refcounted* %2 to i64
///    %3 = call { i64, i64, i64 }
///           @swift_retainAndReturnThree(..., i64 %x, i64 %1, i64 %z)
///    %a = extractvalue { i64, i64, i64 } %3, 0
///    %b = extractvalue { i64, i64, i64 } %3, 1
///    %c = extractvalue { i64, i64, i64 } %3, 2
///    %a1 = inttoptr i64 %a to i8*
///    %c1 = inttoptr i64 %c to %swift.refcounted*
///
/// This function is invoked three times (once each for the three arg/retvalues
/// that need to be replaced) and makes a best-effort attempt to patch things
/// up to avoid all the casts. "CI" coming into here is the call to
/// swift_retainAndReturnThree or to an extract that returns all three words.
///
static void updateCallValueUses(CallInst &CI, unsigned EltNo) {
  Value *Op = CI.getArgOperand(1+EltNo);
  for (auto UI = CI.user_begin(), E = CI.user_end(); UI != E; ++UI) {
    ExtractValueInst *Extract = dyn_cast<ExtractValueInst>(*UI);

    // Make sure this extract is relevant to EltNo.
    if (Extract == 0 || Extract->getNumIndices() != 1 ||
        Extract->getIndices()[0] != EltNo)
      continue;

    // Both the input and result should be i64's.
    assert(Extract->getType() == Op->getType() && "Should have i64's here");

    for (auto UI2 = Extract->user_begin(), E = Extract->user_end(); UI2 != E; ){
      IntToPtrInst *ExtractUser = dyn_cast<IntToPtrInst>(*UI2++);
      PtrToIntInst *OpCast = dyn_cast<PtrToIntInst>(Op);
      if (ExtractUser && OpCast &&
          OpCast->getOperand(0)->getType() == ExtractUser->getType()) {
        ExtractUser->replaceAllUsesWith(OpCast->getOperand(0));
        ExtractUser->eraseFromParent();
      }
    }

    // Stitch up anything other than the ptrtoint -> inttoptr.
    Extract->replaceAllUsesWith(Op);

    // Zap the dead ExtractValue's.
    RecursivelyDeleteTriviallyDeadInstructions(Extract);
    return;
  }
}

/// canonicalizeInputFunction - Functions like swift_retain return an
/// argument as a low-level performance optimization. This makes it difficult
/// to reason about pointer equality though, so undo it as an initial
/// canonicalization step. After this step, all swift_retain's have been
/// replaced with swift_retain_noresult.
///
/// This also does some trivial peep-hole optimizations as we go.
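///
/// For example (illustrative IR only; value names and types are made up):
///
///    %x = call %swift.refcounted* @swift_retain(%swift.refcounted* %y)
///    store %swift.refcounted* %x, %swift.refcounted** %slot
///
/// becomes:
///
///    call void @swift_retain_noresult(%swift.refcounted* %y)
///    store %swift.refcounted* %y, %swift.refcounted** %slot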
static bool canonicalizeInputFunction(Function &F) {
  Constant *RetainNoResultCache = 0;

  bool Changed = false;
  for (auto &BB : F)
    for (auto I = BB.begin(); I != BB.end(); ) {
      Instruction &Inst = *I++;

      switch (classifyInstruction(Inst)) {
      case RT_Unknown:
      case RT_AllocObject:
      case RT_NoMemoryAccessed:
        break;
      case RT_RetainNoResult: {
        CallInst &CI = cast<CallInst>(Inst);
        Value *ArgVal = CI.getArgOperand(0);
        // retain_noresult(null) is a no-op.
        if (isa<ConstantPointerNull>(ArgVal)) {
          CI.eraseFromParent();
          Changed = true;
          ++NumNoopDeleted;
          continue;
        }
        break;
      }
      case RT_Retain: {
        // If any x = swift_retain(y)'s got here, canonicalize them into:
        //   x = y; swift_retain_noresult(y).
        // This is important even though the front-end doesn't generate them,
        // because inlined functions can be ARC optimized, and thus may contain
        // swift_retain calls.
        CallInst &CI = cast<CallInst>(Inst);
        Value *ArgVal = CI.getArgOperand(0);

        // Rewrite uses of the result to use the argument.
        if (!CI.use_empty())
          Inst.replaceAllUsesWith(ArgVal);

        // Insert a call to swift_retain_noresult to replace this and reset the
        // iterator so that we visit it next.
        I = CallInst::Create(getRetainNoResult(F, ArgVal->getType(),
                                               RetainNoResultCache),
                             ArgVal, "", &CI);
        CI.eraseFromParent();
        Changed = true;
        break;
      }
      case RT_Release: {
        CallInst &CI = cast<CallInst>(Inst);
        // swift_release(null) is a noop, zap it.
        Value *ArgVal = CI.getArgOperand(0);
        if (isa<ConstantPointerNull>(ArgVal)) {
          CI.eraseFromParent();
          Changed = true;
          ++NumNoopDeleted;
          continue;
        }
        break;
      }
      case RT_RetainAndReturnThree: {
        // (a,b,c) = swift_retainAndReturnThree(obj, d,e,f)
        //   -> swift_retain_noresult(obj)
        //   -> (a,b,c) = (d,e,f)
        //
        // The important case of doing this is when this function has been
        // inlined into another function. In this case, there is no return
        // anymore.
        CallInst &CI = cast<CallInst>(Inst);

        IRBuilder<> B(&CI);
        Type *HeapObjectTy = CI.getArgOperand(0)->getType();

        // Reprocess starting at the new swift_retain_noresult.
        I = B.CreateCall(getRetainNoResult(F, HeapObjectTy, RetainNoResultCache),
                         CI.getArgOperand(0));

        // See if we can eliminate all of the extractvalue's that are hanging
        // off the swift_retainAndReturnThree. This is important to eliminate
        // casts that will block optimizations and generally results in better
        // IR. Note that this is just a best-effort attempt though.
        updateCallValueUses(CI, 0);
        updateCallValueUses(CI, 1);
        updateCallValueUses(CI, 2);

        // If our best-effort wasn't good enough, fall back to generating
        // terrible but correct code.
        if (!CI.use_empty()) {
          Value *V = UndefValue::get(CI.getType());
          V = B.CreateInsertValue(V, CI.getArgOperand(1), 0U);
          V = B.CreateInsertValue(V, CI.getArgOperand(2), 1U);
          V = B.CreateInsertValue(V, CI.getArgOperand(3), 2U);
          CI.replaceAllUsesWith(V);
        }

        CI.eraseFromParent();
        Changed = true;
        break;
      }

      case RT_ObjCRelease: {
        CallInst &CI = cast<CallInst>(Inst);
        Value *ArgVal = CI.getArgOperand(0);
        // objc_release(null) is a noop, zap it.
        if (isa<ConstantPointerNull>(ArgVal)) {
          CI.eraseFromParent();
          Changed = true;
          ++NumNoopDeleted;
          continue;
        }
        break;
      }

      case RT_ObjCRetain: {
        // Canonicalize objc_retain so that nothing uses its result.
        CallInst &CI = cast<CallInst>(Inst);
        Value *ArgVal = CI.getArgOperand(0);
        if (!CI.use_empty()) {
          CI.replaceAllUsesWith(ArgVal);
          Changed = true;
        }

        // objc_retain(null) is a noop, delete it.
        if (isa<ConstantPointerNull>(ArgVal)) {
          CI.eraseFromParent();
          Changed = true;
          ++NumNoopDeleted;
          continue;
        }

        break;
      }
      }
    }
  return Changed;
}

//===----------------------------------------------------------------------===//
// Release() Motion
//===----------------------------------------------------------------------===//

/// performLocalReleaseMotion - Scan backwards from the specified release,
/// moving it earlier in the function if possible, over instructions that do not
/// access the released object. If we get to a retain or allocation of the
/// object, zap both.
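///
/// A sketch of the intended effect (illustrative only, not from the original
/// source):
///
///    %obj = call %swift.refcounted* @swift_allocObject(...)
///    ... instructions that don't touch %obj ...
///    call void @swift_release(%swift.refcounted* %obj)
///
/// The release is moved up across the unrelated instructions; once it becomes
/// adjacent to the allocation (or to a retain of the same object), both calls
/// are deleted and the corresponding statistic is bumped.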
static bool performLocalReleaseMotion(CallInst &Release, BasicBlock &BB) {
  // FIXME: Call classifier should identify the object for us. Too bad C++
  // doesn't have nice Swift-style enums.
  Value *ReleasedObject = Release.getArgOperand(0);

  BasicBlock::iterator BBI = &Release;

  // Scan until we get to the top of the block.
  while (BBI != BB.begin()) {
    --BBI;

    // Don't analyze PHI nodes. We can't move releases before them and they
    // aren't "interesting".
    if (isa<PHINode>(BBI) ||
        // If we found the instruction that defines the value we're releasing,
        // don't push the release past it.
        &*BBI == ReleasedObject) {
      ++BBI;
      goto OutOfLoop;
    }

    switch (classifyInstruction(*BBI)) {
    case RT_Retain: // Canonicalized away, shouldn't exist.
    case RT_RetainAndReturnThree:
      assert(0 && "these entrypoints should be canonicalized away");
    case RT_NoMemoryAccessed:
      // Skip over random instructions that don't touch memory. They don't need
      // protection by retain/release.
      continue;
    case RT_Release: {
      // If we get to a release, we can generally ignore it and scan past it.
      // However, if we get to a release of obviously the same object, we stop
      // scanning here because it should have already been moved as early as
      // possible, so there is no reason to move its friend to the same place.
      //
      // NOTE: If this occurs frequently, maybe we can have a release(Obj, N)
      // API to drop multiple retain counts at once.
      CallInst &ThisRelease = cast<CallInst>(*BBI);
      Value *ThisReleasedObject = ThisRelease.getArgOperand(0);
      if (ThisReleasedObject == ReleasedObject) {
        //Release.dump(); ThisRelease.dump(); BB.getParent()->dump();
        ++BBI;
        goto OutOfLoop;
      }
      continue;
    }

    case RT_RetainNoResult: {  // swift_retain_noresult(obj)
      CallInst &Retain = cast<CallInst>(*BBI);
      Value *RetainedObject = Retain.getArgOperand(0);

      // If the retain and release are to obviously pointer-equal objects, then
      // we can delete both of them. We have proven that they do not protect
      // anything of value.
      if (RetainedObject == ReleasedObject) {
        Retain.eraseFromParent();
        Release.eraseFromParent();
        ++NumRetainReleasePairs;
        return true;
      }

      // Otherwise, this is a retain of an object that is not statically known
      // to be the same object. It may still be dynamically the same object
      // though. In this case, we can't move the release past it.
      // TODO: Strengthen analysis.
      //Release.dump(); ThisRelease.dump(); BB.getParent()->dump();
      ++BBI;
      goto OutOfLoop;
    }

    case RT_AllocObject: {   // %obj = swift_alloc(...)
      CallInst &Allocation = cast<CallInst>(*BBI);

      // If this is an allocation of an unrelated object, just ignore it.
      // TODO: This is not safe without proving the object being released is not
      // related to the allocated object. Consider something silly like this:
      //   A = allocate()
      //   B = bitcast A to object
      //   release(B)
      if (ReleasedObject != &Allocation) {
        // Release.dump(); BB.getParent()->dump();
        ++BBI;
        goto OutOfLoop;
      }

      // If this is a release right after an allocation of the object, then we
      // can zap both.
      Allocation.replaceAllUsesWith(UndefValue::get(Allocation.getType()));
      Allocation.eraseFromParent();
      Release.eraseFromParent();
      ++NumAllocateReleasePairs;
      return true;
    }

    case RT_Unknown:
    case RT_ObjCRelease:
    case RT_ObjCRetain:
      // BBI->dump();
      // Otherwise, we get to something unknown/unhandled. Bail out for now.
      ++BBI;
      goto OutOfLoop;
    }
  }
OutOfLoop:


  // If we got to the top of the block, (and if the instruction didn't start
  // there) move the release to the top of the block.
  // TODO: This is where we'd plug in some global algorithms someday.
  if (&*BBI != &Release) {
    Release.moveBefore(BBI);
    return true;
  }

  return false;
}


//===----------------------------------------------------------------------===//
// Retain() Motion
//===----------------------------------------------------------------------===//

/// performLocalRetainMotion - Scan forward from the specified retain, moving it
/// later in the function if possible, over instructions that provably can't
/// release the object. If we get to a release of the object, zap both.
///
/// NOTE: this handles both objc_retain and swift_retain_noresult.
///
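/// A sketch of the intended effect (illustrative only, not from the original
/// source):
///
///    call void @swift_retain_noresult(%swift.refcounted* %obj)
///    ... instructions that provably can't release %obj ...
///    call void @swift_release(%swift.refcounted* %obj)
///
/// The retain is pushed down across the intervening instructions; if it meets
/// a release of the same object, the pair is deleted.
///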
static bool performLocalRetainMotion(CallInst &Retain, BasicBlock &BB) {
  // FIXME: Call classifier should identify the object for us. Too bad C++
  // doesn't have nice Swift-style enums.
  Value *RetainedObject = Retain.getArgOperand(0);

  BasicBlock::iterator BBI = &Retain, BBE = BB.getTerminator();

  bool isObjCRetain = Retain.getCalledFunction()->getName() == "objc_retain";

  bool MadeProgress = false;

  // Scan until we get to the end of the block.
  for (++BBI; BBI != BBE; ++BBI) {
    Instruction &CurInst = *BBI;

    // Classify the instruction. This switch does a "break" when the
    // instruction can be skipped and is interesting, and a "continue" when it
    // is a retain of the same pointer.
    switch (classifyInstruction(CurInst)) {
    case RT_Retain: // Canonicalized away, shouldn't exist.
    case RT_RetainAndReturnThree:
      assert(0 && "these entrypoints should be canonicalized away");
    case RT_NoMemoryAccessed:
    case RT_AllocObject:
      // Skip over random instructions that don't touch memory. They don't need
      // protection by retain/release.
      break;

    case RT_RetainNoResult: {  // swift_retain_noresult(obj)
      //CallInst &ThisRetain = cast<CallInst>(CurInst);
      //Value *ThisRetainedObject = ThisRetain.getArgOperand(0);

      // If we see a retain of the same object, we can skip over it, but we
      // can't count it as progress. Just pushing a retain(x) past a retain(y)
      // doesn't change the program.
      continue;
    }

    case RT_Release: {
      // If we get to a release that is provably to this object, then we can
      // zap it and the retain.
      CallInst &ThisRelease = cast<CallInst>(CurInst);
      Value *ThisReleasedObject = ThisRelease.getArgOperand(0);
      if (!isObjCRetain && ThisReleasedObject == RetainedObject) {
        Retain.eraseFromParent();
        ThisRelease.eraseFromParent();
        ++NumRetainReleasePairs;
        return true;
      }

      // Otherwise, if this is some other pointer, we can only ignore it if we
      // can prove that the two objects don't alias.
      // Retain.dump(); ThisRelease.dump(); BB.getParent()->dump();
      goto OutOfLoop;
    }

    case RT_ObjCRelease: {
      // If we get to an objc_release that is provably to this object, then we
      // can zap it and the objc_retain.
      CallInst &ThisRelease = cast<CallInst>(CurInst);
      Value *ThisReleasedObject = ThisRelease.getArgOperand(0);
      if (isObjCRetain && ThisReleasedObject == RetainedObject) {
        Retain.eraseFromParent();
        ThisRelease.eraseFromParent();
        ++NumObjCRetainReleasePairs;
        return true;
      }

      // Otherwise, if this is some other pointer, we can only ignore it if we
      // can prove that the two objects don't alias.
      // Retain.dump(); ThisRelease.dump(); BB.getParent()->dump();
      goto OutOfLoop;
    }

    case RT_Unknown:
    case RT_ObjCRetain:

      // Load, store, memcpy etc can't do a release.
      if (isa<LoadInst>(CurInst) || isa<StoreInst>(CurInst) ||
          isa<MemIntrinsic>(CurInst))
        break;

      // CurInst->dump(); BBI->dump();
      // Otherwise, we get to something unknown/unhandled. Bail out for now.
      goto OutOfLoop;
    }

    // If the switch did a break, we made some progress moving this retain.
    MadeProgress = true;
  }
OutOfLoop:

  // If we were able to move the retain down, move it now.
  // TODO: This is where we'd plug in some global algorithms someday.
  if (MadeProgress) {
    Retain.moveBefore(BBI);
    return true;
  }

  return false;
}


//===----------------------------------------------------------------------===//
// Store-Only Object Elimination
//===----------------------------------------------------------------------===//

/// DtorKind - Classification for destructor semantics.
enum class DtorKind {
  /// NoSideEffects - The destructor does nothing, or just touches the local
  /// object in a non-observable way after it is destroyed.
  NoSideEffects,

  /// NoEscape - The destructor potentially has some side effects, but the
  /// address of the destroyed object never escapes (in the LLVM IR sense).
  NoEscape,

  /// Unknown - Something potentially crazy is going on here.
  Unknown
};

/// analyzeDestructor - Given the heap.metadata argument to swift_allocObject,
/// take a look at the destructor and try to decide if it has side effects or
/// any other bad effects that can prevent it from being optimized.
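///
/// Illustrative sketch (not from the original source; the exact field types
/// are an assumption): the metadata is expected to be a constant struct whose
/// slot #0 holds the destructor, roughly
///
///    @metadata = constant { void (%swift.refcounted*)*, ... }
///                         { void (%swift.refcounted*)* @dtor, ... }
///
/// A destructor that is readonly, or that only stores into its own object,
/// classifies as NoSideEffects; one with side effects that never lets the
/// object address escape is NoEscape; anything else is Unknown.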
static DtorKind analyzeDestructor(Value *P) {
  // If we have a null pointer for the metadata info, the dtor has no side
  // effects. Actually, the final release would crash. This is really only
  // useful for writing testcases.
  if (isa<ConstantPointerNull>(P->stripPointerCasts()))
    return DtorKind::NoSideEffects;

  // We have to have a known heap metadata value; reject dynamically computed
  // ones and globals that may be overridden.
  GlobalVariable *GV = dyn_cast<GlobalVariable>(P->stripPointerCasts());
  if (GV == 0 || GV->mayBeOverridden()) return DtorKind::Unknown;

  ConstantStruct *CS = dyn_cast_or_null<ConstantStruct>(GV->getInitializer());
  if (CS == 0 || CS->getNumOperands() == 0) return DtorKind::Unknown;

  // FIXME: Would like to abstract the dtor slot (#0) out from this to somewhere
  // unified.
  enum { DTorSlotOfHeapMeatadata = 0 };
  Function *DtorFn =dyn_cast<Function>(CS->getOperand(DTorSlotOfHeapMeatadata));
  if (DtorFn == 0 || DtorFn->mayBeOverridden() || DtorFn->hasExternalLinkage())
    return DtorKind::Unknown;

  // Okay, we have a body, and we can trust it. If the function is marked
  // readonly, then we know it can't have any interesting side effects, so we
  // don't need to analyze it at all.
  if (DtorFn->onlyReadsMemory())
    return DtorKind::NoSideEffects;

  // The first argument is the object being destroyed.
  assert(DtorFn->arg_size() == 1 && !DtorFn->isVarArg() &&
         "expected a single object argument to destructors");
  Value *ThisObject = DtorFn->arg_begin();

  // Scan the body of the function, looking for anything scary.
  for (BasicBlock &BB : *DtorFn) {
    for (Instruction &I : BB) {
      // Note that the destructor may not be in any particular canonical form.
      switch (classifyInstruction(I)) {
      case RT_NoMemoryAccessed:
      case RT_AllocObject:
        // Skip over random instructions that don't touch memory in the caller.
        continue;

      case RT_Retain:                // x = swift_retain(y)
      case RT_RetainAndReturnThree:  // swift_retainAndReturnThree(obj,a,b,c)
      case RT_RetainNoResult: {      // swift_retain_noresult(obj)

        // Ignore retains of the "self" object; no resurrection is possible.
        Value *ThisRetainedObject = cast<CallInst>(I).getArgOperand(0);
        if (ThisRetainedObject->stripPointerCasts() ==
            ThisObject->stripPointerCasts())
          continue;
        // Otherwise, we may be retaining something scary.
        break;
      }

      case RT_Release: {
        // If we get to a release that is provably to this object, then we can
        // ignore it.
        Value *ThisReleasedObject = cast<CallInst>(I).getArgOperand(0);

        if (ThisReleasedObject->stripPointerCasts() ==
            ThisObject->stripPointerCasts())
          continue;
        // Otherwise, we may be releasing something scary.
        break;
      }

      case RT_ObjCRelease:
      case RT_ObjCRetain:
        // Objective-C retain and release can have arbitrary side effects.
        break;

      case RT_Unknown:
        // Ignore all instructions with no side effects.
        if (!I.mayHaveSideEffects()) continue;

        // store, memcpy, memmove *to* the object can be dropped.
        if (StoreInst *SI = dyn_cast<StoreInst>(&I)) {
          if (SI->getPointerOperand()->stripInBoundsOffsets() == ThisObject)
            continue;
        }

        if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(&I)) {
          if (MI->getDest()->stripInBoundsOffsets() == ThisObject)
            continue;
        }

        // Otherwise, we can't remove the deallocation completely.
        break;
      }

      // Okay, the function has some side effects. If it doesn't capture the
      // object argument, at least that is something.
      return DtorFn->doesNotCapture(0) ? DtorKind::NoEscape : DtorKind::Unknown;
    }
  }

  // If we didn't find any side effects, we win.
  return DtorKind::NoSideEffects;
}


/// performStoreOnlyObjectElimination - Scan the graph of uses of the specified
/// object allocation. If the object does not escape and is only stored to
/// (this happens because GVN and other optimizations forward-substitute all
/// stores to the object, eliminating all loads from it), then zap the
/// object and all accesses related to it.
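///
/// Illustrative sketch (not from the original source):
///
///    %obj = call %swift.refcounted* @swift_allocObject(...)
///    %field = getelementptr ... %obj ...
///    store i64 %x, i64* %field
///    call void @swift_release(%swift.refcounted* %obj)
///
/// Nothing loads from %obj and its address never escapes, so the allocation,
/// the store, and the release can all be deleted together.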
static bool performStoreOnlyObjectElimination(CallInst &Allocation,
                                              BasicBlock::iterator &BBI) {
  DtorKind DtorInfo = analyzeDestructor(Allocation.getArgOperand(0));

  // We can't delete the object if its destructor has side effects.
  if (DtorInfo != DtorKind::NoSideEffects)
    return false;

  // Do a depth first search exploring all of the uses of the object pointer,
  // following through casts, pointer adjustments etc. If we find any loads or
  // any escape sites of the object, we give up. If we succeed in walking the
  // entire graph of uses, we can remove the resultant set.
  SmallSetVector<Instruction*, 16> InvolvedInstructions;
  SmallVector<Instruction*, 16> Worklist;
  Worklist.push_back(&Allocation);

  // Stores - Keep track of all of the store instructions we see.
  SmallVector<StoreInst*, 16> Stores;

  while (!Worklist.empty()) {
    Instruction *I = Worklist.pop_back_val();

    // Insert the instruction into our InvolvedInstructions set. If we have
    // already seen it, then don't reprocess all of the uses.
    if (!InvolvedInstructions.insert(I)) continue;

    // Okay, this is the first time we've seen this instruction, proceed.
    switch (classifyInstruction(*I)) {
    case RT_Retain:
    case RT_RetainAndReturnThree:
      assert(0 && "These should be canonicalized away");

    case RT_AllocObject:
      // If this is a different swift_allocObject than we started with, then
      // there is some computation feeding into a size or alignment computation
      // that we have to keep... unless we can delete *that* entire object as
      // well.
      break;

    // If no memory is accessed, then something is being done with the
    // pointer: maybe it is bitcast or GEP'd. Since there are no side effects,
    // it is perfectly fine to delete this instruction if all uses of the
    // instruction are also eliminable.
    case RT_NoMemoryAccessed:
      if (I->mayHaveSideEffects() || isa<TerminatorInst>(I))
        return false;
      break;

    // It is perfectly fine to eliminate various retains and releases of this
    // object: we are zapping all accesses or none.
    case RT_Release:
    case RT_RetainNoResult:
      break;

    // If this is an unknown instruction, we have more interesting things to
    // consider.
    case RT_Unknown:
    case RT_ObjCRelease:
    case RT_ObjCRetain:

      // Otherwise, this really is some unhandled instruction. Bail out.
      return false;
    }

    // Okay, if we got here, the instruction can be eaten so long as all of its
    // uses can be. Scan through the uses and add them to the worklist for
    // recursive processing.
    for (auto UI = I->user_begin(), E = I->user_end(); UI != E; ++UI) {
      Instruction *User = cast<Instruction>(*UI);

      // Handle stores as a special case here: we want to make sure that the
      // object is being stored *to*, not itself being stored (which would be an
      // escape point). Since stores themselves don't have any uses, we can
      // short-cut the classification scheme above.
      if (StoreInst *SI = dyn_cast<StoreInst>(User)) {
        // If this is a store *to* the object, we can zap it.
        if (UI.getOperandNo() == StoreInst::getPointerOperandIndex()) {
          InvolvedInstructions.insert(SI);
          continue;
        }
        // Otherwise, using the object as a source (or size) is an escape.
        return false;
      }
      if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(User)) {
        // If this is a memset/memcpy/memmove *to* the object, we can zap it.
        if (UI.getOperandNo() == 0) {
          InvolvedInstructions.insert(MI);
          continue;
        }
        // Otherwise, using the object as a source (or size) is an escape.
        return false;
      }

      // Otherwise, normal instructions just go on the worklist for processing.
      Worklist.push_back(User);
    }
  }

  // Ok, we succeeded! This means we can zap all of the instructions that use
  // the object. One thing we have to be careful of is to make sure that we
  // don't invalidate "BBI" (the iterator the outer walk of the optimization
  // pass is using, and indicates the next instruction to process). This would
  // happen if we delete the instruction it is pointing to. Advance the
  // iterator if that would happen.
  while (InvolvedInstructions.count(BBI))
    ++BBI;

  // Zap all of the instructions.
  for (auto I : InvolvedInstructions) {
    if (!I->use_empty())
      I->replaceAllUsesWith(UndefValue::get(I->getType()));
    I->eraseFromParent();
  }

  ++NumStoreOnlyObjectsEliminated;
  return true;
}

/// performGeneralOptimizations - This does a forward scan over basic blocks,
/// looking for interesting local optimizations that can be done.
static bool performGeneralOptimizations(Function &F) {
  bool Changed = false;

  // TODO: This is a really trivial local algorithm. It could be much better.
  for (BasicBlock &BB : F) {
    for (BasicBlock::iterator BBI = BB.begin(), E = BB.end(); BBI != E; ) {
      // Preincrement the iterator to avoid invalidation and other trouble.
      Instruction &I = *BBI++;

      // Do various optimizations based on the instruction we find.
      switch (classifyInstruction(I)) {
      default: break;
      case RT_AllocObject:
        Changed |= performStoreOnlyObjectElimination(cast<CallInst>(I), BBI);
        break;
      case RT_Release:
        Changed |= performLocalReleaseMotion(cast<CallInst>(I), BB);
        break;
      case RT_RetainNoResult:
      case RT_ObjCRetain: {
        // Retain motion is a forward pass over the block. Make sure we don't
        // invalidate our iterators by parking it on the instruction before I.
        BasicBlock::iterator Safe = &I;
        Safe = Safe != BB.begin() ? std::prev(Safe) : BB.end();
        if (performLocalRetainMotion(cast<CallInst>(I), BB)) {
          // If we zapped or moved the retain, reset the iterator on the
          // instruction *newly* after the prev instruction.
          BBI = Safe != BB.end() ? std::next(Safe) : BB.begin();
          Changed = true;
        }
        break;
      }
      }
    }
  }
  return Changed;
}


//===----------------------------------------------------------------------===//
// SwiftARCOpt Pass
//===----------------------------------------------------------------------===//

namespace llvm {
void initializeSwiftARCOptPass(PassRegistry&);
}

char SwiftARCOpt::ID = 0;

INITIALIZE_PASS_BEGIN(SwiftARCOpt,
                      "swift-arc-optimize", "Swift ARC optimization",
                      false, false)
INITIALIZE_PASS_DEPENDENCY(SwiftAliasAnalysis)
INITIALIZE_PASS_END(SwiftARCOpt,
                    "swift-arc-optimize", "Swift ARC optimization",
                    false, false)

// Optimization passes.
llvm::FunctionPass *swift::createSwiftARCOptPass() {
  initializeSwiftARCOptPass(*llvm::PassRegistry::getPassRegistry());
  return new SwiftARCOpt();
}

SwiftARCOpt::SwiftARCOpt() : FunctionPass(ID) {
}


void SwiftARCOpt::getAnalysisUsage(llvm::AnalysisUsage &AU) const {
  AU.addRequired<SwiftAliasAnalysis>();
  AU.setPreservesCFG();
}

bool SwiftARCOpt::runOnFunction(Function &F) {
  bool Changed = false;

  // First thing: canonicalize swift_retain and similar calls so that nothing
  // uses their result. This exposes the copy that the function does to the
  // optimizer.
  Changed |= canonicalizeInputFunction(F);

  // Next, do a pass with a couple of optimizations:
  // 1) release() motion, eliminating retain/release pairs when it turns out
  //    that a pair is not protecting anything that accesses the guarded heap
  //    object.
  // 2) deletion of stored-only objects - objects that are allocated and
  //    potentially retained and released, but are only stored to and don't
  //    escape.
  Changed |= performGeneralOptimizations(F);

  return Changed;
}



//===----------------------------------------------------------------------===//
// Return Argument Optimizer
//===----------------------------------------------------------------------===//

/// optimizeReturn3 - Look to see if we can optimize "ret (a,b,c)" - where one
/// of the three values was retained right before the return, into a
/// swift_retainAndReturnThree call. This is particularly common when returning
/// a string or array slice.
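///
/// Illustrative sketch (not from the original source; types are made up):
///
///    %r = call %swift.refcounted* @swift_retain(%swift.refcounted* %obj)
///    ret { i8*, i64, %swift.refcounted* } %agg     ; %obj is one of the three
///
/// becomes, roughly:
///
///    %t = tail call { i64, i64, i64 }
///           @swift_retainAndReturnThree(%swift.refcounted* %obj,
///                                       i64 %a, i64 %b, i64 %c)
///    ... unpack %t, cast back to the original element types, repack, ret ...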
static bool optimizeReturn3(ReturnInst *TheReturn) {
  // Ignore ret void.
  if (TheReturn->getNumOperands() == 0) return false;

  // See if this is a return of three things.
  Value *RetVal = TheReturn->getOperand(0);
  StructType *RetSTy = dyn_cast<StructType>(RetVal->getType());
  if (RetSTy == 0 || RetSTy->getNumElements() != 3) return false;

  // See if we can find scalars that feed into the return instruction. If not,
  // bail out.
  Value *RetVals[3];
  for (unsigned i = 0; i != 3; ++i) {
    RetVals[i] = FindInsertedValue(RetVal, i);
    if (RetVals[i] == 0) return false;

    // If the scalar isn't int64 or pointer type, we can't transform it.
    if (!isa<PointerType>(RetVals[i]->getType()) &&
        !RetVals[i]->getType()->isIntegerTy(64))
      return false;
  }

  // The ARC optimizer will push the retains to be immediately before the
  // return, past any insertvalues. We tolerate other non-memory instructions
  // though, in case other optimizations have moved them around. Collect all
  // the retain candidates.
  SmallDenseMap<Value*, CallInst*, 8> RetainedPointers;

  for (BasicBlock::iterator BBI = TheReturn,E = TheReturn->getParent()->begin();
       BBI != E; ) {
    Instruction &I = *--BBI;

    switch (classifyInstruction(I)) {
    case RT_Retain: {
      // Collect retained pointers. If a pointer is multiply retained, it
      // doesn't matter which one we acquire.
      CallInst &TheRetain = cast<CallInst>(I);
      RetainedPointers[TheRetain.getArgOperand(0)] = &TheRetain;
      break;
    }
    case RT_NoMemoryAccessed:
      // If the instruction doesn't access memory, ignore it.
      break;
    default:
      // Otherwise, break out of the for loop.
      BBI = E;
      break;
    }
  }

  // If there are no retain candidates, we can't form a return3.
  if (RetainedPointers.empty())
    return false;

  // Check to see if any of the values returned is retained. If so, we can form
  // a return3, which makes the retain a tail call.
  CallInst *TheRetain = 0;
  for (unsigned i = 0; i != 3; ++i) {
    // If the return value is also retained, we found our retain.
    TheRetain = RetainedPointers[RetVals[i]];
    if (TheRetain) break;

    // If we're returning the result of a known retain, then we can also handle
    // it.
    if (CallInst *CI = dyn_cast<CallInst>(RetVals[i]))
      if (classifyInstruction(*CI) == RT_Retain) {
        TheRetain = RetainedPointers[CI->getArgOperand(0)];
        if (TheRetain) break;
      }
  }

  // If none of the three values was retained, we can't form a return3.
  if (TheRetain == 0)
    return false;

  // Okay, one of them is, so we can perform the transformation. Get the
  // argument to swift_retain (the result will be zapped when we zap the call)
  // as the object to retain (of %swift.refcounted* type).
  Value *RetainedObject = TheRetain->getArgOperand(0);

  // Insert any new instructions before the return.
  IRBuilder<> B(TheReturn);
  Type *Int64Ty = B.getInt64Ty();

  // The swift_retainAndReturnThree function takes the three arguments as i64.
  // Cast the arguments to i64 if needed.

  // Update the element with a cast to i64 if needed.
  for (Value *&Elt : RetVals) {
    if (isa<PointerType>(Elt->getType()))
      Elt = B.CreatePtrToInt(Elt, Int64Ty);
  }

  // Call swift_retainAndReturnThree with our pointer to retain and the three
  // i64's.
  Function &F = *TheReturn->getParent()->getParent();
  Constant *Cache = 0; // Not utilized.
  Value *LibCall = getRetainAndReturnThree(F,RetainedObject->getType(),Cache);
  CallInst *NR = B.CreateCall4(LibCall, RetainedObject, RetVals[0],RetVals[1],
                               RetVals[2]);
  NR->setTailCall(true);

  // The return type of the libcall is (i64,i64,i64). Since at least one of
  // the values is a pointer (we retained it, after all!) we have to unpack
  // the elements, bitcast at least that one, and then repack to the proper
  // type expected by the ret instruction.
  for (unsigned i = 0; i != 3; ++i) {
    RetVals[i] = B.CreateExtractValue(NR, i);
    if (RetVals[i]->getType() != RetSTy->getElementType(i))
      RetVals[i] = B.CreateIntToPtr(RetVals[i], RetSTy->getElementType(i));
  }

  // Repack into an aggregate that can be returned.
  Value *RV = UndefValue::get(RetVal->getType());
  for (unsigned i = 0; i != 3; ++i)
    RV = B.CreateInsertValue(RV, RetVals[i], i);

  // Return the right thing and zap any instruction tree of inserts that
  // existed just to feed the old return.
  TheReturn->setOperand(0, RV);
  RecursivelyDeleteTriviallyDeadInstructions(RetVal);

  // Zap the retain that we're subsuming and we're done!
  if (!TheRetain->use_empty())
    TheRetain->replaceAllUsesWith(RetainedObject);
  TheRetain->eraseFromParent();
  ++NumReturnThreeTailCallsFormed;
  return true;
}

//===----------------------------------------------------------------------===//
// SwiftARCExpandPass Pass
//===----------------------------------------------------------------------===//

/// performARCExpansion - This implements the very late (just before code
/// generation) lowering processes that we do to expose low level performance
/// optimizations and take advantage of special features of the ABI. These
/// expansion steps can foil the general mid-level optimizer, so they are done
/// very, very late.
///
/// Expansions include:
///  - Lowering retain calls to swift_retain (which return the retained
///    argument) to lower register pressure.
///  - Forming calls to swift_retainAndReturnThree when the last thing in a
///    function is to retain one of its result values, and when it returns
///    exactly three values.
///
/// Coming into this function, we assume that the code is in canonical form:
/// none of these calls have any uses of their return values.
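///
/// Illustrative sketch of the retain lowering (not from the original source):
///
///    call void @swift_retain_noresult(%swift.refcounted* %x)
///    use(%swift.refcounted* %x)
///
/// becomes:
///
///    %x1 = tail call %swift.refcounted* @swift_retain(%swift.refcounted* %x)
///    use(%swift.refcounted* %x1)
///
/// Uses of %x dominated by the new call are rewritten to %x1 (SSAUpdater
/// inserts phis across blocks where needed), which shortens the live range of
/// the original value and reduces register pressure.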
bool SwiftARCExpandPass::runOnFunction(Function &F) {
  Constant *RetainCache = nullptr;
  bool Changed = false;

  SmallVector<ReturnInst*, 8> Returns;

  // Since all of the calls are canonicalized, we know that we can just walk
  // through the function and collect the interesting heap object definitions
  // by getting the argument to these functions.
  DenseMap<Value*, TinyPtrVector<Instruction*>> DefsOfValue;

  // Keep track of which order we see values in since iteration over a densemap
  // isn't in a deterministic order, and isn't efficient anyway.
  SmallVector<Value*, 16> DefOrder;

  // Do a first pass over the function, collecting all interesting definitions.
  // In this pass, we rewrite any intra-block uses that we can, since the
  // SSAUpdater doesn't handle them.
  DenseMap<Value*, Value*> LocalUpdates;
  for (BasicBlock &BB : F) {
    for (auto II = BB.begin(), E = BB.end(); II != E; ) {
      // Preincrement iterator to avoid iteration issues in the loop.
      Instruction &Inst = *II++;

      switch (classifyInstruction(Inst)) {
      case RT_Retain: assert(0 && "This should be canonicalized away!");
      case RT_RetainAndReturnThree:
      case RT_RetainNoResult: {
        Value *ArgVal = cast<CallInst>(Inst).getArgOperand(0);

        // First step: rewrite swift_retain_noresult to swift_retain, exposing
        // the result value.
        CallInst &CI =
          *CallInst::Create(getRetain(F, ArgVal->getType(), RetainCache),
                            ArgVal, "", &Inst);
        CI.setTailCall(true);
        Inst.eraseFromParent();

        if (!isa<Instruction>(ArgVal))
          continue;

        TinyPtrVector<Instruction*> &GlobalEntry = DefsOfValue[ArgVal];

        // If this is the first definition of a value for the argument that
        // we've seen, keep track of it in DefOrder.
        if (GlobalEntry.empty())
          DefOrder.push_back(ArgVal);

        // Check to see if there is already an entry for this basic block. If
        // there is another local entry, switch to using the local value and
        // remove the previous value from the GlobalEntry.
        Value *&LocalEntry = LocalUpdates[ArgVal];
        if (LocalEntry) {
          Changed = true;
          CI.setArgOperand(0, LocalEntry);
          assert(GlobalEntry.back() == LocalEntry && "Local/Global mismatch?");
          GlobalEntry.pop_back();
        }

        LocalEntry = &CI;
        GlobalEntry.push_back(&CI);
        continue;
      }
      case RT_Unknown:
      case RT_Release:
      case RT_AllocObject:
      case RT_NoMemoryAccessed:
      case RT_ObjCRelease:
      case RT_ObjCRetain:  // TODO: Could chain together objc_retains.
        // Remember returns in the first pass.
        if (ReturnInst *RI = dyn_cast<ReturnInst>(&Inst))
          Returns.push_back(RI);

        // Just remap any uses in the value.
        break;
      }

      // Check to see if there are any uses of a value in the LocalUpdates
      // map. If so, remap it now to the locally defined version.
      for (unsigned i = 0, e = Inst.getNumOperands(); i != e; ++i)
        if (Value *V = LocalUpdates.lookup(Inst.getOperand(i))) {
          Changed = true;
          Inst.setOperand(i, V);
        }
    }
    LocalUpdates.clear();
  }

  // Now that we've collected all of the interesting heap object values that
  // are passed into argument-returning functions, rewrite uses of these
  // pointers with optimized lifetime-shortened versions of them.
  for (Value *Ptr : DefOrder) {
    // If Ptr is an instruction, remember its block. If not, use the entry
    // block as its block (it must be an argument, constant, etc).
    BasicBlock *PtrBlock;
    if (Instruction *PI = dyn_cast<Instruction>(Ptr))
      PtrBlock = PI->getParent();
    else
      PtrBlock = &F.getEntryBlock();

    TinyPtrVector<Instruction*> &Defs = DefsOfValue[Ptr];
    // This is the same problem as SSA construction, so we just use LLVM's
    // SSAUpdater, with each retain as a definition of the virtual value.
    SSAUpdater Updater;
    Updater.Initialize(Ptr->getType(), Ptr->getName());

    // Set the return value of each of these calls as a definition of the
    // virtual value.
    for (auto D : Defs)
      Updater.AddAvailableValue(D->getParent(), D);

    // If we didn't add a definition for Ptr's block, then Ptr itself is
    // available in its block.
    if (!Updater.HasValueForBlock(PtrBlock))
      Updater.AddAvailableValue(PtrBlock, Ptr);


    // Rewrite uses of Ptr to their optimized forms.
    for (auto UI = Ptr->user_begin(), E = Ptr->user_end(); UI != E; ) {
      // Make sure to increment the use iterator before potentially rewriting
      // it.
      Use &U = UI.getUse();
      ++UI;

      // If the use is in the same block that defines it and the User is not a
      // PHI node, then this is a local use that shouldn't be rewritten.
      Instruction *User = cast<Instruction>(U.getUser());
      if (User->getParent() == PtrBlock && !isa<PHINode>(User))
        continue;

      // Otherwise, change it if profitable!
      Updater.RewriteUse(U);

      if (U.get() != Ptr)
        Changed = true;
    }
  }

  // Scan through all the returns to see if there are any that can be optimized.
  // FIXME: The swift_retainAndReturnThree runtime call is currently
  // implemented only on x86_64.
  // FIXME: The optimizeReturn3() implementation assumes a 64-bit target.
  if (llvm::Triple(F.getParent()->getTargetTriple()).getArchName() == "x86_64")
    for (ReturnInst *RI : Returns)
      Changed |= optimizeReturn3(RI);

  return Changed;
}


namespace llvm {
void initializeSwiftARCExpandPassPass(PassRegistry&);
}

char SwiftARCExpandPass::ID = 0;
INITIALIZE_PASS(SwiftARCExpandPass,
                "swift-arc-expand", "Swift ARC expansion", false, false)

llvm::FunctionPass *swift::createSwiftARCExpandPass() {
  initializeSwiftARCExpandPassPass(*llvm::PassRegistry::getPassRegistry());
  return new SwiftARCExpandPass();
}