mirror of
https://github.com/apple/swift.git
synced 2025-12-14 20:36:38 +01:00
SIL optimizer: Add a new string optimization.
Optimizes String operations with constant operands.
Specifically:
* Replaces x.append(y) with x = y if x is empty.
* Removes x.append("")
* Replaces x.append(y) with x = x + y if x and y are constant strings.
* Replaces _typeName(T.self) with a constant string if T is statically known.
With this optimization it's possible to constant fold string interpolations, like "the \(Int.self) type" -> "the Int type"
This new pass runs on high-level SIL, where semantic calls are still in place.
rdar://problem/65642843
This commit is contained in:
484
lib/SILOptimizer/Transforms/StringOptimization.cpp
Normal file
484
lib/SILOptimizer/Transforms/StringOptimization.cpp
Normal file
@@ -0,0 +1,484 @@
|
||||
//===--- StringOptimization.cpp - Optimize string operations --------------===//
|
||||
//
|
||||
// This source file is part of the Swift.org open source project
|
||||
//
|
||||
// Copyright (c) 2014 - 2020 Apple Inc. and the Swift project authors
|
||||
// Licensed under Apache License v2.0 with Runtime Library Exception
|
||||
//
|
||||
// See https://swift.org/LICENSE.txt for license information
|
||||
// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// This pass performs several optimizations on String operations.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#define DEBUG_TYPE "string-optimization"
|
||||
#include "swift/SILOptimizer/PassManager/Transforms.h"
|
||||
#include "swift/SILOptimizer/Analysis/ValueTracking.h"
|
||||
#include "swift/SIL/SILFunction.h"
|
||||
#include "swift/SIL/SILBasicBlock.h"
|
||||
#include "swift/SIL/SILBuilder.h"
|
||||
#include "swift/AST/SemanticAttrs.h"
|
||||
#include "swift/AST/ParameterList.h"
|
||||
#include "swift/AST/ASTMangler.h"
|
||||
#include "swift/Demangling/Demangle.h"
|
||||
#include "llvm/Support/Debug.h"
|
||||
|
||||
using namespace swift;
|
||||
|
||||
namespace {
|
||||
|
||||
/// Optimizes String operations with constant operands.
|
||||
|
||||
/// Specifically:
|
||||
/// * Replaces x.append(y) with x = y if x is empty.
|
||||
/// * Removes x.append("")
|
||||
/// * Replaces x.append(y) with x = x + y if x and y are constant strings.
|
||||
/// * Replaces _typeName(T.self) with a constant string if T is statically
|
||||
/// known.
|
||||
///
|
||||
/// This pass must run on high-level SIL, where semantic calls are still in
|
||||
/// place.
|
||||
///
|
||||
/// The optimization is implemented in a simple way. Therefore it cannot handle
|
||||
/// complicated patterns, e.g. the dataflow analysis for the String.append self
|
||||
/// argument is only done within a single block.
|
||||
/// But this is totally sufficient to be able to constant propagate strings in
|
||||
/// string interpolations.
|
||||
///
|
||||
/// If we want to make this optimization more powerful it's best done by using
|
||||
/// the ConstExprStepEvaluator (which is currently lacking a few features to be
|
||||
/// used for this optimization).
|
||||
class StringOptimization {
|
||||
|
||||
struct StringInfo {
|
||||
/// The string
|
||||
StringRef str;
|
||||
|
||||
/// Negative means: not constant
|
||||
int numCodeUnits = -1;
|
||||
|
||||
/// Not 0 for the empty-string initializer which reserves a capacity.
|
||||
int reservedCapacity = 0;
|
||||
|
||||
bool isConstant() const { return numCodeUnits >= 0; }
|
||||
bool isEmpty() const { return isConstant() && str.empty(); }
|
||||
};
|
||||
|
||||
/// The stdlib's String type.
|
||||
SILType stringType;
|
||||
|
||||
/// The String initializer which takes an UTF8 string literal as argument.
|
||||
SILFunction *makeUTF8Func = nullptr;
|
||||
|
||||
/// Caches the analysis result for an alloc_stack or an inout function
|
||||
/// argument, whether it is an "identifyable" object.
|
||||
/// See mayWriteToIdentifyableObject().
|
||||
llvm::DenseMap<SILValue, bool> identifyableObjectsCache;
|
||||
|
||||
public:
|
||||
bool run(SILFunction *F);
|
||||
|
||||
private:
|
||||
|
||||
bool optimizeBlock(SILBasicBlock &block);
|
||||
|
||||
bool optimizeStringAppend(ApplyInst *appendCall,
|
||||
llvm::DenseMap<SILValue, SILValue> &storedStrings);
|
||||
|
||||
bool optimizeTypeName(ApplyInst *typeNameCall);
|
||||
|
||||
static ApplyInst *isSemanticCall(SILInstruction *inst, StringRef attr,
|
||||
unsigned numArgs);
|
||||
StoreInst *isStringStoreToIdentifyableObject(SILInstruction *inst);
|
||||
static void invalidateModifiedObjects(SILInstruction *inst,
|
||||
llvm::DenseMap<SILValue, SILValue> &storedStrings);
|
||||
static StringInfo getStringInfo(SILValue value);
|
||||
static Optional<int> getIntConstant(SILValue value);
|
||||
static void replaceAppendWith(ApplyInst *appendCall, SILValue newValue,
|
||||
bool copyNewValue);
|
||||
ApplyInst *createStringInit(StringRef str, SILInstruction *beforeInst);
|
||||
};
|
||||
|
||||
/// The main entry point of the optimization.
|
||||
bool StringOptimization::run(SILFunction *F) {
|
||||
NominalTypeDecl *stringDecl = F->getModule().getASTContext().getStringDecl();
|
||||
if (!stringDecl)
|
||||
return false;
|
||||
stringType = SILType::getPrimitiveObjectType(
|
||||
CanType(stringDecl->getDeclaredType()));
|
||||
|
||||
bool changed = false;
|
||||
|
||||
for (SILBasicBlock &block : *F) {
|
||||
changed |= optimizeBlock(block);
|
||||
}
|
||||
return changed;
|
||||
}
|
||||
|
||||
/// Run the optimization on a basic block.
|
||||
bool StringOptimization::optimizeBlock(SILBasicBlock &block) {
|
||||
bool changed = false;
|
||||
|
||||
/// Maps identifyable objects (alloc_stack, inout parameters) to string values
|
||||
/// which are stored in those objects.
|
||||
llvm::DenseMap<SILValue, SILValue> storedStrings;
|
||||
|
||||
for (auto iter = block.begin(); iter != block.end();) {
|
||||
SILInstruction *inst = &*iter++;
|
||||
|
||||
if (StoreInst *store = isStringStoreToIdentifyableObject(inst)) {
|
||||
storedStrings[store->getDest()] = store->getSrc();
|
||||
continue;
|
||||
}
|
||||
if (ApplyInst *append = isSemanticCall(inst, semantics::STRING_APPEND, 2)) {
|
||||
if (optimizeStringAppend(append, storedStrings)) {
|
||||
changed = true;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
if (ApplyInst *typeName = isSemanticCall(inst, semantics::TYPENAME, 2)) {
|
||||
if (optimizeTypeName(typeName)) {
|
||||
changed = true;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
// Remove items from storedStrings if inst overwrites (or potentially
|
||||
// overwrites) a stored String in an identifyable object.
|
||||
invalidateModifiedObjects(inst, storedStrings);
|
||||
}
|
||||
return changed;
|
||||
}
|
||||
|
||||
/// Optimize String.append in case anything is known about the parameters.
|
||||
bool StringOptimization::optimizeStringAppend(ApplyInst *appendCall,
|
||||
llvm::DenseMap<SILValue, SILValue> &storedStrings) {
|
||||
SILValue rhs = appendCall->getArgument(0);
|
||||
StringInfo rhsString = getStringInfo(rhs);
|
||||
|
||||
// Remove lhs.append(rhs) if rhs is empty.
|
||||
if (rhsString.isEmpty()) {
|
||||
appendCall->eraseFromParent();
|
||||
return true;
|
||||
}
|
||||
|
||||
SILValue lhsAddr = appendCall->getArgument(1);
|
||||
StringInfo lhsString = getStringInfo(storedStrings[lhsAddr]);
|
||||
|
||||
// The following two optimizations are a trade-off: Performance-wise it may be
|
||||
// benefitial to initialize an empty string with reserved capacity and then
|
||||
// append multiple other string components.
|
||||
// Removing the empty string (with the reserved capacity) might result in more
|
||||
// allocations.
|
||||
// So we just do this optimization up to a certain capacity limit (found by
|
||||
// experiment).
|
||||
if (lhsString.reservedCapacity > 50)
|
||||
return false;
|
||||
|
||||
// Replace lhs.append(rhs) with 'lhs = rhs' if lhs is empty.
|
||||
if (lhsString.isEmpty()) {
|
||||
replaceAppendWith(appendCall, rhs, /*copyNewValue*/ true);
|
||||
storedStrings[lhsAddr] = rhs;
|
||||
return true;
|
||||
}
|
||||
|
||||
// Replace lhs.append(rhs) with "lhs = lhs + rhs" if both lhs and rhs are
|
||||
// constant.
|
||||
if (lhsString.isConstant() && rhsString.isConstant()) {
|
||||
std::string concat = lhsString.str;
|
||||
concat += rhsString.str;
|
||||
if (ApplyInst *stringInit = createStringInit(concat, appendCall)) {
|
||||
replaceAppendWith(appendCall, stringInit, /*copyNewValue*/ false);
|
||||
storedStrings[lhsAddr] = stringInit;
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
/// Try to replace a _typeName() call with a constant string if the type is
|
||||
/// statically known.
|
||||
bool StringOptimization::optimizeTypeName(ApplyInst *typeNameCall) {
|
||||
// Check, if the type is statically known.
|
||||
auto *anyType =
|
||||
dyn_cast<InitExistentialMetatypeInst>(typeNameCall->getArgument(0));
|
||||
if (!anyType)
|
||||
return false;
|
||||
auto *metatypeInst = dyn_cast<MetatypeInst>(anyType->getOperand());
|
||||
if (!metatypeInst)
|
||||
return false;
|
||||
|
||||
auto metatype = metatypeInst->getType().getAs<MetatypeType>();
|
||||
Type ty = metatype->getInstanceType();
|
||||
if (ty->hasArchetype())
|
||||
return false;
|
||||
|
||||
// Usually the "qualified" parameter of _typeName() is a constant boolean.
|
||||
Optional<int> isQualified = getIntConstant(typeNameCall->getArgument(1));
|
||||
if (!isQualified)
|
||||
return false;
|
||||
|
||||
// Create the constant type string by mangling + demangling.
|
||||
Mangle::ASTMangler mangler;
|
||||
std::string mangledTypeName = mangler.mangleTypeForTypeName(ty);
|
||||
|
||||
Demangle::DemangleOptions options;
|
||||
options.PrintForTypeName = true;
|
||||
options.QualifyEntities = (isQualified.getValue() != 0);
|
||||
std::string typeStr = Demangle::demangleSymbolAsString(mangledTypeName,
|
||||
options);
|
||||
|
||||
ApplyInst *stringInit = createStringInit(typeStr, typeNameCall);
|
||||
if (!stringInit)
|
||||
return false;
|
||||
|
||||
typeNameCall->replaceAllUsesWith(stringInit);
|
||||
typeNameCall->eraseFromParent();
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
/// Returns the apply instruction if \p inst is a call of a function which has
|
||||
/// a semantic attribute \p attr and exactly \p numArgs arguments.
|
||||
ApplyInst *StringOptimization::isSemanticCall(SILInstruction *inst,
|
||||
StringRef attr, unsigned numArgs) {
|
||||
auto *apply = dyn_cast<ApplyInst>(inst);
|
||||
if (!apply || apply->getNumArguments() != numArgs)
|
||||
return nullptr;
|
||||
|
||||
SILFunction *callee = apply->getReferencedFunctionOrNull();
|
||||
if (callee && callee->hasSemanticsAttr(attr))
|
||||
return apply;
|
||||
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
/// Returns true for all instructions which we can safely analyze as a potential
|
||||
/// write to an identifyable objects.
|
||||
///
|
||||
/// If we see any other kind of object user, which may write to an object, or
|
||||
/// let the object address escape in some unexpected way (like address
|
||||
/// projections), we'll just ignore that object and will not treat it as
|
||||
/// "identifyable" object.
|
||||
static bool mayWriteToIdentifyableObject(SILInstruction *inst) {
|
||||
// For simplicity, only handle store and apply. This is sufficient for most
|
||||
// case, especially for string interpolation.
|
||||
return isa<StoreInst>(inst) || isa<ApplyInst>(inst);
|
||||
}
|
||||
|
||||
/// Returns the store intstruction if \p inst is a store of a String to an
|
||||
/// identifyable object.
|
||||
StoreInst *StringOptimization::
|
||||
isStringStoreToIdentifyableObject(SILInstruction *inst) {
|
||||
auto *store = dyn_cast<StoreInst>(inst);
|
||||
if (!store)
|
||||
return nullptr;
|
||||
if (store->getSrc()->getType() != stringType)
|
||||
return nullptr;
|
||||
|
||||
SILValue destAddr = store->getDest();
|
||||
// We only handle alloc_stack an indirect function arguments. For those we can
|
||||
// be sure that they are not aliased, just by checking all users.
|
||||
if (!isa<AllocStackInst>(destAddr) && !isExclusiveArgument(destAddr))
|
||||
return nullptr;
|
||||
|
||||
if (identifyableObjectsCache.count(destAddr) != 0) {
|
||||
return identifyableObjectsCache[destAddr] ? store : nullptr;
|
||||
}
|
||||
|
||||
// Check if it's an "identifyable" object. This is the case if it only has
|
||||
// users which we are able to track in a simple way: stores and applies.
|
||||
for (Operand *use : destAddr->getUses()) {
|
||||
SILInstruction *user = use->getUser();
|
||||
switch (user->getKind()) {
|
||||
// Those instructions do not write to destAddr nor let they destAddr
|
||||
// escape.
|
||||
case SILInstructionKind::DebugValueAddrInst:
|
||||
case SILInstructionKind::DeallocStackInst:
|
||||
case SILInstructionKind::LoadInst:
|
||||
break;
|
||||
default:
|
||||
if (!mayWriteToIdentifyableObject(user)) {
|
||||
// We don't handle user. It is some instruction which may write to
|
||||
// destAddr or let destAddr "escape" (like an address projection).
|
||||
identifyableObjectsCache[destAddr] = false;
|
||||
return nullptr;
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
identifyableObjectsCache[destAddr] = true;
|
||||
return store;
|
||||
}
|
||||
|
||||
/// Removes all objects from \p storedStrings which \p inst (potentially)
|
||||
/// modifies.
|
||||
void StringOptimization::invalidateModifiedObjects(SILInstruction *inst,
|
||||
llvm::DenseMap<SILValue, SILValue> &storedStrings) {
|
||||
// Ignore non-writing instructions, like "load", "dealloc_stack".
|
||||
// Note that identifyable objects (= keys in storedStrings) can only have
|
||||
// certain kind of instructions as users: all instruction which we handle in
|
||||
// isStringStoreToIdentifyableObject().
|
||||
if (!mayWriteToIdentifyableObject(inst))
|
||||
return;
|
||||
|
||||
for (Operand &op : inst->getAllOperands()) {
|
||||
storedStrings.erase(op.get());
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns information about value if it's a constant string.
|
||||
StringOptimization::StringInfo StringOptimization::getStringInfo(SILValue value) {
|
||||
// Start with a non-constant result.
|
||||
StringInfo result;
|
||||
|
||||
auto *apply = dyn_cast_or_null<ApplyInst>(value);
|
||||
if (!apply)
|
||||
return result;
|
||||
|
||||
SILFunction *callee = apply->getReferencedFunctionOrNull();
|
||||
if (!callee)
|
||||
return result;
|
||||
|
||||
if (callee->hasSemanticsAttr(semantics::STRING_INIT_EMPTY)) {
|
||||
// An empty string initializer.
|
||||
result.numCodeUnits = 0;
|
||||
return result;
|
||||
}
|
||||
|
||||
if (callee->hasSemanticsAttr(semantics::STRING_INIT_EMPTY_WITH_CAPACITY)) {
|
||||
// An empty string initializer with initial capacity.
|
||||
result.numCodeUnits = 0;
|
||||
result.reservedCapacity = std::numeric_limits<int>::max();
|
||||
if (apply->getNumArguments() > 0) {
|
||||
if (Optional<int> capacity = getIntConstant(apply->getArgument(0)))
|
||||
result.reservedCapacity = capacity.getValue();
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
if (callee->hasSemanticsAttr(semantics::STRING_MAKE_UTF8)) {
|
||||
// A string literal initializer.
|
||||
SILValue stringVal = apply->getArgument(0);
|
||||
auto *stringLiteral = dyn_cast<StringLiteralInst>(stringVal);
|
||||
SILValue lengthVal = apply->getArgument(1);
|
||||
auto *intLiteral = dyn_cast<IntegerLiteralInst>(lengthVal);
|
||||
if (intLiteral && stringLiteral &&
|
||||
// For simplicity, we only support UTF8 string literals.
|
||||
stringLiteral->getEncoding() == StringLiteralInst::Encoding::UTF8) {
|
||||
result.str = stringLiteral->getValue();
|
||||
result.numCodeUnits = intLiteral->getValue().getSExtValue();
|
||||
return result;
|
||||
}
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
/// Returns the constant integer value if \a value is an Int or Bool struct with
|
||||
/// an integer_literal as operand.
|
||||
Optional<int> StringOptimization::getIntConstant(SILValue value) {
|
||||
auto *boolOrIntStruct = dyn_cast<StructInst>(value);
|
||||
if (!boolOrIntStruct || boolOrIntStruct->getNumOperands() != 1)
|
||||
return None;
|
||||
|
||||
auto *literal = dyn_cast<IntegerLiteralInst>(boolOrIntStruct->getOperand(0));
|
||||
if (!literal || literal->getValue().getActiveBits() > 64)
|
||||
return None;
|
||||
|
||||
return literal->getValue().getSExtValue();
|
||||
}
|
||||
|
||||
/// Replace a String.append() with a store of \p newValue to the destination.
|
||||
void StringOptimization::replaceAppendWith(ApplyInst *appendCall,
|
||||
SILValue newValue, bool copyNewValue) {
|
||||
SILBuilder builder(appendCall);
|
||||
SILLocation loc = appendCall->getLoc();
|
||||
SILValue destAddr = appendCall->getArgument(1);
|
||||
if (appendCall->getFunction()->hasOwnership()) {
|
||||
if (copyNewValue)
|
||||
newValue = builder.createCopyValue(loc, newValue);
|
||||
builder.createStore(loc, newValue, destAddr,
|
||||
StoreOwnershipQualifier::Assign);
|
||||
} else {
|
||||
if (copyNewValue)
|
||||
builder.createRetainValue(loc, newValue, builder.getDefaultAtomicity());
|
||||
builder.createDestroyAddr(loc, destAddr);
|
||||
builder.createStore(loc, newValue, destAddr,
|
||||
StoreOwnershipQualifier::Unqualified);
|
||||
}
|
||||
appendCall->eraseFromParent();
|
||||
}
|
||||
|
||||
/// Creates a call to a string initializer.
|
||||
ApplyInst *StringOptimization::createStringInit(StringRef str,
|
||||
SILInstruction *beforeInst) {
|
||||
SILBuilder builder(beforeInst);
|
||||
SILLocation loc = beforeInst->getLoc();
|
||||
SILModule &module = beforeInst->getFunction()->getModule();
|
||||
ASTContext &ctxt = module.getASTContext();
|
||||
|
||||
if (!makeUTF8Func) {
|
||||
// Find the String initializer which takes a string_literal as argument.
|
||||
ConstructorDecl *makeUTF8Decl = ctxt.getMakeUTF8StringDecl();
|
||||
if (!makeUTF8Decl)
|
||||
return nullptr;
|
||||
|
||||
auto Mangled = SILDeclRef(makeUTF8Decl, SILDeclRef::Kind::Allocator).mangle();
|
||||
makeUTF8Func = module.findFunction(Mangled, SILLinkage::PublicExternal);
|
||||
if (!makeUTF8Func)
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
auto *literal = builder.createStringLiteral(loc, str,
|
||||
StringLiteralInst::Encoding::UTF8);
|
||||
|
||||
auto *length = builder.createIntegerLiteral(loc,
|
||||
SILType::getBuiltinWordType(ctxt),
|
||||
literal->getCodeUnitCount());
|
||||
|
||||
auto *isAscii = builder.createIntegerLiteral(loc,
|
||||
SILType::getBuiltinIntegerType(1, ctxt),
|
||||
intmax_t(ctxt.isASCIIString(str)));
|
||||
|
||||
SILType stringMetaType = SILType::getPrimitiveObjectType(
|
||||
CanType(MetatypeType::get(stringType.getASTType(),
|
||||
MetatypeRepresentation::Thin)));
|
||||
|
||||
auto *metaTypeInst = builder.createMetatype(loc, stringMetaType);
|
||||
|
||||
auto *functionRef = builder.createFunctionRefFor(loc, makeUTF8Func);
|
||||
|
||||
return builder.createApply(loc, functionRef, SubstitutionMap(),
|
||||
{ literal, length, isAscii, metaTypeInst });
|
||||
}
|
||||
|
||||
/// The StringOptimization function pass.
|
||||
class StringOptimizationPass : public SILFunctionTransform {
|
||||
public:
|
||||
|
||||
void run() override {
|
||||
SILFunction *F = getFunction();
|
||||
if (!F->shouldOptimize())
|
||||
return;
|
||||
|
||||
LLVM_DEBUG(llvm::dbgs() << "*** StringOptimization on function: "
|
||||
<< F->getName() << " ***\n");
|
||||
|
||||
StringOptimization stringOptimization;
|
||||
bool changed = stringOptimization.run(F);
|
||||
|
||||
if (changed) {
|
||||
invalidateAnalysis(SILAnalysis::InvalidationKind::CallsAndInstructions);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
} // end anonymous namespace
|
||||
|
||||
/// Creates a new instance of the StringOptimization function pass.
SILTransform *swift::createStringOptimization() {
  SILTransform *pass = new StringOptimizationPass();
  return pass;
}
|
||||
Reference in New Issue
Block a user