mirror of
https://github.com/apple/swift.git
synced 2025-12-14 20:36:38 +01:00
LLVM is presumably moving towards `std::string_view` - `StringRef::startswith` is deprecated on tip. `SmallString::startswith` was just renamed there (maybe with some small deprecation in between, but if so, we've missed it). The `SmallString::startswith` references were moved to `.str().starts_with()`, rather than adding the `starts_with` on `stable/20230725` as we only had a few of them. Open to switching that over if anyone feels strongly though.
319 lines
11 KiB
C++
319 lines
11 KiB
C++
//===---- InlinableText.cpp - Extract inlinable source text -----*- C++ -*-===//
|
|
//
|
|
// This source file is part of the Swift.org open source project
|
|
//
|
|
// Copyright (c) 2014 - 2018 Apple Inc. and the Swift project authors
|
|
// Licensed under Apache License v2.0 with Runtime Library Exception
|
|
//
|
|
// See https://swift.org/LICENSE.txt for license information
|
|
// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
#include "InlinableText.h"
|
|
#include "swift/AST/ASTContext.h"
|
|
#include "swift/AST/ASTNode.h"
|
|
#include "swift/AST/ASTVisitor.h"
|
|
#include "swift/AST/ASTWalker.h"
|
|
#include "swift/AST/Decl.h"
|
|
#include "swift/AST/Expr.h"
|
|
#include "swift/Parse/Lexer.h"
|
|
|
|
#include "llvm/ADT/SmallVector.h"
|
|
#include "llvm/ADT/SmallString.h"
|
|
|
|
using namespace swift;
|
|
|
|
/// Gets the last token that exists inside this IfConfigClause, ignoring
|
|
/// hoisted elements.
|
|
///
|
|
/// If the clause is the last element, this returns the beginning of the line
|
|
/// before the parent IfConfigDecl's #endif token. Otherwise, it's the beginning
|
|
/// of the line before the next clause's #else or #elseif token.
|
|
static SourceLoc
|
|
getEffectiveEndLoc(SourceManager &sourceMgr, const IfConfigClause *clause,
|
|
const IfConfigDecl *decl) {
|
|
auto clauses = decl->getClauses();
|
|
if (clause == &clauses.back())
|
|
return Lexer::getLocForStartOfLine(sourceMgr, decl->getEndLoc());
|
|
|
|
assert(clause >= clauses.begin() && clause < clauses.end() &&
|
|
"clauses must be contiguous");
|
|
|
|
auto *nextClause = clause + 1;
|
|
return Lexer::getLocForStartOfLine(sourceMgr, nextClause->Loc);
|
|
}
|
|
|
|
namespace {
|
|
|
|
class IsFeatureCheck : public ASTWalker {
|
|
public:
|
|
bool foundFeature = false;
|
|
|
|
/// Walk everything that's available.
|
|
MacroWalking getMacroWalkingBehavior() const override {
|
|
return MacroWalking::ArgumentsAndExpansion;
|
|
}
|
|
|
|
PreWalkResult<Expr *> walkToExprPre(Expr *expr) override {
|
|
if (auto unresolved = dyn_cast<UnresolvedDeclRefExpr>(expr)) {
|
|
if (unresolved->getName().getBaseName().userFacingName().starts_with("$"))
|
|
foundFeature = true;
|
|
}
|
|
|
|
if (auto call = dyn_cast<CallExpr>(expr)) {
|
|
if (auto unresolved = dyn_cast<UnresolvedDeclRefExpr>(call->getFn())) {
|
|
StringRef userFacing = unresolved->getName().getBaseName()
|
|
.userFacingName();
|
|
if (userFacing == "compiler" || userFacing == "_compiler_version")
|
|
foundFeature = true;
|
|
}
|
|
}
|
|
|
|
return Action::SkipNodeIf(foundFeature, expr);
|
|
}
|
|
};
|
|
|
|
/// Walks \p cond with an IsFeatureCheck walker and reports whether that walker
/// found a feature check anywhere in the condition.
bool clauseIsFeatureCheck(Expr *cond) {
  IsFeatureCheck featureWalker;
  cond->walk(featureWalker);

  const bool sawFeatureCheck = featureWalker.foundFeature;
  return sawFeatureCheck;
}
|
|
|
|
/// Whether any of the clauses here involves a feature check
|
|
/// (e.g., $AsyncAwait).
|
|
bool anyClauseIsFeatureCheck(ArrayRef<IfConfigClause> clauses) {
|
|
for (const auto &clause : clauses) {
|
|
if (Expr *cond = clause.Cond) {
|
|
if (clauseIsFeatureCheck(cond))
|
|
return true;
|
|
}
|
|
}
|
|
|
|
return false;
|
|
}
|
|
|
|
/// A walker that searches through #if declarations, finding all text that does
|
|
/// not contribute to the final evaluated AST.
|
|
///
|
|
/// For example, in the following code:
|
|
/// ```
|
|
/// #if true
|
|
/// print("true")
|
|
/// #else
|
|
/// print("false")
|
|
/// #endif
|
|
/// ```
|
|
/// ExtractInactiveRanges will return the ranges (with leading newlines) of:
|
|
/// ```
|
|
/// #if true
|
|
/// #else
|
|
/// print("false")
|
|
/// #endif
|
|
/// ```
|
|
/// Leaving behind just 'print("true")'s range.
|
|
struct ExtractInactiveRanges : public ASTWalker {
|
|
SmallVector<CharSourceRange, 4> ranges;
|
|
SourceManager &sourceMgr;
|
|
|
|
/// Walk everything that's available.
|
|
MacroWalking getMacroWalkingBehavior() const override {
|
|
return MacroWalking::ArgumentsAndExpansion;
|
|
}
|
|
|
|
explicit ExtractInactiveRanges(SourceManager &sourceMgr)
|
|
: sourceMgr(sourceMgr) {}
|
|
|
|
/// Adds the two SourceLocs as a CharSourceRange to the set of ignored
|
|
/// ranges.
|
|
/// \note: This assumes each of these locs is a character location, not a
|
|
/// token location.
|
|
void addRange(SourceLoc start, SourceLoc end) {
|
|
auto charRange = CharSourceRange(sourceMgr, start, end);
|
|
ranges.push_back(charRange);
|
|
}
|
|
|
|
PreWalkAction walkToDeclPre(Decl *d) override {
|
|
auto icd = dyn_cast<IfConfigDecl>(d);
|
|
if (!icd)
|
|
return Action::Continue();
|
|
|
|
auto start = Lexer::getLocForStartOfLine(sourceMgr, icd->getStartLoc());
|
|
auto end = Lexer::getLocForEndOfLine(sourceMgr, icd->getEndLoc());
|
|
|
|
auto clause = icd->getActiveClause();
|
|
|
|
// If there's no active clause, add the entire #if...#endif block.
|
|
if (!clause) {
|
|
addRange(start, end);
|
|
return Action::SkipNode();
|
|
}
|
|
|
|
// If the clause is checking for a particular feature with $ or a compiler
|
|
// version, keep the whole thing.
|
|
if (anyClauseIsFeatureCheck(icd->getClauses())) {
|
|
return Action::SkipNode();
|
|
}
|
|
|
|
// Ignore range from beginning of '#if', '#elseif', or '#else' to the
|
|
// beginning of the elements of this clause.
|
|
auto elementsBegin = clause->Loc;
|
|
// If there's a condition (e.g. this isn't a '#else' block), then ignore
|
|
// everything up to the end of the condition.
|
|
if (auto cond = clause->Cond) {
|
|
elementsBegin = cond->getEndLoc();
|
|
}
|
|
addRange(start, Lexer::getLocForEndOfLine(sourceMgr, elementsBegin));
|
|
|
|
// Ignore range from effective end of the elements of this clause to the
|
|
// end of the '#endif'
|
|
addRange(getEffectiveEndLoc(sourceMgr, clause, icd), end);
|
|
|
|
// Walk into direct children of this node that are IfConfigDecls, because
|
|
// the standard walker won't walk into them.
|
|
for (auto &elt : clause->Elements)
|
|
if (elt.isDecl(DeclKind::IfConfig))
|
|
elt.get<Decl *>()->walk(*this);
|
|
|
|
return Action::SkipNode();
|
|
}
|
|
|
|
/// Gets the ignored ranges in source order.
|
|
ArrayRef<CharSourceRange> getSortedRanges() {
|
|
std::sort(ranges.begin(), ranges.end(),
|
|
[&](CharSourceRange r1, CharSourceRange r2) {
|
|
assert(!r1.overlaps(r2) && "no overlapping ranges");
|
|
return sourceMgr.isBeforeInBuffer(r1.getStart(), r2.getStart());
|
|
});
|
|
return ranges;
|
|
}
|
|
};
|
|
} // end anonymous namespace
|
|
|
|
/// Appends the textual contents of the provided source range, stripping
|
|
/// the contents of comments that appear in the source.
|
|
///
|
|
/// Given that comments are treated as whitespace, this also appends a
|
|
/// space or newline (depending if the comment was multi-line and itself
|
|
/// had newlines in the body) in place of the comment, to avoid fusing tokens
|
|
/// together.
|
|
static void appendRange(
|
|
SourceManager &sourceMgr, SourceLoc start, SourceLoc end,
|
|
SmallVectorImpl<char> &scratch) {
|
|
unsigned bufferID = sourceMgr.findBufferContainingLoc(start);
|
|
unsigned offset = sourceMgr.getLocOffsetInBuffer(start, bufferID);
|
|
unsigned endOffset = sourceMgr.getLocOffsetInBuffer(end, bufferID);
|
|
|
|
// Strip comments from the chunk before adding it by re-lexing the range.
|
|
LangOptions FakeLangOpts;
|
|
Lexer lexer(FakeLangOpts, sourceMgr, bufferID, nullptr, LexerMode::Swift,
|
|
HashbangMode::Disallowed, CommentRetentionMode::ReturnAsTokens,
|
|
offset, endOffset);
|
|
|
|
SourceLoc nonCommentStart = start;
|
|
Token token;
|
|
|
|
// Re-lex the range, and skip the full text of `tok::comment` tokens.
|
|
while (!token.is(tok::eof)) {
|
|
lexer.lex(token);
|
|
|
|
// Skip over #sourceLocation's in the file.
|
|
if (token.is(tok::pound_sourceLocation)) {
|
|
|
|
// Append the text leading up to the #sourceLocation
|
|
auto charRange = CharSourceRange(
|
|
sourceMgr, nonCommentStart, token.getLoc());
|
|
StringRef text = sourceMgr.extractText(charRange);
|
|
scratch.append(text.begin(), text.end());
|
|
|
|
// Skip to the right paren. We know the AST is already valid, so there's
|
|
// definitely a right paren.
|
|
while (!token.is(tok::r_paren)) {
|
|
lexer.lex(token);
|
|
}
|
|
|
|
nonCommentStart = Lexer::getLocForEndOfToken(sourceMgr, token.getLoc());
|
|
}
|
|
|
|
if (token.is(tok::comment)) {
|
|
// Grab the start of the full comment token (with leading trivia as well)
|
|
SourceLoc commentLoc = token.getLoc();
|
|
|
|
// Find the end of the token (with trailing trivia)
|
|
SourceLoc endLoc = Lexer::getLocForEndOfToken(sourceMgr, token.getLoc());
|
|
|
|
// The comment token's range includes leading/trailing whitespace, so trim
|
|
// whitespace and only strip the portions of the comment that are not
|
|
// whitespace.
|
|
CharSourceRange range = CharSourceRange(sourceMgr, commentLoc, endLoc);
|
|
StringRef fullTokenText = sourceMgr.extractText(range);
|
|
unsigned leadingWhitespace = fullTokenText.size() -
|
|
fullTokenText.ltrim().size();
|
|
if (leadingWhitespace > 0) {
|
|
commentLoc = commentLoc.getAdvancedLoc(leadingWhitespace);
|
|
}
|
|
|
|
unsigned trailingWhitespace = fullTokenText.size() -
|
|
fullTokenText.rtrim().size();
|
|
if (trailingWhitespace > 0) {
|
|
endLoc = endLoc.getAdvancedLoc(-trailingWhitespace);
|
|
}
|
|
|
|
// First, extract the text up to the start of the comment, including the
|
|
// whitespace.
|
|
auto charRange = CharSourceRange(sourceMgr, nonCommentStart, commentLoc);
|
|
StringRef text = sourceMgr.extractText(charRange);
|
|
scratch.append(text.begin(), text.end());
|
|
|
|
// Next, search through the comment text to see if it's a block comment
|
|
// with a newline. If so we need to re-insert a newline to avoid fusing
|
|
// multi-line tokens together.
|
|
auto commentTextRange = CharSourceRange(sourceMgr, commentLoc, endLoc);
|
|
StringRef commentText = sourceMgr.extractText(commentTextRange);
|
|
bool hasNewline = commentText.find_first_of("\n\r") != StringRef::npos;
|
|
|
|
// Use a newline as a filler character if the comment itself had a newline
|
|
// in it.
|
|
char filler = hasNewline ? '\n' : ' ';
|
|
|
|
// Append a single whitespace filler character, to avoid fusing tokens.
|
|
scratch.push_back(filler);
|
|
|
|
// Start the next region after the contents of the comment.
|
|
nonCommentStart = endLoc;
|
|
}
|
|
}
|
|
|
|
if (nonCommentStart.isValid() && nonCommentStart != end) {
|
|
auto charRange = CharSourceRange(sourceMgr, nonCommentStart, end);
|
|
StringRef text = sourceMgr.extractText(charRange);
|
|
scratch.append(text.begin(), text.end());
|
|
}
|
|
}
|
|
|
|
/// Builds the inlinable text of \p node in \p scratch and returns a StringRef
/// that views it.
///
/// The text is formed by walking \p node to collect the ranges that
/// ExtractInactiveRanges records, then appending (via appendRange) only the
/// pieces of the node's source text that lie between those ranges.
StringRef swift::extractInlinableText(SourceManager &sourceMgr, ASTNode node,
                                      SmallVectorImpl<char> &scratch) {
  // Collect the ranges of the node's text that must be left out.
  ExtractInactiveRanges inactiveFinder(sourceMgr);
  node.walk(inactiveFinder);

  // 'cursor' marks the beginning of the next piece of text to keep, and
  // 'nodeEnd' the character location just past the node's last token.
  SourceLoc cursor = node.getStartLoc();
  SourceLoc nodeEnd = Lexer::getLocForEndOfToken(sourceMgr, node.getEndLoc());

  for (const CharSourceRange &skipped : inactiveFinder.getSortedRanges()) {
    // Keep the text leading up to the ignored range, then jump past it.
    appendRange(sourceMgr, cursor, skipped.getStart(), scratch);
    cursor = skipped.getEnd();
  }

  // Keep any text that follows the last ignored range.
  if (cursor != nodeEnd)
    appendRange(sourceMgr, cursor, nodeEnd, scratch);

  return StringRef(scratch.data(), scratch.size());
}
|