Files
swift-mirror/lib/AST/InlinableText.cpp
Ben Barham 9779c18da3 Rename startswith to starts_with
LLVM is presumably moving towards `std::string_view` -
`StringRef::startswith` is deprecated on tip. `SmallString::startswith`
was just renamed there (maybe with some small deprecation in between, but
if so, we've missed it).

The `SmallString::startswith` references were moved to
`.str().starts_with()`, rather than adding the `starts_with` on
`stable/20230725` as we only had a few of them. Open to switching that
over if anyone feels strongly though.
2024-03-13 22:25:47 -07:00

319 lines
11 KiB
C++

//===---- InlinableText.cpp - Extract inlinable source text -----*- C++ -*-===//
//
// This source file is part of the Swift.org open source project
//
// Copyright (c) 2014 - 2018 Apple Inc. and the Swift project authors
// Licensed under Apache License v2.0 with Runtime Library Exception
//
// See https://swift.org/LICENSE.txt for license information
// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
//
//===----------------------------------------------------------------------===//
#include "InlinableText.h"
#include "swift/AST/ASTContext.h"
#include "swift/AST/ASTNode.h"
#include "swift/AST/ASTVisitor.h"
#include "swift/AST/ASTWalker.h"
#include "swift/AST/Decl.h"
#include "swift/AST/Expr.h"
#include "swift/Parse/Lexer.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/SmallString.h"
using namespace swift;
/// Computes where the text belonging to \p clause effectively stops, ignoring
/// hoisted elements.
///
/// The result is always the start of a line. For the final clause of \p decl
/// it is the start of the line containing the parent IfConfigDecl's end
/// location (its '#endif'). For any earlier clause it is the start of the line
/// containing the location of the next clause (its '#else' or '#elseif').
///
/// \p clause is expected to point into the clause array returned by
/// \c decl->getClauses().
static SourceLoc
getEffectiveEndLoc(SourceManager &sourceMgr, const IfConfigClause *clause,
                   const IfConfigDecl *decl) {
  auto allClauses = decl->getClauses();
  const bool isFinalClause = (clause == &allClauses.back());

  if (!isFinalClause) {
    // The clauses are stored contiguously, so the clause that follows this
    // one is simply the next array element.
    assert(clause >= allClauses.begin() && clause < allClauses.end() &&
           "clauses must be contiguous");
    const auto *following = clause + 1;
    return Lexer::getLocForStartOfLine(sourceMgr, following->Loc);
  }

  // Nothing follows the final clause except the end of the whole #if block.
  return Lexer::getLocForStartOfLine(sourceMgr, decl->getEndLoc());
}
namespace {
/// An AST walker that records whether an expression contains a feature check:
/// either an unresolved name whose user-facing spelling begins with '$', or a
/// call whose callee is the unresolved name 'compiler' or '_compiler_version'.
class IsFeatureCheck : public ASTWalker {
public:
  /// Set to true as soon as a feature check is seen; the walker never resets
  /// it.
  bool foundFeature = false;

  /// Walk everything that's available.
  MacroWalking getMacroWalkingBehavior() const override {
    return MacroWalking::ArgumentsAndExpansion;
  }

  PreWalkResult<Expr *> walkToExprPre(Expr *expr) override {
    if (auto *nameRef = dyn_cast<UnresolvedDeclRefExpr>(expr)) {
      // A bare reference to a '$'-prefixed name.
      StringRef spelling =
          nameRef->getName().getBaseName().userFacingName();
      if (spelling.starts_with("$"))
        foundFeature = true;
    } else if (auto *call = dyn_cast<CallExpr>(expr)) {
      // A call through an unresolved name that is one of the compiler-version
      // checks.
      if (auto *callee = dyn_cast<UnresolvedDeclRefExpr>(call->getFn())) {
        StringRef calleeName =
            callee->getName().getBaseName().userFacingName();
        const bool isCompilerCheck =
            calleeName == "compiler" || calleeName == "_compiler_version";
        if (isCompilerCheck)
          foundFeature = true;
      }
    }

    // Once a feature check has been found there is nothing more to learn from
    // this node.
    return Action::SkipNodeIf(foundFeature, expr);
  }
};
/// Returns true if walking \p cond with an IsFeatureCheck walker finds a
/// feature check anywhere inside it.
bool clauseIsFeatureCheck(Expr *cond) {
  IsFeatureCheck featureWalker;
  cond->walk(featureWalker);
  return featureWalker.foundFeature;
}
/// Returns true if the condition of at least one clause in \p clauses
/// involves a feature check (e.g., $AsyncAwait). Clauses without a condition
/// are ignored.
bool anyClauseIsFeatureCheck(ArrayRef<IfConfigClause> clauses) {
  for (const IfConfigClause &candidate : clauses) {
    Expr *condition = candidate.Cond;
    if (condition && clauseIsFeatureCheck(condition))
      return true;
  }
  return false;
}
/// An AST walker that visits #if declarations and collects every stretch of
/// source text that does not contribute to the final evaluated AST.
///
/// For example, in the following code:
/// ```
/// #if true
///   print("true")
/// #else
///   print("false")
/// #endif
/// ```
/// ExtractInactiveRanges will return the ranges (with leading newlines) of:
/// ```
/// #if true
/// #else
///   print("false")
/// #endif
/// ```
/// Leaving behind just 'print("true")'s range.
struct ExtractInactiveRanges : public ASTWalker {
  /// The ranges collected so far, in discovery order (see getSortedRanges()).
  SmallVector<CharSourceRange, 4> ranges;
  SourceManager &sourceMgr;

  explicit ExtractInactiveRanges(SourceManager &sourceMgr)
      : sourceMgr(sourceMgr) {}

  /// Walk everything that's available.
  MacroWalking getMacroWalkingBehavior() const override {
    return MacroWalking::ArgumentsAndExpansion;
  }

  /// Records [start, end) as a CharSourceRange in the set of ignored ranges.
  /// \note: This assumes each of these locs is a character location, not a
  /// token location.
  void addRange(SourceLoc start, SourceLoc end) {
    ranges.push_back(CharSourceRange(sourceMgr, start, end));
  }

  PreWalkAction walkToDeclPre(Decl *d) override {
    auto *ifConfig = dyn_cast<IfConfigDecl>(d);
    if (ifConfig == nullptr)
      return Action::Continue();

    // The full extent of the block, widened to whole lines.
    const SourceLoc blockStart =
        Lexer::getLocForStartOfLine(sourceMgr, ifConfig->getStartLoc());
    const SourceLoc blockEnd =
        Lexer::getLocForEndOfLine(sourceMgr, ifConfig->getEndLoc());

    const auto *active = ifConfig->getActiveClause();

    // With no active clause, the entire #if...#endif block is inactive.
    if (active == nullptr) {
      addRange(blockStart, blockEnd);
      return Action::SkipNode();
    }

    // Blocks that test for a particular feature with $ or for a compiler
    // version are kept verbatim.
    if (anyClauseIsFeatureCheck(ifConfig->getClauses()))
      return Action::SkipNode();

    // Drop everything from the start of the block through the end of the line
    // that introduces the active clause. For '#if' / '#elseif' that line ends
    // after the condition; a condition-less '#else' only has the directive
    // itself.
    const SourceLoc headerEnd =
        active->Cond ? active->Cond->getEndLoc() : active->Loc;
    addRange(blockStart, Lexer::getLocForEndOfLine(sourceMgr, headerEnd));

    // Drop everything from the effective end of the active clause's elements
    // through the end of the '#endif'.
    addRange(getEffectiveEndLoc(sourceMgr, active, ifConfig), blockEnd);

    // Nested IfConfigDecls that are direct children must be visited by hand,
    // because the standard walker won't walk into them.
    for (const auto &element : active->Elements) {
      if (element.isDecl(DeclKind::IfConfig))
        element.get<Decl *>()->walk(*this);
    }

    return Action::SkipNode();
  }

  /// Sorts the collected ranges into source order (by start location) and
  /// returns a view of them. Ranges are expected not to overlap.
  ArrayRef<CharSourceRange> getSortedRanges() {
    auto startsEarlier = [this](CharSourceRange lhs, CharSourceRange rhs) {
      assert(!lhs.overlaps(rhs) && "no overlapping ranges");
      return sourceMgr.isBeforeInBuffer(lhs.getStart(), rhs.getStart());
    };
    std::sort(ranges.begin(), ranges.end(), startsEarlier);
    return ranges;
  }
};
} // end anonymous namespace
/// Appends the textual contents of the provided source range, stripping
/// the contents of comments that appear in the source.
///
/// Given that comments are treated as whitespace, this also appends a
/// space or newline (depending if the comment was multi-line and itself
/// had newlines in the body) in place of the comment, to avoid fusing tokens
/// together. '#sourceLocation(...)' directives inside the range are dropped
/// as well.
///
/// \param sourceMgr The source manager that owns the buffer being read.
/// \param start     Character location where the range begins; the buffer
///                  containing it is the one that gets re-lexed.
/// \param end       Character location one past the last character of the
///                  range; it is looked up in the same buffer as \p start.
/// \param scratch   Output buffer that the resulting text is appended to.
static void appendRange(
    SourceManager &sourceMgr, SourceLoc start, SourceLoc end,
    SmallVectorImpl<char> &scratch) {
  unsigned bufferID = sourceMgr.findBufferContainingLoc(start);
  unsigned offset = sourceMgr.getLocOffsetInBuffer(start, bufferID);
  unsigned endOffset = sourceMgr.getLocOffsetInBuffer(end, bufferID);

  // Copies the raw source text in [from, to) onto the end of 'scratch'.
  auto appendRawText = [&](SourceLoc from, SourceLoc to) {
    StringRef text =
        sourceMgr.extractText(CharSourceRange(sourceMgr, from, to));
    scratch.append(text.begin(), text.end());
  };

  // Strip comments from the chunk before adding it by re-lexing the range.
  LangOptions FakeLangOpts;
  Lexer lexer(FakeLangOpts, sourceMgr, bufferID, nullptr, LexerMode::Swift,
              HashbangMode::Disallowed, CommentRetentionMode::ReturnAsTokens,
              offset, endOffset);

  // Start of the text that has been lexed past but not yet copied out.
  SourceLoc nonCommentStart = start;
  Token token;

  // Re-lex the range, and skip the full text of `tok::comment` tokens.
  while (!token.is(tok::eof)) {
    lexer.lex(token);

    // Skip over #sourceLocation's in the file.
    if (token.is(tok::pound_sourceLocation)) {
      // Append the text leading up to the #sourceLocation
      appendRawText(nonCommentStart, token.getLoc());

      // Skip to the right paren. The AST is already valid, so a right paren
      // is expected; still stop at eof so that a range which ends before the
      // closing paren cannot make this loop spin forever.
      while (!token.is(tok::r_paren) && !token.is(tok::eof))
        lexer.lex(token);

      if (token.is(tok::eof)) {
        // The rest of the range belonged to the directive; nothing is left
        // to copy.
        nonCommentStart = end;
        break;
      }

      nonCommentStart = Lexer::getLocForEndOfToken(sourceMgr, token.getLoc());
    }

    if (token.is(tok::comment)) {
      // Grab the start of the full comment token (with leading trivia as well)
      SourceLoc commentLoc = token.getLoc();

      // Find the end of the token (with trailing trivia)
      SourceLoc endLoc = Lexer::getLocForEndOfToken(sourceMgr, token.getLoc());

      // The comment token's range includes leading/trailing whitespace, so trim
      // whitespace and only strip the portions of the comment that are not
      // whitespace.
      CharSourceRange range = CharSourceRange(sourceMgr, commentLoc, endLoc);
      StringRef fullTokenText = sourceMgr.extractText(range);

      unsigned leadingWhitespace =
          fullTokenText.size() - fullTokenText.ltrim().size();
      if (leadingWhitespace > 0) {
        commentLoc = commentLoc.getAdvancedLoc(leadingWhitespace);
      }

      unsigned trailingWhitespace =
          fullTokenText.size() - fullTokenText.rtrim().size();
      if (trailingWhitespace > 0) {
        // Convert to a signed value before negating; negating the unsigned
        // count directly would wrap around instead of going negative.
        endLoc = endLoc.getAdvancedLoc(-static_cast<int>(trailingWhitespace));
      }

      // First, extract the text up to the start of the comment, including the
      // whitespace.
      appendRawText(nonCommentStart, commentLoc);

      // Next, search through the comment text to see if it's a block comment
      // with a newline. If so we need to re-insert a newline to avoid fusing
      // multi-line tokens together.
      auto commentTextRange = CharSourceRange(sourceMgr, commentLoc, endLoc);
      StringRef commentText = sourceMgr.extractText(commentTextRange);
      bool hasNewline = commentText.find_first_of("\n\r") != StringRef::npos;

      // Use a newline as a filler character if the comment itself had a newline
      // in it.
      char filler = hasNewline ? '\n' : ' ';

      // Append a single whitespace filler character, to avoid fusing tokens.
      scratch.push_back(filler);

      // Start the next region after the contents of the comment.
      nonCommentStart = endLoc;
    }
  }

  // Copy whatever trails the last comment or directive.
  if (nonCommentStart.isValid() && nonCommentStart != end) {
    appendRawText(nonCommentStart, end);
  }
}
/// Builds the source text of \p node with the inactive parts of any '#if'
/// blocks removed, writing it into \p scratch.
///
/// The inactive ranges are gathered with ExtractInactiveRanges; the text
/// between them is then copied out piece by piece via appendRange().
///
/// \returns a StringRef over the contents of \p scratch, which therefore
/// stays valid only as long as \p scratch is left unmodified.
StringRef swift::extractInlinableText(SourceManager &sourceMgr, ASTNode node,
                                      SmallVectorImpl<char> &scratch) {
  // Find every range inside the node that must be left out.
  ExtractInactiveRanges inactive(sourceMgr);
  node.walk(inactive);

  // 'cursor' marks the first character that has not been emitted or skipped
  // yet; 'nodeEnd' is the character location just past the node's last token.
  SourceLoc cursor = node.getStartLoc();
  const SourceLoc nodeEnd =
      Lexer::getLocForEndOfToken(sourceMgr, node.getEndLoc());

  for (const CharSourceRange &skipped : inactive.getSortedRanges()) {
    // Emit the active text that precedes this ignored range...
    appendRange(sourceMgr, cursor, skipped.getStart(), scratch);
    // ...then jump over the ignored range itself.
    cursor = skipped.getEnd();
  }

  // Emit any active text that follows the last ignored range.
  if (cursor != nodeEnd)
    appendRange(sourceMgr, cursor, nodeEnd, scratch);

  return StringRef(scratch.data(), scratch.size());
}