mirror of
https://github.com/apple/swift.git
synced 2025-12-14 20:36:38 +01:00
349 lines
12 KiB
C++
349 lines
12 KiB
C++
//===------------------------------ SourceComparator.h -----------*- C++-*-===//
|
|
//
|
|
// This source file is part of the Swift.org open source project
|
|
//
|
|
// Copyright (c) 2014 - 2017 Apple Inc. and the Swift project authors
|
|
// Licensed under Apache License v2.0 with Runtime Library Exception
|
|
//
|
|
// See https://swift.org/LICENSE.txt for license information
|
|
// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
// Declares the code needed to essentially "diff" the previously-compiled and
|
|
// possibly-about-to-be-compiled versions of a source file.
|
|
// Unlike Unix diff, it can go down to the granularity of characters within a
|
|
// line.
|
|
|
|
#ifndef SWIFT_DRIVER_SOURCECOMPARATOR_H
|
|
#define SWIFT_DRIVER_SOURCECOMPARATOR_H
|
|
|
|
#include "swift/AST/IncrementalRanges.h"
|
|
#include "llvm/Support/MemoryBuffer.h"
|
|
#include <string>
|
|
#include <unordered_map>
|
|
#include <vector>
|
|
|
|
namespace swift {
|
|
namespace incremental_ranges {
|
|
|
|
class SourceComparator {
|
|
|
|
public:
|
|
//==============================================================================
|
|
// MARK: Packaging indices for both sides
|
|
//==============================================================================
|
|
|
|
/// It is useful to have structures that hold both the left- and
|
|
/// right-hand-sides of a "diff" input Describes a side:
|
|
enum class Side : size_t { L = 0, R = 1 };
|
|
|
|
/// A general class to hold something that exists on both sides:
|
|
template <typename T> struct LR {
|
|
T contents[2];
|
|
/// Get the contents on a particular side
|
|
T &operator[](const Side side) { return contents[size_t(side)]; }
|
|
const T &operator[](const Side side) const {
|
|
return contents[size_t(side)];
|
|
}
|
|
|
|
/// Get a side by name:
|
|
// TODO: Could get ride of these and just use the subscripting above
|
|
T &lhs() { return contents[size_t(Side::L)]; }
|
|
T &rhs() { return contents[size_t(Side::R)]; }
|
|
|
|
const T &lhs() const { return contents[size_t(Side::L)]; }
|
|
const T &rhs() const { return contents[size_t(Side::R)]; }
|
|
LR(T lhs, T rhs) {
|
|
contents[0] = lhs;
|
|
contents[1] = rhs;
|
|
}
|
|
bool operator==(const LR &other) {
|
|
return lhs() == other.lhs() && rhs() == other.rhs();
|
|
}
|
|
};
|
|
|
|
/// A structure holding size_t's on both sides.
|
|
/// Zero-origin
|
|
template <typename Derived> struct Indices : public LR<size_t> {
|
|
Indices(size_t lhs, size_t rhs) : LR(lhs, rhs) {}
|
|
Derived operator+(size_t x) const { return Derived(lhs() + x, rhs() + x); }
|
|
Derived operator-(size_t x) const { return Derived(lhs() - x, rhs() - x); }
|
|
size_t min() const { return std::min(lhs(), rhs()); }
|
|
bool operator<(const Derived &other) const {
|
|
return lhs() < other.lhs() && rhs() < other.rhs();
|
|
}
|
|
bool operator<=(const Derived &other) const {
|
|
return lhs() <= other.lhs() && rhs() <= other.rhs();
|
|
}
|
|
};
|
|
/// Character indices into a line.
|
|
/// Zero-origin
|
|
struct CharIndices : public Indices<CharIndices> {
|
|
CharIndices(size_t lhs, size_t rhs) : Indices(lhs, rhs) {}
|
|
CharIndices &operator++() {
|
|
++lhs(), ++rhs();
|
|
return *this;
|
|
}
|
|
CharIndices &operator--() {
|
|
--lhs(), --rhs();
|
|
return *this;
|
|
}
|
|
bool bothNonZero() const { return lhs() != 0 && rhs() != 0; }
|
|
};
|
|
|
|
/// Indices into a collection of lines.
|
|
/// Zero-origin
|
|
struct LineIndices : public Indices<LineIndices> {
|
|
LineIndices() : Indices(~0, ~0) {}
|
|
LineIndices(size_t lhs, size_t rhs) : Indices(lhs, rhs) {}
|
|
LineIndices &operator=(const LineIndices &) = default;
|
|
LineIndices &operator++() {
|
|
++lhs(), ++rhs();
|
|
return *this;
|
|
}
|
|
LineIndices &operator--() {
|
|
--lhs(), --rhs();
|
|
return *this;
|
|
}
|
|
};
|
|
|
|
/// Strings on both sides.
|
|
struct StringRefs : LR<StringRef> {
|
|
StringRefs(StringRef lhs, StringRef rhs) : LR(lhs, rhs) {}
|
|
CharIndices size() const { return CharIndices(lhs().size(), rhs().size()); }
|
|
/// Do the strings match at the provided character indices?
|
|
bool matchAt(const CharIndices &indices) const {
|
|
return lhs()[indices.lhs()] == rhs()[indices.rhs()];
|
|
}
|
|
};
|
|
|
|
/// Optional strings on both sides
|
|
struct OptStringRefs : LR<Optional<StringRef>> {
|
|
OptStringRefs(Optional<StringRef> lhs, Optional<StringRef> rhs)
|
|
: LR(lhs, rhs) {}
|
|
};
|
|
|
|
/// A half-open span of lines on both sides, from start up to but not
|
|
/// including end.
|
|
struct LineSpan {
|
|
/// [start, end)
|
|
LineIndices start, end;
|
|
|
|
LineSpan(LineIndices start, LineIndices end) : start(start), end(end) {}
|
|
bool operator==(const LineSpan &rhs) {
|
|
return start == rhs.start && end == rhs.end;
|
|
}
|
|
bool bothNonEmpty() const {
|
|
return start.lhs() < end.lhs() && start.rhs() < end.rhs();
|
|
}
|
|
bool eitherNonEmpty() const {
|
|
return start.lhs() < end.lhs() || start.rhs() < end.rhs();
|
|
}
|
|
bool bothEmpty() const { return !eitherNonEmpty(); }
|
|
|
|
LineIndices size() const {
|
|
return LineIndices(end.lhs() - start.lhs(), end.rhs() - start.rhs());
|
|
}
|
|
};
|
|
|
|
/// A pair of collections of lines, one on each side
|
|
struct Lines : LR<std::vector<StringRef>> {
|
|
Lines(std::vector<StringRef> lhs, std::vector<StringRef> rhs)
|
|
: LR(lhs, rhs) {}
|
|
|
|
bool matchAt(LineIndices where) const {
|
|
return lhs()[where.lhs()] == rhs()[where.rhs()];
|
|
}
|
|
StringRefs operator[](const LineIndices &indices) const {
|
|
return StringRefs(lhs()[indices.lhs()], rhs()[indices.rhs()]);
|
|
}
|
|
/// Cannot use subscripting to get a particular side here because used it
|
|
/// above for something else. So use a named function.
|
|
const std::vector<StringRef> &side(Side s) const {
|
|
return s == Side::L ? lhs() : rhs();
|
|
}
|
|
LineIndices size() const { return LineIndices(lhs().size(), rhs().size()); }
|
|
|
|
CharIndices sizeOfLastLines() const {
|
|
return CharIndices(lhs().empty() ? 0 : lhs().back().size(),
|
|
rhs().empty() ? 0 : rhs().back().size());
|
|
}
|
|
};
|
|
|
|
/// A pair of SerializableSourceRanges, one per side.
|
|
/// (SerializableSourceRanges are 1-origin, and semi-open.)
|
|
struct LRSerializableRange : LR<SerializableSourceRange> {
|
|
LRSerializableRange(const SerializableSourceRange &lhs,
|
|
const SerializableSourceRange &rhs)
|
|
: LR(lhs, rhs) {}
|
|
};
|
|
/// A pair of vectors of SerializableSourceRanges, one per side.
|
|
struct LRRanges : LR<Ranges> {
|
|
LRRanges() : LRRanges({}, {}) {}
|
|
LRRanges(Ranges &&lhs, Ranges &&rhs) : LR(std::move(lhs), std::move(rhs)) {}
|
|
static LRRanges wholeFile() {
|
|
return {SerializableSourceRange::RangesForWholeFile(),
|
|
SerializableSourceRange::RangesForWholeFile()};
|
|
}
|
|
void push_back(const LRSerializableRange &range) {
|
|
lhs().push_back(range.lhs());
|
|
rhs().push_back(range.rhs());
|
|
}
|
|
bool empty() const {
|
|
assert(lhs().empty() == rhs().empty() && "diff should produce at least "
|
|
"empty ranges for an addition "
|
|
"to the other side");
|
|
return lhs().empty();
|
|
}
|
|
};
|
|
|
|
private:
|
|
/// The inputs, separated into lines
|
|
Lines linesToCompare;
|
|
|
|
/// The trimmed input regions to compare, after cutting away identical
|
|
/// beginnings and endings.
|
|
LineSpan regionsToCompare;
|
|
|
|
/// If line i on the left matches line j on the right, match[i] == j.
|
|
/// If line i on the left has no match, match[i] == None.
|
|
std::vector<Optional<size_t>> matches;
|
|
|
|
/// Chains matching lines together (I think)
|
|
struct PRLink {
|
|
LineIndices lines;
|
|
Optional<size_t> next;
|
|
PRLink(LineIndices lines, const Optional<size_t> next)
|
|
: lines(lines), next(next) {}
|
|
PRLink() : PRLink(LineIndices(), None) {}
|
|
};
|
|
|
|
|
|
//==============================================================================
|
|
// MARK: SortedSequence
|
|
//==============================================================================
|
|
private:
|
|
class SortedSequence {
|
|
typedef size_t Elem;
|
|
std::vector<Elem> contents;
|
|
|
|
public:
|
|
/// Find the place at which x would normally be inserted into the sequence,
|
|
/// and replace that element with x, returning the index.
|
|
/// If x is already in the sequence, do nothing and return None.
|
|
/// Append to the end if necessary.
|
|
/// Preserve sort order.
|
|
Optional<size_t> replaceNextLargerWith(const Elem x);
|
|
bool empty() const { return contents.empty(); }
|
|
|
|
Elem back() const { return contents.back(); }
|
|
|
|
size_t size() const { return contents.size(); }
|
|
|
|
private:
|
|
/// Return index of first greater element or the end index if there is none.
|
|
size_t locationOfFirstGreaterOrEqualElement(const Elem x) const;
|
|
};
|
|
|
|
//==============================================================================
|
|
// MARK: initialization
|
|
//==============================================================================
|
|
|
|
public:
|
|
SourceComparator(StringRef s1, StringRef s2);
|
|
|
|
private:
|
|
static std::vector<StringRef> splitIntoLines(StringRef s);
|
|
|
|
//==============================================================================
|
|
// MARK: Comparing
|
|
//==============================================================================
|
|
public:
|
|
/// Run the actual diff algorithm
|
|
void compare();
|
|
|
|
private:
|
|
void trimStart();
|
|
void trimEnd();
|
|
|
|
/// Return a map from a line in rhs to indices of all identical lines in
|
|
/// rhs
|
|
std::unordered_map<std::string, std::vector<size_t>>
|
|
buildEquivalenceClasses();
|
|
|
|
std::pair<std::vector<PRLink>, SortedSequence> buildDAGOfSubsequences(
|
|
std::unordered_map<std::string, std::vector<size_t>> rhsMap);
|
|
|
|
void scanMatchedLines(
|
|
std::pair<std::vector<PRLink>, SortedSequence> &&linksAndThres);
|
|
|
|
//==============================================================================
|
|
// MARK: summarizing
|
|
//==============================================================================
|
|
public:
|
|
std::vector<LineSpan> getMatchingRegions() const;
|
|
std::vector<LineSpan> getMismatchingRegions() const;
|
|
|
|
void forEachMatch(function_ref<void(const LineSpan)>) const;
|
|
void forEachMismatch(function_ref<void(const LineSpan)>) const;
|
|
|
|
/// A region is zero-origin, semi-open. An SerializableRange is 1-origin,
|
|
/// closed.
|
|
/// This function resolves changed regions to within lines. It's a bit tricky.
|
|
LRSerializableRange convertAMismatch(LineSpan mismatch) const;
|
|
|
|
LRRanges convertAllMismatches() const;
|
|
|
|
private:
|
|
bool areEitherInRange(LineIndices lines) const;
|
|
|
|
LineIndices getFirstMatch() const;
|
|
LineIndices getFirstMismatch() const;
|
|
|
|
LineIndices getNextMismatchAfter(const LineIndices nextMatch) const;
|
|
|
|
LineIndices getNextMatchAfter(const LineIndices nextMismatch) const;
|
|
|
|
//==============================================================================
|
|
// MARK: converting mismatches
|
|
//==============================================================================
|
|
CharIndices computeFirstMismatchInLine(LineSpan mismatchLines) const;
|
|
CharIndices
|
|
computeFirstMismatchInLinesOnBothSides(LineSpan mismatchLines) const;
|
|
CharIndices computeFirstMismatchInLineOnOneSide(LineSpan mismatchLines,
|
|
Side) const;
|
|
|
|
CharIndices computeEndMismatchInLine(LineSpan mismatchLines,
|
|
CharIndices firstMismatchInLine) const;
|
|
CharIndices computeEndMismatchInLineOnBothSides(
|
|
LineSpan mismatchLines, const CharIndices firstMismatchInLine) const;
|
|
CharIndices
|
|
computeEndMismatchInLineOnOneSide(LineSpan mismatchLines,
|
|
const CharIndices firstMismatchInLine,
|
|
const Side) const;
|
|
|
|
size_t computeEndLinesToGoWithChars(LineSpan mismatchLines,
|
|
CharIndices endMismatchInLine,
|
|
const Side) const;
|
|
//==============================================================================
|
|
// MARK: printing
|
|
//==============================================================================
|
|
public:
|
|
void dump() const { print(llvm::errs()); }
|
|
void print(raw_ostream &) const;
|
|
|
|
void reportMismatch(const LineSpan mismatch, raw_ostream &) const;
|
|
|
|
//==============================================================================
|
|
// MARK: testing
|
|
//==============================================================================
|
|
public:
|
|
static bool test();
|
|
};
|
|
|
|
} // namespace incremental_ranges
|
|
} // namespace swift
|
|
|
|
#endif /* SWIFT_DRIVER_SOURCECOMPARATOR_H */
|