//===------------------------------ SourceComparator.h -----------*- C++-*-===// // // This source file is part of the Swift.org open source project // // Copyright (c) 2014 - 2017 Apple Inc. and the Swift project authors // Licensed under Apache License v2.0 with Runtime Library Exception // // See https://swift.org/LICENSE.txt for license information // See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors // //===----------------------------------------------------------------------===// // Declares the code needed to essentially "diff" the previously-compiled and // possibly-about-to-be-compiled versions of a source file. // Unlike Unix diff, it can go down to the granularity of characters within a // line. #ifndef SWIFT_DRIVER_SOURCECOMPARATOR_H #define SWIFT_DRIVER_SOURCECOMPARATOR_H #include "swift/AST/IncrementalRanges.h" #include "llvm/Support/MemoryBuffer.h" #include #include #include namespace swift { namespace incremental_ranges { class SourceComparator { public: //============================================================================== // MARK: Packaging indices for both sides //============================================================================== /// It is useful to have structures that hold both the left- and /// right-hand-sides of a "diff" input Describes a side: enum class Side : size_t { L = 0, R = 1 }; /// A general class to hold something that exists on both sides: template struct LR { T contents[2]; /// Get the contents on a particular side T &operator[](const Side side) { return contents[size_t(side)]; } const T &operator[](const Side side) const { return contents[size_t(side)]; } /// Get a side by name: // TODO: Could get ride of these and just use the subscripting above T &lhs() { return contents[size_t(Side::L)]; } T &rhs() { return contents[size_t(Side::R)]; } const T &lhs() const { return contents[size_t(Side::L)]; } const T &rhs() const { return contents[size_t(Side::R)]; } LR(T lhs, T rhs) { contents[0] = lhs; contents[1] = rhs; } bool operator==(const LR &other) { return lhs() == other.lhs() && rhs() == other.rhs(); } }; /// A structure holding size_t's on both sides. /// Zero-origin template struct Indices : public LR { Indices(size_t lhs, size_t rhs) : LR(lhs, rhs) {} Derived operator+(size_t x) const { return Derived(lhs() + x, rhs() + x); } Derived operator-(size_t x) const { return Derived(lhs() - x, rhs() - x); } size_t min() const { return std::min(lhs(), rhs()); } bool operator<(const Derived &other) const { return lhs() < other.lhs() && rhs() < other.rhs(); } bool operator<=(const Derived &other) const { return lhs() <= other.lhs() && rhs() <= other.rhs(); } }; /// Character indices into a line. /// Zero-origin struct CharIndices : public Indices { CharIndices(size_t lhs, size_t rhs) : Indices(lhs, rhs) {} CharIndices &operator++() { ++lhs(), ++rhs(); return *this; } CharIndices &operator--() { --lhs(), --rhs(); return *this; } bool bothNonZero() const { return lhs() != 0 && rhs() != 0; } }; /// Indices into a collection of lines. /// Zero-origin struct LineIndices : public Indices { LineIndices() : Indices(~0, ~0) {} LineIndices(size_t lhs, size_t rhs) : Indices(lhs, rhs) {} LineIndices &operator=(const LineIndices &) = default; LineIndices &operator++() { ++lhs(), ++rhs(); return *this; } LineIndices &operator--() { --lhs(), --rhs(); return *this; } }; /// Strings on both sides. struct StringRefs : LR { StringRefs(StringRef lhs, StringRef rhs) : LR(lhs, rhs) {} CharIndices size() const { return CharIndices(lhs().size(), rhs().size()); } /// Do the strings match at the provided character indices? bool matchAt(const CharIndices &indices) const { return lhs()[indices.lhs()] == rhs()[indices.rhs()]; } }; /// Optional strings on both sides struct OptStringRefs : LR> { OptStringRefs(Optional lhs, Optional rhs) : LR(lhs, rhs) {} }; /// A half-open span of lines on both sides, from start up to but not /// including end. struct LineSpan { /// [start, end) LineIndices start, end; LineSpan(LineIndices start, LineIndices end) : start(start), end(end) {} bool operator==(const LineSpan &rhs) { return start == rhs.start && end == rhs.end; } bool bothNonEmpty() const { return start.lhs() < end.lhs() && start.rhs() < end.rhs(); } bool eitherNonEmpty() const { return start.lhs() < end.lhs() || start.rhs() < end.rhs(); } bool bothEmpty() const { return !eitherNonEmpty(); } LineIndices size() const { return LineIndices(end.lhs() - start.lhs(), end.rhs() - start.rhs()); } }; /// A pair of collections of lines, one on each side struct Lines : LR> { Lines(std::vector lhs, std::vector rhs) : LR(lhs, rhs) {} bool matchAt(LineIndices where) const { return lhs()[where.lhs()] == rhs()[where.rhs()]; } StringRefs operator[](const LineIndices &indices) const { return StringRefs(lhs()[indices.lhs()], rhs()[indices.rhs()]); } /// Cannot use subscripting to get a particular side here because used it /// above for something else. So use a named function. const std::vector &side(Side s) const { return s == Side::L ? lhs() : rhs(); } LineIndices size() const { return LineIndices(lhs().size(), rhs().size()); } CharIndices sizeOfLastLines() const { return CharIndices(lhs().empty() ? 0 : lhs().back().size(), rhs().empty() ? 0 : rhs().back().size()); } }; /// A pair of SerializableSourceRanges, one per side. /// (SerializableSourceRanges are 1-origin, and semi-open.) struct LRSerializableRange : LR { LRSerializableRange(const SerializableSourceRange &lhs, const SerializableSourceRange &rhs) : LR(lhs, rhs) {} }; /// A pair of vectors of SerializableSourceRanges, one per side. struct LRRanges : LR { LRRanges() : LRRanges({}, {}) {} LRRanges(Ranges &&lhs, Ranges &&rhs) : LR(std::move(lhs), std::move(rhs)) {} static LRRanges wholeFile() { return {SerializableSourceRange::RangesForWholeFile(), SerializableSourceRange::RangesForWholeFile()}; } void push_back(const LRSerializableRange &range) { lhs().push_back(range.lhs()); rhs().push_back(range.rhs()); } bool empty() const { assert(lhs().empty() == rhs().empty() && "diff should produce at least " "empty ranges for an addition " "to the other side"); return lhs().empty(); } }; private: /// The inputs, separated into lines Lines linesToCompare; /// The trimmed input regions to compare, after cutting away identical /// beginnings and endings. LineSpan regionsToCompare; /// If line i on the left matches line j on the right, match[i] == j. /// If line i on the left has no match, match[i] == None. std::vector> matches; /// Chains matching lines together (I think) struct PRLink { LineIndices lines; Optional next; PRLink(LineIndices lines, const Optional next) : lines(lines), next(next) {} PRLink() : PRLink(LineIndices(), None) {} }; //============================================================================== // MARK: SortedSequence //============================================================================== private: class SortedSequence { typedef size_t Elem; std::vector contents; public: /// Find the place at which x would normally be inserted into the sequence, /// and replace that element with x, returning the index. /// If x is already in the sequence, do nothing and return None. /// Append to the end if necessary. /// Preserve sort order. Optional replaceNextLargerWith(const Elem x); bool empty() const { return contents.empty(); } Elem back() const { return contents.back(); } size_t size() const { return contents.size(); } private: /// Return index of first greater element or the end index if there is none. size_t locationOfFirstGreaterOrEqualElement(const Elem x) const; }; //============================================================================== // MARK: initialization //============================================================================== public: SourceComparator(StringRef s1, StringRef s2); private: static std::vector splitIntoLines(StringRef s); //============================================================================== // MARK: Comparing //============================================================================== public: /// Run the actual diff algorithm void compare(); private: void trimStart(); void trimEnd(); /// Return a map from a line in rhs to indices of all identical lines in /// rhs std::unordered_map> buildEquivalenceClasses(); std::pair, SortedSequence> buildDAGOfSubsequences( std::unordered_map> rhsMap); void scanMatchedLines( std::pair, SortedSequence> &&linksAndThres); //============================================================================== // MARK: summarizing //============================================================================== public: std::vector getMatchingRegions() const; std::vector getMismatchingRegions() const; void forEachMatch(function_ref) const; void forEachMismatch(function_ref) const; /// A region is zero-origin, semi-open. An SerializableRange is 1-origin, /// closed. /// This function resolves changed regions to within lines. It's a bit tricky. LRSerializableRange convertAMismatch(LineSpan mismatch) const; LRRanges convertAllMismatches() const; private: bool areEitherInRange(LineIndices lines) const; LineIndices getFirstMatch() const; LineIndices getFirstMismatch() const; LineIndices getNextMismatchAfter(const LineIndices nextMatch) const; LineIndices getNextMatchAfter(const LineIndices nextMismatch) const; //============================================================================== // MARK: converting mismatches //============================================================================== CharIndices computeFirstMismatchInLine(LineSpan mismatchLines) const; CharIndices computeFirstMismatchInLinesOnBothSides(LineSpan mismatchLines) const; CharIndices computeFirstMismatchInLineOnOneSide(LineSpan mismatchLines, Side) const; CharIndices computeEndMismatchInLine(LineSpan mismatchLines, CharIndices firstMismatchInLine) const; CharIndices computeEndMismatchInLineOnBothSides( LineSpan mismatchLines, const CharIndices firstMismatchInLine) const; CharIndices computeEndMismatchInLineOnOneSide(LineSpan mismatchLines, const CharIndices firstMismatchInLine, const Side) const; size_t computeEndLinesToGoWithChars(LineSpan mismatchLines, CharIndices endMismatchInLine, const Side) const; //============================================================================== // MARK: printing //============================================================================== public: void dump() const { print(llvm::errs()); } void print(raw_ostream &) const; void reportMismatch(const LineSpan mismatch, raw_ostream &) const; //============================================================================== // MARK: testing //============================================================================== public: static bool test(); }; } // namespace incremental_ranges } // namespace swift #endif /* SWIFT_DRIVER_SOURCECOMPARATOR_H */