Merge branch 'yc/histogram-hunk-shift-fix' into jch

The final clean-up phase of the diff output could make the result of
the histogram diff algorithm suboptimal; this has been corrected.

Comments?

* yc/histogram-hunk-shift-fix:
  xdiff: re-diff shifted change groups when using histogram algorithm
This commit is contained in:
Junio C Hamano
2025-12-12 15:53:08 +09:00
3 changed files with 181 additions and 0 deletions

View File

@@ -500,6 +500,7 @@ integration_tests = [
't4070-diff-pairs.sh',
't4071-diff-minimal.sh',
't4072-diff-max-depth.sh',
't4073-diff-shifted-matched-group.sh',
't4100-apply-stat.sh',
't4101-apply-nonl.sh',
't4102-apply-rename.sh',

View File

@@ -0,0 +1,137 @@
#!/bin/sh
# Tests for `git diff --no-index --histogram`: every case below writes
# two small files, generates a patch, and compares it against an exact
# expected patch.
test_description='shifted diff groups re-diffing during histogram diff'
# Pulls in the helpers used below (test_expect_success, test_write_lines,
# test_expect_code, test_cmp, test_done) -- presumably all provided by
# test-lib.sh, since none of them is defined in this script.
. ./test-lib.sh
# file1 and file2 differ only in their fourth line (A -> Z), surrounded
# by many repeated "A" lines. The expected patch is one hunk containing
# a single one-line replacement, and the diff command is expected to
# exit with status 1.
# NOTE(review): in this listing the unchanged lines of the expected
# patch carry no leading space, which unified-diff context lines
# normally have. Leading whitespace looks stripped throughout; confirm
# against the checked-in file, since the comparison with the real
# output is presumably byte-exact.
test_expect_success 'shifted diff group should re-diff to minimize patch' '
test_write_lines A x A A A x A A A >file1 &&
test_write_lines A x A Z A x A A A >file2 &&
file1_h=$(git rev-parse --short $(git hash-object file1)) &&
file2_h=$(git rev-parse --short $(git hash-object file2)) &&
cat >expect <<-EOF &&
diff --git a/file1 b/file2
index $file1_h..$file2_h 100644
--- a/file1
+++ b/file2
@@ -1,7 +1,7 @@
A
x
A
-A
+Z
A
x
A
EOF
test_expect_code 1 git diff --no-index --histogram file1 file2 >output &&
test_cmp expect output
'
# Diffs the same pair of files twice. file2 replaces the first three
# lines of file1 (a, b, c) with (x, " b", z), where " b" differs from
# "b" only by a leading space.
#  - Without whitespace options, the expected patch replaces all three
#    lines wholesale.
#  - With --ignore-all-space, the expected patch keeps the "b" line as
#    unchanged and only replaces a -> x and c -> z.
# NOTE(review): leading whitespace in the expected patches looks
# stripped in this listing (context lines show no leading space, and
# the exact spelling of the unchanged "b" line cannot be determined
# from here) -- verify against the checked-in file.
test_expect_success 're-diff should preserve diff flags' '
test_write_lines a b c a b c >file1 &&
test_write_lines x " b" z a b c >file2 &&
file1_h=$(git rev-parse --short $(git hash-object file1)) &&
file2_h=$(git rev-parse --short $(git hash-object file2)) &&
cat >expect <<-EOF &&
diff --git a/file1 b/file2
index $file1_h..$file2_h 100644
--- a/file1
+++ b/file2
@@ -1,6 +1,6 @@
-a
-b
-c
+x
+ b
+z
a
b
c
EOF
test_expect_code 1 git diff --no-index --histogram file1 file2 >output &&
test_cmp expect output &&
cat >expect_iwhite <<-EOF &&
diff --git a/file1 b/file2
index $file1_h..$file2_h 100644
--- a/file1
+++ b/file2
@@ -1,6 +1,6 @@
-a
+x
b
-c
+z
a
b
c
EOF
test_expect_code 1 git diff --no-index --histogram --ignore-all-space file1 file2 >output_iwhite &&
test_cmp expect_iwhite output_iwhite
'
# file1 is "a b c" repeated three times; file2 rewrites the middle
# repetition as "a1 a2 a3 b c1". The pair is diffed in both directions
# (file1 -> file2, then file2 -> file1). In each direction the expected
# patch keeps the middle "b" as an unchanged line between two separate
# replacements, rather than replacing the whole middle group.
# NOTE(review): the unchanged lines of both expected patches appear
# without a leading space in this listing; leading whitespace looks
# stripped -- verify against the checked-in file.
test_expect_success 'shifting on either side should trigger re-diff properly' '
test_write_lines a b c a b c a b c >file1 &&
test_write_lines a b c a1 a2 a3 b c1 a b c >file2 &&
file1_h=$(git rev-parse --short $(git hash-object file1)) &&
file2_h=$(git rev-parse --short $(git hash-object file2)) &&
cat >expect1 <<-EOF &&
diff --git a/file1 b/file2
index $file1_h..$file2_h 100644
--- a/file1
+++ b/file2
@@ -1,9 +1,11 @@
a
b
c
-a
+a1
+a2
+a3
b
-c
+c1
a
b
c
EOF
test_expect_code 1 git diff --no-index --histogram file1 file2 >output1 &&
test_cmp expect1 output1 &&
cat >expect2 <<-EOF &&
diff --git a/file2 b/file1
index $file2_h..$file1_h 100644
--- a/file2
+++ b/file1
@@ -1,11 +1,9 @@
a
b
c
-a1
-a2
-a3
+a
b
-c1
+c
a
b
c
EOF
test_expect_code 1 git diff --no-index --histogram file2 file1 >output2 &&
test_cmp expect2 output2
'
# End of the script; test_done is presumably the test-lib.sh hook that
# finalizes and reports the results of the cases above.
test_done

View File

@@ -792,6 +792,7 @@ static int group_slide_up(xdfile_t *xdf, struct xdlgroup *g)
*/
int xdl_change_compact(xdfile_t *xdf, xdfile_t *xdfo, long flags) {
struct xdlgroup g, go;
struct xdlgroup g_orig, go_orig;
long earliest_end, end_matching_other;
long groupsize;
@@ -805,6 +806,9 @@ int xdl_change_compact(xdfile_t *xdf, xdfile_t *xdfo, long flags) {
if (g.end == g.start)
goto next;
g_orig = g;
go_orig = go;
/*
* Now shift the change up and then down as far as possible in
* each direction. If it bumps into any other changes, merge
@@ -914,6 +918,45 @@ int xdl_change_compact(xdfile_t *xdf, xdfile_t *xdfo, long flags) {
}
}
/*
* If this has a matching group from the other file, it could
* either be the original match from the diff algorithm, or
* arrived at by shifting and joining groups. When it's the
* latter, it's possible for the two newly joined sides to have
* matching lines. Re-diff the group to mark these matching
* lines as unchanged and remove from the diff output.
*
* Only do this for histogram diff as its LCS algorithm makes
* this scenario possible. In contrast, patience diff finds LCS
* of unique lines that groups cannot be shifted across.
* Myers' diff (standalone or used as fall-back in patience
* diff) already finds minimal edits so it is not possible for
* shifted groups to result in a smaller diff. (Without
* XDF_NEED_MINIMAL, Myers' diff isn't technically guaranteed to
* be minimal, but it should be so most of the time.)
*/
if (end_matching_other != -1 &&
XDF_DIFF_ALG(flags) == XDF_HISTOGRAM_DIFF &&
(g.start != g_orig.start ||
g.end != g_orig.end ||
go.start != go_orig.start ||
go.end != go_orig.end)) {
xpparam_t xpp;
xdfenv_t xe;
memset(&xpp, 0, sizeof(xpp));
xpp.flags = flags & ~XDF_DIFF_ALGORITHM_MASK;
memcpy(&xe.xdf1, xdf, sizeof(xdfile_t));
memcpy(&xe.xdf2, xdfo, sizeof(xdfile_t));
if (xdl_fall_back_diff(&xe, &xpp,
g.start + 1, g.end - g.start,
go.start + 1, go.end - go.start)) {
return -1;
}
}
next:
/* Move past the just-processed group: */
if (group_next(xdf, &g))