diff --git a/t/meson.build b/t/meson.build index a5531df415..52a58eff2b 100644 --- a/t/meson.build +++ b/t/meson.build @@ -496,6 +496,7 @@ integration_tests = [ 't4070-diff-pairs.sh', 't4071-diff-minimal.sh', 't4072-diff-max-depth.sh', + 't4073-diff-shifted-matched-group.sh', 't4100-apply-stat.sh', 't4101-apply-nonl.sh', 't4102-apply-rename.sh', diff --git a/t/t4073-diff-shifted-matched-group.sh b/t/t4073-diff-shifted-matched-group.sh new file mode 100755 index 0000000000..0e915b78a6 --- /dev/null +++ b/t/t4073-diff-shifted-matched-group.sh @@ -0,0 +1,137 @@ +#!/bin/sh + +test_description='shifted diff groups re-diffing during histogram diff' + +. ./test-lib.sh + +test_expect_success 'shifted diff group should re-diff to minimize patch' ' + test_write_lines A x A A A x A A A >file1 && + test_write_lines A x A Z A x A A A >file2 && + + file1_h=$(git rev-parse --short $(git hash-object file1)) && + file2_h=$(git rev-parse --short $(git hash-object file2)) && + + cat >expect <<-EOF && + diff --git a/file1 b/file2 + index $file1_h..$file2_h 100644 + --- a/file1 + +++ b/file2 + @@ -1,7 +1,7 @@ + A + x + A + -A + +Z + A + x + A + EOF + + test_expect_code 1 git diff --no-index --histogram file1 file2 >output && + test_cmp expect output +' + +test_expect_success 're-diff should preserve diff flags' ' + test_write_lines a b c a b c >file1 && + test_write_lines x " b" z a b c >file2 && + + file1_h=$(git rev-parse --short $(git hash-object file1)) && + file2_h=$(git rev-parse --short $(git hash-object file2)) && + + cat >expect <<-EOF && + diff --git a/file1 b/file2 + index $file1_h..$file2_h 100644 + --- a/file1 + +++ b/file2 + @@ -1,6 +1,6 @@ + -a + -b + -c + +x + + b + +z + a + b + c + EOF + + test_expect_code 1 git diff --no-index --histogram file1 file2 >output && + test_cmp expect output && + + cat >expect_iwhite <<-EOF && + diff --git a/file1 b/file2 + index $file1_h..$file2_h 100644 + --- a/file1 + +++ b/file2 + @@ -1,6 +1,6 @@ + -a + +x + b + -c + +z + a + b + c + EOF + + test_expect_code 1 git diff --no-index --histogram --ignore-all-space file1 file2 >output_iwhite && + test_cmp expect_iwhite output_iwhite +' + +test_expect_success 'shifting on either side should trigger re-diff properly' ' + test_write_lines a b c a b c a b c >file1 && + test_write_lines a b c a1 a2 a3 b c1 a b c >file2 && + + file1_h=$(git rev-parse --short $(git hash-object file1)) && + file2_h=$(git rev-parse --short $(git hash-object file2)) && + + cat >expect1 <<-EOF && + diff --git a/file1 b/file2 + index $file1_h..$file2_h 100644 + --- a/file1 + +++ b/file2 + @@ -1,9 +1,11 @@ + a + b + c + -a + +a1 + +a2 + +a3 + b + -c + +c1 + a + b + c + EOF + + test_expect_code 1 git diff --no-index --histogram file1 file2 >output1 && + test_cmp expect1 output1 && + + cat >expect2 <<-EOF && + diff --git a/file2 b/file1 + index $file2_h..$file1_h 100644 + --- a/file2 + +++ b/file1 + @@ -1,11 +1,9 @@ + a + b + c + -a1 + -a2 + -a3 + +a + b + -c1 + +c + a + b + c + EOF + + test_expect_code 1 git diff --no-index --histogram file2 file1 >output2 && + test_cmp expect2 output2 +' + +test_done diff --git a/xdiff/xdiffi.c b/xdiff/xdiffi.c index 6f3998ee54..5d9c7b5434 100644 --- a/xdiff/xdiffi.c +++ b/xdiff/xdiffi.c @@ -793,6 +793,7 @@ static int group_slide_up(xdfile_t *xdf, struct xdlgroup *g) */ int xdl_change_compact(xdfile_t *xdf, xdfile_t *xdfo, long flags) { struct xdlgroup g, go; + struct xdlgroup g_orig, go_orig; long earliest_end, end_matching_other; long groupsize; @@ -806,6 +807,9 @@ int xdl_change_compact(xdfile_t *xdf, xdfile_t *xdfo, long flags) { if (g.end == g.start) goto next; + g_orig = g; + go_orig = go; + /* * Now shift the change up and then down as far as possible in * each direction. If it bumps into any other changes, merge @@ -915,6 +919,45 @@ int xdl_change_compact(xdfile_t *xdf, xdfile_t *xdfo, long flags) { } } + /* + * If this has a matching group from the other file, it could + * either be the original match from the diff algorithm, or + * arrived at by shifting and joining groups. When it's the + * latter, it's possible for the two newly joined sides to have + * matching lines. Re-diff the group to mark these matching + * lines as unchanged and remove from the diff output. + * + * Only do this for histogram diff as its LCS algorithm makes + * this scenario possible. In contrast, patience diff finds LCS + * of unique lines that groups cannot be shifted across. + * Myer's diff (standalone or used as fall-back in patience + * diff) already finds minimal edits so it is not possible for + * shifted groups to result in a smaller diff. (Without + * XDF_NEED_MINIMAL, Myer's isn't technically guaranteed to be + * minimal, but it should be so most of the time) + */ + if (end_matching_other != -1 && + XDF_DIFF_ALG(flags) == XDF_HISTOGRAM_DIFF && + (g.start != g_orig.start || + g.end != g_orig.end || + go.start != go_orig.start || + go.end != go_orig.end)) { + xpparam_t xpp; + xdfenv_t xe; + + memset(&xpp, 0, sizeof(xpp)); + xpp.flags = flags & ~XDF_DIFF_ALGORITHM_MASK; + + memcpy(&xe.xdf1, xdf, sizeof(xdfile_t)); + memcpy(&xe.xdf2, xdfo, sizeof(xdfile_t)); + + if (xdl_fall_back_diff(&xe, &xpp, + g.start + 1, g.end - g.start, + go.start + 1, go.end - go.start)) { + return -1; + } + } + next: /* Move past the just-processed group: */ if (group_next(xdf, &g))