From 0d53d1994670ce43964a34bdf1f68d26188bafc8 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Tue, 29 Jun 2021 02:13:02 +0000 Subject: [PATCH 1/7] p2000: add 'git checkout -' test and decrease depth As we increase our list of commands to test in p2000-sparse-operations.sh, we will want to have a slightly smaller test repository. Reduce the size by a factor of four by reducing the depth of the step that creates a big index around a moderately-sized repository. Also add a step to run 'git checkout -' on repeat. This requires having a previous location in the reflog, so add that to the initialization steps. Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- t/perf/p2000-sparse-operations.sh | 27 ++++++++++++++++++++------- 1 file changed, 20 insertions(+), 7 deletions(-) diff --git a/t/perf/p2000-sparse-operations.sh b/t/perf/p2000-sparse-operations.sh index 94513c9774..f7f8c01210 100755 --- a/t/perf/p2000-sparse-operations.sh +++ b/t/perf/p2000-sparse-operations.sh @@ -6,7 +6,7 @@ test_description="test performance of Git operations using the index" test_perf_default_repo -SPARSE_CONE=f2/f4/f1 +SPARSE_CONE=f2/f4 test_expect_success 'setup repo and indexes' ' git reset --hard HEAD && @@ -27,7 +27,7 @@ test_expect_success 'setup repo and indexes' ' OLD_COMMIT=$(git rev-parse HEAD) && OLD_TREE=$(git rev-parse HEAD^{tree}) && - for i in $(test_seq 1 4) + for i in $(test_seq 1 3) do cat >in <<-EOF && 100755 blob $BLOB a @@ -43,14 +43,23 @@ test_expect_success 'setup repo and indexes' ' done && git sparse-checkout init --cone && - git branch -f wide $OLD_COMMIT && + git sparse-checkout set $SPARSE_CONE && + git checkout -b wide $OLD_COMMIT && + + for l2 in f1 f2 f3 f4 + do + echo more bogus >>$SPARSE_CONE/$l2/a && + git commit -a -m "edit $SPARSE_CONE/$l2/a" || return 1 + done && + git -c core.sparseCheckoutCone=true clone --branch=wide --sparse . full-index-v3 && ( cd full-index-v3 && git sparse-checkout init --cone && git sparse-checkout set $SPARSE_CONE && git config index.version 3 && - git update-index --index-version=3 + git update-index --index-version=3 && + git checkout HEAD~4 ) && git -c core.sparseCheckoutCone=true clone --branch=wide --sparse . full-index-v4 && ( @@ -58,7 +67,8 @@ test_expect_success 'setup repo and indexes' ' git sparse-checkout init --cone && git sparse-checkout set $SPARSE_CONE && git config index.version 4 && - git update-index --index-version=4 + git update-index --index-version=4 && + git checkout HEAD~4 ) && git -c core.sparseCheckoutCone=true clone --branch=wide --sparse . sparse-index-v3 && ( @@ -66,7 +76,8 @@ test_expect_success 'setup repo and indexes' ' git sparse-checkout init --cone --sparse-index && git sparse-checkout set $SPARSE_CONE && git config index.version 3 && - git update-index --index-version=3 + git update-index --index-version=3 && + git checkout HEAD~4 ) && git -c core.sparseCheckoutCone=true clone --branch=wide --sparse . sparse-index-v4 && ( @@ -74,7 +85,8 @@ test_expect_success 'setup repo and indexes' ' git sparse-checkout init --cone --sparse-index && git sparse-checkout set $SPARSE_CONE && git config index.version 4 && - git update-index --index-version=4 + git update-index --index-version=4 && + git checkout HEAD~4 ) ' @@ -97,5 +109,6 @@ test_perf_on_all git status test_perf_on_all git add -A test_perf_on_all git add . test_perf_on_all git commit -a -m A +test_perf_on_all git checkout -f - test_done From 11042ab9142bab44db75ba9ececf836053fdb92a Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Tue, 29 Jun 2021 02:13:03 +0000 Subject: [PATCH 2/7] p2000: compress repo names By using shorter names for the test repos, we will get a slightly more compressed performance summary without comprimising clarity. Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- t/perf/p2000-sparse-operations.sh | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/t/perf/p2000-sparse-operations.sh b/t/perf/p2000-sparse-operations.sh index f7f8c01210..597626276f 100755 --- a/t/perf/p2000-sparse-operations.sh +++ b/t/perf/p2000-sparse-operations.sh @@ -52,36 +52,36 @@ test_expect_success 'setup repo and indexes' ' git commit -a -m "edit $SPARSE_CONE/$l2/a" || return 1 done && - git -c core.sparseCheckoutCone=true clone --branch=wide --sparse . full-index-v3 && + git -c core.sparseCheckoutCone=true clone --branch=wide --sparse . full-v3 && ( - cd full-index-v3 && + cd full-v3 && git sparse-checkout init --cone && git sparse-checkout set $SPARSE_CONE && git config index.version 3 && git update-index --index-version=3 && git checkout HEAD~4 ) && - git -c core.sparseCheckoutCone=true clone --branch=wide --sparse . full-index-v4 && + git -c core.sparseCheckoutCone=true clone --branch=wide --sparse . full-v4 && ( - cd full-index-v4 && + cd full-v4 && git sparse-checkout init --cone && git sparse-checkout set $SPARSE_CONE && git config index.version 4 && git update-index --index-version=4 && git checkout HEAD~4 ) && - git -c core.sparseCheckoutCone=true clone --branch=wide --sparse . sparse-index-v3 && + git -c core.sparseCheckoutCone=true clone --branch=wide --sparse . sparse-v3 && ( - cd sparse-index-v3 && + cd sparse-v3 && git sparse-checkout init --cone --sparse-index && git sparse-checkout set $SPARSE_CONE && git config index.version 3 && git update-index --index-version=3 && git checkout HEAD~4 ) && - git -c core.sparseCheckoutCone=true clone --branch=wide --sparse . sparse-index-v4 && + git -c core.sparseCheckoutCone=true clone --branch=wide --sparse . sparse-v4 && ( - cd sparse-index-v4 && + cd sparse-v4 && git sparse-checkout init --cone --sparse-index && git sparse-checkout set $SPARSE_CONE && git config index.version 4 && @@ -92,8 +92,8 @@ test_expect_success 'setup repo and indexes' ' test_perf_on_all () { command="$@" - for repo in full-index-v3 full-index-v4 \ - sparse-index-v3 sparse-index-v4 + for repo in full-v3 full-v4 \ + sparse-v3 sparse-v4 do test_perf "$command ($repo)" " ( From daa1acefc55bb6492c00519634e0a7622b3b6d69 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Tue, 29 Jun 2021 02:13:04 +0000 Subject: [PATCH 3/7] commit: integrate with sparse-index Update 'git commit' to allow using the sparse-index in memory without expanding to a full one. The only place that had an ensure_full_index() call was in cache_tree_update(). The recursive algorithm for update_one() was already updated in 2de37c536 (cache-tree: integrate with sparse directory entries, 2021-03-03) to handle sparse directory entries in the index. Most of this change involves testing different command-line options that allow specifying which on-disk changes should be included in the commit. This includes no options (only take currently-staged changes), -a (take all tracked changes), and --include (take a list of specific changes). To simplify testing that these options do not expand the index, update the test that previously verified that 'git status' does not expand the index with a helper method, ensure_not_expanded(). This allows 'git commit' to operate much faster when the sparse-checkout cone is much smaller than the full list of files at HEAD. Here are the relevant lines from p2000-sparse-operations.sh: Test HEAD~1 HEAD ---------------------------------------------------------------------------------- 2000.14: git commit -a -m A (full-v3) 0.35(0.26+0.06) 0.36(0.28+0.07) +2.9% 2000.15: git commit -a -m A (full-v4) 0.32(0.26+0.05) 0.34(0.28+0.06) +6.3% 2000.16: git commit -a -m A (sparse-v3) 0.63(0.59+0.06) 0.04(0.05+0.05) -93.7% 2000.17: git commit -a -m A (sparse-v4) 0.64(0.59+0.08) 0.04(0.04+0.04) -93.8% It is important to compare the full-index case to the sparse-index case, so the improvement for index version v4 is actually an 88% improvement in this synthetic example. In a real repository with over two million files at HEAD and 60,000 files in the sparse-checkout definition, the time for 'git commit -a' went from 2.61 seconds to 134ms. I compared this to the result if the index only contained the paths in the sparse-checkout definition and found the theoretical optimum to be 120ms, so the out-of-cone paths only add a 12% overhead. Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- builtin/commit.c | 3 ++ cache-tree.c | 2 - t/t1092-sparse-checkout-compatibility.sh | 47 ++++++++++++++++++++++-- 3 files changed, 46 insertions(+), 6 deletions(-) diff --git a/builtin/commit.c b/builtin/commit.c index 12f51db158..0bc6489250 100644 --- a/builtin/commit.c +++ b/builtin/commit.c @@ -1682,6 +1682,9 @@ int cmd_commit(int argc, const char **argv, const char *prefix) if (argc == 2 && !strcmp(argv[1], "-h")) usage_with_options(builtin_commit_usage, builtin_commit_options); + prepare_repo_settings(the_repository); + the_repository->settings.command_requires_full_index = 0; + status_init_config(&s, git_commit_config); s.commit_template = 1; status_format = STATUS_FORMAT_NONE; /* Ignore status.short */ diff --git a/cache-tree.c b/cache-tree.c index 45e58666af..577b18d881 100644 --- a/cache-tree.c +++ b/cache-tree.c @@ -461,8 +461,6 @@ int cache_tree_update(struct index_state *istate, int flags) if (i) return i; - ensure_full_index(istate); - if (!istate->cache_tree) istate->cache_tree = cache_tree(); diff --git a/t/t1092-sparse-checkout-compatibility.sh b/t/t1092-sparse-checkout-compatibility.sh index cabbd42e33..d3e34d0aca 100755 --- a/t/t1092-sparse-checkout-compatibility.sh +++ b/t/t1092-sparse-checkout-compatibility.sh @@ -262,6 +262,34 @@ test_expect_success 'add, commit, checkout' ' test_all_match git checkout - ' +test_expect_success 'commit including unstaged changes' ' + init_repos && + + write_script edit-file <<-\EOF && + echo $1 >$2 + EOF + + run_on_all ../edit-file 1 a && + run_on_all ../edit-file 1 deep/a && + + test_all_match git commit -m "-a" -a && + test_all_match git status --porcelain=v2 && + + run_on_all ../edit-file 2 a && + run_on_all ../edit-file 2 deep/a && + + test_all_match git commit -m "--include" --include deep/a && + test_all_match git status --porcelain=v2 && + test_all_match git commit -m "--include" --include a && + test_all_match git status --porcelain=v2 && + + run_on_all ../edit-file 3 a && + run_on_all ../edit-file 3 deep/a && + + test_all_match git commit -m "--amend" -a --amend && + test_all_match git status --porcelain=v2 +' + test_expect_success 'status/add: outside sparse cone' ' init_repos && @@ -514,14 +542,25 @@ test_expect_success 'sparse-index is expanded and converted back' ' test_region index ensure_full_index trace2.txt ' -test_expect_success 'sparse-index is not expanded' ' - init_repos && - +ensure_not_expanded () { rm -f trace2.txt && echo >>sparse-index/untracked.txt && GIT_TRACE2_EVENT="$(pwd)/trace2.txt" GIT_TRACE2_EVENT_NESTING=10 \ - git -C sparse-index status && + git -C sparse-index "$@" && test_region ! index ensure_full_index trace2.txt +} + +test_expect_success 'sparse-index is not expanded' ' + init_repos && + + ensure_not_expanded status && + ensure_not_expanded commit --allow-empty -m empty && + echo >>sparse-index/a && + ensure_not_expanded commit -a -m a && + echo >>sparse-index/a && + ensure_not_expanded commit --include a -m a && + echo >>sparse-index/deep/deeper1/a && + ensure_not_expanded commit --include deep/deeper1/a -m deeper ' # NEEDSWORK: a sparse-checkout behaves differently from a full checkout From f934f1b47fb56d18b2b81d9288590e03e9a0ed23 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Tue, 29 Jun 2021 02:13:05 +0000 Subject: [PATCH 4/7] sparse-index: recompute cache-tree When some commands run with command_requires_full_index=1, then the index can get in a state where the in-memory cache tree is actually equal to the sparse index's cache tree instead of the full one. This results in incorrect entry_count values. By clearing the cache tree before converting to sparse, we avoid this issue. Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- sparse-index.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sparse-index.c b/sparse-index.c index 53c8f711cc..c6b4feec41 100644 --- a/sparse-index.c +++ b/sparse-index.c @@ -170,6 +170,8 @@ int convert_to_sparse(struct index_state *istate) if (index_has_unmerged_entries(istate)) return 0; + /* Clear and recompute the cache-tree */ + cache_tree_free(&istate->cache_tree); if (cache_tree_update(istate, 0)) { warning(_("unable to update cache-tree, staying full")); return -1; From 1ba5f45132f987e630497c09b74af687bf4f863b Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Tue, 29 Jun 2021 02:13:06 +0000 Subject: [PATCH 5/7] checkout: stop expanding sparse indexes Previous changes did the necessary improvements to unpack-trees.c and diff-lib.c in order to modify a sparse index based on its comparision with a tree. The only remaining work is to remove some ensure_full_index() calls and add tests that verify that the index is not expanded in our interesting cases. Include 'switch' and 'restore' in these tests, as they share a base implementation with 'checkout'. Here are the relevant performance results from p2000-sparse-operations.sh: Test HEAD~1 HEAD -------------------------------------------------------------------------------- 2000.18: git checkout -f - (full-v3) 0.49(0.43+0.03) 0.47(0.39+0.05) -4.1% 2000.19: git checkout -f - (full-v4) 0.45(0.37+0.06) 0.42(0.37+0.05) -6.7% 2000.20: git checkout -f - (sparse-v3) 0.76(0.71+0.07) 0.04(0.03+0.04) -94.7% 2000.21: git checkout -f - (sparse-v4) 0.75(0.72+0.04) 0.05(0.06+0.04) -93.3% It is important to compare the full index case to the sparse index case, as the previous results for the sparse index were inflated by the index expansion. For index v4, this is an 88% improvement. On an internal repository with over two million paths at HEAD and a sparse-checkout definition containing ~60,000 of those paths, 'git checkout' went from 3.5s to 297ms with this change. The theoretical optimum where only those ~60,000 paths exist was 275ms, so the extra sparse directory entries contribute a 22ms overhead. Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- builtin/checkout.c | 8 +++----- t/t1092-sparse-checkout-compatibility.sh | 10 +++++++++- 2 files changed, 12 insertions(+), 6 deletions(-) diff --git a/builtin/checkout.c b/builtin/checkout.c index f4cd7747d3..b5d477919a 100644 --- a/builtin/checkout.c +++ b/builtin/checkout.c @@ -378,9 +378,6 @@ static int checkout_worktree(const struct checkout_opts *opts, if (pc_workers > 1) init_parallel_checkout(); - /* TODO: audit for interaction with sparse-index. */ - ensure_full_index(&the_index); - for (pos = 0; pos < active_nr; pos++) { struct cache_entry *ce = active_cache[pos]; if (ce->ce_flags & CE_MATCHED) { @@ -530,8 +527,6 @@ static int checkout_paths(const struct checkout_opts *opts, * Make sure all pathspecs participated in locating the paths * to be checked out. */ - /* TODO: audit for interaction with sparse-index. */ - ensure_full_index(&the_index); for (pos = 0; pos < active_nr; pos++) if (opts->overlay_mode) mark_ce_for_checkout_overlay(active_cache[pos], @@ -1593,6 +1588,9 @@ static int checkout_main(int argc, const char **argv, const char *prefix, git_config(git_checkout_config, opts); + prepare_repo_settings(the_repository); + the_repository->settings.command_requires_full_index = 0; + opts->track = BRANCH_TRACK_UNSPECIFIED; if (!opts->accept_pathspec && !opts->accept_ref) diff --git a/t/t1092-sparse-checkout-compatibility.sh b/t/t1092-sparse-checkout-compatibility.sh index d3e34d0aca..fde3b41aba 100755 --- a/t/t1092-sparse-checkout-compatibility.sh +++ b/t/t1092-sparse-checkout-compatibility.sh @@ -560,7 +560,15 @@ test_expect_success 'sparse-index is not expanded' ' echo >>sparse-index/a && ensure_not_expanded commit --include a -m a && echo >>sparse-index/deep/deeper1/a && - ensure_not_expanded commit --include deep/deeper1/a -m deeper + ensure_not_expanded commit --include deep/deeper1/a -m deeper && + ensure_not_expanded checkout rename-out-to-out && + ensure_not_expanded checkout - && + ensure_not_expanded switch rename-out-to-out && + ensure_not_expanded switch - && + git -C sparse-index reset --hard && + ensure_not_expanded checkout rename-out-to-out -- deep/deeper1 && + git -C sparse-index reset --hard && + ensure_not_expanded restore -s rename-out-to-out -- deep/deeper1 ' # NEEDSWORK: a sparse-checkout behaves differently from a full checkout From 70569fadceb8f2e766803c2f02b08e5fd4a9ee19 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Tue, 20 Jul 2021 20:14:40 +0000 Subject: [PATCH 6/7] t1092: document bad 'git checkout' behavior Add new branches to the test repo that demonstrate directory/file conflicts in different ways. Since the directory 'folder1/' has adjacent files 'folder1-', 'folder1.txt', and 'folder10' it causes searches for 'folder1/' to land in a different place in the index than a search for 'folder1'. This causes a change in behavior when working with the df-conflict-1 and df-conflict-2 branches, whose only difference is that the first uses 'folder1' as the conflict and the other uses 'folder2' which does not have these adjacent files. We can extend two tests that compare the behavior across different 'git checkout' commands, and we see already that the behavior will be different in some cases and not in others. The difference between the two test loops is that one uses 'git reset --hard' between iterations. Further, we isolate the behavior of creating a staged change within a directory and then checking out a branch where that directory is replaced with a file. A full checkout behaves differently across these two cases, while a sparse-checkout cone behaves consistently. In both cases, the behavior is wrong. In one case, the staged change is dropped entirely. The other case the staged change is kept, replacing the file at that location, but none of the other files in the directory are kept. Likely, the correct behavior in this case is to reject the checkout and report the conflict, leaving HEAD in its previous location. None of the cases behave this way currently. Use comments to demonstrate that the tested behavior is only a documentation of the current, incorrect behavior to ensure we do not _accidentally_ change it. Instead, we would prefer to change it on purpose with a future change. At this point, the sparse-index does not handle these 'git checkout' commands correctly. Or rather, it _does_ reject the 'git checkout' when we have the staged change, but for the wrong reason. It also rejects the 'git checkout' commands when there is no staged change and we want to replace a directory with a file. A fix for that unstaged case will follow in the next change, but that will make the sparse-index agree with the full checkout case in these documented incorrect behaviors. Helped-by: Elijah Newren Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- t/t1092-sparse-checkout-compatibility.sh | 142 ++++++++++++++++++++++- 1 file changed, 140 insertions(+), 2 deletions(-) diff --git a/t/t1092-sparse-checkout-compatibility.sh b/t/t1092-sparse-checkout-compatibility.sh index fde3b41aba..79b4a8ce19 100755 --- a/t/t1092-sparse-checkout-compatibility.sh +++ b/t/t1092-sparse-checkout-compatibility.sh @@ -95,6 +95,25 @@ test_expect_success 'setup' ' git add . && git commit -m "rename deep/deeper1/... to folder1/..." && + git checkout -b df-conflict-1 base && + rm -rf folder1 && + echo content >folder1 && + git add . && + git commit -m "dir to file" && + + git checkout -b df-conflict-2 base && + rm -rf folder2 && + echo content >folder2 && + git add . && + git commit -m "dir to file" && + + git checkout -b fd-conflict base && + rm a && + mkdir a && + echo content >a/a && + git add . && + git commit -m "file to dir" && + git checkout -b deepest base && echo "updated deepest" >deep/deeper1/deepest/a && git commit -a -m "update deepest" && @@ -358,10 +377,16 @@ test_expect_success 'diff --staged' ' test_all_match git diff --staged ' +# NEEDSWORK: sparse-checkout behaves differently from full-checkout when +# running this test with 'df-conflict-2' after 'df-conflict-1'. test_expect_success 'diff with renames and conflicts' ' init_repos && - for branch in rename-out-to-out rename-out-to-in rename-in-to-out + for branch in rename-out-to-out \ + rename-out-to-in \ + rename-in-to-out \ + df-conflict-1 \ + fd-conflict do test_all_match git checkout rename-base && test_all_match git checkout $branch -- . && @@ -371,10 +396,15 @@ test_expect_success 'diff with renames and conflicts' ' done ' +# NEEDSWORK: the sparse-index fails to move HEAD across a directory/file +# conflict such as when checking out df-conflict-1 and df-conflict2. test_expect_success 'diff with directory/file conflicts' ' init_repos && - for branch in rename-out-to-out rename-out-to-in rename-in-to-out + for branch in rename-out-to-out \ + rename-out-to-in \ + rename-in-to-out \ + fd-conflict do git -C full-checkout reset --hard && test_sparse_match git reset --hard && @@ -606,4 +636,112 @@ test_expect_success 'add everything with deep new file' ' test_all_match git status --porcelain=v2 ' +# NEEDSWORK: 'git checkout' behaves incorrectly in the case of +# directory/file conflicts, even without sparse-checkout. Use this +# test only as a documentation of the incorrect behavior, not a +# measure of how it _should_ behave. +test_expect_success 'checkout behaves oddly with df-conflict-1' ' + init_repos && + + test_sparse_match git sparse-checkout disable && + + write_script edit-content <<-\EOF && + echo content >>folder1/larger-content + git add folder1 + EOF + + run_on_all ../edit-content && + test_all_match git status --porcelain=v2 && + + git -C sparse-checkout sparse-checkout init --cone && + git -C sparse-index sparse-checkout init --cone --sparse-index && + + test_all_match git status --porcelain=v2 && + + # This checkout command should fail, because we have a staged + # change to folder1/larger-content, but the destination changes + # folder1 to a file. + git -C full-checkout checkout df-conflict-1 \ + 1>full-checkout-out \ + 2>full-checkout-err && + git -C sparse-checkout checkout df-conflict-1 \ + 1>sparse-checkout-out \ + 2>sparse-checkout-err && + + # NEEDSWORK: the sparse-index case refuses to change HEAD here, + # but for the wrong reason. + test_must_fail git -C sparse-index checkout df-conflict-1 \ + 1>sparse-index-out \ + 2>sparse-index-err && + + # Instead, the checkout deletes the folder1 file and adds the + # folder1/larger-content file, leaving all other paths that were + # in folder1/ as deleted (without any warning). + cat >expect <<-EOF && + D folder1 + A folder1/larger-content + EOF + test_cmp expect full-checkout-out && + test_cmp expect sparse-checkout-out && + + # stderr: Switched to branch df-conflict-1 + test_cmp full-checkout-err sparse-checkout-err +' + +# NEEDSWORK: 'git checkout' behaves incorrectly in the case of +# directory/file conflicts, even without sparse-checkout. Use this +# test only as a documentation of the incorrect behavior, not a +# measure of how it _should_ behave. +test_expect_success 'checkout behaves oddly with df-conflict-2' ' + init_repos && + + test_sparse_match git sparse-checkout disable && + + write_script edit-content <<-\EOF && + echo content >>folder2/larger-content + git add folder2 + EOF + + run_on_all ../edit-content && + test_all_match git status --porcelain=v2 && + + git -C sparse-checkout sparse-checkout init --cone && + git -C sparse-index sparse-checkout init --cone --sparse-index && + + test_all_match git status --porcelain=v2 && + + # This checkout command should fail, because we have a staged + # change to folder1/larger-content, but the destination changes + # folder1 to a file. + git -C full-checkout checkout df-conflict-2 \ + 1>full-checkout-out \ + 2>full-checkout-err && + git -C sparse-checkout checkout df-conflict-2 \ + 1>sparse-checkout-out \ + 2>sparse-checkout-err && + + # NEEDSWORK: the sparse-index case refuses to change HEAD + # here, but for the wrong reason. + test_must_fail git -C sparse-index checkout df-conflict-2 \ + 1>sparse-index-out \ + 2>sparse-index-err && + + # The full checkout deviates from the df-conflict-1 case here! + # It drops the change to folder1/larger-content and leaves the + # folder1 path as-is on disk. + test_must_be_empty full-checkout-out && + + # In the sparse-checkout case, the checkout deletes the folder1 + # file and adds the folder1/larger-content file, leaving all other + # paths that were in folder1/ as deleted (without any warning). + cat >expect <<-EOF && + D folder2 + A folder2/larger-content + EOF + test_cmp expect sparse-checkout-out && + + # Switched to branch df-conflict-1 + test_cmp full-checkout-err sparse-checkout-err +' + test_done From e05cdb17e89a2d3257533d47350b3138bfce8737 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Tue, 20 Jul 2021 20:14:41 +0000 Subject: [PATCH 7/7] unpack-trees: resolve sparse-directory/file conflicts When running unpack_trees() with a sparse index, we attempt to operate on the index without expanding the sparse directory entries. Thus, we operate by manipulating entire directories and passing them to the unpack function. In the case of the 'git checkout' command, this is the twoway_merge() function. There are several cases in twoway_merge() that handle different situations. One new one to add is the case of a directory/file conflict where the directory is sparse. Before the sparse index, such a conflict would appear as a list of file additions and deletions. Now, twoway_merge() initializes 'current', 'oldtree', and 'newtree' from src[0], src[1], and src[2], then sets 'oldtree' to NULL because it is equal to the df_conflict_entry. The way to determine that we have a directory/file conflict is to test that 'current' and 'newtree' disagree on being sparse directory entries. When we are in this case, we want to resolve the situation by calling merged_entry(). This allows replacing the 'current' entry with the 'newtree' entry. This is important for cases where we want to run 'git checkout' across the conflict and have the new HEAD represent the new file type at that path. The first NEEDSWORK comment dropped in t1092 demonstrates this necessary behavior. However, we still are in a confusing state when 'current' corresponds to a staged change within a sparse directory that is not present at HEAD. This should be atypical, because it requires adding a change outside of the sparse-checkout cone, but it is possible. Since we are unable to determine that this is a staged change within twoway_merge(), we cannot add a case to reject the merge at this point. I believe this is due to the use of df_conflict_entry in the place of 'oldtree' instead of using the valud at HEAD, which would provide some perspective to this decision. Any change that would allow this differentiation for staged entries would need to involve information further up in unpack_trees(). That work should be done, sometime, because we are further confusing the behavior of a directory/file conflict when staging a change in the directory. The two cases 'checkout behaves oddly with df-conflict-?' in t1092 demonstrate that even without a sparse-checkout, Git is not consistent in its behavior. Neither of the two options seems correct, either. This change makes the sparse-index behave differently than the typcial sparse-checkout case, but it does match the full checkout behavior in the df-conflict-2 case. Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- t/t1092-sparse-checkout-compatibility.sh | 24 ++++++++++++------------ unpack-trees.c | 11 +++++++++++ 2 files changed, 23 insertions(+), 12 deletions(-) diff --git a/t/t1092-sparse-checkout-compatibility.sh b/t/t1092-sparse-checkout-compatibility.sh index 79b4a8ce19..91e30d6ec2 100755 --- a/t/t1092-sparse-checkout-compatibility.sh +++ b/t/t1092-sparse-checkout-compatibility.sh @@ -396,14 +396,14 @@ test_expect_success 'diff with renames and conflicts' ' done ' -# NEEDSWORK: the sparse-index fails to move HEAD across a directory/file -# conflict such as when checking out df-conflict-1 and df-conflict2. test_expect_success 'diff with directory/file conflicts' ' init_repos && for branch in rename-out-to-out \ rename-out-to-in \ rename-in-to-out \ + df-conflict-1 \ + df-conflict-2 \ fd-conflict do git -C full-checkout reset --hard && @@ -667,10 +667,7 @@ test_expect_success 'checkout behaves oddly with df-conflict-1' ' git -C sparse-checkout checkout df-conflict-1 \ 1>sparse-checkout-out \ 2>sparse-checkout-err && - - # NEEDSWORK: the sparse-index case refuses to change HEAD here, - # but for the wrong reason. - test_must_fail git -C sparse-index checkout df-conflict-1 \ + git -C sparse-index checkout df-conflict-1 \ 1>sparse-index-out \ 2>sparse-index-err && @@ -684,7 +681,11 @@ test_expect_success 'checkout behaves oddly with df-conflict-1' ' test_cmp expect full-checkout-out && test_cmp expect sparse-checkout-out && + # The sparse-index reports no output + test_must_be_empty sparse-index-out && + # stderr: Switched to branch df-conflict-1 + test_cmp full-checkout-err sparse-checkout-err && test_cmp full-checkout-err sparse-checkout-err ' @@ -719,17 +720,15 @@ test_expect_success 'checkout behaves oddly with df-conflict-2' ' git -C sparse-checkout checkout df-conflict-2 \ 1>sparse-checkout-out \ 2>sparse-checkout-err && - - # NEEDSWORK: the sparse-index case refuses to change HEAD - # here, but for the wrong reason. - test_must_fail git -C sparse-index checkout df-conflict-2 \ + git -C sparse-index checkout df-conflict-2 \ 1>sparse-index-out \ 2>sparse-index-err && # The full checkout deviates from the df-conflict-1 case here! # It drops the change to folder1/larger-content and leaves the - # folder1 path as-is on disk. + # folder1 path as-is on disk. The sparse-index behaves the same. test_must_be_empty full-checkout-out && + test_must_be_empty sparse-index-out && # In the sparse-checkout case, the checkout deletes the folder1 # file and adds the folder1/larger-content file, leaving all other @@ -741,7 +740,8 @@ test_expect_success 'checkout behaves oddly with df-conflict-2' ' test_cmp expect sparse-checkout-out && # Switched to branch df-conflict-1 - test_cmp full-checkout-err sparse-checkout-err + test_cmp full-checkout-err sparse-checkout-err && + test_cmp full-checkout-err sparse-index-err ' test_done diff --git a/unpack-trees.c b/unpack-trees.c index 0a5135ab39..309c1352f5 100644 --- a/unpack-trees.c +++ b/unpack-trees.c @@ -2619,6 +2619,17 @@ int twoway_merge(const struct cache_entry * const *src, same(current, oldtree) && !same(current, newtree)) { /* 20 or 21 */ return merged_entry(newtree, current, o); + } else if (current && !oldtree && newtree && + S_ISSPARSEDIR(current->ce_mode) != S_ISSPARSEDIR(newtree->ce_mode) && + ce_stage(current) == 0) { + /* + * This case is a directory/file conflict across the sparse-index + * boundary. When we are changing from one path to another via + * 'git checkout', then we want to replace one entry with another + * via merged_entry(). If there are staged changes, then we should + * reject the merge instead. + */ + return merged_entry(newtree, current, o); } else return reject_merge(current, o); }