From baa73e2b75645a088268266a408f502457663876 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Mon, 23 May 2022 13:48:37 +0000 Subject: [PATCH 01/10] t1092: refactor 'sparse-index contents' test Before expanding this test with more involved cases, first extract the repeated logic into a new test_sparse_checkout_set helper. This helper checks that 'git sparse-checkout set ...' succeeds and then verifies that certain directories have sparse directory entries in the sparse index. It also verifies that the in-cone directories are _not_ sparse directory entries in the sparse index. Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- t/t1092-sparse-checkout-compatibility.sh | 67 +++++++++++++++--------- 1 file changed, 42 insertions(+), 25 deletions(-) diff --git a/t/t1092-sparse-checkout-compatibility.sh b/t/t1092-sparse-checkout-compatibility.sh index 236ab53028..9355e75a5d 100755 --- a/t/t1092-sparse-checkout-compatibility.sh +++ b/t/t1092-sparse-checkout-compatibility.sh @@ -205,36 +205,53 @@ test_sparse_unstaged () { done } +# Usage: test_sparse_checkout_set "<c1> ... <cN>" "<s1> ... <sM>" +# Verifies that "git sparse-checkout set <c1> ... <cN>" succeeds and +# leaves the sparse index in a state where <s1> ... <sM> are sparse +# directories (and <c1> ... <cN> are not). +test_sparse_checkout_set () { + CONE_DIRS=$1 && + SPARSE_DIRS=$2 && + git -C sparse-index sparse-checkout set $CONE_DIRS && + git -C sparse-index ls-files --sparse --stage >cache && + + # Check that the directories outside of the sparse-checkout cone + # have sparse directory entries. + for dir in $SPARSE_DIRS + do + TREE=$(git -C sparse-index rev-parse HEAD:$dir) && + grep "040000 $TREE 0 $dir/" cache \ + || return 1 + done && + + # Check that the directories in the sparse-checkout cone + # are not sparse directory entries. + for dir in $CONE_DIRS + do + TREE=$(git -C sparse-index rev-parse HEAD:$dir) && + !
grep "040000 $TREE 0 $dir/" cache \ + || return 1 + done +} + test_expect_success 'sparse-index contents' ' init_repos && - git -C sparse-index ls-files --sparse --stage >cache && - for dir in folder1 folder2 x - do - TREE=$(git -C sparse-index rev-parse HEAD:$dir) && - grep "040000 $TREE 0 $dir/" cache \ - || return 1 - done && + # Remove deep, add three other directories. + test_sparse_checkout_set \ + "folder1 folder2 x" \ + "before deep" && - git -C sparse-index sparse-checkout set folder1 && + # Remove folder1, add deep + test_sparse_checkout_set \ + "deep folder2 x" \ + "before folder1" && - git -C sparse-index ls-files --sparse --stage >cache && - for dir in deep folder2 x - do - TREE=$(git -C sparse-index rev-parse HEAD:$dir) && - grep "040000 $TREE 0 $dir/" cache \ - || return 1 - done && - - git -C sparse-index sparse-checkout set deep/deeper1 && - - git -C sparse-index ls-files --sparse --stage >cache && - for dir in deep/deeper2 folder1 folder2 x - do - TREE=$(git -C sparse-index rev-parse HEAD:$dir) && - grep "040000 $TREE 0 $dir/" cache \ - || return 1 - done && + # Replace deep with deep/deeper2 (dropping deep/deeper1) + # Add folder1 + test_sparse_checkout_set \ + "deep/deeper2 folder1 folder2 x" \ + "before deep/deeper1" && # Disabling the sparse-index replaces tree entries with full ones git -C sparse-index sparse-checkout init --no-sparse-index && From 8846847a142d720f63b5cbf9f0481c7694445ace Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Mon, 23 May 2022 13:48:38 +0000 Subject: [PATCH 02/10] t1092: stress test 'git sparse-checkout set' The 'sparse-index contents' test checks that the sparse index has the correct set of sparse directories in the index after modifying the cone mode patterns using 'git sparse-checkout set'. Add to the coverage here by adding more complicated scenarios that were not previously tested. In order to check paths that do not exist at HEAD, we need to modify the test_sparse_checkout_set helper slightly: 1. 
Add the --skip-checks argument to the 'set' command to avoid failures when passing paths that do not exist at HEAD. 2. When looking for the non-existence of sparse directories for the paths in $CONE_DIRS, allow the rev-parse command to fail because the path does not exist at HEAD. This allows us to add some interesting test cases. Helped-by: Victoria Dye Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- t/t1092-sparse-checkout-compatibility.sh | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/t/t1092-sparse-checkout-compatibility.sh b/t/t1092-sparse-checkout-compatibility.sh index 9355e75a5d..a8f3ce8aa2 100755 --- a/t/t1092-sparse-checkout-compatibility.sh +++ b/t/t1092-sparse-checkout-compatibility.sh @@ -212,7 +212,7 @@ test_sparse_unstaged () { test_sparse_checkout_set () { CONE_DIRS=$1 && SPARSE_DIRS=$2 && - git -C sparse-index sparse-checkout set $CONE_DIRS && + git -C sparse-index sparse-checkout set --skip-checks $CONE_DIRS && git -C sparse-index ls-files --sparse --stage >cache && # Check that the directories outside of the sparse-checkout cone @@ -228,7 +228,9 @@ test_sparse_checkout_set () { # are not sparse directory entries. for dir in $CONE_DIRS do - TREE=$(git -C sparse-index rev-parse HEAD:$dir) && + # Allow TREE to not exist because + # $dir does not exist at HEAD. + TREE=$(git -C sparse-index rev-parse HEAD:$dir) || !
grep "040000 $TREE 0 $dir/" cache \ || return 1 done @@ -253,6 +255,19 @@ test_expect_success 'sparse-index contents' ' "deep/deeper2 folder1 folder2 x" \ "before deep/deeper1" && + # Replace deep/deeper2 with deep/deeper1 + # Replace folder1 with folder1/0/0 + # Replace folder2 with non-existent folder2/2/3 + # Add non-existent "bogus" + test_sparse_checkout_set \ + "bogus deep/deeper1 folder1/0/0 folder2/2/3 x" \ + "before deep/deeper2 folder2/0" && + + # Drop down to only files at root + test_sparse_checkout_set \ + "" \ + "before deep folder1 folder2 x" && + # Disabling the sparse-index replaces tree entries with full ones git -C sparse-index sparse-checkout init --no-sparse-index && test_sparse_match git ls-files --stage --sparse From dce241b020cf32c9485c7ef23247f0b003731afa Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Mon, 23 May 2022 13:48:39 +0000 Subject: [PATCH 03/10] sparse-index: create expand_index() This is the first change in a series to allow modifying the sparse-checkout pattern set without expanding a sparse index to a full one in the process. Here, we focus on the problem of expanding the pattern set through a command like 'git sparse-checkout add ' which needs to create new index entries for the paths now being written to the worktree. To achieve this, we need to be able to replace sparse directory entries with their contained files and subdirectories. Once this is complete, other code paths can discover those cache entries and write the corresponding files to disk before committing the index. We already have logic in ensure_full_index() that expands the index entries, so we will use that as our base. Create a new method, expand_index(), which takes a pattern list, but for now mostly ignores it. The current implementation is only correct when the pattern list is NULL as that does the same as ensure_full_index(). In fact, ensure_full_index() is converted to a shim over expand_index(). 
A future update will actually implement expand_index() to its full capabilities. For now, it is created and documented. Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- sparse-index.c | 32 +++++++++++++++++++++++++++++--- sparse-index.h | 13 +++++++++++++ 2 files changed, 42 insertions(+), 3 deletions(-) diff --git a/sparse-index.c b/sparse-index.c index 8636af72de..a11b5cf131 100644 --- a/sparse-index.c +++ b/sparse-index.c @@ -248,19 +248,40 @@ static int add_path_to_index(const struct object_id *oid, return 0; } -void ensure_full_index(struct index_state *istate) +void expand_index(struct index_state *istate, struct pattern_list *pl) { int i; struct index_state *full; struct strbuf base = STRBUF_INIT; + const char *tr_region; + /* + * If the index is already full, then keep it full. We will convert + * it to a sparse index on write, if possible. + */ if (!istate || !istate->sparse_index) return; + /* + * If our index is sparse, but our new pattern set does not use + * cone mode patterns, then we need to expand the index before we + * continue. A NULL pattern set indicates a full expansion to a + * full index. + */ + if (pl && !pl->use_cone_patterns) + pl = NULL; + if (!istate->repo) istate->repo = the_repository; - trace2_region_enter("index", "ensure_full_index", istate->repo); + /* + * A NULL pattern set indicates we are expanding a full index, so + * we use a special region name that indicates the full expansion. + * This is used by test cases, but also helps to differentiate the + * two cases. + */ + tr_region = pl ? 
"expand_index" : "ensure_full_index"; + trace2_region_enter("index", tr_region, istate->repo); /* initialize basics of new index */ full = xcalloc(1, sizeof(struct index_state)); @@ -322,7 +343,12 @@ void ensure_full_index(struct index_state *istate) cache_tree_free(&istate->cache_tree); cache_tree_update(istate, 0); - trace2_region_leave("index", "ensure_full_index", istate->repo); + trace2_region_leave("index", tr_region, istate->repo); +} + +void ensure_full_index(struct index_state *istate) +{ + expand_index(istate, NULL); } void ensure_correct_sparsity(struct index_state *istate) diff --git a/sparse-index.h b/sparse-index.h index 633d4fb7e3..b1f2cdbb16 100644 --- a/sparse-index.h +++ b/sparse-index.h @@ -23,4 +23,17 @@ void expand_to_path(struct index_state *istate, struct repository; int set_sparse_index_config(struct repository *repo, int enable); +struct pattern_list; + +/** + * Scan the given index and compare its entries to the given pattern list. + * If the index is sparse and the pattern list uses cone mode patterns, + * then modify the index to contain the all of the file entries within that + * new pattern list. This expands sparse directories only as far as needed. + * + * If the pattern list is NULL or does not use cone mode patterns, then the + * index is expanded to a full index. + */ +void expand_index(struct index_state *istate, struct pattern_list *pl); + #endif From 9fadb373dd4a670e761776560d3c40f6fcc80360 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Mon, 23 May 2022 13:48:40 +0000 Subject: [PATCH 04/10] sparse-index: introduce partially-sparse indexes A future change will present a temporary, in-memory mode where the index can both contain sparse directory entries but also not be completely collapsed to the smallest possible sparse directories. This will be necessary for modifying the sparse-checkout definition while using a sparse index. 
For now, convert the single-bit member 'sparse_index' in 'struct index_state' to be an 'enum sparse_index_mode' with three modes: * INDEX_EXPANDED (0): No sparse directories exist. This is always the case for repositories that do not use cone-mode sparse-checkout. * INDEX_COLLAPSED: Sparse directories may exist. Files outside the sparse-checkout cone are reduced to sparse directory entries whenever possible. * INDEX_PARTIALLY_SPARSE: Sparse directories may exist. Some file entries outside the sparse-checkout cone may exist. Running convert_to_sparse() may further reduce those files to sparse directory entries. The main reason to store this extra information is to allow convert_to_sparse() to short-circuit when the index is already in INDEX_COLLAPSED mode but to actually do the necessary work when in INDEX_PARTIALLY_SPARSE mode. The INDEX_PARTIALLY_SPARSE mode will be used in an upcoming change. Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- builtin/sparse-checkout.c | 2 +- cache.h | 33 +++++++++++++++++++++++++-------- read-cache.c | 6 +++--- sparse-index.c | 6 +++--- 4 files changed, 32 insertions(+), 15 deletions(-) diff --git a/builtin/sparse-checkout.c b/builtin/sparse-checkout.c index 0217d44c5b..5b054400bf 100644 --- a/builtin/sparse-checkout.c +++ b/builtin/sparse-checkout.c @@ -128,7 +128,7 @@ static void clean_tracked_sparse_directories(struct repository *r) * sparse index will not delete directories that contain * conflicted entries or submodules. */ - if (!r->index->sparse_index) { + if (r->index->sparse_index == INDEX_EXPANDED) { /* * If something, such as a merge conflict or other concern, * prevents us from converting to a sparse index, then do diff --git a/cache.h b/cache.h index 6226f6a8a5..e171ce882a 100644 --- a/cache.h +++ b/cache.h @@ -310,6 +310,29 @@ struct untracked_cache; struct progress; struct pattern_list; +enum sparse_index_mode { + /* + * There are no sparse directories in the index at all.
+ * + * Repositories that don't use cone-mode sparse-checkout will + * always have their indexes in this mode. + */ + INDEX_EXPANDED = 0, + + /* + * The index has already been collapsed to sparse directories + * wherever possible. + */ + INDEX_COLLAPSED, + + /* + * The sparse directories that exist are outside the + * sparse-checkout boundary, but it is possible that some file + * entries could collapse to sparse directory entries. + */ + INDEX_PARTIALLY_SPARSE, +}; + struct index_state { struct cache_entry **cache; unsigned int version; @@ -323,14 +346,8 @@ struct index_state { drop_cache_tree : 1, updated_workdir : 1, updated_skipworktree : 1, - fsmonitor_has_run_once : 1, - - /* - * sparse_index == 1 when sparse-directory - * entries exist. Requires sparse-checkout - * in cone mode. - */ - sparse_index : 1; + fsmonitor_has_run_once : 1; + enum sparse_index_mode sparse_index; struct hashmap name_hash; struct hashmap dir_hash; struct object_id oid; diff --git a/read-cache.c b/read-cache.c index 4df97e185e..b236042eee 100644 --- a/read-cache.c +++ b/read-cache.c @@ -112,7 +112,7 @@ static const char *alternate_index_output; static void set_index_entry(struct index_state *istate, int nr, struct cache_entry *ce) { if (S_ISSPARSEDIR(ce->ce_mode)) - istate->sparse_index = 1; + istate->sparse_index = INDEX_COLLAPSED; istate->cache[nr] = ce; add_name_hash(istate, ce); @@ -1856,7 +1856,7 @@ static int read_index_extension(struct index_state *istate, break; case CACHE_EXT_SPARSE_DIRECTORIES: /* no content, only an indicator */ - istate->sparse_index = 1; + istate->sparse_index = INDEX_COLLAPSED; break; default: if (*ext < 'A' || 'Z' < *ext) @@ -3149,7 +3149,7 @@ static int do_write_locked_index(struct index_state *istate, struct lock_file *l unsigned flags) { int ret; - int was_full = !istate->sparse_index; + int was_full = istate->sparse_index == INDEX_EXPANDED; ret = convert_to_sparse(istate, 0); diff --git a/sparse-index.c b/sparse-index.c index a11b5cf131..7848910c15
100644 --- a/sparse-index.c +++ b/sparse-index.c @@ -173,7 +173,7 @@ int convert_to_sparse(struct index_state *istate, int flags) * If the index is already sparse, empty, or otherwise * cannot be converted to sparse, do not convert. */ - if (istate->sparse_index || !istate->cache_nr || + if (istate->sparse_index == INDEX_COLLAPSED || !istate->cache_nr || !is_sparse_index_allowed(istate, flags)) return 0; @@ -214,7 +214,7 @@ int convert_to_sparse(struct index_state *istate, int flags) FREE_AND_NULL(istate->fsmonitor_dirty); FREE_AND_NULL(istate->fsmonitor_last_update); - istate->sparse_index = 1; + istate->sparse_index = INDEX_COLLAPSED; trace2_region_leave("index", "convert_to_sparse", istate->repo); return 0; } @@ -259,7 +259,7 @@ void expand_index(struct index_state *istate, struct pattern_list *pl) * If the index is already full, then keep it full. We will convert * it to a sparse index on write, if possible. */ - if (!istate || !istate->sparse_index) + if (!istate || istate->sparse_index == INDEX_EXPANDED) return; /* From 080ab56a46ad65068201a768a04464341117fe81 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Mon, 23 May 2022 13:48:41 +0000 Subject: [PATCH 05/10] cache-tree: implement cache_tree_find_path() Given a 'struct cache_tree', it may be beneficial to navigate directly to a node within that corresponds to a given path name. Create cache_tree_find_path() for this function. It returns NULL when no such path exists. The implementation is adapted from do_invalidate_path() which does a similar search but also modifies the nodes it finds along the way. The method could be implemented simply using tail-recursion, but this while loop does the same thing. This new method is not currently used, but will be in an upcoming change. 
Helped-by: Junio C Hamano Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- cache-tree.c | 27 +++++++++++++++++++++++++++ cache-tree.h | 2 ++ 2 files changed, 29 insertions(+) diff --git a/cache-tree.c b/cache-tree.c index 6752f69d51..f42db920d1 100644 --- a/cache-tree.c +++ b/cache-tree.c @@ -100,6 +100,33 @@ struct cache_tree_sub *cache_tree_sub(struct cache_tree *it, const char *path) return find_subtree(it, path, pathlen, 1); } +struct cache_tree *cache_tree_find_path(struct cache_tree *it, const char *path) +{ + const char *slash; + int namelen; + struct cache_tree_sub it_sub = { + .cache_tree = it, + }; + struct cache_tree_sub *down = &it_sub; + + while (down) { + slash = strchrnul(path, '/'); + namelen = slash - path; + down->cache_tree->entry_count = -1; + if (!*slash) { + int pos; + pos = cache_tree_subtree_pos(down->cache_tree, path, namelen); + if (0 <= pos) + return down->cache_tree->down[pos]->cache_tree; + return NULL; + } + down = find_subtree(it, path, namelen, 0); + path = slash + 1; + } + + return NULL; +} + static int do_invalidate_path(struct cache_tree *it, const char *path) { /* a/b/c diff --git a/cache-tree.h b/cache-tree.h index 8efeccebfc..f75f8e74dc 100644 --- a/cache-tree.h +++ b/cache-tree.h @@ -29,6 +29,8 @@ struct cache_tree_sub *cache_tree_sub(struct cache_tree *, const char *); int cache_tree_subtree_pos(struct cache_tree *it, const char *path, int pathlen); +struct cache_tree *cache_tree_find_path(struct cache_tree *it, const char *path); + void cache_tree_write(struct strbuf *, struct cache_tree *root); struct cache_tree *cache_tree_read(const char *buffer, unsigned long size); From 2d443389fddf1b9b50664669b55c701a53f12eb2 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Mon, 23 May 2022 13:48:42 +0000 Subject: [PATCH 06/10] sparse-checkout: --no-sparse-index needs a full index When the --no-sparse-index option is supplied, the sparse-checkout builtin should explicitly ask to expand a sparse index to a full one. 
This is currently done implicitly due to the command_requires_full_index protection, but that will be removed in an upcoming change. Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- builtin/sparse-checkout.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/builtin/sparse-checkout.c b/builtin/sparse-checkout.c index 5b054400bf..1db51c3fd7 100644 --- a/builtin/sparse-checkout.c +++ b/builtin/sparse-checkout.c @@ -413,6 +413,9 @@ static int update_modes(int *cone_mode, int *sparse_index) /* force an index rewrite */ repo_read_index(the_repository); the_repository->index->updated_workdir = 1; + + if (!*sparse_index) + ensure_full_index(the_repository->index); } return 0; From 0243930af40f1ede50598c7de0965bdbe6fcbe30 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Mon, 23 May 2022 13:48:43 +0000 Subject: [PATCH 07/10] sparse-index: partially expand directories The expand_index() method expands sparse directory entries to their list of contained files when either the pattern list is NULL or the directory is contained in the new pattern list's cone mode patterns. It is possible that the pattern list has a recursive match with a directory 'A/B/C/' and so an existing sparse directory 'A/B/' would need to be expanded. If there exists a directory 'A/B/D/', then that directory should not be expanded and instead we can create a sparse directory. To implement this, we plug into the add_path_to_index() callback for the call to read_tree_at(). Since we now need access to both the index we are writing and the pattern list we are comparing, create a 'struct modify_index_context' to use as a data transfer object. It is important that we use the given pattern list since we will use this pattern list to change the sparse-checkout patterns and cannot use istate->sparse_checkout_patterns.
Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- sparse-index.c | 57 +++++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 50 insertions(+), 7 deletions(-) diff --git a/sparse-index.c b/sparse-index.c index 7848910c15..a881f85181 100644 --- a/sparse-index.c +++ b/sparse-index.c @@ -9,6 +9,11 @@ #include "dir.h" #include "fsmonitor.h" +struct modify_index_context { + struct index_state *write; + struct pattern_list *pl; +}; + static struct cache_entry *construct_sparse_dir_entry( struct index_state *istate, const char *sparse_dir, @@ -231,18 +236,52 @@ static int add_path_to_index(const struct object_id *oid, struct strbuf *base, const char *path, unsigned int mode, void *context) { - struct index_state *istate = (struct index_state *)context; + struct modify_index_context *ctx = (struct modify_index_context *)context; struct cache_entry *ce; size_t len = base->len; - if (S_ISDIR(mode)) - return READ_TREE_RECURSIVE; + if (S_ISDIR(mode)) { + int dtype; + size_t baselen = base->len; + if (!ctx->pl) + return READ_TREE_RECURSIVE; - strbuf_addstr(base, path); + /* + * Have we expanded to a point outside of the sparse-checkout? + * + * Artificially pad the path name with a slash "/" to + * indicate it as a directory, and add an arbitrary file + * name ("-") so we can consider base->buf as a file name + * to match against the cone-mode patterns. + * + * If we compared just "path", then we would expand more + * than we should. Since every file at root is always + * included, we would expand every directory at root at + * least one level deep instead of using sparse directory + * entries. + */ + strbuf_addstr(base, path); + strbuf_add(base, "/-", 2); - ce = make_cache_entry(istate, mode, oid, base->buf, 0, 0); + if (path_matches_pattern_list(base->buf, base->len, + NULL, &dtype, + ctx->pl, ctx->write)) { + strbuf_setlen(base, baselen); + return READ_TREE_RECURSIVE; + } + + /* + * The path "{base}{path}/" is a sparse directory. 
Create the correct + * name for inserting the entry into the index. + */ + strbuf_setlen(base, base->len - 1); + } else { + strbuf_addstr(base, path); + } + + ce = make_cache_entry(ctx->write, mode, oid, base->buf, 0, 0); ce->ce_flags |= CE_SKIP_WORKTREE | CE_EXTENDED; - set_index_entry(istate, istate->cache_nr++, ce); + set_index_entry(ctx->write, ctx->write->cache_nr++, ce); strbuf_setlen(base, len); return 0; @@ -254,6 +293,7 @@ void expand_index(struct index_state *istate, struct pattern_list *pl) struct index_state *full; struct strbuf base = STRBUF_INIT; const char *tr_region; + struct modify_index_context ctx; /* * If the index is already full, then keep it full. We will convert @@ -293,6 +333,9 @@ void expand_index(struct index_state *istate, struct pattern_list *pl) full->cache_nr = 0; ALLOC_ARRAY(full->cache, full->cache_alloc); + ctx.write = full; + ctx.pl = pl; + for (i = 0; i < istate->cache_nr; i++) { struct cache_entry *ce = istate->cache[i]; struct tree *tree; @@ -318,7 +361,7 @@ void expand_index(struct index_state *istate, struct pattern_list *pl) strbuf_add(&base, ce->name, strlen(ce->name)); read_tree_at(istate->repo, tree, &base, &ps, - add_path_to_index, full); + add_path_to_index, &ctx); /* free directory entries. full entries are re-used */ discard_cache_entry(ce); From ac8acb4f2c70dd95c582bd5d4fb4f689f82ff3c6 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Mon, 23 May 2022 13:48:44 +0000 Subject: [PATCH 08/10] sparse-index: complete partial expansion To complete the implementation of expand_index(), we need to detect when a sparse directory entry should remain sparse. This avoids a full expansion, so we now need to use the PARTIALLY_SPARSE mode to indicate this state. There still are no callers to this method, but we will add one in a later change.
Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- sparse-index.c | 39 ++++++++++++++++++++++++++++++++++++--- 1 file changed, 36 insertions(+), 3 deletions(-) diff --git a/sparse-index.c b/sparse-index.c index a881f85181..2c0a18380f 100644 --- a/sparse-index.c +++ b/sparse-index.c @@ -308,8 +308,24 @@ void expand_index(struct index_state *istate, struct pattern_list *pl) * continue. A NULL pattern set indicates a full expansion to a * full index. */ - if (pl && !pl->use_cone_patterns) + if (pl && !pl->use_cone_patterns) { pl = NULL; + } else { + /* + * We might contract file entries into sparse-directory + * entries, and for that we will need the cache tree to + * be recomputed. + */ + cache_tree_free(&istate->cache_tree); + + /* + * If there is a problem creating the cache tree, then we + * need to expand to a full index since we cannot satisfy + * the current request as a sparse index. + */ + if (cache_tree_update(istate, 0)) + pl = NULL; + } if (!istate->repo) istate->repo = the_repository; @@ -327,8 +343,14 @@ void expand_index(struct index_state *istate, struct pattern_list *pl) full = xcalloc(1, sizeof(struct index_state)); memcpy(full, istate, sizeof(struct index_state)); + /* + * This slightly-misnamed 'full' index might still be sparse if we + * are only modifying the list of sparse directories. This hinges + * on whether we have a non-NULL pattern list. + */ + full->sparse_index = pl ? INDEX_PARTIALLY_SPARSE : INDEX_EXPANDED; + /* then change the necessary things */ - full->sparse_index = 0; full->cache_alloc = (3 * istate->cache_alloc) / 2; full->cache_nr = 0; ALLOC_ARRAY(full->cache, full->cache_alloc); @@ -340,11 +362,22 @@ void expand_index(struct index_state *istate, struct pattern_list *pl) struct cache_entry *ce = istate->cache[i]; struct tree *tree; struct pathspec ps; + int dtype; if (!S_ISSPARSEDIR(ce->ce_mode)) { set_index_entry(full, full->cache_nr++, ce); continue; } + + /* We now have a sparse directory entry. Should we expand? 
*/ + if (pl && + path_matches_pattern_list(ce->name, ce->ce_namelen, + NULL, &dtype, + pl, istate) == NOT_MATCHED) { + set_index_entry(full, full->cache_nr++, ce); + continue; + } + if (!(ce->ce_flags & CE_SKIP_WORKTREE)) warning(_("index entry is a directory, but not sparse (%08x)"), ce->ce_flags); @@ -370,7 +403,7 @@ void expand_index(struct index_state *istate, struct pattern_list *pl) /* Copy back into original index. */ memcpy(&istate->name_hash, &full->name_hash, sizeof(full->name_hash)); memcpy(&istate->dir_hash, &full->dir_hash, sizeof(full->dir_hash)); - istate->sparse_index = 0; + istate->sparse_index = pl ? INDEX_PARTIALLY_SPARSE : INDEX_EXPANDED; free(istate->cache); istate->cache = full->cache; istate->cache_nr = full->cache_nr; From b0b40c0468abd09cc0fa64da02a92d798e25d47d Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Mon, 23 May 2022 13:48:45 +0000 Subject: [PATCH 09/10] p2000: add test for 'git sparse-checkout [add|set]' The sparse-checkout builtin is almost completely integrated with the sparse index, allowing the sparse-checkout boundary to be modified without expanding a sparse index to a full one. Add a test to p2000-sparse-operations.sh that adds a directory to the sparse-checkout definition, then removes it. Using both operations is important to ensure that the operation is doing the same work in each repetition as well as leaving the test repo in a good state for later tests. Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- t/perf/p2000-sparse-operations.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/t/perf/p2000-sparse-operations.sh b/t/perf/p2000-sparse-operations.sh index 382716cfca..ce5cfac571 100755 --- a/t/perf/p2000-sparse-operations.sh +++ b/t/perf/p2000-sparse-operations.sh @@ -110,6 +110,7 @@ test_perf_on_all git add -A test_perf_on_all git add . 
test_perf_on_all git commit -a -m A test_perf_on_all git checkout -f - +test_perf_on_all "git sparse-checkout add f2/f3/f1 && git sparse-checkout set $SPARSE_CONE" test_perf_on_all git reset test_perf_on_all git reset --hard test_perf_on_all git reset -- does-not-exist From 598b1e7d0982fd71a25d861dccc1d580ef14ac90 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Mon, 23 May 2022 13:48:46 +0000 Subject: [PATCH 10/10] sparse-checkout: integrate with sparse index When modifying the sparse-checkout definition, the sparse-checkout builtin calls update_sparsity() to modify the SKIP_WORKTREE bits of all cache entries in the index. Before, we needed the index to be fully expanded in order to ensure we had the full list of files necessary that match the new patterns. Insert a call to expand_index() that expands sparse directories that are within the new pattern list, but only far enough that every necessary file path now exists as a cache entry. The remaining logic within update_sparsity() will modify the SKIP_WORKTREE bits appropriately. This allows us to disable command_requires_full_index within the sparse-checkout builtin. Add tests that demonstrate that we are not expanding to a full index unnecessarily. We can see the improved performance in the p2000 test script: Test HEAD~1 HEAD ------------------------------------------------------------------------ 2000.24: git ... (sparse-v3) 2.14(1.55+0.58) 1.57(1.03+0.53) -26.6% 2000.25: git ... (sparse-v4) 2.20(1.62+0.57) 1.58(0.98+0.59) -28.2% These reductions of 26-28% are small compared to most examples, but the time is dominated by writing a new copy of the base repository to the worktree and then deleting it again. The fact that the previous index expansion was such a large portion of the time is telling how important it is to complete this sparse index integration.
Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- builtin/sparse-checkout.c | 3 +++ t/t1092-sparse-checkout-compatibility.sh | 25 ++++++++++++++++++++++++ unpack-trees.c | 4 ++++ 3 files changed, 32 insertions(+) diff --git a/builtin/sparse-checkout.c b/builtin/sparse-checkout.c index 1db51c3fd7..67d1d146de 100644 --- a/builtin/sparse-checkout.c +++ b/builtin/sparse-checkout.c @@ -937,6 +937,9 @@ int cmd_sparse_checkout(int argc, const char **argv, const char *prefix) git_config(git_default_config, NULL); + prepare_repo_settings(the_repository); + the_repository->settings.command_requires_full_index = 0; + if (argc > 0) { if (!strcmp(argv[0], "list")) return sparse_checkout_list(argc, argv); diff --git a/t/t1092-sparse-checkout-compatibility.sh b/t/t1092-sparse-checkout-compatibility.sh index a8f3ce8aa2..1bd7eeab86 100755 --- a/t/t1092-sparse-checkout-compatibility.sh +++ b/t/t1092-sparse-checkout-compatibility.sh @@ -1548,6 +1548,31 @@ test_expect_success 'ls-files' ' ensure_not_expanded ls-files --sparse ' +test_expect_success 'sparse index is not expanded: sparse-checkout' ' + init_repos && + + ensure_not_expanded sparse-checkout set deep/deeper2 && + ensure_not_expanded sparse-checkout set deep/deeper1 && + ensure_not_expanded sparse-checkout set deep && + ensure_not_expanded sparse-checkout add folder1 && + ensure_not_expanded sparse-checkout set deep/deeper1 && + ensure_not_expanded sparse-checkout set folder2 && + + # Demonstrate that the checks that "folder1/a" is a file + # do not cause a sparse-index expansion (since it is in the + # sparse-checkout cone). + echo >>sparse-index/folder2/a && + git -C sparse-index add folder2/a && + + ensure_not_expanded sparse-checkout add folder1 && + + # Skip checks here, since deep/deeper1 is inside a sparse directory + # that must be expanded to check whether `deep/deeper1` is a file + # or not. 
+ ensure_not_expanded sparse-checkout set --skip-checks deep/deeper1 && + ensure_not_expanded sparse-checkout set +' + # NEEDSWORK: a sparse-checkout behaves differently from a full checkout # in this scenario, but it shouldn't. test_expect_success 'reset mixed and checkout orphan' ' diff --git a/unpack-trees.c b/unpack-trees.c index 7f528d35cc..8908b27c03 100644 --- a/unpack-trees.c +++ b/unpack-trees.c @@ -18,6 +18,7 @@ #include "promisor-remote.h" #include "entry.h" #include "parallel-checkout.h" +#include "sparse-index.h" /* * Error messages expected by scripts out of plumbing commands such as @@ -2018,6 +2019,9 @@ enum update_sparsity_result update_sparsity(struct unpack_trees_options *o) goto skip_sparse_checkout; } + /* Expand sparse directories as needed */ + expand_index(o->src_index, o->pl); + /* Set NEW_SKIP_WORKTREE on existing entries. */ mark_all_ce_unused(o->src_index); mark_new_skip_worktree(o->pl, o->src_index, 0,