From b7983adb5180c62586753754ae22a24ce8f7a04c Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Tue, 23 Sep 2025 12:17:00 +0200 Subject: [PATCH 01/15] packfile: introduce a new `struct packfile_store` Information about an object database's packfiles is currently distributed across two different structures: - `struct packed_git` contains the `next` pointer as well as the `mru_head`, both of which serve to store the list of packfiles. - `struct object_database` contains several fields that relate to the packfiles. So we don't really have a central data structure that tracks our packfiles, and consequently responsibilities aren't always clear cut. A consequence for the upcoming pluggable object databases is that this makes it very hard to move management of packfiles from the object database level down into the object database source. Introduce a new `struct packfile_store` which is about to become the single source of truth for managing packfiles. Right now this data structure doesn't yet contain anything, but in subsequent patches we will move all data structures that relate to packfiles and that are currently contained in `struct object_database` into this new home. Note that this is only a first step: most importantly, we won't (yet) move the `struct packed_git::next` pointer around. This will happen in a subsequent patch series though so that `struct packed_git` will really only host information about the specific packfile it represents. Further note that the new structure still sits at the wrong level at the end of this patch series: as mentioned, it should eventually sit at the level of the object database source, not at the object database level. But introducing the packfile store now already makes it way easier to eventually push down the now-selfcontained data structure by one level. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- odb.c | 1 + odb.h | 3 ++- packfile.c | 13 +++++++++++++ packfile.h | 18 ++++++++++++++++++ 4 files changed, 34 insertions(+), 1 deletion(-) diff --git a/odb.c b/odb.c index 75c443fe66..a2289ea97d 100644 --- a/odb.c +++ b/odb.c @@ -996,6 +996,7 @@ struct object_database *odb_new(struct repository *repo) memset(o, 0, sizeof(*o)); o->repo = repo; + o->packfiles = packfile_store_new(o); INIT_LIST_HEAD(&o->packed_git_mru); hashmap_init(&o->pack_map, pack_map_entry_cmp, NULL, 0); pthread_mutex_init(&o->replace_mutex, NULL); diff --git a/odb.h b/odb.h index 51fe8a5a92..33034eaf2f 100644 --- a/odb.h +++ b/odb.h @@ -91,6 +91,7 @@ struct odb_source { }; struct packed_git; +struct packfile_store; struct cached_object_entry; /* @@ -136,7 +137,7 @@ struct object_database { * * should only be accessed directly by packfile.c */ - + struct packfile_store *packfiles; struct packed_git *packed_git; /* A most-recently-used ordered version of the packed_git list. */ struct list_head packed_git_mru; diff --git a/packfile.c b/packfile.c index acb680966d..130d3e2507 100644 --- a/packfile.c +++ b/packfile.c @@ -2332,3 +2332,16 @@ int parse_pack_header_option(const char *in, unsigned char *out, unsigned int *l *len = hdr - out; return 0; } + +struct packfile_store *packfile_store_new(struct object_database *odb) +{ + struct packfile_store *store; + CALLOC_ARRAY(store, 1); + store->odb = odb; + return store; +} + +void packfile_store_free(struct packfile_store *store) +{ + free(store); +} diff --git a/packfile.h b/packfile.h index f16753f2a9..8d31fd619a 100644 --- a/packfile.h +++ b/packfile.h @@ -52,6 +52,24 @@ struct packed_git { char pack_name[FLEX_ARRAY]; /* more */ }; +/* + * A store that manages packfiles for a given object database. + */ +struct packfile_store { + struct object_database *odb; +}; + +/* + * Allocate and initialize a new empty packfile store for the given object + * database. + */ +struct packfile_store *packfile_store_new(struct object_database *odb); + +/* + * Free the packfile store and all its associated state. + */ +void packfile_store_free(struct packfile_store *store); + static inline int pack_map_entry_cmp(const void *cmp_data UNUSED, const struct hashmap_entry *entry, const struct hashmap_entry *entry2, From 535b7a667a94d5882add829e30e20b6dfa076640 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Tue, 23 Sep 2025 12:17:01 +0200 Subject: [PATCH 02/15] odb: move list of packfiles into `struct packfile_store` The object database tracks the list of packfiles it currently knows about. With the introduction of the `struct packfile_store` we have a better place to host this list though. Move the list accordingly. Extract the logic from `odb_clear()` that knows to close all such packfiles and move it into the new subsystem, as well. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- odb.c | 12 ++---------- odb.h | 1 - packfile.c | 42 +++++++++++++++++++++++++----------------- packfile.h | 15 ++++++++++++++- 4 files changed, 41 insertions(+), 29 deletions(-) diff --git a/odb.c b/odb.c index a2289ea97d..7201d01406 100644 --- a/odb.c +++ b/odb.c @@ -1038,16 +1038,8 @@ void odb_clear(struct object_database *o) INIT_LIST_HEAD(&o->packed_git_mru); close_object_store(o); - - /* - * `close_object_store()` only closes the packfiles, but doesn't free - * them. We thus have to do this manually. - */ - for (struct packed_git *p = o->packed_git, *next; p; p = next) { - next = p->next; - free(p); - } - o->packed_git = NULL; + packfile_store_free(o->packfiles); + o->packfiles = NULL; hashmap_clear(&o->pack_map); string_list_clear(&o->submodule_source_paths, 0); diff --git a/odb.h b/odb.h index 33034eaf2f..22a170b434 100644 --- a/odb.h +++ b/odb.h @@ -138,7 +138,6 @@ struct object_database { * should only be accessed directly by packfile.c */ struct packfile_store *packfiles; - struct packed_git *packed_git; /* A most-recently-used ordered version of the packed_git list. */ struct list_head packed_git_mru; diff --git a/packfile.c b/packfile.c index 130d3e2507..36bc240107 100644 --- a/packfile.c +++ b/packfile.c @@ -278,7 +278,7 @@ static int unuse_one_window(struct packed_git *current) if (current) scan_windows(current, &lru_p, &lru_w, &lru_l); - for (p = current->repo->objects->packed_git; p; p = p->next) + for (p = current->repo->objects->packfiles->packs; p; p = p->next) scan_windows(p, &lru_p, &lru_w, &lru_l); if (lru_p) { munmap(lru_w->base, lru_w->len); @@ -362,13 +362,8 @@ void close_pack(struct packed_git *p) void close_object_store(struct object_database *o) { struct odb_source *source; - struct packed_git *p; - for (p = o->packed_git; p; p = p->next) - if (p->do_not_close) - BUG("want to close pack marked 'do-not-close'"); - else - close_pack(p); + packfile_store_close(o->packfiles); for (source = o->sources; source; source = source->next) { if (source->midx) @@ -468,7 +463,7 @@ static int close_one_pack(struct repository *r) struct pack_window *mru_w = NULL; int accept_windows_inuse = 1; - for (p = r->objects->packed_git; p; p = p->next) { + for (p = r->objects->packfiles->packs; p; p = p->next) { if (p->pack_fd == -1) continue; find_lru_pack(p, &lru_p, &mru_w, &accept_windows_inuse); @@ -789,8 +784,8 @@ void install_packed_git(struct repository *r, struct packed_git *pack) if (pack->pack_fd != -1) pack_open_fds++; - pack->next = r->objects->packed_git; - r->objects->packed_git = pack; + pack->next = r->objects->packfiles->packs; + r->objects->packfiles->packs = pack; hashmap_entry_init(&pack->packmap_ent, strhash(pack->pack_name)); hashmap_add(&r->objects->pack_map, &pack->packmap_ent); @@ -974,7 +969,7 @@ unsigned long repo_approximate_object_count(struct repository *r) count += m->num_objects; } - for (p = r->objects->packed_git; p; p = p->next) { + for (p = r->objects->packfiles->packs; p; p = p->next) { if (open_pack_index(p)) continue; count += p->num_objects; @@ -1015,7 +1010,7 @@ static int sort_pack(const struct packed_git *a, const struct packed_git *b) static void rearrange_packed_git(struct repository *r) { - sort_packs(&r->objects->packed_git, sort_pack); + sort_packs(&r->objects->packfiles->packs, sort_pack); } static void prepare_packed_git_mru(struct repository *r) @@ -1024,7 +1019,7 @@ static void prepare_packed_git_mru(struct repository *r) INIT_LIST_HEAD(&r->objects->packed_git_mru); - for (p = r->objects->packed_git; p; p = p->next) + for (p = r->objects->packfiles->packs; p; p = p->next) list_add_tail(&p->mru, &r->objects->packed_git_mru); } @@ -1073,7 +1068,7 @@ void reprepare_packed_git(struct repository *r) struct packed_git *get_packed_git(struct repository *r) { prepare_packed_git(r); - return r->objects->packed_git; + return r->objects->packfiles->packs; } struct multi_pack_index *get_multi_pack_index(struct odb_source *source) @@ -1094,7 +1089,7 @@ struct packed_git *get_all_packs(struct repository *r) prepare_midx_pack(m, i); } - return r->objects->packed_git; + return r->objects->packfiles->packs; } struct list_head *get_packed_git_mru(struct repository *r) @@ -1219,7 +1214,7 @@ const struct packed_git *has_packed_and_bad(struct repository *r, { struct packed_git *p; - for (p = r->objects->packed_git; p; p = p->next) + for (p = r->objects->packfiles->packs; p; p = p->next) if (oidset_contains(&p->bad_objects, oid)) return p; return NULL; @@ -2080,7 +2075,7 @@ int find_pack_entry(struct repository *r, const struct object_id *oid, struct pa if (source->midx && fill_midx_entry(source->midx, oid, e)) return 1; - if (!r->objects->packed_git) + if (!r->objects->packfiles->packs) return 0; list_for_each(pos, &r->objects->packed_git_mru) { @@ -2343,5 +2338,18 @@ struct packfile_store *packfile_store_new(struct object_database *odb) void packfile_store_free(struct packfile_store *store) { + for (struct packed_git *p = store->packs, *next; p; p = next) { + next = p->next; + free(p); + } free(store); } + +void packfile_store_close(struct packfile_store *store) +{ + for (struct packed_git *p = store->packs; p; p = p->next) { + if (p->do_not_close) + BUG("want to close pack marked 'do-not-close'"); + close_pack(p); + } +} diff --git a/packfile.h b/packfile.h index 8d31fd619a..d7ac8d24b4 100644 --- a/packfile.h +++ b/packfile.h @@ -57,6 +57,12 @@ struct packed_git { */ struct packfile_store { struct object_database *odb; + + /* + * The list of packfiles in the order in which they are being added to + * the store. + */ + struct packed_git *packs; }; /* @@ -66,10 +72,17 @@ struct packfile_store { struct packfile_store *packfile_store_new(struct object_database *odb); /* - * Free the packfile store and all its associated state. + * Free the packfile store and all its associated state. All packfiles + * tracked by the store will be closed. */ void packfile_store_free(struct packfile_store *store); +/* + * Close all packfiles associated with this store. The packfiles won't be + * free'd, so they can be re-opened at a later point in time. + */ +void packfile_store_close(struct packfile_store *store); + static inline int pack_map_entry_cmp(const void *cmp_data UNUSED, const struct hashmap_entry *entry, const struct hashmap_entry *entry2, From 3421cb56a8b37425f2a47695adfa4a29a06a9d2e Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Tue, 23 Sep 2025 12:17:02 +0200 Subject: [PATCH 03/15] odb: move initialization bit into `struct packfile_store` The object database knows to skip re-initializing the list of packfiles in case it's already been initialized. Whether or not that is the case is tracked via a separate `initialized` bit that is stored in the object database. With the introduction of the `struct packfile_store` we have a better place to host this bit though. Move it accordingly. While at it, convert the field into a boolean now that we're allowed to use them in our code base. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- odb.h | 6 ------ packfile.c | 6 +++--- packfile.h | 6 ++++++ 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/odb.h b/odb.h index 22a170b434..bf1b4d4677 100644 --- a/odb.h +++ b/odb.h @@ -169,12 +169,6 @@ struct object_database { unsigned long approximate_object_count; unsigned approximate_object_count_valid : 1; - /* - * Whether packed_git has already been populated with this repository's - * packs. - */ - unsigned packed_git_initialized : 1; - /* * Submodule source paths that will be added as additional sources to * allow lookup of submodule objects via the main object database. diff --git a/packfile.c b/packfile.c index 36bc240107..f37557eac5 100644 --- a/packfile.c +++ b/packfile.c @@ -1027,7 +1027,7 @@ static void prepare_packed_git(struct repository *r) { struct odb_source *source; - if (r->objects->packed_git_initialized) + if (r->objects->packfiles->initialized) return; odb_prepare_alternates(r->objects); @@ -1038,7 +1038,7 @@ static void prepare_packed_git(struct repository *r) rearrange_packed_git(r); prepare_packed_git_mru(r); - r->objects->packed_git_initialized = 1; + r->objects->packfiles->initialized = true; } void reprepare_packed_git(struct repository *r) @@ -1060,7 +1060,7 @@ void reprepare_packed_git(struct repository *r) odb_clear_loose_cache(source); r->objects->approximate_object_count_valid = 0; - r->objects->packed_git_initialized = 0; + r->objects->packfiles->initialized = false; prepare_packed_git(r); obj_read_unlock(); } diff --git a/packfile.h b/packfile.h index d7ac8d24b4..cf81091175 100644 --- a/packfile.h +++ b/packfile.h @@ -63,6 +63,12 @@ struct packfile_store { * the store. */ struct packed_git *packs; + + /* + * Whether packfiles have already been populated with this store's + * packs. + */ + bool initialized; }; /* From 14aaf5c9d889a4988ffc64b39fe38bd19b930a50 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Tue, 23 Sep 2025 12:17:03 +0200 Subject: [PATCH 04/15] odb: move packfile map into `struct packfile_store` The object database tracks a map of packfiles by their respective paths, which is used to figure out whether a given packfile has already been loaded. With the introduction of the `struct packfile_store` we have a better place to host this list though. Move the map accordingly. `pack_map_entry_cmp()` isn't used anywhere but in "packfile.c" anymore after this change, so we convert it to a static function, as well. Note that we also drop the `inline` hint: the function is used as a callback function exclusively, and callbacks cannot be inlined. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- midx.c | 2 +- odb.c | 2 -- odb.h | 8 +------- packfile.c | 20 ++++++++++++++++++-- packfile.h | 20 ++++++-------------- 5 files changed, 26 insertions(+), 26 deletions(-) diff --git a/midx.c b/midx.c index 7726c13d7e..e96970efbf 100644 --- a/midx.c +++ b/midx.c @@ -460,7 +460,7 @@ int prepare_midx_pack(struct multi_pack_index *m, strbuf_addbuf(&key, &pack_name); strbuf_strip_suffix(&key, ".idx"); strbuf_addstr(&key, ".pack"); - p = hashmap_get_entry_from_hash(&r->objects->pack_map, + p = hashmap_get_entry_from_hash(&r->objects->packfiles->map, strhash(key.buf), key.buf, struct packed_git, packmap_ent); if (!p) { diff --git a/odb.c b/odb.c index 7201d01406..737d98c911 100644 --- a/odb.c +++ b/odb.c @@ -998,7 +998,6 @@ struct object_database *odb_new(struct repository *repo) o->repo = repo; o->packfiles = packfile_store_new(o); INIT_LIST_HEAD(&o->packed_git_mru); - hashmap_init(&o->pack_map, pack_map_entry_cmp, NULL, 0); pthread_mutex_init(&o->replace_mutex, NULL); string_list_init_dup(&o->submodule_source_paths); return o; @@ -1041,6 +1040,5 @@ void odb_clear(struct object_database *o) packfile_store_free(o->packfiles); o->packfiles = NULL; - hashmap_clear(&o->pack_map); string_list_clear(&o->submodule_source_paths, 0); } diff --git a/odb.h b/odb.h index bf1b4d4677..b79e7280c1 100644 --- a/odb.h +++ b/odb.h @@ -135,7 +135,7 @@ struct object_database { /* * private data * - * should only be accessed directly by packfile.c + * Should only be accessed directly by packfile.c and midx.c. */ struct packfile_store *packfiles; /* A most-recently-used ordered version of the packed_git list. */ @@ -155,12 +155,6 @@ struct object_database { struct cached_object_entry *cached_objects; size_t cached_object_nr, cached_object_alloc; - /* - * A map of packfiles to packed_git structs for tracking which - * packs have been loaded already. - */ - struct hashmap pack_map; - /* * A fast, rough count of the number of objects in the repository. * These two fields are not meant for direct access. Use diff --git a/packfile.c b/packfile.c index f37557eac5..17e0b8ab27 100644 --- a/packfile.c +++ b/packfile.c @@ -788,7 +788,7 @@ void install_packed_git(struct repository *r, struct packed_git *pack) r->objects->packfiles->packs = pack; hashmap_entry_init(&pack->packmap_ent, strhash(pack->pack_name)); - hashmap_add(&r->objects->pack_map, &pack->packmap_ent); + hashmap_add(&r->objects->packfiles->map, &pack->packmap_ent); } void (*report_garbage)(unsigned seen_bits, const char *path); @@ -901,7 +901,7 @@ static void prepare_pack(const char *full_name, size_t full_name_len, hashmap_entry_init(&hent, hash); /* Don't reopen a pack we already have. */ - if (!hashmap_get(&data->r->objects->pack_map, &hent, pack_name)) { + if (!hashmap_get(&data->r->objects->packfiles->map, &hent, pack_name)) { p = add_packed_git(data->r, full_name, full_name_len, data->local); if (p) install_packed_git(data->r, p); @@ -2328,11 +2328,26 @@ int parse_pack_header_option(const char *in, unsigned char *out, unsigned int *l return 0; } +static int pack_map_entry_cmp(const void *cmp_data UNUSED, + const struct hashmap_entry *entry, + const struct hashmap_entry *entry2, + const void *keydata) +{ + const char *key = keydata; + const struct packed_git *pg1, *pg2; + + pg1 = container_of(entry, const struct packed_git, packmap_ent); + pg2 = container_of(entry2, const struct packed_git, packmap_ent); + + return strcmp(pg1->pack_name, key ? key : pg2->pack_name); +} + struct packfile_store *packfile_store_new(struct object_database *odb) { struct packfile_store *store; CALLOC_ARRAY(store, 1); store->odb = odb; + hashmap_init(&store->map, pack_map_entry_cmp, NULL, 0); return store; } @@ -2342,6 +2357,7 @@ void packfile_store_free(struct packfile_store *store) next = p->next; free(p); } + hashmap_clear(&store->map); free(store); } diff --git a/packfile.h b/packfile.h index cf81091175..9bbef51164 100644 --- a/packfile.h +++ b/packfile.h @@ -64,6 +64,12 @@ struct packfile_store { */ struct packed_git *packs; + /* + * A map of packfile names to packed_git structs for tracking which + * packs have been loaded already. + */ + struct hashmap map; + /* * Whether packfiles have already been populated with this store's * packs. @@ -89,20 +95,6 @@ void packfile_store_free(struct packfile_store *store); */ void packfile_store_close(struct packfile_store *store); -static inline int pack_map_entry_cmp(const void *cmp_data UNUSED, - const struct hashmap_entry *entry, - const struct hashmap_entry *entry2, - const void *keydata) -{ - const char *key = keydata; - const struct packed_git *pg1, *pg2; - - pg1 = container_of(entry, const struct packed_git, packmap_ent); - pg2 = container_of(entry2, const struct packed_git, packmap_ent); - - return strcmp(pg1->pack_name, key ? key : pg2->pack_name); -} - struct pack_window { struct pack_window *next; unsigned char *base; From fe835b0ca0ba4d6968cd2d1f824c178547934792 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Tue, 23 Sep 2025 12:17:04 +0200 Subject: [PATCH 05/15] odb: move MRU list of packfiles into `struct packfile_store` The object database tracks the list of packfiles in most-recently-used order, which is mostly used to favor reading from packfiles that contain most of the objects that we're currently accessing. With the introduction of the `struct packfile_store` we have a better place to host this list though. Move the list accordingly. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- midx.c | 2 +- odb.c | 2 -- odb.h | 4 ---- packfile.c | 11 ++++++----- packfile.h | 3 +++ 5 files changed, 10 insertions(+), 12 deletions(-) diff --git a/midx.c b/midx.c index e96970efbf..91c7b3917d 100644 --- a/midx.c +++ b/midx.c @@ -468,7 +468,7 @@ int prepare_midx_pack(struct multi_pack_index *m, m->source->local); if (p) { install_packed_git(r, p); - list_add_tail(&p->mru, &r->objects->packed_git_mru); + list_add_tail(&p->mru, &r->objects->packfiles->mru); } } diff --git a/odb.c b/odb.c index 737d98c911..32e982bf0b 100644 --- a/odb.c +++ b/odb.c @@ -997,7 +997,6 @@ struct object_database *odb_new(struct repository *repo) memset(o, 0, sizeof(*o)); o->repo = repo; o->packfiles = packfile_store_new(o); - INIT_LIST_HEAD(&o->packed_git_mru); pthread_mutex_init(&o->replace_mutex, NULL); string_list_init_dup(&o->submodule_source_paths); return o; @@ -1035,7 +1034,6 @@ void odb_clear(struct object_database *o) free((char *) o->cached_objects[i].value.buf); FREE_AND_NULL(o->cached_objects); - INIT_LIST_HEAD(&o->packed_git_mru); close_object_store(o); packfile_store_free(o->packfiles); o->packfiles = NULL; diff --git a/odb.h b/odb.h index b79e7280c1..3044b6a661 100644 --- a/odb.h +++ b/odb.h @@ -3,7 +3,6 @@ #include "hashmap.h" #include "object.h" -#include "list.h" #include "oidset.h" #include "oidmap.h" #include "string-list.h" @@ -138,9 +137,6 @@ struct object_database { * Should only be accessed directly by packfile.c and midx.c. */ struct packfile_store *packfiles; - /* A most-recently-used ordered version of the packed_git list. */ - struct list_head packed_git_mru; - struct { struct packed_git **packs; unsigned flags; diff --git a/packfile.c b/packfile.c index 17e0b8ab27..861d7ffd6f 100644 --- a/packfile.c +++ b/packfile.c @@ -1017,10 +1017,10 @@ static void prepare_packed_git_mru(struct repository *r) { struct packed_git *p; - INIT_LIST_HEAD(&r->objects->packed_git_mru); + INIT_LIST_HEAD(&r->objects->packfiles->mru); for (p = r->objects->packfiles->packs; p; p = p->next) - list_add_tail(&p->mru, &r->objects->packed_git_mru); + list_add_tail(&p->mru, &r->objects->packfiles->mru); } static void prepare_packed_git(struct repository *r) @@ -1095,7 +1095,7 @@ struct packed_git *get_all_packs(struct repository *r) struct list_head *get_packed_git_mru(struct repository *r) { prepare_packed_git(r); - return &r->objects->packed_git_mru; + return &r->objects->packfiles->mru; } unsigned long unpack_object_header_buffer(const unsigned char *buf, @@ -2078,10 +2078,10 @@ int find_pack_entry(struct repository *r, const struct object_id *oid, struct pa if (!r->objects->packfiles->packs) return 0; - list_for_each(pos, &r->objects->packed_git_mru) { + list_for_each(pos, &r->objects->packfiles->mru) { struct packed_git *p = list_entry(pos, struct packed_git, mru); if (!p->multi_pack_index && fill_pack_entry(oid, e, p)) { - list_move(&p->mru, &r->objects->packed_git_mru); + list_move(&p->mru, &r->objects->packfiles->mru); return 1; } } @@ -2347,6 +2347,7 @@ struct packfile_store *packfile_store_new(struct object_database *odb) struct packfile_store *store; CALLOC_ARRAY(store, 1); store->odb = odb; + INIT_LIST_HEAD(&store->mru); hashmap_init(&store->map, pack_map_entry_cmp, NULL, 0); return store; } diff --git a/packfile.h b/packfile.h index 9bbef51164..d48d46cc1b 100644 --- a/packfile.h +++ b/packfile.h @@ -64,6 +64,9 @@ struct packfile_store { */ struct packed_git *packs; + /* A most-recently-used ordered version of the packs list. */ + struct list_head mru; + /* * A map of packfile names to packed_git structs for tracking which * packs have been loaded already. From bd1a521de869dc9b26ca88efc5eae022222918c1 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Tue, 23 Sep 2025 12:17:05 +0200 Subject: [PATCH 06/15] odb: move kept cache into `struct packfile_store` The object database tracks a cache of "kept" packfiles, which is used by git-pack-objects(1) to handle cruft objects. With the introduction of the `struct packfile_store` we have a better place to host this cache though. Move the cache accordingly. This moves the last bit of packfile-related state from the object database into the packfile store. Adapt the comment for the `packfiles` pointer in `struct object_database` to reflect this. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- odb.h | 10 +--------- packfile.c | 16 ++++++++-------- packfile.h | 14 ++++++++++++++ 3 files changed, 23 insertions(+), 17 deletions(-) diff --git a/odb.h b/odb.h index 3044b6a661..9dd7bb6bc3 100644 --- a/odb.h +++ b/odb.h @@ -131,16 +131,8 @@ struct object_database { struct commit_graph *commit_graph; unsigned commit_graph_attempted : 1; /* if loading has been attempted */ - /* - * private data - * - * Should only be accessed directly by packfile.c and midx.c. - */ + /* Should only be accessed directly by packfile.c and midx.c. */ struct packfile_store *packfiles; - struct { - struct packed_git **packs; - unsigned flags; - } kept_pack_cache; /* * This is meant to hold a *small* number of objects that you would diff --git a/packfile.c b/packfile.c index 861d7ffd6f..95a78f267f 100644 --- a/packfile.c +++ b/packfile.c @@ -2091,19 +2091,19 @@ int find_pack_entry(struct repository *r, const struct object_id *oid, struct pa static void maybe_invalidate_kept_pack_cache(struct repository *r, unsigned flags) { - if (!r->objects->kept_pack_cache.packs) + if (!r->objects->packfiles->kept_cache.packs) return; - if (r->objects->kept_pack_cache.flags == flags) + if (r->objects->packfiles->kept_cache.flags == flags) return; - FREE_AND_NULL(r->objects->kept_pack_cache.packs); - r->objects->kept_pack_cache.flags = 0; + FREE_AND_NULL(r->objects->packfiles->kept_cache.packs); + r->objects->packfiles->kept_cache.flags = 0; } struct packed_git **kept_pack_cache(struct repository *r, unsigned flags) { maybe_invalidate_kept_pack_cache(r, flags); - if (!r->objects->kept_pack_cache.packs) { + if (!r->objects->packfiles->kept_cache.packs) { struct packed_git **packs = NULL; size_t nr = 0, alloc = 0; struct packed_git *p; @@ -2126,11 +2126,11 @@ struct packed_git **kept_pack_cache(struct repository *r, unsigned flags) ALLOC_GROW(packs, nr + 1, alloc); packs[nr] = NULL; - r->objects->kept_pack_cache.packs = packs; - r->objects->kept_pack_cache.flags = flags; + r->objects->packfiles->kept_cache.packs = packs; + r->objects->packfiles->kept_cache.flags = flags; } - return r->objects->kept_pack_cache.packs; + return r->objects->packfiles->kept_cache.packs; } int find_kept_pack_entry(struct repository *r, diff --git a/packfile.h b/packfile.h index d48d46cc1b..bf66211986 100644 --- a/packfile.h +++ b/packfile.h @@ -64,6 +64,20 @@ struct packfile_store { */ struct packed_git *packs; + /* + * Cache of packfiles which are marked as "kept", either because there + * is an on-disk ".keep" file or because they are marked as "kept" in + * memory. + * + * Should not be accessed directly, but via `kept_pack_cache()`. The + * list of packs gets invalidated when the stored flags and the flags + * passed to `kept_pack_cache()` mismatch. + */ + struct { + struct packed_git **packs; + unsigned flags; + } kept_cache; + /* A most-recently-used ordered version of the packs list. */ struct list_head mru; From 995ee880277144207b2cb45069218aa972fb350b Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Tue, 23 Sep 2025 12:17:06 +0200 Subject: [PATCH 07/15] packfile: reorder functions to avoid function declaration Reorder functions so that we can avoid a forward declaration of `prepare_packed_git()`. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- packfile.c | 67 +++++++++++++++++++++++++++--------------------------- 1 file changed, 33 insertions(+), 34 deletions(-) diff --git a/packfile.c b/packfile.c index 95a78f267f..5588a7ad6d 100644 --- a/packfile.c +++ b/packfile.c @@ -946,40 +946,6 @@ static void prepare_packed_git_one(struct odb_source *source) string_list_clear(data.garbage, 0); } -static void prepare_packed_git(struct repository *r); -/* - * Give a fast, rough count of the number of objects in the repository. This - * ignores loose objects completely. If you have a lot of them, then either - * you should repack because your performance will be awful, or they are - * all unreachable objects about to be pruned, in which case they're not really - * interesting as a measure of repo size in the first place. - */ -unsigned long repo_approximate_object_count(struct repository *r) -{ - if (!r->objects->approximate_object_count_valid) { - struct odb_source *source; - unsigned long count = 0; - struct packed_git *p; - - prepare_packed_git(r); - - for (source = r->objects->sources; source; source = source->next) { - struct multi_pack_index *m = get_multi_pack_index(source); - if (m) - count += m->num_objects; - } - - for (p = r->objects->packfiles->packs; p; p = p->next) { - if (open_pack_index(p)) - continue; - count += p->num_objects; - } - r->objects->approximate_object_count = count; - r->objects->approximate_object_count_valid = 1; - } - return r->objects->approximate_object_count; -} - DEFINE_LIST_SORT(static, sort_packs, struct packed_git, next); static int sort_pack(const struct packed_git *a, const struct packed_git *b) @@ -1098,6 +1064,39 @@ struct list_head *get_packed_git_mru(struct repository *r) return &r->objects->packfiles->mru; } +/* + * Give a fast, rough count of the number of objects in the repository. This + * ignores loose objects completely. If you have a lot of them, then either + * you should repack because your performance will be awful, or they are + * all unreachable objects about to be pruned, in which case they're not really + * interesting as a measure of repo size in the first place. + */ +unsigned long repo_approximate_object_count(struct repository *r) +{ + if (!r->objects->approximate_object_count_valid) { + struct odb_source *source; + unsigned long count = 0; + struct packed_git *p; + + prepare_packed_git(r); + + for (source = r->objects->sources; source; source = source->next) { + struct multi_pack_index *m = get_multi_pack_index(source); + if (m) + count += m->num_objects; + } + + for (p = r->objects->packfiles->packs; p; p = p->next) { + if (open_pack_index(p)) + continue; + count += p->num_objects; + } + r->objects->approximate_object_count = count; + r->objects->approximate_object_count_valid = 1; + } + return r->objects->approximate_object_count; +} + unsigned long unpack_object_header_buffer(const unsigned char *buf, unsigned long len, enum object_type *type, unsigned long *sizep) { From c36ecc0685a75f913fe4871766715221c71f4b09 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Tue, 23 Sep 2025 12:17:07 +0200 Subject: [PATCH 08/15] packfile: refactor `prepare_packed_git()` to work on packfile store The `prepare_packed_git()` function and its friends are responsible for loading packfiles as well as the multi-pack index for a given object database. Refactor these functions to accept a packfile store instead of a repository to clarify their scope. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- packfile.c | 41 ++++++++++++++++++----------------------- 1 file changed, 18 insertions(+), 23 deletions(-) diff --git a/packfile.c b/packfile.c index 5588a7ad6d..095c85919b 100644 --- a/packfile.c +++ b/packfile.c @@ -974,37 +974,32 @@ static int sort_pack(const struct packed_git *a, const struct packed_git *b) return -1; } -static void rearrange_packed_git(struct repository *r) -{ - sort_packs(&r->objects->packfiles->packs, sort_pack); -} - -static void prepare_packed_git_mru(struct repository *r) +static void packfile_store_prepare_mru(struct packfile_store *store) { struct packed_git *p; - INIT_LIST_HEAD(&r->objects->packfiles->mru); + INIT_LIST_HEAD(&store->mru); - for (p = r->objects->packfiles->packs; p; p = p->next) - list_add_tail(&p->mru, &r->objects->packfiles->mru); + for (p = store->packs; p; p = p->next) + list_add_tail(&p->mru, &store->mru); } -static void prepare_packed_git(struct repository *r) +static void packfile_store_prepare(struct packfile_store *store) { struct odb_source *source; - if (r->objects->packfiles->initialized) + if (store->initialized) return; - odb_prepare_alternates(r->objects); - for (source = r->objects->sources; source; source = source->next) { + odb_prepare_alternates(store->odb); + for (source = store->odb->sources; source; source = source->next) { prepare_multi_pack_index_one(source); prepare_packed_git_one(source); } - rearrange_packed_git(r); + sort_packs(&store->packs, sort_pack); - prepare_packed_git_mru(r); - r->objects->packfiles->initialized = true; + packfile_store_prepare_mru(store); + store->initialized = true; } void reprepare_packed_git(struct repository *r) @@ -1027,25 +1022,25 @@ void reprepare_packed_git(struct repository *r) r->objects->approximate_object_count_valid = 0; r->objects->packfiles->initialized = false; - prepare_packed_git(r); + packfile_store_prepare(r->objects->packfiles); obj_read_unlock(); } struct packed_git *get_packed_git(struct repository *r) { - prepare_packed_git(r); + packfile_store_prepare(r->objects->packfiles); return r->objects->packfiles->packs; } struct multi_pack_index *get_multi_pack_index(struct odb_source *source) { - prepare_packed_git(source->odb->repo); + packfile_store_prepare(source->odb->packfiles); return source->midx; } struct packed_git *get_all_packs(struct repository *r) { - prepare_packed_git(r); + packfile_store_prepare(r->objects->packfiles); for (struct odb_source *source = r->objects->sources; source; source = source->next) { struct multi_pack_index *m = source->midx; @@ -1060,7 +1055,7 @@ struct packed_git *get_all_packs(struct repository *r) struct list_head *get_packed_git_mru(struct repository *r) { - prepare_packed_git(r); + packfile_store_prepare(r->objects->packfiles); return &r->objects->packfiles->mru; } @@ -1078,7 +1073,7 @@ unsigned long repo_approximate_object_count(struct repository *r) unsigned long count = 0; struct packed_git *p; - prepare_packed_git(r); + packfile_store_prepare(r->objects->packfiles); for (source = r->objects->sources; source; source = source->next) { struct multi_pack_index *m = get_multi_pack_index(source); @@ -2068,7 +2063,7 @@ int find_pack_entry(struct repository *r, const struct object_id *oid, struct pa { struct list_head *pos; - prepare_packed_git(r); + packfile_store_prepare(r->objects->packfiles); for (struct odb_source *source = r->objects->sources; source; source = source->next) if (source->midx && fill_midx_entry(source->midx, oid, e)) From 78237ea53d6546aeab7adb2c7547a1177311ccde Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Tue, 23 Sep 2025 12:17:08 +0200 Subject: [PATCH 09/15] packfile: split up responsibilities of `reprepare_packed_git()` In `reprepare_packed_git()` we perform a couple of operations: - We reload alternate object directories. - We clear the loose object cache. - We reprepare packfiles. While the logic is hosted in "packfile.c", it clearly reaches into other subsystems that aren't related to packfiles. Split up the responsibility and introduce `odb_reprepare()` which now becomes responsible for repreparing the whole object database. The existing `reprepare_packed_git()` function is refactored accordingly and only cares about reloading the packfile store now. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- builtin/backfill.c | 2 +- builtin/gc.c | 4 ++-- builtin/receive-pack.c | 2 +- builtin/repack.c | 2 +- bulk-checkin.c | 2 +- connected.c | 2 +- fetch-pack.c | 4 ++-- object-name.c | 2 +- odb.c | 27 ++++++++++++++++++++++++++- odb.h | 6 ++++++ packfile.c | 26 ++++---------------------- packfile.h | 9 ++++++++- transport-helper.c | 2 +- 13 files changed, 55 insertions(+), 35 deletions(-) diff --git a/builtin/backfill.c b/builtin/backfill.c index 80056abe47..e80fc1b694 100644 --- a/builtin/backfill.c +++ b/builtin/backfill.c @@ -53,7 +53,7 @@ static void download_batch(struct backfill_context *ctx) * We likely have a new packfile. Add it to the packed list to * avoid possible duplicate downloads of the same objects. */ - reprepare_packed_git(ctx->repo); + odb_reprepare(ctx->repo->objects); } static int fill_missing_blobs(const char *path UNUSED, diff --git a/builtin/gc.c b/builtin/gc.c index 03ae4926b2..aeca06a08b 100644 --- a/builtin/gc.c +++ b/builtin/gc.c @@ -1042,7 +1042,7 @@ int cmd_gc(int argc, die(FAILED_RUN, "rerere"); report_garbage = report_pack_garbage; - reprepare_packed_git(the_repository); + odb_reprepare(the_repository->objects); if (pack_garbage.nr > 0) { close_object_store(the_repository->objects); clean_pack_garbage(); @@ -1491,7 +1491,7 @@ static off_t get_auto_pack_size(void) struct packed_git *p; struct repository *r = the_repository; - reprepare_packed_git(r); + odb_reprepare(r->objects); for (p = get_all_packs(r); p; p = p->next) { if (p->pack_size > max_size) { second_largest_size = max_size; diff --git a/builtin/receive-pack.c b/builtin/receive-pack.c index 1113137a6f..c9288a9c7e 100644 --- a/builtin/receive-pack.c +++ b/builtin/receive-pack.c @@ -2389,7 +2389,7 @@ static const char *unpack(int err_fd, struct shallow_info *si) status = finish_command(&child); if (status) return "index-pack abnormal exit"; - reprepare_packed_git(the_repository); + odb_reprepare(the_repository->objects); } return NULL; } diff --git a/builtin/repack.c b/builtin/repack.c index c490a51e91..5ff27fc8e2 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -1685,7 +1685,7 @@ int cmd_repack(int argc, goto cleanup; } - reprepare_packed_git(the_repository); + odb_reprepare(the_repository->objects); if (delete_redundant) { int opts = 0; diff --git a/bulk-checkin.c b/bulk-checkin.c index b2809ab039..f65439a748 100644 --- a/bulk-checkin.c +++ b/bulk-checkin.c @@ -90,7 +90,7 @@ clear_exit: strbuf_release(&packname); /* Make objects we just wrote available to ourselves */ - reprepare_packed_git(the_repository); + odb_reprepare(the_repository->objects); } /* diff --git a/connected.c b/connected.c index 18c13245d8..d6e9682fd9 100644 --- a/connected.c +++ b/connected.c @@ -72,7 +72,7 @@ int check_connected(oid_iterate_fn fn, void *cb_data, * Before checking for promisor packs, be sure we have the * latest pack-files loaded into memory. */ - reprepare_packed_git(the_repository); + odb_reprepare(the_repository->objects); do { struct packed_git *p; diff --git a/fetch-pack.c b/fetch-pack.c index 6ed5662951..fe7a84bf2f 100644 --- a/fetch-pack.c +++ b/fetch-pack.c @@ -1983,7 +1983,7 @@ static void update_shallow(struct fetch_pack_args *args, * remote is shallow, but this is a clone, there are * no objects in repo to worry about. Accept any * shallow points that exist in the pack (iow in repo - * after get_pack() and reprepare_packed_git()) + * after get_pack() and odb_reprepare()) */ struct oid_array extra = OID_ARRAY_INIT; struct object_id *oid = si->shallow->oid; @@ -2108,7 +2108,7 @@ struct ref *fetch_pack(struct fetch_pack_args *args, ref_cpy = do_fetch_pack(args, fd, ref, sought, nr_sought, &si, pack_lockfiles); } - reprepare_packed_git(the_repository); + odb_reprepare(the_repository->objects); if (!args->cloning && args->deepen) { struct check_connected_options opt = CHECK_CONNECTED_INIT; diff --git a/object-name.c b/object-name.c index 732056ff5e..df9e0c5f02 100644 --- a/object-name.c +++ b/object-name.c @@ -596,7 +596,7 @@ static enum get_oid_result get_short_oid(struct repository *r, * or migrated from loose to packed. */ if (status == MISSING_OBJECT) { - reprepare_packed_git(r); + odb_reprepare(r->objects); find_short_object_filename(&ds); find_short_packed_object(&ds); status = finish_object_disambiguation(&ds, oid); diff --git a/odb.c b/odb.c index 32e982bf0b..65a6cc67b6 100644 --- a/odb.c +++ b/odb.c @@ -694,7 +694,7 @@ static int do_oid_object_info_extended(struct object_database *odb, /* Not a loose object; someone else may have just packed it. */ if (!(flags & OBJECT_INFO_QUICK)) { - reprepare_packed_git(odb->repo); + odb_reprepare(odb->repo->objects); if (find_pack_entry(odb->repo, real, &e)) break; } @@ -1040,3 +1040,28 @@ void odb_clear(struct object_database *o) string_list_clear(&o->submodule_source_paths, 0); } + +void odb_reprepare(struct object_database *o) +{ + struct odb_source *source; + + obj_read_lock(); + + /* + * Reprepare alt odbs, in case the alternates file was modified + * during the course of this process. This only _adds_ odbs to + * the linked list, so existing odbs will continue to exist for + * the lifetime of the process. + */ + o->loaded_alternates = 0; + odb_prepare_alternates(o); + + for (source = o->sources; source; source = source->next) + odb_clear_loose_cache(source); + + o->approximate_object_count_valid = 0; + + packfile_store_reprepare(o->packfiles); + + obj_read_unlock(); +} diff --git a/odb.h b/odb.h index 9dd7bb6bc3..ab39e3605d 100644 --- a/odb.h +++ b/odb.h @@ -161,6 +161,12 @@ struct object_database { struct object_database *odb_new(struct repository *repo); void odb_clear(struct object_database *o); +/* + * Clear caches, reload alternates and then reload object sources so that new + * objects may become accessible. + */ +void odb_reprepare(struct object_database *o); + /* * Find source by its object directory path. Returns a `NULL` pointer in case * the source could not be found. diff --git a/packfile.c b/packfile.c index 095c85919b..950b98aac5 100644 --- a/packfile.c +++ b/packfile.c @@ -1002,28 +1002,10 @@ static void packfile_store_prepare(struct packfile_store *store) store->initialized = true; } -void reprepare_packed_git(struct repository *r) +void packfile_store_reprepare(struct packfile_store *store) { - struct odb_source *source; - - obj_read_lock(); - - /* - * Reprepare alt odbs, in case the alternates file was modified - * during the course of this process. This only _adds_ odbs to - * the linked list, so existing odbs will continue to exist for - * the lifetime of the process. - */ - r->objects->loaded_alternates = 0; - odb_prepare_alternates(r->objects); - - for (source = r->objects->sources; source; source = source->next) - odb_clear_loose_cache(source); - - r->objects->approximate_object_count_valid = 0; - r->objects->packfiles->initialized = false; - packfile_store_prepare(r->objects->packfiles); - obj_read_unlock(); + store->initialized = false; + packfile_store_prepare(store); } struct packed_git *get_packed_git(struct repository *r) @@ -1144,7 +1126,7 @@ unsigned long get_size_from_delta(struct packed_git *p, * * Other worrying sections could be the call to close_pack_fd(), * which can close packs even with in-use windows, and to - * reprepare_packed_git(). Regarding the former, mmap doc says: + * odb_reprepare(). Regarding the former, mmap doc says: * "closing the file descriptor does not unmap the region". And * for the latter, it won't re-open already available packs. */ diff --git a/packfile.h b/packfile.h index bf66211986..a85ff607fe 100644 --- a/packfile.h +++ b/packfile.h @@ -112,6 +112,14 @@ void packfile_store_free(struct packfile_store *store); */ void packfile_store_close(struct packfile_store *store); +/* + * Clear the packfile caches and try to look up any new packfiles that have + * appeared since last preparing the packfiles store. + * + * This function must be called under the `odb_read_lock()`. + */ +void packfile_store_reprepare(struct packfile_store *store); + struct pack_window { struct pack_window *next; unsigned char *base; @@ -188,7 +196,6 @@ int for_each_packed_object(struct repository *repo, each_packed_object_fn cb, #define PACKDIR_FILE_GARBAGE 4 extern void (*report_garbage)(unsigned seen_bits, const char *path); -void reprepare_packed_git(struct repository *r); void install_packed_git(struct repository *r, struct packed_git *pack); struct packed_git *get_packed_git(struct repository *r); diff --git a/transport-helper.c b/transport-helper.c index 0789e5bca5..4d95d84f9e 100644 --- a/transport-helper.c +++ b/transport-helper.c @@ -450,7 +450,7 @@ static int fetch_with_fetch(struct transport *transport, } strbuf_release(&buf); - reprepare_packed_git(the_repository); + odb_reprepare(the_repository->objects); return 0; } From f6f236d926915411eca28cb1c47619fdacf6eafb Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Tue, 23 Sep 2025 12:17:09 +0200 Subject: [PATCH 10/15] packfile: refactor `install_packed_git()` to work on packfile store The `install_packed_git()` functions adds a packfile to a specific object store. Refactor it to accept a packfile store instead of a repository to clarify its scope. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- builtin/fast-import.c | 2 +- builtin/index-pack.c | 2 +- http.c | 2 +- http.h | 2 +- midx.c | 2 +- packfile.c | 11 ++++++----- packfile.h | 9 +++++++-- 7 files changed, 18 insertions(+), 12 deletions(-) diff --git a/builtin/fast-import.c b/builtin/fast-import.c index 2c35f9345d..e9d82b31c3 100644 --- a/builtin/fast-import.c +++ b/builtin/fast-import.c @@ -901,7 +901,7 @@ static void end_packfile(void) if (!new_p) die("core git rejected index %s", idx_name); all_packs[pack_id] = new_p; - install_packed_git(the_repository, new_p); + packfile_store_add_pack(the_repository->objects->packfiles, new_p); free(idx_name); /* Print the boundary */ diff --git a/builtin/index-pack.c b/builtin/index-pack.c index f91c301bba..ed490dfad4 100644 --- a/builtin/index-pack.c +++ b/builtin/index-pack.c @@ -1645,7 +1645,7 @@ static void final(const char *final_pack_name, const char *curr_pack_name, p = add_packed_git(the_repository, final_index_name, strlen(final_index_name), 0); if (p) - install_packed_git(the_repository, p); + packfile_store_add_pack(the_repository->objects->packfiles, p); } if (!from_stdin) { diff --git a/http.c b/http.c index 98853d6483..af2120b64c 100644 --- a/http.c +++ b/http.c @@ -2541,7 +2541,7 @@ void http_install_packfile(struct packed_git *p, lst = &((*lst)->next); *lst = (*lst)->next; - install_packed_git(the_repository, p); + packfile_store_add_pack(the_repository->objects->packfiles, p); } struct http_pack_request *new_http_pack_request( diff --git a/http.h b/http.h index 36202139f4..e5a5380c6c 100644 --- a/http.h +++ b/http.h @@ -210,7 +210,7 @@ int finish_http_pack_request(struct http_pack_request *preq); void release_http_pack_request(struct http_pack_request *preq); /* - * Remove p from the given list, and invoke install_packed_git() on it. + * Remove p from the given list, and invoke packfile_store_add_pack() on it. * * This is a convenience function for users that have obtained a list of packs * from http_get_info_packs() and have chosen a specific pack to fetch. diff --git a/midx.c b/midx.c index 91c7b3917d..69c44be71c 100644 --- a/midx.c +++ b/midx.c @@ -467,7 +467,7 @@ int prepare_midx_pack(struct multi_pack_index *m, p = add_packed_git(r, pack_name.buf, pack_name.len, m->source->local); if (p) { - install_packed_git(r, p); + packfile_store_add_pack(r->objects->packfiles, p); list_add_tail(&p->mru, &r->objects->packfiles->mru); } } diff --git a/packfile.c b/packfile.c index 950b98aac5..af806aba09 100644 --- a/packfile.c +++ b/packfile.c @@ -779,16 +779,17 @@ struct packed_git *add_packed_git(struct repository *r, const char *path, return p; } -void install_packed_git(struct repository *r, struct packed_git *pack) +void packfile_store_add_pack(struct packfile_store *store, + struct packed_git *pack) { if (pack->pack_fd != -1) pack_open_fds++; - pack->next = r->objects->packfiles->packs; - r->objects->packfiles->packs = pack; + pack->next = store->packs; + store->packs = pack; hashmap_entry_init(&pack->packmap_ent, strhash(pack->pack_name)); - hashmap_add(&r->objects->packfiles->map, &pack->packmap_ent); + hashmap_add(&store->map, &pack->packmap_ent); } void (*report_garbage)(unsigned seen_bits, const char *path); @@ -904,7 +905,7 @@ static void prepare_pack(const char *full_name, size_t full_name_len, if (!hashmap_get(&data->r->objects->packfiles->map, &hent, pack_name)) { p = add_packed_git(data->r, full_name, full_name_len, data->local); if (p) - install_packed_git(data->r, p); + packfile_store_add_pack(data->r->objects->packfiles, p); } free(pack_name); } diff --git a/packfile.h b/packfile.h index a85ff607fe..ba4b0cef9c 100644 --- a/packfile.h +++ b/packfile.h @@ -120,6 +120,13 @@ void packfile_store_close(struct packfile_store *store); */ void packfile_store_reprepare(struct packfile_store *store); +/* + * Add the pack to the store so that contained objects become accessible via + * the store. This moves ownership into the store. + */ +void packfile_store_add_pack(struct packfile_store *store, + struct packed_git *pack); + struct pack_window { struct pack_window *next; unsigned char *base; @@ -196,8 +203,6 @@ int for_each_packed_object(struct repository *repo, each_packed_object_fn cb, #define PACKDIR_FILE_GARBAGE 4 extern void (*report_garbage)(unsigned seen_bits, const char *path); -void install_packed_git(struct repository *r, struct packed_git *pack); - struct packed_git *get_packed_git(struct repository *r); struct list_head *get_packed_git_mru(struct repository *r); struct multi_pack_index *get_multi_pack_index(struct odb_source *source); From d67530f6bbe56f1951b8fd2fcdaae255bf552e2d Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Tue, 23 Sep 2025 12:17:10 +0200 Subject: [PATCH 11/15] packfile: introduce function to load and add packfiles We have a recurring pattern where we essentially perform an upsert of a packfile in case it isn't yet known by the packfile store. The logic to do so is non-trivial as we have to reconstruct the packfile's key, check the map of packfiles, then create the new packfile and finally add it to the store. Introduce a new function that does this dance for us. Refactor callsites to use it. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- builtin/fast-import.c | 4 ++-- builtin/index-pack.c | 10 +++------- midx.c | 23 ++++------------------ packfile.c | 44 ++++++++++++++++++++++++++++++------------- packfile.h | 8 ++++++++ 5 files changed, 48 insertions(+), 41 deletions(-) diff --git a/builtin/fast-import.c b/builtin/fast-import.c index e9d82b31c3..a26e79689d 100644 --- a/builtin/fast-import.c +++ b/builtin/fast-import.c @@ -897,11 +897,11 @@ static void end_packfile(void) idx_name = keep_pack(create_index()); /* Register the packfile with core git's machinery. */ - new_p = add_packed_git(pack_data->repo, idx_name, strlen(idx_name), 1); + new_p = packfile_store_load_pack(pack_data->repo->objects->packfiles, + idx_name, 1); if (!new_p) die("core git rejected index %s", idx_name); all_packs[pack_id] = new_p; - packfile_store_add_pack(the_repository->objects->packfiles, new_p); free(idx_name); /* Print the boundary */ diff --git a/builtin/index-pack.c b/builtin/index-pack.c index ed490dfad4..2b78ba7fe4 100644 --- a/builtin/index-pack.c +++ b/builtin/index-pack.c @@ -1640,13 +1640,9 @@ static void final(const char *final_pack_name, const char *curr_pack_name, rename_tmp_packfile(&final_index_name, curr_index_name, &index_name, hash, "idx", 1); - if (do_fsck_object) { - struct packed_git *p; - p = add_packed_git(the_repository, final_index_name, - strlen(final_index_name), 0); - if (p) - packfile_store_add_pack(the_repository->objects->packfiles, p); - } + if (do_fsck_object) + packfile_store_load_pack(the_repository->objects->packfiles, + final_index_name, 0); if (!from_stdin) { printf("%s\n", hash_to_hex(hash)); diff --git a/midx.c b/midx.c index 69c44be71c..3faeaf2f8f 100644 --- a/midx.c +++ b/midx.c @@ -443,7 +443,6 @@ int prepare_midx_pack(struct multi_pack_index *m, { struct repository *r = m->source->odb->repo; struct strbuf pack_name = STRBUF_INIT; - struct strbuf key = STRBUF_INIT; struct packed_git *p; pack_int_id = midx_for_pack(&m, pack_int_id); @@ -455,25 +454,11 @@ int prepare_midx_pack(struct multi_pack_index *m, strbuf_addf(&pack_name, "%s/pack/%s", m->source->path, m->pack_names[pack_int_id]); - - /* pack_map holds the ".pack" name, but we have the .idx */ - strbuf_addbuf(&key, &pack_name); - strbuf_strip_suffix(&key, ".idx"); - strbuf_addstr(&key, ".pack"); - p = hashmap_get_entry_from_hash(&r->objects->packfiles->map, - strhash(key.buf), key.buf, - struct packed_git, packmap_ent); - if (!p) { - p = add_packed_git(r, pack_name.buf, pack_name.len, - m->source->local); - if (p) { - packfile_store_add_pack(r->objects->packfiles, p); - list_add_tail(&p->mru, &r->objects->packfiles->mru); - } - } - + p = packfile_store_load_pack(r->objects->packfiles, + pack_name.buf, m->source->local); + if (p) + list_add_tail(&p->mru, &r->objects->packfiles->mru); strbuf_release(&pack_name); - strbuf_release(&key); if (!p) { m->packs[pack_int_id] = MIDX_PACK_ERROR; diff --git a/packfile.c b/packfile.c index af806aba09..9224ca424c 100644 --- a/packfile.c +++ b/packfile.c @@ -792,6 +792,33 @@ void packfile_store_add_pack(struct packfile_store *store, hashmap_add(&store->map, &pack->packmap_ent); } +struct packed_git *packfile_store_load_pack(struct packfile_store *store, + const char *idx_path, int local) +{ + struct strbuf key = STRBUF_INIT; + struct packed_git *p; + + /* + * We're being called with the path to the index file, but `pack_map` + * holds the path to the packfile itself. + */ + strbuf_addstr(&key, idx_path); + strbuf_strip_suffix(&key, ".idx"); + strbuf_addstr(&key, ".pack"); + + p = hashmap_get_entry_from_hash(&store->map, strhash(key.buf), key.buf, + struct packed_git, packmap_ent); + if (!p) { + p = add_packed_git(store->odb->repo, idx_path, + strlen(idx_path), local); + if (p) + packfile_store_add_pack(store, p); + } + + strbuf_release(&key); + return p; +} + void (*report_garbage)(unsigned seen_bits, const char *path); static void report_helper(const struct string_list *list, @@ -891,23 +918,14 @@ static void prepare_pack(const char *full_name, size_t full_name_len, const char *file_name, void *_data) { struct prepare_pack_data *data = (struct prepare_pack_data *)_data; - struct packed_git *p; size_t base_len = full_name_len; if (strip_suffix_mem(full_name, &base_len, ".idx") && !(data->m && midx_contains_pack(data->m, file_name))) { - struct hashmap_entry hent; - char *pack_name = xstrfmt("%.*s.pack", (int)base_len, full_name); - unsigned int hash = strhash(pack_name); - hashmap_entry_init(&hent, hash); - - /* Don't reopen a pack we already have. */ - if (!hashmap_get(&data->r->objects->packfiles->map, &hent, pack_name)) { - p = add_packed_git(data->r, full_name, full_name_len, data->local); - if (p) - packfile_store_add_pack(data->r->objects->packfiles, p); - } - free(pack_name); + char *trimmed_path = xstrndup(full_name, full_name_len); + packfile_store_load_pack(data->r->objects->packfiles, + trimmed_path, data->local); + free(trimmed_path); } if (!report_garbage) diff --git a/packfile.h b/packfile.h index ba4b0cef9c..fcefcbbef6 100644 --- a/packfile.h +++ b/packfile.h @@ -127,6 +127,14 @@ void packfile_store_reprepare(struct packfile_store *store); void packfile_store_add_pack(struct packfile_store *store, struct packed_git *pack); +/* + * Open the packfile and add it to the store if it isn't yet known. Returns + * either the newly opened packfile or the preexisting packfile. Returns a + * `NULL` pointer in case the packfile could not be opened. + */ +struct packed_git *packfile_store_load_pack(struct packfile_store *store, + const char *idx_path, int local); + struct pack_window { struct pack_window *next; unsigned char *base; From ab8aff4a6b2a1d5aa79deeb64bdeecc0234b4ddf Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Tue, 23 Sep 2025 12:17:11 +0200 Subject: [PATCH 12/15] packfile: move `get_multi_pack_index()` into "midx.c" The `get_multi_pack_index()` function is declared and implemented in the packfile subsystem, even though it really belongs into the multi-pack index subsystem. The reason for this is likely that it needs to call `packfile_store_prepare()`, which is not exposed by the packfile system. In a subsequent commit we're about to add another caller outside of the packfile system though, so we'll have to expose the function anyway. Do so now already and move `get_multi_pack_index()` into the MIDX subsystem. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- midx.c | 6 ++++++ midx.h | 1 + packfile.c | 8 +------- packfile.h | 10 +++++++++- 4 files changed, 17 insertions(+), 8 deletions(-) diff --git a/midx.c b/midx.c index 3faeaf2f8f..1d6269f957 100644 --- a/midx.c +++ b/midx.c @@ -93,6 +93,12 @@ static int midx_read_object_offsets(const unsigned char *chunk_start, return 0; } +struct multi_pack_index *get_multi_pack_index(struct odb_source *source) +{ + packfile_store_prepare(source->odb->packfiles); + return source->midx; +} + static struct multi_pack_index *load_multi_pack_index_one(struct odb_source *source, const char *midx_name) { diff --git a/midx.h b/midx.h index e241d2d690..6e54d73503 100644 --- a/midx.h +++ b/midx.h @@ -94,6 +94,7 @@ void get_midx_chain_filename(struct odb_source *source, struct strbuf *out); void get_split_midx_filename_ext(struct odb_source *source, struct strbuf *buf, const unsigned char *hash, const char *ext); +struct multi_pack_index *get_multi_pack_index(struct odb_source *source); struct multi_pack_index *load_multi_pack_index(struct odb_source *source); int prepare_midx_pack(struct multi_pack_index *m, uint32_t pack_int_id); struct packed_git *nth_midxed_pack(struct multi_pack_index *m, diff --git a/packfile.c b/packfile.c index 9224ca424c..7a9193e5ef 100644 --- a/packfile.c +++ b/packfile.c @@ -1003,7 +1003,7 @@ static void packfile_store_prepare_mru(struct packfile_store *store) list_add_tail(&p->mru, &store->mru); } -static void packfile_store_prepare(struct packfile_store *store) +void packfile_store_prepare(struct packfile_store *store) { struct odb_source *source; @@ -1033,12 +1033,6 @@ struct packed_git *get_packed_git(struct repository *r) return r->objects->packfiles->packs; } -struct multi_pack_index *get_multi_pack_index(struct odb_source *source) -{ - packfile_store_prepare(source->odb->packfiles); - return source->midx; -} - struct packed_git *get_all_packs(struct repository *r) { packfile_store_prepare(r->objects->packfiles); diff --git a/packfile.h b/packfile.h index fcefcbbef6..a9e561ac39 100644 --- a/packfile.h +++ b/packfile.h @@ -112,6 +112,15 @@ void packfile_store_free(struct packfile_store *store); */ void packfile_store_close(struct packfile_store *store); +/* + * Prepare the packfile store by loading packfiles and multi-pack indices for + * all alternates. This becomes a no-op if the store is already prepared. + * + * It shouldn't typically be necessary to call this function directly, as + * functions that access the store know to prepare it. + */ +void packfile_store_prepare(struct packfile_store *store); + /* * Clear the packfile caches and try to look up any new packfiles that have * appeared since last preparing the packfiles store. @@ -213,7 +222,6 @@ extern void (*report_garbage)(unsigned seen_bits, const char *path); struct packed_git *get_packed_git(struct repository *r); struct list_head *get_packed_git_mru(struct repository *r); -struct multi_pack_index *get_multi_pack_index(struct odb_source *source); struct packed_git *get_all_packs(struct repository *r); /* From 751808b2a18acba76b824aed4d8b7442bd7f5fca Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Tue, 23 Sep 2025 12:17:12 +0200 Subject: [PATCH 13/15] packfile: refactor `get_packed_git()` to work on packfile store The `get_packed_git()` function prepares the packfile store and then returns its packfiles. Refactor it to accept a packfile store instead of a repository to clarify its scope. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- builtin/gc.c | 2 +- builtin/grep.c | 2 +- object-name.c | 4 ++-- packfile.c | 6 +++--- packfile.h | 7 ++++++- 5 files changed, 13 insertions(+), 8 deletions(-) diff --git a/builtin/gc.c b/builtin/gc.c index aeca06a08b..ec6735a540 100644 --- a/builtin/gc.c +++ b/builtin/gc.c @@ -1423,7 +1423,7 @@ static int incremental_repack_auto_condition(struct gc_config *cfg UNUSED) if (incremental_repack_auto_limit < 0) return 1; - for (p = get_packed_git(the_repository); + for (p = packfile_store_get_packs(the_repository->objects->packfiles); count < incremental_repack_auto_limit && p; p = p->next) { if (!p->multi_pack_index) diff --git a/builtin/grep.c b/builtin/grep.c index 5df6537333..63a4959568 100644 --- a/builtin/grep.c +++ b/builtin/grep.c @@ -1214,7 +1214,7 @@ int cmd_grep(int argc, if (recurse_submodules) repo_read_gitmodules(the_repository, 1); if (startup_info->have_repository) - (void)get_packed_git(the_repository); + (void)packfile_store_get_packs(the_repository->objects->packfiles); start_threads(&opt); } else { diff --git a/object-name.c b/object-name.c index df9e0c5f02..53356819a3 100644 --- a/object-name.c +++ b/object-name.c @@ -213,7 +213,7 @@ static void find_short_packed_object(struct disambiguate_state *ds) unique_in_midx(m, ds); } - for (p = get_packed_git(ds->repo); p && !ds->ambiguous; + for (p = packfile_store_get_packs(ds->repo->objects->packfiles); p && !ds->ambiguous; p = p->next) unique_in_pack(p, ds); } @@ -806,7 +806,7 @@ static void find_abbrev_len_packed(struct min_abbrev_data *mad) find_abbrev_len_for_midx(m, mad); } - for (p = get_packed_git(mad->repo); p; p = p->next) + for (p = packfile_store_get_packs(mad->repo->objects->packfiles); p; p = p->next) find_abbrev_len_for_pack(p, mad); } diff --git a/packfile.c b/packfile.c index 7a9193e5ef..b37f43afb5 100644 --- a/packfile.c +++ b/packfile.c @@ -1027,10 +1027,10 @@ void packfile_store_reprepare(struct packfile_store *store) packfile_store_prepare(store); } -struct packed_git *get_packed_git(struct repository *r) +struct packed_git *packfile_store_get_packs(struct packfile_store *store) { - packfile_store_prepare(r->objects->packfiles); - return r->objects->packfiles->packs; + packfile_store_prepare(store); + return store->packs; } struct packed_git *get_all_packs(struct repository *r) diff --git a/packfile.h b/packfile.h index a9e561ac39..0b691ded7e 100644 --- a/packfile.h +++ b/packfile.h @@ -136,6 +136,12 @@ void packfile_store_reprepare(struct packfile_store *store); void packfile_store_add_pack(struct packfile_store *store, struct packed_git *pack); +/* + * Get packs managed by the given store. Does not load the MIDX or any packs + * referenced by it. + */ +struct packed_git *packfile_store_get_packs(struct packfile_store *store); + /* * Open the packfile and add it to the store if it isn't yet known. Returns * either the newly opened packfile or the preexisting packfile. Returns a @@ -220,7 +226,6 @@ int for_each_packed_object(struct repository *repo, each_packed_object_fn cb, #define PACKDIR_FILE_GARBAGE 4 extern void (*report_garbage)(unsigned seen_bits, const char *path); -struct packed_git *get_packed_git(struct repository *r); struct list_head *get_packed_git_mru(struct repository *r); struct packed_git *get_all_packs(struct repository *r); From d2779beb36ff64eb062103db14006f7ae6da5f37 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Tue, 23 Sep 2025 12:17:13 +0200 Subject: [PATCH 14/15] packfile: refactor `get_all_packs()` to work on packfile store The `get_all_packs()` function prepares the packfile store and then returns its packfiles. Refactor it to accept a packfile store instead of a repository to clarify its scope. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- builtin/cat-file.c | 3 ++- builtin/count-objects.c | 3 ++- builtin/fast-import.c | 6 ++++-- builtin/fsck.c | 11 +++++++---- builtin/gc.c | 8 +++++--- builtin/pack-objects.c | 28 +++++++++++++++++++--------- builtin/pack-redundant.c | 6 ++++-- builtin/repack.c | 9 ++++++--- connected.c | 3 ++- http-backend.c | 5 +++-- http.c | 3 ++- pack-bitmap.c | 4 ++-- pack-objects.c | 3 ++- packfile.c | 12 ++++++------ packfile.h | 7 ++++++- server-info.c | 3 ++- t/helper/test-find-pack.c | 2 +- t/helper/test-pack-mtimes.c | 2 +- 18 files changed, 76 insertions(+), 42 deletions(-) diff --git a/builtin/cat-file.c b/builtin/cat-file.c index fce0b06451..ee6715fa52 100644 --- a/builtin/cat-file.c +++ b/builtin/cat-file.c @@ -852,9 +852,10 @@ static void batch_each_object(struct batch_options *opt, if (bitmap && !for_each_bitmapped_object(bitmap, &opt->objects_filter, batch_one_object_bitmapped, &payload)) { + struct packfile_store *packs = the_repository->objects->packfiles; struct packed_git *pack; - for (pack = get_all_packs(the_repository); pack; pack = pack->next) { + for (pack = packfile_store_get_all_packs(packs); pack; pack = pack->next) { if (bitmap_index_contains_pack(bitmap, pack) || open_pack_index(pack)) continue; diff --git a/builtin/count-objects.c b/builtin/count-objects.c index a61d3b46aa..f2f407c2a7 100644 --- a/builtin/count-objects.c +++ b/builtin/count-objects.c @@ -122,6 +122,7 @@ int cmd_count_objects(int argc, count_loose, count_cruft, NULL, NULL); if (verbose) { + struct packfile_store *packs = the_repository->objects->packfiles; struct packed_git *p; unsigned long num_pack = 0; off_t size_pack = 0; @@ -129,7 +130,7 @@ int cmd_count_objects(int argc, struct strbuf pack_buf = STRBUF_INIT; struct strbuf garbage_buf = STRBUF_INIT; - for (p = get_all_packs(the_repository); p; p = p->next) { + for (p = packfile_store_get_all_packs(packs); p; p = p->next) { if (!p->pack_local) continue; if (open_pack_index(p)) diff --git a/builtin/fast-import.c b/builtin/fast-import.c index a26e79689d..b1d5549815 100644 --- a/builtin/fast-import.c +++ b/builtin/fast-import.c @@ -952,6 +952,7 @@ static int store_object( struct object_id *oidout, uintmax_t mark) { + struct packfile_store *packs = the_repository->objects->packfiles; void *out, *delta; struct object_entry *e; unsigned char hdr[96]; @@ -975,7 +976,7 @@ static int store_object( if (e->idx.offset) { duplicate_count_by_type[type]++; return 1; - } else if (find_oid_pack(&oid, get_all_packs(the_repository))) { + } else if (find_oid_pack(&oid, packfile_store_get_all_packs(packs))) { e->type = type; e->pack_id = MAX_PACK_ID; e->idx.offset = 1; /* just not zero! */ @@ -1092,6 +1093,7 @@ static void truncate_pack(struct hashfile_checkpoint *checkpoint) static void stream_blob(uintmax_t len, struct object_id *oidout, uintmax_t mark) { + struct packfile_store *packs = the_repository->objects->packfiles; size_t in_sz = 64 * 1024, out_sz = 64 * 1024; unsigned char *in_buf = xmalloc(in_sz); unsigned char *out_buf = xmalloc(out_sz); @@ -1175,7 +1177,7 @@ static void stream_blob(uintmax_t len, struct object_id *oidout, uintmax_t mark) duplicate_count_by_type[OBJ_BLOB]++; truncate_pack(&checkpoint); - } else if (find_oid_pack(&oid, get_all_packs(the_repository))) { + } else if (find_oid_pack(&oid, packfile_store_get_all_packs(packs))) { e->type = OBJ_BLOB; e->pack_id = MAX_PACK_ID; e->idx.offset = 1; /* just not zero! */ diff --git a/builtin/fsck.c b/builtin/fsck.c index d2eb9d4fbe..8ee95e0d67 100644 --- a/builtin/fsck.c +++ b/builtin/fsck.c @@ -867,19 +867,20 @@ static int mark_packed_for_connectivity(const struct object_id *oid, static int check_pack_rev_indexes(struct repository *r, int show_progress) { + struct packfile_store *packs = r->objects->packfiles; struct progress *progress = NULL; uint32_t pack_count = 0; int res = 0; if (show_progress) { - for (struct packed_git *p = get_all_packs(r); p; p = p->next) + for (struct packed_git *p = packfile_store_get_all_packs(packs); p; p = p->next) pack_count++; progress = start_delayed_progress(the_repository, "Verifying reverse pack-indexes", pack_count); pack_count = 0; } - for (struct packed_git *p = get_all_packs(r); p; p = p->next) { + for (struct packed_git *p = packfile_store_get_all_packs(packs); p; p = p->next) { int load_error = load_pack_revindex_from_disk(p); if (load_error < 0) { @@ -999,6 +1000,8 @@ int cmd_fsck(int argc, for_each_packed_object(the_repository, mark_packed_for_connectivity, NULL, 0); } else { + struct packfile_store *packs = the_repository->objects->packfiles; + odb_prepare_alternates(the_repository->objects); for (source = the_repository->objects->sources; source; source = source->next) fsck_source(source); @@ -1009,7 +1012,7 @@ int cmd_fsck(int argc, struct progress *progress = NULL; if (show_progress) { - for (p = get_all_packs(the_repository); p; + for (p = packfile_store_get_all_packs(packs); p; p = p->next) { if (open_pack_index(p)) continue; @@ -1019,7 +1022,7 @@ int cmd_fsck(int argc, progress = start_progress(the_repository, _("Checking objects"), total); } - for (p = get_all_packs(the_repository); p; + for (p = packfile_store_get_all_packs(packs); p; p = p->next) { /* verify gives error messages itself */ if (verify_pack(the_repository, diff --git a/builtin/gc.c b/builtin/gc.c index ec6735a540..e19e13d978 100644 --- a/builtin/gc.c +++ b/builtin/gc.c @@ -487,9 +487,10 @@ static int too_many_loose_objects(struct gc_config *cfg) static struct packed_git *find_base_packs(struct string_list *packs, unsigned long limit) { + struct packfile_store *packfiles = the_repository->objects->packfiles; struct packed_git *p, *base = NULL; - for (p = get_all_packs(the_repository); p; p = p->next) { + for (p = packfile_store_get_all_packs(packfiles); p; p = p->next) { if (!p->pack_local || p->is_cruft) continue; if (limit) { @@ -508,13 +509,14 @@ static struct packed_git *find_base_packs(struct string_list *packs, static int too_many_packs(struct gc_config *cfg) { + struct packfile_store *packs = the_repository->objects->packfiles; struct packed_git *p; int cnt; if (cfg->gc_auto_pack_limit <= 0) return 0; - for (cnt = 0, p = get_all_packs(the_repository); p; p = p->next) { + for (cnt = 0, p = packfile_store_get_all_packs(packs); p; p = p->next) { if (!p->pack_local) continue; if (p->pack_keep) @@ -1492,7 +1494,7 @@ static off_t get_auto_pack_size(void) struct repository *r = the_repository; odb_reprepare(r->objects); - for (p = get_all_packs(r); p; p = p->next) { + for (p = packfile_store_get_all_packs(r->objects->packfiles); p; p = p->next) { if (p->pack_size > max_size) { second_largest_size = max_size; max_size = p->pack_size; diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c index 1494afcf3d..de351b757a 100644 --- a/builtin/pack-objects.c +++ b/builtin/pack-objects.c @@ -3831,6 +3831,7 @@ static int pack_mtime_cmp(const void *_a, const void *_b) static void read_packs_list_from_stdin(struct rev_info *revs) { + struct packfile_store *packs = the_repository->objects->packfiles; struct strbuf buf = STRBUF_INIT; struct string_list include_packs = STRING_LIST_INIT_DUP; struct string_list exclude_packs = STRING_LIST_INIT_DUP; @@ -3855,7 +3856,7 @@ static void read_packs_list_from_stdin(struct rev_info *revs) string_list_sort(&exclude_packs); string_list_remove_duplicates(&exclude_packs, 0); - for (p = get_all_packs(the_repository); p; p = p->next) { + for (p = packfile_store_get_all_packs(packs); p; p = p->next) { const char *pack_name = pack_basename(p); if ((item = string_list_lookup(&include_packs, pack_name))) @@ -4076,6 +4077,7 @@ static void enumerate_cruft_objects(void) static void enumerate_and_traverse_cruft_objects(struct string_list *fresh_packs) { + struct packfile_store *packs = the_repository->objects->packfiles; struct packed_git *p; struct rev_info revs; int ret; @@ -4105,7 +4107,7 @@ static void enumerate_and_traverse_cruft_objects(struct string_list *fresh_packs * Re-mark only the fresh packs as kept so that objects in * unknown packs do not halt the reachability traversal early. */ - for (p = get_all_packs(the_repository); p; p = p->next) + for (p = packfile_store_get_all_packs(packs); p; p = p->next) p->pack_keep_in_core = 0; mark_pack_kept_in_core(fresh_packs, 1); @@ -4122,6 +4124,7 @@ static void enumerate_and_traverse_cruft_objects(struct string_list *fresh_packs static void read_cruft_objects(void) { + struct packfile_store *packs = the_repository->objects->packfiles; struct strbuf buf = STRBUF_INIT; struct string_list discard_packs = STRING_LIST_INIT_DUP; struct string_list fresh_packs = STRING_LIST_INIT_DUP; @@ -4142,7 +4145,7 @@ static void read_cruft_objects(void) string_list_sort(&discard_packs); string_list_sort(&fresh_packs); - for (p = get_all_packs(the_repository); p; p = p->next) { + for (p = packfile_store_get_all_packs(packs); p; p = p->next) { const char *pack_name = pack_basename(p); struct string_list_item *item; @@ -4390,11 +4393,12 @@ static void add_unreachable_loose_objects(struct rev_info *revs) static int has_sha1_pack_kept_or_nonlocal(const struct object_id *oid) { + struct packfile_store *packs = the_repository->objects->packfiles; static struct packed_git *last_found = (void *)1; struct packed_git *p; p = (last_found != (void *)1) ? last_found : - get_all_packs(the_repository); + packfile_store_get_all_packs(packs); while (p) { if ((!p->pack_local || p->pack_keep || @@ -4404,7 +4408,7 @@ static int has_sha1_pack_kept_or_nonlocal(const struct object_id *oid) return 1; } if (p == last_found) - p = get_all_packs(the_repository); + p = packfile_store_get_all_packs(packs); else p = p->next; if (p == last_found) @@ -4436,12 +4440,13 @@ static int loosened_object_can_be_discarded(const struct object_id *oid, static void loosen_unused_packed_objects(void) { + struct packfile_store *packs = the_repository->objects->packfiles; struct packed_git *p; uint32_t i; uint32_t loosened_objects_nr = 0; struct object_id oid; - for (p = get_all_packs(the_repository); p; p = p->next) { + for (p = packfile_store_get_all_packs(packs); p; p = p->next) { if (!p->pack_local || p->pack_keep || p->pack_keep_in_core) continue; @@ -4742,12 +4747,13 @@ static void get_object_list(struct rev_info *revs, int ac, const char **av) static void add_extra_kept_packs(const struct string_list *names) { + struct packfile_store *packs = the_repository->objects->packfiles; struct packed_git *p; if (!names->nr) return; - for (p = get_all_packs(the_repository); p; p = p->next) { + for (p = packfile_store_get_all_packs(packs); p; p = p->next) { const char *name = basename(p->pack_name); int i; @@ -5185,8 +5191,10 @@ int cmd_pack_objects(int argc, add_extra_kept_packs(&keep_pack_list); if (ignore_packed_keep_on_disk) { + struct packfile_store *packs = the_repository->objects->packfiles; struct packed_git *p; - for (p = get_all_packs(the_repository); p; p = p->next) + + for (p = packfile_store_get_all_packs(packs); p; p = p->next) if (p->pack_local && p->pack_keep) break; if (!p) /* no keep-able packs found */ @@ -5198,8 +5206,10 @@ int cmd_pack_objects(int argc, * want to unset "local" based on looking at packs, as * it also covers non-local objects */ + struct packfile_store *packs = the_repository->objects->packfiles; struct packed_git *p; - for (p = get_all_packs(the_repository); p; p = p->next) { + + for (p = packfile_store_get_all_packs(packs); p; p = p->next) { if (!p->pack_local) { have_non_local_packs = 1; break; diff --git a/builtin/pack-redundant.c b/builtin/pack-redundant.c index fe81c293e3..dd28171f0a 100644 --- a/builtin/pack-redundant.c +++ b/builtin/pack-redundant.c @@ -566,7 +566,8 @@ static struct pack_list * add_pack(struct packed_git *p) static struct pack_list * add_pack_file(const char *filename) { - struct packed_git *p = get_all_packs(the_repository); + struct packfile_store *packs = the_repository->objects->packfiles; + struct packed_git *p = packfile_store_get_all_packs(packs); if (strlen(filename) < 40) die("Bad pack filename: %s", filename); @@ -581,7 +582,8 @@ static struct pack_list * add_pack_file(const char *filename) static void load_all(void) { - struct packed_git *p = get_all_packs(the_repository); + struct packfile_store *packs = the_repository->objects->packfiles; + struct packed_git *p = packfile_store_get_all_packs(packs); while (p) { add_pack(p); diff --git a/builtin/repack.c b/builtin/repack.c index 5ff27fc8e2..e8730808c5 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -265,10 +265,11 @@ static void existing_packs_release(struct existing_packs *existing) static void collect_pack_filenames(struct existing_packs *existing, const struct string_list *extra_keep) { + struct packfile_store *packs = the_repository->objects->packfiles; struct packed_git *p; struct strbuf buf = STRBUF_INIT; - for (p = get_all_packs(the_repository); p; p = p->next) { + for (p = packfile_store_get_all_packs(packs); p; p = p->next) { int i; const char *base; @@ -497,10 +498,11 @@ static void init_pack_geometry(struct pack_geometry *geometry, struct existing_packs *existing, const struct pack_objects_args *args) { + struct packfile_store *packs = the_repository->objects->packfiles; struct packed_git *p; struct strbuf buf = STRBUF_INIT; - for (p = get_all_packs(the_repository); p; p = p->next) { + for (p = packfile_store_get_all_packs(packs); p; p = p->next) { if (args->local && !p->pack_local) /* * When asked to only repack local packfiles we skip @@ -1137,11 +1139,12 @@ static int write_filtered_pack(const struct pack_objects_args *args, static void combine_small_cruft_packs(FILE *in, size_t combine_cruft_below_size, struct existing_packs *existing) { + struct packfile_store *packs = the_repository->objects->packfiles; struct packed_git *p; struct strbuf buf = STRBUF_INIT; size_t i; - for (p = get_all_packs(the_repository); p; p = p->next) { + for (p = packfile_store_get_all_packs(packs); p; p = p->next) { if (!(p->is_cruft && p->pack_local)) continue; diff --git a/connected.c b/connected.c index d6e9682fd9..b288a18b17 100644 --- a/connected.c +++ b/connected.c @@ -74,9 +74,10 @@ int check_connected(oid_iterate_fn fn, void *cb_data, */ odb_reprepare(the_repository->objects); do { + struct packfile_store *packs = the_repository->objects->packfiles; struct packed_git *p; - for (p = get_all_packs(the_repository); p; p = p->next) { + for (p = packfile_store_get_all_packs(packs); p; p = p->next) { if (!p->pack_promisor) continue; if (find_pack_entry_one(oid, p)) diff --git a/http-backend.c b/http-backend.c index d5dfe762bb..9084058f1e 100644 --- a/http-backend.c +++ b/http-backend.c @@ -603,18 +603,19 @@ static void get_head(struct strbuf *hdr, char *arg UNUSED) static void get_info_packs(struct strbuf *hdr, char *arg UNUSED) { size_t objdirlen = strlen(repo_get_object_directory(the_repository)); + struct packfile_store *packs = the_repository->objects->packfiles; struct strbuf buf = STRBUF_INIT; struct packed_git *p; size_t cnt = 0; select_getanyfile(hdr); - for (p = get_all_packs(the_repository); p; p = p->next) { + for (p = packfile_store_get_all_packs(packs); p; p = p->next) { if (p->pack_local) cnt++; } strbuf_grow(&buf, cnt * 53 + 2); - for (p = get_all_packs(the_repository); p; p = p->next) { + for (p = packfile_store_get_all_packs(packs); p; p = p->next) { if (p->pack_local) strbuf_addf(&buf, "P %s\n", p->pack_name + objdirlen + 6); } diff --git a/http.c b/http.c index af2120b64c..077e879de9 100644 --- a/http.c +++ b/http.c @@ -2408,6 +2408,7 @@ static char *fetch_pack_index(unsigned char *hash, const char *base_url) static int fetch_and_setup_pack_index(struct packed_git **packs_head, unsigned char *sha1, const char *base_url) { + struct packfile_store *packs = the_repository->objects->packfiles; struct packed_git *new_pack, *p; char *tmp_idx = NULL; int ret; @@ -2416,7 +2417,7 @@ static int fetch_and_setup_pack_index(struct packed_git **packs_head, * If we already have the pack locally, no need to fetch its index or * even add it to list; we already have all of its objects. */ - for (p = get_all_packs(the_repository); p; p = p->next) { + for (p = packfile_store_get_all_packs(packs); p; p = p->next) { if (hasheq(p->hash, sha1, the_repository->hash_algo)) return 0; } diff --git a/pack-bitmap.c b/pack-bitmap.c index 058bdb5d7d..ac71035d77 100644 --- a/pack-bitmap.c +++ b/pack-bitmap.c @@ -664,7 +664,7 @@ static int open_pack_bitmap(struct repository *r, struct packed_git *p; int ret = -1; - for (p = get_all_packs(r); p; p = p->next) { + for (p = packfile_store_get_all_packs(r->objects->packfiles); p; p = p->next) { if (open_pack_bitmap_1(bitmap_git, p) == 0) { ret = 0; /* @@ -3362,7 +3362,7 @@ int verify_bitmap_files(struct repository *r) free(midx_bitmap_name); } - for (struct packed_git *p = get_all_packs(r); + for (struct packed_git *p = packfile_store_get_all_packs(r->objects->packfiles); p; p = p->next) { char *pack_bitmap_name = pack_bitmap_filename(p); res |= verify_bitmap_file(r->hash_algo, pack_bitmap_name); diff --git a/pack-objects.c b/pack-objects.c index a9d9855063..d8eb679735 100644 --- a/pack-objects.c +++ b/pack-objects.c @@ -86,6 +86,7 @@ struct object_entry *packlist_find(struct packing_data *pdata, static void prepare_in_pack_by_idx(struct packing_data *pdata) { + struct packfile_store *packs = pdata->repo->objects->packfiles; struct packed_git **mapping, *p; int cnt = 0, nr = 1U << OE_IN_PACK_BITS; @@ -95,7 +96,7 @@ static void prepare_in_pack_by_idx(struct packing_data *pdata) * (i.e. in_pack_idx also zero) should return NULL. */ mapping[cnt++] = NULL; - for (p = get_all_packs(pdata->repo); p; p = p->next, cnt++) { + for (p = packfile_store_get_all_packs(packs); p; p = p->next, cnt++) { if (cnt == nr) { free(mapping); return; diff --git a/packfile.c b/packfile.c index b37f43afb5..cd5431b6aa 100644 --- a/packfile.c +++ b/packfile.c @@ -1033,11 +1033,11 @@ struct packed_git *packfile_store_get_packs(struct packfile_store *store) return store->packs; } -struct packed_git *get_all_packs(struct repository *r) +struct packed_git *packfile_store_get_all_packs(struct packfile_store *store) { - packfile_store_prepare(r->objects->packfiles); + packfile_store_prepare(store); - for (struct odb_source *source = r->objects->sources; source; source = source->next) { + for (struct odb_source *source = store->odb->sources; source; source = source->next) { struct multi_pack_index *m = source->midx; if (!m) continue; @@ -1045,7 +1045,7 @@ struct packed_git *get_all_packs(struct repository *r) prepare_midx_pack(m, i); } - return r->objects->packfiles->packs; + return store->packs; } struct list_head *get_packed_git_mru(struct repository *r) @@ -2105,7 +2105,7 @@ struct packed_git **kept_pack_cache(struct repository *r, unsigned flags) * covers, one kept and one not kept, but the midx returns only * the non-kept version. */ - for (p = get_all_packs(r); p; p = p->next) { + for (p = packfile_store_get_all_packs(r->objects->packfiles); p; p = p->next) { if ((p->pack_keep && (flags & ON_DISK_KEEP_PACKS)) || (p->pack_keep_in_core && (flags & IN_CORE_KEEP_PACKS))) { ALLOC_GROW(packs, nr + 1, alloc); @@ -2202,7 +2202,7 @@ int for_each_packed_object(struct repository *repo, each_packed_object_fn cb, int r = 0; int pack_errors = 0; - for (p = get_all_packs(repo); p; p = p->next) { + for (p = packfile_store_get_all_packs(repo->objects->packfiles); p; p = p->next) { if ((flags & FOR_EACH_OBJECT_LOCAL_ONLY) && !p->pack_local) continue; if ((flags & FOR_EACH_OBJECT_PROMISOR_ONLY) && diff --git a/packfile.h b/packfile.h index 0b691ded7e..1afb9cd664 100644 --- a/packfile.h +++ b/packfile.h @@ -142,6 +142,12 @@ void packfile_store_add_pack(struct packfile_store *store, */ struct packed_git *packfile_store_get_packs(struct packfile_store *store); +/* + * Get all packs managed by the given store, including packfiles that are + * referenced by multi-pack indices. + */ +struct packed_git *packfile_store_get_all_packs(struct packfile_store *store); + /* * Open the packfile and add it to the store if it isn't yet known. Returns * either the newly opened packfile or the preexisting packfile. Returns a @@ -227,7 +233,6 @@ int for_each_packed_object(struct repository *repo, each_packed_object_fn cb, extern void (*report_garbage)(unsigned seen_bits, const char *path); struct list_head *get_packed_git_mru(struct repository *r); -struct packed_git *get_all_packs(struct repository *r); /* * Give a rough count of objects in the repository. This sacrifices accuracy diff --git a/server-info.c b/server-info.c index 9bb30d9ab7..1d33de821e 100644 --- a/server-info.c +++ b/server-info.c @@ -287,12 +287,13 @@ static int compare_info(const void *a_, const void *b_) static void init_pack_info(struct repository *r, const char *infofile, int force) { + struct packfile_store *packs = r->objects->packfiles; struct packed_git *p; int stale; int i; size_t alloc = 0; - for (p = get_all_packs(r); p; p = p->next) { + for (p = packfile_store_get_all_packs(packs); p; p = p->next) { /* we ignore things on alternate path since they are * not available to the pullers in general. */ diff --git a/t/helper/test-find-pack.c b/t/helper/test-find-pack.c index 611a13a326..e001dc3066 100644 --- a/t/helper/test-find-pack.c +++ b/t/helper/test-find-pack.c @@ -39,7 +39,7 @@ int cmd__find_pack(int argc, const char **argv) if (repo_get_oid(the_repository, argv[0], &oid)) die("cannot parse %s as an object name", argv[0]); - for (p = get_all_packs(the_repository); p; p = p->next) + for (p = packfile_store_get_all_packs(the_repository->objects->packfiles); p; p = p->next) if (find_pack_entry_one(&oid, p)) { printf("%s\n", p->pack_name); actual_count++; diff --git a/t/helper/test-pack-mtimes.c b/t/helper/test-pack-mtimes.c index d51aaa3dc4..7c428c1601 100644 --- a/t/helper/test-pack-mtimes.c +++ b/t/helper/test-pack-mtimes.c @@ -37,7 +37,7 @@ int cmd__pack_mtimes(int argc, const char **argv) if (argc != 2) usage(pack_mtimes_usage); - for (p = get_all_packs(the_repository); p; p = p->next) { + for (p = packfile_store_get_all_packs(the_repository->objects->packfiles); p; p = p->next) { strbuf_addstr(&buf, basename(p->pack_name)); strbuf_strip_suffix(&buf, ".pack"); strbuf_addstr(&buf, ".mtimes"); From dd52a29b78d80e425be660f3b443a42e0374a7d1 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Tue, 23 Sep 2025 12:17:14 +0200 Subject: [PATCH 15/15] packfile: refactor `get_packed_git_mru()` to work on packfile store The `get_packed_git_mru()` function prepares the packfile store and then returns its packfiles in most-recently-used order. Refactor it to accept a packfile store instead of a repository to clarify its scope. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- builtin/pack-objects.c | 4 ++-- packfile.c | 6 +++--- packfile.h | 7 +++++-- 3 files changed, 10 insertions(+), 7 deletions(-) diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c index de351b757a..61bbbdfb83 100644 --- a/builtin/pack-objects.c +++ b/builtin/pack-objects.c @@ -1748,12 +1748,12 @@ static int want_object_in_pack_mtime(const struct object_id *oid, } } - list_for_each(pos, get_packed_git_mru(the_repository)) { + list_for_each(pos, packfile_store_get_packs_mru(the_repository->objects->packfiles)) { struct packed_git *p = list_entry(pos, struct packed_git, mru); want = want_object_in_pack_one(p, oid, exclude, found_pack, found_offset, found_mtime); if (!exclude && want > 0) list_move(&p->mru, - get_packed_git_mru(the_repository)); + packfile_store_get_packs_mru(the_repository->objects->packfiles)); if (want != -1) return want; } diff --git a/packfile.c b/packfile.c index cd5431b6aa..5a7caec292 100644 --- a/packfile.c +++ b/packfile.c @@ -1048,10 +1048,10 @@ struct packed_git *packfile_store_get_all_packs(struct packfile_store *store) return store->packs; } -struct list_head *get_packed_git_mru(struct repository *r) +struct list_head *packfile_store_get_packs_mru(struct packfile_store *store) { - packfile_store_prepare(r->objects->packfiles); - return &r->objects->packfiles->mru; + packfile_store_prepare(store); + return &store->mru; } /* diff --git a/packfile.h b/packfile.h index 1afb9cd664..e7a5792b6c 100644 --- a/packfile.h +++ b/packfile.h @@ -148,6 +148,11 @@ struct packed_git *packfile_store_get_packs(struct packfile_store *store); */ struct packed_git *packfile_store_get_all_packs(struct packfile_store *store); +/* + * Get all packs in most-recently-used order. + */ +struct list_head *packfile_store_get_packs_mru(struct packfile_store *store); + /* * Open the packfile and add it to the store if it isn't yet known. Returns * either the newly opened packfile or the preexisting packfile. Returns a @@ -232,8 +237,6 @@ int for_each_packed_object(struct repository *repo, each_packed_object_fn cb, #define PACKDIR_FILE_GARBAGE 4 extern void (*report_garbage)(unsigned seen_bits, const char *path); -struct list_head *get_packed_git_mru(struct repository *r); - /* * Give a rough count of objects in the repository. This sacrifices accuracy * for speed.