Merge branch 'ps/repack-avoid-noop-midx-rewrite' into jch

Even when there is no changes in the packfile and no need to
recompute bitmaps, "git repack" recomputed and updated the MIDX
file, which has been corrected.

Comments?

* ps/repack-avoid-noop-midx-rewrite:
  midx-write: skip rewriting MIDX with `--stdin-packs` unless needed
  midx-write: extract function to test whether MIDX needs updating
  midx: fix `BUG()` when getting preferred pack without a reverse index
This commit is contained in:
Junio C Hamano
2025-12-12 15:53:09 +09:00
5 changed files with 194 additions and 21 deletions

View File

@@ -1014,6 +1014,65 @@ static void clear_midx_files(struct odb_source *source,
strbuf_release(&buf);
}
static bool midx_needs_update(struct multi_pack_index *midx, struct write_midx_context *ctx)
{
struct strset packs = STRSET_INIT;
struct strbuf buf = STRBUF_INIT;
bool needed = true;
/*
* Ignore incremental updates for now. The assumption is that any
* incremental update would be either empty (in which case we will bail
* out later) or it would actually cover at least one new pack.
*/
if (ctx->incremental)
goto out;
/*
* Otherwise, we need to verify that the packs covered by the existing
* MIDX match the packs that we already have. The logic to do so is way
* more complicated than it has any right to be. This is because:
*
* - We cannot assume any ordering.
*
* - The MIDX packs may not be loaded at all, and loading them would
* be wasteful. So we need to use the pack names tracked by the
* MIDX itself.
*
* - The MIDX pack names are tracking the ".idx" files, whereas the
* packs themselves are tracking the ".pack" files. So we need to
* strip suffixes.
*/
if (ctx->nr != midx->num_packs + midx->num_packs_in_base)
goto out;
for (uint32_t i = 0; i < ctx->nr; i++) {
strbuf_reset(&buf);
strbuf_addstr(&buf, pack_basename(ctx->info[i].p));
strbuf_strip_suffix(&buf, ".pack");
if (!strset_add(&packs, buf.buf))
BUG("same pack added twice?");
}
for (uint32_t i = 0; i < ctx->nr; i++) {
strbuf_reset(&buf);
strbuf_addstr(&buf, midx->pack_names[i]);
strbuf_strip_suffix(&buf, ".idx");
if (!strset_contains(&packs, buf.buf))
goto out;
strset_remove(&packs, buf.buf);
}
needed = false;
out:
strbuf_release(&buf);
strset_clear(&packs);
return needed;
}
static int write_midx_internal(struct odb_source *source,
struct string_list *packs_to_include,
struct string_list *packs_to_drop,
@@ -1031,6 +1090,7 @@ static int write_midx_internal(struct odb_source *source,
struct write_midx_context ctx = {
.preferred_pack_idx = NO_PREFERRED_PACK,
};
struct multi_pack_index *midx_to_free = NULL;
int bitmapped_packs_concat_len = 0;
int pack_name_concat_len = 0;
int dropped_packs = 0;
@@ -1111,27 +1171,39 @@ static int write_midx_internal(struct odb_source *source,
for_each_file_in_pack_dir(source->path, add_pack_to_midx, &ctx);
stop_progress(&ctx.progress);
if ((ctx.m && ctx.nr == ctx.m->num_packs + ctx.m->num_packs_in_base) &&
!ctx.incremental &&
!(packs_to_include || packs_to_drop)) {
struct bitmap_index *bitmap_git;
int bitmap_exists;
int want_bitmap = flags & MIDX_WRITE_BITMAP;
if (!packs_to_drop) {
/*
* If there is no MIDX then either it doesn't exist, or we're
* doing a geometric repack. Try to load it from the source to
* tell these two cases apart.
*/
struct multi_pack_index *midx = ctx.m;
if (!midx)
midx = midx_to_free = load_multi_pack_index(ctx.source);
bitmap_git = prepare_midx_bitmap_git(ctx.m);
bitmap_exists = bitmap_git && bitmap_is_midx(bitmap_git);
free_bitmap_index(bitmap_git);
if (midx && !midx_needs_update(midx, &ctx)) {
struct bitmap_index *bitmap_git;
int bitmap_exists;
int want_bitmap = flags & MIDX_WRITE_BITMAP;
if (bitmap_exists || !want_bitmap) {
/*
* The correct MIDX already exists, and so does a
* corresponding bitmap (or one wasn't requested).
*/
if (!want_bitmap)
clear_midx_files_ext(source, "bitmap", NULL);
result = 0;
goto cleanup;
bitmap_git = prepare_midx_bitmap_git(midx);
bitmap_exists = bitmap_git && bitmap_is_midx(bitmap_git);
free_bitmap_index(bitmap_git);
if (bitmap_exists || !want_bitmap) {
/*
* The correct MIDX already exists, and so does a
* corresponding bitmap (or one wasn't requested).
*/
if (!want_bitmap)
clear_midx_files_ext(source, "bitmap", NULL);
result = 0;
goto cleanup;
}
}
close_midx(midx_to_free);
midx_to_free = NULL;
}
if (ctx.incremental && !ctx.nr) {
@@ -1487,6 +1559,7 @@ cleanup:
free(keep_hashes);
}
strbuf_release(&midx_name);
close_midx(midx_to_free);
trace2_region_leave("midx", "write_midx_internal", r);

2
midx.c
View File

@@ -686,7 +686,7 @@ int midx_preferred_pack(struct multi_pack_index *m, uint32_t *pack_int_id)
{
if (m->preferred_pack_idx == -1) {
uint32_t midx_pos;
if (load_midx_revindex(m) < 0) {
if (load_midx_revindex(m)) {
m->preferred_pack_idx = -2;
return -1;
}

View File

@@ -72,7 +72,8 @@ int verify_pack_revindex(struct packed_git *p);
* multi-pack index by mmap-ing it and assigning pointers in the
* multi_pack_index to point at it.
*
* A negative number is returned on error.
* A negative number is returned on error. A positive number is returned in
* case the multi-pack-index does not have a reverse index.
*/
int load_midx_revindex(struct multi_pack_index *m);

View File

@@ -350,9 +350,73 @@ test_expect_success 'preferred pack from existing MIDX without bitmaps' '
# the new MIDX
git multi-pack-index write --preferred-pack=pack-$pack.pack
)
'
test_expect_success 'preferred pack cannot be determined without bitmap' '
test_when_finished "rm -fr preferred-can-be-queried" &&
git init preferred-can-be-queried &&
(
cd preferred-can-be-queried &&
test_commit initial &&
git repack -Adl --write-midx --no-write-bitmap-index &&
test_must_fail test-tool read-midx --preferred-pack .git/objects 2>err &&
test_grep "could not determine MIDX preferred pack" err &&
git repack -Adl --write-midx --write-bitmap-index &&
test-tool read-midx --preferred-pack .git/objects
)
'
test_midx_is_retained () {
test-tool chmtime =0 .git/objects/pack/multi-pack-index &&
ls -l .git/objects/pack/multi-pack-index >expect &&
git multi-pack-index write "$@" &&
ls -l .git/objects/pack/multi-pack-index >actual &&
test_cmp expect actual
}
test_midx_is_rewritten () {
test-tool chmtime =0 .git/objects/pack/multi-pack-index &&
ls -l .git/objects/pack/multi-pack-index >expect &&
git multi-pack-index write "$@" &&
ls -l .git/objects/pack/multi-pack-index >actual &&
! test_cmp expect actual
}
test_expect_success 'up-to-date multi-pack-index is retained' '
test_when_finished "rm -fr midx-up-to-date" &&
git init midx-up-to-date &&
(
cd midx-up-to-date &&
# Write the initial pack that contains the most objects.
test_commit first &&
test_commit second &&
git repack -Ad --write-midx &&
test_midx_is_retained &&
# Writing a new bitmap index should cause us to regenerate the MIDX.
test_midx_is_rewritten --bitmap &&
test_midx_is_retained --bitmap &&
# Ensure that writing a new packfile causes us to rewrite the index.
test_commit incremental &&
git repack -d &&
test_midx_is_rewritten &&
test_midx_is_retained &&
for pack in .git/objects/pack/*.idx
do
basename "$pack" || exit 1
done >stdin &&
test_line_count = 2 stdin &&
test_midx_is_retained --stdin-packs <stdin &&
head -n1 stdin >stdin.trimmed &&
test_midx_is_rewritten --stdin-packs <stdin.trimmed
)
'
test_done
test_expect_success 'verify multi-pack-index success' '
git multi-pack-index verify --object-dir=$objdir
'

View File

@@ -287,6 +287,41 @@ test_expect_success '--geometric with pack.packSizeLimit' '
)
'
test_expect_success '--geometric --write-midx retains up-to-date MIDX without bitmap index' '
test_when_finished "rm -fr repo" &&
git init repo &&
(
cd repo &&
test_commit initial &&
test_path_is_missing .git/objects/pack/multi-pack-index &&
git repack --geometric=2 --write-midx --no-write-bitmap-index &&
test_path_is_file .git/objects/pack/multi-pack-index &&
test-tool chmtime =0 .git/objects/pack/multi-pack-index &&
ls -l .git/objects/pack/ >expect &&
git repack --geometric=2 --write-midx --no-write-bitmap-index &&
ls -l .git/objects/pack/ >actual &&
test_cmp expect actual
)
'
test_expect_success '--geometric --write-midx retains up-to-date MIDX with bitmap index' '
test_when_finished "rm -fr repo" &&
git init repo &&
test_commit -C repo initial &&
test_path_is_missing repo/.git/objects/pack/multi-pack-index &&
git -C repo repack --geometric=2 --write-midx --write-bitmap-index &&
test_path_is_file repo/.git/objects/pack/multi-pack-index &&
test-tool chmtime =0 repo/.git/objects/pack/multi-pack-index &&
ls -l repo/.git/objects/pack/ >expect &&
git -C repo repack --geometric=2 --write-midx --write-bitmap-index &&
ls -l repo/.git/objects/pack/ >actual &&
test_cmp expect actual
'
test_expect_success '--geometric --write-midx with packfiles in main and alternate ODB' '
test_when_finished "rm -fr shared member" &&