Merge branch 'tb/incremental-midx-part-3.2' into seen

Further work on incremental repacking using MIDX/bitmap

* tb/incremental-midx-part-3.2:
  midx: enable reachability bitmaps during MIDX compaction
  midx: implement MIDX compaction
  t/helper/test-read-midx.c: plug memory leak when selecting layer
  midx-write.c: factor fanout layering from `compute_sorted_entries()`
  midx-write.c: enumerate `pack_int_id` values directly
  midx-write.c: extract `fill_pack_from_midx()`
  midx-write.c: introduce `midx_pack_perm()` helper
  git-compat-util.h: introduce `u32_add()`
  midx: do not require packs to be sorted in lexicographic order
  midx-write.c: introduce `struct write_midx_opts`
  midx-write.c: don't use `pack_perm` when assigning `bitmap_pos`
  t/t5319-multi-pack-index.sh: fix copy-and-paste error in t5319.39
  git-multi-pack-index(1): align SYNOPSIS with 'git multi-pack-index -h'
  git-multi-pack-index(1): remove non-existent incompatibility
  builtin/multi-pack-index.c: make '--progress' a common option
  midx: split `get_midx_checksum()` by adding `get_midx_hash()`
  midx: mark `get_midx_checksum()` arguments as const
This commit is contained in:
Junio C Hamano
2025-12-12 22:11:50 +09:00
13 changed files with 737 additions and 149 deletions

View File

@@ -9,7 +9,14 @@ git-multi-pack-index - Write and verify multi-pack-indexes
SYNOPSIS SYNOPSIS
-------- --------
[verse] [verse]
'git multi-pack-index' [--object-dir=<dir>] [--[no-]bitmap] <sub-command> 'git multi-pack-index' [<options>] write [--preferred-pack=<pack>]
[--[no-]bitmap] [--[no-]incremental] [--[no-]stdin-packs]
[--refs-snapshot=<path>]
'git multi-pack-index' [<options>] compact [--[no-]incremental]
[--[no-]bitmap] <from> <to>
'git multi-pack-index' [<options>] verify
'git multi-pack-index' [<options>] expire
'git multi-pack-index' [<options>] repack [--batch-size=<size>]
DESCRIPTION DESCRIPTION
----------- -----------
@@ -18,6 +25,8 @@ Write or verify a multi-pack-index (MIDX) file.
OPTIONS OPTIONS
------- -------
The following command-line options are applicable to all sub-commands:
--object-dir=<dir>:: --object-dir=<dir>::
Use given directory for the location of Git objects. We check Use given directory for the location of Git objects. We check
`<dir>/packs/multi-pack-index` for the current MIDX file, and `<dir>/packs/multi-pack-index` for the current MIDX file, and
@@ -73,7 +82,18 @@ marker).
Write an incremental MIDX file containing only objects Write an incremental MIDX file containing only objects
and packs not present in an existing MIDX layer. and packs not present in an existing MIDX layer.
Migrates non-incremental MIDXs to incremental ones when Migrates non-incremental MIDXs to incremental ones when
necessary. Incompatible with `--bitmap`. necessary.
--
compact::
Write a new MIDX layer containing only objects and packs present
in the range `<from>` to `<to>`, where both arguments are
checksums of existing layers in the MIDX chain.
+
--
--incremental::
Write the result to a MIDX chain instead of writing a
stand-alone MIDX. Incompatible with `--bitmap`.
-- --
verify:: verify::

View File

@@ -13,8 +13,13 @@
#include "repository.h" #include "repository.h"
#define BUILTIN_MIDX_WRITE_USAGE \ #define BUILTIN_MIDX_WRITE_USAGE \
N_("git multi-pack-index [<options>] write [--preferred-pack=<pack>]" \ N_("git multi-pack-index [<options>] write [--preferred-pack=<pack>]\n" \
"[--refs-snapshot=<path>]") " [--[no-]bitmap] [--[no-]incremental] [--[no-]stdin-packs]\n" \
" [--refs-snapshot=<path>]")
/*
 * Usage synopsis for the "compact" sub-command. Used both in the
 * sub-command's own usage array and in the top-level usage array.
 */
#define BUILTIN_MIDX_COMPACT_USAGE \
N_("git multi-pack-index [<options>] compact [--[no-]incremental]\n" \
" [--[no-]bitmap] <from> <to>")
#define BUILTIN_MIDX_VERIFY_USAGE \ #define BUILTIN_MIDX_VERIFY_USAGE \
N_("git multi-pack-index [<options>] verify") N_("git multi-pack-index [<options>] verify")
@@ -29,6 +34,10 @@ static char const * const builtin_multi_pack_index_write_usage[] = {
BUILTIN_MIDX_WRITE_USAGE, BUILTIN_MIDX_WRITE_USAGE,
NULL NULL
}; };
/*
 * NULL-terminated usage list for "git multi-pack-index compact"; handed to
 * parse_options() and usage_with_options() by
 * cmd_multi_pack_index_compact().
 */
static char const * const builtin_multi_pack_index_compact_usage[] = {
BUILTIN_MIDX_COMPACT_USAGE,
NULL
};
static char const * const builtin_multi_pack_index_verify_usage[] = { static char const * const builtin_multi_pack_index_verify_usage[] = {
BUILTIN_MIDX_VERIFY_USAGE, BUILTIN_MIDX_VERIFY_USAGE,
NULL NULL
@@ -43,6 +52,7 @@ static char const * const builtin_multi_pack_index_repack_usage[] = {
}; };
static char const * const builtin_multi_pack_index_usage[] = { static char const * const builtin_multi_pack_index_usage[] = {
BUILTIN_MIDX_WRITE_USAGE, BUILTIN_MIDX_WRITE_USAGE,
BUILTIN_MIDX_COMPACT_USAGE,
BUILTIN_MIDX_VERIFY_USAGE, BUILTIN_MIDX_VERIFY_USAGE,
BUILTIN_MIDX_EXPIRE_USAGE, BUILTIN_MIDX_EXPIRE_USAGE,
BUILTIN_MIDX_REPACK_USAGE, BUILTIN_MIDX_REPACK_USAGE,
@@ -84,6 +94,8 @@ static struct option common_opts[] = {
N_("directory"), N_("directory"),
N_("object directory containing set of packfile and pack-index pairs"), N_("object directory containing set of packfile and pack-index pairs"),
parse_object_dir), parse_object_dir),
OPT_BIT(0, "progress", &opts.flags, N_("force progress reporting"),
MIDX_PROGRESS),
OPT_END(), OPT_END(),
}; };
@@ -138,8 +150,6 @@ static int cmd_multi_pack_index_write(int argc, const char **argv,
N_("pack for reuse when computing a multi-pack bitmap")), N_("pack for reuse when computing a multi-pack bitmap")),
OPT_BIT(0, "bitmap", &opts.flags, N_("write multi-pack bitmap"), OPT_BIT(0, "bitmap", &opts.flags, N_("write multi-pack bitmap"),
MIDX_WRITE_BITMAP | MIDX_WRITE_REV_INDEX), MIDX_WRITE_BITMAP | MIDX_WRITE_REV_INDEX),
OPT_BIT(0, "progress", &opts.flags,
N_("force progress reporting"), MIDX_PROGRESS),
OPT_BIT(0, "incremental", &opts.flags, OPT_BIT(0, "incremental", &opts.flags,
N_("write a new incremental MIDX"), MIDX_WRITE_INCREMENTAL), N_("write a new incremental MIDX"), MIDX_WRITE_INCREMENTAL),
OPT_BOOL(0, "stdin-packs", &opts.stdin_packs, OPT_BOOL(0, "stdin-packs", &opts.stdin_packs,
@@ -194,14 +204,71 @@ static int cmd_multi_pack_index_write(int argc, const char **argv,
return ret; return ret;
} }
/*
 * Implements "git multi-pack-index compact <from> <to>".
 *
 * After option parsing exactly two positional arguments must remain: the
 * checksums of the 'from' and 'to' MIDX layers. Both are looked up in the
 * MIDX chain of the selected object directory by walking from the tip
 * through each layer's `base_midx`.
 *
 * Dies if either layer cannot be found, or if 'from' is neither 'to'
 * itself nor one of the layers reachable from 'to' via `base_midx`.
 *
 * Returns the result of write_midx_file_compact().
 */
static int cmd_multi_pack_index_compact(int argc, const char **argv,
					const char *prefix,
					struct repository *repo)
{
	struct multi_pack_index *m, *cur;
	struct multi_pack_index *from_midx = NULL;
	struct multi_pack_index *to_midx = NULL;
	struct odb_source *source;
	struct option *options;
	static struct option builtin_multi_pack_index_compact_options[] = {
		OPT_BIT(0, "bitmap", &opts.flags, N_("write multi-pack bitmap"),
			MIDX_WRITE_BITMAP | MIDX_WRITE_REV_INDEX),
		OPT_BIT(0, "incremental", &opts.flags,
			N_("write a new incremental MIDX"), MIDX_WRITE_INCREMENTAL),
		OPT_END(),
	};

	repo_config(repo, git_multi_pack_index_write_config, NULL);

	options = add_common_options(builtin_multi_pack_index_compact_options);

	trace2_cmd_mode(argv[0]);

	/* Progress is on by default when stderr is a terminal. */
	if (isatty(2))
		opts.flags |= MIDX_PROGRESS;
	argc = parse_options(argc, argv, prefix,
			     options, builtin_multi_pack_index_compact_usage,
			     0);

	if (argc != 2)
		usage_with_options(builtin_multi_pack_index_compact_usage,
				   options);
	source = handle_object_dir_option(the_repository);

	FREE_AND_NULL(options);

	m = get_multi_pack_index(source);

	/* Stop walking the chain as soon as both endpoints are resolved. */
	for (cur = m; cur && !(from_midx && to_midx); cur = cur->base_midx) {
		const char *midx_csum = get_midx_checksum(cur);

		if (!from_midx && !strcmp(midx_csum, argv[0]))
			from_midx = cur;
		if (!to_midx && !strcmp(midx_csum, argv[1]))
			to_midx = cur;
	}

	if (!from_midx)
		die(_("could not find MIDX 'from': %s"), argv[0]);
	if (!to_midx)
		die(_("could not find MIDX 'to': %s"), argv[1]);

	/*
	 * The endpoints come straight from the command line. The range is
	 * only meaningful when 'from' can be reached from 'to' by following
	 * `base_midx`; report anything else as a usage error here rather
	 * than letting it trip an internal BUG() further down.
	 */
	for (cur = to_midx; cur && cur != from_midx; cur = cur->base_midx)
		; /* nothing */
	if (!cur)
		die(_("MIDX 'from' (%s) is not 'to' (%s) or one of its bases"),
		    argv[0], argv[1]);

	return write_midx_file_compact(source, from_midx, to_midx, opts.flags);
}
static int cmd_multi_pack_index_verify(int argc, const char **argv, static int cmd_multi_pack_index_verify(int argc, const char **argv,
const char *prefix, const char *prefix,
struct repository *repo UNUSED) struct repository *repo UNUSED)
{ {
struct option *options; struct option *options;
static struct option builtin_multi_pack_index_verify_options[] = { static struct option builtin_multi_pack_index_verify_options[] = {
OPT_BIT(0, "progress", &opts.flags,
N_("force progress reporting"), MIDX_PROGRESS),
OPT_END(), OPT_END(),
}; };
struct odb_source *source; struct odb_source *source;
@@ -231,8 +298,6 @@ static int cmd_multi_pack_index_expire(int argc, const char **argv,
{ {
struct option *options; struct option *options;
static struct option builtin_multi_pack_index_expire_options[] = { static struct option builtin_multi_pack_index_expire_options[] = {
OPT_BIT(0, "progress", &opts.flags,
N_("force progress reporting"), MIDX_PROGRESS),
OPT_END(), OPT_END(),
}; };
struct odb_source *source; struct odb_source *source;
@@ -264,8 +329,6 @@ static int cmd_multi_pack_index_repack(int argc, const char **argv,
static struct option builtin_multi_pack_index_repack_options[] = { static struct option builtin_multi_pack_index_repack_options[] = {
OPT_UNSIGNED(0, "batch-size", &opts.batch_size, OPT_UNSIGNED(0, "batch-size", &opts.batch_size,
N_("during repack, collect pack-files of smaller size into a batch that is larger than this size")), N_("during repack, collect pack-files of smaller size into a batch that is larger than this size")),
OPT_BIT(0, "progress", &opts.flags,
N_("force progress reporting"), MIDX_PROGRESS),
OPT_END(), OPT_END(),
}; };
struct odb_source *source; struct odb_source *source;
@@ -300,6 +363,7 @@ int cmd_multi_pack_index(int argc,
struct option builtin_multi_pack_index_options[] = { struct option builtin_multi_pack_index_options[] = {
OPT_SUBCOMMAND("repack", &fn, cmd_multi_pack_index_repack), OPT_SUBCOMMAND("repack", &fn, cmd_multi_pack_index_repack),
OPT_SUBCOMMAND("write", &fn, cmd_multi_pack_index_write), OPT_SUBCOMMAND("write", &fn, cmd_multi_pack_index_write),
OPT_SUBCOMMAND("compact", &fn, cmd_multi_pack_index_compact),
OPT_SUBCOMMAND("verify", &fn, cmd_multi_pack_index_verify), OPT_SUBCOMMAND("verify", &fn, cmd_multi_pack_index_verify),
OPT_SUBCOMMAND("expire", &fn, cmd_multi_pack_index_expire), OPT_SUBCOMMAND("expire", &fn, cmd_multi_pack_index_expire),
OPT_END(), OPT_END(),

View File

@@ -641,6 +641,14 @@ static inline int cast_size_t_to_int(size_t a)
return (int)a; return (int)a;
} }
/*
 * Add two uint32_t values, calling die() instead of wrapping around when
 * the sum does not fit in a uint32_t.
 */
static inline uint32_t u32_add(uint32_t a, uint32_t b)
{
	const int wraps = unsigned_add_overflows(a, b);

	if (wraps)
		die("uint32_t overflow: %"PRIuMAX" + %"PRIuMAX,
		    (uintmax_t)a, (uintmax_t)b);

	return a + b;
}
static inline uint64_t u64_mult(uint64_t a, uint64_t b) static inline uint64_t u64_mult(uint64_t a, uint64_t b)
{ {
if (unsigned_mult_overflows(a, b)) if (unsigned_mult_overflows(a, b))

View File

@@ -108,12 +108,24 @@ struct write_midx_context {
int incremental; int incremental;
uint32_t num_multi_pack_indexes_before; uint32_t num_multi_pack_indexes_before;
struct multi_pack_index *compact_from;
struct multi_pack_index *compact_to;
int compact;
struct string_list *to_include; struct string_list *to_include;
struct repository *repo; struct repository *repo;
struct odb_source *source; struct odb_source *source;
}; };
/*
 * Look up the `pack_perm` entry for the pack whose original pack-int-id is
 * `orig_pack_int_id`.
 *
 * When compacting, `ctx->compact_from->num_packs_in_base` is subtracted
 * from the id before indexing, mirroring how write_midx_internal() fills
 * in `pack_perm` in that mode.
 */
static uint32_t midx_pack_perm(struct write_midx_context *ctx,
			       uint32_t orig_pack_int_id)
{
	uint32_t rebase = 0;

	if (ctx->compact)
		rebase = ctx->compact_from->num_packs_in_base;

	return ctx->pack_perm[orig_pack_int_id - rebase];
}
static int should_include_pack(const struct write_midx_context *ctx, static int should_include_pack(const struct write_midx_context *ctx,
const char *file_name) const char *file_name)
{ {
@@ -317,6 +329,45 @@ static void midx_fanout_add_pack_fanout(struct midx_fanout *fanout,
} }
} }
/*
 * Gather into `fanout` the entries for fanout bucket `cur_fanout` when
 * not compacting:
 *
 *  - from the existing MIDX `ctx->m`, if there is one and we are not
 *    writing incrementally;
 *  - from every pack in `ctx->info` at index `start_pack` or later;
 *  - from the preferred pack, if one is set and its index is below
 *    `start_pack` (so the loop above did not visit it).
 */
static void midx_fanout_add(struct midx_fanout *fanout,
			    struct write_midx_context *ctx,
			    uint32_t start_pack,
			    uint32_t cur_fanout)
{
	const int reuse_midx = ctx->m && !ctx->incremental;
	const int preferred_below_start =
		ctx->preferred_pack_idx != NO_PREFERRED_PACK &&
		ctx->preferred_pack_idx < start_pack;

	if (reuse_midx)
		midx_fanout_add_midx_fanout(fanout, ctx->m, cur_fanout,
					    ctx->preferred_pack_idx);

	for (uint32_t pack = start_pack; pack < ctx->nr; pack++)
		midx_fanout_add_pack_fanout(fanout, ctx->info, pack,
					    pack == ctx->preferred_pack_idx,
					    cur_fanout);

	if (preferred_below_start)
		midx_fanout_add_pack_fanout(fanout, ctx->info,
					    ctx->preferred_pack_idx, 1,
					    cur_fanout);
}
/*
 * Compaction counterpart of midx_fanout_add(): gather into `fanout` the
 * entries for fanout bucket `cur_fanout` from each MIDX layer, starting
 * at `ctx->compact_to` and following `base_midx` until either the chain
 * ends or the base of `ctx->compact_from` is reached. No preferred pack
 * is passed along.
 */
static void midx_fanout_add_compact(struct midx_fanout *fanout,
				    struct write_midx_context *ctx,
				    uint32_t cur_fanout)
{
	struct multi_pack_index *layer;

	ASSERT(ctx->compact);

	for (layer = ctx->compact_to;
	     layer && layer != ctx->compact_from->base_midx;
	     layer = layer->base_midx)
		midx_fanout_add_midx_fanout(fanout, layer, cur_fanout,
					    NO_PREFERRED_PACK);
}
/* /*
* It is possible to artificially get into a state where there are many * It is possible to artificially get into a state where there are many
* duplicate copies of objects. That can create high memory pressure if * duplicate copies of objects. That can create high memory pressure if
@@ -335,6 +386,9 @@ static void compute_sorted_entries(struct write_midx_context *ctx,
size_t alloc_objects, total_objects = 0; size_t alloc_objects, total_objects = 0;
struct midx_fanout fanout = { 0 }; struct midx_fanout fanout = { 0 };
if (ctx->compact)
ASSERT(!start_pack);
for (cur_pack = start_pack; cur_pack < ctx->nr; cur_pack++) for (cur_pack = start_pack; cur_pack < ctx->nr; cur_pack++)
total_objects = st_add(total_objects, total_objects = st_add(total_objects,
ctx->info[cur_pack].p->num_objects); ctx->info[cur_pack].p->num_objects);
@@ -353,23 +407,10 @@ static void compute_sorted_entries(struct write_midx_context *ctx,
for (cur_fanout = 0; cur_fanout < 256; cur_fanout++) { for (cur_fanout = 0; cur_fanout < 256; cur_fanout++) {
fanout.nr = 0; fanout.nr = 0;
if (ctx->m && !ctx->incremental) if (ctx->compact)
midx_fanout_add_midx_fanout(&fanout, ctx->m, cur_fanout, midx_fanout_add_compact(&fanout, ctx, cur_fanout);
ctx->preferred_pack_idx); else
midx_fanout_add(&fanout, ctx, start_pack, cur_fanout);
for (cur_pack = start_pack; cur_pack < ctx->nr; cur_pack++) {
int preferred = cur_pack == ctx->preferred_pack_idx;
midx_fanout_add_pack_fanout(&fanout,
ctx->info, cur_pack,
preferred, cur_fanout);
}
if (ctx->preferred_pack_idx != NO_PREFERRED_PACK &&
ctx->preferred_pack_idx < start_pack)
midx_fanout_add_pack_fanout(&fanout, ctx->info,
ctx->preferred_pack_idx, 1,
cur_fanout);
midx_fanout_sort(&fanout); midx_fanout_sort(&fanout);
/* /*
@@ -410,11 +451,6 @@ static int write_midx_pack_names(struct hashfile *f, void *data)
if (ctx->info[i].expired) if (ctx->info[i].expired)
continue; continue;
if (i && strcmp(ctx->info[i].pack_name, ctx->info[i - 1].pack_name) <= 0)
BUG("incorrect pack-file order: %s before %s",
ctx->info[i - 1].pack_name,
ctx->info[i].pack_name);
writelen = strlen(ctx->info[i].pack_name) + 1; writelen = strlen(ctx->info[i].pack_name) + 1;
hashwrite(f, ctx->info[i].pack_name, writelen); hashwrite(f, ctx->info[i].pack_name, writelen);
written += writelen; written += writelen;
@@ -514,12 +550,12 @@ static int write_midx_object_offsets(struct hashfile *f,
for (i = 0; i < ctx->entries_nr; i++) { for (i = 0; i < ctx->entries_nr; i++) {
struct pack_midx_entry *obj = list++; struct pack_midx_entry *obj = list++;
if (ctx->pack_perm[obj->pack_int_id] == PACK_EXPIRED) if (midx_pack_perm(ctx, obj->pack_int_id) == PACK_EXPIRED)
BUG("object %s is in an expired pack with int-id %d", BUG("object %s is in an expired pack with int-id %d",
oid_to_hex(&obj->oid), oid_to_hex(&obj->oid),
obj->pack_int_id); obj->pack_int_id);
hashwrite_be32(f, ctx->pack_perm[obj->pack_int_id]); hashwrite_be32(f, midx_pack_perm(ctx, obj->pack_int_id));
if (ctx->large_offsets_needed && obj->offset >> 31) if (ctx->large_offsets_needed && obj->offset >> 31)
hashwrite_be32(f, MIDX_LARGE_OFFSET_NEEDED | nr_large_offset++); hashwrite_be32(f, MIDX_LARGE_OFFSET_NEEDED | nr_large_offset++);
@@ -620,8 +656,8 @@ static uint32_t *midx_pack_order(struct write_midx_context *ctx)
for (i = 0; i < ctx->entries_nr; i++) { for (i = 0; i < ctx->entries_nr; i++) {
struct pack_midx_entry *e = &ctx->entries[i]; struct pack_midx_entry *e = &ctx->entries[i];
data[i].nr = i; data[i].nr = i;
data[i].pack = ctx->pack_perm[e->pack_int_id]; data[i].pack = midx_pack_perm(ctx, e->pack_int_id);
if (!e->preferred) if (!e->preferred || ctx->compact)
data[i].pack |= (1U << 31); data[i].pack |= (1U << 31);
data[i].offset = e->offset; data[i].offset = e->offset;
} }
@@ -630,14 +666,14 @@ static uint32_t *midx_pack_order(struct write_midx_context *ctx)
for (i = 0; i < ctx->entries_nr; i++) { for (i = 0; i < ctx->entries_nr; i++) {
struct pack_midx_entry *e = &ctx->entries[data[i].nr]; struct pack_midx_entry *e = &ctx->entries[data[i].nr];
struct pack_info *pack = &ctx->info[ctx->pack_perm[e->pack_int_id]]; struct pack_info *pack = &ctx->info[midx_pack_perm(ctx, e->pack_int_id)];
if (pack->bitmap_pos == BITMAP_POS_UNKNOWN) if (pack->bitmap_pos == BITMAP_POS_UNKNOWN)
pack->bitmap_pos = i + base_objects; pack->bitmap_pos = i + base_objects;
pack->bitmap_nr++; pack->bitmap_nr++;
pack_order[i] = data[i].nr; pack_order[i] = data[i].nr;
} }
for (i = 0; i < ctx->nr; i++) { for (i = 0; i < ctx->nr; i++) {
struct pack_info *pack = &ctx->info[ctx->pack_perm[i]]; struct pack_info *pack = &ctx->info[i];
if (pack->bitmap_pos == BITMAP_POS_UNKNOWN) if (pack->bitmap_pos == BITMAP_POS_UNKNOWN)
pack->bitmap_pos = 0; pack->bitmap_pos = 0;
} }
@@ -691,7 +727,7 @@ static void prepare_midx_packing_data(struct packing_data *pdata,
struct object_entry *to = packlist_alloc(pdata, &from->oid); struct object_entry *to = packlist_alloc(pdata, &from->oid);
oe_set_in_pack(pdata, to, oe_set_in_pack(pdata, to,
ctx->info[ctx->pack_perm[from->pack_int_id]].p); ctx->info[midx_pack_perm(ctx, from->pack_int_id)].p);
} }
trace2_region_leave("midx", "prepare_midx_packing_data", ctx->repo); trace2_region_leave("midx", "prepare_midx_packing_data", ctx->repo);
@@ -909,6 +945,21 @@ cleanup:
return ret; return ret;
} }
static int fill_pack_from_midx(struct pack_info *info,
struct multi_pack_index *m,
uint32_t pack_int_id)
{
if (prepare_midx_pack(m, pack_int_id))
return error(_("could not load pack %d"), pack_int_id);
fill_pack_info(info,
m->packs[pack_int_id - m->num_packs_in_base],
m->pack_names[pack_int_id - m->num_packs_in_base],
pack_int_id);
return 0;
}
static int fill_packs_from_midx(struct write_midx_context *ctx) static int fill_packs_from_midx(struct write_midx_context *ctx)
{ {
struct multi_pack_index *m; struct multi_pack_index *m;
@@ -916,19 +967,85 @@ static int fill_packs_from_midx(struct write_midx_context *ctx)
for (m = ctx->m; m; m = m->base_midx) { for (m = ctx->m; m; m = m->base_midx) {
uint32_t i; uint32_t i;
for (i = 0; i < m->num_packs; i++) { for (i = m->num_packs_in_base;
if (prepare_midx_pack(m, m->num_packs_in_base + i)) i < m->num_packs_in_base + m->num_packs; i++) {
return error(_("could not load pack"));
ALLOC_GROW(ctx->info, ctx->nr + 1, ctx->alloc); ALLOC_GROW(ctx->info, ctx->nr + 1, ctx->alloc);
fill_pack_info(&ctx->info[ctx->nr++], m->packs[i],
m->pack_names[i], if (fill_pack_from_midx(&ctx->info[ctx->nr], m, i) < 0)
m->num_packs_in_base + i); return -1;
ctx->nr++;
} }
} }
return 0; return 0;
} }
/*
 * Return the number of packs covered by compacting the layers from `from`
 * through `to`: the packs in `to` plus the packs in its bases, minus the
 * packs in the bases of `from`.
 *
 * Both arguments must be non-NULL. Dies via u32_add() on overflow, and
 * raises a BUG() if the subtraction would underflow.
 */
static uint32_t compactible_packs_between(const struct multi_pack_index *from,
					  const struct multi_pack_index *to)
{
	uint32_t upto_to;

	ASSERT(from && to);

	upto_to = u32_add(to->num_packs, to->num_packs_in_base);

	if (from->num_packs_in_base > upto_to)
		BUG("unexpected number of packs in base during compaction: "
		    "%"PRIu32" < %"PRIu32, upto_to, from->num_packs_in_base);

	return upto_to - from->num_packs_in_base;
}
/*
 * Fill `ctx->info` with the packs of every MIDX layer in the compaction
 * range, walking from `ctx->compact_to` through `base_midx` until the base
 * of `ctx->compact_from` is reached.
 *
 * Each layer's packs are written to the slots starting at the layer's
 * `num_packs_in_base` minus that of `ctx->compact_from`. Within a layer,
 * one pack is placed first and the remaining packs follow in pack-int-id
 * order. The pack placed first is the one reported by
 * midx_preferred_pack() when `bitmap_order` is non-zero, and the layer's
 * first pack otherwise.
 *
 * Must be called with `ctx->compact` set and `ctx->nr` equal to zero.
 * Returns 0 on success and -1 if a pack could not be loaded; dies if a
 * layer's preferred pack cannot be determined.
 */
static int fill_packs_from_midx_range(struct write_midx_context *ctx,
				      int bitmap_order)
{
	struct multi_pack_index *layer;
	uint32_t expected_nr;

	ASSERT(ctx->compact && !ctx->nr);
	ASSERT(ctx->compact_from);
	ASSERT(ctx->compact_to);

	expected_nr = compactible_packs_between(ctx->compact_from,
						ctx->compact_to);

	ALLOC_GROW(ctx->info, expected_nr, ctx->alloc);

	for (layer = ctx->compact_to;
	     layer != ctx->compact_from->base_midx;
	     layer = layer->base_midx) {
		uint32_t first_id, slot, id;

		if (!bitmap_order)
			first_id = layer->num_packs_in_base;
		else if (midx_preferred_pack(layer, &first_id) < 0)
			die(_("could not determine preferred pack"));

		/* Slot of this layer's first pack within ctx->info. */
		slot = layer->num_packs_in_base -
			ctx->compact_from->num_packs_in_base;

		if (fill_pack_from_midx(&ctx->info[slot], layer, first_id) < 0)
			return -1;
		slot++;

		for (id = layer->num_packs_in_base;
		     id < layer->num_packs_in_base + layer->num_packs;
		     id++) {
			/* Already placed at the front of this layer. */
			if (id == first_id)
				continue;

			if (fill_pack_from_midx(&ctx->info[slot], layer, id) < 0)
				return -1;
			slot++;
		}

		ctx->nr += layer->num_packs;
	}

	ASSERT(ctx->nr == expected_nr);

	return 0;
}
static struct { static struct {
const char *non_split; const char *non_split;
const char *split; const char *split;
@@ -955,7 +1072,7 @@ static int link_midx_to_chain(struct multi_pack_index *m)
} }
for (i = 0; i < ARRAY_SIZE(midx_exts); i++) { for (i = 0; i < ARRAY_SIZE(midx_exts); i++) {
const unsigned char *hash = get_midx_checksum(m); const unsigned char *hash = get_midx_hash(m);
get_midx_filename_ext(m->source, &from, get_midx_filename_ext(m->source, &from,
hash, midx_exts[i].non_split); hash, midx_exts[i].non_split);
@@ -1025,7 +1142,7 @@ static bool midx_needs_update(struct multi_pack_index *midx, struct write_midx_c
* incremental update would be either empty (in which case we will bail * incremental update would be either empty (in which case we will bail
* out later) or it would actually cover at least one new pack. * out later) or it would actually cover at least one new pack.
*/ */
if (ctx->incremental) if (ctx->incremental || ctx->compact)
goto out; goto out;
/* /*
@@ -1073,14 +1190,30 @@ out:
return needed; return needed;
} }
static int write_midx_internal(struct odb_source *source, static int midx_hashcmp(const struct multi_pack_index *a,
struct string_list *packs_to_include, const struct multi_pack_index *b,
struct string_list *packs_to_drop, const struct git_hash_algo *algop)
const char *preferred_pack_name,
const char *refs_snapshot,
unsigned flags)
{ {
struct repository *r = source->odb->repo; return hashcmp(get_midx_hash(a), get_midx_hash(b), algop);
}
/*
 * Bundle of arguments for write_midx_internal().
 */
struct write_midx_opts {
/* Object source to write the MIDX for; its path and repository are used. */
struct odb_source *source;
/*
 * If non-NULL, becomes the write context's `to_include` list when not
 * compacting. When NULL, an existing MIDX is loaded into the context.
 */
struct string_list *packs_to_include;
/*
 * If non-NULL and non-empty, names of packs to mark as expired. Not
 * allowed together with compaction or bitmap writing.
 */
struct string_list *packs_to_drop;
/*
 * Endpoints of the range to compact. Both must be non-NULL when
 * MIDX_WRITE_COMPACT is set in `flags`.
 */
struct multi_pack_index *compact_from;
struct multi_pack_index *compact_to;
/*
 * Optional name of the preferred pack, matched against the collected
 * pack names with cmp_idx_or_pack_name(); a warning is issued if no pack
 * matches.
 */
const char *preferred_pack_name;
/*
 * NOTE(review): not referenced in the visible part of
 * write_midx_internal(); presumably the path given via
 * `--refs-snapshot=<path>` -- confirm.
 */
const char *refs_snapshot;
/*
 * Bitwise OR of MIDX_* flags (MIDX_WRITE_INCREMENTAL, MIDX_WRITE_COMPACT,
 * MIDX_WRITE_BITMAP, MIDX_WRITE_REV_INDEX, MIDX_PROGRESS).
 * write_midx_internal() clears the bitmap and rev-index bits in place
 * when there are no objects to write.
 */
unsigned flags;
};
static int write_midx_internal(struct write_midx_opts *opts)
{
struct repository *r = opts->source->odb->repo;
struct strbuf midx_name = STRBUF_INIT; struct strbuf midx_name = STRBUF_INIT;
unsigned char midx_hash[GIT_MAX_RAWSZ]; unsigned char midx_hash[GIT_MAX_RAWSZ];
uint32_t start_pack; uint32_t start_pack;
@@ -1096,27 +1229,39 @@ static int write_midx_internal(struct odb_source *source,
int dropped_packs = 0; int dropped_packs = 0;
int result = -1; int result = -1;
const char **keep_hashes = NULL; const char **keep_hashes = NULL;
size_t keep_hashes_nr = 0;
struct chunkfile *cf; struct chunkfile *cf;
trace2_region_enter("midx", "write_midx_internal", r); trace2_region_enter("midx", "write_midx_internal", r);
ctx.repo = r; ctx.repo = r;
ctx.source = source; ctx.source = opts->source;
ctx.incremental = !!(flags & MIDX_WRITE_INCREMENTAL); ctx.incremental = !!(opts->flags & MIDX_WRITE_INCREMENTAL);
ctx.compact = !!(opts->flags & MIDX_WRITE_COMPACT);
if (ctx.compact) {
if (!opts->compact_from)
BUG("expected non-NULL 'from' MIDX during compaction");
if (!opts->compact_to)
BUG("expected non-NULL 'to' MIDX during compaction");
ctx.compact_from = opts->compact_from;
ctx.compact_to = opts->compact_to;
}
if (ctx.incremental) if (ctx.incremental)
strbuf_addf(&midx_name, strbuf_addf(&midx_name,
"%s/pack/multi-pack-index.d/tmp_midx_XXXXXX", "%s/pack/multi-pack-index.d/tmp_midx_XXXXXX",
source->path); opts->source->path);
else else
get_midx_filename(source, &midx_name); get_midx_filename(opts->source, &midx_name);
if (safe_create_leading_directories(r, midx_name.buf)) if (safe_create_leading_directories(r, midx_name.buf))
die_errno(_("unable to create leading directories of %s"), die_errno(_("unable to create leading directories of %s"),
midx_name.buf); midx_name.buf);
if (!packs_to_include || ctx.incremental) { if (!opts->packs_to_include || ctx.incremental) {
struct multi_pack_index *m = get_multi_pack_index(source); struct multi_pack_index *m = get_multi_pack_index(opts->source);
if (m && !midx_checksum_valid(m)) { if (m && !midx_checksum_valid(m)) {
warning(_("ignoring existing multi-pack-index; checksum mismatch")); warning(_("ignoring existing multi-pack-index; checksum mismatch"));
m = NULL; m = NULL;
@@ -1131,11 +1276,18 @@ static int write_midx_internal(struct odb_source *source,
*/ */
if (ctx.incremental) if (ctx.incremental)
ctx.base_midx = m; ctx.base_midx = m;
else if (!packs_to_include) if (!opts->packs_to_include)
ctx.m = m; ctx.m = m;
} }
} }
/*
* If compacting MIDX layer(s) in the range [from, to], then the
* compacted MIDX will share the same base MIDX as 'from'.
*/
if (ctx.compact)
ctx.base_midx = ctx.compact_from->base_midx;
ctx.nr = 0; ctx.nr = 0;
ctx.alloc = ctx.m ? ctx.m->num_packs + ctx.m->num_packs_in_base : 16; ctx.alloc = ctx.m ? ctx.m->num_packs + ctx.m->num_packs_in_base : 16;
ctx.info = NULL; ctx.info = NULL;
@@ -1144,34 +1296,42 @@ static int write_midx_internal(struct odb_source *source,
if (ctx.incremental) { if (ctx.incremental) {
struct multi_pack_index *m = ctx.base_midx; struct multi_pack_index *m = ctx.base_midx;
while (m) { while (m) {
if (flags & MIDX_WRITE_BITMAP && load_midx_revindex(m)) { if (opts->flags & MIDX_WRITE_BITMAP && load_midx_revindex(m)) {
error(_("could not load reverse index for MIDX %s"), error(_("could not load reverse index for MIDX %s"),
hash_to_hex_algop(get_midx_checksum(m), get_midx_checksum(m));
m->source->odb->repo->hash_algo));
goto cleanup; goto cleanup;
} }
ctx.num_multi_pack_indexes_before++; ctx.num_multi_pack_indexes_before++;
m = m->base_midx; m = m->base_midx;
} }
} else if (ctx.m && fill_packs_from_midx(&ctx)) { } else if (ctx.m && !ctx.compact && fill_packs_from_midx(&ctx)) {
goto cleanup; goto cleanup;
} }
start_pack = ctx.nr; start_pack = ctx.nr;
ctx.pack_paths_checked = 0; ctx.pack_paths_checked = 0;
if (flags & MIDX_PROGRESS) if (opts->flags & MIDX_PROGRESS)
ctx.progress = start_delayed_progress(r, ctx.progress = start_delayed_progress(r,
_("Adding packfiles to multi-pack-index"), 0); _("Adding packfiles to multi-pack-index"), 0);
else else
ctx.progress = NULL; ctx.progress = NULL;
ctx.to_include = packs_to_include; if (ctx.compact) {
int bitmap_order = 0;
if (opts->preferred_pack_name)
bitmap_order |= 1;
else if (opts->flags & (MIDX_WRITE_REV_INDEX | MIDX_WRITE_BITMAP))
bitmap_order |= 1;
for_each_file_in_pack_dir(source->path, add_pack_to_midx, &ctx); fill_packs_from_midx_range(&ctx, bitmap_order);
} else {
ctx.to_include = opts->packs_to_include;
for_each_file_in_pack_dir(opts->source->path, add_pack_to_midx, &ctx);
}
stop_progress(&ctx.progress); stop_progress(&ctx.progress);
if (!packs_to_drop) { if (!opts->packs_to_drop) {
/* /*
* If there is no MIDX then either it doesn't exist, or we're * If there is no MIDX then either it doesn't exist, or we're
* doing a geometric repack. Try to load it from the source to * doing a geometric repack. Try to load it from the source to
@@ -1184,7 +1344,7 @@ static int write_midx_internal(struct odb_source *source,
if (midx && !midx_needs_update(midx, &ctx)) { if (midx && !midx_needs_update(midx, &ctx)) {
struct bitmap_index *bitmap_git; struct bitmap_index *bitmap_git;
int bitmap_exists; int bitmap_exists;
int want_bitmap = flags & MIDX_WRITE_BITMAP; int want_bitmap = opts->flags & MIDX_WRITE_BITMAP;
bitmap_git = prepare_midx_bitmap_git(midx); bitmap_git = prepare_midx_bitmap_git(midx);
bitmap_exists = bitmap_git && bitmap_is_midx(bitmap_git); bitmap_exists = bitmap_git && bitmap_is_midx(bitmap_git);
@@ -1196,7 +1356,8 @@ static int write_midx_internal(struct odb_source *source,
* corresponding bitmap (or one wasn't requested). * corresponding bitmap (or one wasn't requested).
*/ */
if (!want_bitmap) if (!want_bitmap)
clear_midx_files_ext(source, "bitmap", NULL); clear_midx_files_ext(opts->source,
"bitmap", NULL);
result = 0; result = 0;
goto cleanup; goto cleanup;
} }
@@ -1211,11 +1372,11 @@ static int write_midx_internal(struct odb_source *source,
goto cleanup; /* nothing to do */ goto cleanup; /* nothing to do */
} }
if (preferred_pack_name) { if (opts->preferred_pack_name) {
ctx.preferred_pack_idx = NO_PREFERRED_PACK; ctx.preferred_pack_idx = NO_PREFERRED_PACK;
for (size_t i = 0; i < ctx.nr; i++) { for (size_t i = 0; i < ctx.nr; i++) {
if (!cmp_idx_or_pack_name(preferred_pack_name, if (!cmp_idx_or_pack_name(opts->preferred_pack_name,
ctx.info[i].pack_name)) { ctx.info[i].pack_name)) {
ctx.preferred_pack_idx = i; ctx.preferred_pack_idx = i;
break; break;
@@ -1224,9 +1385,9 @@ static int write_midx_internal(struct odb_source *source,
if (ctx.preferred_pack_idx == NO_PREFERRED_PACK) if (ctx.preferred_pack_idx == NO_PREFERRED_PACK)
warning(_("unknown preferred pack: '%s'"), warning(_("unknown preferred pack: '%s'"),
preferred_pack_name); opts->preferred_pack_name);
} else if (ctx.nr && } else if (ctx.nr &&
(flags & (MIDX_WRITE_REV_INDEX | MIDX_WRITE_BITMAP))) { (opts->flags & (MIDX_WRITE_REV_INDEX | MIDX_WRITE_BITMAP))) {
struct packed_git *oldest = ctx.info[0].p; struct packed_git *oldest = ctx.info[0].p;
ctx.preferred_pack_idx = 0; ctx.preferred_pack_idx = 0;
@@ -1237,7 +1398,7 @@ static int write_midx_internal(struct odb_source *source,
*/ */
open_pack_index(oldest); open_pack_index(oldest);
if (packs_to_drop && packs_to_drop->nr) if (opts->packs_to_drop && opts->packs_to_drop->nr)
BUG("cannot write a MIDX bitmap during expiration"); BUG("cannot write a MIDX bitmap during expiration");
/* /*
@@ -1297,22 +1458,26 @@ static int write_midx_internal(struct odb_source *source,
ctx.large_offsets_needed = 1; ctx.large_offsets_needed = 1;
} }
QSORT(ctx.info, ctx.nr, pack_info_compare); if (!ctx.compact)
QSORT(ctx.info, ctx.nr, pack_info_compare);
if (packs_to_drop && packs_to_drop->nr) { if (opts->packs_to_drop && opts->packs_to_drop->nr) {
size_t drop_index = 0; size_t drop_index = 0;
int missing_drops = 0; int missing_drops = 0;
for (size_t i = 0; i < ctx.nr && drop_index < packs_to_drop->nr; i++) { ASSERT(!ctx.compact);
for (size_t i = 0;
i < ctx.nr && drop_index < opts->packs_to_drop->nr; i++) {
int cmp = strcmp(ctx.info[i].pack_name, int cmp = strcmp(ctx.info[i].pack_name,
packs_to_drop->items[drop_index].string); opts->packs_to_drop->items[drop_index].string);
if (!cmp) { if (!cmp) {
drop_index++; drop_index++;
ctx.info[i].expired = 1; ctx.info[i].expired = 1;
} else if (cmp > 0) { } else if (cmp > 0) {
error(_("did not see pack-file %s to drop"), error(_("did not see pack-file %s to drop"),
packs_to_drop->items[drop_index].string); opts->packs_to_drop->items[drop_index].string);
drop_index++; drop_index++;
missing_drops++; missing_drops++;
i--; i--;
@@ -1333,12 +1498,20 @@ static int write_midx_internal(struct odb_source *source,
*/ */
ALLOC_ARRAY(ctx.pack_perm, ctx.nr); ALLOC_ARRAY(ctx.pack_perm, ctx.nr);
for (size_t i = 0; i < ctx.nr; i++) { for (size_t i = 0; i < ctx.nr; i++) {
uint32_t from = ctx.info[i].orig_pack_int_id;
uint32_t to;
if (ctx.info[i].expired) { if (ctx.info[i].expired) {
to = PACK_EXPIRED;
dropped_packs++; dropped_packs++;
ctx.pack_perm[ctx.info[i].orig_pack_int_id] = PACK_EXPIRED;
} else { } else {
ctx.pack_perm[ctx.info[i].orig_pack_int_id] = i - dropped_packs; to = i - dropped_packs;
} }
if (ctx.compact)
from -= ctx.compact_from->num_packs_in_base;
ctx.pack_perm[from] = to;
} }
for (size_t i = 0; i < ctx.nr; i++) { for (size_t i = 0; i < ctx.nr; i++) {
@@ -1349,16 +1522,16 @@ static int write_midx_internal(struct odb_source *source,
} }
/* Check that the preferred pack wasn't expired (if given). */ /* Check that the preferred pack wasn't expired (if given). */
if (preferred_pack_name) { if (opts->preferred_pack_name) {
struct pack_info *preferred = bsearch(preferred_pack_name, struct pack_info *preferred = bsearch(opts->preferred_pack_name,
ctx.info, ctx.nr, ctx.info, ctx.nr,
sizeof(*ctx.info), sizeof(*ctx.info),
idx_or_pack_name_cmp); idx_or_pack_name_cmp);
if (preferred) { if (preferred) {
uint32_t perm = ctx.pack_perm[preferred->orig_pack_int_id]; uint32_t perm = midx_pack_perm(&ctx, preferred->orig_pack_int_id);
if (perm == PACK_EXPIRED) if (perm == PACK_EXPIRED)
warning(_("preferred pack '%s' is expired"), warning(_("preferred pack '%s' is expired"),
preferred_pack_name); opts->preferred_pack_name);
} }
} }
@@ -1372,15 +1545,15 @@ static int write_midx_internal(struct odb_source *source,
} }
if (!ctx.entries_nr) { if (!ctx.entries_nr) {
if (flags & MIDX_WRITE_BITMAP) if (opts->flags & MIDX_WRITE_BITMAP)
warning(_("refusing to write multi-pack .bitmap without any objects")); warning(_("refusing to write multi-pack .bitmap without any objects"));
flags &= ~(MIDX_WRITE_REV_INDEX | MIDX_WRITE_BITMAP); opts->flags &= ~(MIDX_WRITE_REV_INDEX | MIDX_WRITE_BITMAP);
} }
if (ctx.incremental) { if (ctx.incremental) {
struct strbuf lock_name = STRBUF_INIT; struct strbuf lock_name = STRBUF_INIT;
get_midx_chain_filename(source, &lock_name); get_midx_chain_filename(opts->source, &lock_name);
hold_lock_file_for_update(&lk, lock_name.buf, LOCK_DIE_ON_ERROR); hold_lock_file_for_update(&lk, lock_name.buf, LOCK_DIE_ON_ERROR);
strbuf_release(&lock_name); strbuf_release(&lock_name);
@@ -1423,7 +1596,7 @@ static int write_midx_internal(struct odb_source *source,
MIDX_CHUNK_LARGE_OFFSET_WIDTH), MIDX_CHUNK_LARGE_OFFSET_WIDTH),
write_midx_large_offsets); write_midx_large_offsets);
if (flags & (MIDX_WRITE_REV_INDEX | MIDX_WRITE_BITMAP)) { if (opts->flags & (MIDX_WRITE_REV_INDEX | MIDX_WRITE_BITMAP)) {
ctx.pack_order = midx_pack_order(&ctx); ctx.pack_order = midx_pack_order(&ctx);
add_chunk(cf, MIDX_CHUNKID_REVINDEX, add_chunk(cf, MIDX_CHUNKID_REVINDEX,
st_mult(ctx.entries_nr, sizeof(uint32_t)), st_mult(ctx.entries_nr, sizeof(uint32_t)),
@@ -1441,11 +1614,11 @@ static int write_midx_internal(struct odb_source *source,
CSUM_FSYNC | CSUM_HASH_IN_STREAM); CSUM_FSYNC | CSUM_HASH_IN_STREAM);
free_chunkfile(cf); free_chunkfile(cf);
if (flags & MIDX_WRITE_REV_INDEX && if (opts->flags & MIDX_WRITE_REV_INDEX &&
git_env_bool("GIT_TEST_MIDX_WRITE_REV", 0)) git_env_bool("GIT_TEST_MIDX_WRITE_REV", 0))
write_midx_reverse_index(&ctx, midx_hash); write_midx_reverse_index(&ctx, midx_hash);
if (flags & MIDX_WRITE_BITMAP) { if (opts->flags & MIDX_WRITE_BITMAP) {
struct packing_data pdata; struct packing_data pdata;
struct commit **commits; struct commit **commits;
uint32_t commits_nr; uint32_t commits_nr;
@@ -1455,7 +1628,7 @@ static int write_midx_internal(struct odb_source *source,
prepare_midx_packing_data(&pdata, &ctx); prepare_midx_packing_data(&pdata, &ctx);
commits = find_commits_for_midx_bitmap(&commits_nr, refs_snapshot, &ctx); commits = find_commits_for_midx_bitmap(&commits_nr, opts->refs_snapshot, &ctx);
/* /*
* The previous steps translated the information from * The previous steps translated the information from
@@ -1468,7 +1641,7 @@ static int write_midx_internal(struct odb_source *source,
if (write_midx_bitmap(&ctx, if (write_midx_bitmap(&ctx,
midx_hash, &pdata, commits, commits_nr, midx_hash, &pdata, commits, commits_nr,
flags) < 0) { opts->flags) < 0) {
error(_("could not write multi-pack bitmap")); error(_("could not write multi-pack bitmap"));
clear_packing_data(&pdata); clear_packing_data(&pdata);
free(commits); free(commits);
@@ -1486,7 +1659,24 @@ static int write_midx_internal(struct odb_source *source,
if (ctx.num_multi_pack_indexes_before == UINT32_MAX) if (ctx.num_multi_pack_indexes_before == UINT32_MAX)
die(_("too many multi-pack-indexes")); die(_("too many multi-pack-indexes"));
CALLOC_ARRAY(keep_hashes, ctx.num_multi_pack_indexes_before + 1); if (ctx.compact) {
struct multi_pack_index *m;
/*
* Keep all MIDX layers excluding those in the range [from, to].
*/
for (m = ctx.base_midx; m; m = m->base_midx)
keep_hashes_nr++;
for (m = ctx.m;
m && midx_hashcmp(m, ctx.compact_to, r->hash_algo);
m = m->base_midx)
keep_hashes_nr++;
keep_hashes_nr++; /* include the compacted layer */
} else {
keep_hashes_nr = ctx.num_multi_pack_indexes_before + 1;
}
CALLOC_ARRAY(keep_hashes, keep_hashes_nr);
if (ctx.incremental) { if (ctx.incremental) {
FILE *chainf = fdopen_lock_file(&lk, "w"); FILE *chainf = fdopen_lock_file(&lk, "w");
@@ -1501,7 +1691,7 @@ static int write_midx_internal(struct odb_source *source,
if (link_midx_to_chain(ctx.base_midx) < 0) if (link_midx_to_chain(ctx.base_midx) < 0)
goto cleanup; goto cleanup;
get_split_midx_filename_ext(source, &final_midx_name, get_split_midx_filename_ext(opts->source, &final_midx_name,
midx_hash, MIDX_EXT_MIDX); midx_hash, MIDX_EXT_MIDX);
if (rename_tempfile(&incr, final_midx_name.buf) < 0) { if (rename_tempfile(&incr, final_midx_name.buf) < 0) {
@@ -1511,18 +1701,47 @@ static int write_midx_internal(struct odb_source *source,
strbuf_release(&final_midx_name); strbuf_release(&final_midx_name);
keep_hashes[ctx.num_multi_pack_indexes_before] = if (ctx.compact) {
xstrdup(hash_to_hex_algop(midx_hash, r->hash_algo)); struct multi_pack_index *m;
uint32_t num_layers_before_from = 0;
uint32_t i;
for (uint32_t i = 0; i < ctx.num_multi_pack_indexes_before; i++) { for (m = ctx.base_midx; m; m = m->base_midx)
uint32_t j = ctx.num_multi_pack_indexes_before - i - 1; num_layers_before_from++;
keep_hashes[j] = xstrdup(hash_to_hex_algop(get_midx_checksum(m), m = ctx.base_midx;
for (i = 0; i < num_layers_before_from; i++) {
uint32_t j = num_layers_before_from - i - 1;
keep_hashes[j] = xstrdup(get_midx_checksum(m));
m = m->base_midx;
}
keep_hashes[i] = xstrdup(hash_to_hex_algop(midx_hash,
r->hash_algo)); r->hash_algo));
m = m->base_midx;
i = 0;
for (m = ctx.m;
m && midx_hashcmp(m, ctx.compact_to, r->hash_algo);
m = m->base_midx) {
keep_hashes[keep_hashes_nr - i - 1] =
xstrdup(get_midx_checksum(m));
i++;
}
} else {
keep_hashes[ctx.num_multi_pack_indexes_before] =
xstrdup(hash_to_hex_algop(midx_hash,
r->hash_algo));
for (uint32_t i = 0; i < ctx.num_multi_pack_indexes_before; i++) {
uint32_t j = ctx.num_multi_pack_indexes_before - i - 1;
keep_hashes[j] = xstrdup(get_midx_checksum(m));
m = m->base_midx;
}
} }
for (uint32_t i = 0; i <= ctx.num_multi_pack_indexes_before; i++) for (uint32_t i = 0; i < keep_hashes_nr; i++)
fprintf(get_lock_file_fp(&lk), "%s\n", keep_hashes[i]); fprintf(get_lock_file_fp(&lk), "%s\n", keep_hashes[i]);
} else { } else {
keep_hashes[ctx.num_multi_pack_indexes_before] = keep_hashes[ctx.num_multi_pack_indexes_before] =
@@ -1535,8 +1754,7 @@ static int write_midx_internal(struct odb_source *source,
if (commit_lock_file(&lk) < 0) if (commit_lock_file(&lk) < 0)
die_errno(_("could not write multi-pack-index")); die_errno(_("could not write multi-pack-index"));
clear_midx_files(source, keep_hashes, clear_midx_files(opts->source, keep_hashes, keep_hashes_nr,
ctx.num_multi_pack_indexes_before + 1,
ctx.incremental); ctx.incremental);
result = 0; result = 0;
@@ -1554,7 +1772,7 @@ cleanup:
free(ctx.pack_perm); free(ctx.pack_perm);
free(ctx.pack_order); free(ctx.pack_order);
if (keep_hashes) { if (keep_hashes) {
for (uint32_t i = 0; i <= ctx.num_multi_pack_indexes_before; i++) for (uint32_t i = 0; i < keep_hashes_nr; i++)
free((char *)keep_hashes[i]); free((char *)keep_hashes[i]);
free(keep_hashes); free(keep_hashes);
} }
@@ -1570,9 +1788,14 @@ int write_midx_file(struct odb_source *source,
const char *preferred_pack_name, const char *preferred_pack_name,
const char *refs_snapshot, unsigned flags) const char *refs_snapshot, unsigned flags)
{ {
return write_midx_internal(source, NULL, NULL, struct write_midx_opts opts = {
preferred_pack_name, refs_snapshot, .source = source,
flags); .preferred_pack_name = preferred_pack_name,
.refs_snapshot = refs_snapshot,
.flags = flags,
};
return write_midx_internal(&opts);
} }
int write_midx_file_only(struct odb_source *source, int write_midx_file_only(struct odb_source *source,
@@ -1580,8 +1803,30 @@ int write_midx_file_only(struct odb_source *source,
const char *preferred_pack_name, const char *preferred_pack_name,
const char *refs_snapshot, unsigned flags) const char *refs_snapshot, unsigned flags)
{ {
return write_midx_internal(source, packs_to_include, NULL, struct write_midx_opts opts = {
preferred_pack_name, refs_snapshot, flags); .source = source,
.packs_to_include = packs_to_include,
.preferred_pack_name = preferred_pack_name,
.refs_snapshot = refs_snapshot,
.flags = flags,
};
return write_midx_internal(&opts);
}
/*
 * Entry point for MIDX compaction: hand the layers `from` and `to` to
 * write_midx_internal() as `compact_from` and `compact_to`, with
 * MIDX_WRITE_COMPACT added to the caller's `flags`.
 *
 * Returns whatever write_midx_internal() returns.
 */
int write_midx_file_compact(struct odb_source *source,
			    struct multi_pack_index *from,
			    struct multi_pack_index *to,
			    unsigned flags)
{
	struct write_midx_opts opts = { 0 };

	opts.source = source;
	opts.compact_from = from;
	opts.compact_to = to;
	opts.flags = flags | MIDX_WRITE_COMPACT;

	return write_midx_internal(&opts);
}
int expire_midx_packs(struct odb_source *source, unsigned flags) int expire_midx_packs(struct odb_source *source, unsigned flags)
@@ -1641,8 +1886,11 @@ int expire_midx_packs(struct odb_source *source, unsigned flags)
free(count); free(count);
if (packs_to_drop.nr) if (packs_to_drop.nr)
result = write_midx_internal(source, NULL, result = write_midx_internal(&(struct write_midx_opts) {
&packs_to_drop, NULL, NULL, flags); .source = source,
.packs_to_drop = &packs_to_drop,
.flags = flags & MIDX_PROGRESS,
});
string_list_clear(&packs_to_drop, 0); string_list_clear(&packs_to_drop, 0);
@@ -1849,8 +2097,10 @@ int midx_repack(struct odb_source *source, size_t batch_size, unsigned flags)
goto cleanup; goto cleanup;
} }
result = write_midx_internal(source, NULL, NULL, NULL, NULL, result = write_midx_internal(&(struct write_midx_opts) {
flags); .source = source,
.flags = flags,
});
cleanup: cleanup:
free(include_pack); free(include_pack);

36
midx.c
View File

@@ -24,7 +24,13 @@ void clear_incremental_midx_files_ext(struct odb_source *source, const char *ext
int cmp_idx_or_pack_name(const char *idx_or_pack_name, int cmp_idx_or_pack_name(const char *idx_or_pack_name,
const char *idx_name); const char *idx_name);
const unsigned char *get_midx_checksum(struct multi_pack_index *m) const char *get_midx_checksum(const struct multi_pack_index *m)
{
return hash_to_hex_algop(get_midx_hash(m),
m->source->odb->repo->hash_algo);
}
const unsigned char *get_midx_hash(const struct multi_pack_index *m)
{ {
return m->data + m->data_len - m->source->odb->repo->hash_algo->rawsz; return m->data + m->data_len - m->source->odb->repo->hash_algo->rawsz;
} }
@@ -203,11 +209,6 @@ static struct multi_pack_index *load_multi_pack_index_one(struct odb_source *sou
if (!end) if (!end)
die(_("multi-pack-index pack-name chunk is too short")); die(_("multi-pack-index pack-name chunk is too short"));
cur_pack_name = end + 1; cur_pack_name = end + 1;
if (i && strcmp(m->pack_names[i], m->pack_names[i - 1]) <= 0)
die(_("multi-pack-index pack names out of order: '%s' before '%s'"),
m->pack_names[i - 1],
m->pack_names[i]);
} }
trace2_data_intmax("midx", r, "load/num_packs", m->num_packs); trace2_data_intmax("midx", r, "load/num_packs", m->num_packs);
@@ -405,6 +406,7 @@ void close_midx(struct multi_pack_index *m)
} }
FREE_AND_NULL(m->packs); FREE_AND_NULL(m->packs);
FREE_AND_NULL(m->pack_names); FREE_AND_NULL(m->pack_names);
FREE_AND_NULL(m->pack_names_sorted);
free(m); free(m);
} }
@@ -650,17 +652,37 @@ int cmp_idx_or_pack_name(const char *idx_or_pack_name,
return strcmp(idx_or_pack_name, idx_name); return strcmp(idx_or_pack_name, idx_name);
} }
static int midx_pack_names_cmp(const void *a, const void *b, void *m_)
{
struct multi_pack_index *m = m_;
return strcmp(m->pack_names[*(const size_t *)a],
m->pack_names[*(const size_t *)b]);
}
static int midx_contains_pack_1(struct multi_pack_index *m, static int midx_contains_pack_1(struct multi_pack_index *m,
const char *idx_or_pack_name) const char *idx_or_pack_name)
{ {
uint32_t first = 0, last = m->num_packs; uint32_t first = 0, last = m->num_packs;
if (!m->pack_names_sorted) {
uint32_t i;
ALLOC_ARRAY(m->pack_names_sorted, m->num_packs);
for (i = 0; i < m->num_packs; i++)
m->pack_names_sorted[i] = i;
QSORT_S(m->pack_names_sorted, m->num_packs, midx_pack_names_cmp,
m);
}
while (first < last) { while (first < last) {
uint32_t mid = first + (last - first) / 2; uint32_t mid = first + (last - first) / 2;
const char *current; const char *current;
int cmp; int cmp;
current = m->pack_names[mid]; current = m->pack_names[m->pack_names_sorted[mid]];
cmp = cmp_idx_or_pack_name(idx_or_pack_name, current); cmp = cmp_idx_or_pack_name(idx_or_pack_name, current);
if (!cmp) if (!cmp)
return 1; return 1;

9
midx.h
View File

@@ -71,6 +71,7 @@ struct multi_pack_index {
uint32_t num_packs_in_base; uint32_t num_packs_in_base;
const char **pack_names; const char **pack_names;
size_t *pack_names_sorted;
struct packed_git **packs; struct packed_git **packs;
}; };
@@ -80,12 +81,14 @@ struct multi_pack_index {
#define MIDX_WRITE_BITMAP_HASH_CACHE (1 << 3) #define MIDX_WRITE_BITMAP_HASH_CACHE (1 << 3)
#define MIDX_WRITE_BITMAP_LOOKUP_TABLE (1 << 4) #define MIDX_WRITE_BITMAP_LOOKUP_TABLE (1 << 4)
#define MIDX_WRITE_INCREMENTAL (1 << 5) #define MIDX_WRITE_INCREMENTAL (1 << 5)
#define MIDX_WRITE_COMPACT (1 << 6)
#define MIDX_EXT_REV "rev" #define MIDX_EXT_REV "rev"
#define MIDX_EXT_BITMAP "bitmap" #define MIDX_EXT_BITMAP "bitmap"
#define MIDX_EXT_MIDX "midx" #define MIDX_EXT_MIDX "midx"
const unsigned char *get_midx_checksum(struct multi_pack_index *m); const char *get_midx_checksum(const struct multi_pack_index *m) /* static buffer */;
const unsigned char *get_midx_hash(const struct multi_pack_index *m);
void get_midx_filename(struct odb_source *source, struct strbuf *out); void get_midx_filename(struct odb_source *source, struct strbuf *out);
void get_midx_filename_ext(struct odb_source *source, struct strbuf *out, void get_midx_filename_ext(struct odb_source *source, struct strbuf *out,
const unsigned char *hash, const char *ext); const unsigned char *hash, const char *ext);
@@ -128,6 +131,10 @@ int write_midx_file_only(struct odb_source *source,
struct string_list *packs_to_include, struct string_list *packs_to_include,
const char *preferred_pack_name, const char *preferred_pack_name,
const char *refs_snapshot, unsigned flags); const char *refs_snapshot, unsigned flags);
int write_midx_file_compact(struct odb_source *source,
struct multi_pack_index *from,
struct multi_pack_index *to,
unsigned flags);
void clear_midx_file(struct repository *r); void clear_midx_file(struct repository *r);
int verify_midx_file(struct odb_source *source, unsigned flags); int verify_midx_file(struct odb_source *source, unsigned flags);
int expire_midx_packs(struct odb_source *source, unsigned flags); int expire_midx_packs(struct odb_source *source, unsigned flags);

View File

@@ -441,11 +441,11 @@ char *midx_bitmap_filename(struct multi_pack_index *midx)
struct strbuf buf = STRBUF_INIT; struct strbuf buf = STRBUF_INIT;
if (midx->has_chain) if (midx->has_chain)
get_split_midx_filename_ext(midx->source, &buf, get_split_midx_filename_ext(midx->source, &buf,
get_midx_checksum(midx), get_midx_hash(midx),
MIDX_EXT_BITMAP); MIDX_EXT_BITMAP);
else else
get_midx_filename_ext(midx->source, &buf, get_midx_filename_ext(midx->source, &buf,
get_midx_checksum(midx), get_midx_hash(midx),
MIDX_EXT_BITMAP); MIDX_EXT_BITMAP);
return strbuf_detach(&buf, NULL); return strbuf_detach(&buf, NULL);
@@ -502,7 +502,7 @@ static int open_midx_bitmap_1(struct bitmap_index *bitmap_git,
if (load_bitmap_header(bitmap_git) < 0) if (load_bitmap_header(bitmap_git) < 0)
goto cleanup; goto cleanup;
if (!hasheq(get_midx_checksum(bitmap_git->midx), bitmap_git->checksum, if (!hasheq(get_midx_hash(bitmap_git->midx), bitmap_git->checksum,
bitmap_repo(bitmap_git)->hash_algo)) { bitmap_repo(bitmap_git)->hash_algo)) {
error(_("checksum doesn't match in MIDX and bitmap")); error(_("checksum doesn't match in MIDX and bitmap"));
goto cleanup; goto cleanup;
@@ -2820,8 +2820,7 @@ void test_bitmap_walk(struct rev_info *revs)
if (bitmap_is_midx(found)) if (bitmap_is_midx(found))
fprintf_ln(stderr, "Located via MIDX '%s'.", fprintf_ln(stderr, "Located via MIDX '%s'.",
hash_to_hex_algop(get_midx_checksum(found->midx), get_midx_checksum(found->midx));
revs->repo->hash_algo));
else else
fprintf_ln(stderr, "Located via pack '%s'.", fprintf_ln(stderr, "Located via pack '%s'.",
hash_to_hex_algop(found->pack->hash, hash_to_hex_algop(found->pack->hash,

View File

@@ -390,11 +390,11 @@ int load_midx_revindex(struct multi_pack_index *m)
if (m->has_chain) if (m->has_chain)
get_split_midx_filename_ext(m->source, &revindex_name, get_split_midx_filename_ext(m->source, &revindex_name,
get_midx_checksum(m), get_midx_hash(m),
MIDX_EXT_REV); MIDX_EXT_REV);
else else
get_midx_filename_ext(m->source, &revindex_name, get_midx_filename_ext(m->source, &revindex_name,
get_midx_checksum(m), get_midx_hash(m),
MIDX_EXT_REV); MIDX_EXT_REV);
ret = load_revindex_from_disk(m->source->odb->repo->hash_algo, ret = load_revindex_from_disk(m->source->odb->repo->hash_algo,

View File

@@ -26,18 +26,22 @@ static int read_midx_file(const char *object_dir, const char *checksum,
int show_objects) int show_objects)
{ {
uint32_t i; uint32_t i;
struct multi_pack_index *m; struct multi_pack_index *m, *tip;
int ret = 0;
m = setup_midx(object_dir); m = tip = setup_midx(object_dir);
if (!m) if (!m)
return 1; return 1;
if (checksum) { if (checksum) {
while (m && strcmp(hash_to_hex(get_midx_checksum(m)), checksum)) while (m && strcmp(get_midx_checksum(m), checksum))
m = m->base_midx; m = m->base_midx;
if (!m) if (!m) {
return 1; ret = error(_("could not find MIDX with checksum %s"),
checksum);
goto out;
}
} }
printf("header: %08x %d %d %d %d\n", printf("header: %08x %d %d %d %d\n",
@@ -82,9 +86,10 @@ static int read_midx_file(const char *object_dir, const char *checksum,
} }
} }
close_midx(m); out:
close_midx(tip);
return 0; return ret;
} }
static int read_midx_checksum(const char *object_dir) static int read_midx_checksum(const char *object_dir)
@@ -94,7 +99,7 @@ static int read_midx_checksum(const char *object_dir)
m = setup_midx(object_dir); m = setup_midx(object_dir);
if (!m) if (!m)
return 1; return 1;
printf("%s\n", hash_to_hex(get_midx_checksum(m))); printf("%s\n", get_midx_checksum(m));
close_midx(m); close_midx(m);
return 0; return 0;

View File

@@ -617,6 +617,7 @@ integration_tests = [
't5332-multi-pack-reuse.sh', 't5332-multi-pack-reuse.sh',
't5333-pseudo-merge-bitmaps.sh', 't5333-pseudo-merge-bitmaps.sh',
't5334-incremental-multi-pack-index.sh', 't5334-incremental-multi-pack-index.sh',
't5335-compact-multi-pack-index.sh',
't5351-unpack-large-objects.sh', 't5351-unpack-large-objects.sh',
't5400-send-pack.sh', 't5400-send-pack.sh',
't5401-update-hooks.sh', 't5401-update-hooks.sh',

View File

@@ -33,7 +33,6 @@ merge
merge-file merge-file
merge-index merge-index
merge-one-file merge-one-file
multi-pack-index
name-rev name-rev
notes notes
push push

View File

@@ -514,12 +514,7 @@ test_expect_success 'verify invalid chunk offset' '
"improper chunk offset(s)" "improper chunk offset(s)"
' '
test_expect_success 'verify packnames out of order' ' test_expect_success 'verify missing pack' '
corrupt_midx_and_verify $MIDX_BYTE_PACKNAME_ORDER "z" $objdir \
"pack names out of order"
'
test_expect_success 'verify packnames out of order' '
corrupt_midx_and_verify $MIDX_BYTE_PACKNAME_ORDER "a" $objdir \ corrupt_midx_and_verify $MIDX_BYTE_PACKNAME_ORDER "a" $objdir \
"failed to load pack" "failed to load pack"
' '

View File

@@ -0,0 +1,218 @@
#!/bin/sh
test_description='multi-pack-index compaction'
. ./test-lib.sh
# Set the GIT_TEST_MULTI_PACK_INDEX* knobs to 0.
# NOTE(review): presumably this keeps the test harness from writing MIDXs
# on its own, so that only the explicit "git multi-pack-index" calls in
# this script create layers -- confirm against the test framework docs.
GIT_TEST_MULTI_PACK_INDEX=0
GIT_TEST_MULTI_PACK_INDEX_WRITE_BITMAP=0
GIT_TEST_MULTI_PACK_INDEX_WRITE_INCREMENTAL=0
# Paths used by the helpers and tests below, relative to the top of each
# test repository: the object directory, its pack directory, the
# directory holding the MIDX layers, and the chain file.
objdir=.git/objects
packdir=$objdir/pack
midxdir=$packdir/multi-pack-index.d
midx_chain=$midxdir/multi-pack-index-chain
# Print line number "$1" of the file(s) named by the remaining
# arguments, or of standard input when no file is given. Prints
# nothing when the input has fewer than "$1" lines.
#
# The line number is handed to awk as a variable instead of being
# spliced into the program text, so an unexpected argument cannot
# change the awk program itself.
nth_line () {
	local n="$1" &&
	shift &&
	awk -v n="$n" 'NR == n' "$@"
}
# For each argument, in order: create a commit with test_commit, pack
# the still-unpacked objects into $packdir/pack-<arg>, prune the loose
# copies, and write a new incremental MIDX layer with a bitmap.
# Stops and returns 1 as soon as any step fails.
write_packs () {
	for c
	do
		test_commit "$c" || return 1
		git pack-objects --all --unpacked $packdir/pack-$c || return 1
		git prune-packed || return 1
		git multi-pack-index write --incremental --bitmap || return 1
	done
}
# Usage: test_midx_layer_packs <checksum> <name>...
#
# Dump the MIDX layer identified by <checksum> with "test-tool
# read-midx" and require that the second "-"-separated piece of each
# "pack-" line matches the given names, one per line, in the same order.
test_midx_layer_packs () {
	local checksum="$1" &&
	shift &&

	test-tool read-midx $objdir "$checksum" >out &&
	printf "%s\n" "$@" >expect &&

	# NOTE: the output is deliberately left unsorted, since the
	# point is to verify that compaction preserves pack order.
	sed -n "s/^pack-\([^-]*\).*/\1/p" out >actual &&
	test_cmp expect actual
}
# Walk every layer listed in $midx_chain and fail (return 1) if a layer
# lists an object that was already seen in an earlier line of the chain.
#
# objs.all accumulates the sorted first fields of all ".pack" lines seen
# so far; objs.layer holds the same for the layer being examined.
test_midx_layer_object_uniqueness () {
	: >objs.all
	while read layer
	do
		test-tool read-midx --show-objects $objdir "$layer" >out ||
			return 1

		sed -n "/\.pack$/{s/ .*//;p;}" out | sort >objs.layer ||
			return 1

		# No line may be common to this layer and the earlier ones.
		test_stdout_line_count = 0 comm -12 objs.all objs.layer ||
			return 1

		cat objs.all objs.layer | sort >objs.tmp || return 1
		mv objs.tmp objs.all || return 1
	done <$midx_chain
}
# Build a five-layer incremental MIDX chain from packs named A..E (one
# pack per layer, names in lexicographic order), then compact the layers
# on lines 2 through 4 of the chain file. Afterwards the chain must have
# three lines whose layers hold A, then B C D (in that order), then E,
# and no object may be listed by more than one layer.
test_expect_success 'MIDX compaction with lex-ordered pack names' '
git init midx-compact-lex-order &&
(
cd midx-compact-lex-order &&
write_packs A B C D E &&
test_line_count = 5 $midx_chain &&
git multi-pack-index compact --incremental --bitmap \
"$(nth_line 2 "$midx_chain")" \
"$(nth_line 4 "$midx_chain")" &&
test_line_count = 3 $midx_chain &&
test_midx_layer_packs "$(nth_line 1 "$midx_chain")" A &&
test_midx_layer_packs "$(nth_line 2 "$midx_chain")" B C D &&
test_midx_layer_packs "$(nth_line 3 "$midx_chain")" E &&
test_midx_layer_object_uniqueness
)
'
# Same scenario as the lex-ordered case, but the packs are created in
# the order D C A B E so that their names are not sorted. After
# compacting the layers on lines 2 through 4, the middle layer must list
# C A B in creation order (not sorted by name), and no object may be
# listed by more than one layer.
test_expect_success 'MIDX compaction with non-lex-ordered pack names' '
git init midx-compact-non-lex-order &&
(
cd midx-compact-non-lex-order &&
write_packs D C A B E &&
test_line_count = 5 $midx_chain &&
git multi-pack-index compact --incremental --bitmap \
"$(nth_line 2 "$midx_chain")" \
"$(nth_line 4 "$midx_chain")" &&
test_line_count = 3 $midx_chain &&
test_midx_layer_packs "$(nth_line 1 "$midx_chain")" D &&
test_midx_layer_packs "$(nth_line 2 "$midx_chain")" C A B &&
test_midx_layer_packs "$(nth_line 3 "$midx_chain")" E &&
test_midx_layer_object_uniqueness
)
'
# Filter standard input down to the lines ending in ".pack". For each,
# print the second "-"-separated piece of the third field followed by
# the first field, and sort the result.
midx_objs_by_pack () {
	awk '
		$0 ~ /\.pack$/ {
			split($3, parts, "-")
			printf "%s %s\n", parts[2], $1
		}
	' |
	sort
}
# Usage: tag_objs_from_pack <tag> <rev>
#
# Print one "<tag> <oid>" line, sorted, for every object that
# "git rev-list --objects --no-object-names <rev>" reports. Fails if
# rev-list fails.
#
# The tag is passed to printf as an argument, never as part of the
# format string, so a tag containing "%" or "\" is printed literally.
# Nothing is printed when rev-list reports no objects.
tag_objs_from_pack () {
	local tag="$1" rev="$2" objs obj &&
	objs="$(git rev-list --objects --no-object-names "$rev")" &&
	for obj in $objs
	do
		printf "%s %s\n" "$tag" "$obj"
	done | sort
}
# Check which pack each object is attributed to after compaction.
#
# Two overlapping packs are written by hand (pack-1 from rev A, pack-0
# from rev B) and added as separate layers, pack-1 first. A third layer
# is then added for commit C. After compacting the layers on lines 1 and
# 2 of the chain, the new first layer is expected to list the objects of
# A under pack "1" and only the objects of A..B under pack "0". The
# second layer is expected to list the objects of B..C under pack "C".
test_expect_success 'MIDX compaction preserves pack object selection' '
git init midx-compact-preserve-selection &&
(
cd midx-compact-preserve-selection &&
test_commit A &&
test_commit B &&
# Create two packs, one containing just the objects from
# A, and another containing all objects from the
# repository.
p1="$(echo A | git pack-objects --revs --delta-base-offset \
$packdir/pack-1)" &&
p0="$(echo B | git pack-objects --revs --delta-base-offset \
$packdir/pack-0)" &&
echo "pack-1-$p1.idx" | git multi-pack-index write \
--incremental --bitmap --stdin-packs &&
echo "pack-0-$p0.idx" | git multi-pack-index write \
--incremental --bitmap --stdin-packs &&
write_packs C &&
git multi-pack-index compact --incremental --bitmap \
"$(nth_line 1 "$midx_chain")" \
"$(nth_line 2 "$midx_chain")" &&
test-tool read-midx --show-objects $objdir \
"$(nth_line 1 "$midx_chain")" >AB.info &&
test-tool read-midx --show-objects $objdir \
"$(nth_line 2 "$midx_chain")" >C.info &&
midx_objs_by_pack <AB.info >AB.actual &&
midx_objs_by_pack <C.info >C.actual &&
{
tag_objs_from_pack 1 A &&
tag_objs_from_pack 0 A..B
} | sort >AB.expect &&
tag_objs_from_pack C B..C >C.expect &&
test_cmp AB.expect AB.actual &&
test_cmp C.expect C.actual
)
'
# Record the output of "test-tool read-midx --bitmap" for a five-layer
# chain, compact the layers on lines 2 through 4, and require that the
# same command prints identical output afterwards.
#
# NOTE(review): the trailing "&& true" after the final test_cmp is a
# no-op and could be dropped.
test_expect_success 'MIDX compaction with bitmaps' '
git init midx-compact-with-bitmaps &&
(
cd midx-compact-with-bitmaps &&
write_packs foo bar baz quux woot &&
test-tool read-midx --bitmap $objdir >bitmap.expect &&
git multi-pack-index compact --incremental --bitmap \
"$(nth_line 2 "$midx_chain")" \
"$(nth_line 4 "$midx_chain")" &&
test-tool read-midx --bitmap $objdir >bitmap.actual &&
test_cmp bitmap.expect bitmap.actual &&
true
)
'
# Smoke test: run two successive compactions, with bitmaps, over a
# seven-layer chain built from a branching history (main, side, other).
# The test only requires that every command succeeds. The two "cat"
# calls print the chain file and nothing is compared against an
# expected result.
test_expect_success 'MIDX compaction with bitmaps (non-trivial)' '
git init midx-compact-with-bitmaps-non-trivial &&
(
cd midx-compact-with-bitmaps-non-trivial &&
git branch -m main &&
# D(4)
# /
# A(1) --- B(2) --- C(3) --- G(7)
# \
# E(5) --- F(6)
write_packs A B C &&
git checkout -b side &&
write_packs D &&
git checkout -b other B &&
write_packs E F &&
git checkout main &&
write_packs G &&
cat $midx_chain &&
# Compact layers 2-4, leaving us with:
#
# [A, [B, C, D], E, F, G]
git multi-pack-index compact --incremental --bitmap \
"$(nth_line 2 "$midx_chain")" \
"$(nth_line 4 "$midx_chain")" &&
# Then compact the top two layers, condensing the above
# such that the new 4th layer contains F and G.
#
# [A, [B, C, D], E, [F, G]]
git multi-pack-index compact --incremental --bitmap \
"$(nth_line 4 "$midx_chain")" \
"$(nth_line 5 "$midx_chain")" &&
cat $midx_chain
)
'
test_done