Merge branch 'tb/pseudo-merge-bugfixes'

Fixes many bugs in pseudo-merge code.

* tb/pseudo-merge-bugfixes:
  pack-bitmap: prevent pattern leak on pseudo-merge re-assignment
  Documentation: fix broken `sampleRate` in gitpacking(7)
  pack-bitmap: reject pseudo-merge "sampleRate" of 0
  pack-bitmap: parse commits in `find_pseudo_merge_group_for_ref()`
  pack-bitmap: fix pseudo-merge lookup for shared commits
  pack-bitmap: fix inverted binary search in `pseudo_merge_at()`
  pack-bitmap-write: sort pseudo-merge commit lookup table in pack order
  t5333: demonstrate various pseudo-merge bugs
  t/helper: add 'test-tool bitmap write' subcommand
This commit is contained in:
Junio C Hamano
2026-05-27 14:15:43 +09:00
7 changed files with 402 additions and 14 deletions
@@ -47,8 +47,8 @@ will be updated more often than a reference pointing at an old commit.
bitmapPseudoMerge.<name>.sampleRate::
Determines the proportion of non-bitmapped commits (among
reference tips) which are selected for inclusion in an
unstable pseudo-merge bitmap. Must be between `0` and `1`
(inclusive). The default is `1`.
unstable pseudo-merge bitmap. Must be greater than `0` and
less than or equal to `1`. The default is `1`.
bitmapPseudoMerge.<name>.threshold::
Determines the minimum age of non-bitmapped commits (among
+2 -2
View File
@@ -150,7 +150,7 @@ with a configuration like so:
pattern = "refs/"
threshold = now
stableThreshold = never
sampleRate = 100
sampleRate = 1
maxMerges = 64
----
@@ -177,7 +177,7 @@ like:
pattern = "refs/virtual/([0-9]+)/(heads|tags)/"
threshold = now
stableThreshold = never
sampleRate = 100
sampleRate = 1
maxMerges = 64
----
+21 -2
View File
@@ -819,6 +819,20 @@ static void write_selected_commits_v1(struct bitmap_writer *writer,
}
}
/*
 * QSORT_S() comparator: order two object IDs by the position that
 * find_object_pos() reports for each of them, using the bitmap writer
 * handed in through the context pointer. Smaller positions sort first.
 */
static int pseudo_merge_commit_pos_cmp(const void *_va, const void *_vb,
				       void *_data)
{
	struct bitmap_writer *writer = _data;
	const uint32_t lhs = find_object_pos(writer, _va, NULL);
	const uint32_t rhs = find_object_pos(writer, _vb, NULL);

	/* Evaluates to -1, 0 or 1; avoids subtracting unsigned values. */
	return (lhs > rhs) - (lhs < rhs);
}
static void write_pseudo_merges(struct bitmap_writer *writer,
struct hashfile *f)
{
@@ -863,7 +877,7 @@ static void write_pseudo_merges(struct bitmap_writer *writer,
next_ext = st_add(hashfile_total(f),
st_mult(kh_size(writer->pseudo_merge_commits),
sizeof(uint64_t)));
sizeof(uint32_t) + sizeof(uint64_t)));
table_start = hashfile_total(f);
@@ -876,7 +890,12 @@ static void write_pseudo_merges(struct bitmap_writer *writer,
oid_array_append(&commits, &kh_key(writer->pseudo_merge_commits, i));
}
oid_array_sort(&commits);
/*
* Sort the commits by their bit position so that the lookup
* table can be binary searched by the reader (see
* find_pseudo_merge()).
*/
QSORT_S(commits.oid, commits.nr, pseudo_merge_commit_pos_cmp, writer);
/* write lookup table (non-extended) */
for (i = 0; i < commits.nr; i++) {
+12 -7
View File
@@ -150,7 +150,10 @@ static int pseudo_merge_config(const char *var, const char *value,
if (!strcmp(key, "pattern")) {
struct strbuf re = STRBUF_INIT;
free(group->pattern);
if (group->pattern) {
regfree(group->pattern);
free(group->pattern);
}
if (*value != '^')
strbuf_addch(&re, '^');
strbuf_addstr(&re, value);
@@ -169,8 +172,8 @@ static int pseudo_merge_config(const char *var, const char *value,
}
} else if (!strcmp(key, "samplerate")) {
group->sample_rate = git_config_double(var, value, ctx->kvi);
if (!(0 <= group->sample_rate && group->sample_rate <= 1)) {
warning(_("%s must be between 0 and 1, using default"), var);
if (!(0 < group->sample_rate && group->sample_rate <= 1)) {
warning(_("%s must be between 0 (exclusive) and 1, using default"), var);
group->sample_rate = DEFAULT_PSEUDO_MERGE_SAMPLE_RATE;
}
} else if (!strcmp(key, "threshold")) {
@@ -236,6 +239,8 @@ static int find_pseudo_merge_group_for_ref(const struct reference *ref, void *_d
c = lookup_commit(the_repository, maybe_peeled);
if (!c)
return 0;
if (repo_parse_commit(the_repository, c))
return 0;
if (!packlist_find(writer->to_pack, maybe_peeled))
return 0;
@@ -559,9 +564,9 @@ static struct pseudo_merge *pseudo_merge_at(const struct pseudo_merge_map *pm,
if (got == want)
return use_pseudo_merge(pm, &pm->v[mi]);
else if (got < want)
hi = mi;
else
lo = mi + 1;
else
hi = mi;
}
warning(_("could not find pseudo-merge for commit %s at offset %"PRIuMAX),
@@ -600,7 +605,7 @@ static int nth_pseudo_merge_ext(const struct pseudo_merge_map *pm,
return error(_("out-of-bounds read: (%"PRIuMAX" >= %"PRIuMAX")"),
(uintmax_t)ofs, (uintmax_t)pm->map_size);
read_pseudo_merge_commit_at(merge, pm->map + ofs);
merge->pseudo_merge_ofs = ofs;
return 0;
}
@@ -671,7 +676,7 @@ int apply_pseudo_merges_for_commit(const struct pseudo_merge_map *pm,
off_t ofs = merge_commit.pseudo_merge_ofs & ~((uint64_t)1<<63);
uint32_t i;
if (pseudo_merge_ext_at(pm, &ext, ofs) < -1) {
if (pseudo_merge_ext_at(pm, &ext, ofs) < 0) {
warning(_("could not read extended pseudo-merge table "
"for commit %s"),
oid_to_hex(&commit->object.oid));
+112 -1
View File
@@ -2,7 +2,10 @@
#include "test-tool.h"
#include "git-compat-util.h"
#include "hex.h"
#include "odb.h"
#include "pack-bitmap.h"
#include "pseudo-merge.h"
#include "setup.h"
static int bitmap_list_commits(void)
@@ -35,6 +38,111 @@ static int bitmap_dump_pseudo_merge_objects(uint32_t n)
return test_bitmap_pseudo_merge_objects(the_repository, n);
}
static int add_packed_object(const struct object_id *oid,
struct packed_git *pack,
uint32_t pos,
void *_data)
{
struct packing_data *packed = _data;
struct object_entry *entry;
struct object_info oi = OBJECT_INFO_INIT;
enum object_type type;
oi.typep = &type;
entry = packlist_alloc(packed, oid);
entry->idx.offset = nth_packed_object_offset(pack, pos);
if (packed_object_info(pack, entry->idx.offset, &oi) < 0)
die("could not get type of object %s",
oid_to_hex(oid));
oe_set_type(entry, type);
oe_set_in_pack(packed, entry, pack);
return 0;
}
/*
 * Comparator for an array of `struct pack_idx_entry *`: each argument
 * points at one array slot, and the two referenced entries are ordered
 * by comparing their object IDs with oidcmp().
 */
static int idx_oid_cmp(const void *va, const void *vb)
{
	const struct pack_idx_entry *const *lhs = va;
	const struct pack_idx_entry *const *rhs = vb;

	return oidcmp(&(*lhs)->oid, &(*rhs)->oid);
}
/*
 * Write a bitmap file for the pack whose basename equals `basename`.
 *
 * The commits to select are read from stdin as hex object IDs, one per
 * line. The output path is the pack's name with its ".pack" suffix
 * replaced by ".bitmap".
 *
 * Dies if the pack cannot be found or its index opened, if an input
 * line is not a valid object ID or does not name a parseable commit,
 * or if building the bitmaps fails. Returns 0 otherwise.
 */
static int bitmap_write(const char *basename)
{
struct packed_git *p = NULL;
struct packing_data packed = { 0 };
struct bitmap_writer writer;
struct pack_idx_entry **index;
struct strbuf buf = STRBUF_INIT;
uint32_t i;
prepare_repo_settings(the_repository);
/* Locate the requested pack; `p` is checked for NULL right after. */
repo_for_each_pack(the_repository, p) {
if (!strcmp(pack_basename(p), basename))
break;
}
if (!p)
die("could not find pack '%s'", basename);
if (open_pack_index(p))
die("cannot open pack index for '%s'", p->pack_name);
/* Register every object of the pack in `packed` via add_packed_object(). */
prepare_packing_data(the_repository, &packed);
for_each_object_in_pack(p, add_packed_object, &packed,
ODB_FOR_EACH_OBJECT_PACK_ORDER);
/*
 * Build the index array now that packed.objects[] is
 * fully allocated (packlist_alloc() may have reallocated it
 * during the loop above).
 */
ALLOC_ARRAY(index, p->num_objects);
for (i = 0; i < p->num_objects; i++)
index[i] = &packed.objects[i].idx;
bitmap_writer_init(&writer, the_repository, &packed, NULL);
bitmap_writer_build_type_index(&writer, index);
/* Each stdin line names one commit to hand to the bitmap writer. */
while (strbuf_getline_lf(&buf, stdin) != EOF) {
struct object_id oid;
struct commit *c;
if (get_oid_hex(buf.buf, &oid))
die("invalid OID: %s", buf.buf);
c = lookup_commit(the_repository, &oid);
if (!c || repo_parse_commit(the_repository, c))
die("could not parse commit %s", buf.buf);
bitmap_writer_push_commit(&writer, c, 0);
}
select_pseudo_merges(&writer);
if (bitmap_writer_build(&writer) < 0)
die("failed to build bitmaps");
bitmap_writer_set_checksum(&writer, p->hash);
/*
 * Re-sort the index by object ID before finishing.
 * NOTE(review): presumably bitmap_writer_finish() expects the
 * index in OID order rather than the order built above -- confirm.
 */
QSORT(index, p->num_objects, idx_oid_cmp);
/* Derive "<pack name minus .pack>.bitmap" as the output path. */
strbuf_reset(&buf);
strbuf_addstr(&buf, p->pack_name);
strbuf_strip_suffix(&buf, ".pack");
strbuf_addstr(&buf, ".bitmap");
bitmap_writer_finish(&writer, index, buf.buf, 0);
bitmap_writer_free(&writer);
strbuf_release(&buf);
free(index);
clear_packing_data(&packed);
return 0;
}
int cmd__bitmap(int argc, const char **argv)
{
setup_git_directory();
@@ -51,13 +159,16 @@ int cmd__bitmap(int argc, const char **argv)
return bitmap_dump_pseudo_merge_commits(atoi(argv[2]));
if (argc == 3 && !strcmp(argv[1], "dump-pseudo-merge-objects"))
return bitmap_dump_pseudo_merge_objects(atoi(argv[2]));
if (argc == 3 && !strcmp(argv[1], "write"))
return bitmap_write(argv[2]);
usage("\ttest-tool bitmap list-commits\n"
"\ttest-tool bitmap list-commits-with-offset\n"
"\ttest-tool bitmap dump-hashes\n"
"\ttest-tool bitmap dump-pseudo-merges\n"
"\ttest-tool bitmap dump-pseudo-merge-commits <n>\n"
"\ttest-tool bitmap dump-pseudo-merge-objects <n>");
"\ttest-tool bitmap dump-pseudo-merge-objects <n>\n"
"\ttest-tool bitmap write <pack-basename> < <commit-list>");
return -1;
}
+24
View File
@@ -648,4 +648,28 @@ test_expect_success 'truncated bitmap fails gracefully (lookup table)' '
test_grep corrupted.bitmap.index stderr
'
# Sanity-check the "write" helper itself: ask it to bitmap only HEAD,
# then verify that HEAD is the only commit reported by "list-commits"
# and that a bitmap-assisted object count matches a plain traversal.
test_expect_success 'test-tool bitmap write determines bitmap selection' '
test_when_finished "rm -fr bitmap-write-helper" &&
git init bitmap-write-helper &&
(
cd bitmap-write-helper &&
test_commit_bulk 64 &&
git repack -ad &&
pack="$(ls .git/objects/pack/pack-*.pack)" &&
git rev-parse HEAD >in &&
test-tool bitmap write "$(basename $pack)" <in &&
test-tool bitmap list-commits >bitmaps.raw &&
sort bitmaps.raw >bitmaps &&
test_cmp in bitmaps &&
git rev-list --count --objects --use-bitmap-index HEAD >actual &&
git rev-list --count --objects HEAD >expect &&
test_cmp expect actual
)
'
test_done
+229
View File
@@ -462,4 +462,233 @@ test_expect_success 'use pseudo-merge in boundary traversal' '
)
'
# One pseudo-merge group over refs/tags/, with only HEAD~63 given a
# regular bitmap. A new commit ("stale") is then added on top so that
# the traversal from HEAD starts at a commit the bitmap does not cover.
# The bitmap-assisted count must match the plain one, and the trace is
# checked with "test_pseudo_merges_satisfied 1".
test_expect_success 'apply pseudo-merges during fill-in traversal' '
test_when_finished "rm -fr pseudo-merge-fill-in-traversal" &&
git init pseudo-merge-fill-in-traversal &&
(
cd pseudo-merge-fill-in-traversal &&
git config bitmapPseudoMerge.test.pattern refs/tags/ &&
git config bitmapPseudoMerge.test.maxMerges 1 &&
git config bitmapPseudoMerge.test.stableThreshold never &&
test_commit_bulk 64 &&
tag_everything &&
git repack -ad &&
pack=$(ls .git/objects/pack/pack-*.pack) &&
git rev-parse HEAD~63 >in &&
test-tool bitmap write "$(basename $pack)" <in &&
test_pseudo_merges >merges &&
test_line_count = 1 merges &&
test_commit stale &&
git rev-list --count --objects HEAD >expect &&
: >trace2.txt &&
GIT_TRACE2_EVENT=$PWD/trace2.txt \
git rev-list --count --objects --use-bitmap-index HEAD >actual &&
test_pseudo_merges_satisfied 1 <trace2.txt &&
test_cmp expect actual
)
'
# Two disjoint pseudo-merge groups: the "left" and "right" branches each
# get 64 commits, and every one of those commits gets its own ref under
# refs/group-left/ or refs/group-right/. Only the shared base commit is
# given a regular bitmap. After merging the branches and adding one more
# commit, the trace is checked with "test_pseudo_merges_satisfied 2".
test_expect_success 'apply pseudo-merges from multiple groups during fill-in' '
test_when_finished "rm -fr pseudo-merge-fill-in-multi" &&
git init pseudo-merge-fill-in-multi &&
(
cd pseudo-merge-fill-in-multi &&
test_commit base &&
base=$(git rev-parse HEAD) &&
for side in left right
do
git checkout -B $side base &&
test_commit_bulk --id=$side 64 &&
git rev-list --no-object-names HEAD --not $base >in &&
while read oid
do
echo "create refs/group-$side/$oid $oid" || return 1
done <in | git update-ref --stdin || return 1
done &&
git checkout left &&
git merge right &&
git repack -ad &&
git config bitmapPseudoMerge.left.pattern "refs/group-left/" &&
git config bitmapPseudoMerge.left.maxMerges 1 &&
git config bitmapPseudoMerge.left.stableThreshold never &&
git config bitmapPseudoMerge.right.pattern "refs/group-right/" &&
git config bitmapPseudoMerge.right.maxMerges 1 &&
git config bitmapPseudoMerge.right.stableThreshold never &&
pack="$(ls .git/objects/pack/pack-*.pack)" &&
git rev-parse "$base" >in &&
test-tool bitmap write "$(basename $pack)" <in &&
test_pseudo_merges >merges &&
test_line_count = 2 merges &&
test_commit stale &&
git rev-list --count --objects HEAD >expect &&
: >trace2.txt &&
GIT_TRACE2_EVENT=$PWD/trace2.txt \
git rev-list --count --objects --use-bitmap-index HEAD >actual &&
test_pseudo_merges_satisfied 2 <trace2.txt &&
test_cmp expect actual
)
'
# Two groups ("all" and "tags") are configured with the identical
# pattern refs/tags/, so the same commits are matched by both. Two
# pseudo-merges must be written, and the bitmap-assisted count from a
# newer, unbitmapped HEAD must still match the plain traversal.
test_expect_success 'apply pseudo-merges with overlapping groups during fill-in' '
test_when_finished "rm -fr pseudo-merge-fill-in-overlap" &&
git init pseudo-merge-fill-in-overlap &&
(
cd pseudo-merge-fill-in-overlap &&
test_commit_bulk 64 &&
tag_everything &&
git repack -ad &&
pack="$(ls .git/objects/pack/pack-*.pack)" &&
# Use two pseudo-merge group patterns that both match
# refs/tags/, so every tagged commit belongs to both
# groups. This exercises the extended lookup table
# path in apply_pseudo_merges_for_commit().
git config bitmapPseudoMerge.all.pattern "refs/tags/" &&
git config bitmapPseudoMerge.all.maxMerges 1 &&
git config bitmapPseudoMerge.all.stableThreshold never &&
git config bitmapPseudoMerge.tags.pattern "refs/tags/" &&
git config bitmapPseudoMerge.tags.maxMerges 1 &&
git config bitmapPseudoMerge.tags.stableThreshold never &&
git rev-parse HEAD~63 >in &&
test-tool bitmap write "$(basename $pack)" <in &&
test_pseudo_merges >merges &&
test_line_count = 2 merges &&
test_commit stale &&
git rev-list --count --objects HEAD >expect &&
: >trace2.txt &&
GIT_TRACE2_EVENT=$PWD/trace2.txt \
git rev-list --count --objects --use-bitmap-index HEAD >actual &&
test_pseudo_merges_satisfied 2 <trace2.txt &&
test_cmp expect actual
)
'
# Two groups share the pattern refs/tags/ but differ in "threshold"
# (1.month.ago vs. now). The helper runs with GIT_TEST_DATE_NOW set to
# $test_tick, and exactly one pseudo-merge is expected in total.
test_expect_success 'pseudo-merge commits are correctly classified by date' '
test_when_finished "rm -fr pseudo-merge-date-classification" &&
git init pseudo-merge-date-classification &&
(
cd pseudo-merge-date-classification &&
test_commit_bulk 64 &&
tag_everything &&
git repack -ad &&
pack="$(ls .git/objects/pack/pack-*.pack)" &&
# Configure two pseudo-merge groups: one that only
# matches "stable" refs (older than one month), and
# one that matches all refs. With 64 tags whose
# commits are all younger than one month, the
# "stable" group should have zero pseudo-merges and
# the "all" group should have one.
#
# Use GIT_TEST_DATE_NOW to align "now" (and therefore
# "1.month.ago") with the test_tick timestamps so that
# the commits are within the last month.
#
# Without parsing the commit, its date field would
# be zero, causing it to satisfy date <= threshold
# for the "stable" group as well, and both groups
# would produce pseudo-merges.
git config bitmapPseudoMerge.stable.pattern "refs/tags/" &&
git config bitmapPseudoMerge.stable.maxMerges 64 &&
git config bitmapPseudoMerge.stable.stableThreshold never &&
git config bitmapPseudoMerge.stable.threshold 1.month.ago &&
git config bitmapPseudoMerge.all.pattern "refs/tags/" &&
git config bitmapPseudoMerge.all.maxMerges 1 &&
git config bitmapPseudoMerge.all.stableThreshold never &&
git config bitmapPseudoMerge.all.threshold now &&
git rev-parse HEAD~63 >in &&
GIT_TEST_DATE_NOW=$test_tick \
test-tool bitmap write "$(basename $pack)" <in &&
test_pseudo_merges >merges &&
test_line_count = 1 merges
)
'
# Configure a group with sampleRate set to 0 and only require that the
# bitmap write helper exits successfully; nothing about the resulting
# bitmap is inspected.
test_expect_success 'sampleRate=0 does not cause division by zero' '
test_when_finished "rm -fr pseudo-merge-sample-rate-zero" &&
git init pseudo-merge-sample-rate-zero &&
(
cd pseudo-merge-sample-rate-zero &&
test_commit_bulk 64 &&
tag_everything &&
git repack -ad &&
pack="$(ls .git/objects/pack/pack-*.pack)" &&
git config bitmapPseudoMerge.test.pattern "refs/tags/" &&
git config bitmapPseudoMerge.test.maxMerges 1 &&
git config bitmapPseudoMerge.test.sampleRate 0 &&
git config bitmapPseudoMerge.test.threshold now &&
git config bitmapPseudoMerge.test.stableThreshold never &&
git rev-parse HEAD~63 >in &&
test-tool bitmap write "$(basename $pack)" <in
)
'
# Give one group two "pattern" entries (the second added with
# "git config --add") and check that the helper still succeeds and
# writes exactly one pseudo-merge.
# NOTE(review): the assertions here do not detect a leak by themselves;
# presumably that relies on running under a leak checker -- confirm.
test_expect_success 'duplicate pseudo-merge pattern does not leak' '
test_when_finished "rm -fr pseudo-merge-dup-pattern" &&
git init pseudo-merge-dup-pattern &&
(
cd pseudo-merge-dup-pattern &&
test_commit_bulk 64 &&
tag_everything &&
git repack -ad &&
pack=$(ls .git/objects/pack/pack-*.pack) &&
# Set the same group'\''s pattern twice. The second
# assignment should cleanly release the compiled regex
# from the first without leaking.
git config bitmapPseudoMerge.test.pattern "refs/tags/" &&
git config --add bitmapPseudoMerge.test.pattern "refs/tags/" &&
git config bitmapPseudoMerge.test.maxMerges 1 &&
git config bitmapPseudoMerge.test.threshold now &&
git config bitmapPseudoMerge.test.stableThreshold never &&
git rev-parse HEAD~63 >in &&
test-tool bitmap write "$(basename $pack)" <in &&
test_pseudo_merges >merges &&
test_line_count = 1 merges
)
'
test_done