From dd3693eb0859274d62feac8047e1d486b3beaf31 Mon Sep 17 00:00:00 2001 From: Andrew Au Date: Thu, 12 Mar 2026 21:49:37 +0000 Subject: [PATCH 01/93] transport-helper, connect: use clean_on_exit to reap children on abnormal exit When a long-running service (e.g., a source indexer) runs as PID 1 inside a container and repeatedly spawns git, git may in turn spawn child processes such as git-remote-https or ssh. If git exits abnormally (e.g., via exit(128) on a transport error), the normal cleanup paths (disconnect_helper, finish_connect) are bypassed, and these children are never waited on. The children are reparented to PID 1, which does not reap them, so they accumulate as zombies over time. Set clean_on_exit and wait_after_clean on child_process structs in both transport-helper.c and connect.c so that the existing run-command cleanup infrastructure handles reaping on any exit path. This avoids rolling custom atexit handlers that call finish_command(), which could deadlock if the child is blocked waiting for the parent to close a pipe. The clean_on_exit mechanism sends SIGTERM first, then waits, ensuring the child terminates promptly. It also handles signal-based exits, not just atexit. 
Signed-off-by: Andrew Au Signed-off-by: Junio C Hamano --- connect.c | 4 ++++ transport-helper.c | 2 ++ 2 files changed, 6 insertions(+) diff --git a/connect.c b/connect.c index a02583a102..fcd35c5539 100644 --- a/connect.c +++ b/connect.c @@ -1054,6 +1054,8 @@ static struct child_process *git_proxy_connect(int fd[2], char *host) strvec_push(&proxy->args, port); proxy->in = -1; proxy->out = -1; + proxy->clean_on_exit = 1; + proxy->wait_after_clean = 1; if (start_command(proxy)) die(_("cannot start proxy %s"), git_proxy_command); fd[0] = proxy->out; /* read from proxy stdout */ @@ -1515,6 +1517,8 @@ struct child_process *git_connect(int fd[2], const char *url, } strvec_push(&conn->args, cmd.buf); + conn->clean_on_exit = 1; + conn->wait_after_clean = 1; if (start_command(conn)) die(_("unable to fork")); diff --git a/transport-helper.c b/transport-helper.c index 4d95d84f9e..570d7c6439 100644 --- a/transport-helper.c +++ b/transport-helper.c @@ -154,6 +154,8 @@ static struct child_process *get_helper(struct transport *transport) helper->trace2_child_class = helper->args.v[0]; /* "remote-" */ + helper->clean_on_exit = 1; + helper->wait_after_clean = 1; code = start_command(helper); if (code < 0 && errno == ENOENT) die(_("unable to find remote helper for '%s'"), data->name); From 81cf6ccc29002467f44798ada7d74993a44c94b0 Mon Sep 17 00:00:00 2001 From: Michael Montalbo Date: Tue, 17 Mar 2026 02:21:32 +0000 Subject: [PATCH 02/93] line-log: fix crash when combined with pickaxe options queue_diffs() passes the caller's diff_options, which may carry user-specified pickaxe state, to diff_tree_oid() and diffcore_std() when detecting renames for line-level history tracking. When pickaxe options are present on the command line (-G and -S to filter by text pattern, --find-object to filter by object identity), diffcore_std() also runs diffcore_pickaxe(), which may discard diff pairs that are relevant for rename detection. Losing those pairs breaks rename following. 
Before a2bb801f6a (line-log: avoid unnecessary full tree diffs, 2019-08-21), this silently truncated history at rename boundaries. That commit moved filter_diffs_for_paths() inside the rename-detection block, so it only runs when diff_might_be_rename() returns true. When pickaxe discards a rename pair, the rename goes undetected, and a deletion pair at a subsequent commit passes through uncleaned, reaching process_diff_filepair() with an invalid filespec and triggering an assertion failure. Fix this by building a private diff_options for the rename-detection path inside queue_diffs(), following the same pattern used by blame's find_rename(). This isolates the rename machinery from unrelated user-specified options. Reported-by: Matthew Hughes Signed-off-by: Michael Montalbo Signed-off-by: Junio C Hamano --- line-log.c | 24 +++++++++++++++----- t/t4211-line-log.sh | 55 +++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 74 insertions(+), 5 deletions(-) diff --git a/line-log.c b/line-log.c index eeaf68454e..9d12ece181 100644 --- a/line-log.c +++ b/line-log.c @@ -858,15 +858,29 @@ static void queue_diffs(struct line_log_data *range, diff_queue_clear(&diff_queued_diff); diff_tree_oid(parent_tree_oid, tree_oid, "", opt); if (opt->detect_rename && diff_might_be_rename()) { - /* must look at the full tree diff to detect renames */ - clear_pathspec(&opt->pathspec); - diff_queue_clear(&diff_queued_diff); + struct diff_options rename_opts; - diff_tree_oid(parent_tree_oid, tree_oid, "", opt); + /* + * Build a private diff_options for rename detection so + * that any user-specified options on the original opts + * (e.g. pickaxe) cannot discard diff pairs needed for + * rename tracking. Similar to blame's find_rename(). 
+ */ + repo_diff_setup(opt->repo, &rename_opts); + rename_opts.flags.recursive = 1; + rename_opts.detect_rename = opt->detect_rename; + rename_opts.rename_score = opt->rename_score; + rename_opts.output_format = DIFF_FORMAT_NO_OUTPUT; + diff_setup_done(&rename_opts); + + /* must look at the full tree diff to detect renames */ + diff_queue_clear(&diff_queued_diff); + diff_tree_oid(parent_tree_oid, tree_oid, "", &rename_opts); filter_diffs_for_paths(range, 1); - diffcore_std(opt); + diffcore_std(&rename_opts); filter_diffs_for_paths(range, 0); + diff_free(&rename_opts); } move_diff_queue(queue, &diff_queued_diff); } diff --git a/t/t4211-line-log.sh b/t/t4211-line-log.sh index 0a7c3ca42f..659a943aa1 100755 --- a/t/t4211-line-log.sh +++ b/t/t4211-line-log.sh @@ -367,4 +367,59 @@ test_expect_success 'show line-log with graph' ' test_cmp expect actual ' +test_expect_success 'setup for -L with -G/-S/--find-object and a merge with rename' ' + git checkout --orphan pickaxe-rename && + git reset --hard && + + echo content >file && + git add file && + git commit -m "add file" && + + git checkout -b pickaxe-rename-side && + git mv file renamed-file && + git commit -m "rename file" && + + git checkout pickaxe-rename && + git commit --allow-empty -m "diverge" && + git merge --no-edit pickaxe-rename-side && + + git mv renamed-file file && + git commit -m "rename back" +' + +test_expect_success '-L -G does not crash with merge and rename' ' + git log --format="%s" --no-patch -L 1,1:file -G "." >actual +' + +test_expect_success '-L -S does not crash with merge and rename' ' + git log --format="%s" --no-patch -L 1,1:file -S content >actual +' + +test_expect_success '-L --find-object does not crash with merge and rename' ' + git log --format="%s" --no-patch -L 1,1:file \ + --find-object=$(git rev-parse HEAD:file) >actual +' + +# Commit-level filtering with pickaxe does not yet work for -L. 
+# show_log() prints the commit header before diffcore_std() runs +# pickaxe, so commits cannot be suppressed even when no diff pairs +# survive filtering. Fixing this would require deferring show_log() +# until after diffcore_std(), which is a larger restructuring of the +# log-tree output pipeline. +test_expect_failure '-L -G should filter commits by pattern' ' + git log --format="%s" --no-patch -L 1,1:file -G "nomatch" >actual && + test_must_be_empty actual +' + +test_expect_failure '-L -S should filter commits by pattern' ' + git log --format="%s" --no-patch -L 1,1:file -S "nomatch" >actual && + test_must_be_empty actual +' + +test_expect_failure '-L --find-object should filter commits by object' ' + git log --format="%s" --no-patch -L 1,1:file \ + --find-object=$ZERO_OID >actual && + test_must_be_empty actual +' + test_done From 86e986f166d207e1f4b80062c2befb4f94c191c4 Mon Sep 17 00:00:00 2001 From: Michael Montalbo Date: Tue, 17 Mar 2026 02:21:33 +0000 Subject: [PATCH 03/93] line-log: route -L output through the standard diff pipeline `git log -L` has always bypassed the standard diff pipeline. `dump_diff_hacky()` in line-log.c hand-rolls its own diff headers and hunk output, which means most diff formatting options are silently ignored. A NEEDSWORK comment has acknowledged this since the feature was introduced: /* * NEEDSWORK: manually building a diff here is not the Right * Thing(tm). log -L should be built into the diff pipeline. */ Remove `dump_diff_hacky()` and its helpers and route -L output through `builtin_diff()` / `fn_out_consume()`, the same path used by `git diff` and `git log -p`. The mechanism is a pair of callback wrappers that sit between `xdi_diff_outf()` and `fn_out_consume()`, filtering xdiff's output to only the tracked line ranges. To ensure xdiff emits all lines within each range as context, the context length is inflated to span the largest range. 
Wire up the `-L` implies `--patch` default in revision setup rather than forcing it at output time, so `line_log_print()` is just `diffcore_std()` + `diff_flush()` with no format save/restore. Rename detection is a no-op since pairs are already resolved during the history walk in `queue_diffs()`, but running `diffcore_std()` means `-S`/`-G` (pickaxe), `--orderfile`, and `--diff-filter` now work with `-L`, and `diff_resolve_rename_copy()` sets pair statuses correctly without manual assignment. Switch `diff_filepair_dup()` from `xmalloc` to `xcalloc` so that new fields (including `line_ranges`) are zero-initialized by default. As a result, diff formatting options that were previously silently ignored (e.g. --word-diff, --no-prefix, -w, --color-moved) now work with -L, and output gains `index` lines, `new file mode` headers, and funcname context in `@@` headers. This is a user-visible output change: tools that parse -L output may need to handle the additional header lines. The context-length inflation means xdiff may process more output than needed for very wide line ranges, but benchmarks on files up to 7800 lines show no measurable regression. 
Signed-off-by: Michael Montalbo Signed-off-by: Junio C Hamano --- diff.c | 279 +++++++++++++++++- diffcore.h | 16 + line-log.c | 174 ++--------- line-log.h | 14 +- revision.c | 2 + t/t4211-line-log.sh | 12 +- t/t4211/sha1/expect.beginning-of-file | 4 + t/t4211/sha1/expect.end-of-file | 11 +- t/t4211/sha1/expect.move-support-f | 5 + t/t4211/sha1/expect.multiple | 10 +- t/t4211/sha1/expect.multiple-overlapping | 7 + t/t4211/sha1/expect.multiple-superset | 7 + t/t4211/sha1/expect.no-assertion-error | 12 +- t/t4211/sha1/expect.parallel-change-f-to-main | 7 + t/t4211/sha1/expect.simple-f | 4 + t/t4211/sha1/expect.simple-f-to-main | 5 + t/t4211/sha1/expect.simple-main | 11 +- t/t4211/sha1/expect.simple-main-to-end | 11 +- t/t4211/sha1/expect.two-ranges | 10 +- t/t4211/sha1/expect.vanishes-early | 10 +- t/t4211/sha256/expect.beginning-of-file | 4 + t/t4211/sha256/expect.end-of-file | 11 +- t/t4211/sha256/expect.move-support-f | 5 + t/t4211/sha256/expect.multiple | 10 +- t/t4211/sha256/expect.multiple-overlapping | 7 + t/t4211/sha256/expect.multiple-superset | 7 + t/t4211/sha256/expect.no-assertion-error | 12 +- .../sha256/expect.parallel-change-f-to-main | 7 + t/t4211/sha256/expect.simple-f | 4 + t/t4211/sha256/expect.simple-f-to-main | 5 + t/t4211/sha256/expect.simple-main | 11 +- t/t4211/sha256/expect.simple-main-to-end | 11 +- t/t4211/sha256/expect.two-ranges | 10 +- t/t4211/sha256/expect.vanishes-early | 10 +- 34 files changed, 512 insertions(+), 213 deletions(-) diff --git a/diff.c b/diff.c index 501648a5c4..f79e37a210 100644 --- a/diff.c +++ b/diff.c @@ -608,6 +608,52 @@ struct emit_callback { struct strbuf *header; }; +/* + * State for the line-range callback wrappers that sit between + * xdi_diff_outf() and fn_out_consume(). xdiff produces a normal, + * unfiltered diff; the wrappers intercept each hunk header and line, + * track post-image position, and forward only lines that fall within + * the requested ranges. 
Contiguous in-range lines are collected into + * range hunks and flushed with a synthetic @@ header so that + * fn_out_consume() sees well-formed unified-diff fragments. + * + * Removal lines ('-') cannot be classified by post-image position, so + * they are buffered in pending_rm until the next '+' or ' ' line + * reveals whether they precede an in-range line (flush into range hunk) or + * an out-of-range line (discard). + */ +struct line_range_callback { + xdiff_emit_line_fn orig_line_fn; + void *orig_cb_data; + const struct range_set *ranges; /* 0-based [start, end) */ + unsigned int cur_range; /* index into the range_set */ + + /* Post/pre-image line counters (1-based, set from hunk headers) */ + long lno_post; + long lno_pre; + + /* + * Function name from most recent xdiff hunk header; + * size matches struct func_line.buf in xdiff/xemit.c. + */ + char func[80]; + long funclen; + + /* Range hunk being accumulated for the current range */ + struct strbuf rhunk; + long rhunk_old_begin, rhunk_old_count; + long rhunk_new_begin, rhunk_new_count; + int rhunk_active; + int rhunk_has_changes; /* any '+' or '-' lines? 
*/ + + /* Removal lines not yet known to be in-range */ + struct strbuf pending_rm; + int pending_rm_count; + long pending_rm_pre_begin; /* pre-image line of first pending */ + + int ret; /* latched error from orig_line_fn */ +}; + static int count_lines(const char *data, int size) { int count, ch, completely_empty = 1, nl_just_seen = 0; @@ -2493,6 +2539,188 @@ static int quick_consume(void *priv, char *line UNUSED, unsigned long len UNUSED return 1; } +static void discard_pending_rm(struct line_range_callback *s) +{ + strbuf_reset(&s->pending_rm); + s->pending_rm_count = 0; +} + +static void flush_rhunk(struct line_range_callback *s) +{ + struct strbuf hdr = STRBUF_INIT; + const char *p, *end; + + if (!s->rhunk_active || s->ret) + return; + + /* Drain any pending removal lines into the range hunk */ + if (s->pending_rm_count) { + strbuf_addbuf(&s->rhunk, &s->pending_rm); + s->rhunk_old_count += s->pending_rm_count; + s->rhunk_has_changes = 1; + discard_pending_rm(s); + } + + /* + * Suppress context-only hunks: they contain no actual changes + * and would just be noise. This can happen when the inflated + * ctxlen causes xdiff to emit context covering a range that + * has no changes in this commit. + */ + if (!s->rhunk_has_changes) { + s->rhunk_active = 0; + strbuf_reset(&s->rhunk); + return; + } + + strbuf_addf(&hdr, "@@ -%ld,%ld +%ld,%ld @@", + s->rhunk_old_begin, s->rhunk_old_count, + s->rhunk_new_begin, s->rhunk_new_count); + if (s->funclen > 0) { + strbuf_addch(&hdr, ' '); + strbuf_add(&hdr, s->func, s->funclen); + } + strbuf_addch(&hdr, '\n'); + + s->ret = s->orig_line_fn(s->orig_cb_data, hdr.buf, hdr.len); + strbuf_release(&hdr); + + /* + * Replay buffered lines one at a time through fn_out_consume. + * The cast discards const because xdiff_emit_line_fn takes + * char *, though fn_out_consume does not modify the buffer. 
+ */ + p = s->rhunk.buf; + end = p + s->rhunk.len; + while (!s->ret && p < end) { + const char *eol = memchr(p, '\n', end - p); + unsigned long line_len = eol ? (unsigned long)(eol - p + 1) + : (unsigned long)(end - p); + s->ret = s->orig_line_fn(s->orig_cb_data, (char *)p, line_len); + p += line_len; + } + + s->rhunk_active = 0; + strbuf_reset(&s->rhunk); +} + +static void line_range_hunk_fn(void *data, + long old_begin, long old_nr UNUSED, + long new_begin, long new_nr UNUSED, + const char *func, long funclen) +{ + struct line_range_callback *s = data; + + /* + * When count > 0, begin is 1-based. When count == 0, begin is + * adjusted down by 1 by xdl_emit_hunk_hdr(), but no lines of + * that type will arrive, so the value is unused. + * + * Any pending removal lines from the previous xdiff hunk are + * intentionally left in pending_rm: the line callback will + * flush or discard them when the next content line reveals + * whether the removals precede in-range content. + */ + s->lno_post = new_begin; + s->lno_pre = old_begin; + + if (funclen > 0) { + if (funclen > (long)sizeof(s->func)) + funclen = sizeof(s->func); + memcpy(s->func, func, funclen); + } + s->funclen = funclen; +} + +static int line_range_line_fn(void *priv, char *line, unsigned long len) +{ + struct line_range_callback *s = priv; + const struct range *cur; + long lno_0, cur_pre; + + if (s->ret) + return s->ret; + + if (line[0] == '-') { + if (!s->pending_rm_count) + s->pending_rm_pre_begin = s->lno_pre; + s->lno_pre++; + strbuf_add(&s->pending_rm, line, len); + s->pending_rm_count++; + return s->ret; + } + + if (line[0] == '\\') { + if (s->pending_rm_count) + strbuf_add(&s->pending_rm, line, len); + else if (s->rhunk_active) + strbuf_add(&s->rhunk, line, len); + /* otherwise outside tracked range; drop silently */ + return s->ret; + } + + if (line[0] != '+' && line[0] != ' ') + BUG("unexpected diff line type '%c'", line[0]); + + lno_0 = s->lno_post - 1; + cur_pre = s->lno_pre; /* save before 
advancing for context lines */ + s->lno_post++; + if (line[0] == ' ') + s->lno_pre++; + + /* Advance past ranges we've passed */ + while (s->cur_range < s->ranges->nr && + lno_0 >= s->ranges->ranges[s->cur_range].end) { + if (s->rhunk_active) + flush_rhunk(s); + discard_pending_rm(s); + s->cur_range++; + } + + /* Past all ranges */ + if (s->cur_range >= s->ranges->nr) { + discard_pending_rm(s); + return s->ret; + } + + cur = &s->ranges->ranges[s->cur_range]; + + /* Before current range */ + if (lno_0 < cur->start) { + discard_pending_rm(s); + return s->ret; + } + + /* In range so start a new range hunk if needed */ + if (!s->rhunk_active) { + s->rhunk_active = 1; + s->rhunk_has_changes = 0; + s->rhunk_new_begin = lno_0 + 1; + s->rhunk_old_begin = s->pending_rm_count + ? s->pending_rm_pre_begin : cur_pre; + s->rhunk_old_count = 0; + s->rhunk_new_count = 0; + strbuf_reset(&s->rhunk); + } + + /* Flush pending removals into range hunk */ + if (s->pending_rm_count) { + strbuf_addbuf(&s->rhunk, &s->pending_rm); + s->rhunk_old_count += s->pending_rm_count; + s->rhunk_has_changes = 1; + discard_pending_rm(s); + } + + strbuf_add(&s->rhunk, line, len); + s->rhunk_new_count++; + if (line[0] == '+') + s->rhunk_has_changes = 1; + else + s->rhunk_old_count++; + + return s->ret; +} + static void pprint_rename(struct strbuf *name, const char *a, const char *b) { const char *old_name = a; @@ -3596,7 +3824,8 @@ static void builtin_diff(const char *name_a, const char *xfrm_msg, int must_show_header, struct diff_options *o, - int complete_rewrite) + int complete_rewrite, + const struct range_set *line_ranges) { mmfile_t mf1, mf2; const char *lbl[2]; @@ -3837,6 +4066,52 @@ static void builtin_diff(const char *name_a, */ xdi_diff_outf(&mf1, &mf2, NULL, quick_consume, &ecbdata, &xpp, &xecfg); + } else if (line_ranges) { + struct line_range_callback lr_state; + unsigned int i; + long max_span = 0; + + memset(&lr_state, 0, sizeof(lr_state)); + lr_state.orig_line_fn = fn_out_consume; + 
lr_state.orig_cb_data = &ecbdata; + lr_state.ranges = line_ranges; + strbuf_init(&lr_state.rhunk, 0); + strbuf_init(&lr_state.pending_rm, 0); + + /* + * Inflate ctxlen so that all changes within + * any single range are merged into one xdiff + * hunk and the inter-change context is emitted. + * The callback clips back to range boundaries. + * + * The optimal ctxlen depends on where changes + * fall within the range, which is only known + * after xdiff runs; the max range span is the + * upper bound that guarantees correctness in a + * single pass. + */ + for (i = 0; i < line_ranges->nr; i++) { + long span = line_ranges->ranges[i].end - + line_ranges->ranges[i].start; + if (span > max_span) + max_span = span; + } + if (max_span > xecfg.ctxlen) + xecfg.ctxlen = max_span; + + if (xdi_diff_outf(&mf1, &mf2, + line_range_hunk_fn, + line_range_line_fn, + &lr_state, &xpp, &xecfg)) + die("unable to generate diff for %s", + one->path); + + flush_rhunk(&lr_state); + if (lr_state.ret) + die("unable to generate diff for %s", + one->path); + strbuf_release(&lr_state.rhunk); + strbuf_release(&lr_state.pending_rm); } else if (xdi_diff_outf(&mf1, &mf2, NULL, fn_out_consume, &ecbdata, &xpp, &xecfg)) die("unable to generate diff for %s", one->path); @@ -4678,7 +4953,7 @@ static void run_diff_cmd(const struct external_diff *pgm, builtin_diff(name, other ? other : name, one, two, xfrm_msg, must_show_header, - o, complete_rewrite); + o, complete_rewrite, p->line_ranges); if (p->status == DIFF_STATUS_COPIED || p->status == DIFF_STATUS_RENAMED) o->found_changes = 1; diff --git a/diffcore.h b/diffcore.h index 9c0a0e7aaf..d75038d1b3 100644 --- a/diffcore.h +++ b/diffcore.h @@ -19,6 +19,17 @@ struct userdiff_driver; * in anything else. */ +/* A range [start, end). Lines are numbered starting at 0. */ +struct range { + long start, end; +}; + +/* A set of ranges. The ranges must always be disjoint and sorted. 
*/ +struct range_set { + unsigned int alloc, nr; + struct range *ranges; +}; + /* We internally use unsigned short as the score value, * and rely on an int capable to hold 32-bits. -B can take * -Bmerge_score/break_score format and the two scores are @@ -106,6 +117,11 @@ int diff_filespec_is_binary(struct repository *, struct diff_filespec *); struct diff_filepair { struct diff_filespec *one; struct diff_filespec *two; + /* + * Tracked line ranges for -L filtering; borrowed from + * line_log_data and must not be freed. + */ + const struct range_set *line_ranges; unsigned short int score; char status; /* M C R A D U etc. (see Documentation/diff-format.adoc or DIFF_STATUS_* in diff.h) */ unsigned broken_pair : 1; diff --git a/line-log.c b/line-log.c index 9d12ece181..858a899cd2 100644 --- a/line-log.c +++ b/line-log.c @@ -885,160 +885,6 @@ static void queue_diffs(struct line_log_data *range, move_diff_queue(queue, &diff_queued_diff); } -static char *get_nth_line(long line, unsigned long *ends, void *data) -{ - if (line == 0) - return (char *)data; - else - return (char *)data + ends[line] + 1; -} - -static void print_line(const char *prefix, char first, - long line, unsigned long *ends, void *data, - const char *color, const char *reset, FILE *file) -{ - char *begin = get_nth_line(line, ends, data); - char *end = get_nth_line(line+1, ends, data); - int had_nl = 0; - - if (end > begin && end[-1] == '\n') { - end--; - had_nl = 1; - } - - fputs(prefix, file); - fputs(color, file); - putc(first, file); - fwrite(begin, 1, end-begin, file); - fputs(reset, file); - putc('\n', file); - if (!had_nl) - fputs("\\ No newline at end of file\n", file); -} - -static void dump_diff_hacky_one(struct rev_info *rev, struct line_log_data *range) -{ - unsigned int i, j = 0; - long p_lines, t_lines; - unsigned long *p_ends = NULL, *t_ends = NULL; - struct diff_filepair *pair = range->pair; - struct diff_ranges *diff = &range->diff; - - struct diff_options *opt = &rev->diffopt; - const 
char *prefix = diff_line_prefix(opt); - const char *c_reset = diff_get_color(opt->use_color, DIFF_RESET); - const char *c_frag = diff_get_color(opt->use_color, DIFF_FRAGINFO); - const char *c_meta = diff_get_color(opt->use_color, DIFF_METAINFO); - const char *c_old = diff_get_color(opt->use_color, DIFF_FILE_OLD); - const char *c_new = diff_get_color(opt->use_color, DIFF_FILE_NEW); - const char *c_context = diff_get_color(opt->use_color, DIFF_CONTEXT); - - if (!pair || !diff) - goto out; - - if (pair->one->oid_valid) - fill_line_ends(rev->diffopt.repo, pair->one, &p_lines, &p_ends); - fill_line_ends(rev->diffopt.repo, pair->two, &t_lines, &t_ends); - - fprintf(opt->file, "%s%sdiff --git a/%s b/%s%s\n", prefix, c_meta, pair->one->path, pair->two->path, c_reset); - fprintf(opt->file, "%s%s--- %s%s%s\n", prefix, c_meta, - pair->one->oid_valid ? "a/" : "", - pair->one->oid_valid ? pair->one->path : "/dev/null", - c_reset); - fprintf(opt->file, "%s%s+++ b/%s%s\n", prefix, c_meta, pair->two->path, c_reset); - for (i = 0; i < range->ranges.nr; i++) { - long p_start, p_end; - long t_start = range->ranges.ranges[i].start; - long t_end = range->ranges.ranges[i].end; - long t_cur = t_start; - unsigned int j_last; - - /* - * If a diff range touches multiple line ranges, then all - * those line ranges should be shown, so take a step back if - * the current line range is still in the previous diff range - * (even if only partially). - */ - if (j > 0 && diff->target.ranges[j-1].end > t_start) - j--; - - while (j < diff->target.nr && diff->target.ranges[j].end < t_start) - j++; - if (j == diff->target.nr || diff->target.ranges[j].start >= t_end) - continue; - - /* Scan ahead to determine the last diff that falls in this range */ - j_last = j; - while (j_last < diff->target.nr && diff->target.ranges[j_last].start < t_end) - j_last++; - if (j_last > j) - j_last--; - - /* - * Compute parent hunk headers: we know that the diff - * has the correct line numbers (but not all hunks). 
- * So it suffices to shift the start/end according to - * the line numbers of the first/last hunk(s) that - * fall in this range. - */ - if (t_start < diff->target.ranges[j].start) - p_start = diff->parent.ranges[j].start - (diff->target.ranges[j].start-t_start); - else - p_start = diff->parent.ranges[j].start; - if (t_end > diff->target.ranges[j_last].end) - p_end = diff->parent.ranges[j_last].end + (t_end-diff->target.ranges[j_last].end); - else - p_end = diff->parent.ranges[j_last].end; - - if (!p_start && !p_end) { - p_start = -1; - p_end = -1; - } - - /* Now output a diff hunk for this range */ - fprintf(opt->file, "%s%s@@ -%ld,%ld +%ld,%ld @@%s\n", - prefix, c_frag, - p_start+1, p_end-p_start, t_start+1, t_end-t_start, - c_reset); - while (j < diff->target.nr && diff->target.ranges[j].start < t_end) { - int k; - for (; t_cur < diff->target.ranges[j].start; t_cur++) - print_line(prefix, ' ', t_cur, t_ends, pair->two->data, - c_context, c_reset, opt->file); - for (k = diff->parent.ranges[j].start; k < diff->parent.ranges[j].end; k++) - print_line(prefix, '-', k, p_ends, pair->one->data, - c_old, c_reset, opt->file); - for (; t_cur < diff->target.ranges[j].end && t_cur < t_end; t_cur++) - print_line(prefix, '+', t_cur, t_ends, pair->two->data, - c_new, c_reset, opt->file); - j++; - } - for (; t_cur < t_end; t_cur++) - print_line(prefix, ' ', t_cur, t_ends, pair->two->data, - c_context, c_reset, opt->file); - } - -out: - free(p_ends); - free(t_ends); -} - -/* - * NEEDSWORK: manually building a diff here is not the Right - * Thing(tm). log -L should be built into the diff pipeline. - */ -static void dump_diff_hacky(struct rev_info *rev, struct line_log_data *range) -{ - const char *prefix = diff_line_prefix(&rev->diffopt); - - fprintf(rev->diffopt.file, "%s\n", prefix); - - while (range) { - dump_diff_hacky_one(rev, range); - range = range->next; - } -} - /* * Unlike most other functions, this destructively operates on * 'range'. 
@@ -1102,7 +948,7 @@ static int process_diff_filepair(struct rev_info *rev, static struct diff_filepair *diff_filepair_dup(struct diff_filepair *pair) { - struct diff_filepair *new_filepair = xmalloc(sizeof(struct diff_filepair)); + struct diff_filepair *new_filepair = xcalloc(1, sizeof(struct diff_filepair)); new_filepair->one = pair->one; new_filepair->two = pair->two; new_filepair->one->count++; @@ -1160,11 +1006,25 @@ static int process_all_files(struct line_log_data **range_out, int line_log_print(struct rev_info *rev, struct commit *commit) { - show_log(rev); if (!(rev->diffopt.output_format & DIFF_FORMAT_NO_OUTPUT)) { struct line_log_data *range = lookup_line_range(rev, commit); - dump_diff_hacky(rev, range); + struct line_log_data *r; + const char *prefix = diff_line_prefix(&rev->diffopt); + + fprintf(rev->diffopt.file, "%s\n", prefix); + + for (r = range; r; r = r->next) { + if (r->pair) { + struct diff_filepair *p = + diff_filepair_dup(r->pair); + p->line_ranges = &r->ranges; + diff_q(&diff_queued_diff, p); + } + } + + diffcore_std(&rev->diffopt); + diff_flush(&rev->diffopt); } return 1; } diff --git a/line-log.h b/line-log.h index e9dadbc1a5..04a6ea64d3 100644 --- a/line-log.h +++ b/line-log.h @@ -1,22 +1,12 @@ #ifndef LINE_LOG_H #define LINE_LOG_H +#include "diffcore.h" /* struct range, struct range_set */ + struct rev_info; struct commit; struct string_list; -/* A range [start,end]. Lines are numbered starting at 0, and the - * ranges include start but exclude end. */ -struct range { - long start, end; -}; - -/* A set of ranges. The ranges must always be disjoint and sorted. */ -struct range_set { - unsigned int alloc, nr; - struct range *ranges; -}; - /* A diff, encoded as the set of pre- and post-image ranges where the * files differ. A pair of ranges corresponds to a hunk. 
*/ struct diff_ranges { diff --git a/revision.c b/revision.c index 402eb1b029..12e04bc53a 100644 --- a/revision.c +++ b/revision.c @@ -3111,6 +3111,8 @@ int setup_revisions(int argc, const char **argv, struct rev_info *revs, struct s if (want_ancestry(revs)) revs->limited = 1; revs->topo_order = 1; + if (!revs->diffopt.output_format) + revs->diffopt.output_format = DIFF_FORMAT_PATCH; } if (revs->topo_order && !generation_numbers_enabled(the_repository)) diff --git a/t/t4211-line-log.sh b/t/t4211-line-log.sh index 659a943aa1..6a307e911b 100755 --- a/t/t4211-line-log.sh +++ b/t/t4211-line-log.sh @@ -129,7 +129,7 @@ test_expect_success '-L with --output' ' git checkout parallel-change && git log --output=log -L :main:b.c >output && test_must_be_empty output && - test_line_count = 70 log + test_line_count = 75 log ' test_expect_success 'range_set_union' ' @@ -340,13 +340,19 @@ test_expect_success 'zero-width regex .* matches any function name' ' ' test_expect_success 'show line-log with graph' ' + git checkout parent-oids && + head_blob_old=$(git rev-parse --short HEAD^:file.c) && + head_blob_new=$(git rev-parse --short HEAD:file.c) && + root_blob=$(git rev-parse --short HEAD~4:file.c) && + null_blob=$(test_oid zero | cut -c1-7) && qz_to_tab_space >expect <<-EOF && * $head_oid Modify func2() in file.c |Z | diff --git a/file.c b/file.c + | index $head_blob_old..$head_blob_new 100644 | --- a/file.c | +++ b/file.c - | @@ -6,4 +6,4 @@ + | @@ -6,4 +6,4 @@ int func1() | int func2() | { | - return F2; @@ -355,6 +361,8 @@ test_expect_success 'show line-log with graph' ' * $root_oid Add func1() and func2() in file.c ZZ diff --git a/file.c b/file.c + new file mode 100644 + index $null_blob..$root_blob --- /dev/null +++ b/file.c @@ -0,0 +6,4 @@ diff --git a/t/t4211/sha1/expect.beginning-of-file b/t/t4211/sha1/expect.beginning-of-file index 91b4054898..52c90afb3a 100644 --- a/t/t4211/sha1/expect.beginning-of-file +++ b/t/t4211/sha1/expect.beginning-of-file @@ -5,6 +5,7 @@ Date: 
Thu Feb 28 10:47:40 2013 +0100 change at very beginning diff --git a/a.c b/a.c +index bdb2bb1..5e709a1 100644 --- a/a.c +++ b/a.c @@ -1,3 +1,4 @@ @@ -20,6 +21,7 @@ Date: Thu Feb 28 10:45:16 2013 +0100 touch both functions diff --git a/a.c b/a.c +index 3233403..e51de13 100644 --- a/a.c +++ b/a.c @@ -1,3 +1,3 @@ @@ -35,6 +37,8 @@ Date: Thu Feb 28 10:44:48 2013 +0100 initial diff --git a/a.c b/a.c +new file mode 100644 +index 0000000..444e415 --- /dev/null +++ b/a.c @@ -0,0 +1,3 @@ diff --git a/t/t4211/sha1/expect.end-of-file b/t/t4211/sha1/expect.end-of-file index bd25bb2f59..c40036899a 100644 --- a/t/t4211/sha1/expect.end-of-file +++ b/t/t4211/sha1/expect.end-of-file @@ -5,9 +5,10 @@ Date: Thu Feb 28 10:48:43 2013 +0100 change back to complete line diff --git a/a.c b/a.c +index 0b9cae5..5de3ea4 100644 --- a/a.c +++ b/a.c -@@ -20,3 +20,5 @@ +@@ -20,3 +20,5 @@ long f(long x) printf("%ld\n", f(15)); return 0; -} @@ -23,9 +24,10 @@ Date: Thu Feb 28 10:48:10 2013 +0100 change to an incomplete line at end diff --git a/a.c b/a.c +index 5e709a1..0b9cae5 100644 --- a/a.c +++ b/a.c -@@ -20,3 +20,3 @@ +@@ -20,3 +20,3 @@ int main () printf("%ld\n", f(15)); return 0; -} @@ -39,9 +41,10 @@ Date: Thu Feb 28 10:45:16 2013 +0100 touch both functions diff --git a/a.c b/a.c +index 3233403..e51de13 100644 --- a/a.c +++ b/a.c -@@ -19,3 +19,3 @@ +@@ -19,3 +19,3 @@ int f(int x) - printf("%d\n", f(15)); + printf("%ld\n", f(15)); return 0; @@ -54,6 +57,8 @@ Date: Thu Feb 28 10:44:48 2013 +0100 initial diff --git a/a.c b/a.c +new file mode 100644 +index 0000000..444e415 --- /dev/null +++ b/a.c @@ -0,0 +18,3 @@ diff --git a/t/t4211/sha1/expect.move-support-f b/t/t4211/sha1/expect.move-support-f index c905e01bc2..ead6500d4d 100644 --- a/t/t4211/sha1/expect.move-support-f +++ b/t/t4211/sha1/expect.move-support-f @@ -5,6 +5,7 @@ Date: Thu Feb 28 10:49:50 2013 +0100 another simple change diff --git a/b.c b/b.c +index 5de3ea4..bf79c2f 100644 --- a/b.c +++ b/b.c @@ -4,9 +4,9 @@ @@ -26,6 +27,7 @@ 
Date: Thu Feb 28 10:45:16 2013 +0100 touch both functions diff --git a/a.c b/a.c +index 3233403..e51de13 100644 --- a/a.c +++ b/a.c @@ -3,9 +3,9 @@ @@ -47,6 +49,7 @@ Date: Thu Feb 28 10:44:55 2013 +0100 change f() diff --git a/a.c b/a.c +index 444e415..3233403 100644 --- a/a.c +++ b/a.c @@ -3,8 +3,9 @@ @@ -67,6 +70,8 @@ Date: Thu Feb 28 10:44:48 2013 +0100 initial diff --git a/a.c b/a.c +new file mode 100644 +index 0000000..444e415 --- /dev/null +++ b/a.c @@ -0,0 +3,8 @@ diff --git a/t/t4211/sha1/expect.multiple b/t/t4211/sha1/expect.multiple index 1eee8a7801..a41851a51d 100644 --- a/t/t4211/sha1/expect.multiple +++ b/t/t4211/sha1/expect.multiple @@ -5,9 +5,10 @@ Date: Thu Feb 28 10:48:43 2013 +0100 change back to complete line diff --git a/a.c b/a.c +index 0b9cae5..5de3ea4 100644 --- a/a.c +++ b/a.c -@@ -18,5 +18,7 @@ +@@ -18,5 +18,7 @@ long f(long x) int main () { printf("%ld\n", f(15)); @@ -25,9 +26,10 @@ Date: Thu Feb 28 10:48:10 2013 +0100 change to an incomplete line at end diff --git a/a.c b/a.c +index 5e709a1..0b9cae5 100644 --- a/a.c +++ b/a.c -@@ -18,5 +18,5 @@ +@@ -18,5 +18,5 @@ long f(long x) int main () { printf("%ld\n", f(15)); @@ -43,6 +45,7 @@ Date: Thu Feb 28 10:45:16 2013 +0100 touch both functions diff --git a/a.c b/a.c +index 3233403..e51de13 100644 --- a/a.c +++ b/a.c @@ -3,9 +3,9 @@ @@ -71,6 +74,7 @@ Date: Thu Feb 28 10:44:55 2013 +0100 change f() diff --git a/a.c b/a.c +index 444e415..3233403 100644 --- a/a.c +++ b/a.c @@ -3,8 +3,9 @@ @@ -91,6 +95,8 @@ Date: Thu Feb 28 10:44:48 2013 +0100 initial diff --git a/a.c b/a.c +new file mode 100644 +index 0000000..444e415 --- /dev/null +++ b/a.c @@ -0,0 +3,8 @@ diff --git a/t/t4211/sha1/expect.multiple-overlapping b/t/t4211/sha1/expect.multiple-overlapping index d930b6eec4..0ec9990eab 100644 --- a/t/t4211/sha1/expect.multiple-overlapping +++ b/t/t4211/sha1/expect.multiple-overlapping @@ -5,6 +5,7 @@ Date: Thu Feb 28 10:48:43 2013 +0100 change back to complete line diff --git a/a.c b/a.c +index 
0b9cae5..5de3ea4 100644 --- a/a.c +++ b/a.c @@ -4,19 +4,21 @@ @@ -39,6 +40,7 @@ Date: Thu Feb 28 10:48:10 2013 +0100 change to an incomplete line at end diff --git a/a.c b/a.c +index 5e709a1..0b9cae5 100644 --- a/a.c +++ b/a.c @@ -4,19 +4,19 @@ @@ -71,6 +73,7 @@ Date: Thu Feb 28 10:45:41 2013 +0100 touch comment diff --git a/a.c b/a.c +index e51de13..bdb2bb1 100644 --- a/a.c +++ b/a.c @@ -3,19 +3,19 @@ @@ -102,6 +105,7 @@ Date: Thu Feb 28 10:45:16 2013 +0100 touch both functions diff --git a/a.c b/a.c +index 3233403..e51de13 100644 --- a/a.c +++ b/a.c @@ -3,19 +3,19 @@ @@ -134,6 +138,7 @@ Date: Thu Feb 28 10:44:55 2013 +0100 change f() diff --git a/a.c b/a.c +index 444e415..3233403 100644 --- a/a.c +++ b/a.c @@ -3,18 +3,19 @@ @@ -164,6 +169,8 @@ Date: Thu Feb 28 10:44:48 2013 +0100 initial diff --git a/a.c b/a.c +new file mode 100644 +index 0000000..444e415 --- /dev/null +++ b/a.c @@ -0,0 +3,18 @@ diff --git a/t/t4211/sha1/expect.multiple-superset b/t/t4211/sha1/expect.multiple-superset index d930b6eec4..0ec9990eab 100644 --- a/t/t4211/sha1/expect.multiple-superset +++ b/t/t4211/sha1/expect.multiple-superset @@ -5,6 +5,7 @@ Date: Thu Feb 28 10:48:43 2013 +0100 change back to complete line diff --git a/a.c b/a.c +index 0b9cae5..5de3ea4 100644 --- a/a.c +++ b/a.c @@ -4,19 +4,21 @@ @@ -39,6 +40,7 @@ Date: Thu Feb 28 10:48:10 2013 +0100 change to an incomplete line at end diff --git a/a.c b/a.c +index 5e709a1..0b9cae5 100644 --- a/a.c +++ b/a.c @@ -4,19 +4,19 @@ @@ -71,6 +73,7 @@ Date: Thu Feb 28 10:45:41 2013 +0100 touch comment diff --git a/a.c b/a.c +index e51de13..bdb2bb1 100644 --- a/a.c +++ b/a.c @@ -3,19 +3,19 @@ @@ -102,6 +105,7 @@ Date: Thu Feb 28 10:45:16 2013 +0100 touch both functions diff --git a/a.c b/a.c +index 3233403..e51de13 100644 --- a/a.c +++ b/a.c @@ -3,19 +3,19 @@ @@ -134,6 +138,7 @@ Date: Thu Feb 28 10:44:55 2013 +0100 change f() diff --git a/a.c b/a.c +index 444e415..3233403 100644 --- a/a.c +++ b/a.c @@ -3,18 +3,19 @@ @@ -164,6 +169,8 @@ Date: 
Thu Feb 28 10:44:48 2013 +0100 initial diff --git a/a.c b/a.c +new file mode 100644 +index 0000000..444e415 --- /dev/null +++ b/a.c @@ -0,0 +3,18 @@ diff --git a/t/t4211/sha1/expect.no-assertion-error b/t/t4211/sha1/expect.no-assertion-error index 994c37db1e..54c568f273 100644 --- a/t/t4211/sha1/expect.no-assertion-error +++ b/t/t4211/sha1/expect.no-assertion-error @@ -5,6 +5,7 @@ Date: Thu Feb 28 10:50:24 2013 +0100 move within the file diff --git a/b.c b/b.c +index bf79c2f..27c829c 100644 --- a/b.c +++ b/b.c @@ -25,0 +18,9 @@ @@ -25,9 +26,10 @@ Date: Thu Feb 28 10:48:43 2013 +0100 change back to complete line diff --git a/a.c b/a.c +index 0b9cae5..5de3ea4 100644 --- a/a.c +++ b/a.c -@@ -18,5 +18,7 @@ +@@ -18,5 +18,7 @@ long f(long x) int main () { printf("%ld\n", f(15)); @@ -45,9 +47,10 @@ Date: Thu Feb 28 10:48:10 2013 +0100 change to an incomplete line at end diff --git a/a.c b/a.c +index 5e709a1..0b9cae5 100644 --- a/a.c +++ b/a.c -@@ -18,5 +18,5 @@ +@@ -18,5 +18,5 @@ long f(long x) int main () { printf("%ld\n", f(15)); @@ -63,9 +66,10 @@ Date: Thu Feb 28 10:45:16 2013 +0100 touch both functions diff --git a/a.c b/a.c +index 3233403..e51de13 100644 --- a/a.c +++ b/a.c -@@ -17,5 +17,5 @@ +@@ -17,5 +17,5 @@ int f(int x) int main () { - printf("%d\n", f(15)); @@ -80,6 +84,8 @@ Date: Thu Feb 28 10:44:48 2013 +0100 initial diff --git a/a.c b/a.c +new file mode 100644 +index 0000000..444e415 --- /dev/null +++ b/a.c @@ -0,0 +16,5 @@ diff --git a/t/t4211/sha1/expect.parallel-change-f-to-main b/t/t4211/sha1/expect.parallel-change-f-to-main index 052def8074..65a8cc673a 100644 --- a/t/t4211/sha1/expect.parallel-change-f-to-main +++ b/t/t4211/sha1/expect.parallel-change-f-to-main @@ -13,6 +13,7 @@ Date: Thu Feb 28 10:49:50 2013 +0100 another simple change diff --git a/b.c b/b.c +index 5de3ea4..bf79c2f 100644 --- a/b.c +++ b/b.c @@ -4,14 +4,14 @@ @@ -39,6 +40,7 @@ Date: Fri Apr 12 16:15:57 2013 +0200 change on another line of history while rename happens diff --git a/a.c 
b/a.c +index 5de3ea4..01b5b65 100644 --- a/a.c +++ b/a.c @@ -4,14 +4,14 @@ @@ -65,6 +67,7 @@ Date: Thu Feb 28 10:45:41 2013 +0100 touch comment diff --git a/a.c b/a.c +index e51de13..bdb2bb1 100644 --- a/a.c +++ b/a.c @@ -3,14 +3,14 @@ @@ -91,6 +94,7 @@ Date: Thu Feb 28 10:45:16 2013 +0100 touch both functions diff --git a/a.c b/a.c +index 3233403..e51de13 100644 --- a/a.c +++ b/a.c @@ -3,14 +3,14 @@ @@ -117,6 +121,7 @@ Date: Thu Feb 28 10:44:55 2013 +0100 change f() diff --git a/a.c b/a.c +index 444e415..3233403 100644 --- a/a.c +++ b/a.c @@ -3,13 +3,14 @@ @@ -142,6 +147,8 @@ Date: Thu Feb 28 10:44:48 2013 +0100 initial diff --git a/a.c b/a.c +new file mode 100644 +index 0000000..444e415 --- /dev/null +++ b/a.c @@ -0,0 +3,13 @@ diff --git a/t/t4211/sha1/expect.simple-f b/t/t4211/sha1/expect.simple-f index a1f5bc49c8..b24ae40e03 100644 --- a/t/t4211/sha1/expect.simple-f +++ b/t/t4211/sha1/expect.simple-f @@ -5,6 +5,7 @@ Date: Thu Feb 28 10:45:16 2013 +0100 touch both functions diff --git a/a.c b/a.c +index 3233403..e51de13 100644 --- a/a.c +++ b/a.c @@ -3,9 +3,9 @@ @@ -26,6 +27,7 @@ Date: Thu Feb 28 10:44:55 2013 +0100 change f() diff --git a/a.c b/a.c +index 444e415..3233403 100644 --- a/a.c +++ b/a.c @@ -3,8 +3,9 @@ @@ -46,6 +48,8 @@ Date: Thu Feb 28 10:44:48 2013 +0100 initial diff --git a/a.c b/a.c +new file mode 100644 +index 0000000..444e415 --- /dev/null +++ b/a.c @@ -0,0 +3,8 @@ diff --git a/t/t4211/sha1/expect.simple-f-to-main b/t/t4211/sha1/expect.simple-f-to-main index a475768710..cd92100dfc 100644 --- a/t/t4211/sha1/expect.simple-f-to-main +++ b/t/t4211/sha1/expect.simple-f-to-main @@ -5,6 +5,7 @@ Date: Thu Feb 28 10:45:41 2013 +0100 touch comment diff --git a/a.c b/a.c +index e51de13..bdb2bb1 100644 --- a/a.c +++ b/a.c @@ -3,14 +3,14 @@ @@ -31,6 +32,7 @@ Date: Thu Feb 28 10:45:16 2013 +0100 touch both functions diff --git a/a.c b/a.c +index 3233403..e51de13 100644 --- a/a.c +++ b/a.c @@ -3,14 +3,14 @@ @@ -57,6 +59,7 @@ Date: Thu Feb 28 10:44:55 2013 
+0100 change f() diff --git a/a.c b/a.c +index 444e415..3233403 100644 --- a/a.c +++ b/a.c @@ -3,13 +3,14 @@ @@ -82,6 +85,8 @@ Date: Thu Feb 28 10:44:48 2013 +0100 initial diff --git a/a.c b/a.c +new file mode 100644 +index 0000000..444e415 --- /dev/null +++ b/a.c @@ -0,0 +3,13 @@ diff --git a/t/t4211/sha1/expect.simple-main b/t/t4211/sha1/expect.simple-main index 39ce39bebe..ff31291d34 100644 --- a/t/t4211/sha1/expect.simple-main +++ b/t/t4211/sha1/expect.simple-main @@ -5,9 +5,10 @@ Date: Thu Feb 28 10:48:43 2013 +0100 change back to complete line diff --git a/a.c b/a.c +index 0b9cae5..5de3ea4 100644 --- a/a.c +++ b/a.c -@@ -18,5 +18,5 @@ +@@ -18,5 +18,5 @@ long f(long x) int main () { printf("%ld\n", f(15)); @@ -23,9 +24,10 @@ Date: Thu Feb 28 10:48:10 2013 +0100 change to an incomplete line at end diff --git a/a.c b/a.c +index 5e709a1..0b9cae5 100644 --- a/a.c +++ b/a.c -@@ -18,5 +18,5 @@ +@@ -18,5 +18,5 @@ long f(long x) int main () { printf("%ld\n", f(15)); @@ -41,9 +43,10 @@ Date: Thu Feb 28 10:45:16 2013 +0100 touch both functions diff --git a/a.c b/a.c +index 3233403..e51de13 100644 --- a/a.c +++ b/a.c -@@ -17,5 +17,5 @@ +@@ -17,5 +17,5 @@ int f(int x) int main () { - printf("%d\n", f(15)); @@ -58,6 +61,8 @@ Date: Thu Feb 28 10:44:48 2013 +0100 initial diff --git a/a.c b/a.c +new file mode 100644 +index 0000000..444e415 --- /dev/null +++ b/a.c @@ -0,0 +16,5 @@ diff --git a/t/t4211/sha1/expect.simple-main-to-end b/t/t4211/sha1/expect.simple-main-to-end index 8480bd9cc4..4bef21e657 100644 --- a/t/t4211/sha1/expect.simple-main-to-end +++ b/t/t4211/sha1/expect.simple-main-to-end @@ -5,9 +5,10 @@ Date: Thu Feb 28 10:48:43 2013 +0100 change back to complete line diff --git a/a.c b/a.c +index 0b9cae5..5de3ea4 100644 --- a/a.c +++ b/a.c -@@ -18,5 +18,7 @@ +@@ -18,5 +18,7 @@ long f(long x) int main () { printf("%ld\n", f(15)); @@ -25,9 +26,10 @@ Date: Thu Feb 28 10:48:10 2013 +0100 change to an incomplete line at end diff --git a/a.c b/a.c +index 5e709a1..0b9cae5 
100644 --- a/a.c +++ b/a.c -@@ -18,5 +18,5 @@ +@@ -18,5 +18,5 @@ long f(long x) int main () { printf("%ld\n", f(15)); @@ -43,9 +45,10 @@ Date: Thu Feb 28 10:45:16 2013 +0100 touch both functions diff --git a/a.c b/a.c +index 3233403..e51de13 100644 --- a/a.c +++ b/a.c -@@ -17,5 +17,5 @@ +@@ -17,5 +17,5 @@ int f(int x) int main () { - printf("%d\n", f(15)); @@ -60,6 +63,8 @@ Date: Thu Feb 28 10:44:48 2013 +0100 initial diff --git a/a.c b/a.c +new file mode 100644 +index 0000000..444e415 --- /dev/null +++ b/a.c @@ -0,0 +16,5 @@ diff --git a/t/t4211/sha1/expect.two-ranges b/t/t4211/sha1/expect.two-ranges index c5164f3be3..aed01522e3 100644 --- a/t/t4211/sha1/expect.two-ranges +++ b/t/t4211/sha1/expect.two-ranges @@ -5,9 +5,10 @@ Date: Thu Feb 28 10:48:43 2013 +0100 change back to complete line diff --git a/a.c b/a.c +index 0b9cae5..5de3ea4 100644 --- a/a.c +++ b/a.c -@@ -18,5 +18,5 @@ +@@ -18,5 +18,5 @@ long f(long x) int main () { printf("%ld\n", f(15)); @@ -23,9 +24,10 @@ Date: Thu Feb 28 10:48:10 2013 +0100 change to an incomplete line at end diff --git a/a.c b/a.c +index 5e709a1..0b9cae5 100644 --- a/a.c +++ b/a.c -@@ -18,5 +18,5 @@ +@@ -18,5 +18,5 @@ long f(long x) int main () { printf("%ld\n", f(15)); @@ -41,6 +43,7 @@ Date: Thu Feb 28 10:45:16 2013 +0100 touch both functions diff --git a/a.c b/a.c +index 3233403..e51de13 100644 --- a/a.c +++ b/a.c @@ -3,9 +3,9 @@ @@ -69,6 +72,7 @@ Date: Thu Feb 28 10:44:55 2013 +0100 change f() diff --git a/a.c b/a.c +index 444e415..3233403 100644 --- a/a.c +++ b/a.c @@ -3,8 +3,9 @@ @@ -89,6 +93,8 @@ Date: Thu Feb 28 10:44:48 2013 +0100 initial diff --git a/a.c b/a.c +new file mode 100644 +index 0000000..444e415 --- /dev/null +++ b/a.c @@ -0,0 +3,8 @@ diff --git a/t/t4211/sha1/expect.vanishes-early b/t/t4211/sha1/expect.vanishes-early index 1f7cd06941..a413ad3659 100644 --- a/t/t4211/sha1/expect.vanishes-early +++ b/t/t4211/sha1/expect.vanishes-early @@ -5,11 +5,10 @@ Date: Thu Feb 28 10:48:43 2013 +0100 change back to complete 
line diff --git a/a.c b/a.c +index 0b9cae5..5de3ea4 100644 --- a/a.c +++ b/a.c -@@ -22,1 +24,1 @@ --} -\ No newline at end of file +@@ -23,0 +24,1 @@ int main () +/* incomplete lines are bad! */ commit 100b61a6f2f720f812620a9d10afb3a960ccb73c @@ -19,9 +18,10 @@ Date: Thu Feb 28 10:48:10 2013 +0100 change to an incomplete line at end diff --git a/a.c b/a.c +index 5e709a1..0b9cae5 100644 --- a/a.c +++ b/a.c -@@ -22,1 +22,1 @@ +@@ -22,1 +22,1 @@ int main () -} +} \ No newline at end of file @@ -33,6 +33,8 @@ Date: Thu Feb 28 10:44:48 2013 +0100 initial diff --git a/a.c b/a.c +new file mode 100644 +index 0000000..444e415 --- /dev/null +++ b/a.c @@ -0,0 +20,1 @@ diff --git a/t/t4211/sha256/expect.beginning-of-file b/t/t4211/sha256/expect.beginning-of-file index 5adfdfc1a1..e8d62328cf 100644 --- a/t/t4211/sha256/expect.beginning-of-file +++ b/t/t4211/sha256/expect.beginning-of-file @@ -5,6 +5,7 @@ Date: Thu Feb 28 10:47:40 2013 +0100 change at very beginning diff --git a/a.c b/a.c +index 3a78aaf..d325124 100644 --- a/a.c +++ b/a.c @@ -1,3 +1,4 @@ @@ -20,6 +21,7 @@ Date: Thu Feb 28 10:45:16 2013 +0100 touch both functions diff --git a/a.c b/a.c +index 7a296b9..75c0119 100644 --- a/a.c +++ b/a.c @@ -1,3 +1,3 @@ @@ -35,6 +37,8 @@ Date: Thu Feb 28 10:44:48 2013 +0100 initial diff --git a/a.c b/a.c +new file mode 100644 +index 0000000..9f550c3 --- /dev/null +++ b/a.c @@ -0,0 +1,3 @@ diff --git a/t/t4211/sha256/expect.end-of-file b/t/t4211/sha256/expect.end-of-file index 03ab5c1784..3b2e2384da 100644 --- a/t/t4211/sha256/expect.end-of-file +++ b/t/t4211/sha256/expect.end-of-file @@ -5,9 +5,10 @@ Date: Thu Feb 28 10:48:43 2013 +0100 change back to complete line diff --git a/a.c b/a.c +index e4fa1d8..62c1fc2 100644 --- a/a.c +++ b/a.c -@@ -20,3 +20,5 @@ +@@ -20,3 +20,5 @@ long f(long x) printf("%ld\n", f(15)); return 0; -} @@ -23,9 +24,10 @@ Date: Thu Feb 28 10:48:10 2013 +0100 change to an incomplete line at end diff --git a/a.c b/a.c +index d325124..e4fa1d8 100644 --- a/a.c 
+++ b/a.c -@@ -20,3 +20,3 @@ +@@ -20,3 +20,3 @@ int main () printf("%ld\n", f(15)); return 0; -} @@ -39,9 +41,10 @@ Date: Thu Feb 28 10:45:16 2013 +0100 touch both functions diff --git a/a.c b/a.c +index 7a296b9..75c0119 100644 --- a/a.c +++ b/a.c -@@ -19,3 +19,3 @@ +@@ -19,3 +19,3 @@ int f(int x) - printf("%d\n", f(15)); + printf("%ld\n", f(15)); return 0; @@ -54,6 +57,8 @@ Date: Thu Feb 28 10:44:48 2013 +0100 initial diff --git a/a.c b/a.c +new file mode 100644 +index 0000000..9f550c3 --- /dev/null +++ b/a.c @@ -0,0 +18,3 @@ diff --git a/t/t4211/sha256/expect.move-support-f b/t/t4211/sha256/expect.move-support-f index 223b4ed2a0..f49abcea3e 100644 --- a/t/t4211/sha256/expect.move-support-f +++ b/t/t4211/sha256/expect.move-support-f @@ -5,6 +5,7 @@ Date: Thu Feb 28 10:49:50 2013 +0100 another simple change diff --git a/b.c b/b.c +index 62c1fc2..69cb69c 100644 --- a/b.c +++ b/b.c @@ -4,9 +4,9 @@ @@ -26,6 +27,7 @@ Date: Thu Feb 28 10:45:16 2013 +0100 touch both functions diff --git a/a.c b/a.c +index 7a296b9..75c0119 100644 --- a/a.c +++ b/a.c @@ -3,9 +3,9 @@ @@ -47,6 +49,7 @@ Date: Thu Feb 28 10:44:55 2013 +0100 change f() diff --git a/a.c b/a.c +index 9f550c3..7a296b9 100644 --- a/a.c +++ b/a.c @@ -3,8 +3,9 @@ @@ -67,6 +70,8 @@ Date: Thu Feb 28 10:44:48 2013 +0100 initial diff --git a/a.c b/a.c +new file mode 100644 +index 0000000..9f550c3 --- /dev/null +++ b/a.c @@ -0,0 +3,8 @@ diff --git a/t/t4211/sha256/expect.multiple b/t/t4211/sha256/expect.multiple index dbd987b74a..0dee50ffb7 100644 --- a/t/t4211/sha256/expect.multiple +++ b/t/t4211/sha256/expect.multiple @@ -5,9 +5,10 @@ Date: Thu Feb 28 10:48:43 2013 +0100 change back to complete line diff --git a/a.c b/a.c +index e4fa1d8..62c1fc2 100644 --- a/a.c +++ b/a.c -@@ -18,5 +18,7 @@ +@@ -18,5 +18,7 @@ long f(long x) int main () { printf("%ld\n", f(15)); @@ -25,9 +26,10 @@ Date: Thu Feb 28 10:48:10 2013 +0100 change to an incomplete line at end diff --git a/a.c b/a.c +index d325124..e4fa1d8 100644 --- a/a.c +++ 
b/a.c -@@ -18,5 +18,5 @@ +@@ -18,5 +18,5 @@ long f(long x) int main () { printf("%ld\n", f(15)); @@ -43,6 +45,7 @@ Date: Thu Feb 28 10:45:16 2013 +0100 touch both functions diff --git a/a.c b/a.c +index 7a296b9..75c0119 100644 --- a/a.c +++ b/a.c @@ -3,9 +3,9 @@ @@ -71,6 +74,7 @@ Date: Thu Feb 28 10:44:55 2013 +0100 change f() diff --git a/a.c b/a.c +index 9f550c3..7a296b9 100644 --- a/a.c +++ b/a.c @@ -3,8 +3,9 @@ @@ -91,6 +95,8 @@ Date: Thu Feb 28 10:44:48 2013 +0100 initial diff --git a/a.c b/a.c +new file mode 100644 +index 0000000..9f550c3 --- /dev/null +++ b/a.c @@ -0,0 +3,8 @@ diff --git a/t/t4211/sha256/expect.multiple-overlapping b/t/t4211/sha256/expect.multiple-overlapping index 9015a45a25..b8c260e8ae 100644 --- a/t/t4211/sha256/expect.multiple-overlapping +++ b/t/t4211/sha256/expect.multiple-overlapping @@ -5,6 +5,7 @@ Date: Thu Feb 28 10:48:43 2013 +0100 change back to complete line diff --git a/a.c b/a.c +index e4fa1d8..62c1fc2 100644 --- a/a.c +++ b/a.c @@ -4,19 +4,21 @@ @@ -39,6 +40,7 @@ Date: Thu Feb 28 10:48:10 2013 +0100 change to an incomplete line at end diff --git a/a.c b/a.c +index d325124..e4fa1d8 100644 --- a/a.c +++ b/a.c @@ -4,19 +4,19 @@ @@ -71,6 +73,7 @@ Date: Thu Feb 28 10:45:41 2013 +0100 touch comment diff --git a/a.c b/a.c +index 75c0119..3a78aaf 100644 --- a/a.c +++ b/a.c @@ -3,19 +3,19 @@ @@ -102,6 +105,7 @@ Date: Thu Feb 28 10:45:16 2013 +0100 touch both functions diff --git a/a.c b/a.c +index 7a296b9..75c0119 100644 --- a/a.c +++ b/a.c @@ -3,19 +3,19 @@ @@ -134,6 +138,7 @@ Date: Thu Feb 28 10:44:55 2013 +0100 change f() diff --git a/a.c b/a.c +index 9f550c3..7a296b9 100644 --- a/a.c +++ b/a.c @@ -3,18 +3,19 @@ @@ -164,6 +169,8 @@ Date: Thu Feb 28 10:44:48 2013 +0100 initial diff --git a/a.c b/a.c +new file mode 100644 +index 0000000..9f550c3 --- /dev/null +++ b/a.c @@ -0,0 +3,18 @@ diff --git a/t/t4211/sha256/expect.multiple-superset b/t/t4211/sha256/expect.multiple-superset index 9015a45a25..b8c260e8ae 100644 --- 
a/t/t4211/sha256/expect.multiple-superset +++ b/t/t4211/sha256/expect.multiple-superset @@ -5,6 +5,7 @@ Date: Thu Feb 28 10:48:43 2013 +0100 change back to complete line diff --git a/a.c b/a.c +index e4fa1d8..62c1fc2 100644 --- a/a.c +++ b/a.c @@ -4,19 +4,21 @@ @@ -39,6 +40,7 @@ Date: Thu Feb 28 10:48:10 2013 +0100 change to an incomplete line at end diff --git a/a.c b/a.c +index d325124..e4fa1d8 100644 --- a/a.c +++ b/a.c @@ -4,19 +4,19 @@ @@ -71,6 +73,7 @@ Date: Thu Feb 28 10:45:41 2013 +0100 touch comment diff --git a/a.c b/a.c +index 75c0119..3a78aaf 100644 --- a/a.c +++ b/a.c @@ -3,19 +3,19 @@ @@ -102,6 +105,7 @@ Date: Thu Feb 28 10:45:16 2013 +0100 touch both functions diff --git a/a.c b/a.c +index 7a296b9..75c0119 100644 --- a/a.c +++ b/a.c @@ -3,19 +3,19 @@ @@ -134,6 +138,7 @@ Date: Thu Feb 28 10:44:55 2013 +0100 change f() diff --git a/a.c b/a.c +index 9f550c3..7a296b9 100644 --- a/a.c +++ b/a.c @@ -3,18 +3,19 @@ @@ -164,6 +169,8 @@ Date: Thu Feb 28 10:44:48 2013 +0100 initial diff --git a/a.c b/a.c +new file mode 100644 +index 0000000..9f550c3 --- /dev/null +++ b/a.c @@ -0,0 +3,18 @@ diff --git a/t/t4211/sha256/expect.no-assertion-error b/t/t4211/sha256/expect.no-assertion-error index 36ed12aa9c..c25f2ce19c 100644 --- a/t/t4211/sha256/expect.no-assertion-error +++ b/t/t4211/sha256/expect.no-assertion-error @@ -5,6 +5,7 @@ Date: Thu Feb 28 10:50:24 2013 +0100 move within the file diff --git a/b.c b/b.c +index 69cb69c..a0d566e 100644 --- a/b.c +++ b/b.c @@ -25,0 +18,9 @@ @@ -25,9 +26,10 @@ Date: Thu Feb 28 10:48:43 2013 +0100 change back to complete line diff --git a/a.c b/a.c +index e4fa1d8..62c1fc2 100644 --- a/a.c +++ b/a.c -@@ -18,5 +18,7 @@ +@@ -18,5 +18,7 @@ long f(long x) int main () { printf("%ld\n", f(15)); @@ -45,9 +47,10 @@ Date: Thu Feb 28 10:48:10 2013 +0100 change to an incomplete line at end diff --git a/a.c b/a.c +index d325124..e4fa1d8 100644 --- a/a.c +++ b/a.c -@@ -18,5 +18,5 @@ +@@ -18,5 +18,5 @@ long f(long x) int main () { 
printf("%ld\n", f(15)); @@ -63,9 +66,10 @@ Date: Thu Feb 28 10:45:16 2013 +0100 touch both functions diff --git a/a.c b/a.c +index 7a296b9..75c0119 100644 --- a/a.c +++ b/a.c -@@ -17,5 +17,5 @@ +@@ -17,5 +17,5 @@ int f(int x) int main () { - printf("%d\n", f(15)); @@ -80,6 +84,8 @@ Date: Thu Feb 28 10:44:48 2013 +0100 initial diff --git a/a.c b/a.c +new file mode 100644 +index 0000000..9f550c3 --- /dev/null +++ b/a.c @@ -0,0 +16,5 @@ diff --git a/t/t4211/sha256/expect.parallel-change-f-to-main b/t/t4211/sha256/expect.parallel-change-f-to-main index e68f8928ea..3178989253 100644 --- a/t/t4211/sha256/expect.parallel-change-f-to-main +++ b/t/t4211/sha256/expect.parallel-change-f-to-main @@ -13,6 +13,7 @@ Date: Thu Feb 28 10:49:50 2013 +0100 another simple change diff --git a/b.c b/b.c +index 62c1fc2..69cb69c 100644 --- a/b.c +++ b/b.c @@ -4,14 +4,14 @@ @@ -39,6 +40,7 @@ Date: Fri Apr 12 16:15:57 2013 +0200 change on another line of history while rename happens diff --git a/a.c b/a.c +index 62c1fc2..e1e8475 100644 --- a/a.c +++ b/a.c @@ -4,14 +4,14 @@ @@ -65,6 +67,7 @@ Date: Thu Feb 28 10:45:41 2013 +0100 touch comment diff --git a/a.c b/a.c +index 75c0119..3a78aaf 100644 --- a/a.c +++ b/a.c @@ -3,14 +3,14 @@ @@ -91,6 +94,7 @@ Date: Thu Feb 28 10:45:16 2013 +0100 touch both functions diff --git a/a.c b/a.c +index 7a296b9..75c0119 100644 --- a/a.c +++ b/a.c @@ -3,14 +3,14 @@ @@ -117,6 +121,7 @@ Date: Thu Feb 28 10:44:55 2013 +0100 change f() diff --git a/a.c b/a.c +index 9f550c3..7a296b9 100644 --- a/a.c +++ b/a.c @@ -3,13 +3,14 @@ @@ -142,6 +147,8 @@ Date: Thu Feb 28 10:44:48 2013 +0100 initial diff --git a/a.c b/a.c +new file mode 100644 +index 0000000..9f550c3 --- /dev/null +++ b/a.c @@ -0,0 +3,13 @@ diff --git a/t/t4211/sha256/expect.simple-f b/t/t4211/sha256/expect.simple-f index 65508d7c0b..983c711fe3 100644 --- a/t/t4211/sha256/expect.simple-f +++ b/t/t4211/sha256/expect.simple-f @@ -5,6 +5,7 @@ Date: Thu Feb 28 10:45:16 2013 +0100 touch both functions diff --git 
a/a.c b/a.c +index 7a296b9..75c0119 100644 --- a/a.c +++ b/a.c @@ -3,9 +3,9 @@ @@ -26,6 +27,7 @@ Date: Thu Feb 28 10:44:55 2013 +0100 change f() diff --git a/a.c b/a.c +index 9f550c3..7a296b9 100644 --- a/a.c +++ b/a.c @@ -3,8 +3,9 @@ @@ -46,6 +48,8 @@ Date: Thu Feb 28 10:44:48 2013 +0100 initial diff --git a/a.c b/a.c +new file mode 100644 +index 0000000..9f550c3 --- /dev/null +++ b/a.c @@ -0,0 +3,8 @@ diff --git a/t/t4211/sha256/expect.simple-f-to-main b/t/t4211/sha256/expect.simple-f-to-main index 77b721c196..e67fa017a7 100644 --- a/t/t4211/sha256/expect.simple-f-to-main +++ b/t/t4211/sha256/expect.simple-f-to-main @@ -5,6 +5,7 @@ Date: Thu Feb 28 10:45:41 2013 +0100 touch comment diff --git a/a.c b/a.c +index 75c0119..3a78aaf 100644 --- a/a.c +++ b/a.c @@ -3,14 +3,14 @@ @@ -31,6 +32,7 @@ Date: Thu Feb 28 10:45:16 2013 +0100 touch both functions diff --git a/a.c b/a.c +index 7a296b9..75c0119 100644 --- a/a.c +++ b/a.c @@ -3,14 +3,14 @@ @@ -57,6 +59,7 @@ Date: Thu Feb 28 10:44:55 2013 +0100 change f() diff --git a/a.c b/a.c +index 9f550c3..7a296b9 100644 --- a/a.c +++ b/a.c @@ -3,13 +3,14 @@ @@ -82,6 +85,8 @@ Date: Thu Feb 28 10:44:48 2013 +0100 initial diff --git a/a.c b/a.c +new file mode 100644 +index 0000000..9f550c3 --- /dev/null +++ b/a.c @@ -0,0 +3,13 @@ diff --git a/t/t4211/sha256/expect.simple-main b/t/t4211/sha256/expect.simple-main index d20708c9f9..0792b27cad 100644 --- a/t/t4211/sha256/expect.simple-main +++ b/t/t4211/sha256/expect.simple-main @@ -5,9 +5,10 @@ Date: Thu Feb 28 10:48:43 2013 +0100 change back to complete line diff --git a/a.c b/a.c +index e4fa1d8..62c1fc2 100644 --- a/a.c +++ b/a.c -@@ -18,5 +18,5 @@ +@@ -18,5 +18,5 @@ long f(long x) int main () { printf("%ld\n", f(15)); @@ -23,9 +24,10 @@ Date: Thu Feb 28 10:48:10 2013 +0100 change to an incomplete line at end diff --git a/a.c b/a.c +index d325124..e4fa1d8 100644 --- a/a.c +++ b/a.c -@@ -18,5 +18,5 @@ +@@ -18,5 +18,5 @@ long f(long x) int main () { printf("%ld\n", f(15)); @@ -41,9 
+43,10 @@ Date: Thu Feb 28 10:45:16 2013 +0100 touch both functions diff --git a/a.c b/a.c +index 7a296b9..75c0119 100644 --- a/a.c +++ b/a.c -@@ -17,5 +17,5 @@ +@@ -17,5 +17,5 @@ int f(int x) int main () { - printf("%d\n", f(15)); @@ -58,6 +61,8 @@ Date: Thu Feb 28 10:44:48 2013 +0100 initial diff --git a/a.c b/a.c +new file mode 100644 +index 0000000..9f550c3 --- /dev/null +++ b/a.c @@ -0,0 +16,5 @@ diff --git a/t/t4211/sha256/expect.simple-main-to-end b/t/t4211/sha256/expect.simple-main-to-end index 617cdf3481..d3bd7c7bc6 100644 --- a/t/t4211/sha256/expect.simple-main-to-end +++ b/t/t4211/sha256/expect.simple-main-to-end @@ -5,9 +5,10 @@ Date: Thu Feb 28 10:48:43 2013 +0100 change back to complete line diff --git a/a.c b/a.c +index e4fa1d8..62c1fc2 100644 --- a/a.c +++ b/a.c -@@ -18,5 +18,7 @@ +@@ -18,5 +18,7 @@ long f(long x) int main () { printf("%ld\n", f(15)); @@ -25,9 +26,10 @@ Date: Thu Feb 28 10:48:10 2013 +0100 change to an incomplete line at end diff --git a/a.c b/a.c +index d325124..e4fa1d8 100644 --- a/a.c +++ b/a.c -@@ -18,5 +18,5 @@ +@@ -18,5 +18,5 @@ long f(long x) int main () { printf("%ld\n", f(15)); @@ -43,9 +45,10 @@ Date: Thu Feb 28 10:45:16 2013 +0100 touch both functions diff --git a/a.c b/a.c +index 7a296b9..75c0119 100644 --- a/a.c +++ b/a.c -@@ -17,5 +17,5 @@ +@@ -17,5 +17,5 @@ int f(int x) int main () { - printf("%d\n", f(15)); @@ -60,6 +63,8 @@ Date: Thu Feb 28 10:44:48 2013 +0100 initial diff --git a/a.c b/a.c +new file mode 100644 +index 0000000..9f550c3 --- /dev/null +++ b/a.c @@ -0,0 +16,5 @@ diff --git a/t/t4211/sha256/expect.two-ranges b/t/t4211/sha256/expect.two-ranges index 6a94d3b9cb..7735b19723 100644 --- a/t/t4211/sha256/expect.two-ranges +++ b/t/t4211/sha256/expect.two-ranges @@ -5,9 +5,10 @@ Date: Thu Feb 28 10:48:43 2013 +0100 change back to complete line diff --git a/a.c b/a.c +index e4fa1d8..62c1fc2 100644 --- a/a.c +++ b/a.c -@@ -18,5 +18,5 @@ +@@ -18,5 +18,5 @@ long f(long x) int main () { printf("%ld\n", f(15)); @@ 
-23,9 +24,10 @@ Date: Thu Feb 28 10:48:10 2013 +0100 change to an incomplete line at end diff --git a/a.c b/a.c +index d325124..e4fa1d8 100644 --- a/a.c +++ b/a.c -@@ -18,5 +18,5 @@ +@@ -18,5 +18,5 @@ long f(long x) int main () { printf("%ld\n", f(15)); @@ -41,6 +43,7 @@ Date: Thu Feb 28 10:45:16 2013 +0100 touch both functions diff --git a/a.c b/a.c +index 7a296b9..75c0119 100644 --- a/a.c +++ b/a.c @@ -3,9 +3,9 @@ @@ -69,6 +72,7 @@ Date: Thu Feb 28 10:44:55 2013 +0100 change f() diff --git a/a.c b/a.c +index 9f550c3..7a296b9 100644 --- a/a.c +++ b/a.c @@ -3,8 +3,9 @@ @@ -89,6 +93,8 @@ Date: Thu Feb 28 10:44:48 2013 +0100 initial diff --git a/a.c b/a.c +new file mode 100644 +index 0000000..9f550c3 --- /dev/null +++ b/a.c @@ -0,0 +3,8 @@ diff --git a/t/t4211/sha256/expect.vanishes-early b/t/t4211/sha256/expect.vanishes-early index 11ec9bdecf..bc33b963dc 100644 --- a/t/t4211/sha256/expect.vanishes-early +++ b/t/t4211/sha256/expect.vanishes-early @@ -5,11 +5,10 @@ Date: Thu Feb 28 10:48:43 2013 +0100 change back to complete line diff --git a/a.c b/a.c +index e4fa1d8..62c1fc2 100644 --- a/a.c +++ b/a.c -@@ -22,1 +24,1 @@ --} -\ No newline at end of file +@@ -23,0 +24,1 @@ int main () +/* incomplete lines are bad! 
*/ commit 29f32ac3141c48b22803e5c4127b719917b67d0f8ca8c5248bebfa2a19f7da10 @@ -19,9 +18,10 @@ Date: Thu Feb 28 10:48:10 2013 +0100 change to an incomplete line at end diff --git a/a.c b/a.c +index d325124..e4fa1d8 100644 --- a/a.c +++ b/a.c -@@ -22,1 +22,1 @@ +@@ -22,1 +22,1 @@ int main () -} +} \ No newline at end of file @@ -33,6 +33,8 @@ Date: Thu Feb 28 10:44:48 2013 +0100 initial diff --git a/a.c b/a.c +new file mode 100644 +index 0000000..9f550c3 --- /dev/null +++ b/a.c @@ -0,0 +20,1 @@ From 0e51f7a7faae18d9e6819c38cc822d6232deacae Mon Sep 17 00:00:00 2001 From: Michael Montalbo Date: Tue, 17 Mar 2026 02:21:34 +0000 Subject: [PATCH 04/93] t4211: add tests for -L with standard diff options Now that -L output flows through the standard diff pipeline, verify that previously-ignored diff options work: formatting (--word-diff, --word-diff-regex, --no-prefix, --src/dst-prefix, --full-index, --abbrev), whitespace handling (-w, -b), output indicators (--output-indicator-new/old/context), direction reversal (-R), --color-moved, and pickaxe options (-S, -G). 
Signed-off-by: Michael Montalbo Signed-off-by: Junio C Hamano --- t/t4211-line-log.sh | 281 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 281 insertions(+) diff --git a/t/t4211-line-log.sh b/t/t4211-line-log.sh index 6a307e911b..aaf197d2ed 100755 --- a/t/t4211-line-log.sh +++ b/t/t4211-line-log.sh @@ -339,6 +339,92 @@ test_expect_success 'zero-width regex .* matches any function name' ' test_cmp expect actual ' +test_expect_success 'setup for diff pipeline tests' ' + git checkout parent-oids && + + head_blob_old=$(git rev-parse --short HEAD^:file.c) && + head_blob_new=$(git rev-parse --short HEAD:file.c) && + root_blob=$(git rev-parse --short HEAD~4:file.c) && + null_blob=$(test_oid zero | cut -c1-7) && + head_blob_old_full=$(git rev-parse HEAD^:file.c) && + head_blob_new_full=$(git rev-parse HEAD:file.c) && + root_blob_full=$(git rev-parse HEAD~4:file.c) && + null_blob_full=$(test_oid zero) +' + +test_expect_success '-L diff output includes index and new file mode' ' + git log -L:func2:file.c --format= >actual && + + # Output should contain index headers (not present in old code path) + grep "^index $head_blob_old\.\.$head_blob_new 100644" actual && + + # Root commit should show new file mode and null index + grep "^new file mode 100644" actual && + grep "^index $null_blob\.\.$root_blob$" actual && + + # Hunk headers should include funcname context + grep "^@@ .* @@ int func1()" actual +' + +test_expect_success '-L with --word-diff' ' + cat >expect <<-\EOF && + + diff --git a/file.c b/file.c + --- a/file.c + +++ b/file.c + @@ -6,4 +6,4 @@ int func1() + int func2() + { + return [-F2;-]{+F2 + 2;+} + } + + diff --git a/file.c b/file.c + new file mode 100644 + --- /dev/null + +++ b/file.c + @@ -0,0 +6,4 @@ + {+int func2()+} + {+{+} + {+ return F2;+} + {+}+} + EOF + git log -L:func2:file.c --word-diff --format= >actual && + grep -v "^index " actual >actual.filtered && + grep -v "^index " expect >expect.filtered && + test_cmp expect.filtered 
actual.filtered +' + +test_expect_success '-L with --no-prefix' ' + git log -L:func2:file.c --no-prefix --format= >actual && + grep "^diff --git file.c file.c" actual && + grep "^--- file.c" actual && + ! grep "^--- a/" actual +' + +test_expect_success '-L with --full-index' ' + git log -L:func2:file.c --full-index --format= >actual && + grep "^index $head_blob_old_full\.\.$head_blob_new_full 100644" actual && + grep "^index $null_blob_full\.\.$root_blob_full$" actual +' + +test_expect_success 'setup -L with whitespace change' ' + git checkout -b ws-change parent-oids && + sed "s/ return F2 + 2;/ return F2 + 2;/" file.c >tmp && + mv tmp file.c && + git commit -a -m "Whitespace change in func2()" +' + +test_expect_success '-L with --ignore-all-space suppresses whitespace-only diff' ' + git log -L:func2:file.c --format= >without_w && + git log -L:func2:file.c --format= -w >with_w && + + # Without -w: three commits produce diffs (whitespace, modify, root) + test $(grep -c "^diff --git" without_w) = 3 && + + # With -w: whitespace-only commit produces no hunk, so only two diffs + test $(grep -c "^diff --git" with_w) = 2 +' + test_expect_success 'show line-log with graph' ' git checkout parent-oids && head_blob_old=$(git rev-parse --short HEAD^:file.c) && @@ -430,4 +516,199 @@ test_expect_failure '-L --find-object should filter commits by object' ' test_must_be_empty actual ' +test_expect_success '-L with --word-diff-regex' ' + git checkout parent-oids && + git log -L:func2:file.c --word-diff \ + --word-diff-regex="[a-zA-Z0-9_]+" --format= >actual && + # Word-diff markers must be present + grep "{+" actual && + grep "+}" actual && + # No line-level +/- markers (word-diff replaces them); + # exclude --- header lines from the check + ! grep "^+[^+]" actual && + ! 
grep "^-[^-]" actual +' + +test_expect_success '-L with --src-prefix and --dst-prefix' ' + git checkout parent-oids && + git log -L:func2:file.c --src-prefix=old/ --dst-prefix=new/ \ + --format= >actual && + grep "^diff --git old/file.c new/file.c" actual && + grep "^--- old/file.c" actual && + grep "^+++ new/file.c" actual && + ! grep "^--- a/" actual +' + +test_expect_success '-L with --abbrev' ' + git checkout parent-oids && + git log -L:func2:file.c --abbrev=4 --format= -1 >actual && + # 4-char abbreviated hashes on index line + grep "^index [0-9a-f]\{4\}\.\.[0-9a-f]\{4\}" actual +' + +test_expect_success '-L with -b suppresses whitespace-only diff' ' + git checkout ws-change && + git log -L:func2:file.c --format= >without_b && + git log -L:func2:file.c --format= -b >with_b && + test $(grep -c "^diff --git" without_b) = 3 && + test $(grep -c "^diff --git" with_b) = 2 +' + +test_expect_success '-L with --output-indicator-*' ' + git checkout parent-oids && + git log -L:func2:file.c --output-indicator-new=">" \ + --output-indicator-old="<" --output-indicator-context="|" \ + --format= -1 >actual && + grep "^>" actual && + grep "^<" actual && + grep "^|" actual && + # No standard +/-/space content markers; exclude ---/+++ headers + ! grep "^+[^+]" actual && + ! grep "^-[^-]" actual && + ! 
grep "^ " actual +' + +test_expect_success '-L with -R reverses diff' ' + git checkout parent-oids && + git log -L:func2:file.c -R --format= -1 >actual && + grep "^diff --git b/file.c a/file.c" actual && + grep "^--- b/file.c" actual && + grep "^+++ a/file.c" actual && + # The modification added "F2 + 2", so reversed it is removed + grep "^-.*F2 + 2" actual && + grep "^+.*return F2;" actual +' + +test_expect_success 'setup for color-moved test' ' + git checkout -b color-moved-test parent-oids && + cat >big.c <<-\EOF && + int bigfunc() + { + int a = 1; + int b = 2; + int c = 3; + return a + b + c; + } + EOF + git add big.c && + git commit -m "add bigfunc" && + sed "s/ / /" big.c >tmp && mv tmp big.c && + git commit -a -m "reindent bigfunc" +' + +test_expect_success '-L with --color-moved' ' + git log -L:bigfunc:big.c --color-moved=zebra \ + --color-moved-ws=ignore-all-space \ + --color=always --format= -1 >actual.raw && + test_decode_color <actual.raw >actual && + # Old moved lines: bold magenta; new moved lines: bold cyan + grep "BOLD;MAGENTA" actual && + grep "BOLD;CYAN" actual +' + +test_expect_success 'setup for no-newline-at-eof tests' ' + git checkout --orphan no-newline && + git reset --hard && + printf "int top()\n{\n return 1;\n}\n\nint bot()\n{\n return 2;\n}" >noeol.c && + git add noeol.c && + test_tick && + git commit -m "add noeol.c (no trailing newline)" && + sed "s/return 2/return 22/" noeol.c >tmp && mv tmp noeol.c && + git commit -a -m "modify bot()" && + printf "int top()\n{\n return 1;\n}\n\nint bot()\n{\n return 33;\n}\n" >noeol.c && + git commit -a -m "modify bot() and add trailing newline" +' + +# When the tracked function is at the end of a file with no trailing +# newline, the "\ No newline at end of file" marker should appear. 
+test_expect_success '-L no-newline-at-eof appears in tracked range' ' + git log -L:bot:noeol.c --format= -1 HEAD~1 >actual && + grep "No newline at end of file" actual +' + +# When tracking a function that ends before the no-newline content, +# the marker should not appear in the output. +test_expect_success '-L no-newline-at-eof suppressed outside range' ' + git log -L:top:noeol.c --format= >actual && + ! grep "No newline at end of file" actual +' + +# When a commit removes a no-newline last line and replaces it with +# a newline-terminated line, the marker should still appear (on the +# old side of the diff). +test_expect_success '-L no-newline-at-eof marker with deleted line' ' + git log -L:bot:noeol.c --format= -1 >actual && + grep "No newline at end of file" actual +' + +test_expect_success 'setup for range boundary deletion test' ' + git checkout --orphan range-boundary && + git reset --hard && + cat >boundary.c <<-\EOF && + void above() + { + return; + } + + void tracked() + { + int x = 1; + int y = 2; + } + + void below() + { + return; + } + EOF + git add boundary.c && + test_tick && + git commit -m "add boundary.c" && + cat >boundary.c <<-\EOF && + void above() + { + return; + } + + void tracked() + { + int x = 1; + int y = 2; + } + + void below_renamed() + { + return 0; + } + EOF + git commit -a -m "modify below() only" +' + +# When only a function below the tracked range is modified, the +# tracked function should not produce a diff. +test_expect_success '-L suppresses deletions outside tracked range' ' + git log -L:tracked:boundary.c --format= >actual && + test $(grep -c "^diff --git" actual) = 1 +' + +test_expect_success '-L with -S filters to string-count changes' ' + git checkout parent-oids && + git log -L:func2:file.c -S "F2 + 2" --format= >actual && + # -S searches the whole file, not just the tracked range; + # combined with the -L range walk, this selects commits that + # both touch func2 and change the count of "F2 + 2" in the file. 
+ test $(grep -c "^diff --git" actual) = 1 && + grep "F2 + 2" actual +' + +test_expect_success '-L with -G filters to diff-text matches' ' + git checkout parent-oids && + git log -L:func2:file.c -G "F2 [+] 2" --format= >actual && + # -G greps the whole-file diff text, not just the tracked range; + # combined with -L, this selects commits that both touch func2 + # and have "F2 + 2" in their diff. + test $(grep -c "^diff --git" actual) = 1 && + grep "F2 + 2" actual +' + test_done From 512536a09ea2964e93226f219898ee0a09d85a70 Mon Sep 17 00:00:00 2001 From: Michael Montalbo Date: Tue, 17 Mar 2026 02:21:35 +0000 Subject: [PATCH 05/93] doc: note that -L supports patch formatting and pickaxe options Now that -L output flows through the standard diff pipeline, document that patch formatting options like --word-diff, --color-moved, --no-prefix, whitespace handling (-w, -b), and pickaxe options (-S, -G) are supported. Signed-off-by: Michael Montalbo Signed-off-by: Junio C Hamano --- Documentation/line-range-options.adoc | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/Documentation/line-range-options.adoc b/Documentation/line-range-options.adoc index c44ba05320..ecb2c79fb9 100644 --- a/Documentation/line-range-options.adoc +++ b/Documentation/line-range-options.adoc @@ -12,4 +12,8 @@ (namely `--raw`, `--numstat`, `--shortstat`, `--dirstat`, `--summary`, `--name-only`, `--name-status`, `--check`) are not currently implemented. + +Patch formatting options such as `--word-diff`, `--color-moved`, +`--no-prefix`, and whitespace options (`-w`, `-b`) are supported, +as are pickaxe options (`-S`, `-G`). 
++ include::line-range-format.adoc[] From 3cfe355ca74aae5cf90a4eca73a341732b0eb456 Mon Sep 17 00:00:00 2001 From: Shreyansh Paliwal Date: Tue, 17 Mar 2026 21:20:29 +0530 Subject: [PATCH 06/93] add-patch: use repository instance from add_i_state instead of the_repository Functions parse_diff(), edit_hunk_manually() and patch_update_file() use the_repository even though a repository instance is already available via struct add_i_state s which is defined in struct add_p_state *s. Use 's->s.r' instead of the_repository to avoid relying on global state. All callers pass a valid add_p_state and this does not change any behavior. This aligns with the ongoing effort to reduce usage of the_repository global state. Signed-off-by: Shreyansh Paliwal Signed-off-by: Junio C Hamano --- add-patch.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/add-patch.c b/add-patch.c index 8c03f710d3..30df920723 100644 --- a/add-patch.c +++ b/add-patch.c @@ -434,8 +434,8 @@ static int parse_diff(struct add_p_state *s, const struct pathspec *ps) strvec_push(&args, /* could be on an unborn branch */ !strcmp("HEAD", s->revision) && - repo_get_oid(the_repository, "HEAD", &oid) ? - empty_tree_oid_hex(the_repository->hash_algo) : s->revision); + repo_get_oid(s->s.r, "HEAD", &oid) ? + empty_tree_oid_hex(s->s.r->hash_algo) : s->revision); } color_arg_index = args.nr; /* Use `--no-color` explicitly, just in case `diff.color = always`. 
*/ @@ -1147,7 +1147,7 @@ static int edit_hunk_manually(struct add_p_state *s, struct hunk *hunk) "removed, then the edit is\n" "aborted and the hunk is left unchanged.\n")); - if (strbuf_edit_interactively(the_repository, &s->buf, + if (strbuf_edit_interactively(s->s.r, &s->buf, "addp-hunk-edit.diff", NULL) < 0) return -1; @@ -1551,7 +1551,7 @@ static size_t patch_update_file(struct add_p_state *s, size_t idx) if (file_diff->hunk_nr) { if (rendered_hunk_index != hunk_index) { if (use_pager) { - setup_pager(the_repository); + setup_pager(s->s.r); sigchain_push(SIGPIPE, SIG_IGN); } render_hunk(s, hunk, 0, colored, &s->buf); From afdb4c665f664e04c0f68c930ad50e5b05be71e1 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Tue, 17 Mar 2026 11:01:38 -0700 Subject: [PATCH 07/93] apply: fix new-style empty context line triggering incomplete-line check A new-style unified context diff represents an empty context line with an empty line (instead of a line with a single SP on it). The code to check whitespace errors in an incoming patch is designed to omit the first byte of a line (typically SP, "-", or "+") and pass the remainder of the line to the whitespace checker. Usually we do not pass a context line to the whitespace error checker, but when we are correcting errors, we do. This "remove the first byte and send the remainder" strategy of checking a line ended up sending a zero-length string to the whitespace checker when seeing a new-style empty context line, which caused the whitespace checker to say "ah, you do not even have a newline at the end!", leading to an "incomplete line" in the middle of the patch! Fix this by pretending that we got a traditional empty context line when we drive the whitespace checker. 
Signed-off-by: Junio C Hamano --- apply.c | 12 ++++++++++-- t/t4124-apply-ws-rule.sh | 16 ++++++++++++++++ 2 files changed, 26 insertions(+), 2 deletions(-) diff --git a/apply.c b/apply.c index f01204d15b..e88e5c77e3 100644 --- a/apply.c +++ b/apply.c @@ -1796,8 +1796,16 @@ static int parse_fragment(struct apply_state *state, trailing++; check_old_for_crlf(patch, line, len); if (!state->apply_in_reverse && - state->ws_error_action == correct_ws_error) - check_whitespace(state, line, len, patch->ws_rule); + state->ws_error_action == correct_ws_error) { + const char *test_line = line; + int test_len = len; + if (*line == '\n') { + test_line = " \n"; + test_len = 2; + } + check_whitespace(state, test_line, test_len, + patch->ws_rule); + } break; case '-': if (!state->apply_in_reverse) diff --git a/t/t4124-apply-ws-rule.sh b/t/t4124-apply-ws-rule.sh index 29ea7d4268..205d86d05e 100755 --- a/t/t4124-apply-ws-rule.sh +++ b/t/t4124-apply-ws-rule.sh @@ -561,6 +561,22 @@ test_expect_success 'check incomplete lines (setup)' ' git config core.whitespace incomplete-line ' +test_expect_success 'no incomplete context line (not an error)' ' + test_when_finished "rm -f sample*-i patch patch-new target" && + test_write_lines 1 2 3 "" 4 5 >sample-i && + test_write_lines 1 2 3 "" 0 5 >sample2-i && + cat sample-i >target && + git add target && + cat sample2-i >target && + git diff-files -p target >patch && + sed -e "s/^ $//" <patch >patch-new && + + cat sample-i >target && + git apply --whitespace=fix <patch-new 2>error && + test_cmp sample2-i target && + test_must_be_empty error +' + test_expect_success 'incomplete context line (not an error)' ' (test_write_lines 1 2 3 4 5 && printf 6) >sample-i && (test_write_lines 1 2 3 0 5 && printf 6) >sample2-i && From 736cef847cf788d90f39d15bb4be684bc4ba1013 Mon Sep 17 00:00:00 2001 From: Ramsay Jones Date: Thu, 19 Mar 2026 22:49:06 +0000 Subject: [PATCH 08/93] object-file: fix sparse 'plain integer as NULL pointer' error Signed-off-by: Ramsay Jones Signed-off-by: 
Junio C Hamano --- object-file.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/object-file.c b/object-file.c index 569ce6eaed..fa2ca60a59 100644 --- a/object-file.c +++ b/object-file.c @@ -1916,7 +1916,7 @@ int odb_source_loose_count_objects(struct odb_source *source, } else { *out = 0; ret = odb_source_loose_for_each_object(source, NULL, count_loose_object, - out, 0); + out, NULL); } out: From 753ecf42053b8afa9afcc19726635cc5a080c1bb Mon Sep 17 00:00:00 2001 From: Yuvraj Singh Chauhan Date: Fri, 20 Mar 2026 17:18:23 +0530 Subject: [PATCH 09/93] path-walk: fix NULL pointer dereference in error message When lookup_tree() or lookup_blob() cannot find a tree entry's object, 'o' is set to NULL via: o = child ? &child->object : NULL; The subsequent null-check catches this correctly, but then dereferences 'o' to format the error message: error(_("failed to find object %s"), oid_to_hex(&o->oid)); This causes a segfault instead of the intended diagnostic output. Fix this by using &entry.oid instead. 'entry' is the struct name_entry populated by tree_entry() on each loop iteration and holds the OID of the failing lookup -- which is exactly what the error should report. This crash is reachable via git-backfill(1) when a tree entry's object is absent from the local object database. 
Signed-off-by: Yuvraj Singh Chauhan Signed-off-by: Junio C Hamano --- path-walk.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/path-walk.c b/path-walk.c index f1ceed99e9..dd2c138c98 100644 --- a/path-walk.c +++ b/path-walk.c @@ -171,7 +171,7 @@ static int add_tree_entries(struct path_walk_context *ctx, if (!o) { error(_("failed to find object %s"), - oid_to_hex(&o->oid)); + oid_to_hex(&entry.oid)); return -1; } From 1382e54a9c9e5f98271a943af9c10299c6ba934b Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Fri, 20 Mar 2026 08:07:27 +0100 Subject: [PATCH 10/93] oidtree: modernize the code a bit The "oidtree.c" subsystem is rather small and self-contained and tends to just work. It thus doesn't typically receive a lot of attention, which has as a consequence that its coding style is somewhat dated nowadays. Modernize the style of this subsystem a bit: - Rename the `oidtree_iter()` function to `oidtree_each_cb()`. - Rename `struct oidtree_iter_data` to `struct oidtree_each_data` to match the renamed callback function type. - Rename parameters and variables to clarify their intent. - Add comments that explain what some of the functions do. - Adapt the return value of `oidtree_contains()` to be a boolean. This prepares for some changes to the subsystem that'll happen in the next commit. 
Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- oidtree.c | 59 ++++++++++++++++++++-------------------- oidtree.h | 42 +++++++++++++++++++++++----- t/unit-tests/u-oidtree.c | 14 +++++----- 3 files changed, 72 insertions(+), 43 deletions(-) diff --git a/oidtree.c b/oidtree.c index 324de94934..a4d10cd429 100644 --- a/oidtree.c +++ b/oidtree.c @@ -6,14 +6,6 @@ #include "oidtree.h" #include "hash.h" -struct oidtree_iter_data { - oidtree_iter fn; - void *arg; - size_t *last_nibble_at; - uint32_t algo; - uint8_t last_byte; -}; - void oidtree_init(struct oidtree *ot) { cb_init(&ot->tree); @@ -54,8 +46,7 @@ void oidtree_insert(struct oidtree *ot, const struct object_id *oid) cb_insert(&ot->tree, on, sizeof(*oid)); } - -int oidtree_contains(struct oidtree *ot, const struct object_id *oid) +bool oidtree_contains(struct oidtree *ot, const struct object_id *oid) { struct object_id k; size_t klen = sizeof(k); @@ -69,41 +60,51 @@ int oidtree_contains(struct oidtree *ot, const struct object_id *oid) klen += BUILD_ASSERT_OR_ZERO(offsetof(struct object_id, hash) < offsetof(struct object_id, algo)); - return cb_lookup(&ot->tree, (const uint8_t *)&k, klen) ? 1 : 0; + return !!cb_lookup(&ot->tree, (const uint8_t *)&k, klen); } -static enum cb_next iter(struct cb_node *n, void *arg) +struct oidtree_each_data { + oidtree_each_cb cb; + void *cb_data; + size_t *last_nibble_at; + uint32_t algo; + uint8_t last_byte; +}; + +static enum cb_next iter(struct cb_node *n, void *cb_data) { - struct oidtree_iter_data *x = arg; + struct oidtree_each_data *data = cb_data; struct object_id k; /* Copy to provide 4-byte alignment needed by struct object_id. 
*/ memcpy(&k, n->k, sizeof(k)); - if (x->algo != GIT_HASH_UNKNOWN && x->algo != k.algo) + if (data->algo != GIT_HASH_UNKNOWN && data->algo != k.algo) return CB_CONTINUE; - if (x->last_nibble_at) { - if ((k.hash[*x->last_nibble_at] ^ x->last_byte) & 0xf0) + if (data->last_nibble_at) { + if ((k.hash[*data->last_nibble_at] ^ data->last_byte) & 0xf0) return CB_CONTINUE; } - return x->fn(&k, x->arg); + return data->cb(&k, data->cb_data); } -void oidtree_each(struct oidtree *ot, const struct object_id *oid, - size_t oidhexsz, oidtree_iter fn, void *arg) +void oidtree_each(struct oidtree *ot, const struct object_id *prefix, + size_t prefix_hex_len, oidtree_each_cb cb, void *cb_data) { - size_t klen = oidhexsz / 2; - struct oidtree_iter_data x = { 0 }; - assert(oidhexsz <= GIT_MAX_HEXSZ); + struct oidtree_each_data data = { + .cb = cb, + .cb_data = cb_data, + .algo = prefix->algo, + }; + size_t klen = prefix_hex_len / 2; + assert(prefix_hex_len <= GIT_MAX_HEXSZ); - x.fn = fn; - x.arg = arg; - x.algo = oid->algo; - if (oidhexsz & 1) { - x.last_byte = oid->hash[klen]; - x.last_nibble_at = &klen; + if (prefix_hex_len & 1) { + data.last_byte = prefix->hash[klen]; + data.last_nibble_at = &klen; } - cb_each(&ot->tree, (const uint8_t *)oid, klen, iter, &x); + + cb_each(&ot->tree, prefix->hash, klen, iter, &data); } diff --git a/oidtree.h b/oidtree.h index 77898f510a..0651401017 100644 --- a/oidtree.h +++ b/oidtree.h @@ -5,18 +5,46 @@ #include "hash.h" #include "mem-pool.h" +/* + * OID trees are an efficient storage for object IDs that use a critbit tree + * internally. Common prefixes are duplicated and object IDs are stored in a + * way that allow easy iteration over the objects in lexicographic order. As a + * consequence, operations that want to enumerate all object IDs that match a + * given prefix can be answered efficiently. + * + * Note that it is not (yet) possible to store data other than the object IDs + * themselves in this tree. 
+ */ struct oidtree { struct cb_tree tree; struct mem_pool mem_pool; }; -void oidtree_init(struct oidtree *); -void oidtree_clear(struct oidtree *); -void oidtree_insert(struct oidtree *, const struct object_id *); -int oidtree_contains(struct oidtree *, const struct object_id *); +/* Initialize the oidtree so that it is ready for use. */ +void oidtree_init(struct oidtree *ot); -typedef enum cb_next (*oidtree_iter)(const struct object_id *, void *data); -void oidtree_each(struct oidtree *, const struct object_id *, - size_t oidhexsz, oidtree_iter, void *data); +/* + * Release all memory associated with the oidtree and reinitialize it for + * subsequent use. + */ +void oidtree_clear(struct oidtree *ot); + +/* Insert the object ID into the tree. */ +void oidtree_insert(struct oidtree *ot, const struct object_id *oid); + +/* Check whether the tree contains the given object ID. */ +bool oidtree_contains(struct oidtree *ot, const struct object_id *oid); + +/* Callback function used for `oidtree_each()`. */ +typedef enum cb_next (*oidtree_each_cb)(const struct object_id *oid, + void *cb_data); + +/* + * Iterate through all object IDs in the tree whose prefix matches the given + * object ID prefix and invoke the callback function on each of them. 
+ */ +void oidtree_each(struct oidtree *ot, + const struct object_id *prefix, size_t prefix_hex_len, + oidtree_each_cb cb, void *cb_data); #endif /* OIDTREE_H */ diff --git a/t/unit-tests/u-oidtree.c b/t/unit-tests/u-oidtree.c index e6eede2740..def47c6795 100644 --- a/t/unit-tests/u-oidtree.c +++ b/t/unit-tests/u-oidtree.c @@ -24,7 +24,7 @@ static int fill_tree_loc(struct oidtree *ot, const char *hexes[], size_t n) return 0; } -static void check_contains(struct oidtree *ot, const char *hex, int expected) +static void check_contains(struct oidtree *ot, const char *hex, bool expected) { struct object_id oid; @@ -88,12 +88,12 @@ void test_oidtree__cleanup(void) void test_oidtree__contains(void) { FILL_TREE(&ot, "444", "1", "2", "3", "4", "5", "a", "b", "c", "d", "e"); - check_contains(&ot, "44", 0); - check_contains(&ot, "441", 0); - check_contains(&ot, "440", 0); - check_contains(&ot, "444", 1); - check_contains(&ot, "4440", 1); - check_contains(&ot, "4444", 0); + check_contains(&ot, "44", false); + check_contains(&ot, "441", false); + check_contains(&ot, "440", false); + check_contains(&ot, "444", true); + check_contains(&ot, "4440", true); + check_contains(&ot, "4444", false); } void test_oidtree__each(void) From fe446b01aeaab307adcbfb39d4aaa72c37afbcda Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Fri, 20 Mar 2026 08:07:28 +0100 Subject: [PATCH 11/93] oidtree: extend iteration to allow for arbitrary return codes The interface `cb_each()` iterates through a crit-bit tree and calls a specific callback function for each of the contained items. The callback function is expected to return either: - `CB_CONTINUE` in case iteration shall continue. - `CB_BREAK` to abort iteration. This is needlessly restrictive though, as callers may want to return arbitrary values and have them be bubbled up to the `cb_each()` call site. 
In fact, this is a rather common pattern we have: whenever such a callback function returns a non-zero error code, we abort iteration and bubble up the code as-is. Refactor both the crit-bit tree and oidtree subsystems to behave accordingly. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- cbtree.c | 21 ++++++++++++--------- cbtree.h | 17 +++++++++-------- object-name.c | 4 ++-- oidtree.c | 12 ++++++------ oidtree.h | 18 ++++++++++++------ t/unit-tests/u-oidtree.c | 4 ++-- 6 files changed, 43 insertions(+), 33 deletions(-) diff --git a/cbtree.c b/cbtree.c index cf8cf75b89..4ab794bddc 100644 --- a/cbtree.c +++ b/cbtree.c @@ -96,26 +96,28 @@ struct cb_node *cb_lookup(struct cb_tree *t, const uint8_t *k, size_t klen) return p && !memcmp(p->k, k, klen) ? p : NULL; } -static enum cb_next cb_descend(struct cb_node *p, cb_iter fn, void *arg) +static int cb_descend(struct cb_node *p, cb_iter fn, void *arg) { if (1 & (uintptr_t)p) { struct cb_node *q = cb_node_of(p); - enum cb_next n = cb_descend(q->child[0], fn, arg); - - return n == CB_BREAK ? 
n : cb_descend(q->child[1], fn, arg); + int ret = cb_descend(q->child[0], fn, arg); + if (ret) + return ret; + return cb_descend(q->child[1], fn, arg); } else { return fn(p, arg); } } -void cb_each(struct cb_tree *t, const uint8_t *kpfx, size_t klen, - cb_iter fn, void *arg) +int cb_each(struct cb_tree *t, const uint8_t *kpfx, size_t klen, + cb_iter fn, void *arg) { struct cb_node *p = t->root; struct cb_node *top = p; size_t i = 0; - if (!p) return; /* empty tree */ + if (!p) + return 0; /* empty tree */ /* Walk tree, maintaining top pointer */ while (1 & (uintptr_t)p) { @@ -130,7 +132,8 @@ void cb_each(struct cb_tree *t, const uint8_t *kpfx, size_t klen, for (i = 0; i < klen; i++) { if (p->k[i] != kpfx[i]) - return; /* "best" match failed */ + return 0; /* "best" match failed */ } - cb_descend(top, fn, arg); + + return cb_descend(top, fn, arg); } diff --git a/cbtree.h b/cbtree.h index 43193abdda..c374b1b3db 100644 --- a/cbtree.h +++ b/cbtree.h @@ -30,11 +30,6 @@ struct cb_tree { struct cb_node *root; }; -enum cb_next { - CB_CONTINUE = 0, - CB_BREAK = 1 -}; - #define CBTREE_INIT { 0 } static inline void cb_init(struct cb_tree *t) @@ -46,9 +41,15 @@ static inline void cb_init(struct cb_tree *t) struct cb_node *cb_lookup(struct cb_tree *, const uint8_t *k, size_t klen); struct cb_node *cb_insert(struct cb_tree *, struct cb_node *, size_t klen); -typedef enum cb_next (*cb_iter)(struct cb_node *, void *arg); +/* + * Callback invoked by `cb_each()` for each node in the critbit tree. A return + * value of 0 will cause the iteration to continue, a non-zero return code will + * cause iteration to abort. The error code will be relayed back from + * `cb_each()` in that case. 
+ */ +typedef int (*cb_iter)(struct cb_node *, void *arg); -void cb_each(struct cb_tree *, const uint8_t *kpfx, size_t klen, - cb_iter, void *arg); +int cb_each(struct cb_tree *, const uint8_t *kpfx, size_t klen, + cb_iter, void *arg); #endif /* CBTREE_H */ diff --git a/object-name.c b/object-name.c index e5adec4c9d..a24a1b48e1 100644 --- a/object-name.c +++ b/object-name.c @@ -103,12 +103,12 @@ static void update_candidates(struct disambiguate_state *ds, const struct object static int match_hash(unsigned, const unsigned char *, const unsigned char *); -static enum cb_next match_prefix(const struct object_id *oid, void *arg) +static int match_prefix(const struct object_id *oid, void *arg) { struct disambiguate_state *ds = arg; /* no need to call match_hash, oidtree_each did prefix match */ update_candidates(ds, oid); - return ds->ambiguous ? CB_BREAK : CB_CONTINUE; + return ds->ambiguous; } static void find_short_object_filename(struct disambiguate_state *ds) diff --git a/oidtree.c b/oidtree.c index a4d10cd429..ab9fe7ec7a 100644 --- a/oidtree.c +++ b/oidtree.c @@ -71,7 +71,7 @@ struct oidtree_each_data { uint8_t last_byte; }; -static enum cb_next iter(struct cb_node *n, void *cb_data) +static int iter(struct cb_node *n, void *cb_data) { struct oidtree_each_data *data = cb_data; struct object_id k; @@ -80,18 +80,18 @@ static enum cb_next iter(struct cb_node *n, void *cb_data) memcpy(&k, n->k, sizeof(k)); if (data->algo != GIT_HASH_UNKNOWN && data->algo != k.algo) - return CB_CONTINUE; + return 0; if (data->last_nibble_at) { if ((k.hash[*data->last_nibble_at] ^ data->last_byte) & 0xf0) - return CB_CONTINUE; + return 0; } return data->cb(&k, data->cb_data); } -void oidtree_each(struct oidtree *ot, const struct object_id *prefix, - size_t prefix_hex_len, oidtree_each_cb cb, void *cb_data) +int oidtree_each(struct oidtree *ot, const struct object_id *prefix, + size_t prefix_hex_len, oidtree_each_cb cb, void *cb_data) { struct oidtree_each_data data = { .cb = cb, @@ 
-106,5 +106,5 @@ void oidtree_each(struct oidtree *ot, const struct object_id *prefix, data.last_nibble_at = &klen; } - cb_each(&ot->tree, prefix->hash, klen, iter, &data); + return cb_each(&ot->tree, prefix->hash, klen, iter, &data); } diff --git a/oidtree.h b/oidtree.h index 0651401017..2b7bad2e60 100644 --- a/oidtree.h +++ b/oidtree.h @@ -35,16 +35,22 @@ void oidtree_insert(struct oidtree *ot, const struct object_id *oid); /* Check whether the tree contains the given object ID. */ bool oidtree_contains(struct oidtree *ot, const struct object_id *oid); -/* Callback function used for `oidtree_each()`. */ -typedef enum cb_next (*oidtree_each_cb)(const struct object_id *oid, - void *cb_data); +/* + * Callback function used for `oidtree_each()`. Returning a non-zero exit code + * will cause iteration to stop. The exit code will be propagated to the caller + * of `oidtree_each()`. + */ +typedef int (*oidtree_each_cb)(const struct object_id *oid, + void *cb_data); /* * Iterate through all object IDs in the tree whose prefix matches the given * object ID prefix and invoke the callback function on each of them. + * + * Returns any non-zero exit code from the provided callback function. 
*/ -void oidtree_each(struct oidtree *ot, - const struct object_id *prefix, size_t prefix_hex_len, - oidtree_each_cb cb, void *cb_data); +int oidtree_each(struct oidtree *ot, + const struct object_id *prefix, size_t prefix_hex_len, + oidtree_each_cb cb, void *cb_data); #endif /* OIDTREE_H */ diff --git a/t/unit-tests/u-oidtree.c b/t/unit-tests/u-oidtree.c index def47c6795..d4d05c7dc3 100644 --- a/t/unit-tests/u-oidtree.c +++ b/t/unit-tests/u-oidtree.c @@ -38,7 +38,7 @@ struct expected_hex_iter { const char *query; }; -static enum cb_next check_each_cb(const struct object_id *oid, void *data) +static int check_each_cb(const struct object_id *oid, void *data) { struct expected_hex_iter *hex_iter = data; struct object_id expected; @@ -49,7 +49,7 @@ static enum cb_next check_each_cb(const struct object_id *oid, void *data) &expected); cl_assert_equal_s(oid_to_hex(oid), oid_to_hex(&expected)); hex_iter->i += 1; - return CB_CONTINUE; + return 0; } LAST_ARG_MUST_BE_NULL From cfd575f0a9730712107e4ee6799a37665bcd8204 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Fri, 20 Mar 2026 08:07:29 +0100 Subject: [PATCH 12/93] odb: introduce `struct odb_for_each_object_options` The `odb_for_each_object()` function only accepts a bitset of flags. In a subsequent commit we'll want to change object iteration to also support iterating over only those objects that have a specific prefix. While we could of course add the prefix to the function signature, or alternatively introduce a new function, both of these options don't really seem to be that sensible. Instead, introduce a new `struct odb_for_each_object_options` that can be passed to a new `odb_for_each_object_ext()` function. Splice through the options structure into the respective object database sources. 
Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- builtin/cat-file.c | 7 +++++-- builtin/pack-objects.c | 12 +++++++----- commit-graph.c | 5 ++++- object-file.c | 9 +++++---- object-file.h | 2 +- odb.c | 38 +++++++++++++++++++++++++------------- odb.h | 16 ++++++++++++++++ odb/source-files.c | 8 ++++---- odb/source.h | 6 +++--- packfile.c | 12 ++++++------ packfile.h | 2 +- 11 files changed, 77 insertions(+), 40 deletions(-) diff --git a/builtin/cat-file.c b/builtin/cat-file.c index b6f12f41d6..cd13a3a89f 100644 --- a/builtin/cat-file.c +++ b/builtin/cat-file.c @@ -848,6 +848,9 @@ static void batch_each_object(struct batch_options *opt, .callback = callback, .payload = _payload, }; + struct odb_for_each_object_options opts = { + .flags = flags, + }; struct bitmap_index *bitmap = NULL; struct odb_source *source; @@ -860,7 +863,7 @@ static void batch_each_object(struct batch_options *opt, odb_prepare_alternates(the_repository->objects); for (source = the_repository->objects->sources; source; source = source->next) { int ret = odb_source_loose_for_each_object(source, NULL, batch_one_object_oi, - &payload, flags); + &payload, &opts); if (ret) break; } @@ -884,7 +887,7 @@ static void batch_each_object(struct batch_options *opt, for (source = the_repository->objects->sources; source; source = source->next) { struct odb_source_files *files = odb_source_files_downcast(source); int ret = packfile_store_for_each_object(files->packed, &oi, - batch_one_object_oi, &payload, flags); + batch_one_object_oi, &payload, &opts); if (ret) break; } diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c index cd013c0b68..3bb57ff183 100644 --- a/builtin/pack-objects.c +++ b/builtin/pack-objects.c @@ -4344,6 +4344,12 @@ static void add_objects_in_unpacked_packs(void) { struct odb_source *source; time_t mtime; + struct odb_for_each_object_options opts = { + .flags = ODB_FOR_EACH_OBJECT_PACK_ORDER | + ODB_FOR_EACH_OBJECT_LOCAL_ONLY | + 
ODB_FOR_EACH_OBJECT_SKIP_IN_CORE_KEPT_PACKS | + ODB_FOR_EACH_OBJECT_SKIP_ON_DISK_KEPT_PACKS, + }; struct object_info oi = { .mtimep = &mtime, }; @@ -4356,11 +4362,7 @@ static void add_objects_in_unpacked_packs(void) continue; if (packfile_store_for_each_object(files->packed, &oi, - add_object_in_unpacked_pack, NULL, - ODB_FOR_EACH_OBJECT_PACK_ORDER | - ODB_FOR_EACH_OBJECT_LOCAL_ONLY | - ODB_FOR_EACH_OBJECT_SKIP_IN_CORE_KEPT_PACKS | - ODB_FOR_EACH_OBJECT_SKIP_ON_DISK_KEPT_PACKS)) + add_object_in_unpacked_pack, NULL, &opts)) die(_("cannot open pack index")); } } diff --git a/commit-graph.c b/commit-graph.c index c030003330..df4b4a125e 100644 --- a/commit-graph.c +++ b/commit-graph.c @@ -1969,6 +1969,9 @@ static void fill_oids_from_all_packs(struct write_commit_graph_context *ctx) { struct odb_source *source; enum object_type type; + struct odb_for_each_object_options opts = { + .flags = ODB_FOR_EACH_OBJECT_PACK_ORDER, + }; struct object_info oi = { .typep = &type, }; @@ -1983,7 +1986,7 @@ static void fill_oids_from_all_packs(struct write_commit_graph_context *ctx) for (source = ctx->r->objects->sources; source; source = source->next) { struct odb_source_files *files = odb_source_files_downcast(source); packfile_store_for_each_object(files->packed, &oi, add_packed_commits_oi, - ctx, ODB_FOR_EACH_OBJECT_PACK_ORDER); + ctx, &opts); } if (ctx->progress_done < ctx->approx_nr_objects) diff --git a/object-file.c b/object-file.c index 9764c8dd06..56cbb27ab9 100644 --- a/object-file.c +++ b/object-file.c @@ -1849,7 +1849,7 @@ int odb_source_loose_for_each_object(struct odb_source *source, const struct object_info *request, odb_for_each_object_cb cb, void *cb_data, - unsigned flags) + const struct odb_for_each_object_options *opts) { struct for_each_object_wrapper_data data = { .source = source, @@ -1859,9 +1859,9 @@ int odb_source_loose_for_each_object(struct odb_source *source, }; /* There are no loose promisor objects, so we can return immediately. 
*/ - if ((flags & ODB_FOR_EACH_OBJECT_PROMISOR_ONLY)) + if ((opts->flags & ODB_FOR_EACH_OBJECT_PROMISOR_ONLY)) return 0; - if ((flags & ODB_FOR_EACH_OBJECT_LOCAL_ONLY) && !source->local) + if ((opts->flags & ODB_FOR_EACH_OBJECT_LOCAL_ONLY) && !source->local) return 0; return for_each_loose_file_in_source(source, for_each_object_wrapper_cb, @@ -1914,9 +1914,10 @@ int odb_source_loose_count_objects(struct odb_source *source, *out = count * 256; ret = 0; } else { + struct odb_for_each_object_options opts = { 0 }; *out = 0; ret = odb_source_loose_for_each_object(source, NULL, count_loose_object, - out, NULL); + out, &opts); } out: diff --git a/object-file.h b/object-file.h index f8d8805a18..46dfa7b632 100644 --- a/object-file.h +++ b/object-file.h @@ -137,7 +137,7 @@ int odb_source_loose_for_each_object(struct odb_source *source, const struct object_info *request, odb_for_each_object_cb cb, void *cb_data, - unsigned flags); + const struct odb_for_each_object_options *opts); /* * Count the number of loose objects in this source. 
diff --git a/odb.c b/odb.c index 350e23f3c0..3019957b87 100644 --- a/odb.c +++ b/odb.c @@ -896,25 +896,37 @@ int odb_freshen_object(struct object_database *odb, return 0; } +int odb_for_each_object_ext(struct object_database *odb, + const struct object_info *request, + odb_for_each_object_cb cb, + void *cb_data, + const struct odb_for_each_object_options *opts) +{ + int ret; + + odb_prepare_alternates(odb); + for (struct odb_source *source = odb->sources; source; source = source->next) { + if (opts->flags & ODB_FOR_EACH_OBJECT_LOCAL_ONLY && !source->local) + continue; + + ret = odb_source_for_each_object(source, request, cb, cb_data, opts); + if (ret) + return ret; + } + + return 0; +} + int odb_for_each_object(struct object_database *odb, const struct object_info *request, odb_for_each_object_cb cb, void *cb_data, unsigned flags) { - int ret; - - odb_prepare_alternates(odb); - for (struct odb_source *source = odb->sources; source; source = source->next) { - if (flags & ODB_FOR_EACH_OBJECT_LOCAL_ONLY && !source->local) - continue; - - ret = odb_source_for_each_object(source, request, cb, cb_data, flags); - if (ret) - return ret; - } - - return 0; + struct odb_for_each_object_options opts = { + .flags = flags, + }; + return odb_for_each_object_ext(odb, request, cb, cb_data, &opts); } int odb_count_objects(struct object_database *odb, diff --git a/odb.h b/odb.h index 9aee260105..a19a8bb50d 100644 --- a/odb.h +++ b/odb.h @@ -481,6 +481,15 @@ typedef int (*odb_for_each_object_cb)(const struct object_id *oid, struct object_info *oi, void *cb_data); +/* + * Options that can be passed to `odb_for_each_object()` and its + * backend-specific implementations. + */ +struct odb_for_each_object_options { + /* A bitfield of `odb_for_each_object_flags`. */ + enum odb_for_each_object_flags flags; +}; + /* * Iterate through all objects contained in the object database. 
Note that * objects may be iterated over multiple times in case they are either stored @@ -495,6 +504,13 @@ typedef int (*odb_for_each_object_cb)(const struct object_id *oid, * Returns 0 on success, a negative error code in case a failure occurred, or * an arbitrary non-zero error code returned by the callback itself. */ +int odb_for_each_object_ext(struct object_database *odb, + const struct object_info *request, + odb_for_each_object_cb cb, + void *cb_data, + const struct odb_for_each_object_options *opts); + +/* Same as `odb_for_each_object_ext()` with `opts.flags` set to the given flags. */ int odb_for_each_object(struct object_database *odb, const struct object_info *request, odb_for_each_object_cb cb, diff --git a/odb/source-files.c b/odb/source-files.c index c08d8993e3..e90bb689bb 100644 --- a/odb/source-files.c +++ b/odb/source-files.c @@ -75,18 +75,18 @@ static int odb_source_files_for_each_object(struct odb_source *source, const struct object_info *request, odb_for_each_object_cb cb, void *cb_data, - unsigned flags) + const struct odb_for_each_object_options *opts) { struct odb_source_files *files = odb_source_files_downcast(source); int ret; - if (!(flags & ODB_FOR_EACH_OBJECT_PROMISOR_ONLY)) { - ret = odb_source_loose_for_each_object(source, request, cb, cb_data, flags); + if (!(opts->flags & ODB_FOR_EACH_OBJECT_PROMISOR_ONLY)) { + ret = odb_source_loose_for_each_object(source, request, cb, cb_data, opts); if (ret) return ret; } - ret = packfile_store_for_each_object(files->packed, request, cb, cb_data, flags); + ret = packfile_store_for_each_object(files->packed, request, cb, cb_data, opts); if (ret) return ret; diff --git a/odb/source.h b/odb/source.h index 96c906e7a1..ee5d6ed530 100644 --- a/odb/source.h +++ b/odb/source.h @@ -140,7 +140,7 @@ struct odb_source { const struct object_info *request, odb_for_each_object_cb cb, void *cb_data, - unsigned flags); + const struct odb_for_each_object_options *opts); /* * This callback is expected to count 
objects in the given object @@ -343,9 +343,9 @@ static inline int odb_source_for_each_object(struct odb_source *source, const struct object_info *request, odb_for_each_object_cb cb, void *cb_data, - unsigned flags) + const struct odb_for_each_object_options *opts) { - return source->for_each_object(source, request, cb, cb_data, flags); + return source->for_each_object(source, request, cb, cb_data, opts); } /* diff --git a/packfile.c b/packfile.c index d4de9f3ffe..a6f3d2035d 100644 --- a/packfile.c +++ b/packfile.c @@ -2375,7 +2375,7 @@ int packfile_store_for_each_object(struct packfile_store *store, const struct object_info *request, odb_for_each_object_cb cb, void *cb_data, - unsigned flags) + const struct odb_for_each_object_options *opts) { struct packfile_store_for_each_object_wrapper_data data = { .store = store, @@ -2391,15 +2391,15 @@ int packfile_store_for_each_object(struct packfile_store *store, for (e = packfile_store_get_packs(store); e; e = e->next) { struct packed_git *p = e->pack; - if ((flags & ODB_FOR_EACH_OBJECT_LOCAL_ONLY) && !p->pack_local) + if ((opts->flags & ODB_FOR_EACH_OBJECT_LOCAL_ONLY) && !p->pack_local) continue; - if ((flags & ODB_FOR_EACH_OBJECT_PROMISOR_ONLY) && + if ((opts->flags & ODB_FOR_EACH_OBJECT_PROMISOR_ONLY) && !p->pack_promisor) continue; - if ((flags & ODB_FOR_EACH_OBJECT_SKIP_IN_CORE_KEPT_PACKS) && + if ((opts->flags & ODB_FOR_EACH_OBJECT_SKIP_IN_CORE_KEPT_PACKS) && p->pack_keep_in_core) continue; - if ((flags & ODB_FOR_EACH_OBJECT_SKIP_ON_DISK_KEPT_PACKS) && + if ((opts->flags & ODB_FOR_EACH_OBJECT_SKIP_ON_DISK_KEPT_PACKS) && p->pack_keep) continue; if (open_pack_index(p)) { @@ -2408,7 +2408,7 @@ int packfile_store_for_each_object(struct packfile_store *store, } ret = for_each_object_in_pack(p, packfile_store_for_each_object_wrapper, - &data, flags); + &data, opts->flags); if (ret) goto out; } diff --git a/packfile.h b/packfile.h index a16ec3950d..fa41dfda38 100644 --- a/packfile.h +++ b/packfile.h @@ -367,7 +367,7 @@ int 
packfile_store_for_each_object(struct packfile_store *store, const struct object_info *request, odb_for_each_object_cb cb, void *cb_data, - unsigned flags); + const struct odb_for_each_object_options *opts); /* A hook to report invalid files in pack directory */ #define PACKDIR_FILE_PACK 1 From 284b7862be735bb47276ac288ace153ae3d06938 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Fri, 20 Mar 2026 08:07:30 +0100 Subject: [PATCH 13/93] object-name: move logic to iterate through loose prefixed objects The logic to iterate through loose objects that have a certain prefix is currently hosted in "object-name.c". This logic reaches into specifics of the loose object source, so it breaks once a different backend is used for the object storage. Move the logic to iterate through loose objects with a prefix into "object-file.c". This is done by extending the for-each-object options to support an optional prefix that is then honored by the loose source. Naturally, we'll also have this support in the packfile store. This is done in the next commit. Furthermore, there are no users of the loose cache outside of "object-file.c" anymore. As such, convert `odb_source_loose_cache()` to have file scope. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- object-file.c | 29 +++++++++++++++++++++++++++-- object-file.h | 7 ------- object-name.c | 10 ++++++---- odb.h | 7 +++++++ 4 files changed, 40 insertions(+), 13 deletions(-) diff --git a/object-file.c b/object-file.c index 56cbb27ab9..13732f324f 100644 --- a/object-file.c +++ b/object-file.c @@ -33,6 +33,9 @@ /* The maximum size for an object header. 
*/ #define MAX_HEADER_LEN 32 +static struct oidtree *odb_source_loose_cache(struct odb_source *source, + const struct object_id *oid); + static int get_conv_flags(unsigned flags) { if (flags & INDEX_RENORMALIZE) @@ -1845,6 +1848,23 @@ static int for_each_object_wrapper_cb(const struct object_id *oid, } } +static int for_each_prefixed_object_wrapper_cb(const struct object_id *oid, + void *cb_data) +{ + struct for_each_object_wrapper_data *data = cb_data; + if (data->request) { + struct object_info oi = *data->request; + + if (odb_source_loose_read_object_info(data->source, + oid, &oi, 0) < 0) + return -1; + + return data->cb(oid, &oi, data->cb_data); + } else { + return data->cb(oid, NULL, data->cb_data); + } +} + int odb_source_loose_for_each_object(struct odb_source *source, const struct object_info *request, odb_for_each_object_cb cb, @@ -1864,6 +1884,11 @@ int odb_source_loose_for_each_object(struct odb_source *source, if ((opts->flags & ODB_FOR_EACH_OBJECT_LOCAL_ONLY) && !source->local) return 0; + if (opts->prefix) + return oidtree_each(odb_source_loose_cache(source, opts->prefix), + opts->prefix, opts->prefix_hex_len, + for_each_prefixed_object_wrapper_cb, &data); + return for_each_loose_file_in_source(source, for_each_object_wrapper_cb, NULL, NULL, &data); } @@ -1935,8 +1960,8 @@ static int append_loose_object(const struct object_id *oid, return 0; } -struct oidtree *odb_source_loose_cache(struct odb_source *source, - const struct object_id *oid) +static struct oidtree *odb_source_loose_cache(struct odb_source *source, + const struct object_id *oid) { struct odb_source_files *files = odb_source_files_downcast(source); int subdir_nr = oid->hash[0]; diff --git a/object-file.h b/object-file.h index 46dfa7b632..f11ad58f6c 100644 --- a/object-file.h +++ b/object-file.h @@ -74,13 +74,6 @@ int odb_source_loose_write_stream(struct odb_source *source, struct odb_write_stream *stream, size_t len, struct object_id *oid); -/* - * Populate and return the loose object 
cache array corresponding to the - * given object ID. - */ -struct oidtree *odb_source_loose_cache(struct odb_source *source, - const struct object_id *oid); - /* * Put in `buf` the name of the file in the local object database that * would be used to store a loose object with the specified oid. diff --git a/object-name.c b/object-name.c index a24a1b48e1..929a68dbd0 100644 --- a/object-name.c +++ b/object-name.c @@ -16,7 +16,6 @@ #include "remote.h" #include "dir.h" #include "oid-array.h" -#include "oidtree.h" #include "packfile.h" #include "pretty.h" #include "object-file.h" @@ -103,7 +102,7 @@ static void update_candidates(struct disambiguate_state *ds, const struct object static int match_hash(unsigned, const unsigned char *, const unsigned char *); -static int match_prefix(const struct object_id *oid, void *arg) +static int match_prefix(const struct object_id *oid, struct object_info *oi UNUSED, void *arg) { struct disambiguate_state *ds = arg; /* no need to call match_hash, oidtree_each did prefix match */ @@ -113,11 +112,14 @@ static int match_prefix(const struct object_id *oid, void *arg) static void find_short_object_filename(struct disambiguate_state *ds) { + struct odb_for_each_object_options opts = { + .prefix = &ds->bin_pfx, + .prefix_hex_len = ds->len, + }; struct odb_source *source; for (source = ds->repo->objects->sources; source && !ds->ambiguous; source = source->next) - oidtree_each(odb_source_loose_cache(source, &ds->bin_pfx), - &ds->bin_pfx, ds->len, match_prefix, ds); + odb_source_loose_for_each_object(source, NULL, match_prefix, ds, &opts); } static int match_hash(unsigned len, const unsigned char *a, const unsigned char *b) diff --git a/odb.h b/odb.h index a19a8bb50d..e80fd8f7ab 100644 --- a/odb.h +++ b/odb.h @@ -488,6 +488,13 @@ typedef int (*odb_for_each_object_cb)(const struct object_id *oid, struct odb_for_each_object_options { /* A bitfield of `odb_for_each_object_flags`. 
*/ enum odb_for_each_object_flags flags; + + /* + * If set, only iterate through objects whose first `prefix_hex_len` + * hex characters matches the given prefix. + */ + const struct object_id *prefix; + size_t prefix_hex_len; }; /* From e30bff8f8402f0f147a08fe00b75e24f293fa870 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Fri, 20 Mar 2026 08:07:31 +0100 Subject: [PATCH 14/93] object-name: move logic to iterate through packed prefixed objects Similar to the preceding commit, move the logic to iterate through objects that have a given prefix into "packfile.c". Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- object-name.c | 94 ++------------------------- packfile.c | 174 ++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 181 insertions(+), 87 deletions(-) diff --git a/object-name.c b/object-name.c index 929a68dbd0..ff0de06ff9 100644 --- a/object-name.c +++ b/object-name.c @@ -100,8 +100,6 @@ static void update_candidates(struct disambiguate_state *ds, const struct object /* otherwise, current can be discarded and candidate is still good */ } -static int match_hash(unsigned, const unsigned char *, const unsigned char *); - static int match_prefix(const struct object_id *oid, struct object_info *oi UNUSED, void *arg) { struct disambiguate_state *ds = arg; @@ -122,103 +120,25 @@ static void find_short_object_filename(struct disambiguate_state *ds) odb_source_loose_for_each_object(source, NULL, match_prefix, ds, &opts); } -static int match_hash(unsigned len, const unsigned char *a, const unsigned char *b) -{ - do { - if (*a != *b) - return 0; - a++; - b++; - len -= 2; - } while (len > 1); - if (len) - if ((*a ^ *b) & 0xf0) - return 0; - return 1; -} - -static void unique_in_midx(struct multi_pack_index *m, - struct disambiguate_state *ds) -{ - for (; m; m = m->base_midx) { - uint32_t num, i, first = 0; - const struct object_id *current = NULL; - int len = ds->len > ds->repo->hash_algo->hexsz ? 
- ds->repo->hash_algo->hexsz : ds->len; - - if (!m->num_objects) - continue; - - num = m->num_objects + m->num_objects_in_base; - - bsearch_one_midx(&ds->bin_pfx, m, &first); - - /* - * At this point, "first" is the location of the lowest - * object with an object name that could match - * "bin_pfx". See if we have 0, 1 or more objects that - * actually match(es). - */ - for (i = first; i < num && !ds->ambiguous; i++) { - struct object_id oid; - current = nth_midxed_object_oid(&oid, m, i); - if (!match_hash(len, ds->bin_pfx.hash, current->hash)) - break; - update_candidates(ds, current); - } - } -} - -static void unique_in_pack(struct packed_git *p, - struct disambiguate_state *ds) -{ - uint32_t num, i, first = 0; - int len = ds->len > ds->repo->hash_algo->hexsz ? - ds->repo->hash_algo->hexsz : ds->len; - - if (p->multi_pack_index) - return; - - if (open_pack_index(p) || !p->num_objects) - return; - - num = p->num_objects; - bsearch_pack(&ds->bin_pfx, p, &first); - - /* - * At this point, "first" is the location of the lowest object - * with an object name that could match "bin_pfx". See if we have - * 0, 1 or more objects that actually match(es). 
- */ - for (i = first; i < num && !ds->ambiguous; i++) { - struct object_id oid; - nth_packed_object_id(&oid, p, i); - if (!match_hash(len, ds->bin_pfx.hash, oid.hash)) - break; - update_candidates(ds, &oid); - } -} - static void find_short_packed_object(struct disambiguate_state *ds) { + struct odb_for_each_object_options opts = { + .prefix = &ds->bin_pfx, + .prefix_hex_len = ds->len, + }; struct odb_source *source; - struct packed_git *p; /* Skip, unless oids from the storage hash algorithm are wanted */ if (ds->bin_pfx.algo && (&hash_algos[ds->bin_pfx.algo] != ds->repo->hash_algo)) return; odb_prepare_alternates(ds->repo->objects); - for (source = ds->repo->objects->sources; source && !ds->ambiguous; source = source->next) { - struct multi_pack_index *m = get_multi_pack_index(source); - if (m) - unique_in_midx(m, ds); - } + for (source = ds->repo->objects->sources; source; source = source->next) { + struct odb_source_files *files = odb_source_files_downcast(source); - repo_for_each_pack(ds->repo, p) { + packfile_store_for_each_object(files->packed, NULL, match_prefix, ds, &opts); if (ds->ambiguous) break; - unique_in_pack(p, ds); } } diff --git a/packfile.c b/packfile.c index a6f3d2035d..2539a371c1 100644 --- a/packfile.c +++ b/packfile.c @@ -2371,6 +2371,177 @@ static int packfile_store_for_each_object_wrapper(const struct object_id *oid, } } +static int match_hash(unsigned len, const unsigned char *a, const unsigned char *b) +{ + do { + if (*a != *b) + return 0; + a++; + b++; + len -= 2; + } while (len > 1); + if (len) + if ((*a ^ *b) & 0xf0) + return 0; + return 1; +} + +static int for_each_prefixed_object_in_midx( + struct packfile_store *store, + struct multi_pack_index *m, + const struct odb_for_each_object_options *opts, + struct packfile_store_for_each_object_wrapper_data *data) +{ + int ret; + + for (; m; m = m->base_midx) { + uint32_t num, i, first = 0; + int len = opts->prefix_hex_len > m->source->odb->repo->hash_algo->hexsz ? 
+ m->source->odb->repo->hash_algo->hexsz : opts->prefix_hex_len; + + if (!m->num_objects) + continue; + + num = m->num_objects + m->num_objects_in_base; + + bsearch_one_midx(opts->prefix, m, &first); + + /* + * At this point, "first" is the location of the lowest + * object with an object name that could match "opts->prefix". + * See if we have 0, 1 or more objects that actually match(es). + */ + for (i = first; i < num; i++) { + const struct object_id *current = NULL; + struct object_id oid; + + current = nth_midxed_object_oid(&oid, m, i); + + if (!match_hash(len, opts->prefix->hash, current->hash)) + break; + + if (data->request) { + struct object_info oi = *data->request; + + ret = packfile_store_read_object_info(store, current, + &oi, 0); + if (ret) + goto out; + + ret = data->cb(&oid, &oi, data->cb_data); + if (ret) + goto out; + } else { + ret = data->cb(&oid, NULL, data->cb_data); + if (ret) + goto out; + } + } + } + + ret = 0; + +out: + return ret; +} + +static int for_each_prefixed_object_in_pack( + struct packfile_store *store, + struct packed_git *p, + const struct odb_for_each_object_options *opts, + struct packfile_store_for_each_object_wrapper_data *data) +{ + uint32_t num, i, first = 0; + int len = opts->prefix_hex_len > p->repo->hash_algo->hexsz ? + p->repo->hash_algo->hexsz : opts->prefix_hex_len; + int ret; + + num = p->num_objects; + bsearch_pack(opts->prefix, p, &first); + + /* + * At this point, "first" is the location of the lowest object + * with an object name that could match "bin_pfx". See if we have + * 0, 1 or more objects that actually match(es). 
+ */ + for (i = first; i < num; i++) { + struct object_id oid; + + nth_packed_object_id(&oid, p, i); + if (!match_hash(len, opts->prefix->hash, oid.hash)) + break; + + if (data->request) { + struct object_info oi = *data->request; + + ret = packfile_store_read_object_info(store, &oid, &oi, 0); + if (ret) + goto out; + + ret = data->cb(&oid, &oi, data->cb_data); + if (ret) + goto out; + } else { + ret = data->cb(&oid, NULL, data->cb_data); + if (ret) + goto out; + } + } + + ret = 0; + +out: + return ret; +} + +static int packfile_store_for_each_prefixed_object( + struct packfile_store *store, + const struct odb_for_each_object_options *opts, + struct packfile_store_for_each_object_wrapper_data *data) +{ + struct packfile_list_entry *e; + struct multi_pack_index *m; + bool pack_errors = false; + int ret; + + if (opts->flags) + BUG("flags unsupported"); + + store->skip_mru_updates = true; + + m = get_multi_pack_index(store->source); + if (m) { + ret = for_each_prefixed_object_in_midx(store, m, opts, data); + if (ret) + goto out; + } + + for (e = packfile_store_get_packs(store); e; e = e->next) { + if (e->pack->multi_pack_index) + continue; + + if (open_pack_index(e->pack)) { + pack_errors = true; + continue; + } + + if (!e->pack->num_objects) + continue; + + ret = for_each_prefixed_object_in_pack(store, e->pack, opts, data); + if (ret) + goto out; + } + + ret = 0; + +out: + store->skip_mru_updates = false; + if (!ret && pack_errors) + ret = -1; + return ret; +} + int packfile_store_for_each_object(struct packfile_store *store, const struct object_info *request, odb_for_each_object_cb cb, @@ -2386,6 +2557,9 @@ int packfile_store_for_each_object(struct packfile_store *store, struct packfile_list_entry *e; int pack_errors = 0, ret; + if (opts->prefix) + return packfile_store_for_each_prefixed_object(store, opts, &data); + store->skip_mru_updates = true; for (e = packfile_store_get_packs(store); e; e = e->next) { From 28c9254e3b3e1304b5a6c36146cf6fec29f3f5d3 Mon Sep 17 
00:00:00 2001 From: Patrick Steinhardt Date: Fri, 20 Mar 2026 08:07:32 +0100 Subject: [PATCH 15/93] object-name: extract function to parse object ID prefixes Extract the logic that parses an object ID prefix into a new function. This function will be used by a second callsite in a subsequent commit. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- object-name.c | 60 ++++++++++++++++++++++++++++++++------------------- 1 file changed, 38 insertions(+), 22 deletions(-) diff --git a/object-name.c b/object-name.c index ff0de06ff9..fd1b010ab3 100644 --- a/object-name.c +++ b/object-name.c @@ -270,41 +270,57 @@ int set_disambiguate_hint_config(const char *var, const char *value) return error("unknown hint type for '%s': %s", var, value); } +static int parse_oid_prefix(const char *name, int len, + const struct git_hash_algo *algo, + char *hex_out, + struct object_id *oid_out) +{ + for (int i = 0; i < len; i++) { + unsigned char c = name[i]; + unsigned char val; + if (c >= '0' && c <= '9') { + val = c - '0'; + } else if (c >= 'a' && c <= 'f') { + val = c - 'a' + 10; + } else if (c >= 'A' && c <='F') { + val = c - 'A' + 10; + c -= 'A' - 'a'; + } else { + return -1; + } + + if (hex_out) + hex_out[i] = c; + if (oid_out) { + if (!(i & 1)) + val <<= 4; + oid_out->hash[i >> 1] |= val; + } + } + + if (hex_out) + hex_out[len] = '\0'; + if (oid_out) + oid_out->algo = algo ? 
hash_algo_by_ptr(algo) : GIT_HASH_UNKNOWN; + + return 0; +} + static int init_object_disambiguation(struct repository *r, const char *name, int len, const struct git_hash_algo *algo, struct disambiguate_state *ds) { - int i; - if (len < MINIMUM_ABBREV || len > GIT_MAX_HEXSZ) return -1; memset(ds, 0, sizeof(*ds)); - for (i = 0; i < len ;i++) { - unsigned char c = name[i]; - unsigned char val; - if (c >= '0' && c <= '9') - val = c - '0'; - else if (c >= 'a' && c <= 'f') - val = c - 'a' + 10; - else if (c >= 'A' && c <='F') { - val = c - 'A' + 10; - c -= 'A' - 'a'; - } - else - return -1; - ds->hex_pfx[i] = c; - if (!(i & 1)) - val <<= 4; - ds->bin_pfx.hash[i >> 1] |= val; - } + if (parse_oid_prefix(name, len, algo, ds->hex_pfx, &ds->bin_pfx) < 0) + return -1; ds->len = len; - ds->hex_pfx[len] = '\0'; ds->repo = r; - ds->bin_pfx.algo = algo ? hash_algo_by_ptr(algo) : GIT_HASH_UNKNOWN; odb_prepare_alternates(r->objects); return 0; } From d2612fe59e605102cb422fadbb0cb8bea499daee Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Fri, 20 Mar 2026 08:07:33 +0100 Subject: [PATCH 16/93] object-name: backend-generic `repo_collect_ambiguous()` The function `repo_collect_ambiguous()` is responsible for collecting objects whose IDs match a specific prefix. The information is then used to inform the user about which objects they could have meant in case a short object ID is ambiguous. The logic to do this uses the object disambiguation infrastructure and calls into backend-specific functions to iterate through loose and packed objects. This isn't really required anymore though: all we want to do is to enumerate objects that have such a prefix and then append those objects to a `struct oid_array`. This can be trivially achieved in a generic way now that `odb_for_each_object()` has learned to yield only objects that match such a prefix. Refactor the code to use the backend-generic infrastructure instead. 
Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- object-name.c | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/object-name.c b/object-name.c index fd1b010ab3..4c3ace150e 100644 --- a/object-name.c +++ b/object-name.c @@ -448,8 +448,8 @@ static int collect_ambiguous(const struct object_id *oid, void *data) return 0; } -static int repo_collect_ambiguous(struct repository *r UNUSED, - const struct object_id *oid, +static int repo_collect_ambiguous(const struct object_id *oid, + struct object_info *oi UNUSED, void *data) { return collect_ambiguous(oid, data); @@ -586,18 +586,19 @@ int repo_for_each_abbrev(struct repository *r, const char *prefix, const struct git_hash_algo *algo, each_abbrev_fn fn, void *cb_data) { + struct object_id prefix_oid = { 0 }; + struct odb_for_each_object_options opts = { + .prefix = &prefix_oid, + .prefix_hex_len = strlen(prefix), + }; struct oid_array collect = OID_ARRAY_INIT; - struct disambiguate_state ds; int ret; - if (init_object_disambiguation(r, prefix, strlen(prefix), algo, &ds) < 0) + if (parse_oid_prefix(prefix, opts.prefix_hex_len, algo, NULL, &prefix_oid) < 0) return -1; - ds.always_call_fn = 1; - ds.fn = repo_collect_ambiguous; - ds.cb_data = &collect; - find_short_object_filename(&ds); - find_short_packed_object(&ds); + if (odb_for_each_object_ext(r->objects, NULL, repo_collect_ambiguous, &collect, &opts) < 0) + return -1; ret = oid_array_for_each_unique(&collect, fn, cb_data); oid_array_clear(&collect); From eac58debd9f61391a61b832e3cee349a20bd2c4a Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Fri, 20 Mar 2026 08:07:34 +0100 Subject: [PATCH 17/93] object-name: backend-generic `get_short_oid()` The function `get_short_oid()` takes as input an abbreviated object ID and tries to turn that object ID into the full object ID. This is done by iterating through all objects that have the user-provided prefix. 
If that yields exactly one object we know that the abbreviated object ID is unambiguous, otherwise it is ambiguous and we print the list of objects that match the prefix. We iterate through all objects with the given prefix by calling both `find_short_packed_object()` and `find_short_object_filename()`, which is of course specific to the "files" backend. But we now have a generic way to iterate through objects with a specific prefix. Refactor the code to use `odb_for_each_object()` instead so that it works with object backends different than the "files" backend. Remove the now-unused `find_short_packed_object()` function. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- object-name.c | 32 ++++++-------------------------- 1 file changed, 6 insertions(+), 26 deletions(-) diff --git a/object-name.c b/object-name.c index 4c3ace150e..7a224ab4af 100644 --- a/object-name.c +++ b/object-name.c @@ -120,28 +120,6 @@ static void find_short_object_filename(struct disambiguate_state *ds) odb_source_loose_for_each_object(source, NULL, match_prefix, ds, &opts); } -static void find_short_packed_object(struct disambiguate_state *ds) -{ - struct odb_for_each_object_options opts = { - .prefix = &ds->bin_pfx, - .prefix_hex_len = ds->len, - }; - struct odb_source *source; - - /* Skip, unless oids from the storage hash algorithm are wanted */ - if (ds->bin_pfx.algo && (&hash_algos[ds->bin_pfx.algo] != ds->repo->hash_algo)) - return; - - odb_prepare_alternates(ds->repo->objects); - for (source = ds->repo->objects->sources; source; source = source->next) { - struct odb_source_files *files = odb_source_files_downcast(source); - - packfile_store_for_each_object(files->packed, NULL, match_prefix, ds, &opts); - if (ds->ambiguous) - break; - } -} - static int finish_object_disambiguation(struct disambiguate_state *ds, struct object_id *oid) { @@ -499,6 +477,7 @@ static enum get_oid_result get_short_oid(struct repository *r, struct object_id *oid, unsigned flags) { + struct 
odb_for_each_object_options opts = { 0 }; int status; struct disambiguate_state ds; int quietly = !!(flags & GET_OID_QUIETLY); @@ -526,8 +505,10 @@ static enum get_oid_result get_short_oid(struct repository *r, else ds.fn = default_disambiguate_hint; - find_short_object_filename(&ds); - find_short_packed_object(&ds); + opts.prefix = &ds.bin_pfx; + opts.prefix_hex_len = ds.len; + + odb_for_each_object_ext(r->objects, NULL, match_prefix, &ds, &opts); status = finish_object_disambiguation(&ds, oid); /* @@ -537,8 +518,7 @@ static enum get_oid_result get_short_oid(struct repository *r, */ if (status == MISSING_OBJECT) { odb_reprepare(r->objects); - find_short_object_filename(&ds); - find_short_packed_object(&ds); + odb_for_each_object_ext(r->objects, NULL, match_prefix, &ds, &opts); status = finish_object_disambiguation(&ds, oid); } From e9b7caa1b14bc1fe825b216941a0655d6afdffe5 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Fri, 20 Mar 2026 08:07:35 +0100 Subject: [PATCH 18/93] object-name: merge `update_candidates()` and `match_prefix()` There's only a single callsite for `match_prefix()`, and that function is a rather trivial wrapper of `update_candidates()`. Merge these two functions into a single `update_disambiguate_state()` function. 
Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- object-name.c | 34 ++++++++++++++++++---------------- 1 file changed, 18 insertions(+), 16 deletions(-) diff --git a/object-name.c b/object-name.c index 7a224ab4af..f55a332032 100644 --- a/object-name.c +++ b/object-name.c @@ -51,27 +51,31 @@ struct disambiguate_state { unsigned always_call_fn:1; }; -static void update_candidates(struct disambiguate_state *ds, const struct object_id *current) +static int update_disambiguate_state(const struct object_id *current, + struct object_info *oi UNUSED, + void *cb_data) { + struct disambiguate_state *ds = cb_data; + /* The hash algorithm of current has already been filtered */ if (ds->always_call_fn) { ds->ambiguous = ds->fn(ds->repo, current, ds->cb_data) ? 1 : 0; - return; + return ds->ambiguous; } if (!ds->candidate_exists) { /* this is the first candidate */ oidcpy(&ds->candidate, current); ds->candidate_exists = 1; - return; + return 0; } else if (oideq(&ds->candidate, current)) { /* the same as what we already have seen */ - return; + return 0; } if (!ds->fn) { /* cannot disambiguate between ds->candidate and current */ ds->ambiguous = 1; - return; + return ds->ambiguous; } if (!ds->candidate_checked) { @@ -84,7 +88,7 @@ static void update_candidates(struct disambiguate_state *ds, const struct object /* discard the candidate; we know it does not satisfy fn */ oidcpy(&ds->candidate, current); ds->candidate_checked = 0; - return; + return 0; } /* if we reach this point, we know ds->candidate satisfies fn */ @@ -95,17 +99,12 @@ static void update_candidates(struct disambiguate_state *ds, const struct object */ ds->candidate_ok = 0; ds->ambiguous = 1; + return ds->ambiguous; } /* otherwise, current can be discarded and candidate is still good */ -} -static int match_prefix(const struct object_id *oid, struct object_info *oi UNUSED, void *arg) -{ - struct disambiguate_state *ds = arg; - /* no need to call match_hash, oidtree_each did prefix match */ - 
update_candidates(ds, oid); - return ds->ambiguous; + return 0; } static void find_short_object_filename(struct disambiguate_state *ds) @@ -117,7 +116,8 @@ static void find_short_object_filename(struct disambiguate_state *ds) struct odb_source *source; for (source = ds->repo->objects->sources; source && !ds->ambiguous; source = source->next) - odb_source_loose_for_each_object(source, NULL, match_prefix, ds, &opts); + odb_source_loose_for_each_object(source, NULL, update_disambiguate_state, + ds, &opts); } static int finish_object_disambiguation(struct disambiguate_state *ds, @@ -508,7 +508,8 @@ static enum get_oid_result get_short_oid(struct repository *r, opts.prefix = &ds.bin_pfx; opts.prefix_hex_len = ds.len; - odb_for_each_object_ext(r->objects, NULL, match_prefix, &ds, &opts); + odb_for_each_object_ext(r->objects, NULL, update_disambiguate_state, + &ds, &opts); status = finish_object_disambiguation(&ds, oid); /* @@ -518,7 +519,8 @@ static enum get_oid_result get_short_oid(struct repository *r, */ if (status == MISSING_OBJECT) { odb_reprepare(r->objects); - odb_for_each_object_ext(r->objects, NULL, match_prefix, &ds, &opts); + odb_for_each_object_ext(r->objects, NULL, update_disambiguate_state, + &ds, &opts); status = finish_object_disambiguation(&ds, oid); } From 67f47eab61c3a2c14f2d0351c3844f12fbd95dd2 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Fri, 20 Mar 2026 08:07:36 +0100 Subject: [PATCH 19/93] object-name: abbreviate loose object names without `disambiguate_state` The function `find_short_object_filename()` takes an object ID and computes the minimum required object name length to make it unique. This is done by reusing the object disambiguation infrastructure, where we iterate through every loose object and then update the disambiguate state one by one. Ultimately, we don't care about the disambiguate state though. It is used because this infrastructure knows how to enumerate only those objects that match a given prefix. 
But now that we have extended the `odb_for_each_object()` function to do this for us we have an easier way to do this. Consequently, we really only use the disambiguate state now to propagate `struct min_abbrev_data`. Refactor the code and drop this indirection so that we use `struct min_abbrev_data` directly. This also allows us to drop some now-unused logic from the disambiguate infrastructure. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- object-name.c | 54 +++++++++++++++++++-------------------------------- 1 file changed, 20 insertions(+), 34 deletions(-) diff --git a/object-name.c b/object-name.c index f55a332032..d82fb49f39 100644 --- a/object-name.c +++ b/object-name.c @@ -48,7 +48,6 @@ struct disambiguate_state { unsigned candidate_ok:1; unsigned disambiguate_fn_used:1; unsigned ambiguous:1; - unsigned always_call_fn:1; }; static int update_disambiguate_state(const struct object_id *current, @@ -58,10 +57,6 @@ static int update_disambiguate_state(const struct object_id *current, struct disambiguate_state *ds = cb_data; /* The hash algorithm of current has already been filtered */ - if (ds->always_call_fn) { - ds->ambiguous = ds->fn(ds->repo, current, ds->cb_data) ? 
1 : 0; - return ds->ambiguous; - } if (!ds->candidate_exists) { /* this is the first candidate */ oidcpy(&ds->candidate, current); @@ -107,19 +102,6 @@ static int update_disambiguate_state(const struct object_id *current, return 0; } -static void find_short_object_filename(struct disambiguate_state *ds) -{ - struct odb_for_each_object_options opts = { - .prefix = &ds->bin_pfx, - .prefix_hex_len = ds->len, - }; - struct odb_source *source; - - for (source = ds->repo->objects->sources; source && !ds->ambiguous; source = source->next) - odb_source_loose_for_each_object(source, NULL, update_disambiguate_state, - ds, &opts); -} - static int finish_object_disambiguation(struct disambiguate_state *ds, struct object_id *oid) { @@ -632,11 +614,26 @@ static int extend_abbrev_len(const struct object_id *oid, return 0; } -static int repo_extend_abbrev_len(struct repository *r UNUSED, - const struct object_id *oid, - void *cb_data) +static int extend_abbrev_len_loose(const struct object_id *oid, + struct object_info *oi UNUSED, + void *cb_data) { - return extend_abbrev_len(oid, cb_data); + struct min_abbrev_data *data = cb_data; + extend_abbrev_len(oid, data); + return 0; +} + +static void find_abbrev_len_loose(struct min_abbrev_data *mad) +{ + struct odb_for_each_object_options opts = { + .prefix = mad->oid, + .prefix_hex_len = mad->cur_len, + }; + struct odb_source *source; + + for (source = mad->repo->objects->sources; source; source = source->next) + odb_source_loose_for_each_object(source, NULL, extend_abbrev_len_loose, + mad, &opts); } static void find_abbrev_len_for_midx(struct multi_pack_index *m, @@ -752,9 +749,7 @@ int repo_find_unique_abbrev_r(struct repository *r, char *hex, { const struct git_hash_algo *algo = oid->algo ? 
&hash_algos[oid->algo] : r->hash_algo; - struct disambiguate_state ds; struct min_abbrev_data mad; - struct object_id oid_ret; const unsigned hexsz = algo->hexsz; if (len < 0) { @@ -794,16 +789,7 @@ int repo_find_unique_abbrev_r(struct repository *r, char *hex, mad.oid = oid; find_abbrev_len_packed(&mad); - - if (init_object_disambiguation(r, hex, mad.cur_len, algo, &ds) < 0) - return -1; - - ds.fn = repo_extend_abbrev_len; - ds.always_call_fn = 1; - ds.cb_data = (void *)&mad; - - find_short_object_filename(&ds); - (void)finish_object_disambiguation(&ds, &oid_ret); + find_abbrev_len_loose(&mad); hex[mad.cur_len] = 0; return mad.cur_len; From 1a2842d1b1e91d5e068d231cf4df4e783bdf9205 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Fri, 20 Mar 2026 08:07:37 +0100 Subject: [PATCH 20/93] object-name: simplify computing common prefixes The function `extend_abbrev_len()` computes the length of common hex characters between two object IDs. This is done by: - Making the caller provide the `hex` string for the needle object ID. - Comparing every hex position of the haystack object ID with `get_hex_char_from_oid()`. Turning the binary representation into hex first is roundabout though: we can simply compare the binary representation and give some special attention to the final nibble. Introduce a new function `oid_common_prefix_hexlen()` that does exactly this and refactor the code to use the new function. This allows us to drop the `struct min_abbrev_data::hex` field. Furthermore, this function will be used by some other callsites in subsequent commits.
Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- hash.c | 18 ++++++++++++++++++ hash.h | 3 +++ object-name.c | 23 +++-------------------- 3 files changed, 24 insertions(+), 20 deletions(-) diff --git a/hash.c b/hash.c index 553f2008ea..e925b9754e 100644 --- a/hash.c +++ b/hash.c @@ -317,3 +317,21 @@ const struct git_hash_algo *unsafe_hash_algo(const struct git_hash_algo *algop) /* Otherwise use the default one. */ return algop; } + +unsigned oid_common_prefix_hexlen(const struct object_id *a, + const struct object_id *b) +{ + unsigned rawsz = hash_algos[a->algo].rawsz; + + for (unsigned i = 0; i < rawsz; i++) { + if (a->hash[i] == b->hash[i]) + continue; + + if ((a->hash[i] ^ b->hash[i]) & 0xf0) + return i * 2; + else + return i * 2 + 1; + } + + return rawsz * 2; +} diff --git a/hash.h b/hash.h index d51efce1d3..c082a53c9a 100644 --- a/hash.h +++ b/hash.h @@ -396,6 +396,9 @@ static inline int oideq(const struct object_id *oid1, const struct object_id *oi return !memcmp(oid1->hash, oid2->hash, GIT_MAX_RAWSZ); } +unsigned oid_common_prefix_hexlen(const struct object_id *a, + const struct object_id *b); + static inline void oidcpy(struct object_id *dst, const struct object_id *src) { memcpy(dst->hash, src->hash, GIT_MAX_RAWSZ); diff --git a/object-name.c b/object-name.c index d82fb49f39..32e9c23e40 100644 --- a/object-name.c +++ b/object-name.c @@ -585,32 +585,16 @@ static unsigned msb(unsigned long val) struct min_abbrev_data { unsigned int init_len; unsigned int cur_len; - char *hex; struct repository *repo; const struct object_id *oid; }; -static inline char get_hex_char_from_oid(const struct object_id *oid, - unsigned int pos) -{ - static const char hex[] = "0123456789abcdef"; - - if ((pos & 1) == 0) - return hex[oid->hash[pos >> 1] >> 4]; - else - return hex[oid->hash[pos >> 1] & 0xf]; -} - static int extend_abbrev_len(const struct object_id *oid, struct min_abbrev_data *mad) { - unsigned int i = mad->init_len; - while (mad->hex[i] && 
mad->hex[i] == get_hex_char_from_oid(oid, i)) - i++; - - if (mad->hex[i] && i >= mad->cur_len) - mad->cur_len = i + 1; - + unsigned len = oid_common_prefix_hexlen(oid, mad->oid); + if (len != hash_algos[oid->algo].hexsz && len >= mad->cur_len) + mad->cur_len = len + 1; return 0; } @@ -785,7 +769,6 @@ int repo_find_unique_abbrev_r(struct repository *r, char *hex, mad.repo = r; mad.init_len = len; mad.cur_len = len; - mad.hex = hex; mad.oid = oid; find_abbrev_len_packed(&mad); From ab3ab1038dd38d2be62e3bacf39a3248929a7a98 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Fri, 20 Mar 2026 08:07:38 +0100 Subject: [PATCH 21/93] object-name: move logic to compute loose abbreviation length The function `repo_find_unique_abbrev_r()` takes as input an object ID as well as a minimum object ID length and returns the minimum required prefix to make the object ID unique. The logic that computes the abbreviation length for loose objects is deeply tied to the loose object storage format. As such, it would fail in case a different object storage format was used. Prepare for making this logic generic to the backend by moving the logic into a new `odb_source_loose_find_abbrev_len()` function that is part of "object-file.c". 
Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- object-file.c | 38 ++++++++++++++++++++++++++++++++++++++ object-file.h | 12 ++++++++++++ object-name.c | 27 ++++----------------------- 3 files changed, 54 insertions(+), 23 deletions(-) diff --git a/object-file.c b/object-file.c index 13732f324f..4f77ce0982 100644 --- a/object-file.c +++ b/object-file.c @@ -1952,6 +1952,44 @@ out: return ret; } +struct find_abbrev_len_data { + const struct object_id *oid; + unsigned len; +}; + +static int find_abbrev_len_cb(const struct object_id *oid, + struct object_info *oi UNUSED, + void *cb_data) +{ + struct find_abbrev_len_data *data = cb_data; + unsigned len = oid_common_prefix_hexlen(oid, data->oid); + if (len != hash_algos[oid->algo].hexsz && len >= data->len) + data->len = len + 1; + return 0; +} + +int odb_source_loose_find_abbrev_len(struct odb_source *source, + const struct object_id *oid, + unsigned min_len, + unsigned *out) +{ + struct odb_for_each_object_options opts = { + .prefix = oid, + .prefix_hex_len = min_len, + }; + struct find_abbrev_len_data data = { + .oid = oid, + .len = min_len, + }; + int ret; + + ret = odb_source_loose_for_each_object(source, NULL, find_abbrev_len_cb, + &data, &opts); + *out = data.len; + + return ret; +} + static int append_loose_object(const struct object_id *oid, const char *path UNUSED, void *data) diff --git a/object-file.h b/object-file.h index f11ad58f6c..3686f182e4 100644 --- a/object-file.h +++ b/object-file.h @@ -146,6 +146,18 @@ int odb_source_loose_count_objects(struct odb_source *source, enum odb_count_objects_flags flags, unsigned long *out); +/* + * Find the shortest unique prefix for the given object ID, where `min_len` is + * the minimum length that the prefix should have. + * + * Returns 0 on success, in which case the computed length will be written to + * `out`. Otherwise, a negative error code is returned. 
+ */ +int odb_source_loose_find_abbrev_len(struct odb_source *source, + const struct object_id *oid, + unsigned min_len, + unsigned *out); + /** * format_object_header() is a thin wrapper around s xsnprintf() that * writes the initial " " part of the loose object diff --git a/object-name.c b/object-name.c index 32e9c23e40..4e21dbfa97 100644 --- a/object-name.c +++ b/object-name.c @@ -598,28 +598,6 @@ static int extend_abbrev_len(const struct object_id *oid, return 0; } -static int extend_abbrev_len_loose(const struct object_id *oid, - struct object_info *oi UNUSED, - void *cb_data) -{ - struct min_abbrev_data *data = cb_data; - extend_abbrev_len(oid, data); - return 0; -} - -static void find_abbrev_len_loose(struct min_abbrev_data *mad) -{ - struct odb_for_each_object_options opts = { - .prefix = mad->oid, - .prefix_hex_len = mad->cur_len, - }; - struct odb_source *source; - - for (source = mad->repo->objects->sources; source; source = source->next) - odb_source_loose_for_each_object(source, NULL, extend_abbrev_len_loose, - mad, &opts); -} - static void find_abbrev_len_for_midx(struct multi_pack_index *m, struct min_abbrev_data *mad) { @@ -772,7 +750,10 @@ int repo_find_unique_abbrev_r(struct repository *r, char *hex, mad.oid = oid; find_abbrev_len_packed(&mad); - find_abbrev_len_loose(&mad); + + odb_prepare_alternates(r->objects); + for (struct odb_source *s = r->objects->sources; s; s = s->next) + odb_source_loose_find_abbrev_len(s, mad.oid, mad.cur_len, &mad.cur_len); hex[mad.cur_len] = 0; return mad.cur_len; From 6c2ede6e4abed754bb5891c2904212c05efcfb11 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Fri, 20 Mar 2026 08:07:39 +0100 Subject: [PATCH 22/93] object-file: move logic to compute packed abbreviation length Same as the preceding commit, move the logic that computes the minimum required prefix length to make a given object ID unique for the packfile store into a new function `packfile_store_find_abbrev_len()` that is part of "packfile.c". 
This prepares for making the logic fully generic via pluggable object databases. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- object-name.c | 135 +++++--------------------------------------------- packfile.c | 111 +++++++++++++++++++++++++++++++++++++++++ packfile.h | 5 ++ 3 files changed, 128 insertions(+), 123 deletions(-) diff --git a/object-name.c b/object-name.c index 4e21dbfa97..bb2294a193 100644 --- a/object-name.c +++ b/object-name.c @@ -582,115 +582,6 @@ static unsigned msb(unsigned long val) return r; } -struct min_abbrev_data { - unsigned int init_len; - unsigned int cur_len; - struct repository *repo; - const struct object_id *oid; -}; - -static int extend_abbrev_len(const struct object_id *oid, - struct min_abbrev_data *mad) -{ - unsigned len = oid_common_prefix_hexlen(oid, mad->oid); - if (len != hash_algos[oid->algo].hexsz && len >= mad->cur_len) - mad->cur_len = len + 1; - return 0; -} - -static void find_abbrev_len_for_midx(struct multi_pack_index *m, - struct min_abbrev_data *mad) -{ - for (; m; m = m->base_midx) { - int match = 0; - uint32_t num, first = 0; - struct object_id oid; - const struct object_id *mad_oid; - - if (!m->num_objects) - continue; - - num = m->num_objects + m->num_objects_in_base; - mad_oid = mad->oid; - match = bsearch_one_midx(mad_oid, m, &first); - - /* - * first is now the position in the packfile where we - * would insert mad->hash if it does not exist (or the - * position of mad->hash if it does exist). Hence, we - * consider a maximum of two objects nearby for the - * abbreviation length. 
- */ - mad->init_len = 0; - if (!match) { - if (nth_midxed_object_oid(&oid, m, first)) - extend_abbrev_len(&oid, mad); - } else if (first < num - 1) { - if (nth_midxed_object_oid(&oid, m, first + 1)) - extend_abbrev_len(&oid, mad); - } - if (first > 0) { - if (nth_midxed_object_oid(&oid, m, first - 1)) - extend_abbrev_len(&oid, mad); - } - mad->init_len = mad->cur_len; - } -} - -static void find_abbrev_len_for_pack(struct packed_git *p, - struct min_abbrev_data *mad) -{ - int match = 0; - uint32_t num, first = 0; - struct object_id oid; - const struct object_id *mad_oid; - - if (p->multi_pack_index) - return; - - if (open_pack_index(p) || !p->num_objects) - return; - - num = p->num_objects; - mad_oid = mad->oid; - match = bsearch_pack(mad_oid, p, &first); - - /* - * first is now the position in the packfile where we would insert - * mad->hash if it does not exist (or the position of mad->hash if - * it does exist). Hence, we consider a maximum of two objects - * nearby for the abbreviation length. 
- */ - mad->init_len = 0; - if (!match) { - if (!nth_packed_object_id(&oid, p, first)) - extend_abbrev_len(&oid, mad); - } else if (first < num - 1) { - if (!nth_packed_object_id(&oid, p, first + 1)) - extend_abbrev_len(&oid, mad); - } - if (first > 0) { - if (!nth_packed_object_id(&oid, p, first - 1)) - extend_abbrev_len(&oid, mad); - } - mad->init_len = mad->cur_len; -} - -static void find_abbrev_len_packed(struct min_abbrev_data *mad) -{ - struct packed_git *p; - - odb_prepare_alternates(mad->repo->objects); - for (struct odb_source *source = mad->repo->objects->sources; source; source = source->next) { - struct multi_pack_index *m = get_multi_pack_index(source); - if (m) - find_abbrev_len_for_midx(m, mad); - } - - repo_for_each_pack(mad->repo, p) - find_abbrev_len_for_pack(p, mad); -} - void strbuf_repo_add_unique_abbrev(struct strbuf *sb, struct repository *repo, const struct object_id *oid, int abbrev_len) { @@ -707,14 +598,14 @@ void strbuf_add_unique_abbrev(struct strbuf *sb, const struct object_id *oid, } int repo_find_unique_abbrev_r(struct repository *r, char *hex, - const struct object_id *oid, int len) + const struct object_id *oid, int min_len) { const struct git_hash_algo *algo = oid->algo ? 
&hash_algos[oid->algo] : r->hash_algo; - struct min_abbrev_data mad; const unsigned hexsz = algo->hexsz; + unsigned len; - if (len < 0) { + if (min_len < 0) { unsigned long count; if (odb_count_objects(r->objects, ODB_COUNT_OBJECTS_APPROXIMATE, &count) < 0) @@ -738,25 +629,23 @@ int repo_find_unique_abbrev_r(struct repository *r, char *hex, */ if (len < FALLBACK_DEFAULT_ABBREV) len = FALLBACK_DEFAULT_ABBREV; + } else { + len = min_len; } oid_to_hex_r(hex, oid); if (len >= hexsz || !len) return hexsz; - mad.repo = r; - mad.init_len = len; - mad.cur_len = len; - mad.oid = oid; - - find_abbrev_len_packed(&mad); - odb_prepare_alternates(r->objects); - for (struct odb_source *s = r->objects->sources; s; s = s->next) - odb_source_loose_find_abbrev_len(s, mad.oid, mad.cur_len, &mad.cur_len); + for (struct odb_source *s = r->objects->sources; s; s = s->next) { + struct odb_source_files *files = odb_source_files_downcast(s); + packfile_store_find_abbrev_len(files->packed, oid, len, &len); + odb_source_loose_find_abbrev_len(s, oid, len, &len); + } - hex[mad.cur_len] = 0; - return mad.cur_len; + hex[len] = 0; + return len; } const char *repo_find_unique_abbrev(struct repository *r, diff --git a/packfile.c b/packfile.c index 2539a371c1..ee9c7ea1d1 100644 --- a/packfile.c +++ b/packfile.c @@ -2597,6 +2597,117 @@ out: return ret; } +static int extend_abbrev_len(const struct object_id *a, + const struct object_id *b, + unsigned *out) +{ + unsigned len = oid_common_prefix_hexlen(a, b); + if (len != hash_algos[a->algo].hexsz && len >= *out) + *out = len + 1; + return 0; +} + +static void find_abbrev_len_for_midx(struct multi_pack_index *m, + const struct object_id *oid, + unsigned min_len, + unsigned *out) +{ + unsigned len = min_len; + + for (; m; m = m->base_midx) { + int match = 0; + uint32_t num, first = 0; + struct object_id found_oid; + + if (!m->num_objects) + continue; + + num = m->num_objects + m->num_objects_in_base; + match = bsearch_one_midx(oid, m, &first); + + /* + * 
first is now the position in the packfile where we + * would insert the object ID if it does not exist (or the + * position of the object ID if it does exist). Hence, we + * consider a maximum of two objects nearby for the + * abbreviation length. + */ + + if (!match) { + if (nth_midxed_object_oid(&found_oid, m, first)) + extend_abbrev_len(&found_oid, oid, &len); + } else if (first < num - 1) { + if (nth_midxed_object_oid(&found_oid, m, first + 1)) + extend_abbrev_len(&found_oid, oid, &len); + } + if (first > 0) { + if (nth_midxed_object_oid(&found_oid, m, first - 1)) + extend_abbrev_len(&found_oid, oid, &len); + } + } + + *out = len; +} + +static void find_abbrev_len_for_pack(struct packed_git *p, + const struct object_id *oid, + unsigned min_len, + unsigned *out) +{ + int match; + uint32_t num, first = 0; + struct object_id found_oid; + unsigned len = min_len; + + num = p->num_objects; + match = bsearch_pack(oid, p, &first); + + /* + * first is now the position in the packfile where we would insert + * the object ID if it does not exist (or the position of the object + * ID if it does exist). Hence, we consider a maximum of two objects + * nearby for the abbreviation length.
+ */ + if (!match) { + if (!nth_packed_object_id(&found_oid, p, first)) + extend_abbrev_len(&found_oid, oid, &len); + } else if (first < num - 1) { + if (!nth_packed_object_id(&found_oid, p, first + 1)) + extend_abbrev_len(&found_oid, oid, &len); + } + if (first > 0) { + if (!nth_packed_object_id(&found_oid, p, first - 1)) + extend_abbrev_len(&found_oid, oid, &len); + } + + *out = len; +} + +int packfile_store_find_abbrev_len(struct packfile_store *store, + const struct object_id *oid, + unsigned min_len, + unsigned *out) +{ + struct packfile_list_entry *e; + struct multi_pack_index *m; + + m = get_multi_pack_index(store->source); + if (m) + find_abbrev_len_for_midx(m, oid, min_len, &min_len); + + for (e = packfile_store_get_packs(store); e; e = e->next) { + if (e->pack->multi_pack_index) + continue; + if (open_pack_index(e->pack) || !e->pack->num_objects) + continue; + + find_abbrev_len_for_pack(e->pack, oid, min_len, &min_len); + } + + *out = min_len; + return 0; +} + struct add_promisor_object_data { struct repository *repo; struct oidset *set; diff --git a/packfile.h b/packfile.h index fa41dfda38..45b35973f0 100644 --- a/packfile.h +++ b/packfile.h @@ -369,6 +369,11 @@ int packfile_store_for_each_object(struct packfile_store *store, void *cb_data, const struct odb_for_each_object_options *opts); +int packfile_store_find_abbrev_len(struct packfile_store *store, + const struct object_id *oid, + unsigned min_len, + unsigned *out); + /* A hook to report invalid files in pack directory */ #define PACKDIR_FILE_PACK 1 #define PACKDIR_FILE_IDX 2 From 83869e15fa9ef3b0ea2adbfe2fe68a309f95b856 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Fri, 20 Mar 2026 08:07:40 +0100 Subject: [PATCH 23/93] odb: introduce generic `odb_find_abbrev_len()` Introduce a new generic `odb_find_abbrev_len()` function as well as source-specific callback functions. This makes the logic to compute the required prefix length to make a given object unique fully pluggable. 
Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- object-name.c | 57 +++--------------------------------- odb.c | 73 ++++++++++++++++++++++++++++++++++++++++++++++ odb.h | 16 ++++++++++ odb/source-files.c | 25 ++++++++++++++++ odb/source.h | 24 +++++++++++++++ 5 files changed, 142 insertions(+), 53 deletions(-) diff --git a/object-name.c b/object-name.c index bb2294a193..f6e1f29e1f 100644 --- a/object-name.c +++ b/object-name.c @@ -15,10 +15,9 @@ #include "refs.h" #include "remote.h" #include "dir.h" +#include "odb.h" #include "oid-array.h" -#include "packfile.h" #include "pretty.h" -#include "object-file.h" #include "read-cache-ll.h" #include "repo-settings.h" #include "repository.h" @@ -569,19 +568,6 @@ int repo_for_each_abbrev(struct repository *r, const char *prefix, return ret; } -/* - * Return the slot of the most-significant bit set in "val". There are various - * ways to do this quickly with fls() or __builtin_clzl(), but speed is - * probably not a big deal here. - */ -static unsigned msb(unsigned long val) -{ - unsigned r = 0; - while (val >>= 1) - r++; - return r; -} - void strbuf_repo_add_unique_abbrev(struct strbuf *sb, struct repository *repo, const struct object_id *oid, int abbrev_len) { @@ -602,49 +588,14 @@ int repo_find_unique_abbrev_r(struct repository *r, char *hex, { const struct git_hash_algo *algo = oid->algo ? &hash_algos[oid->algo] : r->hash_algo; - const unsigned hexsz = algo->hexsz; unsigned len; - if (min_len < 0) { - unsigned long count; - - if (odb_count_objects(r->objects, ODB_COUNT_OBJECTS_APPROXIMATE, &count) < 0) - count = 0; - - /* - * Add one because the MSB only tells us the highest bit set, - * not including the value of all the _other_ bits (so "15" - * is only one off of 2^4, but the MSB is the 3rd bit. - */ - len = msb(count) + 1; - /* - * We now know we have on the order of 2^len objects, which - * expects a collision at 2^(len/2). 
But we also care about hex - * chars, not bits, and there are 4 bits per hex. So all - * together we need to divide by 2 and round up. - */ - len = DIV_ROUND_UP(len, 2); - /* - * For very small repos, we stick with our regular fallback. - */ - if (len < FALLBACK_DEFAULT_ABBREV) - len = FALLBACK_DEFAULT_ABBREV; - } else { - len = min_len; - } + if (odb_find_abbrev_len(r->objects, oid, min_len, &len) < 0) + len = algo->hexsz; oid_to_hex_r(hex, oid); - if (len >= hexsz || !len) - return hexsz; - - odb_prepare_alternates(r->objects); - for (struct odb_source *s = r->objects->sources; s; s = s->next) { - struct odb_source_files *files = odb_source_files_downcast(s); - packfile_store_find_abbrev_len(files->packed, oid, len, &len); - odb_source_loose_find_abbrev_len(s, oid, len, &len); - } - hex[len] = 0; + return len; } diff --git a/odb.c b/odb.c index 3019957b87..3f94a53df1 100644 --- a/odb.c +++ b/odb.c @@ -12,6 +12,7 @@ #include "midx.h" #include "object-file-convert.h" #include "object-file.h" +#include "object-name.h" #include "odb.h" #include "packfile.h" #include "path.h" @@ -964,6 +965,78 @@ out: return ret; } +/* + * Return the slot of the most-significant bit set in "val". There are various + * ways to do this quickly with fls() or __builtin_clzl(), but speed is + * probably not a big deal here. + */ +static unsigned msb(unsigned long val) +{ + unsigned r = 0; + while (val >>= 1) + r++; + return r; +} + +int odb_find_abbrev_len(struct object_database *odb, + const struct object_id *oid, + int min_length, + unsigned *out) +{ + const struct git_hash_algo *algo = + oid->algo ? 
&hash_algos[oid->algo] : odb->repo->hash_algo; + const unsigned hexsz = algo->hexsz; + unsigned len; + int ret; + + if (min_length < 0) { + unsigned long count; + + if (odb_count_objects(odb, ODB_COUNT_OBJECTS_APPROXIMATE, &count) < 0) + count = 0; + + /* + * Add one because the MSB only tells us the highest bit set, + * not including the value of all the _other_ bits (so "15" + * is only one off of 2^4, but the MSB is the 3rd bit). + */ + len = msb(count) + 1; + /* + * We now know we have on the order of 2^len objects, which + * expects a collision at 2^(len/2). But we also care about hex + * chars, not bits, and there are 4 bits per hex. So all + * together we need to divide by 2 and round up. + */ + len = DIV_ROUND_UP(len, 2); + /* + * For very small repos, we stick with our regular fallback. + */ + if (len < FALLBACK_DEFAULT_ABBREV) + len = FALLBACK_DEFAULT_ABBREV; + } else { + len = min_length; + } + + if (len >= hexsz || !len) { + *out = hexsz; + ret = 0; + goto out; + } + + odb_prepare_alternates(odb); + for (struct odb_source *source = odb->sources; source; source = source->next) { + ret = odb_source_find_abbrev_len(source, oid, len, &len); + if (ret) + goto out; + } + + ret = 0; + *out = len; + +out: + return ret; +} + void odb_assert_oid_type(struct object_database *odb, const struct object_id *oid, enum object_type expect) { diff --git a/odb.h b/odb.h index e80fd8f7ab..984bafca9d 100644 --- a/odb.h +++ b/odb.h @@ -545,6 +545,22 @@ int odb_count_objects(struct object_database *odb, enum odb_count_objects_flags flags, unsigned long *out); +/* + * Given an object ID, find the minimum length required to make the + * object ID unique across the whole object database. + * + * The `min_len` determines the minimum abbreviated length that'll be returned + * by this function. If `min_len < 0`, then the function will set a sensible + * default minimum abbreviation length. + * + * Returns 0 on success, a negative error code otherwise.
The computed length + * will be assigned to `*out`. + */ +int odb_find_abbrev_len(struct object_database *odb, + const struct object_id *oid, + int min_len, + unsigned *out); + enum { /* * By default, `odb_write_object()` does not actually write anything diff --git a/odb/source-files.c b/odb/source-files.c index e90bb689bb..76797569de 100644 --- a/odb/source-files.c +++ b/odb/source-files.c @@ -122,6 +122,30 @@ out: return ret; } +static int odb_source_files_find_abbrev_len(struct odb_source *source, + const struct object_id *oid, + unsigned min_len, + unsigned *out) +{ + struct odb_source_files *files = odb_source_files_downcast(source); + unsigned len = min_len; + int ret; + + ret = packfile_store_find_abbrev_len(files->packed, oid, len, &len); + if (ret < 0) + goto out; + + ret = odb_source_loose_find_abbrev_len(source, oid, len, &len); + if (ret < 0) + goto out; + + *out = len; + ret = 0; + +out: + return ret; +} + static int odb_source_files_freshen_object(struct odb_source *source, const struct object_id *oid) { @@ -250,6 +274,7 @@ struct odb_source_files *odb_source_files_new(struct object_database *odb, files->base.read_object_stream = odb_source_files_read_object_stream; files->base.for_each_object = odb_source_files_for_each_object; files->base.count_objects = odb_source_files_count_objects; + files->base.find_abbrev_len = odb_source_files_find_abbrev_len; files->base.freshen_object = odb_source_files_freshen_object; files->base.write_object = odb_source_files_write_object; files->base.write_object_stream = odb_source_files_write_object_stream; diff --git a/odb/source.h b/odb/source.h index ee5d6ed530..a9d7d0b96f 100644 --- a/odb/source.h +++ b/odb/source.h @@ -157,6 +157,18 @@ struct odb_source { enum odb_count_objects_flags flags, unsigned long *out); + /* + * This callback is expected to find the minimum required length to + * make the given object ID unique. 
+ * + * The callback is expected to return a negative error code in case it + * failed, 0 otherwise. + */ + int (*find_abbrev_len)(struct odb_source *source, + const struct object_id *oid, + unsigned min_length, + unsigned *out); + /* * This callback is expected to freshen the given object so that its * last access time is set to the current time. This is used to ensure @@ -360,6 +372,18 @@ static inline int odb_source_count_objects(struct odb_source *source, return source->count_objects(source, flags, out); } +/* + * Determine the minimum required length to make the given object ID unique in + * the given source. Returns 0 on success, a negative error code otherwise. + */ +static inline int odb_source_find_abbrev_len(struct odb_source *source, + const struct object_id *oid, + unsigned min_len, + unsigned *out) +{ + return source->find_abbrev_len(source, oid, min_len, out); +} + /* * Freshen an object in the object database by updating its timestamp. * Returns 1 in case the object has been freshened, 0 in case the object does From 17cabd369b5cb96bee9577f49247ef95d07058a7 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 23 Mar 2026 16:02:52 +0100 Subject: [PATCH 24/93] fetch-pack: move fsck options into function scope When fetching a packfile, we optionally verify received objects via the fsck subsystem. The options for those consistency checks are declared in global scope without a good reason, and they are never cleaned up. So in case the options are reused, they may accumulate more state over time. Furthermore, in subsequent changes we'll introduce a repository pointer into the structure. Obviously though, we don't have a repository available at static time, except for `the_repository`, which we don't want to use here. Refactor the code to move the options into the respective functions and properly manage their lifecycle. 
Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- fetch-pack.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/fetch-pack.c b/fetch-pack.c index 6ecd468ef7..ec5abb92b5 100644 --- a/fetch-pack.c +++ b/fetch-pack.c @@ -51,7 +51,6 @@ static int server_supports_filtering; static int advertise_sid; static struct shallow_lock shallow_lock; static const char *alternate_shallow_file; -static struct fsck_options fsck_options = FSCK_OPTIONS_MISSING_GITMODULES; static struct strbuf fsck_msg_types = STRBUF_INIT; static struct string_list uri_protocols = STRING_LIST_INIT_DUP; @@ -1100,6 +1099,7 @@ static struct ref *do_fetch_pack(struct fetch_pack_args *args, struct shallow_info *si, struct string_list *pack_lockfiles) { + struct fsck_options fsck_options = FSCK_OPTIONS_MISSING_GITMODULES; struct repository *r = the_repository; struct ref *ref = copy_ref_list(orig_ref); struct object_id oid; @@ -1235,6 +1235,7 @@ static struct ref *do_fetch_pack(struct fetch_pack_args *args, die("fsck failed"); all_done: + fsck_options_clear(&fsck_options); if (negotiator) negotiator->release(negotiator); return ref; @@ -1654,6 +1655,7 @@ static struct ref *do_fetch_pack_v2(struct fetch_pack_args *args, struct string_list *pack_lockfiles) { struct repository *r = the_repository; + struct fsck_options fsck_options = FSCK_OPTIONS_MISSING_GITMODULES; struct ref *ref = copy_ref_list(orig_ref); enum fetch_state state = FETCH_CHECK_LOCAL; struct oidset common = OIDSET_INIT; @@ -1882,6 +1884,7 @@ static struct ref *do_fetch_pack_v2(struct fetch_pack_args *args, if (negotiator) negotiator->release(negotiator); + fsck_options_clear(&fsck_options); oidset_clear(&common); return ref; } From f22360902621e0807a1c0a77476e3e4d323c708d Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 23 Mar 2026 16:02:53 +0100 Subject: [PATCH 25/93] fsck: initialize fsck options via a function We initialize the `struct fsck_options` via a set of macros, often in global scope. 
In the next commit though we're about to introduce a new repository field to the options that must be initialized, and naturally we don't have a repo other than `the_repository` available in this scope. Refactor the code to instead introduce a new `fsck_options_init()` function that initializes the options for us and move initialization into function scope. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- builtin/fsck.c | 10 ++++++--- builtin/index-pack.c | 4 +++- builtin/mktag.c | 3 ++- builtin/refs.c | 4 +++- builtin/unpack-objects.c | 4 +++- fetch-pack.c | 8 +++++-- fsck.c | 45 ++++++++++++++++++++++++++++++++++++++++ fsck.h | 38 +++++++++------------------------ object-file.c | 3 ++- 9 files changed, 81 insertions(+), 38 deletions(-) diff --git a/builtin/fsck.c b/builtin/fsck.c index 9bab32effe..59e3b0f7ac 100644 --- a/builtin/fsck.c +++ b/builtin/fsck.c @@ -42,8 +42,8 @@ static int check_full = 1; static int connectivity_only; static int check_strict; static int keep_cache_objects; -static struct fsck_options fsck_walk_options = FSCK_OPTIONS_DEFAULT; -static struct fsck_options fsck_obj_options = FSCK_OPTIONS_DEFAULT; +static struct fsck_options fsck_walk_options; +static struct fsck_options fsck_obj_options; static int errors_found; static int write_lost_and_found; static int verbose; @@ -224,7 +224,7 @@ static int mark_unreachable_referents(const struct object_id *oid, struct object_info *oi UNUSED, void *data UNUSED) { - struct fsck_options options = FSCK_OPTIONS_DEFAULT; + struct fsck_options options; struct object *obj = lookup_object(the_repository, oid); if (!obj || !(obj->flags & HAS_OBJ)) @@ -243,6 +243,7 @@ static int mark_unreachable_referents(const struct object_id *oid, object_as_type(obj, type, 0); } + fsck_options_init(&options, FSCK_OPTIONS_DEFAULT); options.walk = mark_used; fsck_walk(obj, NULL, &options); if (obj->type == OBJ_TREE) @@ -1004,7 +1005,10 @@ int cmd_fsck(int argc, argc = parse_options(argc, argv, prefix, 
fsck_opts, fsck_usage, 0); + fsck_options_init(&fsck_walk_options, FSCK_OPTIONS_DEFAULT); fsck_walk_options.walk = mark_object; + + fsck_options_init(&fsck_obj_options, FSCK_OPTIONS_DEFAULT); fsck_obj_options.walk = mark_used; fsck_obj_options.error_func = fsck_objects_error_func; if (check_strict) diff --git a/builtin/index-pack.c b/builtin/index-pack.c index d1e47279a8..c8d28bcf8e 100644 --- a/builtin/index-pack.c +++ b/builtin/index-pack.c @@ -136,7 +136,7 @@ static int nr_threads; static int from_stdin; static int strict; static int do_fsck_object; -static struct fsck_options fsck_options = FSCK_OPTIONS_MISSING_GITMODULES; +static struct fsck_options fsck_options; static int verbose; static const char *progress_title; static int show_resolving_progress; @@ -1908,6 +1908,8 @@ int cmd_index_pack(int argc, show_usage_if_asked(argc, argv, index_pack_usage); disable_replace_refs(); + + fsck_options_init(&fsck_options, FSCK_OPTIONS_MISSING_GITMODULES); fsck_options.walk = mark_link; reset_pack_idx_option(&opts); diff --git a/builtin/mktag.c b/builtin/mktag.c index 7cf6e1230a..9f37f9dede 100644 --- a/builtin/mktag.c +++ b/builtin/mktag.c @@ -16,7 +16,7 @@ static char const * const builtin_mktag_usage[] = { }; static int option_strict = 1; -static struct fsck_options fsck_options = FSCK_OPTIONS_STRICT; +static struct fsck_options fsck_options; static int mktag_fsck_error_func(struct fsck_options *o UNUSED, void *fsck_report UNUSED, @@ -94,6 +94,7 @@ int cmd_mktag(int argc, if (strbuf_read(&buf, 0, 0) < 0) die_errno(_("could not read from stdin")); + fsck_options_init(&fsck_options, FSCK_OPTIONS_STRICT); fsck_options.error_func = mktag_fsck_error_func; fsck_set_msg_type_from_ids(&fsck_options, FSCK_MSG_EXTRA_HEADER_ENTRY, FSCK_WARN); diff --git a/builtin/refs.c b/builtin/refs.c index 3064f888b2..1719ada549 100644 --- a/builtin/refs.c +++ b/builtin/refs.c @@ -80,7 +80,7 @@ out: static int cmd_refs_verify(int argc, const char **argv, const char *prefix, struct repository 
*repo UNUSED) { - struct fsck_options fsck_refs_options = FSCK_REFS_OPTIONS_DEFAULT; + struct fsck_options fsck_refs_options; struct worktree **worktrees; const char * const verify_usage[] = { REFS_VERIFY_USAGE, @@ -93,6 +93,8 @@ static int cmd_refs_verify(int argc, const char **argv, const char *prefix, }; int ret = 0; + fsck_options_init(&fsck_refs_options, FSCK_OPTIONS_REFS); + argc = parse_options(argc, argv, prefix, options, verify_usage, 0); if (argc) usage(_("'git refs verify' takes no arguments")); diff --git a/builtin/unpack-objects.c b/builtin/unpack-objects.c index 6fc64e9e4b..9e4bb9d25c 100644 --- a/builtin/unpack-objects.c +++ b/builtin/unpack-objects.c @@ -29,7 +29,7 @@ static unsigned int offset, len; static off_t consumed_bytes; static off_t max_input_size; static struct git_hash_ctx ctx; -static struct fsck_options fsck_options = FSCK_OPTIONS_STRICT; +static struct fsck_options fsck_options; static struct progress *progress; /* @@ -627,6 +627,8 @@ int cmd_unpack_objects(int argc, show_usage_if_asked(argc, argv, unpack_usage); + fsck_options_init(&fsck_options, FSCK_OPTIONS_STRICT); + for (i = 1 ; i < argc; i++) { const char *arg = argv[i]; diff --git a/fetch-pack.c b/fetch-pack.c index ec5abb92b5..7339162368 100644 --- a/fetch-pack.c +++ b/fetch-pack.c @@ -1099,7 +1099,7 @@ static struct ref *do_fetch_pack(struct fetch_pack_args *args, struct shallow_info *si, struct string_list *pack_lockfiles) { - struct fsck_options fsck_options = FSCK_OPTIONS_MISSING_GITMODULES; + struct fsck_options fsck_options = { 0 }; struct repository *r = the_repository; struct ref *ref = copy_ref_list(orig_ref); struct object_id oid; @@ -1228,6 +1228,8 @@ static struct ref *do_fetch_pack(struct fetch_pack_args *args, alternate_shallow_file = setup_temporary_shallow(si->shallow); } else alternate_shallow_file = NULL; + + fsck_options_init(&fsck_options, FSCK_OPTIONS_MISSING_GITMODULES); if (get_pack(args, fd, pack_lockfiles, NULL, sought, nr_sought, 
&fsck_options.gitmodules_found)) die(_("git fetch-pack: fetch failed.")); @@ -1655,7 +1657,7 @@ static struct ref *do_fetch_pack_v2(struct fetch_pack_args *args, struct string_list *pack_lockfiles) { struct repository *r = the_repository; - struct fsck_options fsck_options = FSCK_OPTIONS_MISSING_GITMODULES; + struct fsck_options fsck_options; struct ref *ref = copy_ref_list(orig_ref); enum fetch_state state = FETCH_CHECK_LOCAL; struct oidset common = OIDSET_INIT; @@ -1673,6 +1675,8 @@ static struct ref *do_fetch_pack_v2(struct fetch_pack_args *args, struct strvec index_pack_args = STRVEC_INIT; const char *promisor_remote_config; + fsck_options_init(&fsck_options, FSCK_OPTIONS_MISSING_GITMODULES); + if (server_feature_v2("promisor-remote", &promisor_remote_config)) promisor_remote_reply(promisor_remote_config, NULL); diff --git a/fsck.c b/fsck.c index 0f02cf8f77..1ff8208502 100644 --- a/fsck.c +++ b/fsck.c @@ -1380,6 +1380,51 @@ bool fsck_has_queued_checks(struct fsck_options *options) !oidset_equal(&options->gitattributes_found, &options->gitattributes_done); } +void fsck_options_init(struct fsck_options *options, + enum fsck_options_type type) +{ + static const struct fsck_options defaults[] = { + [FSCK_OPTIONS_DEFAULT] = { + .skip_oids = OIDSET_INIT, + .gitmodules_found = OIDSET_INIT, + .gitmodules_done = OIDSET_INIT, + .gitattributes_found = OIDSET_INIT, + .gitattributes_done = OIDSET_INIT, + .error_func = fsck_objects_error_function + }, + [FSCK_OPTIONS_STRICT] = { + .strict = 1, + .gitmodules_found = OIDSET_INIT, + .gitmodules_done = OIDSET_INIT, + .gitattributes_found = OIDSET_INIT, + .gitattributes_done = OIDSET_INIT, + .error_func = fsck_objects_error_function, + }, + [FSCK_OPTIONS_MISSING_GITMODULES] = { + .strict = 1, + .gitmodules_found = OIDSET_INIT, + .gitmodules_done = OIDSET_INIT, + .gitattributes_found = OIDSET_INIT, + .gitattributes_done = OIDSET_INIT, + .error_func = fsck_objects_error_cb_print_missing_gitmodules, + }, + [FSCK_OPTIONS_REFS] = { + 
.error_func = fsck_refs_error_function, + }, + }; + + switch (type) { + case FSCK_OPTIONS_DEFAULT: + case FSCK_OPTIONS_STRICT: + case FSCK_OPTIONS_MISSING_GITMODULES: + case FSCK_OPTIONS_REFS: + memcpy(options, &defaults[type], sizeof(*options)); + break; + default: + BUG("unknown fsck options type %d", type); + } +} + void fsck_options_clear(struct fsck_options *options) { free(options->msg_type); diff --git a/fsck.h b/fsck.h index 65ecbb7fe1..9c973b53b2 100644 --- a/fsck.h +++ b/fsck.h @@ -180,34 +180,6 @@ struct fsck_options { kh_oid_map_t *object_names; }; -#define FSCK_OPTIONS_DEFAULT { \ - .skip_oids = OIDSET_INIT, \ - .gitmodules_found = OIDSET_INIT, \ - .gitmodules_done = OIDSET_INIT, \ - .gitattributes_found = OIDSET_INIT, \ - .gitattributes_done = OIDSET_INIT, \ - .error_func = fsck_objects_error_function \ -} -#define FSCK_OPTIONS_STRICT { \ - .strict = 1, \ - .gitmodules_found = OIDSET_INIT, \ - .gitmodules_done = OIDSET_INIT, \ - .gitattributes_found = OIDSET_INIT, \ - .gitattributes_done = OIDSET_INIT, \ - .error_func = fsck_objects_error_function, \ -} -#define FSCK_OPTIONS_MISSING_GITMODULES { \ - .strict = 1, \ - .gitmodules_found = OIDSET_INIT, \ - .gitmodules_done = OIDSET_INIT, \ - .gitattributes_found = OIDSET_INIT, \ - .gitattributes_done = OIDSET_INIT, \ - .error_func = fsck_objects_error_cb_print_missing_gitmodules, \ -} -#define FSCK_REFS_OPTIONS_DEFAULT { \ - .error_func = fsck_refs_error_function, \ -} - /* descend in all linked child objects * the return value is: * -1 error in processing the object @@ -255,6 +227,16 @@ int fsck_finish(struct fsck_options *options); */ bool fsck_has_queued_checks(struct fsck_options *options); +enum fsck_options_type { + FSCK_OPTIONS_DEFAULT, + FSCK_OPTIONS_STRICT, + FSCK_OPTIONS_MISSING_GITMODULES, + FSCK_OPTIONS_REFS, +}; + +void fsck_options_init(struct fsck_options *options, + enum fsck_options_type type); + /* * Clear the fsck_options struct, freeing any allocated memory. 
*/ diff --git a/object-file.c b/object-file.c index c62e5496e0..186b2ff764 100644 --- a/object-file.c +++ b/object-file.c @@ -1279,8 +1279,9 @@ static int index_mem(struct index_state *istate, } } if (flags & INDEX_FORMAT_CHECK) { - struct fsck_options opts = FSCK_OPTIONS_DEFAULT; + struct fsck_options opts; + fsck_options_init(&opts, FSCK_OPTIONS_DEFAULT); opts.strict = 1; opts.error_func = hash_format_check_report; if (fsck_buffer(null_oid(istate->repo->hash_algo), type, buf, size, &opts)) From 374985390871cb67c02b1608b693480e89567b9a Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 23 Mar 2026 16:02:54 +0100 Subject: [PATCH 26/93] fsck: store repository in fsck options The fsck subsystem relies on `the_repository` quite a bit. While we could of course explicitly pass a repository down the callchain, we already have a `struct fsck_options` that we pass to almost all functions. Extend the options to also store the repository to make it readily available. Suggested-by: Junio C Hamano Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- builtin/fsck.c | 8 ++++---- builtin/index-pack.c | 2 +- builtin/mktag.c | 4 ++-- builtin/refs.c | 4 ++-- builtin/unpack-objects.c | 4 ++-- fetch-pack.c | 4 ++-- fsck.c | 3 +++ fsck.h | 4 ++++ object-file.c | 2 +- 9 files changed, 21 insertions(+), 14 deletions(-) diff --git a/builtin/fsck.c b/builtin/fsck.c index 59e3b0f7ac..990d836918 100644 --- a/builtin/fsck.c +++ b/builtin/fsck.c @@ -243,7 +243,7 @@ static int mark_unreachable_referents(const struct object_id *oid, object_as_type(obj, type, 0); } - fsck_options_init(&options, FSCK_OPTIONS_DEFAULT); + fsck_options_init(&options, the_repository, FSCK_OPTIONS_DEFAULT); options.walk = mark_used; fsck_walk(obj, NULL, &options); if (obj->type == OBJ_TREE) @@ -987,7 +987,7 @@ static struct option fsck_opts[] = { int cmd_fsck(int argc, const char **argv, const char *prefix, - struct repository *repo UNUSED) + struct repository *repo) { struct odb_source 
*source; struct snapshot snap = { @@ -1005,10 +1005,10 @@ int cmd_fsck(int argc, argc = parse_options(argc, argv, prefix, fsck_opts, fsck_usage, 0); - fsck_options_init(&fsck_walk_options, FSCK_OPTIONS_DEFAULT); + fsck_options_init(&fsck_walk_options, repo, FSCK_OPTIONS_DEFAULT); fsck_walk_options.walk = mark_object; - fsck_options_init(&fsck_obj_options, FSCK_OPTIONS_DEFAULT); + fsck_options_init(&fsck_obj_options, repo, FSCK_OPTIONS_DEFAULT); fsck_obj_options.walk = mark_used; fsck_obj_options.error_func = fsck_objects_error_func; if (check_strict) diff --git a/builtin/index-pack.c b/builtin/index-pack.c index c8d28bcf8e..e4129bd605 100644 --- a/builtin/index-pack.c +++ b/builtin/index-pack.c @@ -1909,7 +1909,7 @@ int cmd_index_pack(int argc, disable_replace_refs(); - fsck_options_init(&fsck_options, FSCK_OPTIONS_MISSING_GITMODULES); + fsck_options_init(&fsck_options, the_repository, FSCK_OPTIONS_MISSING_GITMODULES); fsck_options.walk = mark_link; reset_pack_idx_option(&opts); diff --git a/builtin/mktag.c b/builtin/mktag.c index 9f37f9dede..f40264a878 100644 --- a/builtin/mktag.c +++ b/builtin/mktag.c @@ -75,7 +75,7 @@ static int verify_object_in_tag(struct object_id *tagged_oid, int *tagged_type) int cmd_mktag(int argc, const char **argv, const char *prefix, - struct repository *repo UNUSED) + struct repository *repo) { static struct option builtin_mktag_options[] = { OPT_BOOL(0, "strict", &option_strict, @@ -94,7 +94,7 @@ int cmd_mktag(int argc, if (strbuf_read(&buf, 0, 0) < 0) die_errno(_("could not read from stdin")); - fsck_options_init(&fsck_options, FSCK_OPTIONS_STRICT); + fsck_options_init(&fsck_options, repo, FSCK_OPTIONS_STRICT); fsck_options.error_func = mktag_fsck_error_func; fsck_set_msg_type_from_ids(&fsck_options, FSCK_MSG_EXTRA_HEADER_ENTRY, FSCK_WARN); diff --git a/builtin/refs.c b/builtin/refs.c index 1719ada549..e3125bc61b 100644 --- a/builtin/refs.c +++ b/builtin/refs.c @@ -78,7 +78,7 @@ out: } static int cmd_refs_verify(int argc, const char 
**argv, const char *prefix, - struct repository *repo UNUSED) + struct repository *repo) { struct fsck_options fsck_refs_options; struct worktree **worktrees; @@ -93,7 +93,7 @@ static int cmd_refs_verify(int argc, const char **argv, const char *prefix, }; int ret = 0; - fsck_options_init(&fsck_refs_options, FSCK_OPTIONS_REFS); + fsck_options_init(&fsck_refs_options, repo, FSCK_OPTIONS_REFS); argc = parse_options(argc, argv, prefix, options, verify_usage, 0); if (argc) diff --git a/builtin/unpack-objects.c b/builtin/unpack-objects.c index 9e4bb9d25c..d863912b24 100644 --- a/builtin/unpack-objects.c +++ b/builtin/unpack-objects.c @@ -613,7 +613,7 @@ static void unpack_all(void) int cmd_unpack_objects(int argc, const char **argv, const char *prefix UNUSED, - struct repository *repo UNUSED) + struct repository *repo) { int i; struct object_id oid; @@ -627,7 +627,7 @@ int cmd_unpack_objects(int argc, show_usage_if_asked(argc, argv, unpack_usage); - fsck_options_init(&fsck_options, FSCK_OPTIONS_STRICT); + fsck_options_init(&fsck_options, repo, FSCK_OPTIONS_STRICT); for (i = 1 ; i < argc; i++) { const char *arg = argv[i]; diff --git a/fetch-pack.c b/fetch-pack.c index 7339162368..84a21c5107 100644 --- a/fetch-pack.c +++ b/fetch-pack.c @@ -1229,7 +1229,7 @@ static struct ref *do_fetch_pack(struct fetch_pack_args *args, } else alternate_shallow_file = NULL; - fsck_options_init(&fsck_options, FSCK_OPTIONS_MISSING_GITMODULES); + fsck_options_init(&fsck_options, the_repository, FSCK_OPTIONS_MISSING_GITMODULES); if (get_pack(args, fd, pack_lockfiles, NULL, sought, nr_sought, &fsck_options.gitmodules_found)) die(_("git fetch-pack: fetch failed.")); @@ -1675,7 +1675,7 @@ static struct ref *do_fetch_pack_v2(struct fetch_pack_args *args, struct strvec index_pack_args = STRVEC_INIT; const char *promisor_remote_config; - fsck_options_init(&fsck_options, FSCK_OPTIONS_MISSING_GITMODULES); + fsck_options_init(&fsck_options, the_repository, FSCK_OPTIONS_MISSING_GITMODULES); if 
(server_feature_v2("promisor-remote", &promisor_remote_config)) promisor_remote_reply(promisor_remote_config, NULL); diff --git a/fsck.c b/fsck.c index 1ff8208502..a05997703a 100644 --- a/fsck.c +++ b/fsck.c @@ -1381,6 +1381,7 @@ bool fsck_has_queued_checks(struct fsck_options *options) } void fsck_options_init(struct fsck_options *options, + struct repository *repo, enum fsck_options_type type) { static const struct fsck_options defaults[] = { @@ -1423,6 +1424,8 @@ void fsck_options_init(struct fsck_options *options, default: BUG("unknown fsck options type %d", type); } + + options->repo = repo; } void fsck_options_clear(struct fsck_options *options) diff --git a/fsck.h b/fsck.h index 9c973b53b2..e77935c8a9 100644 --- a/fsck.h +++ b/fsck.h @@ -166,7 +166,10 @@ struct fsck_ref_report { const char *path; }; +struct repository; + struct fsck_options { + struct repository *repo; fsck_walk_func walk; fsck_error error_func; unsigned strict; @@ -235,6 +238,7 @@ enum fsck_options_type { }; void fsck_options_init(struct fsck_options *options, + struct repository *repo, enum fsck_options_type type); /* diff --git a/object-file.c b/object-file.c index 186b2ff764..24ed5d5577 100644 --- a/object-file.c +++ b/object-file.c @@ -1281,7 +1281,7 @@ static int index_mem(struct index_state *istate, if (flags & INDEX_FORMAT_CHECK) { struct fsck_options opts; - fsck_options_init(&opts, FSCK_OPTIONS_DEFAULT); + fsck_options_init(&opts, the_repository, FSCK_OPTIONS_DEFAULT); opts.strict = 1; opts.error_func = hash_format_check_report; if (fsck_buffer(null_oid(istate->repo->hash_algo), type, buf, size, &opts)) From fe5f16ecc39e2879e5b57925648984b78aaf6339 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 23 Mar 2026 16:02:55 +0100 Subject: [PATCH 27/93] fsck: drop USE_THE_REPOSITORY Stop using `the_repository` in "fsck.c" in favor of the repository that we've already got available via `struct fsck_options`. 
Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- fsck.c | 28 +++++++++++++--------------- 1 file changed, 13 insertions(+), 15 deletions(-) diff --git a/fsck.c b/fsck.c index a05997703a..b72200c352 100644 --- a/fsck.c +++ b/fsck.c @@ -1,5 +1,3 @@ -#define USE_THE_REPOSITORY_VARIABLE - #include "git-compat-util.h" #include "date.h" #include "dir.h" @@ -207,7 +205,7 @@ void fsck_set_msg_types(struct fsck_options *options, const char *values) if (equal == len) die("skiplist requires a path"); oidset_parse_file(&options->skip_oids, buf + equal + 1, - the_repository->hash_algo); + options->repo->hash_algo); buf += len + 1; continue; } @@ -360,7 +358,7 @@ static int fsck_walk_tree(struct tree *tree, void *data, struct fsck_options *op int res = 0; const char *name; - if (repo_parse_tree(the_repository, tree)) + if (repo_parse_tree(options->repo, tree)) return -1; name = fsck_get_object_name(options, &tree->object.oid); @@ -375,14 +373,14 @@ static int fsck_walk_tree(struct tree *tree, void *data, struct fsck_options *op continue; if (S_ISDIR(entry.mode)) { - obj = (struct object *)lookup_tree(the_repository, &entry.oid); + obj = (struct object *)lookup_tree(options->repo, &entry.oid); if (name && obj) fsck_put_object_name(options, &entry.oid, "%s%s/", name, entry.path); result = options->walk(obj, OBJ_TREE, data, options); } else if (S_ISREG(entry.mode) || S_ISLNK(entry.mode)) { - obj = (struct object *)lookup_blob(the_repository, &entry.oid); + obj = (struct object *)lookup_blob(options->repo, &entry.oid); if (name && obj) fsck_put_object_name(options, &entry.oid, "%s%s", name, entry.path); @@ -409,7 +407,7 @@ static int fsck_walk_commit(struct commit *commit, void *data, struct fsck_optio int result; const char *name; - if (repo_parse_commit(the_repository, commit)) + if (repo_parse_commit(options->repo, commit)) return -1; name = fsck_get_object_name(options, &commit->object.oid); @@ -417,7 +415,7 @@ static int fsck_walk_commit(struct commit 
*commit, void *data, struct fsck_optio fsck_put_object_name(options, get_commit_tree_oid(commit), "%s:", name); - result = options->walk((struct object *) repo_get_commit_tree(the_repository, commit), + result = options->walk((struct object *) repo_get_commit_tree(options->repo, commit), OBJ_TREE, data, options); if (result < 0) return result; @@ -474,7 +472,7 @@ static int fsck_walk_tag(struct tag *tag, void *data, struct fsck_options *optio { const char *name = fsck_get_object_name(options, &tag->object.oid); - if (parse_tag(the_repository, tag)) + if (parse_tag(options->repo, tag)) return -1; if (name) fsck_put_object_name(options, &tag->tagged->oid, "%s", name); @@ -487,7 +485,7 @@ int fsck_walk(struct object *obj, void *data, struct fsck_options *options) return -1; if (obj->type == OBJ_NONE) - parse_object(the_repository, &obj->oid); + parse_object(options->repo, &obj->oid); switch (obj->type) { case OBJ_BLOB: @@ -970,14 +968,14 @@ static int fsck_commit(const struct object_id *oid, if (buffer >= buffer_end || !skip_prefix(buffer, "tree ", &buffer)) return report(options, oid, OBJ_COMMIT, FSCK_MSG_MISSING_TREE, "invalid format - expected 'tree' line"); - if (parse_oid_hex(buffer, &tree_oid, &p) || *p != '\n') { + if (parse_oid_hex_algop(buffer, &tree_oid, &p, options->repo->hash_algo) || *p != '\n') { err = report(options, oid, OBJ_COMMIT, FSCK_MSG_BAD_TREE_SHA1, "invalid 'tree' line format - bad sha1"); if (err) return err; } buffer = p + 1; while (buffer < buffer_end && skip_prefix(buffer, "parent ", &buffer)) { - if (parse_oid_hex(buffer, &parent_oid, &p) || *p != '\n') { + if (parse_oid_hex_algop(buffer, &parent_oid, &p, options->repo->hash_algo) || *p != '\n') { err = report(options, oid, OBJ_COMMIT, FSCK_MSG_BAD_PARENT_SHA1, "invalid 'parent' line format - bad sha1"); if (err) return err; @@ -1044,7 +1042,7 @@ int fsck_tag_standalone(const struct object_id *oid, const char *buffer, ret = report(options, oid, OBJ_TAG, FSCK_MSG_MISSING_OBJECT, "invalid 
format - expected 'object' line"); goto done; } - if (parse_oid_hex(buffer, tagged_oid, &p) || *p != '\n') { + if (parse_oid_hex_algop(buffer, tagged_oid, &p, options->repo->hash_algo) || *p != '\n') { ret = report(options, oid, OBJ_TAG, FSCK_MSG_BAD_OBJECT_SHA1, "invalid 'object' line format - bad sha1"); if (ret) goto done; @@ -1336,9 +1334,9 @@ static int fsck_blobs(struct oidset *blobs_found, struct oidset *blobs_done, if (oidset_contains(blobs_done, oid)) continue; - buf = odb_read_object(the_repository->objects, oid, &type, &size); + buf = odb_read_object(options->repo->objects, oid, &type, &size); if (!buf) { - if (is_promisor_object(the_repository, oid)) + if (is_promisor_object(options->repo, oid)) continue; ret |= report(options, oid, OBJ_BLOB, msg_missing, From da3ead3ee3a27df391932379b0b7283f2b17729f Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 23 Mar 2026 16:02:56 +0100 Subject: [PATCH 28/93] builtin/fsck: fix trivial dependence on `the_repository` We have a bunch of sites in "builtin/fsck.c" that depend on `the_repository` even though we already have a repository available, or in cases where we can trivially make it available. Refactor such sites to use the context-provided repository instead. 
Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- builtin/fsck.c | 98 +++++++++++++++++++++++++------------------------- 1 file changed, 49 insertions(+), 49 deletions(-) diff --git a/builtin/fsck.c b/builtin/fsck.c index 990d836918..59680e6daf 100644 --- a/builtin/fsck.c +++ b/builtin/fsck.c @@ -195,13 +195,13 @@ static int traverse_one_object(struct object *obj) return result; } -static int traverse_reachable(void) +static int traverse_reachable(struct repository *repo) { struct progress *progress = NULL; unsigned int nr = 0; int result = 0; if (show_progress) - progress = start_delayed_progress(the_repository, + progress = start_delayed_progress(repo, _("Checking connectivity"), 0); while (pending.nr) { result |= traverse_one_object(object_array_pop(&pending)); @@ -255,7 +255,7 @@ static int mark_unreachable_referents(const struct object_id *oid, /* * Check a single reachable object */ -static void check_reachable_object(struct object *obj) +static void check_reachable_object(struct repository *repo, struct object *obj) { /* * We obviously want the object to be parsed, @@ -263,9 +263,9 @@ static void check_reachable_object(struct object *obj) * do a full fsck */ if (!(obj->flags & HAS_OBJ)) { - if (is_promisor_object(the_repository, &obj->oid)) + if (is_promisor_object(repo, &obj->oid)) return; - if (has_object_pack(the_repository, &obj->oid)) + if (has_object_pack(repo, &obj->oid)) return; /* it is in pack - forget about it */ printf_ln(_("missing %s %s"), printable_type(&obj->oid, obj->type), @@ -278,7 +278,7 @@ static void check_reachable_object(struct object *obj) /* * Check a single unreachable object */ -static void check_unreachable_object(struct object *obj) +static void check_unreachable_object(struct repository *repo, struct object *obj) { /* * Missing unreachable object? Ignore it. 
It's not like @@ -318,19 +318,19 @@ static void check_unreachable_object(struct object *obj) printable_type(&obj->oid, obj->type), describe_object(&obj->oid)); if (write_lost_and_found) { - char *filename = repo_git_path(the_repository, "lost-found/%s/%s", + char *filename = repo_git_path(repo, "lost-found/%s/%s", obj->type == OBJ_COMMIT ? "commit" : "other", describe_object(&obj->oid)); FILE *f; - if (safe_create_leading_directories_const(the_repository, filename)) { + if (safe_create_leading_directories_const(repo, filename)) { error(_("could not create lost-found")); free(filename); return; } f = xfopen(filename, "w"); if (obj->type == OBJ_BLOB) { - if (odb_stream_blob_to_fd(the_repository->objects, fileno(f), + if (odb_stream_blob_to_fd(repo->objects, fileno(f), &obj->oid, NULL, 1)) die_errno(_("could not write '%s'"), filename); } else @@ -350,23 +350,23 @@ static void check_unreachable_object(struct object *obj) */ } -static void check_object(struct object *obj) +static void check_object(struct repository *repo, struct object *obj) { if (verbose) fprintf_ln(stderr, _("Checking %s"), describe_object(&obj->oid)); if (obj->flags & REACHABLE) - check_reachable_object(obj); + check_reachable_object(repo, obj); else - check_unreachable_object(obj); + check_unreachable_object(repo, obj); } -static void check_connectivity(void) +static void check_connectivity(struct repository *repo) { int i, max; /* Traverse the pending reachable objects */ - traverse_reachable(); + traverse_reachable(repo); /* * With --connectivity-only, we won't have actually opened and marked @@ -384,20 +384,20 @@ static void check_connectivity(void) * and ignore any that weren't present in our earlier * traversal. */ - odb_for_each_object(the_repository->objects, NULL, + odb_for_each_object(repo->objects, NULL, mark_unreachable_referents, NULL, 0); } /* Look up all the requirements, warn about missing objects.. 
*/ - max = get_max_object_index(the_repository); + max = get_max_object_index(repo); if (verbose) fprintf_ln(stderr, _("Checking connectivity (%d objects)"), max); for (i = 0; i < max; i++) { - struct object *obj = get_indexed_object(the_repository, i); + struct object *obj = get_indexed_object(repo, i); if (obj) - check_object(obj); + check_object(repo, obj); } } @@ -770,7 +770,7 @@ static int fsck_subdir(unsigned int nr, const char *path UNUSED, void *data) return 0; } -static void fsck_source(struct odb_source *source) +static void fsck_source(struct repository *repo, struct odb_source *source) { struct progress *progress = NULL; struct for_each_loose_cb cb_data = { @@ -781,7 +781,7 @@ static void fsck_source(struct odb_source *source) fprintf_ln(stderr, _("Checking object directory")); if (show_progress) - progress = start_progress(the_repository, + progress = start_progress(repo, _("Checking object directories"), 256); for_each_loose_file_in_source(source, fsck_loose, @@ -790,7 +790,7 @@ static void fsck_source(struct odb_source *source) stop_progress(&progress); } -static int fsck_cache_tree(struct cache_tree *it, const char *index_path) +static int fsck_cache_tree(struct repository *repo, struct cache_tree *it, const char *index_path) { int i; int err = 0; @@ -799,7 +799,7 @@ static int fsck_cache_tree(struct cache_tree *it, const char *index_path) fprintf_ln(stderr, _("Checking cache tree of %s"), index_path); if (0 <= it->entry_count) { - struct object *obj = parse_object(the_repository, &it->oid); + struct object *obj = parse_object(repo, &it->oid); if (!obj) { error(_("%s: invalid sha1 pointer in cache-tree of %s"), oid_to_hex(&it->oid), index_path); @@ -813,7 +813,7 @@ static int fsck_cache_tree(struct cache_tree *it, const char *index_path) err |= objerror(obj, _("non-tree in cache-tree")); } for (i = 0; i < it->subtree_nr; i++) - err |= fsck_cache_tree(it->down[i]->cache_tree, index_path); + err |= fsck_cache_tree(repo, it->down[i]->cache_tree, 
index_path); return err; } @@ -839,7 +839,7 @@ static int fsck_resolve_undo(struct index_state *istate, if (!ru->mode[i] || !S_ISREG(ru->mode[i])) continue; - obj = parse_object(the_repository, &ru->oid[i]); + obj = parse_object(istate->repo, &ru->oid[i]); if (!obj) { error(_("%s: invalid sha1 pointer in resolve-undo of %s"), oid_to_hex(&ru->oid[i]), @@ -871,7 +871,7 @@ static void fsck_index(struct index_state *istate, const char *index_path, mode = istate->cache[i]->ce_mode; if (S_ISGITLINK(mode)) continue; - blob = lookup_blob(the_repository, + blob = lookup_blob(istate->repo, &istate->cache[i]->oid); if (!blob) continue; @@ -884,7 +884,7 @@ static void fsck_index(struct index_state *istate, const char *index_path, mark_object_reachable(obj); } if (istate->cache_tree) - fsck_cache_tree(istate->cache_tree, index_path); + fsck_cache_tree(istate->repo, istate->cache_tree, index_path); fsck_resolve_undo(istate, index_path); } @@ -907,7 +907,7 @@ static int check_pack_rev_indexes(struct repository *r, int show_progress) if (show_progress) { repo_for_each_pack(r, p) pack_count++; - progress = start_delayed_progress(the_repository, + progress = start_delayed_progress(r, "Verifying reverse pack-indexes", pack_count); pack_count = 0; } @@ -1027,11 +1027,11 @@ int cmd_fsck(int argc, if (name_objects) fsck_enable_object_names(&fsck_walk_options); - repo_config(the_repository, git_fsck_config, &fsck_obj_options); - prepare_repo_settings(the_repository); + repo_config(repo, git_fsck_config, &fsck_obj_options); + prepare_repo_settings(repo); if (check_references) - fsck_refs(the_repository); + fsck_refs(repo); /* * Take a snapshot of the refs before walking objects to avoid looking @@ -1042,15 +1042,15 @@ int cmd_fsck(int argc, snapshot_refs(&snap, argc, argv); /* Ensure we get a "fresh" view of the odb */ - odb_reprepare(the_repository->objects); + odb_reprepare(repo->objects); if (connectivity_only) { - odb_for_each_object(the_repository->objects, NULL, + 
odb_for_each_object(repo->objects, NULL, mark_object_for_connectivity, NULL, 0); } else { - odb_prepare_alternates(the_repository->objects); - for (source = the_repository->objects->sources; source; source = source->next) - fsck_source(source); + odb_prepare_alternates(repo->objects); + for (source = repo->objects->sources; source; source = source->next) + fsck_source(repo, source); if (check_full) { struct packed_git *p; @@ -1058,19 +1058,19 @@ int cmd_fsck(int argc, struct progress *progress = NULL; if (show_progress) { - repo_for_each_pack(the_repository, p) { + repo_for_each_pack(repo, p) { if (open_pack_index(p)) continue; total += p->num_objects; } - progress = start_progress(the_repository, + progress = start_progress(repo, _("Checking objects"), total); } - repo_for_each_pack(the_repository, p) { + repo_for_each_pack(repo, p) { /* verify gives error messages itself */ - if (verify_pack(the_repository, + if (verify_pack(repo, p, fsck_obj_buffer, progress, count)) errors_found |= ERROR_PACK; @@ -1104,7 +1104,7 @@ int cmd_fsck(int argc, for (p = worktrees; *p; p++) { struct worktree *wt = *p; struct index_state istate = - INDEX_STATE_INIT(the_repository); + INDEX_STATE_INIT(repo); char *path, *wt_gitdir; /* @@ -1125,17 +1125,17 @@ int cmd_fsck(int argc, free_worktrees(worktrees); } - errors_found |= check_pack_rev_indexes(the_repository, show_progress); - if (verify_bitmap_files(the_repository)) + errors_found |= check_pack_rev_indexes(repo, show_progress); + if (verify_bitmap_files(repo)) errors_found |= ERROR_BITMAP; - check_connectivity(); + check_connectivity(repo); - if (the_repository->settings.core_commit_graph) { + if (repo->settings.core_commit_graph) { struct child_process commit_graph_verify = CHILD_PROCESS_INIT; - odb_prepare_alternates(the_repository->objects); - for (source = the_repository->objects->sources; source; source = source->next) { + odb_prepare_alternates(repo->objects); + for (source = repo->objects->sources; source; source = 
source->next) { child_process_init(&commit_graph_verify); commit_graph_verify.git_cmd = 1; strvec_pushl(&commit_graph_verify.args, "commit-graph", @@ -1149,11 +1149,11 @@ int cmd_fsck(int argc, } } - if (the_repository->settings.core_multi_pack_index) { + if (repo->settings.core_multi_pack_index) { struct child_process midx_verify = CHILD_PROCESS_INIT; - odb_prepare_alternates(the_repository->objects); - for (source = the_repository->objects->sources; source; source = source->next) { + odb_prepare_alternates(repo->objects); + for (source = repo->objects->sources; source; source = source->next) { child_process_init(&midx_verify); midx_verify.git_cmd = 1; strvec_pushl(&midx_verify.args, "multi-pack-index", From 4c44db7dc55c1aac0d0414ed22c27ea965cc2c77 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 23 Mar 2026 16:02:57 +0100 Subject: [PATCH 29/93] builtin/fsck: stop using `the_repository` when snapshotting refs We depend on `the_repository` when snapshotting refs. Refactor this to use a context-provided repository instead that is injected via the `struct snapshot_ref_data`. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- builtin/fsck.c | 33 ++++++++++++++++++++++----------- 1 file changed, 22 insertions(+), 11 deletions(-) diff --git a/builtin/fsck.c b/builtin/fsck.c index 59680e6daf..edbff16add 100644 --- a/builtin/fsck.c +++ b/builtin/fsck.c @@ -533,14 +533,20 @@ struct snapshot { /* TODO: Consider also snapshotting the index of each worktree.
*/ }; +struct snapshot_ref_data { + struct repository *repo; + struct snapshot *snap; +}; + static int snapshot_ref(const struct reference *ref, void *cb_data) { - struct snapshot *snap = cb_data; + struct snapshot_ref_data *data = cb_data; + struct snapshot *snap = data->snap; struct object *obj; - obj = parse_object(the_repository, ref->oid); + obj = parse_object(data->repo, ref->oid); if (!obj) { - if (is_promisor_object(the_repository, ref->oid)) { + if (is_promisor_object(data->repo, ref->oid)) { /* * Increment default_refs anyway, because this is a * valid ref. @@ -581,11 +587,16 @@ static int fsck_handle_ref(const struct reference *ref, void *cb_data UNUSED) return 0; } -static void snapshot_refs(struct snapshot *snap, int argc, const char **argv) +static void snapshot_refs(struct repository *repo, + struct snapshot *snap, int argc, const char **argv) { struct refs_for_each_ref_options opts = { .flags = REFS_FOR_EACH_INCLUDE_BROKEN, }; + struct snapshot_ref_data data = { + .repo = repo, + .snap = snap, + }; struct worktree **worktrees, **p; const char *head_points_at; struct object_id head_oid; @@ -593,13 +604,13 @@ static void snapshot_refs(struct snapshot *snap, int argc, const char **argv) for (int i = 0; i < argc; i++) { const char *arg = argv[i]; struct object_id oid; - if (!repo_get_oid(the_repository, arg, &oid)) { + if (!repo_get_oid(repo, arg, &oid)) { struct reference ref = { .name = arg, .oid = &oid, }; - snapshot_ref(&ref, snap); + snapshot_ref(&ref, &data); continue; } error(_("invalid parameter: expected sha1, got '%s'"), arg); @@ -611,8 +622,8 @@ static void snapshot_refs(struct snapshot *snap, int argc, const char **argv) return; } - refs_for_each_ref_ext(get_main_ref_store(the_repository), - snapshot_ref, snap, &opts); + refs_for_each_ref_ext(get_main_ref_store(repo), + snapshot_ref, &data, &opts); worktrees = get_worktrees(); for (p = worktrees; *p; p++) { @@ -621,7 +632,7 @@ static void snapshot_refs(struct snapshot *snap, int argc, const 
char **argv) strbuf_worktree_ref(wt, &refname, "HEAD"); - head_points_at = refs_resolve_ref_unsafe(get_main_ref_store(the_repository), + head_points_at = refs_resolve_ref_unsafe(get_main_ref_store(repo), refname.buf, 0, &head_oid, NULL); if (head_points_at && !is_null_oid(&head_oid)) { @@ -630,7 +641,7 @@ static void snapshot_refs(struct snapshot *snap, int argc, const char **argv) .oid = &head_oid, }; - snapshot_ref(&ref, snap); + snapshot_ref(&ref, &data); } strbuf_release(&refname); @@ -1039,7 +1050,7 @@ int cmd_fsck(int argc, * objects. We can still walk over new objects that are added during the * execution of fsck but won't miss any objects that were reachable. */ - snapshot_refs(&snap, argc, argv); + snapshot_refs(repo, &snap, argc, argv); /* Ensure we get a "fresh" view of the odb */ odb_reprepare(repo->objects); From 3ea779432d28b0229ef2a64e6a73a9018ad4c940 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 23 Mar 2026 16:02:58 +0100 Subject: [PATCH 30/93] builtin/fsck: stop using `the_repository` when checking refs We implicitly rely on `the_repository` when checking refs. Refactor this to instead inject the repository via the callback payload. 
Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- builtin/fsck.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/builtin/fsck.c b/builtin/fsck.c index edbff16add..efc60862ae 100644 --- a/builtin/fsck.c +++ b/builtin/fsck.c @@ -574,11 +574,12 @@ static int snapshot_ref(const struct reference *ref, void *cb_data) return 0; } -static int fsck_handle_ref(const struct reference *ref, void *cb_data UNUSED) +static int fsck_handle_ref(const struct reference *ref, void *cb_data) { + struct repository *repo = cb_data; struct object *obj; - obj = parse_object(the_repository, ref->oid); + obj = parse_object(repo, ref->oid); obj->flags |= USED; fsck_put_object_name(&fsck_walk_options, ref->oid, "%s", ref->name); @@ -665,7 +666,7 @@ static void free_snapshot_refs(struct snapshot *snap) free(snap->ref); } -static void process_refs(struct snapshot *snap) +static void process_refs(struct repository *repo, struct snapshot *snap) { struct worktree **worktrees, **p; @@ -674,7 +675,7 @@ static void process_refs(struct snapshot *snap) .name = snap->ref[i].refname, .oid = &snap->ref[i].oid, }; - fsck_handle_ref(&ref, NULL); + fsck_handle_ref(&ref, repo); } if (include_reflogs) { @@ -1095,7 +1096,7 @@ int cmd_fsck(int argc, } /* Process the snapshotted refs and the reflogs. */ - process_refs(&snap); + process_refs(repo, &snap); /* If not given any explicit objects, process index files too. */ if (!argc) From 38e09ebfd444f04e7282e5a7109edb6800864d41 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 23 Mar 2026 16:02:59 +0100 Subject: [PATCH 31/93] builtin/fsck: stop using `the_repository` when checking reflogs We implicitly rely on `the_repository` when checking reflogs. Refactor this to instead inject the repository via the callback payload. 
Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- builtin/fsck.c | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/builtin/fsck.c b/builtin/fsck.c index efc60862ae..be9dbba2da 100644 --- a/builtin/fsck.c +++ b/builtin/fsck.c @@ -468,13 +468,14 @@ static int fsck_obj_buffer(const struct object_id *oid, enum object_type type, static int default_refs; -static void fsck_handle_reflog_oid(const char *refname, struct object_id *oid, - timestamp_t timestamp) +static void fsck_handle_reflog_oid(struct repository *repo, + const char *refname, struct object_id *oid, + timestamp_t timestamp) { struct object *obj; if (!is_null_oid(oid)) { - obj = lookup_object(the_repository, oid); + obj = lookup_object(repo, oid); if (obj && (obj->flags & HAS_OBJ)) { if (timestamp) fsck_put_object_name(&fsck_walk_options, oid, @@ -482,7 +483,7 @@ static void fsck_handle_reflog_oid(const char *refname, struct object_id *oid, refname, timestamp); obj->flags |= USED; mark_object_reachable(obj); - } else if (!is_promisor_object(the_repository, oid)) { + } else if (!is_promisor_object(repo, oid)) { error(_("%s: invalid reflog entry %s"), refname, oid_to_hex(oid)); errors_found |= ERROR_REACHABLE; @@ -494,8 +495,10 @@ static int fsck_handle_reflog_ent(const char *refname, struct object_id *ooid, struct object_id *noid, const char *email UNUSED, timestamp_t timestamp, int tz UNUSED, - const char *message UNUSED, void *cb_data UNUSED) + const char *message UNUSED, void *cb_data) { + struct repository *repo = cb_data; + if (now && timestamp > now) return 0; @@ -503,19 +506,20 @@ static int fsck_handle_reflog_ent(const char *refname, fprintf_ln(stderr, _("Checking reflog %s->%s"), oid_to_hex(ooid), oid_to_hex(noid)); - fsck_handle_reflog_oid(refname, ooid, 0); - fsck_handle_reflog_oid(refname, noid, timestamp); + fsck_handle_reflog_oid(repo, refname, ooid, 0); + fsck_handle_reflog_oid(repo, refname, noid, timestamp); return 0; } static int 
fsck_handle_reflog(const char *logname, void *cb_data) { struct strbuf refname = STRBUF_INIT; + struct worktree *wt = cb_data; - strbuf_worktree_ref(cb_data, &refname, logname); - refs_for_each_reflog_ent(get_main_ref_store(the_repository), + strbuf_worktree_ref(wt, &refname, logname); + refs_for_each_reflog_ent(get_main_ref_store(wt->repo), refname.buf, fsck_handle_reflog_ent, - NULL); + wt->repo); strbuf_release(&refname); return 0; } From 2b2287c479ced6f794a7c8d305c39eef4ee563f5 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 23 Mar 2026 16:03:00 +0100 Subject: [PATCH 32/93] builtin/fsck: stop using `the_repository` with loose objects We depend on `the_repository` when performing consistency checks for loose objects. Refactor this to use a context-provided repository instead that is injected via the `struct for_each_loose_cb`. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- builtin/fsck.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/builtin/fsck.c b/builtin/fsck.c index be9dbba2da..e8bdec7cd0 100644 --- a/builtin/fsck.c +++ b/builtin/fsck.c @@ -711,27 +711,28 @@ static void process_refs(struct repository *repo, struct snapshot *snap) } } -struct for_each_loose_cb -{ +struct for_each_loose_cb { + struct repository *repo; struct progress *progress; }; static int fsck_loose(const struct object_id *oid, const char *path, - void *data UNUSED) + void *cb_data) { + struct for_each_loose_cb *data = cb_data; struct object *obj; enum object_type type = OBJ_NONE; unsigned long size; void *contents = NULL; int eaten; struct object_info oi = OBJECT_INFO_INIT; - struct object_id real_oid = *null_oid(the_hash_algo); + struct object_id real_oid = *null_oid(data->repo->hash_algo); int err = 0; oi.sizep = &size; oi.typep = &type; - if (read_loose_object(the_repository, path, oid, &real_oid, &contents, &oi) < 0) { + if (read_loose_object(data->repo, path, oid, &real_oid, &contents, &oi) < 0) { if (contents && 
!oideq(&real_oid, oid)) err = error(_("%s: hash-path mismatch, found at: %s"), oid_to_hex(&real_oid), path); @@ -748,7 +749,7 @@ static int fsck_loose(const struct object_id *oid, const char *path, if (!contents && type != OBJ_BLOB) BUG("read_loose_object streamed a non-blob"); - obj = parse_object_buffer(the_repository, oid, type, size, + obj = parse_object_buffer(data->repo, oid, type, size, contents, &eaten); if (!obj) { @@ -790,6 +791,7 @@ static void fsck_source(struct repository *repo, struct odb_source *source) { struct progress *progress = NULL; struct for_each_loose_cb cb_data = { + .repo = source->odb->repo, .progress = progress, }; From 1c5f77b6103adae5d45ae9ff24e9945b8f8b76c8 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 23 Mar 2026 16:03:01 +0100 Subject: [PATCH 33/93] builtin/fsck: stop using `the_repository` when checking packed objects We implicitly rely on `the_repository` when checking objects part of a packfile. These objects are iterated over via `verify_pack()`, which is provided by the packfile subsystem, and a callback function is then invoked for each of the objects in that specific pack. Unfortunately, it is not possible to provide a payload to the callback function. Refactor `verify_pack()` to accept a payload that is passed through to the callback so that we can inject the repository and get rid of the use of `the_repository`. 
Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- builtin/fsck.c | 11 ++++++----- pack-check.c | 7 ++++--- pack.h | 9 +++++++-- 3 files changed, 17 insertions(+), 10 deletions(-) diff --git a/builtin/fsck.c b/builtin/fsck.c index e8bdec7cd0..22ca1200a2 100644 --- a/builtin/fsck.c +++ b/builtin/fsck.c @@ -447,15 +447,16 @@ out: } static int fsck_obj_buffer(const struct object_id *oid, enum object_type type, - unsigned long size, void *buffer, int *eaten) + unsigned long size, void *buffer, int *eaten, void *cb_data) { + struct repository *repo = cb_data; + struct object *obj; + /* * Note, buffer may be NULL if type is OBJ_BLOB. See * verify_packfile(), data_valid variable for details. */ - struct object *obj; - obj = parse_object_buffer(the_repository, oid, type, size, buffer, - eaten); + obj = parse_object_buffer(repo, oid, type, size, buffer, eaten); if (!obj) { errors_found |= ERROR_OBJECT; return error(_("%s: object corrupt or missing"), @@ -1089,7 +1090,7 @@ int cmd_fsck(int argc, repo_for_each_pack(repo, p) { /* verify gives error messages itself */ if (verify_pack(repo, - p, fsck_obj_buffer, + p, fsck_obj_buffer, repo, progress, count)) errors_found |= ERROR_PACK; count += p->num_objects; diff --git a/pack-check.c b/pack-check.c index 7378c80730..79992bb509 100644 --- a/pack-check.c +++ b/pack-check.c @@ -53,6 +53,7 @@ static int verify_packfile(struct repository *r, struct packed_git *p, struct pack_window **w_curs, verify_fn fn, + void *fn_data, struct progress *progress, uint32_t base_count) { @@ -161,7 +162,7 @@ static int verify_packfile(struct repository *r, oid_to_hex(&oid), p->pack_name); else if (fn) { int eaten = 0; - err |= fn(&oid, type, size, data, &eaten); + err |= fn(&oid, type, size, data, &eaten, fn_data); if (eaten) data = NULL; } @@ -192,7 +193,7 @@ int verify_pack_index(struct packed_git *p) return err; } -int verify_pack(struct repository *r, struct packed_git *p, verify_fn fn, +int verify_pack(struct repository *r, 
struct packed_git *p, verify_fn fn, void *fn_data, struct progress *progress, uint32_t base_count) { int err = 0; @@ -202,7 +203,7 @@ int verify_pack(struct repository *r, struct packed_git *p, verify_fn fn, if (!p->index_data) return -1; - err |= verify_packfile(r, p, &w_curs, fn, progress, base_count); + err |= verify_packfile(r, p, &w_curs, fn, fn_data, progress, base_count); unuse_pack(&w_curs); return err; diff --git a/pack.h b/pack.h index ec76472e49..1cde92082b 100644 --- a/pack.h +++ b/pack.h @@ -85,7 +85,11 @@ struct pack_idx_entry { struct progress; /* Note, the data argument could be NULL if object type is blob */ -typedef int (*verify_fn)(const struct object_id *, enum object_type, unsigned long, void*, int*); +typedef int (*verify_fn)(const struct object_id *oid, + enum object_type type, + unsigned long size, + void *buffer, int *eaten, + void *fn_data); const char *write_idx_file(struct repository *repo, const char *index_name, @@ -95,7 +99,8 @@ const char *write_idx_file(struct repository *repo, const unsigned char *sha1); int check_pack_crc(struct packed_git *p, struct pack_window **w_curs, off_t offset, off_t len, unsigned int nr); int verify_pack_index(struct packed_git *); -int verify_pack(struct repository *, struct packed_git *, verify_fn fn, struct progress *, uint32_t); +int verify_pack(struct repository *, struct packed_git *, verify_fn fn, void *fn_data, + struct progress *, uint32_t); off_t write_pack_header(struct hashfile *f, uint32_t); void fixup_pack_header_footer(const struct git_hash_algo *, int, unsigned char *, const char *, uint32_t, From cc050f00cd969a72fcdad34e2308c7b19c1e68e6 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 23 Mar 2026 16:03:02 +0100 Subject: [PATCH 34/93] builtin/fsck: stop using `the_repository` when marking objects We implicitly rely on `the_repository` when marking objects for connectivity. Refactor this to instead inject the repository via the callback payload. 
Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- builtin/fsck.c | 26 ++++++++++++++------------ 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/builtin/fsck.c b/builtin/fsck.c index 22ca1200a2..e37f708480 100644 --- a/builtin/fsck.c +++ b/builtin/fsck.c @@ -124,7 +124,7 @@ static int fsck_objects_error_func(struct fsck_options *o UNUSED, static struct object_array pending; static int mark_object(struct object *obj, enum object_type type, - void *data, struct fsck_options *options UNUSED) + void *data, struct fsck_options *options) { struct object *parent = data; @@ -153,7 +153,7 @@ static int mark_object(struct object *obj, enum object_type type, return 0; obj->flags |= REACHABLE; - if (is_promisor_object(the_repository, &obj->oid)) + if (is_promisor_object(options->repo, &obj->oid)) /* * Further recursion does not need to be performed on this * object since it is a promisor object (so it does not need to @@ -162,7 +162,7 @@ static int mark_object(struct object *obj, enum object_type type, return 0; if (!(obj->flags & HAS_OBJ)) { - if (parent && !odb_has_object(the_repository->objects, &obj->oid, + if (parent && !odb_has_object(options->repo->objects, &obj->oid, HAS_OBJECT_RECHECK_PACKED)) { printf_ln(_("broken link from %7s %s\n" " to %7s %s"), @@ -181,7 +181,7 @@ static int mark_object(struct object *obj, enum object_type type, static void mark_object_reachable(struct object *obj) { - mark_object(obj, OBJ_ANY, NULL, NULL); + mark_object(obj, OBJ_ANY, NULL, &fsck_walk_options); } static int traverse_one_object(struct object *obj) @@ -222,10 +222,11 @@ static int mark_used(struct object *obj, enum object_type type UNUSED, static int mark_unreachable_referents(const struct object_id *oid, struct object_info *oi UNUSED, - void *data UNUSED) + void *data) { + struct repository *repo = data; struct fsck_options options; - struct object *obj = lookup_object(the_repository, oid); + struct object *obj = lookup_object(data, oid); if (!obj 
|| !(obj->flags & HAS_OBJ)) return 0; /* not part of our original set */ @@ -237,13 +238,13 @@ static int mark_unreachable_referents(const struct object_id *oid, * (and we want to avoid parsing blobs). */ if (obj->type == OBJ_NONE) { - enum object_type type = odb_read_object_info(the_repository->objects, + enum object_type type = odb_read_object_info(repo->objects, &obj->oid, NULL); if (type > 0) object_as_type(obj, type, 0); } - fsck_options_init(&options, the_repository, FSCK_OPTIONS_DEFAULT); + fsck_options_init(&options, repo, FSCK_OPTIONS_DEFAULT); options.walk = mark_used; fsck_walk(obj, NULL, &options); if (obj->type == OBJ_TREE) @@ -385,7 +386,7 @@ static void check_connectivity(struct repository *repo) * traversal. */ odb_for_each_object(repo->objects, NULL, - mark_unreachable_referents, NULL, 0); + mark_unreachable_referents, repo, 0); } /* Look up all the requirements, warn about missing objects.. */ @@ -909,9 +910,10 @@ static void fsck_index(struct index_state *istate, const char *index_path, static int mark_object_for_connectivity(const struct object_id *oid, struct object_info *oi UNUSED, - void *cb_data UNUSED) + void *cb_data) { - struct object *obj = lookup_unknown_object(the_repository, oid); + struct repository *repo = cb_data; + struct object *obj = lookup_unknown_object(repo, oid); obj->flags |= HAS_OBJ; return 0; } @@ -1065,7 +1067,7 @@ int cmd_fsck(int argc, if (connectivity_only) { odb_for_each_object(repo->objects, NULL, - mark_object_for_connectivity, NULL, 0); + mark_object_for_connectivity, repo, 0); } else { odb_prepare_alternates(repo->objects); for (source = repo->objects->sources; source; source = source->next) From 6fea405bb92100a229c0ee83c98e062e271577cd Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 23 Mar 2026 16:03:03 +0100 Subject: [PATCH 35/93] builtin/fsck: stop using `the_repository` in error reporting In the preceding commit we have introduced the repository into `struct fsck_object_report`. 
This allows us to drop remaining uses of the global `the_repository` variable. Drop them and remove `USE_THE_REPOSITORY_VARIABLE`. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- builtin/fsck.c | 46 +++++++++++++++++++++++----------------------- 1 file changed, 23 insertions(+), 23 deletions(-) diff --git a/builtin/fsck.c b/builtin/fsck.c index e37f708480..99696604b8 100644 --- a/builtin/fsck.c +++ b/builtin/fsck.c @@ -1,4 +1,3 @@ -#define USE_THE_REPOSITORY_VARIABLE #include "builtin.h" #include "gettext.h" #include "hex.h" @@ -66,14 +65,14 @@ static const char *describe_object(const struct object_id *oid) return fsck_describe_object(&fsck_walk_options, oid); } -static const char *printable_type(const struct object_id *oid, +static const char *printable_type(struct repository *repo, + const struct object_id *oid, enum object_type type) { const char *ret; if (type == OBJ_NONE) - type = odb_read_object_info(the_repository->objects, - oid, NULL); + type = odb_read_object_info(repo->objects, oid, NULL); ret = type_name(type); if (!ret) @@ -82,17 +81,17 @@ static const char *printable_type(const struct object_id *oid, return ret; } -static int objerror(struct object *obj, const char *err) +static int objerror(struct repository *repo, struct object *obj, const char *err) { errors_found |= ERROR_OBJECT; /* TRANSLATORS: e.g. error in tree 01bfda: */ fprintf_ln(stderr, _("error in %s %s: %s"), - printable_type(&obj->oid, obj->type), + printable_type(repo, &obj->oid, obj->type), describe_object(&obj->oid), err); return -1; } -static int fsck_objects_error_func(struct fsck_options *o UNUSED, +static int fsck_objects_error_func(struct fsck_options *o, void *fsck_report, enum fsck_msg_type msg_type, enum fsck_msg_id msg_id UNUSED, @@ -106,13 +105,13 @@ static int fsck_objects_error_func(struct fsck_options *o UNUSED, case FSCK_WARN: /* TRANSLATORS: e.g. 
warning in tree 01bfda: */ fprintf_ln(stderr, _("warning in %s %s: %s"), - printable_type(oid, object_type), + printable_type(o->repo, oid, object_type), describe_object(oid), message); return 0; case FSCK_ERROR: /* TRANSLATORS: e.g. error in tree 01bfda: */ fprintf_ln(stderr, _("error in %s %s: %s"), - printable_type(oid, object_type), + printable_type(o->repo, oid, object_type), describe_object(oid), message); return 1; default: @@ -136,7 +135,7 @@ static int mark_object(struct object *obj, enum object_type type, if (!obj) { /* ... these references to parent->fld are safe here */ printf_ln(_("broken link from %7s %s"), - printable_type(&parent->oid, parent->type), + printable_type(options->repo, &parent->oid, parent->type), describe_object(&parent->oid)); printf_ln(_("broken link from %7s %s"), (type == OBJ_ANY ? _("unknown") : type_name(type)), @@ -147,7 +146,7 @@ static int mark_object(struct object *obj, enum object_type type, if (type != OBJ_ANY && obj->type != type) /* ... and the reference to parent is safe here */ - objerror(parent, _("wrong object type in link")); + objerror(options->repo, parent, _("wrong object type in link")); if (obj->flags & REACHABLE) return 0; @@ -166,9 +165,9 @@ static int mark_object(struct object *obj, enum object_type type, HAS_OBJECT_RECHECK_PACKED)) { printf_ln(_("broken link from %7s %s\n" " to %7s %s"), - printable_type(&parent->oid, parent->type), + printable_type(options->repo, &parent->oid, parent->type), describe_object(&parent->oid), - printable_type(&obj->oid, obj->type), + printable_type(options->repo, &obj->oid, obj->type), describe_object(&obj->oid)); errors_found |= ERROR_REACHABLE; } @@ -269,7 +268,7 @@ static void check_reachable_object(struct repository *repo, struct object *obj) if (has_object_pack(repo, &obj->oid)) return; /* it is in pack - forget about it */ printf_ln(_("missing %s %s"), - printable_type(&obj->oid, obj->type), + printable_type(repo, &obj->oid, obj->type), describe_object(&obj->oid)); 
errors_found |= ERROR_REACHABLE; return; @@ -296,7 +295,7 @@ static void check_unreachable_object(struct repository *repo, struct object *obj */ if (show_unreachable) { printf_ln(_("unreachable %s %s"), - printable_type(&obj->oid, obj->type), + printable_type(repo, &obj->oid, obj->type), describe_object(&obj->oid)); return; } @@ -316,7 +315,7 @@ static void check_unreachable_object(struct repository *repo, struct object *obj if (!(obj->flags & USED)) { if (show_dangling) printf_ln(_("dangling %s %s"), - printable_type(&obj->oid, obj->type), + printable_type(repo, &obj->oid, obj->type), describe_object(&obj->oid)); if (write_lost_and_found) { char *filename = repo_git_path(repo, "lost-found/%s/%s", @@ -402,7 +401,8 @@ static void check_connectivity(struct repository *repo) } } -static int fsck_obj(struct object *obj, void *buffer, unsigned long size) +static int fsck_obj(struct repository *repo, + struct object *obj, void *buffer, unsigned long size) { int err; @@ -412,11 +412,11 @@ static int fsck_obj(struct object *obj, void *buffer, unsigned long size) if (verbose) fprintf_ln(stderr, _("Checking %s %s"), - printable_type(&obj->oid, obj->type), + printable_type(repo, &obj->oid, obj->type), describe_object(&obj->oid)); if (fsck_walk(obj, NULL, &fsck_obj_options)) - objerror(obj, _("broken links")); + objerror(repo, obj, _("broken links")); err = fsck_object(obj, buffer, size, &fsck_obj_options); if (err) goto out; @@ -434,7 +434,7 @@ static int fsck_obj(struct object *obj, void *buffer, unsigned long size) if (show_tags && tag->tagged) { printf_ln(_("tagged %s %s (%s) in %s"), - printable_type(&tag->tagged->oid, tag->tagged->type), + printable_type(repo, &tag->tagged->oid, tag->tagged->type), describe_object(&tag->tagged->oid), tag->tag, describe_object(&tag->object.oid)); @@ -465,7 +465,7 @@ static int fsck_obj_buffer(const struct object_id *oid, enum object_type type, } obj->flags &= ~(REACHABLE | SEEN); obj->flags |= HAS_OBJ; - return fsck_obj(obj, buffer, 
size); + return fsck_obj(repo, obj, buffer, size); } static int default_refs; @@ -765,7 +765,7 @@ static int fsck_loose(const struct object_id *oid, const char *path, obj->flags &= ~(REACHABLE | SEEN); obj->flags |= HAS_OBJ; - if (fsck_obj(obj, contents, size)) + if (fsck_obj(data->repo, obj, contents, size)) errors_found |= ERROR_OBJECT; if (!eaten) @@ -830,7 +830,7 @@ static int fsck_cache_tree(struct repository *repo, struct cache_tree *it, const fsck_put_object_name(&fsck_walk_options, &it->oid, ":"); mark_object_reachable(obj); if (obj->type != OBJ_TREE) - err |= objerror(obj, _("non-tree in cache-tree")); + err |= objerror(repo, obj, _("non-tree in cache-tree")); } for (i = 0; i < it->subtree_nr; i++) err |= fsck_cache_tree(repo, it->down[i]->cache_tree, index_path); From 04c9c5e8d2d99050d260149cad9dde1302a02ff4 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Tue, 24 Mar 2026 07:18:26 +0100 Subject: [PATCH 36/93] commit-graph: fix writing generations with dates exceeding 34 bits The `timestamp_t` type is declared as `uintmax_t` and thus typically has 64 bits of precision. Usually, the full precision of such dates is not required: it would be comforting to know that Git is still around in millions of years, but all in all the chance is rather low. We abuse this fact in the commit-graph: instead of storing the full 64 bits of precision, committer dates only store 34 bits. This is still plenty of headroom, as it means that we can represent dates until year 2514. Commits which are dated beyond that year will simply get a date whose remaining bits are masked. The result of this is somewhat curious: the committer date will be different depending on whether a commit gets parsed via the commit-graph or via the object database. This isn't really too much of an issue in general though, as we don't typically use the date parsed from the commit-graph in user-facing output. 
But with 024b4c9697 (commit: make `repo_parse_commit_no_graph()` more robust, 2026-02-16) it started to become a problem when writing the commit-graph itself. This commit changed `repo_parse_commit_no_graph()` so that we re-parse the commit via the object database in case it was already parsed beforehand via the commit-graph. The consequence is that we may now act with two different commit dates at different stages: - Initially, we use the 34-bit precision timestamp when writing the chunk generation data. We thus correctly compute the offsets relative to the on-disk timestamp here. - Later, when writing the overflow data, we may end up with the full-precision timestamp. When the date is larger than 34 bits the result of this is an underflow when computing the offset. This causes a mismatch in the number of generation data overflow records we want to write, and that ultimately causes Git to die. Introduce a new helper function that computes the generation offset for a commit while correctly masking the date to 34 bits. This makes the previously-implicit assumptions about the commit date precision explicit and thus hopefully less fragile going forward. Adapt sites that compute the offset to use the function. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- commit-graph.c | 37 ++++++++++++++++++++++++++++++++++--- t/t5318-commit-graph.sh | 20 ++++++++++++++++++++ 2 files changed, 54 insertions(+), 3 deletions(-) diff --git a/commit-graph.c b/commit-graph.c index 6b1f02e179..ad3582451d 100644 --- a/commit-graph.c +++ b/commit-graph.c @@ -1319,6 +1319,37 @@ static int write_graph_chunk_data(struct hashfile *f, return 0; } +/* + * Compute the generation offset between the commit date and its generation. + * This is what's ultimately stored as generation number in the commit graph. + * + * Note that the computation of the commit date is more involved than you might + * think. 
Instead of using the full commit date, we're in fact masking bits so + * that only the 34 lowest bits are considered. This results from the fact that + * commit graphs themselves only ever store 34 bits of the commit date + * themselves. + * + * This means that if we have a commit date that exceeds 34 bits we'll end up + * in situations where depending on whether the commit has been parsed from the + * object database or the commit graph we'll have different dates, where the + * ones parsed from the object database would have full 64 bit precision. + * + * But ultimately, we only ever want the offset to be relative to what we + * actually end up storing on disk, and hence we have to mask all the other + * bits. + */ +static timestamp_t compute_generation_offset(struct commit *c) +{ + timestamp_t masked_date; + + if (sizeof(timestamp_t) > 4) + masked_date = c->date & (((timestamp_t) 1 << 34) - 1); + else + masked_date = c->date; + + return commit_graph_data_at(c)->generation - masked_date; +} + static int write_graph_chunk_generation_data(struct hashfile *f, void *data) { @@ -1329,7 +1360,7 @@ static int write_graph_chunk_generation_data(struct hashfile *f, struct commit *c = ctx->commits.items[i]; timestamp_t offset; repo_parse_commit(ctx->r, c); - offset = commit_graph_data_at(c)->generation - c->date; + offset = compute_generation_offset(c); display_progress(ctx->progress, ++ctx->progress_cnt); if (offset > GENERATION_NUMBER_V2_OFFSET_MAX) { @@ -1350,7 +1381,7 @@ static int write_graph_chunk_generation_data_overflow(struct hashfile *f, int i; for (i = 0; i < ctx->commits.nr; i++) { struct commit *c = ctx->commits.items[i]; - timestamp_t offset = commit_graph_data_at(c)->generation - c->date; + timestamp_t offset = compute_generation_offset(c); display_progress(ctx->progress, ++ctx->progress_cnt); if (offset > GENERATION_NUMBER_V2_OFFSET_MAX) { @@ -1733,7 +1764,7 @@ static void compute_generation_numbers(struct write_commit_graph_context *ctx) for (i = 0; i < 
ctx->commits.nr; i++) { struct commit *c = ctx->commits.items[i]; - timestamp_t offset = commit_graph_data_at(c)->generation - c->date; + timestamp_t offset = compute_generation_offset(c); if (offset > GENERATION_NUMBER_V2_OFFSET_MAX) ctx->num_generation_data_overflows++; } diff --git a/t/t5318-commit-graph.sh b/t/t5318-commit-graph.sh index 98c6910963..1c40f904f8 100755 --- a/t/t5318-commit-graph.sh +++ b/t/t5318-commit-graph.sh @@ -417,6 +417,26 @@ test_expect_success TIME_IS_64BIT,TIME_T_IS_64BIT 'lower layers have overflow ch test_cmp full/.git/objects/info/commit-graph commit-graph-upgraded ' +test_expect_success TIME_IS_64BIT,TIME_T_IS_64BIT 'overflow chunk when replacing commit-graph' ' + test_when_finished "rm -rf repo" && + git init repo && + ( + cd repo && + cat >commit <<-EOF && + tree $(test_oid empty_tree) + author Example 9223372036854775 +0000 + committer Example 9223372036854775 +0000 + + Weird commit date + EOF + commit_id=$(git hash-object -t commit -w commit) && + git reset --hard "$commit_id" && + git commit-graph write --reachable && + git commit-graph write --reachable --split=replace && + git log + ) +' + # the verify tests below expect the commit-graph to contain # exactly the commits reachable from the commits/8 branch. # If the file changes the set of commits in the list, then the From e8b79a96ebaa2113391d14bfcdabe239f6ff8611 Mon Sep 17 00:00:00 2001 From: Toon Claes Date: Tue, 24 Mar 2026 20:35:41 +0100 Subject: [PATCH 37/93] replay: support replaying down from root commit git-replay(1) doesn't allow replaying commits all the way down to the root commit. Fix that. 
Signed-off-by: Toon Claes Signed-off-by: Junio C Hamano --- replay.c | 27 +++++++++++++++++---------- t/t3650-replay-basics.sh | 10 +++++++--- 2 files changed, 24 insertions(+), 13 deletions(-) diff --git a/replay.c b/replay.c index a63f6714c4..92f2279156 100644 --- a/replay.c +++ b/replay.c @@ -209,7 +209,10 @@ static struct commit *mapped_commit(kh_oid_map_t *replayed_commits, struct commit *commit, struct commit *fallback) { - khint_t pos = kh_get_oid_map(replayed_commits, commit->object.oid); + khint_t pos; + if (!commit) + return fallback; + pos = kh_get_oid_map(replayed_commits, commit->object.oid); if (pos == kh_end(replayed_commits)) return fallback; return kh_value(replayed_commits, pos); @@ -225,16 +228,24 @@ static struct commit *pick_regular_commit(struct repository *repo, struct commit *base, *replayed_base; struct tree *pickme_tree, *base_tree, *replayed_base_tree; - base = pickme->parents->item; - replayed_base = mapped_commit(replayed_commits, base, onto); + if (pickme->parents) { + base = pickme->parents->item; + base_tree = repo_get_commit_tree(repo, base); + } else { + base = NULL; + base_tree = lookup_tree(repo, repo->hash_algo->empty_tree); + } + replayed_base = mapped_commit(replayed_commits, base, onto); replayed_base_tree = repo_get_commit_tree(repo, replayed_base); pickme_tree = repo_get_commit_tree(repo, pickme); - base_tree = repo_get_commit_tree(repo, base); merge_opt->branch1 = short_commit_name(repo, replayed_base); merge_opt->branch2 = short_commit_name(repo, pickme); - merge_opt->ancestor = xstrfmt("parent of %s", merge_opt->branch2); + if (pickme->parents) + merge_opt->ancestor = xstrfmt("parent of %s", merge_opt->branch2); + else + merge_opt->ancestor = xstrdup("empty tree"); merge_incore_nonrecursive(merge_opt, base_tree, @@ -293,8 +304,6 @@ int replay_revisions(struct rev_info *revs, set_up_replay_mode(revs->repo, &revs->cmdline, opts->onto, &detached_head, &advance, &onto, &update_refs); - /* FIXME: Should allow replaying 
commits with the first as a root commit */ - if (prepare_revision_walk(revs) < 0) { ret = error(_("error preparing revisions")); goto out; @@ -309,9 +318,7 @@ int replay_revisions(struct rev_info *revs, khint_t pos; int hr; - if (!commit->parents) - die(_("replaying down from root commit is not supported yet!")); - if (commit->parents->next) + if (commit->parents && commit->parents->next) die(_("replaying merge commits is not supported yet!")); last_commit = pick_regular_commit(revs->repo, commit, replayed_commits, diff --git a/t/t3650-replay-basics.sh b/t/t3650-replay-basics.sh index a03f8f9293..9c55b62757 100755 --- a/t/t3650-replay-basics.sh +++ b/t/t3650-replay-basics.sh @@ -81,9 +81,13 @@ test_expect_success 'option --onto or --advance is mandatory' ' test_cmp expect actual ' -test_expect_success 'no base or negative ref gives no-replaying down to root error' ' - echo "fatal: replaying down from root commit is not supported yet!" >expect && - test_must_fail git replay --onto=topic1 topic2 2>actual && +test_expect_success 'replay down to root onto another branch' ' + git replay --ref-action=print --onto main topic2 >result && + + test_line_count = 1 result && + + git log --format=%s $(cut -f 3 -d " " result) >actual && + test_write_lines E D C M L B A >expect && test_cmp expect actual ' From 6d35cc472e24394edb21a9b4d0abe25f5b2a91f2 Mon Sep 17 00:00:00 2001 From: Justin Tobler Date: Thu, 26 Mar 2026 14:14:10 -0500 Subject: [PATCH 38/93] fast-export: check for unsupported signing modes earlier The '--signed-{commits,tags}' options for git-fast-export(1) support only a subset of the modes accepted by git-fast-import(1). Unsupported modes such as 'strip-if-invalid' and 'sign-if-invalid' are accepted during option parsing, but cause the command to die later when a signed object is encountered. Instead, reject unsupported signing modes immediately after parsing the option. 
This treats them the same as other unknown modes and avoids deferring the error until object processing. This also removes duplicated checks in commit/tag handling code. Signed-off-by: Justin Tobler Signed-off-by: Junio C Hamano --- builtin/fast-export.c | 15 ++------------- 1 file changed, 2 insertions(+), 13 deletions(-) diff --git a/builtin/fast-export.c b/builtin/fast-export.c index 13621b0d6a..a30fb90b6e 100644 --- a/builtin/fast-export.c +++ b/builtin/fast-export.c @@ -64,7 +64,8 @@ static int parse_opt_sign_mode(const struct option *opt, if (unset) return 0; - if (parse_sign_mode(arg, val, NULL)) + if (parse_sign_mode(arg, val, NULL) || (*val == SIGN_STRIP_IF_INVALID) || + (*val == SIGN_SIGN_IF_INVALID)) return error(_("unknown %s mode: %s"), opt->long_name, arg); return 0; @@ -822,12 +823,6 @@ static void handle_commit(struct commit *commit, struct rev_info *rev, die(_("encountered signed commit %s; use " "--signed-commits= to handle it"), oid_to_hex(&commit->object.oid)); - case SIGN_STRIP_IF_INVALID: - die(_("'strip-if-invalid' is not a valid mode for " - "git fast-export with --signed-commits=")); - case SIGN_SIGN_IF_INVALID: - die(_("'sign-if-invalid' is not a valid mode for " - "git fast-export with --signed-commits=")); default: BUG("invalid signed_commit_mode value %d", signed_commit_mode); } @@ -970,12 +965,6 @@ static void handle_tag(const char *name, struct tag *tag) die(_("encountered signed tag %s; use " "--signed-tags= to handle it"), oid_to_hex(&tag->object.oid)); - case SIGN_STRIP_IF_INVALID: - die(_("'strip-if-invalid' is not a valid mode for " - "git fast-export with --signed-tags=")); - case SIGN_SIGN_IF_INVALID: - die(_("'sign-if-invalid' is not a valid mode for " - "git fast-export with --signed-tags=")); default: BUG("invalid signed_commit_mode value %d", signed_commit_mode); } From 4c36345e04cbef7edb94557119acba9f9a38c26f Mon Sep 17 00:00:00 2001 From: Justin Tobler Date: Thu, 26 Mar 2026 14:14:11 -0500 Subject: [PATCH 39/93] 
fast-import: add 'abort-if-invalid' mode to '--signed-commits=' The '--signed-commits=' option for git-fast-import(1) configures how signed commits are handled when encountered. In cases where an invalid commit signature is encountered, a user may wish to abort the operation entirely. Introduce an 'abort-if-invalid' mode to do so. Signed-off-by: Justin Tobler Signed-off-by: Junio C Hamano --- Documentation/git-fast-import.adoc | 2 ++ builtin/fast-export.c | 2 +- builtin/fast-import.c | 10 +++++++++- gpg-interface.c | 2 ++ gpg-interface.h | 1 + t/t9305-fast-import-signatures.sh | 10 +++++++++- 6 files changed, 24 insertions(+), 3 deletions(-) diff --git a/Documentation/git-fast-import.adoc b/Documentation/git-fast-import.adoc index b3f42d4637..288f2b2a7e 100644 --- a/Documentation/git-fast-import.adoc +++ b/Documentation/git-fast-import.adoc @@ -90,6 +90,8 @@ already trusted to run their own code. commit signatures and replaces invalid signatures with newly created ones. Valid signatures are left unchanged. If `` is provided, that key is used for signing; otherwise the configured default signing key is used. +* `abort-if-invalid` will make this program die when encountering a signed + commit that is unable to be verified. 
Options for Frontends ~~~~~~~~~~~~~~~~~~~~~ diff --git a/builtin/fast-export.c b/builtin/fast-export.c index a30fb90b6e..2eb43a28da 100644 --- a/builtin/fast-export.c +++ b/builtin/fast-export.c @@ -65,7 +65,7 @@ static int parse_opt_sign_mode(const struct option *opt, return 0; if (parse_sign_mode(arg, val, NULL) || (*val == SIGN_STRIP_IF_INVALID) || - (*val == SIGN_SIGN_IF_INVALID)) + (*val == SIGN_SIGN_IF_INVALID) || (*val == SIGN_ABORT_IF_INVALID)) return error(_("unknown %s mode: %s"), opt->long_name, arg); return 0; diff --git a/builtin/fast-import.c b/builtin/fast-import.c index 9fc6c35b74..08ea27242d 100644 --- a/builtin/fast-import.c +++ b/builtin/fast-import.c @@ -2892,6 +2892,9 @@ static void handle_signature_if_invalid(struct strbuf *new_data, ret = verify_commit_buffer(tmp_buf.buf, tmp_buf.len, &signature_check); if (ret) { + if (mode == SIGN_ABORT_IF_INVALID) + die(_("aborting due to invalid signature")); + warn_invalid_signature(&signature_check, msg->buf, mode); if (mode == SIGN_SIGN_IF_INVALID) { @@ -2983,6 +2986,7 @@ static void parse_new_commit(const char *arg) case SIGN_VERBATIM: case SIGN_STRIP_IF_INVALID: case SIGN_SIGN_IF_INVALID: + case SIGN_ABORT_IF_INVALID: import_one_signature(&sig_sha1, &sig_sha256, v); break; @@ -3068,7 +3072,8 @@ static void parse_new_commit(const char *arg) encoding); if ((signed_commit_mode == SIGN_STRIP_IF_INVALID || - signed_commit_mode == SIGN_SIGN_IF_INVALID) && + signed_commit_mode == SIGN_SIGN_IF_INVALID || + signed_commit_mode == SIGN_ABORT_IF_INVALID) && (sig_sha1.hash_algo || sig_sha256.hash_algo)) handle_signature_if_invalid(&new_data, &sig_sha1, &sig_sha256, &msg, signed_commit_mode); @@ -3115,6 +3120,9 @@ static void handle_tag_signature(struct strbuf *msg, const char *name) case SIGN_ABORT: die(_("encountered signed tag; use " "--signed-tags= to handle it")); + case SIGN_ABORT_IF_INVALID: + die(_("'abort-if-invalid' is not a valid mode for " + "git fast-import with --signed-tags=")); case 
SIGN_STRIP_IF_INVALID: die(_("'strip-if-invalid' is not a valid mode for " "git fast-import with --signed-tags=")); diff --git a/gpg-interface.c b/gpg-interface.c index d517425034..dafd5371fa 100644 --- a/gpg-interface.c +++ b/gpg-interface.c @@ -1164,6 +1164,8 @@ int parse_sign_mode(const char *arg, enum sign_mode *mode, const char **keyid) *mode = SIGN_WARN_STRIP; } else if (!strcmp(arg, "strip")) { *mode = SIGN_STRIP; + } else if (!strcmp(arg, "abort-if-invalid")) { + *mode = SIGN_ABORT_IF_INVALID; } else if (!strcmp(arg, "strip-if-invalid")) { *mode = SIGN_STRIP_IF_INVALID; } else if (!strcmp(arg, "sign-if-invalid")) { diff --git a/gpg-interface.h b/gpg-interface.h index a365586ce1..3d95f5ec14 100644 --- a/gpg-interface.h +++ b/gpg-interface.h @@ -115,6 +115,7 @@ void print_signature_buffer(const struct signature_check *sigc, /* Modes for --signed-tags= and --signed-commits= options. */ enum sign_mode { SIGN_ABORT, + SIGN_ABORT_IF_INVALID, SIGN_WARN_VERBATIM, SIGN_VERBATIM, SIGN_WARN_STRIP, diff --git a/t/t9305-fast-import-signatures.sh b/t/t9305-fast-import-signatures.sh index 18707b3f6c..5667693afd 100755 --- a/t/t9305-fast-import-signatures.sh +++ b/t/t9305-fast-import-signatures.sh @@ -103,7 +103,7 @@ test_expect_success RUST,GPG 'strip both OpenPGP signatures with --signed-commit test_line_count = 2 out ' -for mode in strip-if-invalid sign-if-invalid +for mode in strip-if-invalid sign-if-invalid abort-if-invalid do test_expect_success GPG "import commit with no signature with --signed-commits=$mode" ' git fast-export main >output && @@ -135,6 +135,14 @@ do # corresponding `data ` command would have to be changed too. 
sed "s/OpenPGP signed commit/OpenPGP forged commit/" output >modified && + if test "$mode" = abort-if-invalid + then + test_must_fail git -C new fast-import --quiet \ + --signed-commits=$mode log 2>&1 && + test_grep "aborting due to invalid signature" log && + return 0 + fi && + git -C new fast-import --quiet --signed-commits=$mode log 2>&1 && IMPORTED=$(git -C new rev-parse --verify refs/heads/openpgp-signing) && From 817b0428797742829b57538210ad8404b09f9cb1 Mon Sep 17 00:00:00 2001 From: Justin Tobler Date: Thu, 26 Mar 2026 14:14:12 -0500 Subject: [PATCH 40/93] fast-import: add 'strip-if-invalid' mode to '--signed-tags=' With c20f112e51 (fast-import: add 'strip-if-invalid' mode to --signed-commits=, 2025-11-17), git-fast-import(1) learned to verify commit signatures during import and strip signatures that fail verification. Extend the same behavior to signed tag objects by introducing a 'strip-if-invalid' mode for the '--signed-tags' option. Signed-off-by: Justin Tobler Signed-off-by: Junio C Hamano --- Documentation/git-fast-import.adoc | 7 ++- builtin/fast-import.c | 40 +++++++++++++--- t/t9306-fast-import-signed-tags.sh | 73 ++++++++++++++++++++++++++++++ 3 files changed, 110 insertions(+), 10 deletions(-) diff --git a/Documentation/git-fast-import.adoc b/Documentation/git-fast-import.adoc index 288f2b2a7e..d68bc52b7e 100644 --- a/Documentation/git-fast-import.adoc +++ b/Documentation/git-fast-import.adoc @@ -66,11 +66,10 @@ fast-import stream! This option is enabled automatically for remote-helpers that use the `import` capability, as they are already trusted to run their own code. -`--signed-tags=(verbatim|warn-verbatim|warn-strip|strip|abort)`:: +`--signed-tags=`:: Specify how to handle signed tags. Behaves in the same way as - the `--signed-commits=` below, except that the - `strip-if-invalid` mode is not yet supported. Like for signed - commits, the default mode is `verbatim`. + the `--signed-commits=` below. 
Like for signed commits, + the default mode is `verbatim`. `--signed-commits=`:: Specify how to handle signed commits. The following s diff --git a/builtin/fast-import.c b/builtin/fast-import.c index 08ea27242d..5e89829aea 100644 --- a/builtin/fast-import.c +++ b/builtin/fast-import.c @@ -3089,7 +3089,34 @@ static void parse_new_commit(const char *arg) b->last_commit = object_count_by_type[OBJ_COMMIT]; } -static void handle_tag_signature(struct strbuf *msg, const char *name) +static void handle_tag_signature_if_invalid(struct strbuf *buf, + struct strbuf *msg, + size_t sig_offset) +{ + struct strbuf signature = STRBUF_INIT; + struct strbuf payload = STRBUF_INIT; + struct signature_check sigc = { 0 }; + + strbuf_addbuf(&payload, buf); + strbuf_addch(&payload, '\n'); + strbuf_add(&payload, msg->buf, sig_offset); + strbuf_add(&signature, msg->buf + sig_offset, msg->len - sig_offset); + + sigc.payload_type = SIGNATURE_PAYLOAD_TAG; + sigc.payload = strbuf_detach(&payload, &sigc.payload_len); + + if (!check_signature(&sigc, signature.buf, signature.len)) + goto out; + + strbuf_setlen(msg, sig_offset); + +out: + signature_check_clear(&sigc); + strbuf_release(&signature); + strbuf_release(&payload); +} + +static void handle_tag_signature(struct strbuf *buf, struct strbuf *msg, const char *name) { size_t sig_offset = parse_signed_buffer(msg->buf, msg->len); @@ -3115,6 +3142,9 @@ static void handle_tag_signature(struct strbuf *msg, const char *name) /* Truncate the buffer to remove the signature */ strbuf_setlen(msg, sig_offset); break; + case SIGN_STRIP_IF_INVALID: + handle_tag_signature_if_invalid(buf, msg, sig_offset); + break; /* Third, aborting modes */ case SIGN_ABORT: @@ -3123,9 +3153,6 @@ static void handle_tag_signature(struct strbuf *msg, const char *name) case SIGN_ABORT_IF_INVALID: die(_("'abort-if-invalid' is not a valid mode for " "git fast-import with --signed-tags=")); - case SIGN_STRIP_IF_INVALID: - die(_("'strip-if-invalid' is not a valid mode for " - "git 
fast-import with --signed-tags=")); case SIGN_SIGN_IF_INVALID: die(_("'sign-if-invalid' is not a valid mode for " "git fast-import with --signed-tags=")); @@ -3198,8 +3225,6 @@ static void parse_new_tag(const char *arg) /* tag payload/message */ parse_data(&msg, 0, NULL); - handle_tag_signature(&msg, t->name); - /* build the tag object */ strbuf_reset(&new_data); @@ -3211,6 +3236,9 @@ static void parse_new_tag(const char *arg) if (tagger) strbuf_addf(&new_data, "tagger %s\n", tagger); + + handle_tag_signature(&new_data, &msg, t->name); + strbuf_addch(&new_data, '\n'); strbuf_addbuf(&new_data, &msg); free(tagger); diff --git a/t/t9306-fast-import-signed-tags.sh b/t/t9306-fast-import-signed-tags.sh index 363619e7d1..fd43b0b52a 100755 --- a/t/t9306-fast-import-signed-tags.sh +++ b/t/t9306-fast-import-signed-tags.sh @@ -77,4 +77,77 @@ test_expect_success GPGSSH 'import SSH signed tag with --signed-tags=strip' ' test_grep ! "SSH SIGNATURE" out ' +for mode in strip-if-invalid +do + test_expect_success GPG "import tag with no signature with --signed-tags=$mode" ' + test_when_finished rm -rf import && + git init import && + + git fast-export --signed-tags=verbatim >output && + git -C import fast-import --quiet --signed-tags=$mode log 2>&1 && + test_must_be_empty log + ' + + test_expect_success GPG "keep valid OpenPGP signature with --signed-tags=$mode" ' + test_when_finished rm -rf import && + git init import && + + git fast-export --signed-tags=verbatim openpgp-signed >output && + git -C import fast-import --quiet --signed-tags=$mode log 2>&1 && + IMPORTED=$(git -C import rev-parse --verify refs/tags/openpgp-signed) && + test $OPENPGP_SIGNED = $IMPORTED && + git -C import cat-file tag "$IMPORTED" >actual && + test_grep -E "^-----BEGIN PGP SIGNATURE-----" actual && + test_must_be_empty log + ' + + test_expect_success GPG "handle signature invalidated by message change with --signed-tags=$mode" ' + test_when_finished rm -rf import && + git init import && + + git fast-export 
--signed-tags=verbatim openpgp-signed >output && + + # Change the tag message, which invalidates the signature. The tag + # message length should not change though, otherwise the corresponding + # `data ` command would have to be changed too. + sed "s/OpenPGP signed tag/OpenPGP forged tag/" output >modified && + + git -C import fast-import --quiet --signed-tags=$mode log 2>&1 && + + IMPORTED=$(git -C import rev-parse --verify refs/tags/openpgp-signed) && + test $OPENPGP_SIGNED != $IMPORTED && + git -C import cat-file tag "$IMPORTED" >actual && + test_grep ! -E "^-----BEGIN PGP SIGNATURE-----" actual && + test_must_be_empty log + ' + + test_expect_success GPGSM "keep valid X.509 signature with --signed-tags=$mode" ' + test_when_finished rm -rf import && + git init import && + + git fast-export --signed-tags=verbatim x509-signed >output && + git -C import fast-import --quiet --signed-tags=$mode log 2>&1 && + IMPORTED=$(git -C import rev-parse --verify refs/tags/x509-signed) && + test $X509_SIGNED = $IMPORTED && + git -C import cat-file tag x509-signed >actual && + test_grep -E "^-----BEGIN SIGNED MESSAGE-----" actual && + test_must_be_empty log + ' + + test_expect_success GPGSSH "keep valid SSH signature with --signed-tags=$mode" ' + test_when_finished rm -rf import && + git init import && + + test_config -C import gpg.ssh.allowedSignersFile "${GPGSSH_ALLOWED_SIGNERS}" && + + git fast-export --signed-tags=verbatim ssh-signed >output && + git -C import fast-import --quiet --signed-tags=$mode log 2>&1 && + IMPORTED=$(git -C import rev-parse --verify refs/tags/ssh-signed) && + test $SSH_SIGNED = $IMPORTED && + git -C import cat-file tag ssh-signed >actual && + test_grep -E "^-----BEGIN SSH SIGNATURE-----" actual && + test_must_be_empty log + ' +done + test_done From 2b1546c03cc3e02e51261fa38fe47a4f1b4e295b Mon Sep 17 00:00:00 2001 From: Justin Tobler Date: Thu, 26 Mar 2026 14:14:13 -0500 Subject: [PATCH 41/93] fast-import: add 'sign-if-invalid' mode to '--signed-tags=' 
With ee66c793f8 (fast-import: add mode to sign commits with invalid signatures, 2026-03-12), git-fast-import(1) learned to verify commit signatures during import and replace signatures that fail verification with a newly generated one. Extend the same behavior to signed tag objects by introducing a 'sign-if-invalid' mode for the '--signed-tags' option. Signed-off-by: Justin Tobler Signed-off-by: Junio C Hamano --- builtin/fast-import.c | 20 ++++++++++++--- t/t9306-fast-import-signed-tags.sh | 41 ++++++++++++++++++++++++++++-- 2 files changed, 55 insertions(+), 6 deletions(-) diff --git a/builtin/fast-import.c b/builtin/fast-import.c index 5e89829aea..783e0e7ab4 100644 --- a/builtin/fast-import.c +++ b/builtin/fast-import.c @@ -191,6 +191,7 @@ static const char *global_prefix; static enum sign_mode signed_tag_mode = SIGN_VERBATIM; static enum sign_mode signed_commit_mode = SIGN_VERBATIM; static const char *signed_commit_keyid; +static const char *signed_tag_keyid; /* Memory pools */ static struct mem_pool fi_mem_pool = { @@ -3110,6 +3111,19 @@ static void handle_tag_signature_if_invalid(struct strbuf *buf, strbuf_setlen(msg, sig_offset); + if (signed_tag_mode == SIGN_SIGN_IF_INVALID) { + strbuf_attach(&payload, sigc.payload, sigc.payload_len, + sigc.payload_len + 1); + sigc.payload = NULL; + strbuf_reset(&signature); + + if (sign_buffer(&payload, &signature, signed_tag_keyid, + SIGN_BUFFER_USE_DEFAULT_KEY)) + die(_("failed to sign tag object")); + + strbuf_addbuf(msg, &signature); + } + out: signature_check_clear(&sigc); strbuf_release(&signature); @@ -3142,6 +3156,7 @@ static void handle_tag_signature(struct strbuf *buf, struct strbuf *msg, const c /* Truncate the buffer to remove the signature */ strbuf_setlen(msg, sig_offset); break; + case SIGN_SIGN_IF_INVALID: case SIGN_STRIP_IF_INVALID: handle_tag_signature_if_invalid(buf, msg, sig_offset); break; @@ -3153,9 +3168,6 @@ static void handle_tag_signature(struct strbuf *buf, struct strbuf *msg, const c case 
SIGN_ABORT_IF_INVALID: die(_("'abort-if-invalid' is not a valid mode for " "git fast-import with --signed-tags=")); - case SIGN_SIGN_IF_INVALID: - die(_("'sign-if-invalid' is not a valid mode for " - "git fast-import with --signed-tags=")); default: BUG("invalid signed_tag_mode value %d from tag '%s'", signed_tag_mode, name); @@ -3749,7 +3761,7 @@ static int parse_one_option(const char *option) if (parse_sign_mode(option, &signed_commit_mode, &signed_commit_keyid)) usagef(_("unknown --signed-commits mode '%s'"), option); } else if (skip_prefix(option, "signed-tags=", &option)) { - if (parse_sign_mode(option, &signed_tag_mode, NULL)) + if (parse_sign_mode(option, &signed_tag_mode, &signed_tag_keyid)) usagef(_("unknown --signed-tags mode '%s'"), option); } else if (!strcmp(option, "quiet")) { show_stats = 0; diff --git a/t/t9306-fast-import-signed-tags.sh b/t/t9306-fast-import-signed-tags.sh index fd43b0b52a..bb4c8008ef 100755 --- a/t/t9306-fast-import-signed-tags.sh +++ b/t/t9306-fast-import-signed-tags.sh @@ -77,7 +77,7 @@ test_expect_success GPGSSH 'import SSH signed tag with --signed-tags=strip' ' test_grep ! "SSH SIGNATURE" out ' -for mode in strip-if-invalid +for mode in strip-if-invalid sign-if-invalid do test_expect_success GPG "import tag with no signature with --signed-tags=$mode" ' test_when_finished rm -rf import && @@ -117,7 +117,15 @@ do IMPORTED=$(git -C import rev-parse --verify refs/tags/openpgp-signed) && test $OPENPGP_SIGNED != $IMPORTED && git -C import cat-file tag "$IMPORTED" >actual && - test_grep ! -E "^-----BEGIN PGP SIGNATURE-----" actual && + + if test "$mode" = strip-if-invalid + then + test_grep ! 
-E "^-----BEGIN PGP SIGNATURE-----" actual + else + test_grep -E "^-----BEGIN PGP SIGNATURE-----" actual && + git -C import verify-tag "$IMPORTED" + fi && + test_must_be_empty log ' @@ -150,4 +158,33 @@ do ' done +test_expect_success GPGSSH 'sign invalid tag with explicit keyid' ' + test_when_finished rm -rf import && + git init import && + + git fast-export --signed-tags=verbatim ssh-signed >output && + + # Change the tag message, which invalidates the signature. The tag + # message length should not change though, otherwise the corresponding + # `data ` command would have to be changed too. + sed "s/SSH signed tag/SSH forged tag/" output >modified && + + # Configure the target repository with an invalid default signing key. + test_config -C import user.signingkey "not-a-real-key-id" && + test_config -C import gpg.format ssh && + test_config -C import gpg.ssh.allowedSignersFile "${GPGSSH_ALLOWED_SIGNERS}" && + test_must_fail git -C import fast-import --quiet \ + --signed-tags=sign-if-invalid /dev/null 2>&1 && + + # Import using explicitly provided signing key. + git -C import fast-import --quiet \ + --signed-tags=sign-if-invalid="${GPGSSH_KEY_PRIMARY}" actual && + test_grep -E "^-----BEGIN SSH SIGNATURE-----" actual && + git -C import verify-tag "$IMPORTED" +' + test_done From ddd7c7ab12a25850e96f550567ef06fb9bea0cc0 Mon Sep 17 00:00:00 2001 From: Justin Tobler Date: Thu, 26 Mar 2026 14:14:14 -0500 Subject: [PATCH 42/93] fast-import: add 'abort-if-invalid' mode to '--signed-tags=' In git-fast-import(1), the 'abort-if-invalid' mode for the '--signed-commits' option verifies commit signatures during import and aborts the entire operation when verification fails. Extend the same behavior to signed tag objects by introducing an 'abort-if-invalid' mode for the '--signed-tags' option. 
Signed-off-by: Justin Tobler Signed-off-by: Junio C Hamano --- builtin/fast-import.c | 7 ++++--- t/t9306-fast-import-signed-tags.sh | 10 +++++++++- 2 files changed, 13 insertions(+), 4 deletions(-) diff --git a/builtin/fast-import.c b/builtin/fast-import.c index 783e0e7ab4..cd1181023d 100644 --- a/builtin/fast-import.c +++ b/builtin/fast-import.c @@ -3109,6 +3109,9 @@ static void handle_tag_signature_if_invalid(struct strbuf *buf, if (!check_signature(&sigc, signature.buf, signature.len)) goto out; + if (signed_tag_mode == SIGN_ABORT_IF_INVALID) + die(_("aborting due to invalid signature")); + strbuf_setlen(msg, sig_offset); if (signed_tag_mode == SIGN_SIGN_IF_INVALID) { @@ -3156,6 +3159,7 @@ static void handle_tag_signature(struct strbuf *buf, struct strbuf *msg, const c /* Truncate the buffer to remove the signature */ strbuf_setlen(msg, sig_offset); break; + case SIGN_ABORT_IF_INVALID: case SIGN_SIGN_IF_INVALID: case SIGN_STRIP_IF_INVALID: handle_tag_signature_if_invalid(buf, msg, sig_offset); @@ -3165,9 +3169,6 @@ static void handle_tag_signature(struct strbuf *buf, struct strbuf *msg, const c case SIGN_ABORT: die(_("encountered signed tag; use " "--signed-tags= to handle it")); - case SIGN_ABORT_IF_INVALID: - die(_("'abort-if-invalid' is not a valid mode for " - "git fast-import with --signed-tags=")); default: BUG("invalid signed_tag_mode value %d from tag '%s'", signed_tag_mode, name); diff --git a/t/t9306-fast-import-signed-tags.sh b/t/t9306-fast-import-signed-tags.sh index bb4c8008ef..ec2b241cdb 100755 --- a/t/t9306-fast-import-signed-tags.sh +++ b/t/t9306-fast-import-signed-tags.sh @@ -77,7 +77,7 @@ test_expect_success GPGSSH 'import SSH signed tag with --signed-tags=strip' ' test_grep ! 
"SSH SIGNATURE" out ' -for mode in strip-if-invalid sign-if-invalid +for mode in strip-if-invalid sign-if-invalid abort-if-invalid do test_expect_success GPG "import tag with no signature with --signed-tags=$mode" ' test_when_finished rm -rf import && @@ -112,6 +112,14 @@ do # `data ` command would have to be changed too. sed "s/OpenPGP signed tag/OpenPGP forged tag/" output >modified && + if test "$mode" = abort-if-invalid + then + test_must_fail git -C import fast-import --quiet \ + --signed-tags=$mode log 2>&1 && + test_grep "aborting due to invalid signature" log && + return 0 + fi && + git -C import fast-import --quiet --signed-tags=$mode log 2>&1 && IMPORTED=$(git -C import rev-parse --verify refs/tags/openpgp-signed) && From 4d5fb9377bba1f45a940e10b0b7354fe7db2b301 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Thu, 26 Mar 2026 15:04:44 -0400 Subject: [PATCH 43/93] revision: make handle_dotdot() interface less confusing There are two very subtle bits to the way we parse ".." (and "...") range operators: 1. In handle_dotdot_1(), we assume that the incoming arguments "dotdot" and "arg" are part of the same string, with the first dot of the range-operator blanked to a NUL. Then when we want the full name (e.g., to report an error), we replace the NUL with a dot to restore the original string. 2. In handle_dotdot(), we take in a const string, but then we modify it by overwriting the range operator with a NUL. This has worked OK in practice since we tend to pass in buffers that are actually writeable (including argv), but segfaults with something like: handle_revision_arg("..HEAD", &revs, 0, 0); On top of that, building with recent versions of glibc causes the compiler to complain, because it notices when we use strchr() or strstr() to launder away constness (basically detecting the possibility of the segfault above via the type system). Instead of munging the buffer, let's make a temporary copy of the left-hand side of the range operator.
That avoids any const violations, and lets us pass around the parsed elements independently: the left-hand side, the right-hand side, the number of dots (via the "symmetric" flag), and the original full string for error messages. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- revision.c | 42 +++++++++++++++++++----------------------- 1 file changed, 19 insertions(+), 23 deletions(-) diff --git a/revision.c b/revision.c index 31808e3df0..f61262436f 100644 --- a/revision.c +++ b/revision.c @@ -2038,41 +2038,32 @@ static void prepare_show_merge(struct rev_info *revs) free(prune); } -static int dotdot_missing(const char *arg, char *dotdot, +static int dotdot_missing(const char *full_name, struct rev_info *revs, int symmetric) { if (revs->ignore_missing) return 0; - /* de-munge so we report the full argument */ - *dotdot = '.'; die(symmetric ? "Invalid symmetric difference expression %s" - : "Invalid revision range %s", arg); + : "Invalid revision range %s", full_name); } -static int handle_dotdot_1(const char *arg, char *dotdot, +static int handle_dotdot_1(const char *a_name, const char *b_name, + const char *full_name, int symmetric, struct rev_info *revs, int flags, int cant_be_filename, struct object_context *a_oc, struct object_context *b_oc) { - const char *a_name, *b_name; struct object_id a_oid, b_oid; struct object *a_obj, *b_obj; unsigned int a_flags, b_flags; - int symmetric = 0; unsigned int flags_exclude = flags ^ (UNINTERESTING | BOTTOM); unsigned int oc_flags = GET_OID_COMMITTISH | GET_OID_RECORD_PATH; - a_name = arg; if (!*a_name) a_name = "HEAD"; - b_name = dotdot + 2; - if (*b_name == '.') { - symmetric = 1; - b_name++; - } if (!*b_name) b_name = "HEAD"; @@ -2081,15 +2072,13 @@ static int handle_dotdot_1(const char *arg, char *dotdot, return -1; if (!cant_be_filename) { - *dotdot = '.'; - verify_non_filename(revs->prefix, arg); - *dotdot = '\0'; + verify_non_filename(revs->prefix, full_name); } a_obj = parse_object(revs->repo, &a_oid); 
b_obj = parse_object(revs->repo, &b_oid); if (!a_obj || !b_obj) - return dotdot_missing(arg, dotdot, revs, symmetric); + return dotdot_missing(full_name, revs, symmetric); if (!symmetric) { /* just A..B */ @@ -2103,7 +2092,7 @@ static int handle_dotdot_1(const char *arg, char *dotdot, a = lookup_commit_reference(revs->repo, &a_obj->oid); b = lookup_commit_reference(revs->repo, &b_obj->oid); if (!a || !b) - return dotdot_missing(arg, dotdot, revs, symmetric); + return dotdot_missing(full_name, revs, symmetric); if (repo_get_merge_bases(the_repository, a, b, &exclude) < 0) { commit_list_free(exclude); @@ -2132,16 +2121,23 @@ static int handle_dotdot(const char *arg, int cant_be_filename) { struct object_context a_oc = {0}, b_oc = {0}; - char *dotdot = strstr(arg, ".."); + const char *dotdot = strstr(arg, ".."); + char *tmp; + int symmetric = 0; int ret; if (!dotdot) return -1; - *dotdot = '\0'; - ret = handle_dotdot_1(arg, dotdot, revs, flags, cant_be_filename, - &a_oc, &b_oc); - *dotdot = '.'; + tmp = xmemdupz(arg, dotdot - arg); + dotdot += 2; + if (*dotdot == '.') { + symmetric = 1; + dotdot++; + } + ret = handle_dotdot_1(tmp, dotdot, arg, symmetric, revs, flags, + cant_be_filename, &a_oc, &b_oc); + free(tmp); object_context_release(&a_oc); object_context_release(&b_oc); From 268a9caaf29f0269147dacbea2c8d439c505c5ee Mon Sep 17 00:00:00 2001 From: Jeff King Date: Thu, 26 Mar 2026 15:05:25 -0400 Subject: [PATCH 44/93] rev-parse: simplify dotdot parsing The previous commit simplified the way that revision.c parses ".." and "..." range operators. But there's roughly similar code in rev-parse. This is less likely to trigger a segfault, as there is no library function which we'd pass a string literal to, but it still causes the compiler to complain about laundering away constness via strstr(). Let's give it the same treatment, copying the left-hand side of the range operator into its own string. 
Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- builtin/rev-parse.c | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/builtin/rev-parse.c b/builtin/rev-parse.c index 01a62800e8..5da9537113 100644 --- a/builtin/rev-parse.c +++ b/builtin/rev-parse.c @@ -267,21 +267,20 @@ static int show_file(const char *arg, int output_prefix) static int try_difference(const char *arg) { - char *dotdot; + const char *dotdot; struct object_id start_oid; struct object_id end_oid; const char *end; const char *start; + char *to_free; int symmetric; static const char head_by_default[] = "HEAD"; if (!(dotdot = strstr(arg, ".."))) return 0; + start = to_free = xmemdupz(arg, dotdot - arg); end = dotdot + 2; - start = arg; symmetric = (*end == '.'); - - *dotdot = 0; end += symmetric; if (!*end) @@ -295,7 +294,7 @@ static int try_difference(const char *arg) * Just ".."? That is not a range but the * pathspec for the parent directory. */ - *dotdot = '.'; + free(to_free); return 0; } @@ -308,7 +307,7 @@ static int try_difference(const char *arg) a = lookup_commit_reference(the_repository, &start_oid); b = lookup_commit_reference(the_repository, &end_oid); if (!a || !b) { - *dotdot = '.'; + free(to_free); return 0; } if (repo_get_merge_bases(the_repository, a, b, &exclude) < 0) @@ -318,10 +317,10 @@ static int try_difference(const char *arg) show_rev(REVERSED, &commit->object.oid, NULL); } } - *dotdot = '.'; + free(to_free); return 1; } - *dotdot = '.'; + free(to_free); return 0; } From 22b985ef193a18ec0e6602ea1838e3290a351dd6 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Thu, 26 Mar 2026 15:13:18 -0400 Subject: [PATCH 45/93] revision: avoid writing to const string for parent marks We take in a "const char *", but may write a NUL into it when parsing parent marks like "foo^-", since we want to isolate "foo" as a string for further parsing. 
This is usually OK, as our "const" strings are often actually argv strings which are technically writeable, but we'd segfault with a string literal like: handle_revision_arg("HEAD^-", &revs, 0, 0); Similar to how we handled dotdot in a previous commit, we can avoid this by making a temporary copy of the left-hand side of the string. The cost should be negligible compared to the rest of the parsing (like actually parsing commits to create their parent linked-lists). There is one slightly tricky thing, though. We parse some of the marks progressively, so that if we see "foo^!" for example, we'll strip that down to "foo" not just for calling add_parents_only(), but also for the rest of the function. That makes sense since we eventually want to pass "foo" to get_oid_with_context(). But it also means that we'll keep looking for other marks. In particular, "foo^-^!" is valid, though oddly "foo^!^-" would ignore the "^-". I'm not sure if this is a weird historical artifact of the implementation, or if there are important corner cases. So I've left the behavior unchanged. Each mark we find allocates a string with the mark stripped, which means we could allocate multiple times (and carry a free-able pointer for each to the end). But in practice we won't, because of the three marks, "^@" jumps immediately to the end without further parsing, and "^-^!" is nonsense that nobody would pass. So you'd get one allocation in general, and never more than two. Another obvious option would be to just copy "arg" up front and be OK with munging it. But that means we pay the cost even when we find no marks. We could make a single copy upon finding a mark and then munge, but that adds extra code to each site (checking whether somebody else allocated, and if not, adjusting our "mark" pointer to be relative to the copied string). I aimed for something that was clear and obvious, if a bit verbose.
Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- revision.c | 25 +++++++++++++++---------- 1 file changed, 15 insertions(+), 10 deletions(-) diff --git a/revision.c b/revision.c index f61262436f..fda405bf65 100644 --- a/revision.c +++ b/revision.c @@ -2147,7 +2147,10 @@ static int handle_dotdot(const char *arg, static int handle_revision_arg_1(const char *arg_, struct rev_info *revs, int flags, unsigned revarg_opt) { struct object_context oc = {0}; - char *mark; + const char *mark; + char *arg_minus_at = NULL; + char *arg_minus_excl = NULL; + char *arg_minus_dash = NULL; struct object *object; struct object_id oid; int local_flags; @@ -2174,18 +2177,17 @@ static int handle_revision_arg_1(const char *arg_, struct rev_info *revs, int fl mark = strstr(arg, "^@"); if (mark && !mark[2]) { - *mark = 0; - if (add_parents_only(revs, arg, flags, 0)) { + arg_minus_at = xmemdupz(arg, mark - arg); + if (add_parents_only(revs, arg_minus_at, flags, 0)) { ret = 0; goto out; } - *mark = '^'; } mark = strstr(arg, "^!"); if (mark && !mark[2]) { - *mark = 0; - if (!add_parents_only(revs, arg, flags ^ (UNINTERESTING | BOTTOM), 0)) - *mark = '^'; + arg_minus_excl = xmemdupz(arg, mark - arg); + if (add_parents_only(revs, arg_minus_excl, flags ^ (UNINTERESTING | BOTTOM), 0)) + arg = arg_minus_excl; } mark = strstr(arg, "^-"); if (mark) { @@ -2199,9 +2201,9 @@ static int handle_revision_arg_1(const char *arg_, struct rev_info *revs, int fl } } - *mark = 0; - if (!add_parents_only(revs, arg, flags ^ (UNINTERESTING | BOTTOM), exclude_parent)) - *mark = '^'; + arg_minus_dash = xmemdupz(arg, mark - arg); + if (add_parents_only(revs, arg_minus_dash, flags ^ (UNINTERESTING | BOTTOM), exclude_parent)) + arg = arg_minus_dash; } local_flags = 0; @@ -2236,6 +2238,9 @@ static int handle_revision_arg_1(const char *arg_, struct rev_info *revs, int fl out: object_context_release(&oc); + free(arg_minus_at); + free(arg_minus_excl); + free(arg_minus_dash); return ret; } From 
213b2138770d820bc28fde839f3e4df90a5d5d81 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Thu, 26 Mar 2026 15:14:24 -0400 Subject: [PATCH 46/93] rev-parse: avoid writing to const string for parent marks The previous commit cleared up some const confusion in handling parent marks in revision.c, but we have roughly the same code duplicated in rev-parse. This one is much easier to fix, because the handling of the shortened string is all done in one place, after detecting any marks (but without shortening the string between marks). As a side note, I suspect this means that it behaves differently than the revision.c parser for weird stuff like "foo^!^@^-", but that is outside the scope of this patch. While we are here, let's also rename the variable "dotdot", which is totally misleading (and which we already fixed in revision.c long ago via f632dedd8d (handle_revision_arg: stop using "dotdot" as a generic pointer, 2017-05-19)). Doing that here makes the diff a little messier, but it also lets the compiler help us make sure we did not miss any stray mentions of the variable while we are changing its semantics. 
Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- builtin/rev-parse.c | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/builtin/rev-parse.c b/builtin/rev-parse.c index 5da9537113..218b5f34d6 100644 --- a/builtin/rev-parse.c +++ b/builtin/rev-parse.c @@ -326,7 +326,7 @@ static int try_difference(const char *arg) static int try_parent_shorthands(const char *arg) { - char *dotdot; + const char *mark; struct object_id oid; struct commit *commit; struct commit_list *parents; @@ -334,38 +334,39 @@ static int try_parent_shorthands(const char *arg) int include_rev = 0; int include_parents = 0; int exclude_parent = 0; + char *to_free; - if ((dotdot = strstr(arg, "^!"))) { + if ((mark = strstr(arg, "^!"))) { include_rev = 1; - if (dotdot[2]) + if (mark[2]) return 0; - } else if ((dotdot = strstr(arg, "^@"))) { + } else if ((mark = strstr(arg, "^@"))) { include_parents = 1; - if (dotdot[2]) + if (mark[2]) return 0; - } else if ((dotdot = strstr(arg, "^-"))) { + } else if ((mark = strstr(arg, "^-"))) { include_rev = 1; exclude_parent = 1; - if (dotdot[2]) { + if (mark[2]) { char *end; - exclude_parent = strtoul(dotdot + 2, &end, 10); + exclude_parent = strtoul(mark + 2, &end, 10); if (*end != '\0' || !exclude_parent) return 0; } } else return 0; - *dotdot = 0; + arg = to_free = xmemdupz(arg, mark - arg); if (repo_get_oid_committish(the_repository, arg, &oid) || !(commit = lookup_commit_reference(the_repository, &oid))) { - *dotdot = '^'; + free(to_free); return 0; } if (exclude_parent && exclude_parent > commit_list_count(commit->parents)) { - *dotdot = '^'; + free(to_free); return 0; } @@ -386,7 +387,7 @@ static int try_parent_shorthands(const char *arg) free(name); } - *dotdot = '^'; + free(to_free); return 1; } From d385845d55e0e3a775fc47ac8d73a5ec41308db3 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Thu, 26 Mar 2026 15:23:20 -0400 Subject: [PATCH 47/93] config: store allocated string in non-const pointer When 
git-config matches a url, we copy the variable section name and store it in the "section" member of a urlmatch_config struct. That member is const, since the url-matcher will not touch it (and other callers really will have a const string). But that means that we have only a const pointer to our allocated string. We have to cast away the constness when we free it, and likewise when we assign NUL to tie off the "." separating the subsection and key. This latter happens implicitly via a strchr() call, but recent versions of glibc have added annotations that let the compiler detect that and complain. Let's keep our own "section" pointer for the non-const string, and then just point config.section at it. That avoids all of the casting, both explicit and implicit. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- builtin/config.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/builtin/config.c b/builtin/config.c index 7c4857be62..cf4ba0f7cc 100644 --- a/builtin/config.c +++ b/builtin/config.c @@ -838,6 +838,7 @@ static int get_urlmatch(const struct config_location_options *opts, const char *var, const char *url) { int ret; + char *section; char *section_tail; struct config_display_options display_opts = *_display_opts; struct string_list_item *item; @@ -851,8 +852,8 @@ static int get_urlmatch(const struct config_location_options *opts, if (!url_normalize(url, &config.url)) die("%s", config.url.err); - config.section = xstrdup_tolower(var); - section_tail = strchr(config.section, '.'); + config.section = section = xstrdup_tolower(var); + section_tail = strchr(section, '.'); if (section_tail) { *section_tail = '\0'; config.key = section_tail + 1; @@ -886,7 +887,7 @@ static int get_urlmatch(const struct config_location_options *opts, string_list_clear(&values, 1); free(config.url.url); - free((void *)config.section); + free(section); return ret; } From 81e29064371c4b4599b171ac71e73d1e21475a63 Mon Sep 17 00:00:00 2001 From: Taylor Blau 
Date: Fri, 27 Mar 2026 16:06:43 -0400 Subject: [PATCH 48/93] pack-objects: plug leak in `read_stdin_packs()` The `read_stdin_packs()` function added originally via 339bce27f4f (builtin/pack-objects.c: add '--stdin-packs' option, 2021-02-22) declares a `rev_info` struct but neglects to call `release_revisions()` on it before returning, creating the potential for a leak. The related change in 97ec43247c0 (pack-objects: declare 'rev_info' for '--stdin-packs' earlier, 2025-06-23) carried forward this oversight and did not address it. Ensure that we call `release_revisions()` appropriately to prevent a potential leak from this function. Note that in practice our `rev_info` here does not have a present leak, hence t5331 passes cleanly before this commit, even when built with SANITIZE=leak. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/pack-objects.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c index cd013c0b68..9a89bc5c4c 100644 --- a/builtin/pack-objects.c +++ b/builtin/pack-objects.c @@ -3968,6 +3968,8 @@ static void read_stdin_packs(enum stdin_packs_mode mode, int rev_list_unpacked) show_object_pack_hint, &mode); + release_revisions(&revs); + trace2_data_intmax("pack-objects", the_repository, "stdin_packs_found", stdin_packs_found_nr); trace2_data_intmax("pack-objects", the_repository, "stdin_packs_hints", From d31d1f2e06942046b5220942c77245725d7df2c1 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Fri, 27 Mar 2026 16:06:46 -0400 Subject: [PATCH 49/93] pack-objects: refactor `read_packs_list_from_stdin()` to use `strmap` The '--stdin-packs' mode of pack-objects maintains two separate string_lists: one for included packs, and one for excluded packs. Each list stores the pack basename as a string and the corresponding `packed_git` pointer in its `->util` field. 
This works, but makes it awkward to extend the set of pack "kinds" that pack-objects can accept via stdin, since each new kind would need its own string_list and duplicated handling. A future commit will want to do just this, so prepare for that change by handling the various "kinds" of packs specified over stdin in a more generic fashion. Namely, replace the two `string_list`s with a single `strmap` keyed on the pack basename, with values pointing to a new `struct stdin_pack_info`. This struct tracks both the `packed_git` pointer and a `kind` bitfield indicating whether the pack was specified as included or excluded. Extract the logic for sorting packs by mtime and adding their objects into a separate `stdin_packs_add_pack_entries()` helper. While we could have used a `string_list`, we must handle the case where the same pack is specified more than once. With a `string_list` only, we would have to pay a quadratic cost to either (a) insert elements into their sorted positions, or (b) a repeated linear search, which is accidentally quadratic. For that reason, use a strmap instead. This patch does not include any functional changes. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/pack-objects.c | 195 +++++++++++++++++++++++++---------------- 1 file changed, 120 insertions(+), 75 deletions(-) diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c index 9a89bc5c4c..945100b405 100644 --- a/builtin/pack-objects.c +++ b/builtin/pack-objects.c @@ -28,6 +28,7 @@ #include "reachable.h" #include "oid-array.h" #include "strvec.h" +#include "strmap.h" #include "list.h" #include "packfile.h" #include "object-file.h" @@ -3820,87 +3821,61 @@ static void show_commit_pack_hint(struct commit *commit, void *data) } +/* + * stdin_pack_info_kind specifies how a pack specified over stdin + * should be treated when pack-objects is invoked with --stdin-packs. 
+ * + * - STDIN_PACK_INCLUDE: objects in any packs with this flag bit set + * should be included in the output pack, unless they appear in an + * excluded pack. + * + * - STDIN_PACK_EXCLUDE_CLOSED: objects in any packs with this flag + * bit set should be excluded from the output pack. + * + * Objects in packs whose 'kind' bits include STDIN_PACK_INCLUDE are + * used as traversal tips when invoked with --stdin-packs=follow. + */ +enum stdin_pack_info_kind { + STDIN_PACK_INCLUDE = (1<<0), + STDIN_PACK_EXCLUDE_CLOSED = (1<<1), +}; + +struct stdin_pack_info { + struct packed_git *p; + enum stdin_pack_info_kind kind; +}; + static int pack_mtime_cmp(const void *_a, const void *_b) { - struct packed_git *a = ((const struct string_list_item*)_a)->util; - struct packed_git *b = ((const struct string_list_item*)_b)->util; + struct stdin_pack_info *a = ((const struct string_list_item*)_a)->util; + struct stdin_pack_info *b = ((const struct string_list_item*)_b)->util; /* * order packs by descending mtime so that objects are laid out * roughly as newest-to-oldest */ - if (a->mtime < b->mtime) + if (a->p->mtime < b->p->mtime) return 1; - else if (b->mtime < a->mtime) + else if (b->p->mtime < a->p->mtime) return -1; else return 0; } -static void read_packs_list_from_stdin(struct rev_info *revs) +static void stdin_packs_add_pack_entries(struct strmap *packs, + struct rev_info *revs) { - struct strbuf buf = STRBUF_INIT; - struct string_list include_packs = STRING_LIST_INIT_DUP; - struct string_list exclude_packs = STRING_LIST_INIT_DUP; - struct string_list_item *item = NULL; - struct packed_git *p; + struct string_list keys = STRING_LIST_INIT_NODUP; + struct string_list_item *item; + struct hashmap_iter iter; + struct strmap_entry *entry; - while (strbuf_getline(&buf, stdin) != EOF) { - if (!buf.len) - continue; + strmap_for_each_entry(packs, &iter, entry) { + struct stdin_pack_info *info = entry->value; + if (!info->p) + die(_("could not find pack '%s'"), entry->key); - if 
(*buf.buf == '^') - string_list_append(&exclude_packs, buf.buf + 1); - else - string_list_append(&include_packs, buf.buf); - - strbuf_reset(&buf); - } - - string_list_sort_u(&include_packs, 0); - string_list_sort_u(&exclude_packs, 0); - - repo_for_each_pack(the_repository, p) { - const char *pack_name = pack_basename(p); - - if ((item = string_list_lookup(&include_packs, pack_name))) { - if (exclude_promisor_objects && p->pack_promisor) - die(_("packfile %s is a promisor but --exclude-promisor-objects was given"), p->pack_name); - item->util = p; - } - if ((item = string_list_lookup(&exclude_packs, pack_name))) - item->util = p; - } - - /* - * Arguments we got on stdin may not even be packs. First - * check that to avoid segfaulting later on in - * e.g. pack_mtime_cmp(), excluded packs are handled below. - * - * Since we first parsed our STDIN and then sorted the input - * lines the pack we error on will be whatever line happens to - * sort first. This is lazy, it's enough that we report one - * bad case here, we don't need to report the first/last one, - * or all of them. - */ - for_each_string_list_item(item, &include_packs) { - struct packed_git *p = item->util; - if (!p) - die(_("could not find pack '%s'"), item->string); - if (!is_pack_valid(p)) - die(_("packfile %s cannot be accessed"), p->pack_name); - } - - /* - * Then, handle all of the excluded packs, marking them as - * kept in-core so that later calls to add_object_entry() - * discards any objects that are also found in excluded packs. - */ - for_each_string_list_item(item, &exclude_packs) { - struct packed_git *p = item->util; - if (!p) - die(_("could not find pack '%s'"), item->string); - p->pack_keep_in_core = 1; + string_list_append(&keys, entry->key)->util = info; } /* @@ -3908,19 +3883,89 @@ static void read_packs_list_from_stdin(struct rev_info *revs) * string_list_item's ->util pointer, which string_list_sort() does not * provide. 
*/ - QSORT(include_packs.items, include_packs.nr, pack_mtime_cmp); + QSORT(keys.items, keys.nr, pack_mtime_cmp); - for_each_string_list_item(item, &include_packs) { - struct packed_git *p = item->util; - for_each_object_in_pack(p, - add_object_entry_from_pack, - revs, - ODB_FOR_EACH_OBJECT_PACK_ORDER); + for_each_string_list_item(item, &keys) { + struct stdin_pack_info *info = item->util; + + if (info->kind & STDIN_PACK_INCLUDE) + for_each_object_in_pack(info->p, + add_object_entry_from_pack, + revs, + ODB_FOR_EACH_OBJECT_PACK_ORDER); } + string_list_clear(&keys, 0); +} + +static void stdin_packs_read_input(struct rev_info *revs) +{ + struct strbuf buf = STRBUF_INIT; + struct strmap packs = STRMAP_INIT; + struct packed_git *p; + + while (strbuf_getline(&buf, stdin) != EOF) { + struct stdin_pack_info *info; + enum stdin_pack_info_kind kind = STDIN_PACK_INCLUDE; + const char *key = buf.buf; + + if (!*key) + continue; + else if (*key == '^') + kind = STDIN_PACK_EXCLUDE_CLOSED; + + if (kind != STDIN_PACK_INCLUDE) + key++; + + info = strmap_get(&packs, key); + if (!info) { + CALLOC_ARRAY(info, 1); + strmap_put(&packs, key, info); + } + + info->kind |= kind; + + strbuf_reset(&buf); + } + + repo_for_each_pack(the_repository, p) { + struct stdin_pack_info *info; + + info = strmap_get(&packs, pack_basename(p)); + if (!info) + continue; + + if (info->kind & STDIN_PACK_INCLUDE) { + if (exclude_promisor_objects && p->pack_promisor) + die(_("packfile %s is a promisor but --exclude-promisor-objects was given"), p->pack_name); + + /* + * Arguments we got on stdin may not even be + * packs. First check that to avoid segfaulting + * later on in e.g. pack_mtime_cmp(), excluded + * packs are handled below. 
+ */ + if (!is_pack_valid(p)) + die(_("packfile %s cannot be accessed"), p->pack_name); + } + + if (info->kind & STDIN_PACK_EXCLUDE_CLOSED) { + /* + * Marking excluded packs as kept in-core so + * that later calls to add_object_entry() + * discards any objects that are also found in + * excluded packs. + */ + p->pack_keep_in_core = 1; + } + + info->p = p; + } + + stdin_packs_add_pack_entries(&packs, revs); + strbuf_release(&buf); - string_list_clear(&include_packs, 0); - string_list_clear(&exclude_packs, 0); + strmap_clear(&packs, 1); } static void add_unreachable_loose_objects(struct rev_info *revs); @@ -3957,7 +4002,7 @@ static void read_stdin_packs(enum stdin_packs_mode mode, int rev_list_unpacked) /* avoids adding objects in excluded packs */ ignore_packed_keep_in_core = 1; - read_packs_list_from_stdin(&revs); + stdin_packs_read_input(&revs); if (rev_list_unpacked) add_unreachable_loose_objects(&revs); From 5a4381f093508f0504a99b1abc9ca5e8ebd0901f Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Fri, 27 Mar 2026 16:06:49 -0400 Subject: [PATCH 50/93] t7704: demonstrate failure with once-cruft objects above the geometric split Add a test demonstrating a case where geometric repacking fails to produce a pack with full object closure, thus making it impossible to write a reachability bitmap. Mark the test with 'test_expect_failure' for now. The subsequent commit will explain the precise failure mode, and implement a fix. 
Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- t/t7704-repack-cruft.sh | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/t/t7704-repack-cruft.sh b/t/t7704-repack-cruft.sh index aa2e2e6ad8..77133395b5 100755 --- a/t/t7704-repack-cruft.sh +++ b/t/t7704-repack-cruft.sh @@ -869,4 +869,26 @@ test_expect_success 'repack --write-midx includes cruft when already geometric' ) ' +test_expect_failure 'repack rescues once-cruft objects above geometric split' ' + git config repack.midxMustContainCruft false && + + test_commit reachable && + test_commit unreachable && + + unreachable="$(git rev-parse HEAD)" && + + git reset --hard HEAD^ && + git tag -d unreachable && + git reflog expire --all --expire=all && + + git repack --cruft -d && + + echo $unreachable | git pack-objects .git/objects/pack/pack && + + test_commit new && + + git update-ref refs/heads/other $unreachable && + git repack --geometric=2 -d --write-midx --write-bitmap-index +' + test_done From 3f7c0e722e2733aede32b1e531caf83e7043d1bd Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Fri, 27 Mar 2026 16:06:51 -0400 Subject: [PATCH 51/93] pack-objects: support excluded-open packs with --stdin-packs In cd846bacc7d (pack-objects: introduce '--stdin-packs=follow', 2025-06-23), pack-objects learned to traverse through commits in included packs when using '--stdin-packs=follow', rescuing reachable objects from unlisted packs into the output. When we encounter a commit in an excluded pack during this rescuing phase we will traverse through its parents. But because we set `revs.no_kept_objects = 1`, commit simplification will prevent us from showing it via `get_revision()`. (In practice, `--stdin-packs=follow` walks commits down to the roots, but only opens up trees for ones that do not appear in an excluded pack.) But there are certain cases where we *do* need to see the parents of an object in an excluded pack. 
Namely, if an object is rescue-able, but only reachable from object(s) which appear in excluded packs, then commit simplification will exclude those commits from the object traversal, and we will never see a copy of that object, and thus not rescue it. This is what causes the failure in the previous commit during repacking. When performing a geometric repack, packs above the geometric split that weren't part of the previous MIDX (e.g., packs pushed directly into `$GIT_DIR/objects/pack`) may not have full object closure. When those packs are listed as excluded via the '^' marker, the reachability traversal encounters the sequence described above, and may miss objects which we expect to rescue with `--stdin-packs=follow`. Introduce a new "excluded-open" pack prefix, '!'. Like '^'-prefixed packs, objects from '!'-prefixed packs are excluded from the resulting pack. But unlike '^', commits in '!'-prefixed packs *are* used as starting points for the follow traversal, and the traversal does not treat them as a closure boundary. In order to distinguish excluded-closed from excluded-open packs during the traversal, introduce a new `pack_keep_in_core_open` bit on `struct packed_git`, along with a corresponding `KEPT_PACK_IN_CORE_OPEN` flag for the kept-pack cache. In `add_object_entry_from_pack()`, move the `want_object_in_pack()` check to *after* `add_pending_oid()`. This is necessary so that commits from excluded-open packs are added as traversal tips even though their objects won't appear in the output. As a consequence, the caller `for_each_object_in_pack()` will always provide a non-NULL 'p', hence we are able to drop the "if (p)" conditional. The `include_check` and `include_check_obj` callbacks on `rev_info` are used to halt the walk at closed-excluded packs, since objects behind a '^' boundary are guaranteed to have closure and need not be rescued. 
The following commit will make use of this new functionality within the repack layer to resolve the test failure demonstrated in the previous commit. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- Documentation/git-pack-objects.adoc | 25 ++++-- builtin/pack-objects.c | 116 ++++++++++++++++++++++------ packfile.c | 3 +- packfile.h | 2 + t/t5331-pack-objects-stdin.sh | 105 +++++++++++++++++++++++++ 5 files changed, 218 insertions(+), 33 deletions(-) diff --git a/Documentation/git-pack-objects.adoc b/Documentation/git-pack-objects.adoc index 71b9682485..b78175fbe1 100644 --- a/Documentation/git-pack-objects.adoc +++ b/Documentation/git-pack-objects.adoc @@ -94,13 +94,24 @@ base-name:: included packs (those not beginning with `^`), excluding any objects listed in the excluded packs (beginning with `^`). + -When `mode` is "follow", objects from packs not listed on stdin receive -special treatment. Objects within unlisted packs will be included if -those objects are (1) reachable from the included packs, and (2) not -found in any excluded packs. This mode is useful, for example, to -resurrect once-unreachable objects found in cruft packs to generate -packs which are closed under reachability up to the boundary set by the -excluded packs. +When `mode` is "follow" packs may additionally be prefixed with `!`, +indicating that they are excluded but not necessarily closed under +reachability. In addition to objects in included packs, the resulting +pack may include additional objects based on the following: ++ +-- +* If any packs are marked with `!`, then objects reachable from such + packs or included ones via objects outside of excluded-closed packs + will be included. In this case, all `^` packs are treated as closed + under reachability. +* Otherwise (if there are no `!` packs), objects within unlisted packs + will be included if those objects are (1) reachable from the + included packs, and (2) not found in any excluded packs. 
+-- ++ +This mode is useful, for example, to resurrect once-unreachable +objects found in cruft packs to generate packs which are closed under +reachability up to the boundary set by the excluded packs. + Incompatible with `--revs`, or options that imply `--revs` (such as `--all`), with the exception of `--unpacked`, which is compatible. diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c index 945100b405..7b97784d6c 100644 --- a/builtin/pack-objects.c +++ b/builtin/pack-objects.c @@ -217,6 +217,7 @@ static int have_non_local_packs; static int incremental; static int ignore_packed_keep_on_disk; static int ignore_packed_keep_in_core; +static int ignore_packed_keep_in_core_open; static int ignore_packed_keep_in_core_has_cruft; static int allow_ofs_delta; static struct pack_idx_option pack_idx_opts; @@ -1618,7 +1619,8 @@ static int want_found_object(const struct object_id *oid, int exclude, /* * Then handle .keep first, as we have a fast(er) path there. */ - if (ignore_packed_keep_on_disk || ignore_packed_keep_in_core) { + if (ignore_packed_keep_on_disk || ignore_packed_keep_in_core || + ignore_packed_keep_in_core_open) { /* * Set the flags for the kept-pack cache to be the ones we want * to ignore. 
@@ -1632,6 +1634,8 @@ static int want_found_object(const struct object_id *oid, int exclude, flags |= KEPT_PACK_ON_DISK; if (ignore_packed_keep_in_core) flags |= KEPT_PACK_IN_CORE; + if (ignore_packed_keep_in_core_open) + flags |= KEPT_PACK_IN_CORE_OPEN; /* * If the object is in a pack that we want to ignore, *and* we @@ -1643,6 +1647,8 @@ static int want_found_object(const struct object_id *oid, int exclude, return 0; if (ignore_packed_keep_in_core && p->pack_keep_in_core) return 0; + if (ignore_packed_keep_in_core_open && p->pack_keep_in_core_open) + return 0; if (has_object_kept_pack(p->repo, oid, flags)) return 0; } else { @@ -3742,6 +3748,7 @@ static int add_object_entry_from_pack(const struct object_id *oid, void *_data) { off_t ofs; + struct object_info oi = OBJECT_INFO_INIT; enum object_type type = OBJ_NONE; display_progress(progress_state, ++nr_seen); @@ -3749,29 +3756,34 @@ static int add_object_entry_from_pack(const struct object_id *oid, if (have_duplicate_entry(oid, 0)) return 0; + stdin_packs_found_nr++; + ofs = nth_packed_object_offset(p, pos); + + oi.typep = &type; + if (packed_object_info(p, ofs, &oi) < 0) { + die(_("could not get type of object %s in pack %s"), + oid_to_hex(oid), p->pack_name); + } else if (type == OBJ_COMMIT) { + struct rev_info *revs = _data; + /* + * commits in included packs are used as starting points + * for the subsequent revision walk + * + * Note that we do want to walk through commits that are + * present in excluded-open ('!') packs to pick up any + * objects reachable from them not present in the + * excluded-closed ('^') packs. + * + * However, we'll only add those objects to the packing + * list after checking `want_object_in_pack()` below. 
+ */ + add_pending_oid(revs, NULL, oid, 0); + } + if (!want_object_in_pack(oid, 0, &p, &ofs)) return 0; - if (p) { - struct object_info oi = OBJECT_INFO_INIT; - - oi.typep = &type; - if (packed_object_info(p, ofs, &oi) < 0) { - die(_("could not get type of object %s in pack %s"), - oid_to_hex(oid), p->pack_name); - } else if (type == OBJ_COMMIT) { - struct rev_info *revs = _data; - /* - * commits in included packs are used as starting points for the - * subsequent revision walk - */ - add_pending_oid(revs, NULL, oid, 0); - } - - stdin_packs_found_nr++; - } - create_object_entry(oid, type, 0, 0, 0, p, ofs); return 0; @@ -3832,12 +3844,18 @@ static void show_commit_pack_hint(struct commit *commit, void *data) * - STDIN_PACK_EXCLUDE_CLOSED: objects in any packs with this flag * bit set should be excluded from the output pack. * - * Objects in packs whose 'kind' bits include STDIN_PACK_INCLUDE are - * used as traversal tips when invoked with --stdin-packs=follow. + * - STDIN_PACK_EXCLUDE_OPEN: objects in any packs with this flag + * bit set should be excluded from the output pack, but are not + * guaranteed to be closed under reachability. + * + * Objects in packs whose 'kind' bits include STDIN_PACK_INCLUDE or + * STDIN_PACK_EXCLUDE_OPEN are used as traversal tips when invoked + * with --stdin-packs=follow. 
*/ enum stdin_pack_info_kind { STDIN_PACK_INCLUDE = (1<<0), STDIN_PACK_EXCLUDE_CLOSED = (1<<1), + STDIN_PACK_EXCLUDE_OPEN = (1<<2), }; struct stdin_pack_info { @@ -3862,6 +3880,17 @@ static int pack_mtime_cmp(const void *_a, const void *_b) return 0; } +static int stdin_packs_include_check_obj(struct object *obj, void *data UNUSED) +{ + return !has_object_kept_pack(to_pack.repo, &obj->oid, + KEPT_PACK_IN_CORE); +} + +static int stdin_packs_include_check(struct commit *commit, void *data) +{ + return stdin_packs_include_check_obj((struct object *)commit, data); +} + static void stdin_packs_add_pack_entries(struct strmap *packs, struct rev_info *revs) { @@ -3888,7 +3917,19 @@ static void stdin_packs_add_pack_entries(struct strmap *packs, for_each_string_list_item(item, &keys) { struct stdin_pack_info *info = item->util; - if (info->kind & STDIN_PACK_INCLUDE) + if (info->kind & STDIN_PACK_EXCLUDE_OPEN) { + /* + * When open-excluded packs ("!") are present, stop + * the parent walk at closed-excluded ("^") packs. + * Objects behind a "^" boundary are guaranteed to + * have closure and should not be rescued. + */ + revs->include_check = stdin_packs_include_check; + revs->include_check_obj = stdin_packs_include_check_obj; + } + + if ((info->kind & STDIN_PACK_INCLUDE) || + (info->kind & STDIN_PACK_EXCLUDE_OPEN)) for_each_object_in_pack(info->p, add_object_entry_from_pack, revs, @@ -3898,7 +3939,8 @@ static void stdin_packs_add_pack_entries(struct strmap *packs, string_list_clear(&keys, 0); } -static void stdin_packs_read_input(struct rev_info *revs) +static void stdin_packs_read_input(struct rev_info *revs, + enum stdin_packs_mode mode) { struct strbuf buf = STRBUF_INIT; struct strmap packs = STRMAP_INIT; @@ -3913,6 +3955,8 @@ static void stdin_packs_read_input(struct rev_info *revs) continue; else if (*key == '^') kind = STDIN_PACK_EXCLUDE_CLOSED; + else if (*key == '!' 
&& mode == STDIN_PACKS_MODE_FOLLOW) + kind = STDIN_PACK_EXCLUDE_OPEN; if (kind != STDIN_PACK_INCLUDE) key++; @@ -3959,6 +4003,20 @@ static void stdin_packs_read_input(struct rev_info *revs) p->pack_keep_in_core = 1; } + if (info->kind & STDIN_PACK_EXCLUDE_OPEN) { + /* + * Marking excluded open packs as kept in-core + * (open) for the same reason as we marked + * exclude closed packs as kept in-core. + * + * Use a separate flag here to ensure we don't + * halt our traversal at these packs, since they + * are not guaranteed to have closure. + * + */ + p->pack_keep_in_core_open = 1; + } + info->p = p; } @@ -4002,7 +4060,15 @@ static void read_stdin_packs(enum stdin_packs_mode mode, int rev_list_unpacked) /* avoids adding objects in excluded packs */ ignore_packed_keep_in_core = 1; - stdin_packs_read_input(&revs); + if (mode == STDIN_PACKS_MODE_FOLLOW) { + /* + * In '--stdin-packs=follow' mode, additionally ignore + * objects in excluded-open packs to prevent them from + * appearing in the resulting pack. 
+ */ + ignore_packed_keep_in_core_open = 1; + } + stdin_packs_read_input(&revs, mode); if (rev_list_unpacked) add_unreachable_loose_objects(&revs); diff --git a/packfile.c b/packfile.c index 215a23e42b..076e444e32 100644 --- a/packfile.c +++ b/packfile.c @@ -2246,7 +2246,8 @@ struct packed_git **packfile_store_get_kept_pack_cache(struct packfile_store *st struct packed_git *p = e->pack; if ((p->pack_keep && (flags & KEPT_PACK_ON_DISK)) || - (p->pack_keep_in_core && (flags & KEPT_PACK_IN_CORE))) { + (p->pack_keep_in_core && (flags & KEPT_PACK_IN_CORE)) || + (p->pack_keep_in_core_open && (flags & KEPT_PACK_IN_CORE_OPEN))) { ALLOC_GROW(packs, nr + 1, alloc); packs[nr++] = p; } diff --git a/packfile.h b/packfile.h index 8b04a258a7..b7735c1977 100644 --- a/packfile.h +++ b/packfile.h @@ -28,6 +28,7 @@ struct packed_git { unsigned pack_local:1, pack_keep:1, pack_keep_in_core:1, + pack_keep_in_core_open:1, freshened:1, do_not_close:1, pack_promisor:1, @@ -266,6 +267,7 @@ int packfile_store_freshen_object(struct packfile_store *store, enum kept_pack_type { KEPT_PACK_ON_DISK = (1 << 0), KEPT_PACK_IN_CORE = (1 << 1), + KEPT_PACK_IN_CORE_OPEN = (1 << 2), }; /* diff --git a/t/t5331-pack-objects-stdin.sh b/t/t5331-pack-objects-stdin.sh index 7eb79bc2cd..c74b5861af 100755 --- a/t/t5331-pack-objects-stdin.sh +++ b/t/t5331-pack-objects-stdin.sh @@ -415,4 +415,109 @@ test_expect_success '--stdin-packs=follow tolerates missing commits' ' stdin_packs__follow_with_only HEAD HEAD^{tree} ' +test_expect_success '--stdin-packs=follow with open-excluded packs' ' + test_when_finished "rm -fr repo" && + + git init repo && + ( + cd repo && + git config set maintenance.auto false && + + git branch -M main && + + # Create the following commit structure: + # + # A <-- B <-- D (main) + # ^ + # \ + # C (other) + test_commit A && + test_commit B && + git checkout -B other && + test_commit C && + git checkout main && + test_commit D && + + A="$(echo A | git pack-objects --revs $packdir/pack)" && + 
B="$(echo A..B | git pack-objects --revs $packdir/pack)" && + C="$(echo B..C | git pack-objects --revs $packdir/pack)" && + D="$(echo B..D | git pack-objects --revs $packdir/pack)" && + + C_ONLY="$(git rev-parse other | git pack-objects $packdir/pack)" && + + git prune-packed && + + # Create a pack using --stdin-packs=follow where: + # + # - pack D is included, + # - pack C_ONLY is excluded, but open, + # - pack B is excluded, but closed, and + # - packs A and C are unknown + # + # The resulting pack should therefore contain: + # + # - objects from the included pack D, + # - A.t (rescued via D^{tree}), and + # - C^{tree} and C.t (rescued via pack C_ONLY) + # + # , but should omit: + # + # - C (excluded via C_ONLY), + # - objects from pack B (trivially excluded-closed) + # - A and A^{tree} (ancestors of B) + P=$(git pack-objects --stdin-packs=follow $packdir/pack <<-EOF + pack-$D.pack + !pack-$C_ONLY.pack + ^pack-$B.pack + EOF + ) && + + { + objects_in_packs $D && + git rev-parse A:A.t "C^{tree}" C:C.t + } >expect.raw && + sort expect.raw >expect && + + objects_in_packs $P >actual && + test_cmp expect actual + ) +' + +test_expect_success '--stdin-packs with !-delimited pack without follow' ' + test_when_finished "rm -fr repo" && + + git init repo && + ( + test_commit A && + test_commit B && + test_commit C && + + A="$(echo A | git pack-objects --revs $packdir/pack)" && + B="$(echo A..B | git pack-objects --revs $packdir/pack)" && + C="$(echo B..C | git pack-objects --revs $packdir/pack)" && + + cat >in <<-EOF && + !pack-$A.pack + pack-$B.pack + pack-$C.pack + EOF + + # Without --stdin-packs=follow, we treat the first + # line of input as a literal packfile name, and thus + # expect pack-objects to complain of a missing pack + test_must_fail git pack-objects --stdin-packs --stdout \ + >/dev/null <in 2>err && + test_grep "could not find pack .!pack-$A.pack." 
err && + + # With --stdin-packs=follow, we treat the first line + # of input as indicating pack-$A.pack is an excluded + # open pack, and thus expect pack-objects to succeed + P=$(git pack-objects --stdin-packs=follow $packdir/pack <in) && + + objects_in_packs $B $C >expect && + objects_in_packs $P >actual && + test_cmp expect actual + ) +' + test_done From 9ad29df36d7c762677b5a4ecc6a6dc229c818b2a Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Fri, 27 Mar 2026 16:06:54 -0400 Subject: [PATCH 52/93] repack: mark non-MIDX packs above the split as excluded-open In 5ee86c273bf (repack: exclude cruft pack(s) from the MIDX where possible, 2025-06-23), geometric repacking learned to exclude cruft packs from the MIDX when 'repack.midxMustContainCruft' is set to 'false'. This works because packs generated with '--stdin-packs=follow' rescue any once-unreachable objects that later become reachable, making the resulting packs closed under reachability without needing the cruft pack in the MIDX. However, packs above the geometric split that were not part of the previous MIDX may not have full object closure. When such packs are marked as excluded-closed ('^'), pack-objects treats them as a reachability boundary and does not traverse through them during the follow pass, potentially leaving the resulting pack without full closure. Fix this by marking packs above the geometric split that were not in the previous MIDX as excluded-open ('!') instead of excluded-closed ('^'). This causes pack-objects to walk through their commits during the follow pass, rescuing any reachable objects not present in the closed-excluded packs. Note that MIDXs which were generated prior to this change and are unlucky enough to not be closed under reachability may still exhibit this bug, as we treat all MIDX'd packs as closed. That is true in an overwhelming number of cases, since in order to have a non-closed MIDX you would have to: - Generate a pack via an earlier geometric repack that is not closed under reachability. 
- Store that pack in the MIDX. - Avoid picking any commits to receive reachability bitmaps which happen to reach objects from which the missing objects are reachable. In the extremely rare chance that all of the above should happen, an all-into-one repack will resolve the issue. Unfortunately, there is no perfect way to determine whether a MIDX'd pack is closed outside of ensuring that there is a '1' bit in at least one bitmap for every bit position corresponding to objects in that pack. While this is possible to do, this approach would treat MIDX'd packs as open in cases where there is at least one object that is not reachable from the subset of commits selected for bitmapping. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/repack.c | 19 +++++++++++++++++-- t/t7704-repack-cruft.sh | 2 +- 2 files changed, 18 insertions(+), 3 deletions(-) diff --git a/builtin/repack.c b/builtin/repack.c index f6bb04bef7..4c5a82c2c8 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -369,8 +369,23 @@ int cmd_repack(int argc, */ for (i = 0; i < geometry.split; i++) fprintf(in, "%s\n", pack_basename(geometry.pack[i])); - for (i = geometry.split; i < geometry.pack_nr; i++) - fprintf(in, "^%s\n", pack_basename(geometry.pack[i])); + for (i = geometry.split; i < geometry.pack_nr; i++) { + const char *basename = pack_basename(geometry.pack[i]); + char marker = '^'; + + if (!midx_must_contain_cruft && + !string_list_has_string(&existing.midx_packs, + basename)) { + /* + * Assume non-MIDX'd packs are not + * necessarily closed under + * reachability. 
+ */ + marker = '!'; + } + + fprintf(in, "%c%s\n", marker, basename); + } fclose(in); } diff --git a/t/t7704-repack-cruft.sh b/t/t7704-repack-cruft.sh index 77133395b5..9e03b04315 100755 --- a/t/t7704-repack-cruft.sh +++ b/t/t7704-repack-cruft.sh @@ -869,7 +869,7 @@ test_expect_success 'repack --write-midx includes cruft when already geometric' ) ' -test_expect_failure 'repack rescues once-cruft objects above geometric split' ' +test_expect_success 'repack rescues once-cruft objects above geometric split' ' git config repack.midxMustContainCruft false && test_commit reachable && From d8e34f971b31ae6583e796626c7280732fca68e1 Mon Sep 17 00:00:00 2001 From: Zakariyah Ali Date: Sat, 28 Mar 2026 00:40:19 +0100 Subject: [PATCH 53/93] t2000: modernise overall structure This test script that dates back to 2005 certainly shows its age and both its style and the way the tests are laid out do not match the modern standard. * Executables that prepare the data used to test the command should be inside the test_expect_success block in modern tests. * In modern tests, running a command that is being tested, making sure it succeeds, and inspecting other side effects that are expected, are all done in a single test_expect_success block. * A test_expect_success block in modern tests is laid out as test_expect_success 'title of the test' ' body of the test && ... body of the test ' not as test_expect_success \ 'title of the test' \ 'body of the test && ... body of the test' which is in a prehistoric style. * In modern tests, each &&-chained statement in the body of the test_expect_success block is indented with a horizontal tab, unlike the prehistoric style that used 4-space indent. 
Signed-off-by: Zakariyah Ali Signed-off-by: Junio C Hamano --- t/t2000-conflict-when-checking-files-out.sh | 122 +++++++++++--------- 1 file changed, 66 insertions(+), 56 deletions(-) diff --git a/t/t2000-conflict-when-checking-files-out.sh b/t/t2000-conflict-when-checking-files-out.sh index f18616ad2b..af199d8191 100755 --- a/t/t2000-conflict-when-checking-files-out.sh +++ b/t/t2000-conflict-when-checking-files-out.sh @@ -35,30 +35,30 @@ show_files() { sed -e 's/^\([0-9]*\) [^ ]* [0-9a-f]* /tr: \1 /' } -date >path0 -mkdir path1 -date >path1/file1 +test_expect_success 'prepare files path0 and path1/file1' ' + date >path0 && + mkdir path1 && + date >path1/file1 && + git update-index --add path0 path1/file1 +' -test_expect_success \ - 'git update-index --add various paths.' \ - 'git update-index --add path0 path1/file1' +test_expect_success 'prepare working tree files with D/F conflicts' ' + rm -fr path0 path1 && + mkdir path0 && + date >path0/file0 && + date >path1 +' -rm -fr path0 path1 -mkdir path0 -date >path0/file0 -date >path1 +test_expect_success 'git checkout-index without -f should fail on conflicting work tree.' ' + test_must_fail git checkout-index -a +' -test_expect_success \ - 'git checkout-index without -f should fail on conflicting work tree.' \ - 'test_must_fail git checkout-index -a' - -test_expect_success \ - 'git checkout-index with -f should succeed.' \ - 'git checkout-index -f -a' - -test_expect_success \ - 'git checkout-index conflicting paths.' \ - 'test -f path0 && test -d path1 && test -f path1/file1' +test_expect_success 'git checkout-index with -f should succeed.' ' + git checkout-index -f -a && + test_path_is_file path0 && + test_path_is_dir path1 && + test_path_is_file path1/file1 +' test_expect_success SYMLINKS 'checkout-index -f twice with --prefix' ' mkdir -p tar/get && @@ -83,53 +83,63 @@ test_expect_success SYMLINKS 'checkout-index -f twice with --prefix' ' # path path3 is occupied by a non-directory. 
With "-f" it should remove # the symlink path3 and create directory path3 and file path3/file1. -mkdir path2 -date >path2/file0 -test_expect_success \ - 'git update-index --add path2/file0' \ - 'git update-index --add path2/file0' -test_expect_success \ - 'writing tree out with git write-tree' \ - 'tree1=$(git write-tree)' +test_expect_success 'prepare path2/file0 and index' ' + mkdir path2 && + date >path2/file0 && + git update-index --add path2/file0 +' + +test_expect_success 'write tree with path2/file0' ' + tree1=$(git write-tree) +' + test_debug 'show_files $tree1' -mkdir path3 -date >path3/file1 -test_expect_success \ - 'git update-index --add path3/file1' \ - 'git update-index --add path3/file1' -test_expect_success \ - 'writing tree out with git write-tree' \ - 'tree2=$(git write-tree)' +test_expect_success 'prepare path3/file1 and index' ' + mkdir path3 && + date >path3/file1 && + git update-index --add path3/file1 +' + +test_expect_success 'write tree with path3/file1' ' + tree2=$(git write-tree) +' + test_debug 'show_files $tree2' -rm -fr path3 -test_expect_success \ - 'read previously written tree and checkout.' \ - 'git read-tree -m $tree1 && git checkout-index -f -a' +test_expect_success 'read previously written tree and checkout.' ' + rm -fr path3 && + git read-tree -m $tree1 && + git checkout-index -f -a +' + test_debug 'show_files $tree1' -test_expect_success \ - 'add a symlink' \ - 'test_ln_s_add path2 path3' -test_expect_success \ - 'writing tree out with git write-tree' \ - 'tree3=$(git write-tree)' +test_expect_success 'add a symlink' ' + test_ln_s_add path2 path3 +' + +test_expect_success 'write tree with symlink path3' ' + tree3=$(git write-tree) +' + test_debug 'show_files $tree3' # Morten says "Got that?" here. # Test begins. -test_expect_success \ - 'read previously written tree and checkout.' \ - 'git read-tree $tree2 && git checkout-index -f -a' +test_expect_success 'read previously written tree and checkout.' 
' + git read-tree $tree2 && + git checkout-index -f -a +' + test_debug 'show_files $tree2' -test_expect_success \ - 'checking out conflicting path with -f' \ - 'test ! -h path2 && test -d path2 && - test ! -h path3 && test -d path3 && - test ! -h path2/file0 && test -f path2/file0 && - test ! -h path3/file1 && test -f path3/file1' +test_expect_success 'checking out conflicting path with -f' ' + test_path_is_dir_not_symlink path2 && + test_path_is_dir_not_symlink path3 && + test_path_is_file_not_symlink path2/file0 && + test_path_is_file_not_symlink path3/file1 +' test_done From 849988bc7499d11f127305a8f20a3a054eb0b0c0 Mon Sep 17 00:00:00 2001 From: Trieu Huynh Date: Sat, 28 Mar 2026 22:59:35 +0900 Subject: [PATCH 54/93] t6101: avoid suppressing git's exit code Update t6101-rev-parse-parents.sh to redirect git-rev-parse output to a temporary file instead of piping it directly to not hide the exit code of git commands behind pipes, as a crash in git might go unnoticed. Signed-off-by: Trieu Huynh Signed-off-by: Junio C Hamano --- t/t6101-rev-parse-parents.sh | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/t/t6101-rev-parse-parents.sh b/t/t6101-rev-parse-parents.sh index 5f55ab98d3..7281889717 100755 --- a/t/t6101-rev-parse-parents.sh +++ b/t/t6101-rev-parse-parents.sh @@ -39,7 +39,8 @@ test_expect_success 'setup' ' ' test_expect_success 'start is valid' ' - git rev-parse start | grep "^$OID_REGEX$" + git rev-parse start >actual && + test_grep "^$OID_REGEX$" actual ' test_expect_success 'start^0' ' From 0f0ce0762503cb8f58a3ce07052a639e36e07ed5 Mon Sep 17 00:00:00 2001 From: Shreyansh Paliwal Date: Sat, 28 Mar 2026 20:51:58 +0530 Subject: [PATCH 55/93] doc: gitignore: clarify pattern base for info/exclude and core.excludesFile The pattern format section describes how patterns are interpreted relative to the location of a .gitignore file, but does not mention the behavior for exclude sources outside the working tree. 
Clarify that patterns from $GIT_DIR/info/exclude and core.excludesFile are treated as if they are specified at the root of the working tree, so a leading '/' anchors matches at the repository root. Reported-by: Dan Drake Signed-off-by: Shreyansh Paliwal Signed-off-by: Junio C Hamano --- Documentation/gitignore.adoc | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/Documentation/gitignore.adoc b/Documentation/gitignore.adoc index 9fccab4ae8..a3d24e5c34 100644 --- a/Documentation/gitignore.adoc +++ b/Documentation/gitignore.adoc @@ -96,6 +96,11 @@ PATTERN FORMAT particular `.gitignore` file itself. Otherwise the pattern may also match at any level below the `.gitignore` level. + - Patterns read from exclude sources that are outside the working tree, + such as $GIT_DIR/info/exclude and core.excludesFile, are treated as if + they are specified at the root of the working tree, i.e. a leading "/" + in such patterns anchors the match at the root of the repository. + - If there is a separator at the end of the pattern then the pattern will only match directories, otherwise the pattern can match both files and directories. From 3402850ee16f5a7a05d68cb29271a0e558659aaa Mon Sep 17 00:00:00 2001 From: Quentin Bernet Date: Mon, 30 Mar 2026 13:24:35 +0000 Subject: [PATCH 56/93] docs: fix "git stash [push]" documentation Both the synopsis and explanation are incorrect and contradict each other. The synopsis claims "push" can only be omitted when you do not give any options and arguments. The explanation correctly claims that non-option arguments are not allowed, except pathspec elements preceded by double hyphens. But it also adds "-p" to the list of exceptions, even though it is an option argument. 
Signed-off-by: Quentin Bernet Signed-off-by: Junio C Hamano --- Documentation/git-stash.adoc | 10 ++++------ builtin/stash.c | 4 ++-- 2 files changed, 6 insertions(+), 8 deletions(-) diff --git a/Documentation/git-stash.adoc b/Documentation/git-stash.adoc index 235d57ddd8..b05c990ecd 100644 --- a/Documentation/git-stash.adoc +++ b/Documentation/git-stash.adoc @@ -14,10 +14,10 @@ git stash drop [-q | --quiet] [<stash>] git stash pop [--index] [-q | --quiet] [<stash>] git stash apply [--index] [-q | --quiet] [<stash>] git stash branch <branchname> [<stash>] -git stash [push [-p | --patch] [-S | --staged] [-k | --[no-]keep-index] [-q | --quiet] +git stash [push] [-p | --patch] [-S | --staged] [-k | --[no-]keep-index] [-q | --quiet] [-u | --include-untracked] [-a | --all] [(-m | --message) <message>] [--pathspec-from-file=<file> [--pathspec-file-nul]] - [--] [<pathspec>...]] + [--] [<pathspec>...] git stash save [-p | --patch] [-S | --staged] [-k | --[no-]keep-index] [-q | --quiet] [-u | --include-untracked] [-a | --all] [<message>] git stash clear @@ -60,10 +60,8 @@ COMMANDS the description along with the stashed state. + For quickly making a snapshot, you can omit "push". In this mode, -non-option arguments are not allowed to prevent a misspelled -subcommand from making an unwanted stash entry. The two exceptions to this -are `stash -p` which acts as alias for `stash push -p` and pathspec elements, -which are allowed after a double hyphen `--` for disambiguation. +pathspec elements are only allowed after a double hyphen `--` +to prevent a misspelled subcommand from making an unwanted stash entry. 
`save [-p | --patch] [-S | --staged] [-k | --[no-]keep-index] [-u | --include-untracked] [-a | --all] [-q | --quiet] [<message>]`:: diff --git a/builtin/stash.c b/builtin/stash.c index 95c5005b0b..0d27b2fb1f 100644 --- a/builtin/stash.c +++ b/builtin/stash.c @@ -50,10 +50,10 @@ #define BUILTIN_STASH_STORE_USAGE \ N_("git stash store [(-m | --message) <message>] [-q | --quiet] <commit>") #define BUILTIN_STASH_PUSH_USAGE \ - N_("git stash [push [-p | --patch] [-S | --staged] [-k | --[no-]keep-index] [-q | --quiet]\n" \ + N_("git stash [push] [-p | --patch] [-S | --staged] [-k | --[no-]keep-index] [-q | --quiet]\n" \ " [-u | --include-untracked] [-a | --all] [(-m | --message) <message>]\n" \ " [--pathspec-from-file=<file> [--pathspec-file-nul]]\n" \ - " [--] [<pathspec>...]]") + " [--] [<pathspec>...]") #define BUILTIN_STASH_SAVE_USAGE \ N_("git stash save [-p | --patch] [-S | --staged] [-k | --[no-]keep-index] [-q | --quiet]\n" \ " [-u | --include-untracked] [-a | --all] [<message>]") From 80871f356e88d23cc32cd852fd4a4548e861f47c Mon Sep 17 00:00:00 2001 From: Runxi Yu Date: Mon, 30 Mar 2026 13:18:20 +0200 Subject: [PATCH 57/93] t5516: test updateInstead with worktree and unborn bare HEAD This is a regression test which should presently fail, to demonstrate the behavior I encountered that looks like a bug. When a bare repository has a worktree checked out on a separate branch, receive.denyCurrentBranch=updateInstead should allow a push to that branch and update the linked worktree, as long as the linked worktree is clean. But, if the bare repository's own HEAD is repointed to an unborn branch, the push is rejected with "Working directory has staged changes", even though the linked worktree itself is clean. This test is essentially a minimal working example of what I encountered while actually using Git; it might not be the optimal way to demonstrate the underlying bug. I suspect builtin/receive-pack.c is using the bare repository's HEAD even when comparing it to the worktree's index. 
Signed-off-by: Runxi Yu Signed-off-by: Junio C Hamano --- t/t5516-fetch-push.sh | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/t/t5516-fetch-push.sh b/t/t5516-fetch-push.sh index 29e2f17608..f44250c38f 100755 --- a/t/t5516-fetch-push.sh +++ b/t/t5516-fetch-push.sh @@ -1816,6 +1816,24 @@ test_expect_success 'denyCurrentBranch and bare repository worktrees' ' test_must_fail git push --delete bare.git wt ' +# NEEDSWORK: updateInstead unexpectedly fails when bare HEAD points to unborn +# branch (or probably any ref that differs from the target worktree) despite +# the target worktree being clean. This seems to be because receive-pack.c +# diffs the target worktree index against the bare repository HEAD. +test_expect_failure 'updateInstead with bare repository worktree and unborn bare HEAD' ' + test_when_finished "rm -fr bare.git cloned" && + git clone --bare . bare.git && + git -C bare.git worktree add wt && + git -C bare.git config receive.denyCurrentBranch updateInstead && + git -C bare.git symbolic-ref HEAD refs/heads/unborn && + test_must_fail git -C bare.git rev-parse -q --verify HEAD^{commit} && + git clone . cloned && + test_commit -C cloned mozzarella && + git -C cloned push ../bare.git HEAD:wt && + test_path_exists bare.git/wt/mozzarella.t && + test "$(git -C cloned rev-parse HEAD)" = "$(git -C bare.git/wt rev-parse HEAD)" +' + test_expect_success 'refuse fetch to current branch of worktree' ' test_when_finished "git worktree remove --force wt && git branch -D wt" && git worktree add wt && From b310755ecaf4459eddd4f602b3cb02e793c01177 Mon Sep 17 00:00:00 2001 From: Pablo Sabater Date: Mon, 30 Mar 2026 13:18:21 +0200 Subject: [PATCH 58/93] t5516: clean up cloned and new-wt in denyCurrentBranch and worktrees test The 'denyCurrentBranch and worktrees' test creates a 'cloned' and a 'new-wt' but it doesn't clean them after the test. This makes other tests that use the same name after this one to fail. 
Add test_when_finished to clean them at the end. Signed-off-by: Pablo Sabater Signed-off-by: Junio C Hamano --- t/t5516-fetch-push.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/t/t5516-fetch-push.sh b/t/t5516-fetch-push.sh index f44250c38f..c40f2790d8 100755 --- a/t/t5516-fetch-push.sh +++ b/t/t5516-fetch-push.sh @@ -1792,6 +1792,7 @@ test_expect_success 'updateInstead with push-to-checkout hook' ' ' test_expect_success 'denyCurrentBranch and worktrees' ' + test_when_finished "rm -fr cloned && git worktree remove --force new-wt" && git worktree add new-wt && git clone . cloned && test_commit -C cloned first && From 8151f4fe7e4bf36f2656ae849a4ffaf386708178 Mon Sep 17 00:00:00 2001 From: Pablo Sabater Date: Mon, 30 Mar 2026 13:18:22 +0200 Subject: [PATCH 59/93] receive-pack: use worktree HEAD for updateInstead When a bare repo has linked worktrees, and its HEAD points to an unborn branch, pushing to a wt branch with updateInstead fails and rejects the push, even if the wt is clean. This happens because HEAD is checked only for the bare repo context, instead of the wt. Remove head_has_history and check for worktree->head_oid which does have the correct HEAD of the wt. Update the test added by Runxi's patch to expect success. Signed-off-by: Pablo Sabater Signed-off-by: Junio C Hamano --- builtin/receive-pack.c | 39 +++++++++++++++------------------------ t/t5516-fetch-push.sh | 6 +----- 2 files changed, 16 insertions(+), 29 deletions(-) diff --git a/builtin/receive-pack.c b/builtin/receive-pack.c index e34edff406..26a3a0bcd3 100644 --- a/builtin/receive-pack.c +++ b/builtin/receive-pack.c @@ -1380,32 +1380,16 @@ static int update_shallow_ref(struct command *cmd, struct shallow_info *si) return 0; } -/* - * NEEDSWORK: we should consolidate various implementations of "are we - * on an unborn branch?" test into one, and make the unified one more - * robust. 
!get_sha1() based check used here and elsewhere would not - * allow us to tell an unborn branch from corrupt ref, for example. - * For the purpose of fixing "deploy-to-update does not work when - * pushing into an empty repository" issue, this should suffice for - * now. - */ -static int head_has_history(void) -{ - struct object_id oid; - - return !repo_get_oid(the_repository, "HEAD", &oid); -} - static const char *push_to_deploy(unsigned char *sha1, struct strvec *env, - const char *work_tree) + const struct worktree *worktree) { struct child_process child = CHILD_PROCESS_INIT; strvec_pushl(&child.args, "update-index", "-q", "--ignore-submodules", "--refresh", NULL); strvec_pushv(&child.env, env->v); - child.dir = work_tree; + child.dir = worktree->path; child.no_stdin = 1; child.stdout_to_stderr = 1; child.git_cmd = 1; @@ -1417,7 +1401,7 @@ static const char *push_to_deploy(unsigned char *sha1, strvec_pushl(&child.args, "diff-files", "--quiet", "--ignore-submodules", "--", NULL); strvec_pushv(&child.env, env->v); - child.dir = work_tree; + child.dir = worktree->path; child.no_stdin = 1; child.stdout_to_stderr = 1; child.git_cmd = 1; @@ -1427,9 +1411,16 @@ static const char *push_to_deploy(unsigned char *sha1, child_process_init(&child); strvec_pushl(&child.args, "diff-index", "--quiet", "--cached", "--ignore-submodules", - /* diff-index with either HEAD or an empty tree */ - head_has_history() ? "HEAD" : empty_tree_oid_hex(the_repository->hash_algo), - "--", NULL); + /* + * diff-index with either HEAD or an empty tree + * + * NEEDSWORK: is_null_oid() cannot know whether it's an + * unborn HEAD or a corrupt ref. It works for now because + * it's only needed to know if we are comparing HEAD or an + * empty tree. + */ + !is_null_oid(&worktree->head_oid) ? 
"HEAD" : + empty_tree_oid_hex(the_repository->hash_algo), "--", NULL); strvec_pushv(&child.env, env->v); child.no_stdin = 1; child.no_stdout = 1; @@ -1442,7 +1433,7 @@ static const char *push_to_deploy(unsigned char *sha1, strvec_pushl(&child.args, "read-tree", "-u", "-m", hash_to_hex(sha1), NULL); strvec_pushv(&child.env, env->v); - child.dir = work_tree; + child.dir = worktree->path; child.no_stdin = 1; child.no_stdout = 1; child.stdout_to_stderr = 0; @@ -1490,7 +1481,7 @@ static const char *update_worktree(unsigned char *sha1, const struct worktree *w retval = push_to_checkout(sha1, &invoked_hook, &env, worktree->path); if (!invoked_hook) - retval = push_to_deploy(sha1, &env, worktree->path); + retval = push_to_deploy(sha1, &env, worktree); strvec_clear(&env); free(git_dir); diff --git a/t/t5516-fetch-push.sh b/t/t5516-fetch-push.sh index c40f2790d8..117cfa051f 100755 --- a/t/t5516-fetch-push.sh +++ b/t/t5516-fetch-push.sh @@ -1817,11 +1817,7 @@ test_expect_success 'denyCurrentBranch and bare repository worktrees' ' test_must_fail git push --delete bare.git wt ' -# NEEDSWORK: updateInstead unexpectedly fails when bare HEAD points to unborn -# branch (or probably any ref that differs from the target worktree) despite -# the target worktree being clean. This seems to be because receive-pack.c -# diffs the target worktree index against the bare repository HEAD. -test_expect_failure 'updateInstead with bare repository worktree and unborn bare HEAD' ' +test_expect_success 'updateInstead with bare repository worktree and unborn bare HEAD' ' test_when_finished "rm -fr bare.git cloned" && git clone --bare . 
bare.git && git -C bare.git worktree add wt && From a7aca156779f6c40d454f04ed151d2f482396f0f Mon Sep 17 00:00:00 2001 From: Jayesh Daga Date: Mon, 30 Mar 2026 18:38:06 +0000 Subject: [PATCH 60/93] read-cache: use istate->repo for trace2 logging trace2 calls in read-cache.c use the global 'the_repository', even though the relevant index_state provides an explicit repository pointer via 'istate->repo'. Using the global repository can result in incorrect trace2 output when multiple repository instances are in use, as events may be attributed to the wrong repository. Use 'istate->repo' instead to ensure correct repository attribution. Signed-off-by: Jayesh Daga Signed-off-by: Junio C Hamano --- read-cache.c | 32 ++++++++------------------------ 1 file changed, 8 insertions(+), 24 deletions(-) diff --git a/read-cache.c b/read-cache.c index 5049f9baca..b1074fbf06 100644 --- a/read-cache.c +++ b/read-cache.c @@ -2309,13 +2309,9 @@ int do_read_index(struct index_state *istate, const char *path, int must_exist) } munmap((void *)mmap, mmap_size); - /* - * TODO trace2: replace "the_repository" with the actual repo instance - * that is associated with the given "istate". - */ - trace2_data_intmax("index", the_repository, "read/version", + trace2_data_intmax("index", istate->repo, "read/version", istate->version); - trace2_data_intmax("index", the_repository, "read/cache_nr", + trace2_data_intmax("index", istate->repo, "read/cache_nr", istate->cache_nr); /* @@ -2360,16 +2356,12 @@ int read_index_from(struct index_state *istate, const char *path, if (istate->initialized) return istate->cache_nr; - /* - * TODO trace2: replace "the_repository" with the actual repo instance - * that is associated with the given "istate". 
- */ - trace2_region_enter_printf("index", "do_read_index", the_repository, + trace2_region_enter_printf("index", "do_read_index", istate->repo, "%s", path); trace_performance_enter(); ret = do_read_index(istate, path, 0); trace_performance_leave("read cache %s", path); - trace2_region_leave_printf("index", "do_read_index", the_repository, + trace2_region_leave_printf("index", "do_read_index", istate->repo, "%s", path); split_index = istate->split_index; @@ -3096,13 +3088,9 @@ static int do_write_index(struct index_state *istate, struct tempfile *tempfile, istate->timestamp.nsec = ST_MTIME_NSEC(st); trace_performance_since(start, "write index, changed mask = %x", istate->cache_changed); - /* - * TODO trace2: replace "the_repository" with the actual repo instance - * that is associated with the given "istate". - */ - trace2_data_intmax("index", the_repository, "write/version", + trace2_data_intmax("index", istate->repo, "write/version", istate->version); - trace2_data_intmax("index", the_repository, "write/cache_nr", + trace2_data_intmax("index", istate->repo, "write/cache_nr", istate->cache_nr); ret = 0; @@ -3144,14 +3132,10 @@ static int do_write_locked_index(struct index_state *istate, return ret; } - /* - * TODO trace2: replace "the_repository" with the actual repo instance - * that is associated with the given "istate". 
- */ - trace2_region_enter_printf("index", "do_write_index", the_repository, + trace2_region_enter_printf("index", "do_write_index", istate->repo, "%s", get_lock_file_path(lock)); ret = do_write_index(istate, lock->tempfile, write_extensions, flags); - trace2_region_leave_printf("index", "do_write_index", the_repository, + trace2_region_leave_printf("index", "do_write_index", istate->repo, "%s", get_lock_file_path(lock)); if (was_full) From 882c8e351d700e1738e696dfbc6312617f394570 Mon Sep 17 00:00:00 2001 From: Jayesh Daga Date: Tue, 31 Mar 2026 10:02:53 +0000 Subject: [PATCH 61/93] cache-tree: use index state repository in trace2 calls trace2 calls in cache-tree.c use the global 'the_repository', even though cache_tree_update() has access to an explicit repository pointer via 'istate->repo'. Using the global repository can result in incorrect trace2 output when multiple repository instances are in use, as events may be attributed to the wrong repository. Use 'istate->repo' in cache_tree_update() to ensure correct repository attribution. Other call sites are left unchanged as they do not have access to a repository instance. 
Signed-off-by: Jayesh Daga Signed-off-by: Junio C Hamano --- cache-tree.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cache-tree.c b/cache-tree.c index 60bcc07c3b..e4f9174c4a 100644 --- a/cache-tree.c +++ b/cache-tree.c @@ -488,12 +488,12 @@ int cache_tree_update(struct index_state *istate, int flags) prefetch_cache_entries(istate, must_check_existence); trace_performance_enter(); - trace2_region_enter("cache_tree", "update", the_repository); + trace2_region_enter("cache_tree", "update", istate->repo); transaction = odb_transaction_begin(the_repository->objects); i = update_one(istate->cache_tree, istate->cache, istate->cache_nr, "", 0, &skip, flags); odb_transaction_commit(transaction); - trace2_region_leave("cache_tree", "update", the_repository); + trace2_region_leave("cache_tree", "update", istate->repo); trace_performance_leave("cache_tree_update"); if (i < 0) return i; From 55903dc87bee544c314706c509168afbbe14d262 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Wed, 1 Apr 2026 01:57:46 +0200 Subject: [PATCH 62/93] CodingGuidelines: document our style for flags We have recently iterated a bit on our style for flags. Document this. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- Documentation/CodingGuidelines | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/Documentation/CodingGuidelines b/Documentation/CodingGuidelines index b8670751f5..4992e52093 100644 --- a/Documentation/CodingGuidelines +++ b/Documentation/CodingGuidelines @@ -668,6 +668,18 @@ For C programs: unsigned other_field:1; unsigned field_with_longer_name:1; + - When a function `F` accepts flags, those flags should be defined as `enum + F_flags`. Individual flag definitions should start with `F` and be in + all-uppercase letters. Flag values should be represented via bit shifts. + E.g. 
+ + enum frobnicate_flags { + FROBNICATE_FOO = (1 << 0), + FROBNICATE_BAR = (1 << 1), + }; + + int frobnicate(enum frobnicate_flags flags); + - Array names should be named in the singular form if the individual items are subject of use. E.g.: From 75c702624d9a5f60a78c2d4d5e8de83468c9c5ec Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Wed, 1 Apr 2026 01:57:47 +0200 Subject: [PATCH 63/93] treewide: use enum for `odb_for_each_object()` flags We've got a couple of callsites where we pass `odb_for_each_object()` flags, but accept an `unsigned` flags field instead of the corresponding enum. Adapt these to accept the enum type instead. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- odb.c | 2 +- odb.h | 2 +- packfile.c | 2 +- packfile.h | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/odb.c b/odb.c index 3f94a53df1..9a11c60048 100644 --- a/odb.c +++ b/odb.c @@ -922,7 +922,7 @@ int odb_for_each_object(struct object_database *odb, const struct object_info *request, odb_for_each_object_cb cb, void *cb_data, - unsigned flags) + enum odb_for_each_object_flags flags) { struct odb_for_each_object_options opts = { .flags = flags, diff --git a/odb.h b/odb.h index 984bafca9d..09affaf6a5 100644 --- a/odb.h +++ b/odb.h @@ -522,7 +522,7 @@ int odb_for_each_object(struct object_database *odb, const struct object_info *request, odb_for_each_object_cb cb, void *cb_data, - unsigned flags); + enum odb_for_each_object_flags flags); enum odb_count_objects_flags { /* diff --git a/packfile.c b/packfile.c index ee9c7ea1d1..5d3b772973 100644 --- a/packfile.c +++ b/packfile.c @@ -2299,7 +2299,7 @@ int has_object_kept_pack(struct repository *r, const struct object_id *oid, int for_each_object_in_pack(struct packed_git *p, each_packed_object_fn cb, void *data, - unsigned flags) + enum odb_for_each_object_flags flags) { uint32_t i; int r = 0; diff --git a/packfile.h b/packfile.h index 45b35973f0..3eb10d6b65 100644 --- a/packfile.h +++ b/packfile.h @@ 
-352,7 +352,7 @@ typedef int each_packed_object_fn(const struct object_id *oid, void *data); int for_each_object_in_pack(struct packed_git *p, each_packed_object_fn, void *data, - unsigned flags); + enum odb_for_each_object_flags flags); /* * Iterate through all packed objects in the given packfile store and invoke From ff2e9d85d61f2f51793acbdb4bad68d48cc8bb85 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Wed, 1 Apr 2026 01:57:48 +0200 Subject: [PATCH 64/93] odb: rename `odb_write_object()` flags Rename `odb_write_object()` flags to be properly prefixed with the function name. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- cache-tree.c | 2 +- object-file.c | 4 ++-- odb.h | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/cache-tree.c b/cache-tree.c index 60bcc07c3b..60059edfb0 100644 --- a/cache-tree.c +++ b/cache-tree.c @@ -456,7 +456,7 @@ static int update_one(struct cache_tree *it, hash_object_file(the_hash_algo, buffer.buf, buffer.len, OBJ_TREE, &it->oid); } else if (odb_write_object_ext(the_repository->objects, buffer.buf, buffer.len, OBJ_TREE, - &it->oid, NULL, flags & WRITE_TREE_SILENT ? WRITE_OBJECT_SILENT : 0)) { + &it->oid, NULL, flags & WRITE_TREE_SILENT ? 
ODB_WRITE_OBJECT_SILENT : 0)) { strbuf_release(&buffer); return -1; } diff --git a/object-file.c b/object-file.c index 4f77ce0982..db1a420ab6 100644 --- a/object-file.c +++ b/object-file.c @@ -909,7 +909,7 @@ static int start_loose_object_common(struct odb_source *source, fd = create_tmpfile(source->odb->repo, tmp_file, filename); if (fd < 0) { - if (flags & WRITE_OBJECT_SILENT) + if (flags & ODB_WRITE_OBJECT_SILENT) return -1; else if (errno == EACCES) return error(_("insufficient permission for adding " @@ -1042,7 +1042,7 @@ static int write_loose_object(struct odb_source *source, utb.actime = mtime; utb.modtime = mtime; if (utime(tmp_file.buf, &utb) < 0 && - !(flags & WRITE_OBJECT_SILENT)) + !(flags & ODB_WRITE_OBJECT_SILENT)) warning_errno(_("failed utime() on %s"), tmp_file.buf); } diff --git a/odb.h b/odb.h index 09affaf6a5..083c25609e 100644 --- a/odb.h +++ b/odb.h @@ -568,12 +568,12 @@ enum { * changes that so that the object will be written as a loose object * and persisted. */ - WRITE_OBJECT_PERSIST = (1 << 0), + ODB_WRITE_OBJECT_PERSIST = (1 << 0), /* * Do not print an error in case something goes wrong. */ - WRITE_OBJECT_SILENT = (1 << 1), + ODB_WRITE_OBJECT_SILENT = (1 << 1), }; /* From b2d421ece6a8e095394e76930e6929ee036571ef Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Wed, 1 Apr 2026 01:57:49 +0200 Subject: [PATCH 65/93] odb: use enum for `odb_write_object` flags We've got a couple of functions that accept `odb_write_object()` flags, but all of them accept the flags as an `unsigned` integer. In fact, we don't even have an `enum` for the flags field. Introduce this `enum` and adapt functions accordingly, following our coding style. 
Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- object-file.c | 3 ++- object-file.h | 3 ++- odb.c | 2 +- odb.h | 4 ++-- odb/source-files.c | 2 +- odb/source.h | 4 ++-- 6 files changed, 10 insertions(+), 8 deletions(-) diff --git a/object-file.c b/object-file.c index db1a420ab6..2146104de8 100644 --- a/object-file.c +++ b/object-file.c @@ -1169,7 +1169,8 @@ cleanup: int odb_source_loose_write_object(struct odb_source *source, const void *buf, unsigned long len, enum object_type type, struct object_id *oid, - struct object_id *compat_oid_in, unsigned flags) + struct object_id *compat_oid_in, + enum odb_write_object_flags flags) { const struct git_hash_algo *algo = source->odb->repo->hash_algo; const struct git_hash_algo *compat = source->odb->repo->compat_hash_algo; diff --git a/object-file.h b/object-file.h index 3686f182e4..5241b8dd5c 100644 --- a/object-file.h +++ b/object-file.h @@ -68,7 +68,8 @@ int odb_source_loose_freshen_object(struct odb_source *source, int odb_source_loose_write_object(struct odb_source *source, const void *buf, unsigned long len, enum object_type type, struct object_id *oid, - struct object_id *compat_oid_in, unsigned flags); + struct object_id *compat_oid_in, + enum odb_write_object_flags flags); int odb_source_loose_write_stream(struct odb_source *source, struct odb_write_stream *stream, size_t len, diff --git a/odb.c b/odb.c index 9a11c60048..8220661356 100644 --- a/odb.c +++ b/odb.c @@ -1053,7 +1053,7 @@ int odb_write_object_ext(struct object_database *odb, enum object_type type, struct object_id *oid, struct object_id *compat_oid, - unsigned flags) + enum odb_write_object_flags flags) { return odb_source_write_object(odb->sources, buf, len, type, oid, compat_oid, flags); diff --git a/odb.h b/odb.h index 083c25609e..9aadc1177a 100644 --- a/odb.h +++ b/odb.h @@ -561,7 +561,7 @@ int odb_find_abbrev_len(struct object_database *odb, int min_len, unsigned *out); -enum { +enum odb_write_object_flags { /* * By default, 
`odb_write_object()` does not actually write anything * into the object store, but only computes the object ID. This flag @@ -589,7 +589,7 @@ int odb_write_object_ext(struct object_database *odb, enum object_type type, struct object_id *oid, struct object_id *compat_oid, - unsigned flags); + enum odb_write_object_flags flags); static inline int odb_write_object(struct object_database *odb, const void *buf, unsigned long len, diff --git a/odb/source-files.c b/odb/source-files.c index 76797569de..b5abd20e97 100644 --- a/odb/source-files.c +++ b/odb/source-files.c @@ -161,7 +161,7 @@ static int odb_source_files_write_object(struct odb_source *source, enum object_type type, struct object_id *oid, struct object_id *compat_oid, - unsigned flags) + enum odb_write_object_flags flags) { return odb_source_loose_write_object(source, buf, len, type, oid, compat_oid, flags); diff --git a/odb/source.h b/odb/source.h index a9d7d0b96f..f706e0608a 100644 --- a/odb/source.h +++ b/odb/source.h @@ -197,7 +197,7 @@ struct odb_source { enum object_type type, struct object_id *oid, struct object_id *compat_oid, - unsigned flags); + enum odb_write_object_flags flags); /* * This callback is expected to persist the given object stream into @@ -405,7 +405,7 @@ static inline int odb_source_write_object(struct odb_source *source, enum object_type type, struct object_id *oid, struct object_id *compat_oid, - unsigned flags) + enum odb_write_object_flags flags) { return source->write_object(source, buf, len, type, oid, compat_oid, flags); From c63911b052dc286de5daddba8d4a20fd59348cee Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Wed, 1 Apr 2026 01:57:50 +0200 Subject: [PATCH 66/93] odb: rename `odb_has_object()` flags Rename `odb_has_object()` flags to be properly prefixed with the function name. 
Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- builtin/cat-file.c | 2 +- builtin/fetch.c | 4 ++-- builtin/fsck.c | 2 +- builtin/index-pack.c | 2 +- builtin/receive-pack.c | 2 +- builtin/remote.c | 2 +- builtin/show-ref.c | 2 +- builtin/unpack-objects.c | 2 +- cache-tree.c | 8 ++++---- fetch-pack.c | 4 ++-- http-push.c | 8 ++++---- http-walker.c | 4 ++-- list-objects.c | 2 +- notes.c | 2 +- object-file.c | 2 +- odb.c | 6 +++--- odb.h | 8 ++++---- reflog.c | 2 +- refs.c | 2 +- remote.c | 2 +- shallow.c | 6 +++--- walker.c | 2 +- 22 files changed, 38 insertions(+), 38 deletions(-) diff --git a/builtin/cat-file.c b/builtin/cat-file.c index cd13a3a89f..d9fbad5358 100644 --- a/builtin/cat-file.c +++ b/builtin/cat-file.c @@ -161,7 +161,7 @@ static int cat_one_file(int opt, const char *exp_type, const char *obj_name) case 'e': ret = !odb_has_object(the_repository->objects, &oid, - HAS_OBJECT_RECHECK_PACKED | HAS_OBJECT_FETCH_PROMISOR); + ODB_HAS_OBJECT_RECHECK_PACKED | ODB_HAS_OBJECT_FETCH_PROMISOR); goto cleanup; case 'w': diff --git a/builtin/fetch.c b/builtin/fetch.c index 4795b2a13c..a22c319467 100644 --- a/builtin/fetch.c +++ b/builtin/fetch.c @@ -946,7 +946,7 @@ static int update_local_ref(struct ref *ref, int fast_forward = 0; if (!odb_has_object(the_repository->objects, &ref->new_oid, - HAS_OBJECT_RECHECK_PACKED | HAS_OBJECT_FETCH_PROMISOR)) + ODB_HAS_OBJECT_RECHECK_PACKED | ODB_HAS_OBJECT_FETCH_PROMISOR)) die(_("object %s not found"), oid_to_hex(&ref->new_oid)); if (oideq(&ref->old_oid, &ref->new_oid)) { @@ -1396,7 +1396,7 @@ static int check_exist_and_connected(struct ref *ref_map) */ for (r = rm; r; r = r->next) { if (!odb_has_object(the_repository->objects, &r->old_oid, - HAS_OBJECT_RECHECK_PACKED)) + ODB_HAS_OBJECT_RECHECK_PACKED)) return -1; } diff --git a/builtin/fsck.c b/builtin/fsck.c index 9bab32effe..4bd0faeff1 100644 --- a/builtin/fsck.c +++ b/builtin/fsck.c @@ -163,7 +163,7 @@ static int mark_object(struct object *obj, enum 
object_type type, if (!(obj->flags & HAS_OBJ)) { if (parent && !odb_has_object(the_repository->objects, &obj->oid, - HAS_OBJECT_RECHECK_PACKED)) { + ODB_HAS_OBJECT_RECHECK_PACKED)) { printf_ln(_("broken link from %7s %s\n" " to %7s %s"), printable_type(&parent->oid, parent->type), diff --git a/builtin/index-pack.c b/builtin/index-pack.c index d1e47279a8..d96d0eb8cf 100644 --- a/builtin/index-pack.c +++ b/builtin/index-pack.c @@ -891,7 +891,7 @@ static void sha1_object(const void *data, struct object_entry *obj_entry, if (startup_info->have_repository) { read_lock(); collision_test_needed = odb_has_object(the_repository->objects, oid, - HAS_OBJECT_FETCH_PROMISOR); + ODB_HAS_OBJECT_FETCH_PROMISOR); read_unlock(); } diff --git a/builtin/receive-pack.c b/builtin/receive-pack.c index e34edff406..32b0223884 100644 --- a/builtin/receive-pack.c +++ b/builtin/receive-pack.c @@ -1546,7 +1546,7 @@ static const char *update(struct command *cmd, struct shallow_info *si) if (!is_null_oid(new_oid) && !odb_has_object(the_repository->objects, new_oid, - HAS_OBJECT_RECHECK_PACKED | HAS_OBJECT_FETCH_PROMISOR)) { + ODB_HAS_OBJECT_RECHECK_PACKED | ODB_HAS_OBJECT_FETCH_PROMISOR)) { error("unpack should have generated %s, " "but I can't find it!", oid_to_hex(new_oid)); ret = "bad pack"; diff --git a/builtin/remote.c b/builtin/remote.c index 0fddaa1773..de989ea3ba 100644 --- a/builtin/remote.c +++ b/builtin/remote.c @@ -473,7 +473,7 @@ static int get_push_ref_states(const struct ref *remote_refs, else if (is_null_oid(&ref->old_oid)) info->status = PUSH_STATUS_CREATE; else if (odb_has_object(the_repository->objects, &ref->old_oid, - HAS_OBJECT_RECHECK_PACKED | HAS_OBJECT_FETCH_PROMISOR) && + ODB_HAS_OBJECT_RECHECK_PACKED | ODB_HAS_OBJECT_FETCH_PROMISOR) && ref_newer(&ref->new_oid, &ref->old_oid)) info->status = PUSH_STATUS_FASTFORWARD; else diff --git a/builtin/show-ref.c b/builtin/show-ref.c index 5d31acea7c..d508441632 100644 --- a/builtin/show-ref.c +++ b/builtin/show-ref.c @@ -37,7 
+37,7 @@ static void show_one(const struct show_one_options *opts, struct object_id peeled; if (!odb_has_object(the_repository->objects, ref->oid, - HAS_OBJECT_RECHECK_PACKED | HAS_OBJECT_FETCH_PROMISOR)) + ODB_HAS_OBJECT_RECHECK_PACKED | ODB_HAS_OBJECT_FETCH_PROMISOR)) die("git show-ref: bad ref %s (%s)", ref->name, oid_to_hex(ref->oid)); diff --git a/builtin/unpack-objects.c b/builtin/unpack-objects.c index 6fc64e9e4b..871fc8fff5 100644 --- a/builtin/unpack-objects.c +++ b/builtin/unpack-objects.c @@ -449,7 +449,7 @@ static void unpack_delta_entry(enum object_type type, unsigned long delta_size, if (!delta_data) return; if (odb_has_object(the_repository->objects, &base_oid, - HAS_OBJECT_RECHECK_PACKED | HAS_OBJECT_FETCH_PROMISOR)) + ODB_HAS_OBJECT_RECHECK_PACKED | ODB_HAS_OBJECT_FETCH_PROMISOR)) ; /* Ok we have this one */ else if (resolve_against_held(nr, &base_oid, delta_data, delta_size)) diff --git a/cache-tree.c b/cache-tree.c index 60059edfb0..fe41068c34 100644 --- a/cache-tree.c +++ b/cache-tree.c @@ -239,7 +239,7 @@ int cache_tree_fully_valid(struct cache_tree *it) return 0; if (it->entry_count < 0 || odb_has_object(the_repository->objects, &it->oid, - HAS_OBJECT_RECHECK_PACKED | HAS_OBJECT_FETCH_PROMISOR)) + ODB_HAS_OBJECT_RECHECK_PACKED | ODB_HAS_OBJECT_FETCH_PROMISOR)) return 0; for (i = 0; i < it->subtree_nr; i++) { if (!cache_tree_fully_valid(it->down[i]->cache_tree)) @@ -292,7 +292,7 @@ static int update_one(struct cache_tree *it, if (0 <= it->entry_count && odb_has_object(the_repository->objects, &it->oid, - HAS_OBJECT_RECHECK_PACKED | HAS_OBJECT_FETCH_PROMISOR)) + ODB_HAS_OBJECT_RECHECK_PACKED | ODB_HAS_OBJECT_FETCH_PROMISOR)) return it->entry_count; /* @@ -400,7 +400,7 @@ static int update_one(struct cache_tree *it, if (is_null_oid(oid) || (!ce_missing_ok && !odb_has_object(the_repository->objects, oid, - HAS_OBJECT_RECHECK_PACKED | HAS_OBJECT_FETCH_PROMISOR))) { + ODB_HAS_OBJECT_RECHECK_PACKED | ODB_HAS_OBJECT_FETCH_PROMISOR))) { 
strbuf_release(&buffer); if (expected_missing) return -1; @@ -448,7 +448,7 @@ static int update_one(struct cache_tree *it, struct object_id oid; hash_object_file(the_hash_algo, buffer.buf, buffer.len, OBJ_TREE, &oid); - if (odb_has_object(the_repository->objects, &oid, HAS_OBJECT_RECHECK_PACKED)) + if (odb_has_object(the_repository->objects, &oid, ODB_HAS_OBJECT_RECHECK_PACKED)) oidcpy(&it->oid, &oid); else to_invalidate = 1; diff --git a/fetch-pack.c b/fetch-pack.c index 6ecd468ef7..0f24722a70 100644 --- a/fetch-pack.c +++ b/fetch-pack.c @@ -145,7 +145,7 @@ static struct commit *deref_without_lazy_fetch(const struct object_id *oid, if (commit) { if (mark_tags_complete_and_check_obj_db) { if (!odb_has_object(the_repository->objects, oid, - HAS_OBJECT_RECHECK_PACKED)) + ODB_HAS_OBJECT_RECHECK_PACKED)) die_in_commit_graph_only(oid); } return commit; @@ -2013,7 +2013,7 @@ static void update_shallow(struct fetch_pack_args *args, struct object_id *oid = si->shallow->oid; for (i = 0; i < si->shallow->nr; i++) if (odb_has_object(the_repository->objects, &oid[i], - HAS_OBJECT_RECHECK_PACKED | HAS_OBJECT_FETCH_PROMISOR)) + ODB_HAS_OBJECT_RECHECK_PACKED | ODB_HAS_OBJECT_FETCH_PROMISOR)) oid_array_append(&extra, &oid[i]); if (extra.nr) { setup_alternate_shallow(&shallow_lock, diff --git a/http-push.c b/http-push.c index 9ae6062198..06c3acbb5d 100644 --- a/http-push.c +++ b/http-push.c @@ -1449,7 +1449,7 @@ static void one_remote_ref(const char *refname) */ if (repo->can_update_info_refs && !odb_has_object(the_repository->objects, &ref->old_oid, - HAS_OBJECT_RECHECK_PACKED | HAS_OBJECT_FETCH_PROMISOR)) { + ODB_HAS_OBJECT_RECHECK_PACKED | ODB_HAS_OBJECT_FETCH_PROMISOR)) { obj = lookup_unknown_object(the_repository, &ref->old_oid); fprintf(stderr, " fetch %s for %s\n", oid_to_hex(&ref->old_oid), refname); @@ -1655,7 +1655,7 @@ static int delete_remote_branch(const char *pattern, int force) if (is_null_oid(&head_oid)) return error("Unable to resolve remote HEAD"); if 
(!odb_has_object(the_repository->objects, &head_oid, - HAS_OBJECT_RECHECK_PACKED | HAS_OBJECT_FETCH_PROMISOR)) + ODB_HAS_OBJECT_RECHECK_PACKED | ODB_HAS_OBJECT_FETCH_PROMISOR)) return error("Remote HEAD resolves to object %s\nwhich does not exist locally, perhaps you need to fetch?", oid_to_hex(&head_oid)); /* Remote branch must resolve to a known object */ @@ -1663,7 +1663,7 @@ static int delete_remote_branch(const char *pattern, int force) return error("Unable to resolve remote branch %s", remote_ref->name); if (!odb_has_object(the_repository->objects, &remote_ref->old_oid, - HAS_OBJECT_RECHECK_PACKED | HAS_OBJECT_FETCH_PROMISOR)) + ODB_HAS_OBJECT_RECHECK_PACKED | ODB_HAS_OBJECT_FETCH_PROMISOR)) return error("Remote branch %s resolves to object %s\nwhich does not exist locally, perhaps you need to fetch?", remote_ref->name, oid_to_hex(&remote_ref->old_oid)); /* Remote branch must be an ancestor of remote HEAD */ @@ -1886,7 +1886,7 @@ int cmd_main(int argc, const char **argv) !is_null_oid(&ref->old_oid) && !ref->force) { if (!odb_has_object(the_repository->objects, &ref->old_oid, - HAS_OBJECT_RECHECK_PACKED | HAS_OBJECT_FETCH_PROMISOR) || + ODB_HAS_OBJECT_RECHECK_PACKED | ODB_HAS_OBJECT_FETCH_PROMISOR) || !ref_newer(&ref->peer_ref->new_oid, &ref->old_oid)) { /* diff --git a/http-walker.c b/http-walker.c index e886e64866..1b6d496548 100644 --- a/http-walker.c +++ b/http-walker.c @@ -139,7 +139,7 @@ static int fill_active_slot(void *data UNUSED) obj_req = list_entry(pos, struct object_request, node); if (obj_req->state == WAITING) { if (odb_has_object(the_repository->objects, &obj_req->oid, - HAS_OBJECT_RECHECK_PACKED | HAS_OBJECT_FETCH_PROMISOR)) + ODB_HAS_OBJECT_RECHECK_PACKED | ODB_HAS_OBJECT_FETCH_PROMISOR)) obj_req->state = COMPLETE; else { start_object_request(obj_req); @@ -495,7 +495,7 @@ static int fetch_object(struct walker *walker, const struct object_id *oid) return error("Couldn't find request for %s in the queue", hex); if 
(odb_has_object(the_repository->objects, &obj_req->oid, - HAS_OBJECT_RECHECK_PACKED | HAS_OBJECT_FETCH_PROMISOR)) { + ODB_HAS_OBJECT_RECHECK_PACKED | ODB_HAS_OBJECT_FETCH_PROMISOR)) { if (obj_req->req) abort_http_object_request(&obj_req->req); abort_object_request(obj_req); diff --git a/list-objects.c b/list-objects.c index 91b23e22f7..724d723c48 100644 --- a/list-objects.c +++ b/list-objects.c @@ -75,7 +75,7 @@ static void process_blob(struct traversal_context *ctx, */ if (ctx->revs->exclude_promisor_objects && !odb_has_object(the_repository->objects, &obj->oid, - HAS_OBJECT_RECHECK_PACKED | HAS_OBJECT_FETCH_PROMISOR) && + ODB_HAS_OBJECT_RECHECK_PACKED | ODB_HAS_OBJECT_FETCH_PROMISOR) && is_promisor_object(ctx->revs->repo, &obj->oid)) return; diff --git a/notes.c b/notes.c index 51a7ef9f83..8f315e2a00 100644 --- a/notes.c +++ b/notes.c @@ -796,7 +796,7 @@ static int prune_notes_helper(const struct object_id *object_oid, struct note_delete_list *n; if (odb_has_object(the_repository->objects, object_oid, - HAS_OBJECT_RECHECK_PACKED | HAS_OBJECT_FETCH_PROMISOR)) + ODB_HAS_OBJECT_RECHECK_PACKED | ODB_HAS_OBJECT_FETCH_PROMISOR)) return 0; /* nothing to do for this note */ /* failed to find object => prune this note */ diff --git a/object-file.c b/object-file.c index 2146104de8..98a4678ca4 100644 --- a/object-file.c +++ b/object-file.c @@ -1378,7 +1378,7 @@ static int already_written(struct odb_transaction_files *transaction, { /* The object may already exist in the repository */ if (odb_has_object(transaction->base.source->odb, oid, - HAS_OBJECT_RECHECK_PACKED | HAS_OBJECT_FETCH_PROMISOR)) + ODB_HAS_OBJECT_RECHECK_PACKED | ODB_HAS_OBJECT_FETCH_PROMISOR)) return 1; /* Might want to keep the list sorted */ diff --git a/odb.c b/odb.c index 8220661356..9b28fe25ef 100644 --- a/odb.c +++ b/odb.c @@ -872,15 +872,15 @@ void *odb_read_object_peeled(struct object_database *odb, } int odb_has_object(struct object_database *odb, const struct object_id *oid, - enum has_object_flags 
flags) + enum odb_has_object_flags flags) { unsigned object_info_flags = 0; if (!startup_info->have_repository) return 0; - if (!(flags & HAS_OBJECT_RECHECK_PACKED)) + if (!(flags & ODB_HAS_OBJECT_RECHECK_PACKED)) object_info_flags |= OBJECT_INFO_QUICK; - if (!(flags & HAS_OBJECT_FETCH_PROMISOR)) + if (!(flags & ODB_HAS_OBJECT_FETCH_PROMISOR)) object_info_flags |= OBJECT_INFO_SKIP_FETCH_OBJECT; return odb_read_object_info_extended(odb, oid, NULL, object_info_flags) >= 0; diff --git a/odb.h b/odb.h index 9aadc1177a..8d739e118b 100644 --- a/odb.h +++ b/odb.h @@ -395,11 +395,11 @@ int odb_read_object_info(struct object_database *odb, const struct object_id *oid, unsigned long *sizep); -enum has_object_flags { +enum odb_has_object_flags { /* Retry packed storage after checking packed and loose storage */ - HAS_OBJECT_RECHECK_PACKED = (1 << 0), + ODB_HAS_OBJECT_RECHECK_PACKED = (1 << 0), /* Allow fetching the object in case the repository has a promisor remote. */ - HAS_OBJECT_FETCH_PROMISOR = (1 << 1), + ODB_HAS_OBJECT_FETCH_PROMISOR = (1 << 1), }; /* @@ -408,7 +408,7 @@ enum has_object_flags { */ int odb_has_object(struct object_database *odb, const struct object_id *oid, - enum has_object_flags flags); + enum odb_has_object_flags flags); int odb_freshen_object(struct object_database *odb, const struct object_id *oid); diff --git a/reflog.c b/reflog.c index 1460ae9d0d..82337078d0 100644 --- a/reflog.c +++ b/reflog.c @@ -168,7 +168,7 @@ static int tree_is_complete(const struct object_id *oid) complete = 1; while (tree_entry(&desc, &entry)) { if (!odb_has_object(the_repository->objects, &entry.oid, - HAS_OBJECT_RECHECK_PACKED | HAS_OBJECT_FETCH_PROMISOR) || + ODB_HAS_OBJECT_RECHECK_PACKED | ODB_HAS_OBJECT_FETCH_PROMISOR) || (S_ISDIR(entry.mode) && !tree_is_complete(&entry.oid))) { tree->object.flags |= INCOMPLETE; complete = 0; diff --git a/refs.c b/refs.c index 685a0c247b..97cdea1f76 100644 --- a/refs.c +++ b/refs.c @@ -425,7 +425,7 @@ int ref_resolves_to_object(const 
char *refname, if (flags & REF_ISBROKEN) return 0; if (!odb_has_object(repo->objects, oid, - HAS_OBJECT_RECHECK_PACKED | HAS_OBJECT_FETCH_PROMISOR)) { + ODB_HAS_OBJECT_RECHECK_PACKED | ODB_HAS_OBJECT_FETCH_PROMISOR)) { error(_("%s does not point to a valid object!"), refname); return 0; } diff --git a/remote.c b/remote.c index 7ca2a6501b..a664cd166a 100644 --- a/remote.c +++ b/remote.c @@ -1723,7 +1723,7 @@ void set_ref_status_for_push(struct ref *remote_refs, int send_mirror, if (!reject_reason && !ref->deletion && !is_null_oid(&ref->old_oid)) { if (starts_with(ref->name, "refs/tags/")) reject_reason = REF_STATUS_REJECT_ALREADY_EXISTS; - else if (!odb_has_object(the_repository->objects, &ref->old_oid, HAS_OBJECT_RECHECK_PACKED)) + else if (!odb_has_object(the_repository->objects, &ref->old_oid, ODB_HAS_OBJECT_RECHECK_PACKED)) reject_reason = REF_STATUS_REJECT_FETCH_FIRST; else if (!lookup_commit_reference_gently(the_repository, &ref->old_oid, 1) || !lookup_commit_reference_gently(the_repository, &ref->new_oid, 1)) diff --git a/shallow.c b/shallow.c index 7a3dd56795..a8ad92e303 100644 --- a/shallow.c +++ b/shallow.c @@ -360,7 +360,7 @@ static int write_one_shallow(const struct commit_graft *graft, void *cb_data) return 0; if (data->flags & QUICK) { if (!odb_has_object(the_repository->objects, &graft->oid, - HAS_OBJECT_RECHECK_PACKED | HAS_OBJECT_FETCH_PROMISOR)) + ODB_HAS_OBJECT_RECHECK_PACKED | ODB_HAS_OBJECT_FETCH_PROMISOR)) return 0; } else if (data->flags & SEEN_ONLY) { struct commit *c = lookup_commit(the_repository, &graft->oid); @@ -528,7 +528,7 @@ void prepare_shallow_info(struct shallow_info *info, struct oid_array *sa) ALLOC_ARRAY(info->theirs, sa->nr); for (size_t i = 0; i < sa->nr; i++) { if (odb_has_object(the_repository->objects, sa->oid + i, - HAS_OBJECT_RECHECK_PACKED | HAS_OBJECT_FETCH_PROMISOR)) { + ODB_HAS_OBJECT_RECHECK_PACKED | ODB_HAS_OBJECT_FETCH_PROMISOR)) { struct commit_graft *graft; graft = lookup_commit_graft(the_repository, 
&sa->oid[i]); @@ -567,7 +567,7 @@ void remove_nonexistent_theirs_shallow(struct shallow_info *info) if (i != dst) info->theirs[dst] = info->theirs[i]; if (odb_has_object(the_repository->objects, oid + info->theirs[i], - HAS_OBJECT_RECHECK_PACKED | HAS_OBJECT_FETCH_PROMISOR)) + ODB_HAS_OBJECT_RECHECK_PACKED | ODB_HAS_OBJECT_FETCH_PROMISOR)) dst++; } info->nr_theirs = dst; diff --git a/walker.c b/walker.c index 91332539d3..e98eb6da53 100644 --- a/walker.c +++ b/walker.c @@ -155,7 +155,7 @@ static int process(struct walker *walker, struct object *obj) obj->flags |= SEEN; if (odb_has_object(the_repository->objects, &obj->oid, - HAS_OBJECT_RECHECK_PACKED | HAS_OBJECT_FETCH_PROMISOR)) { + ODB_HAS_OBJECT_RECHECK_PACKED | ODB_HAS_OBJECT_FETCH_PROMISOR)) { /* We already have it, so we should scan it now. */ obj->flags |= TO_SCAN; } From 109bcb7d1d2f0d2f0514beec15779190c0b89575 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Wed, 1 Apr 2026 01:57:51 +0200 Subject: [PATCH 67/93] odb: drop unneeded headers and forward decls There's a couple of unneeded forward declarations and headers in "odb.h". Drop these. 
Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- odb.h | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/odb.h b/odb.h index 8d739e118b..3a711f6547 100644 --- a/odb.h +++ b/odb.h @@ -1,19 +1,17 @@ #ifndef ODB_H #define ODB_H -#include "hashmap.h" #include "object.h" #include "oidset.h" #include "oidmap.h" #include "string-list.h" #include "thread-utils.h" -struct oidmap; -struct oidtree; +struct cached_object_entry; +struct packed_git; +struct repository; struct strbuf; struct strvec; -struct repository; -struct multi_pack_index; /* * Set this to 0 to prevent odb_read_object_info_extended() from fetching missing @@ -31,10 +29,6 @@ extern int fetch_if_missing; */ char *compute_alternate_path(const char *path, struct strbuf *err); -struct packed_git; -struct packfile_store; -struct cached_object_entry; - /* * A transaction may be started for an object database prior to writing new * objects via odb_transaction_begin(). These objects are not committed until From 42148dafdf74f8458e7a710dcb982c0be0e40566 Mon Sep 17 00:00:00 2001 From: Siddharth Shrimali Date: Wed, 1 Apr 2026 11:50:29 +0530 Subject: [PATCH 68/93] t7004: replace wc -l with modern test helpers Pipelines of the form "test $(git tag | wc -l) -eq 0" suppress git's exit code. This means a crash or unexpected failure from git tag would go undetected. Additionally, the use of $(...) creates a subshell for each check, which adds unnecessary overhead. Replace these patterns with test_must_be_empty and test_line_count. These helpers check the output of git directly from a file, ensuring git's exit code is captured properly via the preceding "&&" chain. They also provide better diagnostics on failure by printing the contents of the file when a check does not pass. 
Signed-off-by: Siddharth Shrimali Signed-off-by: Junio C Hamano --- t/t7004-tag.sh | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/t/t7004-tag.sh b/t/t7004-tag.sh index ce2ff2a28a..faf7d97fc4 100755 --- a/t/t7004-tag.sh +++ b/t/t7004-tag.sh @@ -33,8 +33,10 @@ test_expect_success 'listing all tags in an empty tree should succeed' ' ' test_expect_success 'listing all tags in an empty tree should output nothing' ' - test $(git tag -l | wc -l) -eq 0 && - test $(git tag | wc -l) -eq 0 + git tag -l >actual && + test_must_be_empty actual && + git tag >actual && + test_must_be_empty actual ' test_expect_success 'sort tags, ignore case' ' @@ -178,7 +180,8 @@ test_expect_success 'listing tags using a non-matching pattern should succeed' ' ' test_expect_success 'listing tags using a non-matching pattern should output nothing' ' - test $(git tag -l xxx | wc -l) -eq 0 + git tag -l xxx >actual && + test_must_be_empty actual ' # special cases for creating tags: @@ -188,13 +191,15 @@ test_expect_success 'trying to create a tag with the name of one existing should ' test_expect_success 'trying to create a tag with a non-valid name should fail' ' - test $(git tag -l | wc -l) -eq 1 && + git tag -l >actual && + test_line_count = 1 actual && test_must_fail git tag "" && test_must_fail git tag .othertag && test_must_fail git tag "other tag" && test_must_fail git tag "othertag^" && test_must_fail git tag "other~tag" && - test $(git tag -l | wc -l) -eq 1 + git tag -l >actual && + test_line_count = 1 actual ' test_expect_success 'creating a tag using HEAD directly should succeed' ' From c0ce43376b178d876bce2073b81737ce49657def Mon Sep 17 00:00:00 2001 From: Nick Golden Date: Wed, 1 Apr 2026 15:00:33 -0400 Subject: [PATCH 69/93] read-cache: disable renames in add_files_to_cache add_files_to_cache() refreshes the index from worktree changes and does not need rename detection. 
When unmerged entries and a deleted stage-0 path are present together, rename detection can pair them and rewrite an unmerged diff pair to point at the deleted path. That later makes "git commit -a" and "git add -u" try to stat the deleted path and die with "unable to stat". Disable rename detection in this callback-driven staging path and add a regression test covering the crash. Signed-off-by: Nick Golden Signed-off-by: Junio C Hamano --- read-cache.c | 1 + t/t2200-add-update.sh | 38 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 39 insertions(+) diff --git a/read-cache.c b/read-cache.c index 0c07c3aef7..bacb841a2e 100644 --- a/read-cache.c +++ b/read-cache.c @@ -3975,6 +3975,7 @@ int add_files_to_cache(struct repository *repo, const char *prefix, rev.diffopt.format_callback = update_callback; rev.diffopt.format_callback_data = &data; rev.diffopt.flags.override_submodule_config = 1; + rev.diffopt.detect_rename = 0; /* staging worktree changes does not need renames */ rev.max_count = 0; /* do not compare unmerged paths with stage #2 */ /* diff --git a/t/t2200-add-update.sh b/t/t2200-add-update.sh index 06e83d3333..0a96655cfe 100755 --- a/t/t2200-add-update.sh +++ b/t/t2200-add-update.sh @@ -200,6 +200,44 @@ test_expect_success 'add -u resolves unmerged paths' ' test_cmp expect actual ' +test_expect_success 'add -u avoids rename pairing on unmerged paths' ' + test_create_repo rename-crash && + ( + cd rename-crash && + test_seq 1 100 | + sed "s/.*/line &: same text/" >conflict.txt && + cp conflict.txt bystander.txt && + git add conflict.txt bystander.txt && + git commit -m "initial: two files with identical content" && + main_branch=$(git symbolic-ref --short HEAD) && + git checkout -b feature && + sed "s/^line 50:.*/line 50: FEATURE/" \ + conflict.txt >conflict.txt.tmp && + mv conflict.txt.tmp conflict.txt && + git add conflict.txt && + git commit -m "feature: modify line 50" && + git checkout "$main_branch" && + sed "s/^line 50:.*/line 50: MAIN/" \ + 
conflict.txt >conflict.txt.tmp && + mv conflict.txt.tmp conflict.txt && + git add conflict.txt && + git commit -m "main: modify line 50 differently" && + test_must_fail git merge feature && + rm bystander.txt && + git add -u >out && + test_must_be_empty out && + git ls-files -u >actual && + test_must_be_empty actual && + git ls-files bystander.txt conflict.txt >actual && + cat >expect <<-\EOF && + conflict.txt + EOF + test_cmp expect actual && + git diff-files --name-only >actual && + test_must_be_empty actual + ) +' + test_expect_success '"add -u non-existent" should fail' ' test_must_fail git add -u non-existent && git ls-files >actual && From e5ae639f1a25642650cefedf6478ff5903ffb2f0 Mon Sep 17 00:00:00 2001 From: Toon Claes Date: Wed, 1 Apr 2026 22:55:10 +0200 Subject: [PATCH 70/93] builtin/replay: mark options as not negatable The options '--onto', '--advance', '--revert', and '--ref-action' of git-replay(1) are not negatable. Mark them as such using PARSE_OPT_NONEG. Signed-off-by: Toon Claes Signed-off-by: Junio C Hamano --- builtin/replay.c | 28 ++++++++++++++++------------ 1 file changed, 16 insertions(+), 12 deletions(-) diff --git a/builtin/replay.c b/builtin/replay.c index a0879b020f..85aa9fa0a4 100644 --- a/builtin/replay.c +++ b/builtin/replay.c @@ -89,20 +89,24 @@ int cmd_replay(int argc, NULL }; struct option replay_options[] = { - OPT_STRING(0, "advance", &opts.advance, - N_("branch"), - N_("make replay advance given branch")), - OPT_STRING(0, "onto", &opts.onto, - N_("revision"), - N_("replay onto given commit")), OPT_BOOL(0, "contained", &opts.contained, N_("update all branches that point at commits in ")), - OPT_STRING(0, "revert", &opts.revert, - N_("branch"), - N_("revert commits onto given branch")), - OPT_STRING(0, "ref-action", &ref_action, - N_("mode"), - N_("control ref update behavior (update|print)")), + OPT_STRING_F(0, "onto", &opts.onto, + N_("revision"), + N_("replay onto given commit"), + PARSE_OPT_NONEG), + OPT_STRING_F(0, "advance", 
&opts.advance, + N_("branch"), + N_("make replay advance given branch"), + PARSE_OPT_NONEG), + OPT_STRING_F(0, "revert", &opts.revert, + N_("branch"), + N_("revert commits onto given branch"), + PARSE_OPT_NONEG), + OPT_STRING_F(0, "ref-action", &ref_action, + N_("mode"), + N_("control ref update behavior (update|print)"), + PARSE_OPT_NONEG), OPT_END() }; From 6542cacbb33490ab83ef87a5fbee694cd2863bdd Mon Sep 17 00:00:00 2001 From: Toon Claes Date: Wed, 1 Apr 2026 22:55:11 +0200 Subject: [PATCH 71/93] replay: use stuck form in documentation and help message gitcli(7) suggests to use stuck form. Change the documentation strings to use this form. While at it, reorder them to match the order in the docs. Signed-off-by: Toon Claes Signed-off-by: Junio C Hamano --- Documentation/git-replay.adoc | 25 +++++++++++++------------ builtin/replay.c | 4 ++-- 2 files changed, 15 insertions(+), 14 deletions(-) diff --git a/Documentation/git-replay.adoc b/Documentation/git-replay.adoc index 997097e420..5bb478c281 100644 --- a/Documentation/git-replay.adoc +++ b/Documentation/git-replay.adoc @@ -9,7 +9,8 @@ git-replay - EXPERIMENTAL: Replay commits on a new base, works with bare repos t SYNOPSIS -------- [verse] -(EXPERIMENTAL!) 'git replay' ([--contained] --onto | --advance | --revert ) [--ref-action[=]] +(EXPERIMENTAL!) 'git replay' ([--contained] --onto= | --advance= | --revert=) + [--ref-action=] DESCRIPTION ----------- @@ -26,7 +27,7 @@ THIS COMMAND IS EXPERIMENTAL. THE BEHAVIOR MAY CHANGE. OPTIONS ------- ---onto :: +--onto=:: Starting point at which to create the new commits. May be any valid commit, and not just an existing branch name. + @@ -34,7 +35,7 @@ When `--onto` is specified, the branch(es) in the revision range will be updated to point at the new commits, similar to the way `git rebase --update-refs` updates multiple branches in the affected range. ---advance :: +--advance=:: Starting point at which to create the new commits; must be a branch name. 
+ @@ -42,7 +43,7 @@ The history is replayed on top of the and is updated to point at the tip of the resulting history. This is different from `--onto`, which uses the target only as a starting point without updating it. ---revert :: +--revert=:: Starting point at which to create the reverted commits; must be a branch name. + @@ -79,8 +80,8 @@ The default mode can be configured via the `replay.refAction` configuration vari :: Range of commits to replay; see "Specifying Ranges" in - linkgit:git-rev-parse[1]. In `--advance ` or - `--revert ` mode, the range should have a single tip, + linkgit:git-rev-parse[1]. In `--advance=` or + `--revert=` mode, the range should have a single tip, so that it's clear to which tip the advanced or reverted should point. Any commits in the range whose changes are already present in the branch the commits are being @@ -127,7 +128,7 @@ EXAMPLES To simply rebase `mybranch` onto `target`: ------------ -$ git replay --onto target origin/main..mybranch +$ git replay --onto=target origin/main..mybranch ------------ The refs are updated atomically and no output is produced on success. @@ -135,14 +136,14 @@ The refs are updated atomically and no output is produced on success. To see what would be updated without actually updating: ------------ -$ git replay --ref-action=print --onto target origin/main..mybranch +$ git replay --ref-action=print --onto=target origin/main..mybranch update refs/heads/mybranch ${NEW_mybranch_HASH} ${OLD_mybranch_HASH} ------------ To cherry-pick the commits from mybranch onto target: ------------ -$ git replay --advance target origin/main..mybranch +$ git replay --advance=target origin/main..mybranch ------------ Note that the first two examples replay the exact same commits and on @@ -154,7 +155,7 @@ What if you have a stack of branches, one depending upon another, and you'd really like to rebase the whole set? 
------------ -$ git replay --contained --onto origin/main origin/main..tipbranch +$ git replay --contained --onto=origin/main origin/main..tipbranch ------------ All three branches (`branch1`, `branch2`, and `tipbranch`) are updated @@ -165,7 +166,7 @@ commits to replay using the syntax `A..B`; any range expression will do: ------------ -$ git replay --onto origin/main ^base branch1 branch2 branch3 +$ git replay --onto=origin/main ^base branch1 branch2 branch3 ------------ This will simultaneously rebase `branch1`, `branch2`, and `branch3`, @@ -176,7 +177,7 @@ that they have in common, but that does not need to be the case. To revert commits on a branch: ------------ -$ git replay --revert main topic~2..topic +$ git replay --revert=main topic~2..topic ------------ This reverts the last two commits from `topic`, creating revert commits on diff --git a/builtin/replay.c b/builtin/replay.c index 85aa9fa0a4..fbfeb780b6 100644 --- a/builtin/replay.c +++ b/builtin/replay.c @@ -84,8 +84,8 @@ int cmd_replay(int argc, const char *const replay_usage[] = { N_("(EXPERIMENTAL!) git replay " - "([--contained] --onto | --advance | --revert ) " - "[--ref-action[=]] "), + "([--contained] --onto= | --advance= | --revert=)\n" + "[--ref-action=] "), NULL }; struct option replay_options[] = { From 23d83f8ddbef9adcb87671358b473e55cf90c90b Mon Sep 17 00:00:00 2001 From: Toon Claes Date: Wed, 1 Apr 2026 22:55:12 +0200 Subject: [PATCH 72/93] replay: allow to specify a ref with option --ref When option '--onto' is passed to git-replay(1), the command will update refs from the <revision-range> passed to the command. When using option '--advance' or '--revert', the argument of that option is a ref that will be updated. To enable users to specify which ref to update, add option '--ref'. When using option '--ref', the refs described above are left untouched and the argument of this option is updated instead.
Because this introduces code paths in replay.c that jump to `out` before init_basic_merge_options() is called on `merge_opt`, zero-initialize the struct. Signed-off-by: Toon Claes Signed-off-by: Junio C Hamano --- Documentation/git-replay.adoc | 22 +++++++++++- builtin/replay.c | 8 ++++- replay.c | 35 ++++++++++++++----- replay.h | 7 ++++ t/t3650-replay-basics.sh | 66 +++++++++++++++++++++++++++++++++++ 5 files changed, 128 insertions(+), 10 deletions(-) diff --git a/Documentation/git-replay.adoc b/Documentation/git-replay.adoc index 5bb478c281..a32f72aead 100644 --- a/Documentation/git-replay.adoc +++ b/Documentation/git-replay.adoc @@ -10,7 +10,7 @@ SYNOPSIS -------- [verse] (EXPERIMENTAL!) 'git replay' ([--contained] --onto= | --advance= | --revert=) - [--ref-action=] + [--ref=] [--ref-action=] DESCRIPTION ----------- @@ -66,6 +66,16 @@ incompatible with `--contained` (which is a modifier for `--onto` only). Update all branches that point at commits in . Requires `--onto`. +--ref=:: + Override which reference is updated with the result of the replay. + The ref must be fully qualified. + When used with `--onto`, the `` should have a + single tip and only the specified reference is updated instead of + inferring refs from the revision range. + When used with `--advance` or `--revert`, the specified reference is + updated instead of the branch given to those options. + This option is incompatible with `--contained`. + --ref-action[=]:: Control how references are updated. The mode can be: + @@ -189,6 +199,16 @@ NOTE: For reverting an entire merge request as a single commit (rather than commit-by-commit), consider using `git merge-tree --merge-base $TIP HEAD $BASE` which can avoid unnecessary merge conflicts. 
+To replay onto a specific commit while updating a different reference: + +------------ +$ git replay --onto=112233 --ref=refs/heads/mybranch aabbcc..ddeeff +------------ + +This replays the range `aabbcc..ddeeff` onto commit `112233` and updates +`refs/heads/mybranch` to point at the result. This can be useful when you want +to use bare commit IDs instead of branch names. + GIT --- Part of the linkgit:git[1] suite diff --git a/builtin/replay.c b/builtin/replay.c index fbfeb780b6..39e3a86f6c 100644 --- a/builtin/replay.c +++ b/builtin/replay.c @@ -85,7 +85,7 @@ int cmd_replay(int argc, const char *const replay_usage[] = { N_("(EXPERIMENTAL!) git replay " "([--contained] --onto= | --advance= | --revert=)\n" - "[--ref-action=] "), + "[--ref=] [--ref-action=] "), NULL }; struct option replay_options[] = { @@ -103,6 +103,10 @@ int cmd_replay(int argc, N_("branch"), N_("revert commits onto given branch"), PARSE_OPT_NONEG), + OPT_STRING_F(0, "ref", &opts.ref, + N_("branch"), + N_("reference to update with result"), + PARSE_OPT_NONEG), OPT_STRING_F(0, "ref-action", &ref_action, N_("mode"), N_("control ref update behavior (update|print)"), @@ -126,6 +130,8 @@ int cmd_replay(int argc, opts.contained, "--contained"); die_for_incompatible_opt2(!!opts.revert, "--revert", opts.contained, "--contained"); + die_for_incompatible_opt2(!!opts.ref, "--ref", + !!opts.contained, "--contained"); /* Parse ref action mode from command line or config */ ref_mode = get_ref_action_mode(repo, ref_action); diff --git a/replay.c b/replay.c index d7239d4c83..b958ddabfa 100644 --- a/replay.c +++ b/replay.c @@ -347,13 +347,15 @@ int replay_revisions(struct rev_info *revs, struct commit *last_commit = NULL; struct commit *commit; struct commit *onto = NULL; - struct merge_options merge_opt; + struct merge_options merge_opt = { 0 }; struct merge_result result = { .clean = 1, }; bool detached_head; char *advance; char *revert; + const char *ref; + struct object_id old_oid; enum replay_mode mode = 
REPLAY_MODE_PICK; int ret; @@ -364,6 +366,27 @@ int replay_revisions(struct rev_info *revs, set_up_replay_mode(revs->repo, &revs->cmdline, opts->onto, &detached_head, &advance, &revert, &onto, &update_refs); + if (opts->ref) { + struct object_id oid; + + if (update_refs && strset_get_size(update_refs) > 1) { + ret = error(_("'--ref' cannot be used with multiple revision ranges")); + goto out; + } + if (check_refname_format(opts->ref, 0) || !starts_with(opts->ref, "refs/")) { + ret = error(_("'%s' is not a valid refname"), opts->ref); + goto out; + } + ref = opts->ref; + if (!refs_read_ref(get_main_ref_store(revs->repo), opts->ref, &oid)) + oidcpy(&old_oid, &oid); + else + oidclr(&old_oid, revs->repo->hash_algo); + } else { + ref = advance ? advance : revert; + oidcpy(&old_oid, &onto->object.oid); + } + /* FIXME: Should allow replaying commits with the first as a root commit */ if (prepare_revision_walk(revs) < 0) { @@ -399,7 +422,7 @@ int replay_revisions(struct rev_info *revs, kh_value(replayed_commits, pos) = last_commit; /* Update any necessary branches */ - if (advance || revert) + if (ref) continue; for (decoration = get_name_decoration(&commit->object); @@ -433,13 +456,9 @@ int replay_revisions(struct rev_info *revs, goto out; } - /* In --advance or --revert mode, update the target ref */ - if (advance || revert) { - const char *ref = advance ? advance : revert; - replay_result_queue_update(out, ref, - &onto->object.oid, + if (ref) + replay_result_queue_update(out, ref, &old_oid, &last_commit->object.oid); - } ret = 0; diff --git a/replay.h b/replay.h index e916a5f975..0ab74b9805 100644 --- a/replay.h +++ b/replay.h @@ -24,6 +24,13 @@ struct replay_revisions_options { */ const char *onto; + /* + * Reference to update with the result of the replay. This will not + * update any refs from `onto`, `advance`, or `revert`. Ignores + * `contained`. + */ + const char *ref; + /* * Starting point at which to create revert commits; must be a branch * name. 
The branch will be updated to point to the revert commits. diff --git a/t/t3650-replay-basics.sh b/t/t3650-replay-basics.sh index 217f6fb292..d5c7dd1bf4 100755 --- a/t/t3650-replay-basics.sh +++ b/t/t3650-replay-basics.sh @@ -495,4 +495,70 @@ test_expect_success 'git replay --revert incompatible with --advance' ' test_grep "cannot be used together" error ' +test_expect_success 'using --onto with --ref' ' + git branch test-ref-onto topic2 && + test_when_finished "git branch -D test-ref-onto" && + + git replay --ref-action=print --onto=main --ref=refs/heads/test-ref-onto topic1..topic2 >result && + + test_line_count = 1 result && + test_grep "^update refs/heads/test-ref-onto " result && + + git log --format=%s $(cut -f 3 -d " " result) >actual && + test_write_lines E D M L B A >expect && + test_cmp expect actual +' + +test_expect_success 'using --advance with --ref' ' + git branch test-ref-advance main && + git branch test-ref-target main && + test_when_finished "git branch -D test-ref-advance test-ref-target" && + + git replay --ref-action=print --advance=test-ref-advance --ref=refs/heads/test-ref-target topic1..topic2 >result && + + test_line_count = 1 result && + test_grep "^update refs/heads/test-ref-target " result +' + +test_expect_success 'using --revert with --ref' ' + git branch test-ref-revert topic4 && + git branch test-ref-revert-target topic4 && + test_when_finished "git branch -D test-ref-revert test-ref-revert-target" && + + git replay --ref-action=print --revert=test-ref-revert --ref=refs/heads/test-ref-revert-target topic4~1..topic4 >result && + + test_line_count = 1 result && + test_grep "^update refs/heads/test-ref-revert-target " result +' + +test_expect_success '--ref is incompatible with --contained' ' + test_must_fail git replay --onto=main --ref=refs/heads/main --contained topic1..topic2 2>err && + test_grep "cannot be used together" err +' + +test_expect_success '--ref with nonexistent fully-qualified ref' ' + test_when_finished "git 
update-ref -d refs/heads/new-branch" && + + git replay --onto=main --ref=refs/heads/new-branch topic1..topic2 && + + git log --format=%s -2 new-branch >actual && + test_write_lines E D >expect && + test_cmp expect actual +' + +test_expect_success '--ref must be a valid refname' ' + test_must_fail git replay --onto=main --ref="refs/heads/bad..ref" topic1..topic2 2>err && + test_grep "is not a valid refname" err +' + +test_expect_success '--ref requires fully qualified ref' ' + test_must_fail git replay --onto=main --ref=main topic1..topic2 2>err && + test_grep "is not a valid refname" err +' + +test_expect_success '--onto with --ref rejects multiple revision ranges' ' + test_must_fail git replay --onto=main --ref=refs/heads/topic2 ^topic1 topic2 topic4 2>err && + test_grep "cannot be used with multiple revision ranges" err +' + test_done From 34c17b840d5bdb8060ef6309aee04f919616c9de Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Thu, 2 Apr 2026 09:31:14 +0200 Subject: [PATCH 73/93] reftable: introduce "reftable-system.h" header We're including a couple of standard headers like <stdint.h> in a bunch of locations, which makes it hard for a project to plug in their own logic for making required functionality available. For us this is for example via "compat/posix.h", which already includes all of the system headers relevant to us. Introduce a new "reftable-system.h" header that allows projects to provide their own headers. This new header is supposed to contain all the project-specific bits to provide the POSIX-like environment, and some additional supporting code. With this change, we thus have the following split in our system-specific code: - "reftable/reftable-system.h" is the project-specific header that provides a POSIX-like environment. Every project is expected to provide their own implementation. - "reftable/system.h" contains the project-independent definition of the interfaces that a project needs to implement. This file should not be touched by a project.
- "reftable/system.c" contains the project-specific implementation of the interfaces defined in "system.h". Again, every project is expected to provide their own implementation. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- reftable/reftable-basics.h | 2 +- reftable/reftable-block.h | 3 +-- reftable/reftable-blocksource.h | 2 +- reftable/reftable-error.h | 2 ++ reftable/reftable-fsck.h | 1 + reftable/reftable-iterator.h | 1 + reftable/reftable-merged.h | 1 + reftable/reftable-record.h | 2 +- reftable/reftable-stack.h | 1 + reftable/reftable-system.h | 15 +++++++++++++++ reftable/reftable-table.h | 1 + reftable/reftable-writer.h | 4 +--- reftable/system.h | 11 +++++++---- 13 files changed, 34 insertions(+), 12 deletions(-) create mode 100644 reftable/reftable-system.h diff --git a/reftable/reftable-basics.h b/reftable/reftable-basics.h index 6d73f19c85..dc8622682d 100644 --- a/reftable/reftable-basics.h +++ b/reftable/reftable-basics.h @@ -9,7 +9,7 @@ #ifndef REFTABLE_BASICS_H #define REFTABLE_BASICS_H -#include +#include "reftable-system.h" /* A buffer that contains arbitrary byte slices. */ struct reftable_buf { diff --git a/reftable/reftable-block.h b/reftable/reftable-block.h index 0b05a8f7e3..94c79b5c58 100644 --- a/reftable/reftable-block.h +++ b/reftable/reftable-block.h @@ -9,8 +9,7 @@ #ifndef REFTABLE_BLOCK_H #define REFTABLE_BLOCK_H -#include - +#include "reftable-system.h" #include "reftable-basics.h" #include "reftable-blocksource.h" #include "reftable-iterator.h" diff --git a/reftable/reftable-blocksource.h b/reftable/reftable-blocksource.h index f5ba867bd6..40c1e94646 100644 --- a/reftable/reftable-blocksource.h +++ b/reftable/reftable-blocksource.h @@ -9,7 +9,7 @@ #ifndef REFTABLE_BLOCKSOURCE_H #define REFTABLE_BLOCKSOURCE_H -#include +#include "reftable-system.h" /* * Generic wrapper for a seekable readable file. 
diff --git a/reftable/reftable-error.h b/reftable/reftable-error.h index d100e0df92..0535e1478b 100644 --- a/reftable/reftable-error.h +++ b/reftable/reftable-error.h @@ -9,6 +9,8 @@ #ifndef REFTABLE_ERROR_H #define REFTABLE_ERROR_H +#include "reftable-system.h" + /* * Errors in reftable calls are signaled with negative integer return values. 0 * means success. diff --git a/reftable/reftable-fsck.h b/reftable/reftable-fsck.h index 007a392cf9..340fc7762e 100644 --- a/reftable/reftable-fsck.h +++ b/reftable/reftable-fsck.h @@ -1,6 +1,7 @@ #ifndef REFTABLE_FSCK_H #define REFTABLE_FSCK_H +#include "reftable-system.h" #include "reftable-stack.h" enum reftable_fsck_error { diff --git a/reftable/reftable-iterator.h b/reftable/reftable-iterator.h index af582028c2..a050cc153b 100644 --- a/reftable/reftable-iterator.h +++ b/reftable/reftable-iterator.h @@ -9,6 +9,7 @@ #ifndef REFTABLE_ITERATOR_H #define REFTABLE_ITERATOR_H +#include "reftable-system.h" #include "reftable-record.h" struct reftable_iterator_vtable; diff --git a/reftable/reftable-merged.h b/reftable/reftable-merged.h index e5af846b32..02a9966835 100644 --- a/reftable/reftable-merged.h +++ b/reftable/reftable-merged.h @@ -9,6 +9,7 @@ #ifndef REFTABLE_MERGED_H #define REFTABLE_MERGED_H +#include "reftable-system.h" #include "reftable-iterator.h" /* diff --git a/reftable/reftable-record.h b/reftable/reftable-record.h index 385a74cc86..e18c538238 100644 --- a/reftable/reftable-record.h +++ b/reftable/reftable-record.h @@ -9,8 +9,8 @@ #ifndef REFTABLE_RECORD_H #define REFTABLE_RECORD_H +#include "reftable-system.h" #include "reftable-basics.h" -#include /* * Basic data types diff --git a/reftable/reftable-stack.h b/reftable/reftable-stack.h index c2415cbc6e..5f7be573fa 100644 --- a/reftable/reftable-stack.h +++ b/reftable/reftable-stack.h @@ -9,6 +9,7 @@ #ifndef REFTABLE_STACK_H #define REFTABLE_STACK_H +#include "reftable-system.h" #include "reftable-writer.h" /* diff --git a/reftable/reftable-system.h 
b/reftable/reftable-system.h new file mode 100644 index 0000000000..4a18a6a790 --- /dev/null +++ b/reftable/reftable-system.h @@ -0,0 +1,15 @@ +#ifndef REFTABLE_SYSTEM_H +#define REFTABLE_SYSTEM_H + +/* + * This header defines the platform-specific bits required to compile the + * reftable library. It should provide an environment that bridges over the + * gaps between POSIX and your system, as well as the zlib interfaces. This + * header is expected to be changed by the individual project. + */ + +#define MINGW_DONT_HANDLE_IN_USE_ERROR +#include "compat/posix.h" +#include "compat/zlib-compat.h" + +#endif diff --git a/reftable/reftable-table.h b/reftable/reftable-table.h index 5f935d02e3..d7666b53a1 100644 --- a/reftable/reftable-table.h +++ b/reftable/reftable-table.h @@ -9,6 +9,7 @@ #ifndef REFTABLE_TABLE_H #define REFTABLE_TABLE_H +#include "reftable-system.h" #include "reftable-iterator.h" #include "reftable-block.h" #include "reftable-blocksource.h" diff --git a/reftable/reftable-writer.h b/reftable/reftable-writer.h index 1e7003cd69..065dd93dc6 100644 --- a/reftable/reftable-writer.h +++ b/reftable/reftable-writer.h @@ -9,11 +9,9 @@ #ifndef REFTABLE_WRITER_H #define REFTABLE_WRITER_H +#include "reftable-system.h" #include "reftable-record.h" -#include -#include /* ssize_t */ - /* Writing single reftables */ /* reftable_write_options sets options for writing a single reftable. */ diff --git a/reftable/system.h b/reftable/system.h index c54ed4cad6..a7eb6acd4a 100644 --- a/reftable/system.h +++ b/reftable/system.h @@ -9,11 +9,14 @@ #ifndef SYSTEM_H #define SYSTEM_H -/* This header glues the reftable library to the rest of Git */ +/* + * This header defines the platform-agnostic interface that is to be + * implemented by the project to make it work on their respective supported + * systems, and to integrate it into the project itself. This header is not + * expected to be changed by the individual project. 
+ */ -#define MINGW_DONT_HANDLE_IN_USE_ERROR -#include "compat/posix.h" -#include "compat/zlib-compat.h" +#include "reftable-system.h" /* * Return a random 32 bit integer. This function is expected to return From b45ea595e6f6b03a749abc2c8e508504429a4cf3 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Thu, 2 Apr 2026 09:31:15 +0200 Subject: [PATCH 74/93] reftable/stack: provide fsync(3p) via system header Users of the reftable library are expected to provide their own function callback in case they want to fsync(3p) data to disk via the reftable write options. But if no such function was provided we end up calling fsync(3p) directly, which may not even be available on some systems. While dropping the explicit call to fsync(3p) would work, it would lead to an unsafe default behaviour where a project may have forgotten to set up the callback function, and that could lead to potential data loss. So this is not a great solution. Instead, drop the callback function and make it mandatory for the project to define fsync(3p). In the case of Git, we can then easily inject our custom implementation via the "reftable-system.h" header so that we continue to use `fsync_component()`. 
Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- refs/reftable-backend.c | 6 ------ reftable/reftable-system.h | 3 +++ reftable/reftable-writer.h | 6 ------ reftable/stack.c | 13 +++---------- reftable/system.c | 6 ++++++ 5 files changed, 12 insertions(+), 22 deletions(-) diff --git a/refs/reftable-backend.c b/refs/reftable-backend.c index b124404663..daea30a5b4 100644 --- a/refs/reftable-backend.c +++ b/refs/reftable-backend.c @@ -366,11 +366,6 @@ static int reftable_be_config(const char *var, const char *value, return 0; } -static int reftable_be_fsync(int fd) -{ - return fsync_component(FSYNC_COMPONENT_REFERENCE, fd); -} - static struct ref_store *reftable_be_init(struct repository *repo, const char *payload, const char *gitdir, @@ -408,7 +403,6 @@ static struct ref_store *reftable_be_init(struct repository *repo, refs->write_options.disable_auto_compact = !git_env_bool("GIT_TEST_REFTABLE_AUTOCOMPACTION", 1); refs->write_options.lock_timeout_ms = 100; - refs->write_options.fsync = reftable_be_fsync; repo_config(the_repository, reftable_be_config, &refs->write_options); diff --git a/reftable/reftable-system.h b/reftable/reftable-system.h index 4a18a6a790..76f3e33e90 100644 --- a/reftable/reftable-system.h +++ b/reftable/reftable-system.h @@ -12,4 +12,7 @@ #include "compat/posix.h" #include "compat/zlib-compat.h" +int reftable_fsync(int fd); +#define fsync(fd) reftable_fsync(fd) + #endif diff --git a/reftable/reftable-writer.h b/reftable/reftable-writer.h index 065dd93dc6..a66db415c8 100644 --- a/reftable/reftable-writer.h +++ b/reftable/reftable-writer.h @@ -61,12 +61,6 @@ struct reftable_write_options { */ long lock_timeout_ms; - /* - * Optional callback used to fsync files to disk. Falls back to using - * fsync(3P) when unset. - */ - int (*fsync)(int fd); - /* * Callback function to execute whenever the stack is being reloaded. * This can be used e.g. 
to discard cached information that relies on diff --git a/reftable/stack.c b/reftable/stack.c index 1c9f21dfe1..fa87b46c37 100644 --- a/reftable/stack.c +++ b/reftable/stack.c @@ -29,13 +29,6 @@ static int stack_filename(struct reftable_buf *dest, struct reftable_stack *st, return 0; } -static int stack_fsync(const struct reftable_write_options *opts, int fd) -{ - if (opts->fsync) - return opts->fsync(fd); - return fsync(fd); -} - static ssize_t reftable_write_data(int fd, const void *data, size_t size) { size_t total_written = 0; @@ -69,7 +62,7 @@ static ssize_t fd_writer_write(void *arg, const void *data, size_t sz) static int fd_writer_flush(void *arg) { struct fd_writer *writer = arg; - return stack_fsync(writer->opts, writer->fd); + return fsync(writer->fd); } static int fd_read_lines(int fd, char ***namesp) @@ -812,7 +805,7 @@ int reftable_addition_commit(struct reftable_addition *add) goto done; } - err = stack_fsync(&add->stack->opts, add->tables_list_lock.fd); + err = fsync(add->tables_list_lock.fd); if (err < 0) { err = REFTABLE_IO_ERROR; goto done; @@ -1480,7 +1473,7 @@ static int stack_compact_range(struct reftable_stack *st, goto done; } - err = stack_fsync(&st->opts, tables_list_lock.fd); + err = fsync(tables_list_lock.fd); if (err < 0) { err = REFTABLE_IO_ERROR; unlink(new_table_path.buf); diff --git a/reftable/system.c b/reftable/system.c index 725a25844e..4d7e366b55 100644 --- a/reftable/system.c +++ b/reftable/system.c @@ -5,6 +5,7 @@ #include "reftable-error.h" #include "../lockfile.h" #include "../tempfile.h" +#include "../write-or-die.h" uint32_t reftable_rand(void) { @@ -131,3 +132,8 @@ int flock_commit(struct reftable_flock *l) return 0; } + +int reftable_fsync(int fd) +{ + return fsync_component(FSYNC_COMPONENT_REFERENCE, fd); +} From aa8938573050e6ab44a46e3b9f26c0e442f835aa Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Thu, 2 Apr 2026 09:31:16 +0200 Subject: [PATCH 75/93] reftable/fsck: use REFTABLE_UNUSED instead of UNUSED While 
we have the reftable-specific `REFTABLE_UNUSED` macro, we accidentally introduced a new usage of the Git-specific `UNUSED` macro into the reftable library in 9051638519 (reftable: add code to facilitate consistency checks, 2025-10-07). Convert the site to use `REFTABLE_UNUSED`. Ideally, we'd move the definition of `UNUSED` into "git-compat-util.h" so that it becomes accessible to the reftable library. But this is unfortunately not easily possible as "compat/mingw-posix.h" requires this macro, and this header is included by "compat/posix.h". Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- reftable/fsck.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/reftable/fsck.c b/reftable/fsck.c index 26b9115b14..8e73fc83f2 100644 --- a/reftable/fsck.c +++ b/reftable/fsck.c @@ -63,7 +63,7 @@ static int table_check_name(struct reftable_table *table, static int table_checks(struct reftable_table *table, reftable_fsck_report_fn report_fn, - reftable_fsck_verbose_fn verbose_fn UNUSED, + reftable_fsck_verbose_fn verbose_fn REFTABLE_UNUSED, void *cb_data) { table_check_fn table_check_fns[] = { From cb0882de1979522b2fc3dc4c3064b0ad21d50b06 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Thu, 2 Apr 2026 09:31:17 +0200 Subject: [PATCH 76/93] reftable/system: add abstraction to retrieve time in milliseconds We directly call gettimeofday(3p), which may not be available on some platforms. Provide the infrastructure to let projects easily use their own implementations of this function. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- reftable/stack.c | 27 ++++----------------------- reftable/system.c | 6 ++++++ reftable/system.h | 3 +++ 3 files changed, 13 insertions(+), 23 deletions(-) diff --git a/reftable/stack.c b/reftable/stack.c index fa87b46c37..1fba96ddb3 100644 --- a/reftable/stack.c +++ b/reftable/stack.c @@ -365,45 +365,26 @@ done: return err; } -/* return negative if a before b. 
*/ -static int tv_cmp(struct timeval *a, struct timeval *b) -{ - time_t diff = a->tv_sec - b->tv_sec; - int udiff = a->tv_usec - b->tv_usec; - - if (diff != 0) - return diff; - - return udiff; -} - static int reftable_stack_reload_maybe_reuse(struct reftable_stack *st, int reuse_open) { char **names = NULL, **names_after = NULL; - struct timeval deadline; + uint64_t deadline; int64_t delay = 0; int tries = 0, err; int fd = -1; - err = gettimeofday(&deadline, NULL); - if (err < 0) - goto out; - deadline.tv_sec += 3; + deadline = reftable_time_ms() + 3000; while (1) { - struct timeval now; - - err = gettimeofday(&now, NULL); - if (err < 0) - goto out; + uint64_t now = reftable_time_ms(); /* * Only look at deadlines after the first few times. This * simplifies debugging in GDB. */ tries++; - if (tries > 3 && tv_cmp(&now, &deadline) >= 0) + if (tries > 3 && now >= deadline) goto out; fd = open(st->list_file, O_RDONLY); diff --git a/reftable/system.c b/reftable/system.c index 4d7e366b55..cd76e56be8 100644 --- a/reftable/system.c +++ b/reftable/system.c @@ -4,6 +4,7 @@ #include "basics.h" #include "reftable-error.h" #include "../lockfile.h" +#include "../trace.h" #include "../tempfile.h" #include "../write-or-die.h" @@ -137,3 +138,8 @@ int reftable_fsync(int fd) { return fsync_component(FSYNC_COMPONENT_REFERENCE, fd); } + +uint64_t reftable_time_ms(void) +{ + return getnanotime() / 1000000; +} diff --git a/reftable/system.h b/reftable/system.h index a7eb6acd4a..071bfa3d58 100644 --- a/reftable/system.h +++ b/reftable/system.h @@ -111,4 +111,7 @@ int flock_release(struct reftable_flock *l); */ int flock_commit(struct reftable_flock *l); +/* Report the time in milliseconds. 
*/ +uint64_t reftable_time_ms(void); + #endif From 87e4eee3f94ec261a92a76d06261b227b00de461 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Thu, 2 Apr 2026 09:31:18 +0200 Subject: [PATCH 77/93] reftable/system: add abstraction to mmap files In our codebase we have a couple of wrappers around mmap(3p) that allow us to reimplement the syscall on platforms that don't have it natively, like for example Windows. Other projects that embed the reftable library may have a different infra though to hook up mmap wrappers, but these are currently hard to integrate. Provide the infrastructure to let projects easily define the mmap interface with a custom struct and custom functions. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- reftable/blocksource.c | 19 +++++++------------ reftable/system.c | 20 ++++++++++++++++++++ reftable/system.h | 18 ++++++++++++++++++ 3 files changed, 45 insertions(+), 12 deletions(-) diff --git a/reftable/blocksource.c b/reftable/blocksource.c index 573c81287f..7f7441f751 100644 --- a/reftable/blocksource.c +++ b/reftable/blocksource.c @@ -93,13 +93,12 @@ void block_source_from_buf(struct reftable_block_source *bs, } struct file_block_source { - uint64_t size; - unsigned char *data; + struct reftable_mmap mmap; }; static uint64_t file_size(void *b) { - return ((struct file_block_source *)b)->size; + return ((struct file_block_source *)b)->mmap.size; } static void file_release_data(void *b REFTABLE_UNUSED, struct reftable_block_data *dest REFTABLE_UNUSED) @@ -109,7 +108,7 @@ static void file_release_data(void *b REFTABLE_UNUSED, struct reftable_block_dat static void file_close(void *v) { struct file_block_source *b = v; - munmap(b->data, b->size); + reftable_munmap(&b->mmap); reftable_free(b); } @@ -117,8 +116,8 @@ static ssize_t file_read_data(void *v, struct reftable_block_data *dest, uint64_ uint32_t size) { struct file_block_source *b = v; - assert(off + size <= b->size); - dest->data = b->data + off; + assert(off + 
size <= b->mmap.size); + dest->data = (unsigned char *) b->mmap.data + off; dest->len = size; return size; } @@ -156,13 +155,9 @@ int reftable_block_source_from_file(struct reftable_block_source *bs, goto out; } - p->size = st.st_size; - p->data = mmap(NULL, st.st_size, PROT_READ, MAP_PRIVATE, fd, 0); - if (p->data == MAP_FAILED) { - err = REFTABLE_IO_ERROR; - p->data = NULL; + err = reftable_mmap(&p->mmap, fd, st.st_size); + if (err < 0) goto out; - } assert(!bs->ops); bs->ops = &file_vtable; diff --git a/reftable/system.c b/reftable/system.c index cd76e56be8..9063641f30 100644 --- a/reftable/system.c +++ b/reftable/system.c @@ -143,3 +143,23 @@ uint64_t reftable_time_ms(void) { return getnanotime() / 1000000; } + +int reftable_mmap(struct reftable_mmap *out, int fd, size_t len) +{ + void *data = xmmap_gently(NULL, len, PROT_READ, MAP_PRIVATE, fd, 0); + if (data == MAP_FAILED) + return REFTABLE_IO_ERROR; + + out->data = data; + out->size = len; + + return 0; +} + +int reftable_munmap(struct reftable_mmap *mmap) +{ + if (munmap(mmap->data, mmap->size) < 0) + return REFTABLE_IO_ERROR; + memset(mmap, 0, sizeof(*mmap)); + return 0; +} diff --git a/reftable/system.h b/reftable/system.h index 071bfa3d58..c0e2cbe0ff 100644 --- a/reftable/system.h +++ b/reftable/system.h @@ -114,4 +114,22 @@ int flock_commit(struct reftable_flock *l); /* Report the time in milliseconds. */ uint64_t reftable_time_ms(void); +struct reftable_mmap { + void *data; + size_t size; + void *priv; +}; + +/* + * Map the file into memory. Returns 0 on success, a reftable error code on + * error. + */ +int reftable_mmap(struct reftable_mmap *out, int fd, size_t len); + +/* + * Unmap the file from memory. Returns 0 on success, a reftable error code on + * error. 
+ */ +int reftable_munmap(struct reftable_mmap *mmap); + #endif From 0c8424c259b417c6aadc23f5398e55edd7b047a2 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Thu, 2 Apr 2026 08:51:18 +0200 Subject: [PATCH 78/93] t: work around multibyte bug in quoted heredocs with Dash v0.5.13 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When executing our test suite with Dash v0.5.13.2 one can observe several test failures that all have the same symptoms: we have a quoted heredoc that contains multibyte characters, but the final data does not match what we actually wanted to write. One such example is in t0300, where we see the diffs like the following: --- expect-stdout 2026-04-01 07:25:45.249919440 +0000 +++ stdout 2026-04-01 07:25:45.254919509 +0000 @@ -1,5 +1,5 @@ protocol=https host=example.com -path=perú.git +path=perú.git username=foo password=bar While seemingly the same, the data that we've written via the heredoc contains some invisible bytes. The expected hex representation of the string is: 7065 72c3 ba2e 6769 74 per...git But what we actually get instead is this string: 7065 7285 02c3 ba02 852e 6769 74 per.......git What's important to note here is that the multibyte character exists in both versions. But in the broken version we see that the bytes are wrapped in a sequence of "85 02" and "02 85". This is the CTLMBCHAR byte sequence of Dash, which it uses internally to quote multibyte sequences. As it turns out, this bug was introduced in c5bf970 (expand: Add multi-byte support to pmatch, 2024-06-02), which adds multibyte support to more contexts of Dash. One of these contexts seems to be in heredocs, and Dash _does_ correctly unquote these multibyte sequences when using an unquoted heredoc. But the bug seems to be that this unquoting does not happen in quoted heredocs, and the bug still exists on the latest "master" branch. For now, work around the bug by using unquoted heredocs instead. 
Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- t/t0300-credentials.sh | 4 +++- t/t3430-rebase-merges.sh | 6 ++++-- t/t3902-quoted.sh | 16 +++++++++------- t/t4014-format-patch.sh | 16 ++++++++++++---- t/t4201-shortlog.sh | 4 +++- t/t9001-send-email.sh | 12 +++++++++--- 6 files changed, 40 insertions(+), 18 deletions(-) diff --git a/t/t0300-credentials.sh b/t/t0300-credentials.sh index 07aa834d33..64ead1571a 100755 --- a/t/t0300-credentials.sh +++ b/t/t0300-credentials.sh @@ -675,7 +675,9 @@ test_expect_success 'match percent-encoded values' ' test_expect_success 'match percent-encoded UTF-8 values in path' ' test_config credential.https://example.com.useHttpPath true && test_config credential.https://example.com/perú.git.helper "$HELPER" && - check fill <<-\EOF + # NOTE: do not quote this heredoc, Dash 0.5.13 has a bug with heredocs + # that contain multibyte chars. + check fill <<-EOF url=https://example.com/per%C3%BA.git -- protocol=https diff --git a/t/t3430-rebase-merges.sh b/t/t3430-rebase-merges.sh index cc627e34a7..84b2d0e664 100755 --- a/t/t3430-rebase-merges.sh +++ b/t/t3430-rebase-merges.sh @@ -507,9 +507,11 @@ test_expect_success 'octopus merges' ' git rebase -i --force-rebase -r HEAD^^ && test "Hank" = "$(git show -s --format=%an HEAD)" && test "$before" != $(git rev-parse HEAD) && - test_cmp_graph HEAD^^.. <<-\EOF + # NOTE: do not quote this heredoc, Dash 0.5.13 has a bug with heredocs + # that contain multibyte chars. + test_cmp_graph HEAD^^.. <<-EOF *-. Tüntenfüsch - |\ \ + |\\ \\ | | * three | * | two | |/ diff --git a/t/t3902-quoted.sh b/t/t3902-quoted.sh index f528008c36..8660ec5cb0 100755 --- a/t/t3902-quoted.sh +++ b/t/t3902-quoted.sh @@ -60,16 +60,18 @@ With SP in it "\346\277\261\351\207\216\347\264\224" EOF -cat >expect.raw <<\EOF +# NOTE: do not quote this heredoc, Dash 0.5.13 has a bug with heredocs +# that contain multibyte chars. 
+cat >expect.raw <expect <<'EOF' +# NOTE: do not quote this heredoc, Dash 0.5.13 has a bug with heredocs +# that contain multibyte chars. +cat >expect < @@ -1300,7 +1302,9 @@ test_expect_success 'format-patch wraps extremely long from-header (non-ASCII wi test_cmp expect actual ' -cat >expect <<'EOF' +# NOTE: do not quote this heredoc, Dash 0.5.13 has a bug with heredocs +# that contain multibyte chars. +cat >expect <expect <<'EOF' +# NOTE: do not quote this heredoc, Dash 0.5.13 has a bug with heredocs +# that contain multibyte chars. +cat >expect <patch && - cat >expect <<-\EOF && + # NOTE: do not quote this heredoc, Dash 0.5.13 has a bug with heredocs + # that contain multibyte chars. + cat >expect <<-EOF && From: C O Mitter Content-Type: text/plain; charset=UTF-8 diff --git a/t/t4201-shortlog.sh b/t/t4201-shortlog.sh index 5f23fc147b..9f41d56d9a 100755 --- a/t/t4201-shortlog.sh +++ b/t/t4201-shortlog.sh @@ -105,7 +105,9 @@ test_expect_success 'output from user-defined format is re-wrapped' ' ' test_expect_success !MINGW,ICONV 'shortlog wrapping' ' - cat >expect <<\EOF && + # NOTE: do not quote this heredoc, Dash 0.5.13 has a bug with heredocs + # that contain multibyte chars. + cat >expect <email-using-8bit <<\EOF +# NOTE: do not quote this heredoc, Dash 0.5.13 has a bug with heredocs +# that contain multibyte chars. +cat >email-using-8bit < From: author@example.com @@ -1735,7 +1737,9 @@ test_expect_success $PREREQ '--8bit-encoding overrides sendemail.8bitEncoding' ' ' test_expect_success $PREREQ 'setup expect' ' - cat >email-using-8bit <<-\EOF + # NOTE: do not quote this heredoc, Dash 0.5.13 has a bug with heredocs + # that contain multibyte chars. 
+ cat >email-using-8bit <<-EOF From fe6ecc66ece37198fe5db91fa2fc41d9f4fe5cc4 Mon Sep 17 00:00:00 2001 Message-ID: From: author@example.com @@ -1764,7 +1768,9 @@ test_expect_success $PREREQ '--8bit-encoding also treats subject' ' ' test_expect_success $PREREQ 'setup expect' ' - cat >email-using-8bit <<-\EOF + # NOTE: do not quote this heredoc, Dash 0.5.13 has a bug with heredocs + # that contain multibyte chars. + cat >email-using-8bit <<-EOF From fe6ecc66ece37198fe5db91fa2fc41d9f4fe5cc4 Mon Sep 17 00:00:00 2001 Message-ID: From: A U Thor From d48c5d5a4c801dfe9acd5dc4a3c1b94430883f52 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Thu, 2 Apr 2026 08:51:19 +0200 Subject: [PATCH 79/93] t9300: work around partial read bug in Dash v0.5.13 When executing t9300 with Dash v0.5.13.1 we can see that the test hangs completely with the following (condensed) trace: git fast-import + error=1 + read output + cat input + echo checkpoint + echo progress checkpoint + test rogress checkpoint = progress checkpoint + test rogress checkpoint = UNEXPECTED + echo cruft: rogress checkpoint cruft: rogress checkpoint + read output + test = progress checkpoint + test = UNEXPECTED + echo cruft: cruft: + read output Basically, what's happening here is that we spawn git-fast-import(1) and wait for it to output a certain string, "progress checkpoint". Curiously though, what we end up reading is "rogress checkpoint" -- so the first byte of the expected string is missing. Same as in the preceding commit, this seems to be a bug in Dash itself that bisects to c5bf970 (expand: Add multi-byte support to pmatch, 2024-06-02). But other than in the preceding commit, this bug has already been fixed upstream in 079059a (input: Fix heap-buffer-overflow in preadbuffer on long lines, 2026-02-11), which is part of v0.5.13.2. For now though, work around the bug by waiting for the expected output in a different way. 
There is no good reason why one version should work better than the other, but at least the new version doesn't exhibit the bug. And, if you ask me, it's also slightly easier to read. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- t/t9300-fast-import.sh | 32 ++++++++++++++------------------ 1 file changed, 14 insertions(+), 18 deletions(-) diff --git a/t/t9300-fast-import.sh b/t/t9300-fast-import.sh index 5685cce6fe..479437760b 100755 --- a/t/t9300-fast-import.sh +++ b/t/t9300-fast-import.sh @@ -3635,25 +3635,21 @@ background_import_then_checkpoint () { echo "progress checkpoint" ) >&8 & - error=1 ;# assume the worst - while read output <&9 - do - if test "$output" = "progress checkpoint" - then - error=0 - break - elif test "$output" = "UNEXPECTED" - then - break - fi - # otherwise ignore cruft - echo >&2 "cruft: $output" - done + last=$( + while read output <&9 + do + if test "$output" = "progress checkpoint" || test "$output" = "UNEXPECTED" + then + echo "$output" + break + else + # otherwise ignore cruft + echo >&2 "cruft: $output" + fi + done + ) - if test $error -eq 1 - then - false - fi + test "$last" = "progress checkpoint" } background_import_still_running () { From 89152af176ea94ea8f3249115b6e00827fbbeb70 Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Fri, 3 Apr 2026 08:55:02 +0000 Subject: [PATCH 80/93] cmake: use writev(3p) wrapper as needed This is a companion patch of 3b9b2c2a29a (compat/posix: introduce writev(3p) wrapper, 2026-03-13) where support for using the `writev()` wrapper was introduced in the `Makefile` and the Meson-based build, but the CMake build still needs that treatment, too. 
Signed-off-by: Johannes Schindelin Signed-off-by: Junio C Hamano --- contrib/buildsystems/CMakeLists.txt | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/contrib/buildsystems/CMakeLists.txt b/contrib/buildsystems/CMakeLists.txt index 28877feb9d..ad58c320be 100644 --- a/contrib/buildsystems/CMakeLists.txt +++ b/contrib/buildsystems/CMakeLists.txt @@ -374,7 +374,7 @@ endif() #function checks set(function_checks strcasestr memmem strlcpy strtoimax strtoumax strtoull - setenv mkdtemp poll pread memmem) + setenv mkdtemp poll pread memmem writev) #unsetenv,hstrerror are incompatible with windows build if(NOT WIN32) @@ -419,6 +419,10 @@ if(NOT HAVE_MEMMEM) list(APPEND compat_SOURCES compat/memmem.c) endif() +if(NOT HAVE_WRITEV) + list(APPEND compat_SOURCES compat/writev.c) +endif() + if(NOT WIN32) if(NOT HAVE_UNSETENV) list(APPEND compat_SOURCES compat/unsetenv.c) From c664ee2001a1ea0ecbc6448b24303687e6caf1cb Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Fri, 3 Apr 2026 09:56:23 +0000 Subject: [PATCH 81/93] mingw: use strftime() directly in UCRT builds The `mingw_strftime()` wrapper exists to work around msvcrt.dll's incomplete `strftime()` implementation by dynamically loading the version from ucrtbase.dll at runtime via `LoadLibrary()` + `GetProcAddress()`. When the binary is already linked against UCRT (i.e. when building in the UCRT64 environment), the linked-in `strftime()` is the ucrtbase.dll version, making the dynamic loading needless churn: It's calling the very same code. Simply guard both the declaration and implementation so that the unnecessary work-around is skipped in UCRT builds. 
Signed-off-by: Johannes Schindelin Signed-off-by: Junio C Hamano --- compat/mingw.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/compat/mingw.c b/compat/mingw.c index c667a2dcda..338ec3535e 100644 --- a/compat/mingw.c +++ b/compat/mingw.c @@ -1394,6 +1394,9 @@ revert_attrs: size_t mingw_strftime(char *s, size_t max, const char *format, const struct tm *tm) { +#ifdef _UCRT + size_t ret = strftime(s, max, format, tm); +#else /* a pointer to the original strftime in case we can't find the UCRT version */ static size_t (*fallback)(char *, size_t, const char *, const struct tm *) = strftime; size_t ret; @@ -1404,6 +1407,7 @@ size_t mingw_strftime(char *s, size_t max, ret = strftime(s, max, format, tm); else ret = fallback(s, max, format, tm); +#endif if (!ret && errno == EINVAL) die("invalid strftime format: '%s'", format); From 2855562ca6a9c6b0e7bc780b050c1e83c9fcfbd0 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Fri, 3 Apr 2026 15:26:34 -0700 Subject: [PATCH 82/93] A couple more on top of -rc0 Signed-off-by: Junio C Hamano --- Documentation/RelNotes/2.54.0.adoc | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/Documentation/RelNotes/2.54.0.adoc b/Documentation/RelNotes/2.54.0.adoc index 629e603f43..04c038f035 100644 --- a/Documentation/RelNotes/2.54.0.adoc +++ b/Documentation/RelNotes/2.54.0.adoc @@ -277,6 +277,12 @@ Performance, Internal Implementation, Development Support etc. * In case homebrew breaks REG_ENHANCED again, leave a in-code comment to suggest use of our replacement regex as a workaround. + * MinGW build updates. + + * The way dash 0.5.13 handles non-ASCII contents in here-doc + is buggy and breaks our existing tests, which unfortunately + have been rewritten to avoid triggering the bug. 
+ Fixes since v2.53 ----------------- From 339eba65a7f8aa596199e04f45683c48a1562b9c Mon Sep 17 00:00:00 2001 From: Trieu Huynh Date: Sat, 4 Apr 2026 18:15:57 +0700 Subject: [PATCH 83/93] backfill: auto-detect sparse-checkout from config Commit 85127bcdea ("backfill: assume --sparse when sparse-checkout is enabled") intended for 'git backfill' to consult the repository configuration when the user does not pass '--sparse' or '--no-sparse' on the command line. It added the sentinel check: if (ctx->sparse < 0) ctx->sparse = cfg->apply_sparse_checkout; However, the ctx->sparse field is initialized to 0 instead of -1, so this guard never triggers. Consequently, the repository config (core.sparseCheckout) is never checked, and the command always performs a full backfill even when sparse-checkout is enabled. Fix this by initializing ctx->sparse to -1, ensuring the existing fallback logic correctly reads the repository configuration when no explicit flags are provided. Add a test to verify that 'git backfill' automatically respects sparse-checkout settings when no flags are passed. 
Signed-off-by: Trieu Huynh Signed-off-by: Junio C Hamano --- builtin/backfill.c | 2 +- t/t5620-backfill.sh | 15 +++++++++++++++ 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/builtin/backfill.c b/builtin/backfill.c index 33e1ea2f84..77d154958c 100644 --- a/builtin/backfill.c +++ b/builtin/backfill.c @@ -120,7 +120,7 @@ int cmd_backfill(int argc, const char **argv, const char *prefix, struct reposit .repo = repo, .current_batch = OID_ARRAY_INIT, .min_batch_size = 50000, - .sparse = 0, + .sparse = -1, }; struct option options[] = { OPT_INTEGER(0, "min-batch-size", &ctx.min_batch_size, diff --git a/t/t5620-backfill.sh b/t/t5620-backfill.sh index 58c81556e7..bed4987124 100755 --- a/t/t5620-backfill.sh +++ b/t/t5620-backfill.sh @@ -119,6 +119,21 @@ test_expect_success 'backfill --sparse' ' test_line_count = 0 missing ' +test_expect_success 'backfill auto-detects sparse-checkout from config' ' + git clone --sparse --filter=blob:none \ + --single-branch --branch=main \ + "file://$(pwd)/srv.bare" backfill-auto-sparse && + + git -C backfill-auto-sparse rev-list --quiet --objects --missing=print HEAD >missing && + test_line_count = 44 missing && + + GIT_TRACE2_EVENT="$(pwd)/auto-sparse-trace" git \ + -C backfill-auto-sparse backfill && + + test_trace2_data promisor fetch_count 4 Date: Mon, 6 Apr 2026 05:45:29 +0000 Subject: [PATCH 84/93] unify and bump _WIN32_WINNT definition to Windows 8.1 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Git for Windows doesn't support anything prior to Windows 8.1 since 2.47.0 and Git followed along with commits like ce6ccba (mingw: drop Windows 7-specific work-around, 2025-08-04). There is no need to pretend to the compiler that we still support Windows Vista, just to lock us out of easy access to newer APIs. There is also no need to have conflicting and unused definitions claiming we support some versions of Windows XP or even Windows NT 4.0. 
Bump all definitions of _WIN32_WINNT to a realistic value of Windows 8.1. This will also simplify code for a followup commit that will improve cpu core detection on multi-socket systems. Signed-off-by: Matthias Aßhauer Signed-off-by: Johannes Schindelin Signed-off-by: Junio C Hamano --- compat/mingw.c | 2 +- compat/nedmalloc/malloc.c.h | 2 +- compat/poll/poll.c | 4 ++-- compat/posix.h | 2 +- compat/win32/flush.c | 2 ++ 5 files changed, 7 insertions(+), 5 deletions(-) diff --git a/compat/mingw.c b/compat/mingw.c index 338ec3535e..2023c16db6 100644 --- a/compat/mingw.c +++ b/compat/mingw.c @@ -2464,7 +2464,7 @@ repeat: if (supports_file_rename_info_ex) { /* * Our minimum required Windows version is still set to Windows - * Vista. We thus have to declare required infrastructure for + * 8.1. We thus have to declare required infrastructure for * FileRenameInfoEx ourselves until we bump _WIN32_WINNT to * 0x0A00. Furthermore, we have to handle cases where the * FileRenameInfoEx call isn't supported yet. diff --git a/compat/nedmalloc/malloc.c.h b/compat/nedmalloc/malloc.c.h index 814845d4b3..e0c567586c 100644 --- a/compat/nedmalloc/malloc.c.h +++ b/compat/nedmalloc/malloc.c.h @@ -500,7 +500,7 @@ MAX_RELEASE_CHECK_RATE default: 4095 unless not HAVE_MMAP #ifdef WIN32 #define WIN32_LEAN_AND_MEAN #ifndef _WIN32_WINNT -#define _WIN32_WINNT 0x403 +#define _WIN32_WINNT 0x603 #endif #include #define HAVE_MMAP 1 diff --git a/compat/poll/poll.c b/compat/poll/poll.c index a2becd16cd..ea362b4a8e 100644 --- a/compat/poll/poll.c +++ b/compat/poll/poll.c @@ -20,7 +20,7 @@ #define DISABLE_SIGN_COMPARE_WARNINGS -/* To bump the minimum Windows version to Windows Vista */ +/* To bump the minimum Windows version to Windows 8.1 */ #include "git-compat-util.h" /* Tell gcc not to warn about the (nfd < 0) tests, below. */ @@ -41,7 +41,7 @@ #if (defined _WIN32 || defined __WIN32__) && ! 
defined __CYGWIN__ # define WIN32_NATIVE # if defined (_MSC_VER) && !defined(_WIN32_WINNT) -# define _WIN32_WINNT 0x0502 +# define _WIN32_WINNT 0x0603 # endif # include # include diff --git a/compat/posix.h b/compat/posix.h index 3c611d2736..94699a03fa 100644 --- a/compat/posix.h +++ b/compat/posix.h @@ -76,7 +76,7 @@ #if defined(WIN32) && !defined(__CYGWIN__) /* Both MinGW and MSVC */ # if !defined(_WIN32_WINNT) -# define _WIN32_WINNT 0x0600 +# define _WIN32_WINNT 0x0603 # endif #define WIN32_LEAN_AND_MEAN /* stops windows.h including winsock.h */ #include diff --git a/compat/win32/flush.c b/compat/win32/flush.c index 291f90ea94..7244ff69ac 100644 --- a/compat/win32/flush.c +++ b/compat/win32/flush.c @@ -6,7 +6,9 @@ int win32_fsync_no_flush(int fd) { IO_STATUS_BLOCK io_status; +#ifndef FLUSH_FLAGS_FILE_DATA_ONLY #define FLUSH_FLAGS_FILE_DATA_ONLY 1 +#endif DECLARE_PROC_ADDR(ntdll.dll, NTSTATUS, NTAPI, NtFlushBuffersFileEx, HANDLE FileHandle, ULONG Flags, PVOID Parameters, ULONG ParameterSize, From 2f8c3f6a5a6d6a3de205be709e1a598b9d4b0b3e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Matthias=20A=C3=9Fhauer?= Date: Mon, 6 Apr 2026 05:45:30 +0000 Subject: [PATCH 85/93] compat/winansi: drop pre-Vista workaround MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 1edeb9a (Win32: warn if the console font doesn't support Unicode, 2014-06-10) introduced both code to detect the current console font on Windows Vista and newer and a fallback for older systems to detect the default console font and issue a warning if that font doesn't support unicode. Since we haven't supported any Windows older than Vista in almost a decade, we don't need to keep the workaround. 
Signed-off-by: Matthias Aßhauer Signed-off-by: Johannes Schindelin Signed-off-by: Junio C Hamano --- compat/winansi.c | 37 ++++--------------------------------- 1 file changed, 4 insertions(+), 33 deletions(-) diff --git a/compat/winansi.c b/compat/winansi.c index ac2ffb7869..3ce1900939 100644 --- a/compat/winansi.c +++ b/compat/winansi.c @@ -32,47 +32,18 @@ static int non_ascii_used = 0; static HANDLE hthread, hread, hwrite; static HANDLE hconsole1, hconsole2; -#ifdef __MINGW32__ -#if !defined(__MINGW64_VERSION_MAJOR) || __MINGW64_VERSION_MAJOR < 5 -typedef struct _CONSOLE_FONT_INFOEX { - ULONG cbSize; - DWORD nFont; - COORD dwFontSize; - UINT FontFamily; - UINT FontWeight; - WCHAR FaceName[LF_FACESIZE]; -} CONSOLE_FONT_INFOEX, *PCONSOLE_FONT_INFOEX; -#endif -#endif - static void warn_if_raster_font(void) { DWORD fontFamily = 0; - DECLARE_PROC_ADDR(kernel32.dll, BOOL, WINAPI, - GetCurrentConsoleFontEx, HANDLE, BOOL, - PCONSOLE_FONT_INFOEX); + CONSOLE_FONT_INFOEX cfi; /* don't bother if output was ascii only */ if (!non_ascii_used) return; - /* GetCurrentConsoleFontEx is available since Vista */ - if (INIT_PROC_ADDR(GetCurrentConsoleFontEx)) { - CONSOLE_FONT_INFOEX cfi; - cfi.cbSize = sizeof(cfi); - if (GetCurrentConsoleFontEx(console, 0, &cfi)) - fontFamily = cfi.FontFamily; - } else { - /* pre-Vista: check default console font in registry */ - HKEY hkey; - if (ERROR_SUCCESS == RegOpenKeyExA(HKEY_CURRENT_USER, "Console", - 0, KEY_READ, &hkey)) { - DWORD size = sizeof(fontFamily); - RegQueryValueExA(hkey, "FontFamily", NULL, NULL, - (LPVOID) &fontFamily, &size); - RegCloseKey(hkey); - } - } + cfi.cbSize = sizeof(cfi); + if (GetCurrentConsoleFontEx(console, 0, &cfi)) + fontFamily = cfi.FontFamily; if (!(fontFamily & TMPF_TRUETYPE)) { const wchar_t *msg = L"\nWarning: Your console font probably " From 26b9946dd756a2efc29f898e53327676a22adc3e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ren=C3=A9=20Scharfe?= Date: Mon, 6 Apr 2026 11:31:21 +0200 Subject: [PATCH 86/93] 
history: fix short help for argument of --update-refs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit "print" is not a valid argument for --update-refs. List both valid alternatives literally in the argh string, consistent with documentation and usage string. Signed-off-by: René Scharfe Signed-off-by: Junio C Hamano --- builtin/history.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/builtin/history.c b/builtin/history.c index 568dc75ee7..9526938085 100644 --- a/builtin/history.c +++ b/builtin/history.c @@ -437,8 +437,8 @@ static int cmd_history_reword(int argc, enum ref_action action = REF_ACTION_DEFAULT; int dry_run = 0; struct option options[] = { - OPT_CALLBACK_F(0, "update-refs", &action, N_(""), - N_("control which refs should be updated (branches|head)"), + OPT_CALLBACK_F(0, "update-refs", &action, "(branches|head)", + N_("control which refs should be updated"), PARSE_OPT_NONEG, parse_ref_action), OPT_BOOL('n', "dry-run", &dry_run, N_("perform a dry-run without updating any refs")), @@ -666,8 +666,8 @@ static int cmd_history_split(int argc, enum ref_action action = REF_ACTION_DEFAULT; int dry_run = 0; struct option options[] = { - OPT_CALLBACK_F(0, "update-refs", &action, N_(""), - N_("control ref update behavior (branches|head|print)"), + OPT_CALLBACK_F(0, "update-refs", &action, "(branches|head)", + N_("control ref update behavior"), PARSE_OPT_NONEG, parse_ref_action), OPT_BOOL('n', "dry-run", &dry_run, N_("perform a dry-run without updating any refs")), From 1adf5bca8c3cf778103548b9355777cf2d12efdd Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Mon, 6 Apr 2026 15:42:30 -0700 Subject: [PATCH 87/93] A handful before -rc1 Signed-off-by: Junio C Hamano --- Documentation/RelNotes/2.54.0.adoc | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/Documentation/RelNotes/2.54.0.adoc b/Documentation/RelNotes/2.54.0.adoc index 04c038f035..c692dddb4a 100644 --- 
a/Documentation/RelNotes/2.54.0.adoc +++ b/Documentation/RelNotes/2.54.0.adoc @@ -119,6 +119,8 @@ UI, Workflows & Features * "git replay" (experimental) learns, in addition to "pick" and "replay", a new operating mode "revert". + * git replay now supports replaying down to the root commit. + Performance, Internal Implementation, Development Support etc. -------------------------------------------------------------- @@ -283,6 +285,21 @@ Performance, Internal Implementation, Development Support etc. is buggy and breaks our existing tests, which unfortunately have been rewritten to avoid triggering the bug. + * Object name handling (disambiguation and abbreviation) has been + refactored to be backend-generic, moving logic into the respective + object database backends. + + * pack-objects's --stdin-packs=follow mode learns to handle + excluded-but-open packs. + + * A few code paths that spawned child processes for network + connection weren't wait(2)ing for their children and letting "init" + reap them instead; they have been tightened. + + * Adjust the codebase for C23 that changes functions like strchr() + that discarded constness when they return a pointer into a const + string to preserve constness. + Fixes since v2.53 ----------------- @@ -512,3 +529,6 @@ Fixes since v2.53 (merge 37182267a0 kh/doc-interpret-trailers-1 later to maint). (merge f64c50e768 jc/rerere-modern-strbuf-handling later to maint). (merge 699248d89e th/t8003-unhide-git-failures later to maint). + (merge d8e34f971b za/t2000-modernise later to maint). + (merge 849988bc74 th/t6101-unhide-git-failures later to maint). + (merge 0f0ce07625 sp/doc-gitignore-oowt later to maint). 
From 7c4e9e957a427d4c6a19265a528de0a161ff9b62 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Tue, 7 Apr 2026 14:59:08 -0700 Subject: [PATCH 88/93] A bit more before -rc1 Signed-off-by: Junio C Hamano --- Documentation/RelNotes/2.54.0.adoc | 31 ++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/Documentation/RelNotes/2.54.0.adoc b/Documentation/RelNotes/2.54.0.adoc index c692dddb4a..27dbfdc6a5 100644 --- a/Documentation/RelNotes/2.54.0.adoc +++ b/Documentation/RelNotes/2.54.0.adoc @@ -121,6 +121,9 @@ UI, Workflows & Features * git replay now supports replaying down to the root commit. + * Handling of signed commits and tags in fast-import has been made more + configurable. + Performance, Internal Implementation, Development Support etc. -------------------------------------------------------------- @@ -300,6 +303,19 @@ Performance, Internal Implementation, Development Support etc. that discarded constness when they return a pointer into a const string to preserve constness. + * A handful of inappropriate uses of the_repository have been + rewritten to use the right repository structure instance in the + read-cache.c codepath. + + * Internals of "git fsck" have been refactored to not depend on the + global `the_repository` variable. + + * Reduce dependency on `the_repository` in add-patch.c file. + + * The way the "git log -L<range>:<file>" feature is bolted onto the + log/diff machinery is being reworked a bit to make the feature + compatible with more diff options, like -S/G. + Fixes since v2.53 ----------------- @@ -478,6 +494,21 @@ Fixes since v2.53 refspec is a single-object refspec, which has been corrected. (merge 4e5dc601dd kj/refspec-parsing-outside-repository later to maint). + * Fix a regression in writing the commit-graph where commits with dates + exceeding 34 bits (beyond year 2514) could cause an underflow and + crash Git during the generation data overflow chunk writing. 
+ + * The value of a wrong pointer variable was referenced in an error + message that reported that it shouldn't be NULL. + (merge 753ecf4205 yc/path-walk-fix-error-reporting later to maint). + + * The check in "receive-pack" to prevent a checked out branch from + getting updated via updateInstead mechanism has been corrected. + + * "git backfill" is capable of auto-detecting a sparsely checked out + working tree, which was broken. + (merge 339eba65a7 th/backfill-auto-detect-sparseness-fix later to maint). + * Other code cleanup, docfix, build fix, etc. (merge d79fff4a11 jk/remote-tracking-ref-leakfix later to maint). (merge 7a747f972d dd/t5403-modernise later to maint). From 7d8727ff0b621a9729c2de6a3698063b7b3ba2d6 Mon Sep 17 00:00:00 2001 From: Justin Tobler Date: Tue, 7 Apr 2026 15:17:30 -0500 Subject: [PATCH 89/93] object-file: avoid ODB transaction when not writing objects In ce1661f9da (odb: add transaction interface, 2025-09-16), existing ODB transaction logic is adapted to create a transaction interface at the ODB layer. The intent here is for the ODB transaction interface to eventually provide an object source agnostic means to manage transactions. An unintended consequence of this change though is that `object-file.c:index_fd()` may enter the ODB transaction path even when no object write is requested. In non-repository contexts, this can result in a NULL dereference and segfault. One such case occurs when running git-diff(1) outside of a repository with "core.bigFileThreshold" forcing the streaming path in `index_fd()`: $ echo foo >foo $ echo bar >bar $ git -c core.bigFileThreshold=1 diff -- foo bar In this scenario, the caller only needs to compute the object ID. Object hashing does not require an ODB, so starting a transaction is both unnecessary and invalid. Fix the bug by avoiding the use of ODB transactions in `index_fd()` when callers are only interested in computing the object hash. 
Reported-by: Luca Stefani Signed-off-by: Justin Tobler [jc: adjusted to fd13909e (Merge branch 'jt/odb-transaction', 2025-10-02)] Signed-off-by: Junio C Hamano --- object-file.c | 47 +++++++++++++++++++++++++++++++++++------ t/t1517-outside-repo.sh | 8 +++++++ 2 files changed, 48 insertions(+), 7 deletions(-) diff --git a/object-file.c b/object-file.c index 4675c8ed6b..5d72e65bde 100644 --- a/object-file.c +++ b/object-file.c @@ -1599,6 +1599,34 @@ static int index_blob_packfile_transaction(struct odb_transaction *transaction, return 0; } +static int hash_blob_stream(const struct git_hash_algo *hash_algo, + struct object_id *result_oid, int fd, size_t size) +{ + unsigned char buf[16384]; + struct git_hash_ctx ctx; + unsigned header_len; + + header_len = format_object_header((char *)buf, sizeof(buf), + OBJ_BLOB, size); + hash_algo->init_fn(&ctx); + git_hash_update(&ctx, buf, header_len); + + while (size) { + size_t rsize = size < sizeof(buf) ? size : sizeof(buf); + ssize_t read_result = read_in_full(fd, buf, rsize); + + if ((read_result < 0) || ((size_t)read_result != rsize)) + return -1; + + git_hash_update(&ctx, buf, rsize); + size -= read_result; + } + + git_hash_final_oid(result_oid, &ctx); + + return 0; +} + int index_fd(struct index_state *istate, struct object_id *oid, int fd, struct stat *st, enum object_type type, const char *path, unsigned flags) @@ -1620,14 +1648,19 @@ int index_fd(struct index_state *istate, struct object_id *oid, ret = index_core(istate, oid, fd, xsize_t(st->st_size), type, path, flags); } else { - struct odb_transaction *transaction; + if (flags & INDEX_WRITE_OBJECT) { + struct odb_transaction *transaction; - transaction = odb_transaction_begin(the_repository->objects); - ret = index_blob_packfile_transaction(the_repository->objects->transaction, - oid, fd, - xsize_t(st->st_size), - path, flags); - odb_transaction_commit(transaction); + transaction = odb_transaction_begin(the_repository->objects); + ret = 
index_blob_packfile_transaction(the_repository->objects->transaction, + oid, fd, + xsize_t(st->st_size), + path, flags); + odb_transaction_commit(transaction); + } else { + ret = hash_blob_stream(the_repository->hash_algo, oid, + fd, xsize_t(st->st_size)); + } } close(fd); diff --git a/t/t1517-outside-repo.sh b/t/t1517-outside-repo.sh index c824c1a25c..e1d35170de 100755 --- a/t/t1517-outside-repo.sh +++ b/t/t1517-outside-repo.sh @@ -93,6 +93,14 @@ test_expect_success 'diff outside repository' ' test_cmp expect actual ' +test_expect_success 'hash object exceeding bigFileThreshold outside repository' ' + ( + cd non-repo && + echo foo >foo && + git -c core.bigFileThreshold=1 hash-object --stdin Date: Wed, 8 Apr 2026 08:21:34 -0700 Subject: [PATCH 90/93] Git 2.54-rc1 Signed-off-by: Junio C Hamano --- GIT-VERSION-GEN | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/GIT-VERSION-GEN b/GIT-VERSION-GEN index 92ea811ae6..9c55beb496 100755 --- a/GIT-VERSION-GEN +++ b/GIT-VERSION-GEN @@ -1,6 +1,6 @@ #!/bin/sh -DEF_VER=v2.54.0-rc0 +DEF_VER=v2.54.0-rc1 LF=' ' From 373d43e0829d91c61f35f46f6715a23adda7b35d Mon Sep 17 00:00:00 2001 From: Adrian Ratiu Date: Wed, 8 Apr 2026 19:11:48 +0300 Subject: [PATCH 91/93] t1800: add &&-chains to test helper functions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add the missing &&'s so we properly propagate failures between commands in the hook helper functions. Also add a missing mkdir -p arg (found by adding the &&). Reported-by: SZEDER Gábor Signed-off-by: Adrian Ratiu Signed-off-by: Junio C Hamano --- t/t1800-hook.sh | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/t/t1800-hook.sh b/t/t1800-hook.sh index 96749fc06d..33decc66c0 100755 --- a/t/t1800-hook.sh +++ b/t/t1800-hook.sh @@ -6,16 +6,16 @@ test_description='git-hook command and config-managed multihooks' . 
"$TEST_DIRECTORY"/lib-terminal.sh setup_hooks () { - test_config hook.ghi.command "/path/ghi" - test_config hook.ghi.event pre-commit --add - test_config hook.ghi.event test-hook --add - test_config_global hook.def.command "/path/def" + test_config hook.ghi.command "/path/ghi" && + test_config hook.ghi.event pre-commit --add && + test_config hook.ghi.event test-hook --add && + test_config_global hook.def.command "/path/def" && test_config_global hook.def.event pre-commit --add } setup_hookdir () { - mkdir .git/hooks - write_script .git/hooks/pre-commit <<-EOF + mkdir -p .git/hooks && + write_script .git/hooks/pre-commit <<-EOF && echo \"Legacy Hook\" EOF test_when_finished rm -rf .git/hooks From 2226ffaacd93d3fe5554687a70d9190d72596f96 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Wed, 8 Apr 2026 13:20:55 -0400 Subject: [PATCH 92/93] run_processes_parallel(): fix order of sigpipe handling In commit ec0becacc9 (run-command: add stdin callback for parallelization, 2026-01-28), we taught run_processes_parallel() to ignore SIGPIPE, since we wouldn't want a write() to a broken pipe of one of the children to take down the whole process. But there's a subtle ordering issue. After we ignore SIGPIPE, we call pp_init(), which installs its own cleanup handler for multiple signals using sigchain_push_common(), which includes SIGPIPE. So if we receive SIGPIPE while writing to a child, we'll trigger that handler first, pop it off the stack, and then re-raise (which is then ignored because of the SIG_IGN we pushed first). But what does that handler do? It tries to clean up all of the child processes, under the assumption that when we re-raise the signal we'll be exiting the process! So a hook that exits without reading all of its input will cause us to get SIGPIPE, which will put us in a signal handler that then tries to kill() that same child. This seems to be mostly harmless on Linux. 
The process has already exited by this point, and though kill() does not complain (since the process has not been reaped with a wait() call), it does not affect the exit status of the process. However, this seems not to be true on all platforms. This case is triggered by t5401.13, "pre-receive hook that forgets to read its input". This test fails on NonStop since that hook was converted to the run_processes_parallel() API. We can fix it by reordering the code a bit. We should run pp_init() first, and then push our SIG_IGN onto the stack afterwards, so that it is truly ignored while feeding the sub-processes. Note that we also reorder the popping at the end of the function, too. This is not technically necessary, as we are doing two pops either way, but now the pops will correctly match their pushes. This also fixes a related case that we can't test yet. If we did have more than one process to run, then one child causing SIGPIPE would cause us to kill() all of the children (which might still actually be running). But the hook API is the only user of the new feed_pipe feature, and it does not yet support parallel hook execution. So for now we'll always execute the processes sequentially. Once parallel hook execution exists, we'll be able to add a test which covers this. Reported-by: Randall S. Becker Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- run-command.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/run-command.c b/run-command.c index 32c290ee6a..574d5c40f0 100644 --- a/run-command.c +++ b/run-command.c @@ -1895,14 +1895,19 @@ void run_processes_parallel(const struct run_process_parallel_opts *opts) "max:%"PRIuMAX, (uintmax_t)opts->processes); + pp_init(&pp, opts, &pp_sig); + /* * Child tasks might receive input via stdin, terminating early (or not), so * ignore the default SIGPIPE which gets handled by each feed_pipe_fn which * actually writes the data to children stdin fds. 
+ * + * This _must_ come after pp_init(), because it installs its own + * SIGPIPE handler (to cleanup children), and we want to supersede + * that. */ sigchain_push(SIGPIPE, SIG_IGN); - pp_init(&pp, opts, &pp_sig); while (1) { for (i = 0; i < spawn_cap && !pp.shutdown && @@ -1928,10 +1933,10 @@ void run_processes_parallel(const struct run_process_parallel_opts *opts) } } - pp_cleanup(&pp, opts); - sigchain_pop(SIGPIPE); + pp_cleanup(&pp, opts); + if (do_trace2) trace2_region_leave(tr2_category, tr2_label, NULL); } From b15384c06f77bc2d34d0d3623a8a58218313a561 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Wed, 8 Apr 2026 11:00:10 -0700 Subject: [PATCH 93/93] A bit more post -rc1 Signed-off-by: Junio C Hamano --- Documentation/RelNotes/2.54.0.adoc | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/Documentation/RelNotes/2.54.0.adoc b/Documentation/RelNotes/2.54.0.adoc index 27dbfdc6a5..30ec959e7e 100644 --- a/Documentation/RelNotes/2.54.0.adoc +++ b/Documentation/RelNotes/2.54.0.adoc @@ -509,6 +509,12 @@ Fixes since v2.53 working tree, which was broken. (merge 339eba65a7 th/backfill-auto-detect-sparseness-fix later to maint). + * add_files_to_cache() used diff_files() to detect only the paths that + are different between the index and the working tree and add them, + which does not need rename detection; the rename detection interfered + and caused unnecessary conflicts. + (merge c0ce43376b ng/add-files-to-cache-wo-rename later to maint). + * Other code cleanup, docfix, build fix, etc. (merge d79fff4a11 jk/remote-tracking-ref-leakfix later to maint). (merge 7a747f972d dd/t5403-modernise later to maint).