Merge branch 'en/backfill-fixes-and-edges'

The 'git backfill' command now rejects revision-limiting options that
are incompatible with its operation, uses standard documentation for
revision ranges, and includes blobs from boundary commits by default
to improve performance of subsequent operations.

* en/backfill-fixes-and-edges:
  backfill: default to grabbing edge blobs too
  backfill: document acceptance of revision-range in more standard manner
  backfill: reject rev-list arguments that do not make sense
This commit is contained in:
Junio C Hamano
2026-05-17 22:58:29 +09:00
3 changed files with 153 additions and 10 deletions
+19 -3
View File
@@ -9,7 +9,7 @@ git-backfill - Download missing objects in a partial clone
SYNOPSIS
--------
[synopsis]
git backfill [--min-batch-size=<n>] [--[no-]sparse]
git backfill [--min-batch-size=<n>] [--[no-]sparse] [--[no-]include-edges] [<revision-range>]
DESCRIPTION
-----------
@@ -43,7 +43,7 @@ smaller network calls than downloading the entire repository at clone
time.
By default, `git backfill` downloads all blobs reachable from the `HEAD`
commit. This set can be restricted or expanded using various options.
commit. This set can be restricted or expanded using various options below.
THIS COMMAND IS EXPERIMENTAL. ITS BEHAVIOR MAY CHANGE IN THE FUTURE.
@@ -63,7 +63,23 @@ OPTIONS
current sparse-checkout. If the sparse-checkout feature is enabled,
then `--sparse` is assumed and can be disabled with `--no-sparse`.
You may also specify the commit limiting options from linkgit:git-rev-list[1].
`--include-edges`::
`--no-include-edges`::
Include blobs from boundary commits in the backfill. This is useful
when preparing for commands such as `git log -p A..B` or `git replay
--onto TARGET A..B`: the range A..B excludes A itself, but those
commands still need the blobs from A. `--include-edges` is the
default; use `--no-include-edges` to disable it.
`<revision-range>`::
Backfill only blobs reachable from commits in the specified
revision range. When no _<revision-range>_ is specified, it
defaults to `HEAD` (i.e. the whole history leading to the
current commit). For a complete list of ways to spell
_<revision-range>_, see the "Specifying Ranges" section of
linkgit:gitrevisions[7].
+
You may also use commit-limiting options understood by
linkgit:git-rev-list[1], such as `--first-parent` or `--since`, as well
as pathspecs.
SEE ALSO
--------
+30 -1
View File
@@ -26,7 +26,7 @@
#include "path-walk.h"
static const char * const builtin_backfill_usage[] = {
N_("git backfill [--min-batch-size=<n>] [--[no-]sparse]"),
N_("git backfill [--min-batch-size=<n>] [--[no-]sparse] [--[no-]include-edges] [<revision-range>]"),
NULL
};
@@ -35,6 +35,7 @@ struct backfill_context {
struct oid_array current_batch;
size_t min_batch_size;
int sparse;
int include_edges;
struct rev_info revs;
};
@@ -78,6 +79,28 @@ static int fill_missing_blobs(const char *path UNUSED,
return 0;
}
static void reject_unsupported_rev_list_options(struct rev_info *revs)
{
if (revs->diffopt.pickaxe)
die(_("'%s' cannot be used with 'git backfill'"),
(revs->diffopt.pickaxe_opts & DIFF_PICKAXE_REGEX) ? "-G" : "-S");
if (revs->diffopt.filter || revs->diffopt.filter_not)
die(_("'%s' cannot be used with 'git backfill'"),
"--diff-filter");
if (revs->diffopt.flags.follow_renames)
die(_("'%s' cannot be used with 'git backfill'"),
"--follow");
if (revs->line_level_traverse)
die(_("'%s' cannot be used with 'git backfill'"),
"-L");
if (revs->explicit_diff_merges)
die(_("'%s' cannot be used with 'git backfill'"),
"--diff-merges");
if (revs->filter.choice)
die(_("'%s' cannot be used with 'git backfill'"),
"--filter");
}
static int do_backfill(struct backfill_context *ctx)
{
struct path_walk_info info = PATH_WALK_INFO_INIT;
@@ -94,6 +117,8 @@ static int do_backfill(struct backfill_context *ctx)
/* Walk from HEAD if otherwise unspecified. */
if (!ctx->revs.pending.nr)
add_head_to_pending(&ctx->revs);
if (ctx->include_edges)
ctx->revs.edge_hint = 1;
info.blobs = 1;
info.tags = info.commits = info.trees = 0;
@@ -121,12 +146,15 @@ int cmd_backfill(int argc, const char **argv, const char *prefix, struct reposit
.min_batch_size = 50000,
.sparse = -1,
.revs = REV_INFO_INIT,
.include_edges = 1,
};
struct option options[] = {
OPT_UNSIGNED(0, "min-batch-size", &ctx.min_batch_size,
N_("Minimum number of objects to request at a time")),
OPT_BOOL(0, "sparse", &ctx.sparse,
N_("Restrict the missing objects to the current sparse-checkout")),
OPT_BOOL(0, "include-edges", &ctx.include_edges,
N_("Include blobs from boundary commits in the backfill")),
OPT_END(),
};
struct repo_config_values *cfg = repo_config_values(the_repository);
@@ -144,6 +172,7 @@ int cmd_backfill(int argc, const char **argv, const char *prefix, struct reposit
if (argc > 1)
die(_("unrecognized argument: %s"), argv[1]);
reject_unsupported_rev_list_options(&ctx.revs);
repo_config(repo, git_default_config, NULL);
+104 -6
View File
@@ -257,11 +257,12 @@ test_expect_success 'backfill with revision range' '
git -C backfill-revs rev-list --quiet --objects --missing=print HEAD >missing &&
test_line_count = 48 missing &&
git -C backfill-revs backfill HEAD~2..HEAD &&
GIT_TRACE2_EVENT="$(pwd)/backfill-trace" git -C backfill-revs backfill HEAD~2..HEAD &&
# 30 objects downloaded.
# 36 objects downloaded, 12 still missing
test_trace2_data promisor fetch_count 36 <backfill-trace &&
git -C backfill-revs rev-list --quiet --objects --missing=print HEAD >missing &&
test_line_count = 18 missing
test_line_count = 12 missing
'
test_expect_success 'backfill with revisions over stdin' '
@@ -279,11 +280,12 @@ test_expect_success 'backfill with revisions over stdin' '
^HEAD~2
EOF
git -C backfill-revs backfill --stdin <in &&
GIT_TRACE2_EVENT="$(pwd)/backfill-trace" git -C backfill-revs backfill --stdin <in &&
# 30 objects downloaded.
# 36 objects downloaded, 12 still missing
test_trace2_data promisor fetch_count 36 <backfill-trace &&
git -C backfill-revs rev-list --quiet --objects --missing=print HEAD >missing &&
test_line_count = 18 missing
test_line_count = 12 missing
'
test_expect_success 'backfill with prefix pathspec' '
@@ -398,6 +400,102 @@ test_expect_success 'backfill with --since' '
test_line_count = 6 missing
'
# Make a blob:none partial clone of srv.bare, backfill HEAD~2..HEAD with
# the default options (edge blobs included), then run "git log -p" over
# the same range and require that its trace2 event log contains no
# "fetch_count" entry.
test_expect_success 'backfill range with include-edges enables fetch-free git-log' '
git clone --no-checkout --filter=blob:none \
--single-branch --branch=main \
"file://$(pwd)/srv.bare" backfill-log &&
# Backfill the range with default include edges.
git -C backfill-log backfill HEAD~2..HEAD &&
# git log -p needs edge blobs for the "before" side of
# diffs. With edge inclusion, all needed blobs are local.
GIT_TRACE2_EVENT="$(pwd)/log-trace" git \
-C backfill-log log -p HEAD~2..HEAD >log-output &&
# No promisor fetches should have been needed.
! grep "fetch_count" log-trace
'
# Counterpart of the default (edge-including) case: backfill the same
# range with --no-include-edges in a fresh partial clone, then require
# that "git log -p" over that range DOES record a "fetch_count" entry in
# its trace2 event log.
test_expect_success 'backfill range without include edges causes on-demand fetches in git-log' '
git clone --no-checkout --filter=blob:none \
--single-branch --branch=main \
"file://$(pwd)/srv.bare" backfill-log-no-bdy &&
# Backfill WITHOUT include edges -- file.3 v1 blobs are missing.
git -C backfill-log-no-bdy backfill --no-include-edges HEAD~2..HEAD &&
# git log -p HEAD~2..HEAD computes diff of commit 7 against
# commit 6. It needs file.3 v1 (the "before" side), which was
# not backfilled. This triggers on-demand promisor fetches.
GIT_TRACE2_EVENT="$(pwd)/log-no-bdy-trace" git \
-C backfill-log-no-bdy log -p HEAD~2..HEAD >log-output &&
grep "fetch_count" log-no-bdy-trace
'
# Build a small source repository (a base commit, one commit on a topic
# branch, one more commit on main), make a bare blob:none partial clone
# of it, and backfill main^! together with topic~1..topic. Then require
# that "git replay --onto main topic~1..topic" leaves no "fetch_count"
# entry in its trace2 event log.
test_expect_success 'backfill range enables fetch-free replay' '
# Create a repo with a branch to replay.
git init replay-src &&
(
cd replay-src &&
git config uploadpack.allowfilter 1 &&
git config uploadpack.allowanysha1inwant 1 &&
test_commit base &&
git checkout -b topic &&
test_commit topic-change &&
git checkout main &&
test_commit main-change
) &&
git clone --bare --filter=blob:none \
"file://$(pwd)/replay-src" replay-dest.git &&
# Backfill the replay range: --onto main, replaying topic~1..topic.
# For replay, we need TARGET^! plus the range.
main_oid=$(git -C replay-dest.git rev-parse main) &&
topic_oid=$(git -C replay-dest.git rev-parse topic) &&
base_oid=$(git -C replay-dest.git rev-parse topic~1) &&
git -C replay-dest.git backfill \
"$main_oid^!" "$base_oid..$topic_oid" &&
# Now replay should complete without any promisor fetches.
GIT_TRACE2_EVENT="$(pwd)/replay-trace" git -C replay-dest.git \
replay --onto main topic~1..topic >replay-out &&
! grep "fetch_count" replay-trace
'
# Build a source repository whose main and side branches each add one
# commit on top of a shared base, make a blob:none partial clone of it,
# and backfill exactly three commits (origin/main, origin/side and their
# merge base, each given as <oid>^!) with --no-include-edges. Then
# require that "git merge origin/side" leaves no "fetch_count" entry in
# its trace2 event log.
test_expect_success 'backfill enables fetch-free merge' '
# Create a repo with two branches to merge.
git init merge-src &&
(
cd merge-src &&
git config uploadpack.allowfilter 1 &&
git config uploadpack.allowanysha1inwant 1 &&
test_commit merge-base &&
git checkout -b side &&
test_commit side-change &&
git checkout main &&
test_commit main-side-change
) &&
git clone --filter=blob:none \
"file://$(pwd)/merge-src" merge-dest &&
# The clone checked out main, fetching its blobs.
# Backfill the three endpoint commits needed for merge.
main_oid=$(git -C merge-dest rev-parse origin/main) &&
side_oid=$(git -C merge-dest rev-parse origin/side) &&
mbase=$(git -C merge-dest merge-base origin/main origin/side) &&
git -C merge-dest backfill --no-include-edges \
"$main_oid^!" "$side_oid^!" "$mbase^!" &&
# Merge should complete without promisor fetches.
GIT_TRACE2_EVENT="$(pwd)/merge-trace" git -C merge-dest \
merge origin/side -m "test merge" &&
! grep "fetch_count" merge-trace
'
. "$TEST_DIRECTORY"/lib-httpd.sh
start_httpd