for-each-ref: introduce a '--start-after' option

The `git-for-each-ref(1)` command is used to iterate over references
present in a repository. In large repositories with millions of
references, it would be optimal to paginate this output such that we
can start iteration from a given reference. This would avoid having to
iterate over all references from the beginning each time when paginating
through results.

The previous commit added 'seek' functionality to the reference
backends. Utilize this and expose a '--start-after' option in
'git-for-each-ref(1)'. When used, the reference iteration seeks to the
lexicographically next reference and iterates from there onward.

This enables efficient pagination workflows, where the calling script
can remember the last provided reference and use that as the starting
point for the next set of references:
    git for-each-ref --count=100
    git for-each-ref --count=100 --start-after=refs/heads/branch-100
    git for-each-ref --count=100 --start-after=refs/heads/branch-200

Since the reference iterators only allow seeking to a specified marker
via the `ref_iterator_seek()`, we introduce a helper function
`start_ref_iterator_after()`, which seeks to next reference by simply
adding (char) 1 to the marker.

We must note that pagination always continues from the provided marker,
as such any concurrent reference updates lexicographically behind the
marker will not be output. Document the same.

Signed-off-by: Karthik Nayak <karthik.188@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
This commit is contained in:
Karthik Nayak
2025-07-15 13:28:30 +02:00
committed by Junio C Hamano
parent 526530a16a
commit dabecb9db2
5 changed files with 272 additions and 19 deletions

View File

@@ -2683,6 +2683,41 @@ static int filter_exclude_match(struct ref_filter *filter, const char *refname)
return match_pattern(filter->exclude.v, refname, filter->ignore_case);
}
/*
* We need to seek to the reference right after a given marker but excluding any
* matching references. So we seek to the lexicographically next reference.
*/
static int start_ref_iterator_after(struct ref_iterator *iter, const char *marker)
{
struct strbuf sb = STRBUF_INIT;
int ret;
strbuf_addstr(&sb, marker);
strbuf_addch(&sb, 1);
ret = ref_iterator_seek(iter, sb.buf, 0);
strbuf_release(&sb);
return ret;
}
static int for_each_fullref_with_seek(struct ref_filter *filter, each_ref_fn cb,
void *cb_data, unsigned int flags)
{
struct ref_iterator *iter;
int ret = 0;
iter = refs_ref_iterator_begin(get_main_ref_store(the_repository), "",
NULL, 0, flags);
if (filter->start_after)
ret = start_ref_iterator_after(iter, filter->start_after);
if (ret)
return ret;
return do_for_each_ref_iterator(iter, cb, cb_data);
}
/*
* This is the same as for_each_fullref_in(), but it tries to iterate
* only over the patterns we'll care about. Note that it _doesn't_ do a full
@@ -2694,8 +2729,8 @@ static int for_each_fullref_in_pattern(struct ref_filter *filter,
{
if (filter->kind & FILTER_REFS_ROOT_REFS) {
/* In this case, we want to print all refs including root refs. */
return refs_for_each_include_root_refs(get_main_ref_store(the_repository),
cb, cb_data);
return for_each_fullref_with_seek(filter, cb, cb_data,
DO_FOR_EACH_INCLUDE_ROOT_REFS);
}
if (!filter->match_as_path) {
@@ -2704,8 +2739,7 @@ static int for_each_fullref_in_pattern(struct ref_filter *filter,
* prefixes like "refs/heads/" etc. are stripped off,
* so we have to look at everything:
*/
return refs_for_each_fullref_in(get_main_ref_store(the_repository),
"", NULL, cb, cb_data);
return for_each_fullref_with_seek(filter, cb, cb_data, 0);
}
if (filter->ignore_case) {
@@ -2714,14 +2748,12 @@ static int for_each_fullref_in_pattern(struct ref_filter *filter,
* so just return everything and let the caller
* sort it out.
*/
return refs_for_each_fullref_in(get_main_ref_store(the_repository),
"", NULL, cb, cb_data);
return for_each_fullref_with_seek(filter, cb, cb_data, 0);
}
if (!filter->name_patterns[0]) {
/* no patterns; we have to look at everything */
return refs_for_each_fullref_in(get_main_ref_store(the_repository),
"", filter->exclude.v, cb, cb_data);
return for_each_fullref_with_seek(filter, cb, cb_data, 0);
}
return refs_for_each_fullref_in_prefixes(get_main_ref_store(the_repository),
@@ -3189,6 +3221,7 @@ void filter_is_base(struct repository *r,
static int do_filter_refs(struct ref_filter *filter, unsigned int type, each_ref_fn fn, void *cb_data)
{
const char *prefix = NULL;
int ret = 0;
filter->kind = type & FILTER_REFS_KIND_MASK;
@@ -3207,19 +3240,28 @@ static int do_filter_refs(struct ref_filter *filter, unsigned int type, each_ref
* of filter_ref_kind().
*/
if (filter->kind == FILTER_REFS_BRANCHES)
ret = refs_for_each_fullref_in(get_main_ref_store(the_repository),
"refs/heads/", NULL,
fn, cb_data);
prefix = "refs/heads/";
else if (filter->kind == FILTER_REFS_REMOTES)
ret = refs_for_each_fullref_in(get_main_ref_store(the_repository),
"refs/remotes/", NULL,
fn, cb_data);
prefix = "refs/remotes/";
else if (filter->kind == FILTER_REFS_TAGS)
ret = refs_for_each_fullref_in(get_main_ref_store(the_repository),
"refs/tags/", NULL, fn,
cb_data);
else if (filter->kind & FILTER_REFS_REGULAR)
prefix = "refs/tags/";
if (prefix) {
struct ref_iterator *iter;
iter = refs_ref_iterator_begin(get_main_ref_store(the_repository),
"", NULL, 0, 0);
if (filter->start_after)
ret = start_ref_iterator_after(iter, filter->start_after);
else if (prefix)
ret = ref_iterator_seek(iter, prefix, 1);
if (!ret)
ret = do_for_each_ref_iterator(iter, fn, cb_data);
} else if (filter->kind & FILTER_REFS_REGULAR) {
ret = for_each_fullref_in_pattern(filter, fn, cb_data);
}
/*
* When printing all ref types, HEAD is already included,