From d6ec08788e667d4f556e9c2d97bbd7adb7e582be Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ren=C3=A9=20Scharfe?= Date: Fri, 18 Jul 2025 11:39:06 +0200 Subject: [PATCH 1/3] commit: convert pop_most_recent_commit() to prio_queue MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit pop_most_recent_commit() calls commit_list_insert_by_date() for parent commits, which is itself called in a loop. This can lead to quadratic complexity if there are many merges. Replace the commit_list with a prio_queue to ensure logarithmic worst case complexity and convert all three users. Add a performance test that exercises one of them using a pathological history that consists of 50% merges and 50% root commits to demonstrate the speedup: Test v2.50.1 HEAD ---------------------------------------------------------------------- 1501.2: rev-parse ':/65535' 2.48(2.47+0.00) 0.20(0.19+0.00) -91.9% Alas, sane histories don't benefit from the conversion much, and traversing Git's own history takes a 1% performance hit on my machine: $ hyperfine -w3 -L git ./git_2.50.1,./git '{git} rev-parse :/^Initial.revision' Benchmark 1: ./git_2.50.1 rev-parse :/^Initial.revision Time (mean ± σ): 1.071 s ± 0.004 s [User: 1.052 s, System: 0.017 s] Range (min … max): 1.067 s … 1.078 s 10 runs Benchmark 2: ./git rev-parse :/^Initial.revision Time (mean ± σ): 1.079 s ± 0.003 s [User: 1.060 s, System: 0.017 s] Range (min … max): 1.074 s … 1.083 s 10 runs Summary ./git_2.50.1 rev-parse :/^Initial.revision ran 1.01 ± 0.00 times faster than ./git rev-parse :/^Initial.revision Signed-off-by: René Scharfe Signed-off-by: Junio C Hamano --- commit.c | 7 +-- commit.h | 8 ++-- fetch-pack.c | 13 +++--- object-name.c | 10 ++--- t/meson.build | 1 + t/perf/p1501-rev-parse-oneline.sh | 71 +++++++++++++++++++++++++++++++ walker.c | 11 +++-- 7 files changed, 100 insertions(+), 21 deletions(-) create mode 100755 t/perf/p1501-rev-parse-oneline.sh diff --git a/commit.c b/commit.c index e915b2b9a1..0200759aaa 100644 --- a/commit.c +++ b/commit.c @@ -31,6 +31,7 @@ #include "parse.h" #include "object-file.h" #include "object-file-convert.h" +#include "prio-queue.h" static struct commit_extra_header *read_commit_extra_header_lines(const char *buf, size_t len, const char **); @@ -738,17 +739,17 @@ void commit_list_sort_by_date(struct commit_list **list) commit_list_sort(list, commit_list_compare_by_date); } -struct commit *pop_most_recent_commit(struct commit_list **list, +struct commit *pop_most_recent_commit(struct prio_queue *queue, unsigned int mark) { - struct commit *ret = pop_commit(list); + struct commit *ret = prio_queue_get(queue); struct commit_list *parents = ret->parents; while (parents) { struct commit *commit = parents->item; if (!repo_parse_commit(the_repository, commit) && !(commit->object.flags & mark)) { commit->object.flags |= mark; - commit_list_insert_by_date(commit, list); + prio_queue_put(queue, commit); } parents = parents->next; } diff --git a/commit.h b/commit.h index 70c870dae4..9630c076d6 100644 --- a/commit.h +++ b/commit.h @@ -201,10 +201,10 @@ const char *repo_logmsg_reencode(struct repository *r, const char *skip_blank_lines(const char *msg); -/** Removes the first commit from a list sorted by date, and adds all - * of its parents. - **/ -struct commit *pop_most_recent_commit(struct commit_list **list, +struct prio_queue; + +/* Removes the first commit from a prio_queue and adds its parents. */ +struct commit *pop_most_recent_commit(struct prio_queue *queue, unsigned int mark); struct commit *pop_commit(struct commit_list **stack); diff --git a/fetch-pack.c b/fetch-pack.c index fa4231fee7..6afe2f964e 100644 --- a/fetch-pack.c +++ b/fetch-pack.c @@ -34,6 +34,7 @@ #include "commit-graph.h" #include "sigchain.h" #include "mergesort.h" +#include "prio-queue.h" static int transfer_unpack_limit = -1; static int fetch_unpack_limit = -1; @@ -600,7 +601,7 @@ done: return count ? retval : 0; } -static struct commit_list *complete; +static struct prio_queue complete = { compare_commits_by_commit_date }; static int mark_complete(const struct object_id *oid) { @@ -608,7 +609,7 @@ static int mark_complete(const struct object_id *oid) if (commit && !(commit->object.flags & COMPLETE)) { commit->object.flags |= COMPLETE; - commit_list_insert(commit, &complete); + prio_queue_put(&complete, commit); } return 0; } @@ -625,9 +626,12 @@ static int mark_complete_oid(const char *refname UNUSED, static void mark_recent_complete_commits(struct fetch_pack_args *args, timestamp_t cutoff) { - while (complete && cutoff <= complete->item->date) { + while (complete.nr) { + struct commit *item = prio_queue_peek(&complete); + if (item->date < cutoff) + break; print_verbose(args, _("Marking %s as complete"), - oid_to_hex(&complete->item->object.oid)); + oid_to_hex(&item->object.oid)); pop_most_recent_commit(&complete, COMPLETE); } } @@ -797,7 +801,6 @@ static void mark_complete_and_common_ref(struct fetch_negotiator *negotiator, refs_for_each_rawref(get_main_ref_store(the_repository), mark_complete_oid, NULL); for_each_cached_alternate(NULL, mark_alternate_complete); - commit_list_sort_by_date(&complete); if (cutoff) mark_recent_complete_commits(args, cutoff); } diff --git a/object-name.c b/object-name.c index 9288b2dd24..8a787b3ff3 100644 --- a/object-name.c +++ b/object-name.c @@ -28,6 +28,7 @@ #include "commit-reach.h" #include "date.h" #include "object-file-convert.h" +#include "prio-queue.h" static int get_oid_oneline(struct repository *r, const char *, struct object_id *, const struct commit_list *); @@ -1457,7 +1458,7 @@ static int get_oid_oneline(struct repository *r, const char *prefix, struct object_id *oid, const struct commit_list *list) { - struct commit_list *copy = NULL, **copy_tail = © + struct prio_queue copy = { compare_commits_by_commit_date }; const struct commit_list *l; int found = 0; int negative = 0; @@ -1479,9 +1480,9 @@ static int get_oid_oneline(struct repository *r, for (l = list; l; l = l->next) { l->item->object.flags |= ONELINE_SEEN; - copy_tail = &commit_list_insert(l->item, copy_tail)->next; + prio_queue_put(©, l->item); } - while (copy) { + while (copy.nr) { const char *p, *buf; struct commit *commit; int matches; @@ -1503,7 +1504,7 @@ static int get_oid_oneline(struct repository *r, regfree(®ex); for (l = list; l; l = l->next) clear_commit_marks(l->item, ONELINE_SEEN); - free_commit_list(copy); + clear_prio_queue(©); return found ? 0 : -1; } @@ -2057,7 +2058,6 @@ static enum get_oid_result get_oid_with_context_1(struct repository *repo, cb.list = &list; refs_for_each_ref(get_main_ref_store(repo), handle_one_ref, &cb); refs_head_ref(get_main_ref_store(repo), handle_one_ref, &cb); - commit_list_sort_by_date(&list); ret = get_oid_oneline(repo, name + 2, oid, list); free_commit_list(list); diff --git a/t/meson.build b/t/meson.build index d052fc3e23..b39f6d008d 100644 --- a/t/meson.build +++ b/t/meson.build @@ -1117,6 +1117,7 @@ benchmarks = [ 'perf/p1450-fsck.sh', 'perf/p1451-fsck-skip-list.sh', 'perf/p1500-graph-walks.sh', + 'perf/p1501-rev-parse-oneline.sh', 'perf/p2000-sparse-operations.sh', 'perf/p3400-rebase.sh', 'perf/p3404-rebase-interactive.sh', diff --git a/t/perf/p1501-rev-parse-oneline.sh b/t/perf/p1501-rev-parse-oneline.sh new file mode 100755 index 0000000000..538fa9c404 --- /dev/null +++ b/t/perf/p1501-rev-parse-oneline.sh @@ -0,0 +1,71 @@ +#!/bin/sh + +test_description='Test :/ object name notation' + +. ./perf-lib.sh + +test_perf_fresh_repo + +# +# Creates lots of merges to make history traversal costly. In +# particular it creates 2^($max_level-1)-1 2-way merges on top of +# 2^($max_level-1) root commits. E.g., the commit history looks like +# this for a $max_level of 3: +# +# _1_ +# / \ +# 2 3 +# / \ / \ +# 4 5 6 7 +# +# The numbers are the fast-import marks, which also are the commit +# messages. 1 is the HEAD commit and a merge, 2 and 3 are also merges, +# 4-7 are the root commits. +# +build_history () { + local max_level="$1" && + local level="${2:-1}" && + local mark="${3:-1}" && + if test $level -eq $max_level + then + echo "reset refs/heads/master" && + echo "from $ZERO_OID" && + echo "commit refs/heads/master" && + echo "mark :$mark" && + echo "committer C 1234567890 +0000" && + echo "data < 1234567890 +0000" && + echo "data <commits && + sed -n -e "s/ .*$//p" -e "q" expect && + sed -n -e "s/^.* //p" -e "q" needle +' + +test_perf "rev-parse :/$(cat needle)" ' + git rev-parse :/$(cat needle) >actual +' + +test_expect_success 'verify result' ' + test_cmp expect actual +' + +test_done diff --git a/walker.c b/walker.c index b470d43e54..c6d0e20c72 100644 --- a/walker.c +++ b/walker.c @@ -14,6 +14,7 @@ #include "blob.h" #include "refs.h" #include "progress.h" +#include "prio-queue.h" static struct object_id current_commit_oid; @@ -78,7 +79,7 @@ static int process_tree(struct walker *walker, struct tree *tree) #define SEEN (1U << 1) #define TO_SCAN (1U << 2) -static struct commit_list *complete = NULL; +static struct prio_queue complete = { compare_commits_by_commit_date }; static int process_commit(struct walker *walker, struct commit *commit) { @@ -87,7 +88,10 @@ static int process_commit(struct walker *walker, struct commit *commit) if (repo_parse_commit(the_repository, commit)) return -1; - while (complete && complete->item->date >= commit->date) { + while (complete.nr) { + struct commit *item = prio_queue_peek(&complete); + if (item->date < commit->date) + break; pop_most_recent_commit(&complete, COMPLETE); } @@ -233,7 +237,7 @@ static int mark_complete(const char *path UNUSED, if (commit) { commit->object.flags |= COMPLETE; - commit_list_insert(commit, &complete); + prio_queue_put(&complete, commit); } return 0; } @@ -302,7 +306,6 @@ int walker_fetch(struct walker *walker, int targets, char **target, if (!walker->get_recover) { refs_for_each_ref(get_main_ref_store(the_repository), mark_complete, NULL); - commit_list_sort_by_date(&complete); } for (i = 0; i < targets; i++) { From 3d5091d232ea991a6a991c86e9fb000f5a9009a0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ren=C3=A9=20Scharfe?= Date: Fri, 18 Jul 2025 11:39:14 +0200 Subject: [PATCH 2/3] prio-queue: add prio_queue_replace() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add a function to replace the top element of the queue that basically does the same as prio_queue_get() followed by prio_queue_put(), but without the work by prio_queue_get() to rebalance the heap. It can be used to optimize loops that get one element and then immediately add another one. That's common e.g., with commit history traversal, where we get out a commit and then put in its parents. Signed-off-by: René Scharfe Signed-off-by: Junio C Hamano --- prio-queue.c | 45 ++++++++++++++++++++++++++----------- prio-queue.h | 8 +++++++ t/unit-tests/u-prio-queue.c | 23 +++++++++++++++++++ 3 files changed, 63 insertions(+), 13 deletions(-) diff --git a/prio-queue.c b/prio-queue.c index ec33ac27db..9748528ce6 100644 --- a/prio-queue.c +++ b/prio-queue.c @@ -58,22 +58,10 @@ void prio_queue_put(struct prio_queue *queue, void *thing) } } -void *prio_queue_get(struct prio_queue *queue) +static void sift_down_root(struct prio_queue *queue) { - void *result; size_t ix, child; - if (!queue->nr) - return NULL; - if (!queue->compare) - return queue->array[--queue->nr].data; /* LIFO */ - - result = queue->array[0].data; - if (!--queue->nr) - return result; - - queue->array[0] = queue->array[queue->nr]; - /* Push down the one at the root */ for (ix = 0; ix * 2 + 1 < queue->nr; ix = child) { child = ix * 2 + 1; /* left */ @@ -86,6 +74,23 @@ void *prio_queue_get(struct prio_queue *queue) swap(queue, child, ix); } +} + +void *prio_queue_get(struct prio_queue *queue) +{ + void *result; + + if (!queue->nr) + return NULL; + if (!queue->compare) + return queue->array[--queue->nr].data; /* LIFO */ + + result = queue->array[0].data; + if (!--queue->nr) + return result; + + queue->array[0] = queue->array[queue->nr]; + sift_down_root(queue); return result; } @@ -97,3 +102,17 @@ void *prio_queue_peek(struct prio_queue *queue) return queue->array[queue->nr - 1].data; return queue->array[0].data; } + +void prio_queue_replace(struct prio_queue *queue, void *thing) +{ + if (!queue->nr) { + prio_queue_put(queue, thing); + } else if (!queue->compare) { + queue->array[queue->nr - 1].ctr = queue->insertion_ctr++; + queue->array[queue->nr - 1].data = thing; + } else { + queue->array[0].ctr = queue->insertion_ctr++; + queue->array[0].data = thing; + sift_down_root(queue); + } +} diff --git a/prio-queue.h b/prio-queue.h index 38d032636d..da7fad2f1f 100644 --- a/prio-queue.h +++ b/prio-queue.h @@ -52,6 +52,14 @@ void *prio_queue_get(struct prio_queue *); */ void *prio_queue_peek(struct prio_queue *); +/* + * Replace the "thing" that compares the smallest with a new "thing", + * like prio_queue_get()+prio_queue_put() would do, but in a more + * efficient way. Does the same as prio_queue_put() if the queue is + * empty. + */ +void prio_queue_replace(struct prio_queue *queue, void *thing); + void clear_prio_queue(struct prio_queue *); /* Reverse the LIFO elements */ diff --git a/t/unit-tests/u-prio-queue.c b/t/unit-tests/u-prio-queue.c index 145e689c9c..63e58114ae 100644 --- a/t/unit-tests/u-prio-queue.c +++ b/t/unit-tests/u-prio-queue.c @@ -13,6 +13,7 @@ static int intcmp(const void *va, const void *vb, void *data UNUSED) #define STACK -3 #define GET -4 #define REVERSE -5 +#define REPLACE -6 static int show(int *v) { @@ -51,6 +52,15 @@ static void test_prio_queue(int *input, size_t input_size, case REVERSE: prio_queue_reverse(&pq); break; + case REPLACE: + peek = prio_queue_peek(&pq); + cl_assert(i + 1 < input_size); + cl_assert(input[i + 1] >= 0); + cl_assert(j < result_size); + cl_assert_equal_i(result[j], show(peek)); + j++; + prio_queue_replace(&pq, &input[++i]); + break; default: prio_queue_put(&pq, &input[i]); break; @@ -81,6 +91,13 @@ void test_prio_queue__empty(void) ((int []){ 1, 2, MISSING, 1, 2, MISSING })); } +void test_prio_queue__replace(void) +{ + TEST_INPUT(((int []){ REPLACE, 6, 2, 4, REPLACE, 5, 7, GET, + REPLACE, 1, DUMP }), + ((int []){ MISSING, 2, 4, 5, 1, 6, 7 })); +} + void test_prio_queue__stack(void) { TEST_INPUT(((int []){ STACK, 8, 1, 5, 4, 6, 2, 3, DUMP }), @@ -92,3 +109,9 @@ void test_prio_queue__reverse_stack(void) TEST_INPUT(((int []){ STACK, 1, 2, 3, 4, 5, 6, REVERSE, DUMP }), ((int []){ 1, 2, 3, 4, 5, 6 })); } + +void test_prio_queue__replace_stack(void) +{ + TEST_INPUT(((int []){ STACK, 8, 1, 5, REPLACE, 4, 6, 2, 3, DUMP }), + ((int []){ 5, 3, 2, 6, 4, 1, 8 })); +} From a79e3519d66671a408041dac8c56d99078de41f2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ren=C3=A9=20Scharfe?= Date: Fri, 18 Jul 2025 11:39:11 +0200 Subject: [PATCH 3/3] commit: use prio_queue_replace() in pop_most_recent_commit() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Optimize pop_most_recent_commit() by adding the first parent using the more efficient prio_queue_peek() and prio_queue_replace() instead of prio_queue_get() and prio_queue_put(). On my machine this neutralizes the performance hit it took in Git's own repository when we converted it to prio_queue two patches ago (git_pq): $ hyperfine -w3 -L git ./git_2.50.1,./git_pq,./git '{git} rev-parse :/^Initial.revision' Benchmark 1: ./git_2.50.1 rev-parse :/^Initial.revision Time (mean ± σ): 1.073 s ± 0.003 s [User: 1.053 s, System: 0.019 s] Range (min … max): 1.069 s … 1.078 s 10 runs Benchmark 2: ./git_pq rev-parse :/^Initial.revision Time (mean ± σ): 1.077 s ± 0.002 s [User: 1.057 s, System: 0.018 s] Range (min … max): 1.072 s … 1.079 s 10 runs Benchmark 3: ./git rev-parse :/^Initial.revision Time (mean ± σ): 1.069 s ± 0.003 s [User: 1.049 s, System: 0.018 s] Range (min … max): 1.065 s … 1.074 s 10 runs Summary ./git rev-parse :/^Initial.revision ran 1.00 ± 0.00 times faster than ./git_2.50.1 rev-parse :/^Initial.revision 1.01 ± 0.00 times faster than ./git_pq rev-parse :/^Initial.revision Signed-off-by: René Scharfe Signed-off-by: Junio C Hamano --- commit.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/commit.c b/commit.c index 0200759aaa..5a40ed99cb 100644 --- a/commit.c +++ b/commit.c @@ -742,17 +742,24 @@ void commit_list_sort_by_date(struct commit_list **list) struct commit *pop_most_recent_commit(struct prio_queue *queue, unsigned int mark) { - struct commit *ret = prio_queue_get(queue); + struct commit *ret = prio_queue_peek(queue); + int get_pending = 1; struct commit_list *parents = ret->parents; while (parents) { struct commit *commit = parents->item; if (!repo_parse_commit(the_repository, commit) && !(commit->object.flags & mark)) { commit->object.flags |= mark; - prio_queue_put(queue, commit); + if (get_pending) + prio_queue_replace(queue, commit); + else + prio_queue_put(queue, commit); + get_pending = 0; } parents = parents->next; } + if (get_pending) + prio_queue_get(queue); return ret; }