From d0b94577dda3a50c1833626a70ebefd478bfcbf9 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Fri, 4 Jul 2025 11:42:56 +0200 Subject: [PATCH 1/2] BreakingChanges: announce switch to "reftable" format The "reftable" format has come a long way and has matured nicely since it has been merged into git via 57db2a094d5 (refs: introduce reftable backend, 2024-02-07). It fixes longstanding issues that cannot be fixed with the "files" format in a backwards-compatible way and performs significantly better in many use cases. Announce that we will switch to the "reftable" format in Git 3.0 for newly created repositories and wire up the change, hidden behind the WITH_BREAKING_CHANGES preprocessor define. This switch is dependent on support in the larger Git ecosystem. Most importantly, libraries like JGit, libgit2 and Gitoxide should support the reftable backend so that we don't break all applications and tools built on top of those libraries. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- Documentation/BreakingChanges.adoc | 47 ++++++++++++++++++++++++++++++ help.c | 2 ++ repository.h | 6 ++++ setup.c | 2 ++ t/t0001-init.sh | 11 +++++++ 5 files changed, 68 insertions(+) diff --git a/Documentation/BreakingChanges.adoc b/Documentation/BreakingChanges.adoc index c6bd94986c..f8d2eba061 100644 --- a/Documentation/BreakingChanges.adoc +++ b/Documentation/BreakingChanges.adoc @@ -118,6 +118,53 @@ Cf. <2f5de416-04ba-c23d-1e0b-83bb655829a7@zombino.com>, <20170223155046.e7nxivfwqqoprsqj@LykOS.localdomain>, . +* The default storage format for references in newly created repositories will + be changed from "files" to "reftable". The "reftable" format provides + multiple advantages over the "files" format: ++ + ** It is impossible to store two references that only differ in casing on + case-insensitive filesystems with the "files" format. This issue is common + on Windows and macOS platforms. 
As the "reftable" backend does not use + filesystem paths to encode reference names this problem goes away. + ** Similarly, macOS normalizes path names that contain unicode characters, + which has the consequence that you cannot store two names with unicode + characters that are encoded differently with the "files" backend. Again, + this is not an issue with the "reftable" backend. + ** Deleting references with the "files" backend requires Git to rewrite the + complete "packed-refs" file. In large repositories with many references + this file can easily be dozens of megabytes in size, in extreme cases it + may be gigabytes. The "reftable" backend uses tombstone markers for + deleted references and thus does not have to rewrite all of its data. + ** Repository housekeeping with the "files" backend typically performs + all-into-one repacks of references. This can be quite expensive, and + consequently housekeeping is a tradeoff between the number of loose + references that accumulate and slow down operations that read references, + and compressing those loose references into the "packed-refs" file. The + "reftable" backend uses geometric compaction after every write, which + amortizes costs and ensures that the backend is always in a + well-maintained state. + ** Operations that write multiple references at once are not atomic with the + "files" backend. Consequently, Git may see in-between states when it reads + references while a reference transaction is in the process of being + committed to disk. + ** Writing many references at once is slow with the "files" backend because + every reference is created as a separate file. The "reftable" backend + significantly outperforms the "files" backend by multiple orders of + magnitude. + ** The reftable backend uses a binary format with prefix compression for + reference names. As a result, the format uses less space compared to the + "packed-refs" file. 
++ +Users that get immediate benefit from the "reftable" backend could continue to +opt in to the "reftable" format manually by setting the "init.defaultRefFormat" +config. But defaults matter, and we think that overall users will have a better +experience with fewer platform-specific quirks when they use the new backend by +default. ++ +A prerequisite for this change is that the ecosystem is ready to support the +"reftable" format. Most importantly, alternative implementations of Git like +JGit, libgit2 and Gitoxide need to support it. + === Removals * Support for grafting commits has long been superseded by git-replace(1). diff --git a/help.c b/help.c index 21b778707a..89cd47e3b8 100644 --- a/help.c +++ b/help.c @@ -810,6 +810,8 @@ void get_version_info(struct strbuf *buf, int show_build_options) SHA1_UNSAFE_BACKEND); #endif strbuf_addf(buf, "SHA-256: %s\n", SHA256_BACKEND); + strbuf_addf(buf, "default-ref-format: %s\n", + ref_storage_format_to_name(REF_STORAGE_FORMAT_DEFAULT)); } } diff --git a/repository.h b/repository.h index c4c92b2ab9..77c4189d5d 100644 --- a/repository.h +++ b/repository.h @@ -20,6 +20,12 @@ enum ref_storage_format { REF_STORAGE_FORMAT_REFTABLE, }; +#ifdef WITH_BREAKING_CHANGES /* Git 3.0 */ +# define REF_STORAGE_FORMAT_DEFAULT REF_STORAGE_FORMAT_REFTABLE +#else +# define REF_STORAGE_FORMAT_DEFAULT REF_STORAGE_FORMAT_FILES +#endif + struct repo_path_cache { char *squash_msg; char *merge_msg; diff --git a/setup.c b/setup.c index f93bd6a24a..f0c06c655a 100644 --- a/setup.c +++ b/setup.c @@ -2541,6 +2541,8 @@ static void repository_format_configure(struct repository_format *repo_fmt, repo_fmt->ref_storage_format = ref_format; } else if (cfg.ref_format != REF_STORAGE_FORMAT_UNKNOWN) { repo_fmt->ref_storage_format = cfg.ref_format; + } else { + repo_fmt->ref_storage_format = REF_STORAGE_FORMAT_DEFAULT; } repo_set_ref_storage_format(the_repository, repo_fmt->ref_storage_format); } diff --git a/t/t0001-init.sh b/t/t0001-init.sh index 
f11a40811f..186664162f 100755 --- a/t/t0001-init.sh +++ b/t/t0001-init.sh @@ -658,6 +658,17 @@ test_expect_success 'init warns about invalid init.defaultRefFormat' ' test_cmp expected actual ' +test_expect_success 'default ref format' ' + test_when_finished "rm -rf refformat" && + ( + sane_unset GIT_DEFAULT_REF_FORMAT && + git init refformat + ) && + git version --build-options | sed -ne "s/^default-ref-format: //p" >expect && + git -C refformat rev-parse --show-ref-format >actual && + test_cmp expect actual +' + backends="files reftable" for format in $backends do From 793b14e1c833dd4ea0d85cdef53cc5ab38f7915e Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Fri, 4 Jul 2025 11:42:57 +0200 Subject: [PATCH 2/2] setup: use "reftable" format when experimental features are enabled With the preceding commit we have announced the switch to the "reftable" format in Git 3.0 for newly created repositories. The format is being battle tested by GitLab and a couple of other developers, and except for a small handful of issues exposed early after it has been merged it has been rock solid. Regardless of that though the test user base is still comparatively small, which increases the risk that we miss critical bugs. Address this by enabling the reftable format when experimental features are enabled. This should increase the test user base by some margin and thus give us more input before making the format the default. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- Documentation/config/feature.adoc | 6 ++++++ setup.c | 12 +++++++++++ t/t0001-init.sh | 34 +++++++++++++++++++++++++++++++ 3 files changed, 52 insertions(+) diff --git a/Documentation/config/feature.adoc b/Documentation/config/feature.adoc index cb49ff2604..924f5ff4e3 100644 --- a/Documentation/config/feature.adoc +++ b/Documentation/config/feature.adoc @@ -24,6 +24,12 @@ reusing objects from multiple packs instead of just one. 
* `pack.usePathWalk` may speed up packfile creation and make the packfiles be significantly smaller in the presence of certain filename collisions with Git's default name-hash. ++ +* `init.defaultRefFormat=reftable` causes newly initialized repositories to use +the reftable format for storing references. This new format solves issues with +case-insensitive filesystems, compresses better and performs significantly +better in many use cases. Refer to Documentation/technical/reftable.adoc for +more information on this new storage format. feature.manyFiles:: Enable config options that optimize for repos with many files in the diff --git a/setup.c b/setup.c index f0c06c655a..97d7824d07 100644 --- a/setup.c +++ b/setup.c @@ -2481,6 +2481,18 @@ static int read_default_format_config(const char *key, const char *value, goto out; } + /* + * Enable the reftable format when "feature.experimental" is enabled. + * "init.defaultRefFormat" takes precedence over this setting. + */ + if (!strcmp(key, "feature.experimental") && + cfg->ref_format == REF_STORAGE_FORMAT_UNKNOWN && + git_config_bool(key, value)) { + cfg->ref_format = REF_STORAGE_FORMAT_REFTABLE; + ret = 0; + goto out; + } + ret = 0; out: free(str); diff --git a/t/t0001-init.sh b/t/t0001-init.sh index 186664162f..f593c53687 100755 --- a/t/t0001-init.sh +++ b/t/t0001-init.sh @@ -749,6 +749,40 @@ test_expect_success "GIT_DEFAULT_REF_FORMAT= overrides init.defaultRefFormat" ' test_cmp expect actual ' +test_expect_success "init with feature.experimental=true" ' + test_when_finished "rm -rf refformat" && + test_config_global feature.experimental true && + ( + sane_unset GIT_DEFAULT_REF_FORMAT && + git init refformat + ) && + echo reftable >expect && + git -C refformat rev-parse --show-ref-format >actual && + test_cmp expect actual +' + +test_expect_success "init.defaultRefFormat overrides feature.experimental=true" ' + test_when_finished "rm -rf refformat" && + test_config_global feature.experimental true && + 
test_config_global init.defaultRefFormat files && + ( + sane_unset GIT_DEFAULT_REF_FORMAT && + git init refformat + ) && + echo files >expect && + git -C refformat rev-parse --show-ref-format >actual && + test_cmp expect actual +' + +test_expect_success "GIT_DEFAULT_REF_FORMAT= overrides feature.experimental=true" ' + test_when_finished "rm -rf refformat" && + test_config_global feature.experimental true && + GIT_DEFAULT_REF_FORMAT=files git init refformat && + echo files >expect && + git -C refformat rev-parse --show-ref-format >actual && + test_cmp expect actual +' + for from_format in $backends do test_expect_success "re-init with same format ($from_format)" '