submodule: fix case-folding gitdir filesystem collisions

Add a new check when extensions.submoduleEncoding is enabled to
detect and prevent case-folding filesystem collisions. When this
new check is triggered, a stricter case-folding-aware URI encoding
is used to percent-encode uppercase characters.

By using this check/retry mechanism the uppercase encoding is
only applied when necessary, so case-sensitive filesystems are
not affected.

Signed-off-by: Adrian Ratiu <adrian.ratiu@collabora.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
This commit is contained in:
Adrian Ratiu
2025-11-19 23:10:28 +02:00
committed by Junio C Hamano
parent 8bd559ce79
commit f9e76772f5
4 changed files with 125 additions and 2 deletions

View File

@@ -2238,15 +2238,58 @@ out:
return ret;
}
/*
 * Scan the parent directory of `git_dir` for an entry whose name matches
 * `submodule_name` case-insensitively but is not byte-for-byte identical.
 *
 * The scan only reports a collision when `suffixes_match` is true or
 * is_git_directory(git_dir) returns non-zero; otherwise no entry can be
 * flagged and the directory is not read at all.
 *
 * Returns 0 when no collision is found (including when the parent directory
 * does not exist), and -1 when a collision is found or when the parent
 * directory exists but cannot be opened.
 */
static int check_casefolding_conflict(const char *git_dir,
				      const char *submodule_name,
				      const bool suffixes_match)
{
	char *p, *modules_dir = xstrdup(git_dir);
	struct dirent *de;
	DIR *dir = NULL;
	int ret = 0;

	/* Strip the last path component to get the directory to scan. */
	if ((p = find_last_dir_sep(modules_dir)))
		*p = '\0';

	/* No conflict is possible if modules_dir doesn't exist (first clone) */
	if (!is_directory(modules_dir))
		goto cleanup;

	dir = opendir(modules_dir);
	if (!dir) {
		ret = -1;
		goto cleanup;
	}

	/*
	 * This precondition does not depend on the directory entry being
	 * examined, so evaluate it once instead of once per entry. When it
	 * does not hold, no entry can be reported as a collision.
	 */
	if (!suffixes_match && !is_git_directory(git_dir))
		goto cleanup;

	/* Check for another directory under .git/modules that differs only in case. */
	while ((de = readdir(dir))) {
		if (!strcmp(de->d_name, ".") || !strcmp(de->d_name, ".."))
			continue;

		/* Same name ignoring case, but not the very same name. */
		if (!strcasecmp(de->d_name, submodule_name) &&
		    strcmp(de->d_name, submodule_name)) {
			ret = -1; /* collision found */
			break;
		}
	}

cleanup:
	if (dir)
		closedir(dir);
	FREE_AND_NULL(modules_dir);
	return ret;
}
/*
* Encoded gitdir validation function used when extensions.submoduleEncoding is enabled.
* This does not print errors like the non-encoded version, because encoding is supposed
* to mitigate / fix all these.
*/
static int validate_submodule_encoded_git_dir(char *git_dir, const char *submodule_name UNUSED)
static int validate_submodule_encoded_git_dir(char *git_dir, const char *submodule_name)
{
const char *modules_marker = "/modules/";
char *p = git_dir, *last_submodule_name = NULL;
int config_ignorecase = 0;
if (!the_repository->repository_format_submodule_encoding)
BUG("validate_submodule_encoded_git_dir() must be called with "
@@ -2262,6 +2305,14 @@ static int validate_submodule_encoded_git_dir(char *git_dir, const char *submodu
if (!last_submodule_name || strchr(last_submodule_name, '/'))
return -1;
/* Prevent conflicts on case-folding filesystems */
repo_config_get_bool(the_repository, "core.ignorecase", &config_ignorecase);
if (ignore_case || config_ignorecase) {
bool suffixes_match = !strcmp(last_submodule_name, submodule_name);
return check_casefolding_conflict(git_dir, submodule_name,
suffixes_match);
}
return 0;
}
@@ -2650,13 +2701,37 @@ void submodule_name_to_gitdir(struct strbuf *buf, struct repository *r,
if (!validate_and_set_submodule_gitdir(buf, submodule_name))
return;
/* Case 2: Try URI-safe (RFC3986) encoding first, this fixes nested gitdirs */
/* Case 2.1: Try URI-safe (RFC3986) encoding first, this fixes nested gitdirs */
strbuf_reset(buf);
repo_git_path_append(r, buf, "modules/");
strbuf_addstr_urlencode(buf, submodule_name, is_rfc3986_unreserved);
if (!validate_and_set_submodule_gitdir(buf, submodule_name))
return;
/* Case 2.2: Try extended uppercase URI (RFC3986) encoding, to fix case-folding */
strbuf_reset(buf);
repo_git_path_append(r, buf, "modules/");
strbuf_addstr_urlencode(buf, submodule_name, is_casefolding_rfc3986_unreserved);
if (!validate_and_set_submodule_gitdir(buf, submodule_name))
return;
/* Case 2.3: Try some derived gitdir names, see if one sticks */
for (char c = '0'; c <= '9'; c++) {
strbuf_reset(buf);
repo_git_path_append(r, buf, "modules/");
strbuf_addstr_urlencode(buf, submodule_name, is_rfc3986_unreserved);
strbuf_addch(buf, c);
if (!validate_and_set_submodule_gitdir(buf, submodule_name))
return;
strbuf_reset(buf);
repo_git_path_append(r, buf, "modules/");
strbuf_addstr_urlencode(buf, submodule_name, is_casefolding_rfc3986_unreserved);
strbuf_addch(buf, c);
if (!validate_and_set_submodule_gitdir(buf, submodule_name))
return;
}
/* Case 3: Nothing worked: error out */
die(_("Cannot construct a valid gitdir path for submodule '%s': "
"please set a unique git config for 'submodule.%s.gitdir'."),

View File

@@ -161,4 +161,39 @@ test_expect_success 'disabling extensions.submoduleEncoding prevents nested subm
)
'
# Clone with extensions.submoduleEncoding=true, add submodules whose names
# differ only in letter case, and verify the gitdir path chosen for each.
# Gated on the CASE_INSENSITIVE_FS prerequisite.
#
# Gitdir paths asserted at the end of the test, in the order the submodules
# are added:
#   folding  -> modules/folding
#   FoldinG  -> modules/%46oldin%47  (uppercase letters percent-encoded)
#   FOO      -> modules/FOO
#   foo      -> modules/foo0         (digit suffix appended)
#   foobar   -> modules/foobar
#   foo%42ar -> modules/foo%42ar
#   fooBar   -> modules/fooBar0      (digit suffix appended)
#
# NOTE(review): "fooBar" presumably falls through to the digit suffix because
# its uppercase-encoded form would be "foo%42ar", which is already taken by
# the previous submodule -- confirm against submodule_name_to_gitdir().
test_expect_success CASE_INSENSITIVE_FS 'verify case-folding conflicts are correctly encoded' '
git clone -c extensions.submoduleEncoding=true main cloned-folding &&
(
cd cloned-folding &&
# conflict: the "folding" gitdir will already be taken
git submodule add ../new-sub "folding" &&
test_commit lowercase &&
git submodule add ../new-sub "FoldinG" &&
test_commit uppercase &&
# conflict: the "foo" gitdir will already be taken
git submodule add ../new-sub "FOO" &&
test_commit uppercase-foo &&
git submodule add ../new-sub "foo" &&
test_commit lowercase-foo &&
# create a multi conflict between foobar, fooBar and foo%42ar
# the "foo" gitdir will already be taken
git submodule add ../new-sub "foobar" &&
test_commit lowercase-foobar &&
git submodule add ../new-sub "foo%42ar" &&
test_commit encoded-foo%42ar &&
git submodule add ../new-sub "fooBar" &&
test_commit mixed-fooBar
) &&
verify_submodule_gitdir_path cloned-folding "folding" "modules/folding" &&
verify_submodule_gitdir_path cloned-folding "FoldinG" "modules/%46oldin%47" &&
verify_submodule_gitdir_path cloned-folding "FOO" "modules/FOO" &&
verify_submodule_gitdir_path cloned-folding "foo" "modules/foo0" &&
verify_submodule_gitdir_path cloned-folding "foobar" "modules/foobar" &&
verify_submodule_gitdir_path cloned-folding "foo%42ar" "modules/foo%42ar" &&
verify_submodule_gitdir_path cloned-folding "fooBar" "modules/fooBar0"
'
test_done

12
url.c
View File

@@ -14,6 +14,18 @@ int is_rfc3986_unreserved(char ch)
ch == '-' || ch == '_' || ch == '.' || ch == '~';
}
/*
 * Stricter sibling of is_rfc3986_unreserved(): only lowercase ASCII
 * letters, ASCII digits and the four marks '-', '.', '_', '~' count as
 * unreserved. Uppercase letters are rejected so that callers
 * percent-encode them, which keeps names such as 'Foo' (%46oo) and
 * 'foo' (foo) distinct on case-folding filesystems.
 *
 * Returns 1 if `c` is unreserved under this rule, 0 otherwise.
 */
int is_casefolding_rfc3986_unreserved(char c)
{
	if ('a' <= c && c <= 'z')
		return 1;
	if ('0' <= c && c <= '9')
		return 1;

	switch (c) {
	case '-':
	case '.':
	case '_':
	case '~':
		return 1;
	default:
		return 0;
	}
}
int is_urlschemechar(int first_flag, int ch)
{
/*

1
url.h
View File

@@ -22,5 +22,6 @@ void end_url_with_slash(struct strbuf *buf, const char *url);
void str_end_url_with_slash(const char *url, char **dest);
int is_rfc3986_unreserved(char ch);
/*
 * Returns non-zero if `c` is a lowercase ASCII letter, an ASCII digit, or
 * one of '-', '.', '_', '~'. Uppercase letters are deliberately not
 * accepted, so that they get percent-encoded and names differing only in
 * case stay distinct on case-folding filesystems.
 */
int is_casefolding_rfc3986_unreserved(char c);
#endif /* URL_H */