diff options
| author | Adrian Ratiu <adrian.ratiu@collabora.com> | 2025-11-07 17:05:46 +0200 |
|---|---|---|
| committer | Junio C Hamano <gitster@pobox.com> | 2025-11-07 09:01:11 -0800 |
| commit | 829f0f6b5ae1dced0e6b7a6a819b010845f1d119 (patch) | |
| tree | 288353641ea76bbfbc23a106a564ab958c233242 | |
| parent | builtin/credential-store: move is_rfc3986_unreserved to url.[ch] (diff) | |
| download | git-829f0f6b5ae1dced0e6b7a6a819b010845f1d119.tar.gz git-829f0f6b5ae1dced0e6b7a6a819b010845f1d119.zip | |
submodule: add extension to encode gitdir paths
Add a submoduleEncoding extension which fixes filesystem collisions by
encoding gitdir paths. At a high level, this implements a mechanism to
encode -> validate -> retry until a working gitdir path is found.
Credit goes to Junio for coming up with this design: encoding is only
applied when necessary, e.g. uppercase characters are encoded only on
case-folding filesystems and only if a real conflict is detected.
To make this work, we rely on the submodule.<name>.gitdir config as the
single source of truth for gitidir paths: the config is always set when
the extension is enabled. Users who care about gitdir paths are expected
to get/set the config and not the underlying encoding implementation.
This commit adds the basic encoding logic which addresses nested gitdirs.
The next commit fixes case-folding, the next commit fixes names longer
than NAME_MAX. The idea is the encoding can be improved over time in a
way which is transparent to users.
Suggested-by: Junio C Hamano <gitster@pobox.com>
Suggested-by: Phillip Wood <phillip.wood123@gmail.com>
Suggested-by: Patrick Steinhardt <ps@pks.im>
Based-on-patch-by: Brandon Williams <bwilliams.eng@gmail.com>
Signed-off-by: Adrian Ratiu <adrian.ratiu@collabora.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
| -rw-r--r-- | Documentation/config/extensions.adoc | 6 | ||||
| -rw-r--r-- | Documentation/config/submodule.adoc | 5 | ||||
| -rw-r--r-- | builtin/submodule--helper.c | 17 | ||||
| -rw-r--r-- | repository.c | 1 | ||||
| -rw-r--r-- | repository.h | 1 | ||||
| -rw-r--r-- | setup.c | 7 | ||||
| -rw-r--r-- | setup.h | 1 | ||||
| -rw-r--r-- | submodule.c | 110 | ||||
| -rw-r--r-- | t/lib-verify-submodule-gitdir-path.sh | 24 | ||||
| -rw-r--r-- | t/meson.build | 1 | ||||
| -rwxr-xr-x | t/t7425-submodule-encoding.sh | 146 | ||||
| -rwxr-xr-x | t/t9902-completion.sh | 1 |
12 files changed, 294 insertions, 26 deletions
diff --git a/Documentation/config/extensions.adoc b/Documentation/config/extensions.adoc index 532456644b..e33040fff5 100644 --- a/Documentation/config/extensions.adoc +++ b/Documentation/config/extensions.adoc @@ -73,6 +73,12 @@ relativeWorktrees::: repaired with either the `--relative-paths` option or with the `worktree.useRelativePaths` config set to `true`. +submoduleEncoding::: + If enabled, submodule gitdir paths are encoded to avoid filesystem + conflicts due to nested gitdirs, case insensitivity or other issues + When enabled, the submodule.<name>.gitdir config is always set for + all submodulesand is the single point of authority for gitdir paths. + worktreeConfig::: If enabled, then worktrees will load config settings from the `$GIT_DIR/config.worktree` file in addition to the diff --git a/Documentation/config/submodule.adoc b/Documentation/config/submodule.adoc index 0672d99117..ddaadc3dc5 100644 --- a/Documentation/config/submodule.adoc +++ b/Documentation/config/submodule.adoc @@ -52,6 +52,11 @@ submodule.<name>.active:: submodule.active config option. See linkgit:gitsubmodules[7] for details. +submodule.<name>.gitdir:: + This option sets the gitdir path for submodule <name>, allowing users to + override the default path. Only works when `extensions.submoduleEncoding` + is enabled, otherwise does nothing. See linkgit:git-config[1] for details. + submodule.active:: A repeated field which contains a pathspec used to match against a submodule's path to determine if the submodule is of interest to git diff --git a/builtin/submodule--helper.c b/builtin/submodule--helper.c index 2873b2780e..abd20eee53 100644 --- a/builtin/submodule--helper.c +++ b/builtin/submodule--helper.c @@ -1208,6 +1208,22 @@ static int module_summary(int argc, const char **argv, const char *prefix, return ret; } +static int module_gitdir(int argc, const char **argv, const char *prefix UNUSED, + struct repository *repo) +{ + struct strbuf gitdir = STRBUF_INIT; + + if (argc != 2) + usage(_("git submodule--helper gitdir <name>")); + + submodule_name_to_gitdir(&gitdir, repo, argv[1]); + + printf("%s\n", gitdir.buf); + + strbuf_release(&gitdir); + return 0; +} + struct sync_cb { const char *prefix; const char *super_prefix; @@ -3591,6 +3607,7 @@ int cmd_submodule__helper(int argc, NULL }; struct option options[] = { + OPT_SUBCOMMAND("gitdir", &fn, module_gitdir), OPT_SUBCOMMAND("clone", &fn, module_clone), OPT_SUBCOMMAND("add", &fn, module_add), OPT_SUBCOMMAND("update", &fn, module_update), diff --git a/repository.c b/repository.c index 6faf5c7398..26a21c0d71 100644 --- a/repository.c +++ b/repository.c @@ -288,6 +288,7 @@ int repo_init(struct repository *repo, repo->repository_format_worktree_config = format.worktree_config; repo->repository_format_relative_worktrees = format.relative_worktrees; repo->repository_format_precious_objects = format.precious_objects; + repo->repository_format_submodule_encoding = format.submodule_encoding; /* take ownership of format.partial_clone */ repo->repository_format_partial_clone = format.partial_clone; diff --git a/repository.h b/repository.h index 5808a5d610..7e39b2acf7 100644 --- a/repository.h +++ b/repository.h @@ -158,6 +158,7 @@ struct repository { int repository_format_worktree_config; int repository_format_relative_worktrees; int repository_format_precious_objects; + int repository_format_submodule_encoding; /* Indicate if a repository has a different 'commondir' from 'gitdir' */ unsigned different_commondir:1; @@ -687,6 +687,9 @@ static enum extension_result handle_extension(const char *var, } else if (!strcmp(ext, "relativeworktrees")) { data->relative_worktrees = git_config_bool(var, value); return EXTENSION_OK; + } else if (!strcmp(ext, "submoduleencoding")) { + data->submodule_encoding = git_config_bool(var, value); + return EXTENSION_OK; } return EXTENSION_UNKNOWN; } @@ -1865,6 +1868,8 @@ const char *setup_git_directory_gently(int *nongit_ok) repo_fmt.worktree_config; the_repository->repository_format_relative_worktrees = repo_fmt.relative_worktrees; + the_repository->repository_format_submodule_encoding = + repo_fmt.submodule_encoding; /* take ownership of repo_fmt.partial_clone */ the_repository->repository_format_partial_clone = repo_fmt.partial_clone; @@ -1963,6 +1968,8 @@ void check_repository_format(struct repository_format *fmt) fmt->ref_storage_format); the_repository->repository_format_worktree_config = fmt->worktree_config; + the_repository->repository_format_submodule_encoding = + fmt->submodule_encoding; the_repository->repository_format_relative_worktrees = fmt->relative_worktrees; the_repository->repository_format_partial_clone = @@ -130,6 +130,7 @@ struct repository_format { char *partial_clone; /* value of extensions.partialclone */ int worktree_config; int relative_worktrees; + int submodule_encoding; int is_bare; int hash_algo; int compat_hash_algo; diff --git a/submodule.c b/submodule.c index 35c55155f7..ceaff0c1aa 100644 --- a/submodule.c +++ b/submodule.c @@ -31,6 +31,7 @@ #include "commit-reach.h" #include "read-cache-ll.h" #include "setup.h" +#include "url.h" static int config_update_recurse_submodules = RECURSE_SUBMODULES_OFF; static int initialized_fetch_ref_tips; @@ -2256,17 +2257,31 @@ out: return ret; } +/* + * Find the last submodule name in the gitdir path (modules can be nested). + * Returns a pointer into `path` to the beginning of the name or NULL if not found. + */ +static char *find_last_submodule_name(char *git_dir_path) +{ + const char *modules_marker = "/modules/"; + char *p = git_dir_path; + char *last = NULL; + + while ((p = strstr(p, modules_marker))) { + last = p + strlen(modules_marker); + p++; + } + + return last; +} + int validate_submodule_git_dir(char *git_dir, const char *submodule_name) { size_t len = strlen(git_dir), suffix_len = strlen(submodule_name); - char *p; + char *p = git_dir + len - suffix_len; + bool suffixes_match = !strcmp(p, submodule_name); int ret = 0; - if (len <= suffix_len || (p = git_dir + len - suffix_len)[-1] != '/' || - strcmp(p, submodule_name)) - BUG("submodule name '%s' not a suffix of git dir '%s'", - submodule_name, git_dir); - /* * We prevent the contents of sibling submodules' git directories to * clash. @@ -2277,7 +2292,7 @@ int validate_submodule_git_dir(char *git_dir, const char *submodule_name) * but the latter directory is already designated to contain the hooks * of the former. */ - for (; *p; p++) { + for (; *p && suffixes_match; p++) { if (is_dir_sep(*p)) { char c = *p; @@ -2294,6 +2309,15 @@ int validate_submodule_git_dir(char *git_dir, const char *submodule_name) } } + /* tests after this check are only for encoded names, when the extension is enabled */ + if (!the_repository->repository_format_submodule_encoding) + return 0; + + /* Prevent the use of '/' in names */ + p = find_last_submodule_name(git_dir); + if (p && strchr(p, '/') != NULL) + return error("submodule gitdir name '%s' contains unexpected '/'", p); + return 0; } @@ -2581,29 +2605,63 @@ cleanup: return ret; } +static int validate_and_set_submodule_gitdir(struct strbuf *gitdir_path, + const char *submodule_name) +{ + char *key; + + if (validate_submodule_git_dir(gitdir_path->buf, submodule_name)) + return -1; + + key = xstrfmt("submodule.%s.gitdir", submodule_name); + repo_config_set_gently(the_repository, key, gitdir_path->buf); + FREE_AND_NULL(key); + + return 0; + +} + void submodule_name_to_gitdir(struct strbuf *buf, struct repository *r, const char *submodule_name) { + const char *gitdir; + char *key; + + repo_git_path_append(r, buf, "modules/"); + strbuf_addstr(buf, submodule_name); + + /* If extensions.submoduleEncoding is disabled, use the plain path set above */ + if (!r->repository_format_submodule_encoding) + return; + + /* Extension is enabled: use the gitdir config if it exists */ + key = xstrfmt("submodule.%s.gitdir", submodule_name); + if (!repo_config_get_string_tmp(r, key, &gitdir)) { + strbuf_reset(buf); + strbuf_addstr(buf, gitdir); + FREE_AND_NULL(key); + return; + } + FREE_AND_NULL(key); + /* - * NEEDSWORK: The current way of mapping a submodule's name to - * its location in .git/modules/ has problems with some naming - * schemes. For example, if a submodule is named "foo" and - * another is named "foo/bar" (whether present in the same - * superproject commit or not - the problem will arise if both - * superproject commits have been checked out at any point in - * time), or if two submodule names only have different cases in - * a case-insensitive filesystem. - * - * There are several solutions, including encoding the path in - * some way, introducing a submodule.<name>.gitdir config in - * .git/config (not .gitmodules) that allows overriding what the - * gitdir of a submodule would be (and teach Git, upon noticing - * a clash, to automatically determine a non-clashing name and - * to write such a config), or introducing a - * submodule.<name>.gitdir config in .gitmodules that repo - * administrators can explicitly set. Nothing has been decided, - * so for now, just append the name at the end of the path. + * The gitdir config does not exist, even though the extension is enabled. + * Therefore we are in one of the following cases: */ + + /* Case 1: legacy migration of valid plain submodule names */ + if (!validate_and_set_submodule_gitdir(buf, submodule_name)) + return; + + /* Case 2: Try URI-safe (RFC3986) encoding first, this fixes nested gitdirs */ + strbuf_reset(buf); repo_git_path_append(r, buf, "modules/"); - strbuf_addstr(buf, submodule_name); + strbuf_addstr_urlencode(buf, submodule_name, is_rfc3986_unreserved); + if (!validate_and_set_submodule_gitdir(buf, submodule_name)) + return; + + /* Case 3: error out */ + die(_("Cannot construct a valid gitdir path for submodule '%s': " + "please set a unique git config for 'submodule.%s.gitdir'."), + submodule_name, submodule_name); } diff --git a/t/lib-verify-submodule-gitdir-path.sh b/t/lib-verify-submodule-gitdir-path.sh new file mode 100644 index 0000000000..62794df976 --- /dev/null +++ b/t/lib-verify-submodule-gitdir-path.sh @@ -0,0 +1,24 @@ +# Helper to verify if repo $1 contains a submodule named $2 with gitdir path $3 + +# This does not check filesystem existence. That is done in submodule.c via the +# submodule_name_to_gitdir() API which this helper ends up calling. The gitdirs +# might or might not exist (e.g. when adding a new submodule), so this only +# checks the expected configuration path, which might be overridden by the user. + +verify_submodule_gitdir_path() { + repo="$1" && + name="$2" && + path="$3" && + ( + cd "$repo" && + # Compute expected absolute path + expected="$(git rev-parse --git-common-dir)/$path" && + expected="$(test-tool path-utils real_path "$expected")" && + # Compute actual absolute path + actual="$(git submodule--helper gitdir "$name")" && + actual="$(test-tool path-utils real_path "$actual")" && + echo "$expected" >expect && + echo "$actual" >actual && + test_cmp expect actual + ) +} diff --git a/t/meson.build b/t/meson.build index a5531df415..4187b35aee 100644 --- a/t/meson.build +++ b/t/meson.build @@ -884,6 +884,7 @@ integration_tests = [ 't7422-submodule-output.sh', 't7423-submodule-symlinks.sh', 't7424-submodule-mixed-ref-formats.sh', + 't7425-submodule-encoding.sh', 't7450-bad-git-dotfiles.sh', 't7500-commit-template-squash-signoff.sh', 't7501-commit-basic-functionality.sh', diff --git a/t/t7425-submodule-encoding.sh b/t/t7425-submodule-encoding.sh new file mode 100755 index 0000000000..a42d358f5b --- /dev/null +++ b/t/t7425-submodule-encoding.sh @@ -0,0 +1,146 @@ +#!/bin/sh + +test_description='submodules handle mixed legacy and new (encoded) style gitdir paths' + +. ./test-lib.sh +. "$TEST_DIRECTORY"/lib-verify-submodule-gitdir-path.sh + +test_expect_success 'setup: allow file protocol' ' + git config --global protocol.file.allow always +' + +test_expect_success 'create repo with mixed encoded and non-encoded submodules' ' + git init -b main legacy-sub && + test_commit -C legacy-sub legacy-initial && + legacy_rev=$(git -C legacy-sub rev-parse HEAD) && + + git init -b main new-sub && + test_commit -C new-sub new-initial && + new_rev=$(git -C new-sub rev-parse HEAD) && + + git init -b main main && + ( + cd main && + git submodule add ../legacy-sub legacy && + test_commit legacy-sub && + + git config core.repositoryformatversion 1 && + git config extensions.submoduleEncoding true && + + git submodule add ../new-sub "New Sub" && + test_commit new + ) +' + +test_expect_success 'verify submodule name is properly encoded' ' + verify_submodule_gitdir_path main legacy modules/legacy && + verify_submodule_gitdir_path main "New Sub" "modules/New Sub" +' + +test_expect_success 'clone from repo with both legacy and new-style submodules' ' + git clone --recurse-submodules main cloned-non-encoding && + ( + cd cloned-non-encoding && + + test_path_is_dir .git/modules/legacy && + test_path_is_dir .git/modules/"New Sub" && + + git submodule status >list && + test_grep "$legacy_rev legacy" list && + test_grep "$new_rev New Sub" list + ) && + + git clone -c extensions.submoduleEncoding=true --recurse-submodules main cloned-encoding && + ( + cd cloned-encoding && + + test_path_is_dir .git/modules/legacy && + test_path_is_dir ".git/modules/New Sub" && + + git submodule status >list && + test_grep "$legacy_rev legacy" list && + test_grep "$new_rev New Sub" list + ) +' + +test_expect_success 'commit and push changes to encoded submodules' ' + git -C legacy-sub config receive.denyCurrentBranch updateInstead && + git -C new-sub config receive.denyCurrentBranch updateInstead && + git -C main config receive.denyCurrentBranch updateInstead && + ( + cd cloned-encoding && + + git -C legacy switch --track -C main origin/main && + test_commit -C legacy second-commit && + git -C legacy push && + + git -C "New Sub" switch --track -C main origin/main && + test_commit -C "New Sub" second-commit && + git -C "New Sub" push && + + # Stage and commit submodule changes in superproject + git switch --track -C main origin/main && + git add legacy "New Sub" && + git commit -m "update submodules" && + + # push superproject commit to main repo + git push + ) && + + # update expected legacy & new submodule checksums + legacy_rev=$(git -C legacy-sub rev-parse HEAD) && + new_rev=$(git -C new-sub rev-parse HEAD) +' + +test_expect_success 'fetch mixed submodule changes and verify updates' ' + ( + cd main && + + # only update submodules because superproject was + # pushed into at the end of last test + git submodule update --init --recursive && + + test_path_is_dir .git/modules/legacy && + test_path_is_dir ".git/modules/New Sub" && + + # Verify both submodules are at the expected commits + git submodule status >list && + test_grep "$legacy_rev legacy" list && + test_grep "$new_rev New Sub" list + ) +' + +test_expect_success 'setup submodules with nested git dirs' ' + git init nested && + test_commit -C nested nested && + ( + cd nested && + cat >.gitmodules <<-EOF && + [submodule "hippo"] + url = . + path = thing1 + [submodule "hippo/hooks"] + url = . + path = thing2 + EOF + git clone . thing1 && + git clone . thing2 && + git add .gitmodules thing1 thing2 && + test_tick && + git commit -m nested + ) +' + +test_expect_success 'git dirs of encoded sibling submodules must not be nested' ' + git clone -c extensions.submoduleEncoding=true --recurse-submodules nested clone_nested && + verify_submodule_gitdir_path clone_nested hippo modules/hippo && + verify_submodule_gitdir_path clone_nested hippo/hooks modules/hippo%2fhooks +' + +test_expect_success 'submodule git dir nesting detection must work with parallel cloning' ' + git clone -c extensions.submoduleEncoding=true --recurse-submodules --jobs=2 nested clone_parallel && + verify_submodule_gitdir_path clone_parallel hippo modules/hippo && + verify_submodule_gitdir_path clone_parallel hippo/hooks modules/hippo%2fhooks +' + +test_done diff --git a/t/t9902-completion.sh b/t/t9902-completion.sh index 964e1f1569..ffb9c8b522 100755 --- a/t/t9902-completion.sh +++ b/t/t9902-completion.sh @@ -3053,6 +3053,7 @@ test_expect_success 'git config set - variable name - __git_compute_second_level submodule.sub.fetchRecurseSubmodules Z submodule.sub.ignore Z submodule.sub.active Z + submodule.sub.gitdir Z EOF ' |
