From 5c5a4a1c05932378d259b1fdd9526cab971656a2 Mon Sep 17 00:00:00 2001 From: Filip Hejsek Date: Sun, 28 Jan 2024 04:29:33 +0100 Subject: [PATCH 01/10] t0411: add tests for cloning from partial repo Cloning from a partial repository must not fetch missing objects into the partial repository, because that can lead to arbitrary code execution. Add a couple of test cases, pretending to the `upload-pack` command (and to that command only) that it is working on a repository owned by someone else. Helped-by: Jeff King Signed-off-by: Filip Hejsek Signed-off-by: Johannes Schindelin --- t/t0411-clone-from-partial.sh | 60 +++++++++++++++++++++++++++++++++++ 1 file changed, 60 insertions(+) create mode 100755 t/t0411-clone-from-partial.sh diff --git a/t/t0411-clone-from-partial.sh b/t/t0411-clone-from-partial.sh new file mode 100755 index 0000000000..fb72a0a9ff --- /dev/null +++ b/t/t0411-clone-from-partial.sh @@ -0,0 +1,60 @@ +#!/bin/sh + +test_description='check that local clone does not fetch from promisor remotes' + +. ./test-lib.sh + +test_expect_success 'create evil repo' ' + git init tmp && + test_commit -C tmp a && + git -C tmp config uploadpack.allowfilter 1 && + git clone --filter=blob:none --no-local --no-checkout tmp evil && + rm -rf tmp && + + git -C evil config remote.origin.uploadpack \"\$TRASH_DIRECTORY/fake-upload-pack\" && + write_script fake-upload-pack <<-\EOF && + echo >&2 "fake-upload-pack running" + >"$TRASH_DIRECTORY/script-executed" + exit 1 + EOF + export TRASH_DIRECTORY && + + # empty shallow file disables local clone optimization + >evil/.git/shallow +' + +test_expect_failure 'local clone must not fetch from promisor remote and execute script' ' + rm -f script-executed && + test_must_fail git clone \ + --upload-pack="GIT_TEST_ASSUME_DIFFERENT_OWNER=true git-upload-pack" \ + evil clone1 2>err && + ! grep "fake-upload-pack running" err && + test_path_is_missing script-executed +' + +test_expect_failure 'clone from file://... must not fetch from promisor remote and execute script' ' + rm -f script-executed && + test_must_fail git clone \ + --upload-pack="GIT_TEST_ASSUME_DIFFERENT_OWNER=true git-upload-pack" \ + "file://$(pwd)/evil" clone2 2>err && + ! grep "fake-upload-pack running" err && + test_path_is_missing script-executed +' + +test_expect_failure 'fetch from file://... must not fetch from promisor remote and execute script' ' + rm -f script-executed && + test_must_fail git fetch \ + --upload-pack="GIT_TEST_ASSUME_DIFFERENT_OWNER=true git-upload-pack" \ + "file://$(pwd)/evil" 2>err && + ! grep "fake-upload-pack running" err && + test_path_is_missing script-executed +' + +test_expect_success 'pack-objects should fetch from promisor remote and execute script' ' + rm -f script-executed && + echo "HEAD" | test_must_fail git -C evil pack-objects --revs --stdout >/dev/null 2>err && + grep "fake-upload-pack running" err && + test_path_is_file script-executed +' + +test_done From f4aa8c8bb11dae6e769cd930565173808cbb69c8 Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Wed, 10 Apr 2024 14:39:37 +0200 Subject: [PATCH 02/10] fetch/clone: detect dubious ownership of local repositories When cloning from somebody else's repositories, it is possible that, say, the `upload-pack` command is overridden in the repository that is about to be cloned, which would then be run in the user's context who started the clone. To remind the user that this is a potentially unsafe operation, let's extend the ownership checks we have already established for regular gitdir discovery to extend also to local repositories that are about to be cloned. This protection extends also to file:// URLs. The fixes in this commit address CVE-2024-32004. Note: This commit does not touch the `fetch`/`clone` code directly, but instead the function used implicitly by both: `enter_repo()`. This function is also used by `git receive-pack` (i.e. pushes), by `git upload-archive`, by `git daemon` and by `git http-backend`. In setups that want to serve repositories owned by different users than the account running the service, this will require `safe.*` settings to be configured accordingly. Also note: there are tiny time windows where a time-of-check-time-of-use ("TOCTOU") race is possible. The real solution to those would be to work with `fstat()` and `openat()`. However, the latter function is not available on Windows (and would have to be emulated with rather expensive low-level `NtCreateFile()` calls), and the changes would be quite extensive, for my taste too extensive for the little gain given that embargoed releases need to pay extra attention to avoid introducing inadvertent bugs. Signed-off-by: Johannes Schindelin --- cache.h | 12 ++++++++++++ path.c | 2 ++ setup.c | 21 +++++++++++++++++++++ t/t0411-clone-from-partial.sh | 6 +++--- 4 files changed, 38 insertions(+), 3 deletions(-) diff --git a/cache.h b/cache.h index fcf49706ad..a46a3e4b6b 100644 --- a/cache.h +++ b/cache.h @@ -606,6 +606,18 @@ void set_git_work_tree(const char *tree); #define ALTERNATE_DB_ENVIRONMENT "GIT_ALTERNATE_OBJECT_DIRECTORIES" +/* + * Check if a repository is safe and die if it is not, by verifying the + * ownership of the worktree (if any), the git directory, and the gitfile (if + * any). + * + * Exemptions for known-safe repositories can be added via `safe.directory` + * config settings; for non-bare repositories, their worktree needs to be + * added, for bare ones their git directory. + */ +void die_upon_dubious_ownership(const char *gitfile, const char *worktree, + const char *gitdir); + void setup_work_tree(void); /* * Find the commondir and gitdir of the repository that contains the current diff --git a/path.c b/path.c index 492e17ad12..d61f70e87d 100644 --- a/path.c +++ b/path.c @@ -840,6 +840,7 @@ const char *enter_repo(const char *path, int strict) if (!suffix[i]) return NULL; gitfile = read_gitfile(used_path.buf); + die_upon_dubious_ownership(gitfile, NULL, used_path.buf); if (gitfile) { strbuf_reset(&used_path); strbuf_addstr(&used_path, gitfile); @@ -850,6 +851,7 @@ const char *enter_repo(const char *path, int strict) } else { const char *gitfile = read_gitfile(path); + die_upon_dubious_ownership(gitfile, NULL, path); if (gitfile) path = gitfile; if (chdir(path)) diff --git a/setup.c b/setup.c index cefd5f63c4..9d401ae4c8 100644 --- a/setup.c +++ b/setup.c @@ -1165,6 +1165,27 @@ static int ensure_valid_ownership(const char *gitfile, return data.is_safe; } +void die_upon_dubious_ownership(const char *gitfile, const char *worktree, + const char *gitdir) +{ + struct strbuf report = STRBUF_INIT, quoted = STRBUF_INIT; + const char *path; + + if (ensure_valid_ownership(gitfile, worktree, gitdir, &report)) + return; + + strbuf_complete(&report, '\n'); + path = gitfile ? gitfile : gitdir; + sq_quote_buf_pretty("ed, path); + + die(_("detected dubious ownership in repository at '%s'\n" + "%s" + "To add an exception for this directory, call:\n" + "\n" + "\tgit config --global --add safe.directory %s"), + path, report.buf, quoted.buf); +} + static int allowed_bare_repo_cb(const char *key, const char *value, void *d) { enum allowed_bare_repo *allowed_bare_repo = d; diff --git a/t/t0411-clone-from-partial.sh b/t/t0411-clone-from-partial.sh index fb72a0a9ff..eb3360dbca 100755 --- a/t/t0411-clone-from-partial.sh +++ b/t/t0411-clone-from-partial.sh @@ -23,7 +23,7 @@ test_expect_success 'create evil repo' ' >evil/.git/shallow ' -test_expect_failure 'local clone must not fetch from promisor remote and execute script' ' +test_expect_success 'local clone must not fetch from promisor remote and execute script' ' rm -f script-executed && test_must_fail git clone \ --upload-pack="GIT_TEST_ASSUME_DIFFERENT_OWNER=true git-upload-pack" \ @@ -32,7 +32,7 @@ test_expect_failure 'local clone must not fetch from promisor remote and execute test_path_is_missing script-executed ' -test_expect_failure 'clone from file://... must not fetch from promisor remote and execute script' ' +test_expect_success 'clone from file://... must not fetch from promisor remote and execute script' ' rm -f script-executed && test_must_fail git clone \ --upload-pack="GIT_TEST_ASSUME_DIFFERENT_OWNER=true git-upload-pack" \ @@ -41,7 +41,7 @@ test_expect_failure 'clone from file://... must not fetch from promisor remote a test_path_is_missing script-executed ' -test_expect_failure 'fetch from file://... must not fetch from promisor remote and execute script' ' +test_expect_success 'fetch from file://... must not fetch from promisor remote and execute script' ' rm -f script-executed && test_must_fail git fetch \ --upload-pack="GIT_TEST_ASSUME_DIFFERENT_OWNER=true git-upload-pack" \ From 7b70e9efb18c2cc3f219af399bd384c5801ba1d7 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Tue, 16 Apr 2024 04:35:33 -0400 Subject: [PATCH 03/10] upload-pack: disable lazy-fetching by default The upload-pack command tries to avoid trusting the repository in which it's run (e.g., by not running any hooks and not using any config that contains arbitrary commands). But if the server side of a fetch or a clone is a partial clone, then either upload-pack or its child pack-objects may run a lazy "git fetch" under the hood. And it is very easy to convince fetch to run arbitrary commands. The "server" side can be a local repository owned by someone else, who would be able to configure commands that are run during a clone with the current user's permissions. This issue has been designated CVE-2024-32004. The fix in this commit's parent helps in this scenario, as well as in related scenarios using SSH to clone, where the untrusted .git directory is owned by a different user id. But if you received one as a zip file, on a USB stick, etc, it may be owned by your user but still untrusted. This has been designated CVE-2024-32465. To mitigate the issue more completely, let's disable lazy fetching entirely during `upload-pack`. While fetching from a partial repository should be relatively rare, it is certainly not an unreasonable workflow. And thus we need to provide an escape hatch. This commit works by respecting a GIT_NO_LAZY_FETCH environment variable (to skip the lazy-fetch), and setting it in upload-pack, but only when the user has not already done so (which gives us the escape hatch). The name of the variable is specifically chosen to match what has already been added in 'master' via e6d5479e7a (git: extend --no-lazy-fetch to work across subprocesses, 2024-02-27). Since we're building this fix as a backport for older versions, we could cherry-pick that patch and its earlier steps. However, we don't really need the niceties (like a "--no-lazy-fetch" option) that it offers. By using the same name, everything should just work when the two are eventually merged, but here are a few notes: - the blocking of the fetch in e6d5479e7a is incomplete! It sets fetch_if_missing to 0 when we setup the repository variable, but that isn't enough. pack-objects in particular will call prefetch_to_pack() even if that variable is 0. This patch by contrast checks the environment variable at the lowest level before we call the lazy fetch, where we can be sure to catch all code paths. Possibly the setting of fetch_if_missing from e6d5479e7a can be reverted, but it may be useful to have. For example, some code may want to use that flag to change behavior before it gets to the point of trying to start the fetch. At any rate, that's all outside the scope of this patch. - there's documentation for GIT_NO_LAZY_FETCH in e6d5479e7a. We can live without that here, because for the most part the user shouldn't need to set it themselves. The exception is if they do want to override upload-pack's default, and that requires a separate documentation section (which is added here) - it would be nice to use the NO_LAZY_FETCH_ENVIRONMENT macro added by e6d5479e7a, but those definitions have moved from cache.h to environment.h between 2.39.3 and master. I just used the raw string literals, and we can replace them with the macro once this topic is merged to master. At least with respect to CVE-2024-32004, this does render this commit's parent commit somewhat redundant. However, it is worth retaining that commit as defense in depth, and because it may help other issues (e.g., symlink/hardlink TOCTOU races, where zip files are not really an interesting attack vector). The tests in t0411 still pass, but now we have _two_ mechanisms ensuring that the evil command is not run. Let's beef up the existing ones to check that they failed for the expected reason, that we refused to run upload-pack at all with an alternate user id. And add two new ones for the same-user case that both the restriction and its escape hatch. Signed-off-by: Jeff King Signed-off-by: Johannes Schindelin --- Documentation/git-upload-pack.txt | 16 ++++++++++++++++ builtin/upload-pack.c | 2 ++ promisor-remote.c | 10 ++++++++++ t/t0411-clone-from-partial.sh | 18 ++++++++++++++++++ 4 files changed, 46 insertions(+) diff --git a/Documentation/git-upload-pack.txt b/Documentation/git-upload-pack.txt index b656b47567..fc4c62d7bc 100644 --- a/Documentation/git-upload-pack.txt +++ b/Documentation/git-upload-pack.txt @@ -55,6 +55,22 @@ ENVIRONMENT admins may need to configure some transports to allow this variable to be passed. See the discussion in linkgit:git[1]. +`GIT_NO_LAZY_FETCH`:: + When cloning or fetching from a partial repository (i.e., one + itself cloned with `--filter`), the server-side `upload-pack` + may need to fetch extra objects from its upstream in order to + complete the request. By default, `upload-pack` will refuse to + perform such a lazy fetch, because `git fetch` may run arbitrary + commands specified in configuration and hooks of the source + repository (and `upload-pack` tries to be safe to run even in + untrusted `.git` directories). ++ +This is implemented by having `upload-pack` internally set the +`GIT_NO_LAZY_FETCH` variable to `1`. If you want to override it +(because you are fetching from a partial clone, and you are sure +you trust it), you can explicitly set `GIT_NO_LAZY_FETCH` to +`0`. + SEE ALSO -------- linkgit:gitnamespaces[7] diff --git a/builtin/upload-pack.c b/builtin/upload-pack.c index 25b69da2bf..f446ff04f6 100644 --- a/builtin/upload-pack.c +++ b/builtin/upload-pack.c @@ -35,6 +35,8 @@ int cmd_upload_pack(int argc, const char **argv, const char *prefix) packet_trace_identity("upload-pack"); read_replace_refs = 0; + /* TODO: This should use NO_LAZY_FETCH_ENVIRONMENT */ + xsetenv("GIT_NO_LAZY_FETCH", "1", 0); argc = parse_options(argc, argv, prefix, options, upload_pack_usage, 0); diff --git a/promisor-remote.c b/promisor-remote.c index faa7612941..550a38f752 100644 --- a/promisor-remote.c +++ b/promisor-remote.c @@ -20,6 +20,16 @@ static int fetch_objects(struct repository *repo, int i; FILE *child_in; + /* TODO: This should use NO_LAZY_FETCH_ENVIRONMENT */ + if (git_env_bool("GIT_NO_LAZY_FETCH", 0)) { + static int warning_shown; + if (!warning_shown) { + warning_shown = 1; + warning(_("lazy fetching disabled; some objects may not be available")); + } + return -1; + } + child.git_cmd = 1; child.in = -1; if (repo != the_repository) diff --git a/t/t0411-clone-from-partial.sh b/t/t0411-clone-from-partial.sh index eb3360dbca..b3d6ddc4bc 100755 --- a/t/t0411-clone-from-partial.sh +++ b/t/t0411-clone-from-partial.sh @@ -28,6 +28,7 @@ test_expect_success 'local clone must not fetch from promisor remote and execute test_must_fail git clone \ --upload-pack="GIT_TEST_ASSUME_DIFFERENT_OWNER=true git-upload-pack" \ evil clone1 2>err && + grep "detected dubious ownership" err && ! grep "fake-upload-pack running" err && test_path_is_missing script-executed ' @@ -37,6 +38,7 @@ test_expect_success 'clone from file://... must not fetch from promisor remote a test_must_fail git clone \ --upload-pack="GIT_TEST_ASSUME_DIFFERENT_OWNER=true git-upload-pack" \ "file://$(pwd)/evil" clone2 2>err && + grep "detected dubious ownership" err && ! grep "fake-upload-pack running" err && test_path_is_missing script-executed ' @@ -46,6 +48,7 @@ test_expect_success 'fetch from file://... must not fetch from promisor remote a test_must_fail git fetch \ --upload-pack="GIT_TEST_ASSUME_DIFFERENT_OWNER=true git-upload-pack" \ "file://$(pwd)/evil" 2>err && + grep "detected dubious ownership" err && ! grep "fake-upload-pack running" err && test_path_is_missing script-executed ' @@ -57,4 +60,19 @@ test_expect_success 'pack-objects should fetch from promisor remote and execute test_path_is_file script-executed ' +test_expect_success 'clone from promisor remote does not lazy-fetch by default' ' + rm -f script-executed && + test_must_fail git clone evil no-lazy 2>err && + grep "lazy fetching disabled" err && + test_path_is_missing script-executed +' + +test_expect_success 'promisor lazy-fetching can be re-enabled' ' + rm -f script-executed && + test_must_fail env GIT_NO_LAZY_FETCH=0 \ + git clone evil lazy-ok 2>err && + grep "fake-upload-pack running" err && + test_path_is_file script-executed +' + test_done From e69ac42fcc866d3d6f84ea42bc656673440a07f5 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Tue, 16 Apr 2024 04:52:13 -0400 Subject: [PATCH 04/10] docs: document security issues around untrusted .git dirs For a long time our general philosophy has been that it's unsafe to run arbitrary Git commands if you don't trust the hooks or config in .git, but that running upload-pack should be OK. E.g., see 1456b043fc (Remove post-upload-hook, 2009-12-10), or the design of uploadpack.packObjectsHook. But we never really documented this (and even the discussions that led to 1456b043fc were not on the public list!). Let's try to make our approach more clear, but also be realistic that even upload-pack carries some risk. Helped-by: Filip Hejsek Helped-by: Junio C Hamano Signed-off-by: Jeff King Signed-off-by: Johannes Schindelin --- Documentation/git-upload-pack.txt | 15 +++++++++++++++ Documentation/git.txt | 31 +++++++++++++++++++++++++++++++ 2 files changed, 46 insertions(+) diff --git a/Documentation/git-upload-pack.txt b/Documentation/git-upload-pack.txt index fc4c62d7bc..1d30a4f6b4 100644 --- a/Documentation/git-upload-pack.txt +++ b/Documentation/git-upload-pack.txt @@ -71,6 +71,21 @@ This is implemented by having `upload-pack` internally set the you trust it), you can explicitly set `GIT_NO_LAZY_FETCH` to `0`. +SECURITY +-------- + +Most Git commands should not be run in an untrusted `.git` directory +(see the section `SECURITY` in linkgit:git[1]). `upload-pack` tries to +avoid any dangerous configuration options or hooks from the repository +it's serving, making it safe to clone an untrusted directory and run +commands on the resulting clone. + +For an extra level of safety, you may be able to run `upload-pack` as an +alternate user. The details will be platform dependent, but on many +systems you can run: + + git clone --no-local --upload-pack='sudo -u nobody git-upload-pack' ... + SEE ALSO -------- linkgit:gitnamespaces[7] diff --git a/Documentation/git.txt b/Documentation/git.txt index 1d33e083ab..d2969461a4 100644 --- a/Documentation/git.txt +++ b/Documentation/git.txt @@ -1032,6 +1032,37 @@ The index is also capable of storing multiple entries (called "stages") for a given pathname. These stages are used to hold the various unmerged version of a file when a merge is in progress. +SECURITY +-------- + +Some configuration options and hook files may cause Git to run arbitrary +shell commands. Because configuration and hooks are not copied using +`git clone`, it is generally safe to clone remote repositories with +untrusted content, inspect them with `git log`, and so on. + +However, it is not safe to run Git commands in a `.git` directory (or +the working tree that surrounds it) when that `.git` directory itself +comes from an untrusted source. The commands in its config and hooks +are executed in the usual way. + +By default, Git will refuse to run when the repository is owned by +someone other than the user running the command. See the entry for +`safe.directory` in linkgit:git-config[1]. While this can help protect +you in a multi-user environment, note that you can also acquire +untrusted repositories that are owned by you (for example, if you +extract a zip file or tarball from an untrusted source). In such cases, +you'd need to "sanitize" the untrusted repository first. + +If you have an untrusted `.git` directory, you should first clone it +with `git clone --no-local` to obtain a clean copy. Git does restrict +the set of options and hooks that will be run by `upload-pack`, which +handles the server side of a clone or fetch, but beware that the +surface area for attack against `upload-pack` is large, so this does +carry some risk. The safest thing is to serve the repository as an +unprivileged user (either via linkgit:git-daemon[1], ssh, or using +other tools to change user ids). See the discussion in the `SECURITY` +section of linkgit:git-upload-pack[1]. + FURTHER DOCUMENTATION --------------------- From c30a574a0b50e64f26885f740dd49d2420b9bed7 Mon Sep 17 00:00:00 2001 From: Filip Hejsek Date: Sun, 28 Jan 2024 04:30:25 +0100 Subject: [PATCH 05/10] has_dir_name(): do not get confused by characters < '/' There is a bug in directory/file ("D/F") conflict checking optimization: It assumes that such a conflict cannot happen if a newly added entry's path is lexicgraphically "greater than" the last already-existing index entry _and_ contains a directory separator that comes strictly after the common prefix (`len > len_eq_offset`). This assumption is incorrect, though: `a-` sorts _between_ `a` and `a/b`, their common prefix is `a`, the slash comes after the common prefix, and there is still a file/directory conflict. Let's re-design this logic, taking these facts into consideration: - It is impossible for a file to sort after another file with whose directory it conflicts because the trailing NUL byte is always smaller than any other character. - Since there are quite a number of ASCII characters that sort before the slash (e.g. `-`, `.`, the space character), looking at the last already-existing index entry is not enough to determine whether there is a D/F conflict when the first character different from the existing last index entry's path is a slash. If it is not a slash, there cannot be a file/directory conflict. And if the existing index entry's first different character is a slash, it also cannot be a file/directory conflict because the optimization requires the newly-added entry's path to sort _after_ the existing entry's, and the conflicting file's path would not. So let's fall back to the regular binary search whenever the newly-added item's path differs in a slash character. If it does not, and it sorts after the last index entry, there is no D/F conflict and the new index entry can be safely appended. This fix also nicely simplifies the logic and makes it much easier to reason about, while the impact on performance should be negligible: After this fix, the optimization will be skipped only when index entry's paths differ in a slash and a space, `!`, `"`, `#`, `$`, `%`, `&`, `'`, | ( `)`, `*`, `+`, `,`, `-`, or `.`, which should be a rare situation. Signed-off-by: Filip Hejsek Signed-off-by: Johannes Schindelin --- read-cache.c | 72 +++++++++++++----------------------------------- t/t0000-basic.sh | 28 +++++++++++++++++++ 2 files changed, 47 insertions(+), 53 deletions(-) diff --git a/read-cache.c b/read-cache.c index 46f5e497b1..383ec6d366 100644 --- a/read-cache.c +++ b/read-cache.c @@ -1186,19 +1186,32 @@ static int has_dir_name(struct index_state *istate, istate->cache[istate->cache_nr - 1]->name, &len_eq_last); if (cmp_last > 0) { - if (len_eq_last == 0) { + if (name[len_eq_last] != '/') { /* * The entry sorts AFTER the last one in the - * index and their paths have no common prefix, - * so there cannot be a F/D conflict. + * index. + * + * If there were a conflict with "file", then our + * name would start with "file/" and the last index + * entry would start with "file" but not "file/". + * + * The next character after common prefix is + * not '/', so there can be no conflict. */ return retval; } else { /* * The entry sorts AFTER the last one in the - * index, but has a common prefix. Fall through - * to the loop below to disect the entry's path - * and see where the difference is. + * index, and the next character after common + * prefix is '/'. + * + * Either the last index entry is a file in + * conflict with this entry, or it has a name + * which sorts between this entry and the + * potential conflicting file. + * + * In both cases, we fall through to the loop + * below and let the regular search code handle it. */ } } else if (cmp_last == 0) { @@ -1222,53 +1235,6 @@ static int has_dir_name(struct index_state *istate, } len = slash - name; - if (cmp_last > 0) { - /* - * (len + 1) is a directory boundary (including - * the trailing slash). And since the loop is - * decrementing "slash", the first iteration is - * the longest directory prefix; subsequent - * iterations consider parent directories. - */ - - if (len + 1 <= len_eq_last) { - /* - * The directory prefix (including the trailing - * slash) also appears as a prefix in the last - * entry, so the remainder cannot collide (because - * strcmp said the whole path was greater). - * - * EQ: last: xxx/A - * this: xxx/B - * - * LT: last: xxx/file_A - * this: xxx/file_B - */ - return retval; - } - - if (len > len_eq_last) { - /* - * This part of the directory prefix (excluding - * the trailing slash) is longer than the known - * equal portions, so this sub-directory cannot - * collide with a file. - * - * GT: last: xxxA - * this: xxxB/file - */ - return retval; - } - - /* - * This is a possible collision. Fall through and - * let the regular search code handle it. - * - * last: xxx - * this: xxx/file - */ - } - pos = index_name_stage_pos(istate, name, len, stage, EXPAND_SPARSE); if (pos >= 0) { /* diff --git a/t/t0000-basic.sh b/t/t0000-basic.sh index 502b4bcf9e..2ba219b18b 100755 --- a/t/t0000-basic.sh +++ b/t/t0000-basic.sh @@ -1200,6 +1200,34 @@ test_expect_success 'very long name in the index handled sanely' ' test $len = 4098 ' +# D/F conflict checking uses an optimization when adding to the end. +# make sure it does not get confused by `a-` sorting _between_ +# `a` and `a/`. +test_expect_success 'more update-index D/F conflicts' ' + # empty the index to make sure our entry is last + git read-tree --empty && + cacheinfo=100644,$(test_oid empty_blob) && + git update-index --add --cacheinfo $cacheinfo,path5/a && + + test_must_fail git update-index --add --cacheinfo $cacheinfo,path5/a/file && + test_must_fail git update-index --add --cacheinfo $cacheinfo,path5/a/b/file && + test_must_fail git update-index --add --cacheinfo $cacheinfo,path5/a/b/c/file && + + # "a-" sorts between "a" and "a/" + git update-index --add --cacheinfo $cacheinfo,path5/a- && + + test_must_fail git update-index --add --cacheinfo $cacheinfo,path5/a/file && + test_must_fail git update-index --add --cacheinfo $cacheinfo,path5/a/b/file && + test_must_fail git update-index --add --cacheinfo $cacheinfo,path5/a/b/c/file && + + cat >expected <<-\EOF && + path5/a + path5/a- + EOF + git ls-files >actual && + test_cmp expected actual +' + test_expect_success 'test_must_fail on a failing git command' ' test_must_fail git notacommand ' From b20c10fd9b035f46e48112d2cd33d7cb740012b6 Mon Sep 17 00:00:00 2001 From: Filip Hejsek Date: Sun, 28 Jan 2024 04:32:47 +0100 Subject: [PATCH 06/10] t7423: add tests for symlinked submodule directories Submodule operations must not follow symlinks in working tree, because otherwise files might be written to unintended places, leading to vulnerabilities. Signed-off-by: Filip Hejsek Signed-off-by: Johannes Schindelin --- t/t7423-submodule-symlinks.sh | 66 +++++++++++++++++++++++++++++++++++ 1 file changed, 66 insertions(+) create mode 100755 t/t7423-submodule-symlinks.sh diff --git a/t/t7423-submodule-symlinks.sh b/t/t7423-submodule-symlinks.sh new file mode 100755 index 0000000000..a72f3cbcab --- /dev/null +++ b/t/t7423-submodule-symlinks.sh @@ -0,0 +1,66 @@ +#!/bin/sh + +test_description='check that submodule operations do not follow symlinks' + +. ./test-lib.sh + +test_expect_success 'prepare' ' + git config --global protocol.file.allow always && + test_commit initial && + git init upstream && + test_commit -C upstream upstream submodule_file && + git submodule add ./upstream a/sm && + test_tick && + git commit -m submodule +' + +test_expect_failure SYMLINKS 'git submodule update must not create submodule behind symlink' ' + rm -rf a b && + mkdir b && + ln -s b a && + test_must_fail git submodule update && + test_path_is_missing b/sm +' + +test_expect_failure SYMLINKS,CASE_INSENSITIVE_FS 'git submodule update must not create submodule behind symlink on case insensitive fs' ' + rm -rf a b && + mkdir b && + ln -s b A && + test_must_fail git submodule update && + test_path_is_missing b/sm +' + +prepare_symlink_to_repo() { + rm -rf a && + mkdir a && + git init a/target && + git -C a/target fetch ../../upstream && + ln -s target a/sm +} + +test_expect_success SYMLINKS 'git restore --recurse-submodules must not be confused by a symlink' ' + prepare_symlink_to_repo && + test_must_fail git restore --recurse-submodules a/sm && + test_path_is_missing a/sm/submodule_file && + test_path_is_dir a/target/.git && + test_path_is_missing a/target/submodule_file +' + +test_expect_failure SYMLINKS 'git restore --recurse-submodules must not migrate git dir of symlinked repo' ' + prepare_symlink_to_repo && + rm -rf .git/modules && + test_must_fail git restore --recurse-submodules a/sm && + test_path_is_dir a/target/.git && + test_path_is_missing .git/modules/a/sm && + test_path_is_missing a/target/submodule_file +' + +test_expect_failure SYMLINKS 'git checkout -f --recurse-submodules must not migrate git dir of symlinked repo when removing submodule' ' + prepare_symlink_to_repo && + rm -rf .git/modules && + test_must_fail git checkout -f --recurse-submodules initial && + test_path_is_dir a/target/.git && + test_path_is_missing .git/modules/a/sm +' + +test_done From 9cf85473209ea8ae2b56c13145c4704d12ee1374 Mon Sep 17 00:00:00 2001 From: Filip Hejsek Date: Sun, 28 Jan 2024 05:09:17 +0100 Subject: [PATCH 07/10] clone: prevent clashing git dirs when cloning submodule in parallel While it is expected to have several git dirs within the `.git/modules/` tree, it is important that they do not interfere with each other. For example, if one submodule was called "captain" and another submodule "captain/hooks", their respective git dirs would clash, as they would be located in `.git/modules/captain/` and `.git/modules/captain/hooks/`, respectively, i.e. the latter's files could clash with the actual Git hooks of the former. To prevent these clashes, and in particular to prevent hooks from being written and then executed as part of a recursive clone, we introduced checks as part of the fix for CVE-2019-1387 in a8dee3ca61 (Disallow dubiously-nested submodule git directories, 2019-10-01). It is currently possible to bypass the check for clashing submodule git dirs in two ways: 1. parallel cloning 2. checkout --recurse-submodules Let's check not only before, but also after parallel cloning (and before checking out the submodule), that the git dir is not clashing with another one, otherwise fail. This addresses the parallel cloning issue. As to the parallel checkout issue: It requires quite a few manual steps to create clashing git dirs because Git itself would refuse to initialize the inner one, as demonstrated by the test case. Nevertheless, let's teach the recursive checkout (namely, the `submodule_move_head()` function that is used by the recursive checkout) to be careful to verify that it does not use a clashing git dir, and if it does, disable it (by deleting the `HEAD` file so that subsequent Git calls won't recognize it as a git dir anymore). Note: The parallel cloning test case contains a `cat err` that proved to be highly useful when analyzing the racy nature of the operation (the operation can fail with three different error messages, depending on timing), and was left on purpose to ease future debugging should the need arise. Signed-off-by: Filip Hejsek Signed-off-by: Johannes Schindelin --- builtin/submodule--helper.c | 17 +++++++++++++++++ submodule.c | 17 +++++++++++++++++ t/t7450-bad-git-dotfiles.sh | 34 ++++++++++++++++++++++++++++++++-- 3 files changed, 66 insertions(+), 2 deletions(-) diff --git a/builtin/submodule--helper.c b/builtin/submodule--helper.c index 6743fb27bd..b76e13ddce 100644 --- a/builtin/submodule--helper.c +++ b/builtin/submodule--helper.c @@ -1717,6 +1717,23 @@ static int clone_submodule(const struct module_clone_data *clone_data, free(path); } + /* + * We already performed this check at the beginning of this function, + * before cloning the objects. This tries to detect racy behavior e.g. + * in parallel clones, where another process could easily have made the + * gitdir nested _after_ it was created. + * + * To prevent further harm coming from this unintentionally-nested + * gitdir, let's disable it by deleting the `HEAD` file. + */ + if (validate_submodule_git_dir(sm_gitdir, clone_data->name) < 0) { + char *head = xstrfmt("%s/HEAD", sm_gitdir); + unlink(head); + free(head); + die(_("refusing to create/use '%s' in another submodule's " + "git dir"), sm_gitdir); + } + connect_work_tree_and_git_dir(clone_data_path, sm_gitdir, 0); p = git_pathdup_submodule(clone_data_path, "config"); diff --git a/submodule.c b/submodule.c index fae24ef34a..71ec23ad98 100644 --- a/submodule.c +++ b/submodule.c @@ -2146,10 +2146,27 @@ int submodule_move_head(const char *path, if (old_head) { if (!submodule_uses_gitfile(path)) absorb_git_dir_into_superproject(path); + else { + char *dotgit = xstrfmt("%s/.git", path); + char *git_dir = xstrdup(read_gitfile(dotgit)); + + free(dotgit); + if (validate_submodule_git_dir(git_dir, + sub->name) < 0) + die(_("refusing to create/use '%s' in " + "another submodule's git dir"), + git_dir); + free(git_dir); + } } else { struct strbuf gitdir = STRBUF_INIT; submodule_name_to_gitdir(&gitdir, the_repository, sub->name); + if (validate_submodule_git_dir(gitdir.buf, + sub->name) < 0) + die(_("refusing to create/use '%s' in another " + "submodule's git dir"), + gitdir.buf); connect_work_tree_and_git_dir(path, gitdir.buf, 0); strbuf_release(&gitdir); diff --git a/t/t7450-bad-git-dotfiles.sh b/t/t7450-bad-git-dotfiles.sh index ba1f569bcb..8f94129e74 100755 --- a/t/t7450-bad-git-dotfiles.sh +++ b/t/t7450-bad-git-dotfiles.sh @@ -292,7 +292,7 @@ test_expect_success WINDOWS 'prevent git~1 squatting on Windows' ' fi ' -test_expect_success 'git dirs of sibling submodules must not be nested' ' +test_expect_success 'setup submodules with nested git dirs' ' git init nested && test_commit -C nested nested && ( @@ -310,9 +310,39 @@ test_expect_success 'git dirs of sibling submodules must not be nested' ' git add .gitmodules thing1 thing2 && test_tick && git commit -m nested - ) && + ) +' + +test_expect_success 'git dirs of sibling submodules must not be nested' ' test_must_fail git clone --recurse-submodules nested clone 2>err && test_i18ngrep "is inside git dir" err ' +test_expect_success 'submodule git dir nesting detection must work with parallel cloning' ' + test_must_fail git clone --recurse-submodules --jobs=2 nested clone_parallel 2>err && + cat err && + grep -E "(already exists|is inside git dir|not a git repository)" err && + { + test_path_is_missing .git/modules/hippo/HEAD || + test_path_is_missing .git/modules/hippo/hooks/HEAD + } +' + +test_expect_success 'checkout -f --recurse-submodules must not use a nested gitdir' ' + git clone nested nested_checkout && + ( + cd nested_checkout && + git submodule init && + git submodule update thing1 && + mkdir -p .git/modules/hippo/hooks/refs && + mkdir -p .git/modules/hippo/hooks/objects/info && + echo "../../../../objects" >.git/modules/hippo/hooks/objects/info/alternates && + echo "ref: refs/heads/master" >.git/modules/hippo/hooks/HEAD + ) && + test_must_fail git -C nested_checkout checkout -f --recurse-submodules HEAD 2>err && + cat err && + grep "is inside git dir" err && + test_path_is_missing nested_checkout/thing2/.git +' + test_done From 97065761333fd62db1912d81b489db938d8c991d Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Fri, 22 Mar 2024 11:19:22 +0100 Subject: [PATCH 08/10] submodules: submodule paths must not contain symlinks When creating a submodule path, we must be careful not to follow symbolic links. Otherwise we may follow a symbolic link pointing to a gitdir (which are valid symbolic links!) e.g. while cloning. On case-insensitive filesystems, however, we blindly replace a directory that has been created as part of the `clone` operation with a symlink when the path to the latter differs only in case from the former's path. Let's simply avoid this situation by expecting not ever having to overwrite any existing file/directory/symlink upon cloning. That way, we won't even replace a directory that we just created. This addresses CVE-2024-32002. Reported-by: Filip Hejsek Signed-off-by: Johannes Schindelin --- builtin/submodule--helper.c | 35 +++++++++++++++++++++++++++ t/t7406-submodule-update.sh | 48 +++++++++++++++++++++++++++++++++++++ 2 files changed, 83 insertions(+) diff --git a/builtin/submodule--helper.c b/builtin/submodule--helper.c index b76e13ddce..4c1a7dbcda 100644 --- a/builtin/submodule--helper.c +++ b/builtin/submodule--helper.c @@ -1641,12 +1641,35 @@ static char *clone_submodule_sm_gitdir(const char *name) return sm_gitdir; } +static int dir_contains_only_dotgit(const char *path) +{ + DIR *dir = opendir(path); + struct dirent *e; + int ret = 1; + + if (!dir) + return 0; + + e = readdir_skip_dot_and_dotdot(dir); + if (!e) + ret = 0; + else if (strcmp(DEFAULT_GIT_DIR_ENVIRONMENT, e->d_name) || + (e = readdir_skip_dot_and_dotdot(dir))) { + error("unexpected item '%s' in '%s'", e->d_name, path); + ret = 0; + } + + closedir(dir); + return ret; +} + static int clone_submodule(const struct module_clone_data *clone_data, struct string_list *reference) { char *p; char *sm_gitdir = clone_submodule_sm_gitdir(clone_data->name); char *sm_alternate = NULL, *error_strategy = NULL; + struct stat st; struct child_process cp = CHILD_PROCESS_INIT; const char *clone_data_path = clone_data->path; char *to_free = NULL; @@ -1660,6 +1683,10 @@ static int clone_submodule(const struct module_clone_data *clone_data, "git dir"), sm_gitdir); if (!file_exists(sm_gitdir)) { + if (clone_data->require_init && !stat(clone_data_path, &st) && + !is_empty_dir(clone_data_path)) + die(_("directory not empty: '%s'"), clone_data_path); + if (safe_create_leading_directories_const(sm_gitdir) < 0) die(_("could not create directory '%s'"), sm_gitdir); @@ -1704,6 +1731,14 @@ static int clone_submodule(const struct module_clone_data *clone_data, if(run_command(&cp)) die(_("clone of '%s' into submodule path '%s' failed"), clone_data->url, clone_data_path); + + if (clone_data->require_init && !stat(clone_data_path, &st) && + !dir_contains_only_dotgit(clone_data_path)) { + char *dot_git = xstrfmt("%s/.git", clone_data_path); + unlink(dot_git); + free(dot_git); + die(_("directory not empty: '%s'"), clone_data_path); + } } else { char *path; diff --git a/t/t7406-submodule-update.sh b/t/t7406-submodule-update.sh index f094e3d7f3..63c24f7f7c 100755 --- a/t/t7406-submodule-update.sh +++ b/t/t7406-submodule-update.sh @@ -1179,4 +1179,52 @@ test_expect_success 'submodule update --recursive skip submodules with strategy= test_cmp expect.err actual.err ' +test_expect_success CASE_INSENSITIVE_FS,SYMLINKS \ + 'submodule paths must not follow symlinks' ' + + # This is only needed because we want to run this in a self-contained + # test without having to spin up an HTTP server; However, it would not + # be needed in a real-world scenario where the submodule is simply + # hosted on a public site. + test_config_global protocol.file.allow always && + + # Make sure that Git tries to use symlinks on Windows + test_config_global core.symlinks true && + + tell_tale_path="$PWD/tell.tale" && + git init hook && + ( + cd hook && + mkdir -p y/hooks && + write_script y/hooks/post-checkout <<-EOF && + echo HOOK-RUN >&2 + echo hook-run >"$tell_tale_path" + EOF + git add y/hooks/post-checkout && + test_tick && + git commit -m post-checkout + ) && + + hook_repo_path="$(pwd)/hook" && + git init captain && + ( + cd captain && + git submodule add --name x/y "$hook_repo_path" A/modules/x && + test_tick && + git commit -m add-submodule && + + printf .git >dotgit.txt && + git hash-object -w --stdin dot-git.hash && + printf "120000 %s 0\ta\n" "$(cat dot-git.hash)" >index.info && + git update-index --index-info err && + grep "directory not empty" err && + test_path_is_missing "$tell_tale_path" +' + test_done From eafffd9ad417bdf0a3c63e5276d5a18f563cd291 Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Fri, 12 Apr 2024 21:00:44 +0200 Subject: [PATCH 09/10] clone_submodule: avoid using `access()` on directories In 0060fd1511b (clone --recurse-submodules: prevent name squatting on Windows, 2019-09-12), I introduced code to verify that a git dir either does not exist, or is at least empty, to fend off attacks where an inadvertently (and likely maliciously) pre-populated git dir would be used while cloning submodules recursively. The logic used `access(, X_OK)` to verify that a directory exists before calling `is_empty_dir()` on it. That is a curious way to check for a directory's existence and might well fail for unwanted reasons. Even the original author (it was I ;-) ) struggles to explain why this function was used rather than `stat()`. This code was _almost_ copypastad in the previous commit, but that `access()` call was caught during review. Let's use `stat()` instead also in the code that was almost copied verbatim. Let's not use `lstat()` because in the unlikely event that somebody snuck a symbolic link in, pointing to a crafted directory, we want to verify that that directory is empty. Signed-off-by: Johannes Schindelin --- builtin/submodule--helper.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/builtin/submodule--helper.c b/builtin/submodule--helper.c index 4c1a7dbcda..9eacc43574 100644 --- a/builtin/submodule--helper.c +++ b/builtin/submodule--helper.c @@ -1742,7 +1742,7 @@ static int clone_submodule(const struct module_clone_data *clone_data, } else { char *path; - if (clone_data->require_init && !access(clone_data_path, X_OK) && + if (clone_data->require_init && !stat(clone_data_path, &st) && !is_empty_dir(clone_data_path)) die(_("directory not empty: '%s'"), clone_data_path); if (safe_create_leading_directories_const(clone_data_path) < 0) From e8d0608944486019ea0e1ed2ed29776811a565c2 Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Tue, 26 Mar 2024 14:37:25 +0100 Subject: [PATCH 10/10] submodule: require the submodule path to contain directories only Submodules are stored in subdirectories of their superproject. When these subdirectories have been replaced with symlinks by a malicious actor, all kinds of mayhem can be caused. This _should_ not be possible, but many CVEs in the past showed that _when_ possible, it allows attackers to slip in code that gets executed during, say, a `git clone --recursive` operation. Let's add some defense-in-depth to disallow submodule paths to have anything except directories in them. Signed-off-by: Johannes Schindelin --- builtin/submodule--helper.c | 32 +++++++++++++++- submodule.c | 72 +++++++++++++++++++++++++++++++++++ submodule.h | 5 +++ t/t7423-submodule-symlinks.sh | 9 +++-- 4 files changed, 113 insertions(+), 5 deletions(-) diff --git a/builtin/submodule--helper.c b/builtin/submodule--helper.c index 9eacc43574..941afe1568 100644 --- a/builtin/submodule--helper.c +++ b/builtin/submodule--helper.c @@ -294,6 +294,9 @@ static void runcommand_in_submodule_cb(const struct cache_entry *list_item, struct child_process cp = CHILD_PROCESS_INIT; char *displaypath; + if (validate_submodule_path(path) < 0) + exit(128); + displaypath = get_submodule_displaypath(path, info->prefix); sub = submodule_from_path(the_repository, null_oid(), path); @@ -620,6 +623,9 @@ static void status_submodule(const char *path, const struct object_id *ce_oid, .free_removed_argv_elements = 1, }; + if (validate_submodule_path(path) < 0) + exit(128); + if (!submodule_from_path(the_repository, null_oid(), path)) die(_("no submodule mapping found in .gitmodules for path '%s'"), path); @@ -1220,6 +1226,9 @@ static void sync_submodule(const char *path, const char *prefix, if (!is_submodule_active(the_repository, path)) return; + if (validate_submodule_path(path) < 0) + exit(128); + sub = submodule_from_path(the_repository, null_oid(), path); if (sub && sub->url) { @@ -1360,6 +1369,9 @@ static void deinit_submodule(const char *path, const char *prefix, struct strbuf sb_config = STRBUF_INIT; char *sub_git_dir = xstrfmt("%s/.git", path); + if (validate_submodule_path(path) < 0) + exit(128); + sub = submodule_from_path(the_repository, null_oid(), path); if (!sub || !sub->name) @@ -1674,6 +1686,9 @@ static int clone_submodule(const struct module_clone_data *clone_data, const char *clone_data_path = clone_data->path; char *to_free = NULL; + if (validate_submodule_path(clone_data_path) < 0) + exit(128); + if (!is_absolute_path(clone_data->path)) clone_data_path = to_free = xstrfmt("%s/%s", get_git_work_tree(), clone_data->path); @@ -2542,6 +2557,9 @@ static int update_submodule(struct update_data *update_data) { int ret; + if (validate_submodule_path(update_data->sm_path) < 0) + return -1; + ret = determine_submodule_update_strategy(the_repository, update_data->just_cloned, update_data->sm_path, @@ -2649,12 +2667,21 @@ static int update_submodules(struct update_data *update_data) for (i = 0; i < suc.update_clone_nr; i++) { struct update_clone_data ucd = suc.update_clone[i]; - int code; + int code = 128; oidcpy(&update_data->oid, &ucd.oid); update_data->just_cloned = ucd.just_cloned; update_data->sm_path = ucd.sub->path; + /* + * Verify that the submodule path does not contain any + * symlinks; if it does, it might have been tampered with. + * TODO: allow exempting it via + * `safe.submodule.path` or something + */ + if (validate_submodule_path(update_data->sm_path) < 0) + goto fail; + code = ensure_core_worktree(update_data->sm_path); if (code) goto fail; @@ -3361,6 +3388,9 @@ static int module_add(int argc, const char **argv, const char *prefix) normalize_path_copy(add_data.sm_path, add_data.sm_path); strip_dir_trailing_slashes(add_data.sm_path); + if (validate_submodule_path(add_data.sm_path) < 0) + exit(128); + die_on_index_match(add_data.sm_path, force); die_on_repo_without_commits(add_data.sm_path); diff --git a/submodule.c b/submodule.c index 71ec23ad98..0b87ae6340 100644 --- a/submodule.c +++ b/submodule.c @@ -1005,6 +1005,9 @@ static int submodule_has_commits(struct repository *r, .super_oid = super_oid }; + if (validate_submodule_path(path) < 0) + exit(128); + oid_array_for_each_unique(commits, check_has_commit, &has_commit); if (has_commit.result) { @@ -1127,6 +1130,9 @@ static int push_submodule(const char *path, const struct string_list *push_options, int dry_run) { + if (validate_submodule_path(path) < 0) + exit(128); + if (for_each_remote_ref_submodule(path, has_remote, NULL) > 0) { struct child_process cp = CHILD_PROCESS_INIT; strvec_push(&cp.args, "push"); @@ -1176,6 +1182,9 @@ static void submodule_push_check(const char *path, const char *head, struct child_process cp = CHILD_PROCESS_INIT; int i; + if (validate_submodule_path(path) < 0) + exit(128); + strvec_push(&cp.args, "submodule--helper"); strvec_push(&cp.args, "push-check"); strvec_push(&cp.args, head); @@ -1507,6 +1516,9 @@ static struct fetch_task *fetch_task_create(struct submodule_parallel_fetch *spf struct fetch_task *task = xmalloc(sizeof(*task)); memset(task, 0, sizeof(*task)); + if (validate_submodule_path(path) < 0) + exit(128); + task->sub = submodule_from_path(spf->r, treeish_name, path); if (!task->sub) { @@ -1879,6 +1891,9 @@ unsigned is_submodule_modified(const char *path, int ignore_untracked) const char *git_dir; int ignore_cp_exit_code = 0; + if (validate_submodule_path(path) < 0) + exit(128); + strbuf_addf(&buf, "%s/.git", path); git_dir = read_gitfile(buf.buf); if (!git_dir) @@ -1955,6 +1970,9 @@ int submodule_uses_gitfile(const char *path) struct strbuf buf = STRBUF_INIT; const char *git_dir; + if (validate_submodule_path(path) < 0) + exit(128); + strbuf_addf(&buf, "%s/.git", path); git_dir = read_gitfile(buf.buf); if (!git_dir) { @@ -1994,6 +2012,9 @@ int bad_to_remove_submodule(const char *path, unsigned flags) struct strbuf buf = STRBUF_INIT; int ret = 0; + if (validate_submodule_path(path) < 0) + exit(128); + if (!file_exists(path) || is_empty_dir(path)) return 0; @@ -2044,6 +2065,9 @@ void submodule_unset_core_worktree(const struct submodule *sub) { struct strbuf config_path = STRBUF_INIT; + if (validate_submodule_path(sub->path) < 0) + exit(128); + submodule_name_to_gitdir(&config_path, the_repository, sub->name); strbuf_addstr(&config_path, "/config"); @@ -2066,6 +2090,9 @@ static int submodule_has_dirty_index(const struct submodule *sub) { struct child_process cp = CHILD_PROCESS_INIT; + if (validate_submodule_path(sub->path) < 0) + exit(128); + prepare_submodule_repo_env(&cp.env); cp.git_cmd = 1; @@ -2083,6 +2110,10 @@ static int submodule_has_dirty_index(const struct submodule *sub) static void submodule_reset_index(const char *path) { struct child_process cp = CHILD_PROCESS_INIT; + + if (validate_submodule_path(path) < 0) + exit(128); + prepare_submodule_repo_env(&cp.env); cp.git_cmd = 1; @@ -2287,6 +2318,34 @@ int validate_submodule_git_dir(char *git_dir, const char *submodule_name) return 0; } +int validate_submodule_path(const char *path) +{ + char *p = xstrdup(path); + struct stat st; + int i, ret = 0; + char sep; + + for (i = 0; !ret && p[i]; i++) { + if (!is_dir_sep(p[i])) + continue; + + sep = p[i]; + p[i] = '\0'; + /* allow missing components, but no symlinks */ + ret = lstat(p, &st) || !S_ISLNK(st.st_mode) ? 0 : -1; + p[i] = sep; + if (ret) + error(_("expected '%.*s' in submodule path '%s' not to " + "be a symbolic link"), i, p, p); + } + if (!lstat(p, &st) && S_ISLNK(st.st_mode)) + ret = error(_("expected submodule path '%s' not to be a " + "symbolic link"), p); + free(p); + return ret; +} + + /* * Embeds a single submodules git directory into the superprojects git dir, * non recursively. @@ -2297,6 +2356,9 @@ static void relocate_single_git_dir_into_superproject(const char *path) struct strbuf new_gitdir = STRBUF_INIT; const struct submodule *sub; + if (validate_submodule_path(path) < 0) + exit(128); + if (submodule_uses_worktrees(path)) die(_("relocate_gitdir for submodule '%s' with " "more than one worktree not supported"), path); @@ -2337,6 +2399,9 @@ static void absorb_git_dir_into_superproject_recurse(const char *path) struct child_process cp = CHILD_PROCESS_INIT; + if (validate_submodule_path(path) < 0) + exit(128); + cp.dir = path; cp.git_cmd = 1; cp.no_stdin = 1; @@ -2359,6 +2424,10 @@ void absorb_git_dir_into_superproject(const char *path) int err_code; const char *sub_git_dir; struct strbuf gitdir = STRBUF_INIT; + + if (validate_submodule_path(path) < 0) + exit(128); + strbuf_addf(&gitdir, "%s/.git", path); sub_git_dir = resolve_gitdir_gently(gitdir.buf, &err_code); @@ -2501,6 +2570,9 @@ int submodule_to_gitdir(struct strbuf *buf, const char *submodule) const char *git_dir; int ret = 0; + if (validate_submodule_path(submodule) < 0) + exit(128); + strbuf_reset(buf); strbuf_addstr(buf, submodule); strbuf_complete(buf, '/'); diff --git a/submodule.h b/submodule.h index b52a4ff1e7..fb770f1687 100644 --- a/submodule.h +++ b/submodule.h @@ -148,6 +148,11 @@ void submodule_name_to_gitdir(struct strbuf *buf, struct repository *r, */ int validate_submodule_git_dir(char *git_dir, const char *submodule_name); +/* + * Make sure that the given submodule path does not follow symlinks. + */ +int validate_submodule_path(const char *path); + #define SUBMODULE_MOVE_HEAD_DRY_RUN (1<<0) #define SUBMODULE_MOVE_HEAD_FORCE (1<<1) int submodule_move_head(const char *path, diff --git a/t/t7423-submodule-symlinks.sh b/t/t7423-submodule-symlinks.sh index a72f3cbcab..3d3c7af3ce 100755 --- a/t/t7423-submodule-symlinks.sh +++ b/t/t7423-submodule-symlinks.sh @@ -14,15 +14,16 @@ test_expect_success 'prepare' ' git commit -m submodule ' -test_expect_failure SYMLINKS 'git submodule update must not create submodule behind symlink' ' +test_expect_success SYMLINKS 'git submodule update must not create submodule behind symlink' ' rm -rf a b && mkdir b && ln -s b a && + test_path_is_missing b/sm && test_must_fail git submodule update && test_path_is_missing b/sm ' -test_expect_failure SYMLINKS,CASE_INSENSITIVE_FS 'git submodule update must not create submodule behind symlink on case insensitive fs' ' +test_expect_success SYMLINKS,CASE_INSENSITIVE_FS 'git submodule update must not create submodule behind symlink on case insensitive fs' ' rm -rf a b && mkdir b && ln -s b A && @@ -46,7 +47,7 @@ test_expect_success SYMLINKS 'git restore --recurse-submodules must not be confu test_path_is_missing a/target/submodule_file ' -test_expect_failure SYMLINKS 'git restore --recurse-submodules must not migrate git dir of symlinked repo' ' +test_expect_success SYMLINKS 'git restore --recurse-submodules must not migrate git dir of symlinked repo' ' prepare_symlink_to_repo && rm -rf .git/modules && test_must_fail git restore --recurse-submodules a/sm && @@ -55,7 +56,7 @@ test_expect_failure SYMLINKS 'git restore --recurse-submodules must not migrate test_path_is_missing a/target/submodule_file ' -test_expect_failure SYMLINKS 'git checkout -f --recurse-submodules must not migrate git dir of symlinked repo when removing submodule' ' +test_expect_success SYMLINKS 'git checkout -f --recurse-submodules must not migrate git dir of symlinked repo when removing submodule' ' prepare_symlink_to_repo && rm -rf .git/modules && test_must_fail git checkout -f --recurse-submodules initial &&