Merge branch 'vd/fsck-submodule-url-test'

Tighten URL checks fsck makes in a URL recorded for submodules.

* vd/fsck-submodule-url-test:
  submodule-config.c: strengthen URL fsck check
  t7450: test submodule urls
  test-submodule: remove command line handling for check-name
  submodule-config.h: move check_submodule_url
This commit is contained in:
Junio C Hamano
2024-01-26 08:54:47 -08:00
5 changed files with 203 additions and 151 deletions

View File

@ -14,6 +14,8 @@
#include "parse-options.h"
#include "thread-utils.h"
#include "tree-walk.h"
#include "url.h"
#include "urlmatch.h"
/*
* submodule cache lookup structure
@ -228,6 +230,144 @@ in_component:
return 0;
}
static int starts_with_dot_slash(const char *const path)
{
return path_match_flags(path, PATH_MATCH_STARTS_WITH_DOT_SLASH |
PATH_MATCH_XPLATFORM);
}
static int starts_with_dot_dot_slash(const char *const path)
{
return path_match_flags(path, PATH_MATCH_STARTS_WITH_DOT_DOT_SLASH |
PATH_MATCH_XPLATFORM);
}
static int submodule_url_is_relative(const char *url)
{
return starts_with_dot_slash(url) || starts_with_dot_dot_slash(url);
}
/*
* Count directory components that a relative submodule URL should chop
* from the remote_url it is to be resolved against.
*
* In other words, this counts "../" components at the start of a
* submodule URL.
*
* Returns the number of directory components to chop and writes a
* pointer to the next character of url after all leading "./" and
* "../" components to out.
*/
static int count_leading_dotdots(const char *url, const char **out)
{
int result = 0;
while (1) {
if (starts_with_dot_dot_slash(url)) {
result++;
url += strlen("../");
continue;
}
if (starts_with_dot_slash(url)) {
url += strlen("./");
continue;
}
*out = url;
return result;
}
}
/*
* Check whether a transport is implemented by git-remote-curl.
*
* If it is, returns 1 and writes the URL that would be passed to
* git-remote-curl to the "out" parameter.
*
* Otherwise, returns 0 and leaves "out" untouched.
*
* Examples:
* http::https://example.com/repo.git -> 1, https://example.com/repo.git
* https://example.com/repo.git -> 1, https://example.com/repo.git
* git://example.com/repo.git -> 0
*
* This is for use in checking for previously exploitable bugs that
* required a submodule URL to be passed to git-remote-curl.
*/
static int url_to_curl_url(const char *url, const char **out)
{
/*
* We don't need to check for case-aliases, "http.exe", and so
* on because in the default configuration, is_transport_allowed
* prevents URLs with those schemes from being cloned
* automatically.
*/
if (skip_prefix(url, "http::", out) ||
skip_prefix(url, "https::", out) ||
skip_prefix(url, "ftp::", out) ||
skip_prefix(url, "ftps::", out))
return 1;
if (starts_with(url, "http://") ||
starts_with(url, "https://") ||
starts_with(url, "ftp://") ||
starts_with(url, "ftps://")) {
*out = url;
return 1;
}
return 0;
}
int check_submodule_url(const char *url)
{
const char *curl_url;
if (looks_like_command_line_option(url))
return -1;
if (submodule_url_is_relative(url) || starts_with(url, "git://")) {
char *decoded;
const char *next;
int has_nl;
/*
* This could be appended to an http URL and url-decoded;
* check for malicious characters.
*/
decoded = url_decode(url);
has_nl = !!strchr(decoded, '\n');
free(decoded);
if (has_nl)
return -1;
/*
* URLs which escape their root via "../" can overwrite
* the host field and previous components, resolving to
* URLs like https::example.com/submodule.git and
* https:///example.com/submodule.git that were
* susceptible to CVE-2020-11008.
*/
if (count_leading_dotdots(url, &next) > 0 &&
(*next == ':' || *next == '/'))
return -1;
}
else if (url_to_curl_url(url, &curl_url)) {
int ret = 0;
char *normalized = url_normalize(curl_url, NULL);
if (normalized) {
char *decoded = url_decode(normalized);
if (strchr(decoded, '\n'))
ret = -1;
free(normalized);
free(decoded);
} else {
ret = -1;
}
return ret;
}
return 0;
}
static int name_and_item_from_var(const char *var, struct strbuf *name,
struct strbuf *item)
{