From bb3a9265e505e9593faa260860f9b8929af0963e Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Thu, 1 Dec 2022 15:45:57 +0100 Subject: [PATCH 1/4] fsck: refactor `fsck_blob()` to allow for more checks In general, we don't need to validate blob contents as they are opaque blobs about whose content Git doesn't need to care about. There are some exceptions though when blobs are linked into trees so that they would be interpreted by Git. We only have a single such check right now though, which is the one for gitmodules that has been added in the context of CVE-2018-11235. Now we have found another vulnerability with gitattributes that can lead to out-of-bounds writes and reads. So let's refactor `fsck_blob()` so that it is more extensible and can check different types of blobs. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- fsck.c | 55 +++++++++++++++++++++++++++++-------------------------- 1 file changed, 29 insertions(+), 26 deletions(-) diff --git a/fsck.c b/fsck.c index 3ec500d707..ddcb2d264c 100644 --- a/fsck.c +++ b/fsck.c @@ -1170,38 +1170,41 @@ static int fsck_gitmodules_fn(const char *var, const char *value, void *vdata) static int fsck_blob(const struct object_id *oid, const char *buf, unsigned long size, struct fsck_options *options) { - struct fsck_gitmodules_data data; - struct config_options config_opts = { 0 }; - - if (!oidset_contains(&options->gitmodules_found, oid)) - return 0; - oidset_insert(&options->gitmodules_done, oid); + int ret = 0; if (object_on_skiplist(options, oid)) return 0; - if (!buf) { - /* - * A missing buffer here is a sign that the caller found the - * blob too gigantic to load into memory. Let's just consider - * that an error. - */ - return report(options, oid, OBJ_BLOB, - FSCK_MSG_GITMODULES_LARGE, - ".gitmodules too large to parse"); + if (oidset_contains(&options->gitmodules_found, oid)) { + struct config_options config_opts = { 0 }; + struct fsck_gitmodules_data data; + + oidset_insert(&options->gitmodules_done, oid); + + if (!buf) { + /* + * A missing buffer here is a sign that the caller found the + * blob too gigantic to load into memory. Let's just consider + * that an error. + */ + return report(options, oid, OBJ_BLOB, + FSCK_MSG_GITMODULES_LARGE, + ".gitmodules too large to parse"); + } + + data.oid = oid; + data.options = options; + data.ret = 0; + config_opts.error_action = CONFIG_ERROR_SILENT; + if (git_config_from_mem(fsck_gitmodules_fn, CONFIG_ORIGIN_BLOB, + ".gitmodules", buf, size, &data, &config_opts)) + data.ret |= report(options, oid, OBJ_BLOB, + FSCK_MSG_GITMODULES_PARSE, + "could not parse gitmodules blob"); + ret |= data.ret; } - data.oid = oid; - data.options = options; - data.ret = 0; - config_opts.error_action = CONFIG_ERROR_SILENT; - if (git_config_from_mem(fsck_gitmodules_fn, CONFIG_ORIGIN_BLOB, - ".gitmodules", buf, size, &data, &config_opts)) - data.ret |= report(options, oid, OBJ_BLOB, - FSCK_MSG_GITMODULES_PARSE, - "could not parse gitmodules blob"); - - return data.ret; + return ret; } int fsck_object(struct object *obj, void *data, unsigned long size, From a59a8c687f18db2b4c54a9d0795f93c4df1f9703 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Thu, 1 Dec 2022 15:46:01 +0100 Subject: [PATCH 2/4] fsck: pull out function to check a set of blobs In `fsck_finish()` we check all blobs for consistency that we have found during the tree walk, but that haven't yet been checked. This is only required for gitmodules right now, but will also be required for a new check for gitattributes. Pull out a function `fsck_blobs()` that allows the caller to check a set of blobs for consistency. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- fsck.c | 34 ++++++++++++++++++++++------------ 1 file changed, 22 insertions(+), 12 deletions(-) diff --git a/fsck.c b/fsck.c index ddcb2d264c..4762ca9478 100644 --- a/fsck.c +++ b/fsck.c @@ -1243,19 +1243,21 @@ int fsck_error_function(struct fsck_options *o, return 1; } -int fsck_finish(struct fsck_options *options) +static int fsck_blobs(struct oidset *blobs_found, struct oidset *blobs_done, + enum fsck_msg_id msg_missing, enum fsck_msg_id msg_type, + struct fsck_options *options, const char *blob_type) { int ret = 0; struct oidset_iter iter; const struct object_id *oid; - oidset_iter_init(&options->gitmodules_found, &iter); + oidset_iter_init(blobs_found, &iter); while ((oid = oidset_iter_next(&iter))) { enum object_type type; unsigned long size; char *buf; - if (oidset_contains(&options->gitmodules_done, oid)) + if (oidset_contains(blobs_done, oid)) continue; buf = read_object_file(oid, &type, &size); @@ -1263,25 +1265,33 @@ int fsck_finish(struct fsck_options *options) if (is_promisor_object(oid)) continue; ret |= report(options, - oid, OBJ_BLOB, - FSCK_MSG_GITMODULES_MISSING, - "unable to read .gitmodules blob"); + oid, OBJ_BLOB, msg_missing, + "unable to read %s blob", blob_type); continue; } if (type == OBJ_BLOB) ret |= fsck_blob(oid, buf, size, options); else - ret |= report(options, - oid, type, - FSCK_MSG_GITMODULES_BLOB, - "non-blob found at .gitmodules"); + ret |= report(options, oid, type, msg_type, + "non-blob found at %s", blob_type); free(buf); } + oidset_clear(blobs_found); + oidset_clear(blobs_done); + + return ret; +} + +int fsck_finish(struct fsck_options *options) +{ + int ret = 0; + + ret |= fsck_blobs(&options->gitmodules_found, &options->gitmodules_done, + FSCK_MSG_GITMODULES_MISSING, FSCK_MSG_GITMODULES_BLOB, + options, ".gitmodules"); - oidset_clear(&options->gitmodules_found); - oidset_clear(&options->gitmodules_done); return ret; } From f8587c31c96172aac547f83977c98fa8f0e2aa67 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Thu, 1 Dec 2022 15:46:05 +0100 Subject: [PATCH 3/4] fsck: move checks for gitattributes Move the checks for gitattributes so that they can be extended more readily. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- fsck.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/fsck.c b/fsck.c index 4762ca9478..3a7fb9ebba 100644 --- a/fsck.c +++ b/fsck.c @@ -614,17 +614,19 @@ static int fsck_tree(const struct object_id *tree_oid, ".gitmodules is a symbolic link"); } + if (is_hfs_dotgitattributes(name) || is_ntfs_dotgitattributes(name)) { + if (S_ISLNK(mode)) + retval += report(options, tree_oid, OBJ_TREE, + FSCK_MSG_GITATTRIBUTES_SYMLINK, + ".gitattributes is a symlink"); + } + if (S_ISLNK(mode)) { if (is_hfs_dotgitignore(name) || is_ntfs_dotgitignore(name)) retval += report(options, tree_oid, OBJ_TREE, FSCK_MSG_GITIGNORE_SYMLINK, ".gitignore is a symlink"); - if (is_hfs_dotgitattributes(name) || - is_ntfs_dotgitattributes(name)) - retval += report(options, tree_oid, OBJ_TREE, - FSCK_MSG_GITATTRIBUTES_SYMLINK, - ".gitattributes is a symlink"); if (is_hfs_dotmailmap(name) || is_ntfs_dotmailmap(name)) retval += report(options, tree_oid, OBJ_TREE, From 27ab4784d5c9e24345b9f5b443609cbe527c51f9 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Thu, 1 Dec 2022 15:46:09 +0100 Subject: [PATCH 4/4] fsck: implement checks for gitattributes Recently, a vulnerability was reported that can lead to an out-of-bounds write when reading an unreasonably large gitattributes file. The root cause of this error are multiple integer overflows in different parts of the code when there are either too many lines, when paths are too long, when attribute names are too long, or when there are too many attributes declared for a pattern. As all of these are related to size, it seems reasonable to restrict the size of the gitattributes file via git-fsck(1). This allows us to both stop distributing known-vulnerable objects via common hosting platforms that have fsck enabled, and users to protect themselves by enabling the `fetch.fsckObjects` config. There are basically two checks: 1. We verify that size of the gitattributes file is smaller than 100MB. 2. We verify that the maximum line length does not exceed 2048 bytes. With the preceding commits, both of these conditions would cause us to either ignore the complete gitattributes file or blob in the first case, or the specific line in the second case. Now with these consistency checks added, we also grow the ability to stop distributing such files in the first place when `receive.fsckObjects` is enabled. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- fsck.c | 38 +++++++++++++++++++++++++++++++++++++- fsck.h | 12 ++++++++++++ t/t1450-fsck.sh | 24 ++++++++++++++++++++++++ 3 files changed, 73 insertions(+), 1 deletion(-) diff --git a/fsck.c b/fsck.c index 3a7fb9ebba..614c776429 100644 --- a/fsck.c +++ b/fsck.c @@ -2,6 +2,7 @@ #include "object-store.h" #include "repository.h" #include "object.h" +#include "attr.h" #include "blob.h" #include "tree.h" #include "tree-walk.h" @@ -615,7 +616,10 @@ static int fsck_tree(const struct object_id *tree_oid, } if (is_hfs_dotgitattributes(name) || is_ntfs_dotgitattributes(name)) { - if (S_ISLNK(mode)) + if (!S_ISLNK(mode)) + oidset_insert(&options->gitattributes_found, + entry_oid); + else retval += report(options, tree_oid, OBJ_TREE, FSCK_MSG_GITATTRIBUTES_SYMLINK, ".gitattributes is a symlink"); @@ -1206,6 +1210,35 @@ static int fsck_blob(const struct object_id *oid, const char *buf, ret |= data.ret; } + if (oidset_contains(&options->gitattributes_found, oid)) { + const char *ptr; + + oidset_insert(&options->gitattributes_done, oid); + + if (!buf || size > ATTR_MAX_FILE_SIZE) { + /* + * A missing buffer here is a sign that the caller found the + * blob too gigantic to load into memory. Let's just consider + * that an error. + */ + return report(options, oid, OBJ_BLOB, + FSCK_MSG_GITATTRIBUTES_LARGE, + ".gitattributes too large to parse"); + } + + for (ptr = buf; *ptr; ) { + const char *eol = strchrnul(ptr, '\n'); + if (eol - ptr >= ATTR_MAX_LINE_LENGTH) { + ret |= report(options, oid, OBJ_BLOB, + FSCK_MSG_GITATTRIBUTES_LINE_LENGTH, + ".gitattributes has too long lines to parse"); + break; + } + + ptr = *eol ? eol + 1 : eol; + } + } + return ret; } @@ -1293,6 +1326,9 @@ int fsck_finish(struct fsck_options *options) ret |= fsck_blobs(&options->gitmodules_found, &options->gitmodules_done, FSCK_MSG_GITMODULES_MISSING, FSCK_MSG_GITMODULES_BLOB, options, ".gitmodules"); + ret |= fsck_blobs(&options->gitattributes_found, &options->gitattributes_done, + FSCK_MSG_GITATTRIBUTES_MISSING, FSCK_MSG_GITATTRIBUTES_BLOB, + options, ".gitattributes"); return ret; } diff --git a/fsck.h b/fsck.h index d07f7a2459..cc3379d4e9 100644 --- a/fsck.h +++ b/fsck.h @@ -55,6 +55,10 @@ enum fsck_msg_type { FUNC(GITMODULES_URL, ERROR) \ FUNC(GITMODULES_PATH, ERROR) \ FUNC(GITMODULES_UPDATE, ERROR) \ + FUNC(GITATTRIBUTES_MISSING, ERROR) \ + FUNC(GITATTRIBUTES_LARGE, ERROR) \ + FUNC(GITATTRIBUTES_LINE_LENGTH, ERROR) \ + FUNC(GITATTRIBUTES_BLOB, ERROR) \ /* warnings */ \ FUNC(BAD_FILEMODE, WARN) \ FUNC(EMPTY_NAME, WARN) \ @@ -129,6 +133,8 @@ struct fsck_options { struct oidset skiplist; struct oidset gitmodules_found; struct oidset gitmodules_done; + struct oidset gitattributes_found; + struct oidset gitattributes_done; kh_oid_map_t *object_names; }; @@ -136,18 +142,24 @@ struct fsck_options { .skiplist = OIDSET_INIT, \ .gitmodules_found = OIDSET_INIT, \ .gitmodules_done = OIDSET_INIT, \ + .gitattributes_found = OIDSET_INIT, \ + .gitattributes_done = OIDSET_INIT, \ .error_func = fsck_error_function \ } #define FSCK_OPTIONS_STRICT { \ .strict = 1, \ .gitmodules_found = OIDSET_INIT, \ .gitmodules_done = OIDSET_INIT, \ + .gitattributes_found = OIDSET_INIT, \ + .gitattributes_done = OIDSET_INIT, \ .error_func = fsck_error_function, \ } #define FSCK_OPTIONS_MISSING_GITMODULES { \ .strict = 1, \ .gitmodules_found = OIDSET_INIT, \ .gitmodules_done = OIDSET_INIT, \ + .gitattributes_found = OIDSET_INIT, \ + .gitattributes_done = OIDSET_INIT, \ .error_func = fsck_error_cb_print_missing_gitmodules, \ } diff --git a/t/t1450-fsck.sh b/t/t1450-fsck.sh index 5071ac63a5..9e0afe1fbf 100755 --- a/t/t1450-fsck.sh +++ b/t/t1450-fsck.sh @@ -865,4 +865,28 @@ test_expect_success 'detect corrupt index file in fsck' ' test_i18ngrep "bad index file" errors ' +test_expect_success 'fsck error on gitattributes with excessive line lengths' ' + blob=$(printf "pattern %02048d" 1 | git hash-object -w --stdin) && + test_when_finished "remove_object $blob" && + tree=$(printf "100644 blob %s\t%s\n" $blob .gitattributes | git mktree) && + test_when_finished "remove_object $tree" && + cat >expected <<-EOF && + error in blob $blob: gitattributesLineLength: .gitattributes has too long lines to parse + EOF + test_must_fail git fsck --no-dangling >actual 2>&1 && + test_cmp expected actual +' + +test_expect_success 'fsck error on gitattributes with excessive size' ' + blob=$(test-tool genzeros $((100 * 1024 * 1024 + 1)) | git hash-object -w --stdin) && + test_when_finished "remove_object $blob" && + tree=$(printf "100644 blob %s\t%s\n" $blob .gitattributes | git mktree) && + test_when_finished "remove_object $tree" && + cat >expected <<-EOF && + error in blob $blob: gitattributesLarge: .gitattributes too large to parse + EOF + test_must_fail git fsck --no-dangling >actual 2>&1 && + test_cmp expected actual +' + test_done