diff --git a/Documentation/git.txt b/Documentation/git.txt index 11228956cd..22c2b537aa 100644 --- a/Documentation/git.txt +++ b/Documentation/git.txt @@ -911,6 +911,15 @@ for full details. should not normally need to set this to `0`, but it may be useful when trying to salvage data from a corrupted repository. +`GIT_COMMIT_GRAPH_PARANOIA`:: + If this Boolean environment variable is set to false, ignore the + case where commits exist in the commit graph but not in the + object database. Normally, Git will check whether commits loaded + from the commit graph exist in the object database to avoid + issues with stale commit graphs, but this check comes with a + performance penalty. The default is `1` (i.e., be paranoid about + stale commits in the commit graph). + `GIT_ALLOW_PROTOCOL`:: If set to a colon-separated list of protocols, behave as if `protocol.allow` is set to `never`, and each of the listed diff --git a/commit-graph.c b/commit-graph.c index c2b782af3b..210f11a8e8 100644 --- a/commit-graph.c +++ b/commit-graph.c @@ -1024,14 +1024,18 @@ int repo_find_commit_pos_in_graph(struct repository *r, struct commit *c, struct commit *lookup_commit_in_graph(struct repository *repo, const struct object_id *id) { + static int object_paranoia = -1; struct commit *commit; uint32_t pos; + if (object_paranoia == -1) + object_paranoia = git_env_bool(GIT_COMMIT_GRAPH_PARANOIA, 1); + if (!prepare_commit_graph(repo)) return NULL; if (!search_commit_pos_in_graph(id, repo->objects->commit_graph, &pos)) return NULL; - if (!has_object(repo, id, 0)) + if (object_paranoia && !has_object(repo, id, 0)) return NULL; commit = lookup_commit(repo, id); diff --git a/commit-graph.h b/commit-graph.h index c6870274c5..e519cb81cb 100644 --- a/commit-graph.h +++ b/commit-graph.h @@ -8,6 +8,12 @@ #define GIT_TEST_COMMIT_GRAPH_DIE_ON_PARSE "GIT_TEST_COMMIT_GRAPH_DIE_ON_PARSE" #define GIT_TEST_COMMIT_GRAPH_CHANGED_PATHS "GIT_TEST_COMMIT_GRAPH_CHANGED_PATHS" +/* + * This environment variable controls whether commits looked up via the + * commit graph will be double checked to exist in the object database. + */ +#define GIT_COMMIT_GRAPH_PARANOIA "GIT_COMMIT_GRAPH_PARANOIA" + /* * This method is only used to enhance coverage of the commit-graph * feature in the test suite with the GIT_TEST_COMMIT_GRAPH and diff --git a/commit.c b/commit.c index b3223478bc..7399e90212 100644 --- a/commit.c +++ b/commit.c @@ -28,6 +28,7 @@ #include "shallow.h" #include "tree.h" #include "hook.h" +#include "parse.h" static struct commit_extra_header *read_commit_extra_header_lines(const char *buf, size_t len, const char **); @@ -572,8 +573,21 @@ int repo_parse_commit_internal(struct repository *r, return -1; if (item->object.parsed) return 0; - if (use_commit_graph && parse_commit_in_graph(r, item)) + if (use_commit_graph && parse_commit_in_graph(r, item)) { + static int object_paranoia = -1; + + if (object_paranoia == -1) + object_paranoia = git_env_bool(GIT_COMMIT_GRAPH_PARANOIA, 1); + + if (object_paranoia && !has_object(r, &item->object.oid, 0)) { + unparse_commit(r, &item->object.oid); + return quiet_on_missing ? -1 : + error(_("commit %s exists in commit-graph but not in the object database"), + oid_to_hex(&item->object.oid)); + } + return 0; + } if (oid_object_info_extended(r, &item->object.oid, &oi, flags) < 0) return quiet_on_missing ? -1 : diff --git a/t/t5318-commit-graph.sh b/t/t5318-commit-graph.sh index 6505ff595a..78993c0737 100755 --- a/t/t5318-commit-graph.sh +++ b/t/t5318-commit-graph.sh @@ -895,4 +895,52 @@ test_expect_success 'reader notices too-small generations chunk' ' test_cmp expect.err err ' +test_expect_success 'stale commit cannot be parsed when given directly' ' + test_when_finished "rm -rf repo" && + git init repo && + ( + cd repo && + test_commit A && + test_commit B && + git commit-graph write --reachable && + + oid=$(git rev-parse B) && + rm .git/objects/"$(test_oid_to_path "$oid")" && + + # Verify that it is possible to read the commit from the + # commit graph when not being paranoid, ... + GIT_COMMIT_GRAPH_PARANOIA=false git rev-list B && + # ... but parsing the commit when double checking that + # it actually exists in the object database should fail. + test_must_fail git rev-list -1 B + ) +' + +test_expect_success 'stale commit cannot be parsed when traversing graph' ' + test_when_finished "rm -rf repo" && + git init repo && + ( + cd repo && + + test_commit A && + test_commit B && + test_commit C && + git commit-graph write --reachable && + + # Corrupt the repository by deleting the intermittent commit + # object. Commands should notice that this object is absent and + # thus that the repository is corrupt even if the commit graph + # exists. + oid=$(git rev-parse B) && + rm .git/objects/"$(test_oid_to_path "$oid")" && + + # Again, we should be able to parse the commit when not + # being paranoid about commit graph staleness... + GIT_COMMIT_GRAPH_PARANOIA=false git rev-parse HEAD~2 && + # ... but fail when we are paranoid. + test_must_fail git rev-parse HEAD~2 2>error && + grep "error: commit $oid exists in commit-graph but not in the object database" error + ) +' + test_done