From 35a9f1e99c5d31635bb78a4f2d498e72c04fc471 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?SZEDER=20G=C3=A1bor?= Date: Fri, 5 Jun 2020 13:00:23 +0000 Subject: [PATCH 01/18] tree-walk.c: don't match submodule entries for 'submod/anything' MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Submodules should be handled the same as regular directories with respect to the presence of a trailing slash, i.e. commands like: git diff rev1 rev2 -- $path git rev-list HEAD -- $path should produce the same output whether $path is 'submod' or 'submod/'. This has been fixed in commit 74b4f7f277 (tree-walk.c: ignore trailing slash on submodule in tree_entry_interesting(), 2014-01-23). Unfortunately, that commit had the unintended side effect to handle 'submod/anything' the same as 'submod' and 'submod/' as well, e.g.: $ git log --oneline --name-only -- sha1collisiondetection/whatever 4125f78222 sha1dc: update from upstream sha1collisiondetection 07a20f569b Makefile: fix unaligned loads in sha1dc with UBSan sha1collisiondetection 23e37f8e9d sha1dc: update from upstream sha1collisiondetection 86cfd61e6b sha1dc: optionally use sha1collisiondetection as a submodule sha1collisiondetection Fix this by rejecting submodules as partial pathnames when their trailing slash is followed by anything. Signed-off-by: SZEDER Gábor Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- t/t4010-diff-pathspec.sh | 4 +++- tree-walk.c | 9 ++++++++- 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/t/t4010-diff-pathspec.sh b/t/t4010-diff-pathspec.sh index e5ca359edf..65cc703c65 100755 --- a/t/t4010-diff-pathspec.sh +++ b/t/t4010-diff-pathspec.sh @@ -125,7 +125,9 @@ test_expect_success 'setup submodules' ' test_expect_success 'diff-tree ignores trailing slash on submodule path' ' git diff --name-only HEAD^ HEAD submod >expect && git diff --name-only HEAD^ HEAD submod/ >actual && - test_cmp expect actual + test_cmp expect actual && + git diff --name-only HEAD^ HEAD -- submod/whatever >actual && + test_must_be_empty actual ' test_expect_success 'diff multiple wildcard pathspecs' ' diff --git a/tree-walk.c b/tree-walk.c index bb0ad34c54..0160294712 100644 --- a/tree-walk.c +++ b/tree-walk.c @@ -851,7 +851,14 @@ static int match_entry(const struct pathspec_item *item, if (matchlen > pathlen) { if (match[pathlen] != '/') return 0; - if (!S_ISDIR(entry->mode) && !S_ISGITLINK(entry->mode)) + /* + * Reject non-directories as partial pathnames, except + * when match is a submodule with a trailing slash and + * nothing else (to handle 'submod/' and 'submod' + * uniformly). + */ + if (!S_ISDIR(entry->mode) && + (!S_ISGITLINK(entry->mode) || matchlen > pathlen + 1)) return 0; } From cb9daf16db0e9935179fd995785e94c996267268 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?SZEDER=20G=C3=A1bor?= Date: Fri, 5 Jun 2020 13:00:24 +0000 Subject: [PATCH 02/18] commit-graph: fix parsing the Chunk Lookup table MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The commit-graph file format specifies that the chunks may be in any order. However, if the OID Lookup chunk happens to be the last one in the file, then any command attempting to access the commit-graph data will fail with: fatal: invalid commit position. commit-graph is likely corrupt In this case the error is wrong, the commit-graph file does conform to the specification, but the parsing of the Chunk Lookup table is a bit buggy, and leaves the field holding the number of commits in the commit-graph zero-initialized. The number of commits in the commit-graph is determined while parsing the Chunk Lookup table, by dividing the size of the OID Lookup chunk with the hash size. However, the Chunk Lookup table doesn't actually store the size of the chunks, but it stores their starting offset. Consequently, the size of a chunk can only be calculated by subtracting the starting offsets of that chunk from the offset of the subsequent chunk, or in case of the last chunk from the offset recorded in the terminating label. This is currenly implemented in a bit complicated way: as we iterate over the entries of the Chunk Lookup table, we check the ID of each chunk and store its starting offset, then we check the ID of the last seen chunk and calculate its size using its previously saved offset if necessary (at the moment it's only necessary for the OID Lookup chunk). Alas, while parsing the Chunk Lookup table we only interate through the "real" chunks, but never look at the terminating label, thus don't even check whether it's necessary to calulate the size of the last chunk. Consequently, if the OID Lookup chunk is the last one, then we don't calculate its size and turn don't run the piece of code determining the number of commits in the commit graph, leaving the field holding that number unchanged (i.e. zero-initialized), eventually triggering the sanity check in load_oid_from_graph(). Fix this by iterating through all entries in the Chunk Lookup table, including the terminating label. Note that this is the minimal fix, suitable for the maintenance track. A better fix would be to simplify how the chunk sizes are calculated, but that is a more invasive change, less suitable for 'maint', so that will be done in later patches. This additional flexibility of scanning more chunks breaks a test for "git commit-graph verify" so alter that test to mutate the commit-graph to have an even lower chunk count. Signed-off-by: SZEDER Gábor Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- commit-graph.c | 2 +- t/t5318-commit-graph.sh | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/commit-graph.c b/commit-graph.c index 196e817a84..7807d94562 100644 --- a/commit-graph.c +++ b/commit-graph.c @@ -277,7 +277,7 @@ struct commit_graph *parse_commit_graph(void *graph_map, int fd, last_chunk_id = 0; last_chunk_offset = 8; chunk_lookup = data + 8; - for (i = 0; i < graph->num_chunks; i++) { + for (i = 0; i <= graph->num_chunks; i++) { uint32_t chunk_id; uint64_t chunk_offset; int chunk_repeated = 0; diff --git a/t/t5318-commit-graph.sh b/t/t5318-commit-graph.sh index 18304a65e4..79e7fbcd40 100755 --- a/t/t5318-commit-graph.sh +++ b/t/t5318-commit-graph.sh @@ -488,7 +488,7 @@ test_expect_success 'detect bad hash version' ' ' test_expect_success 'detect low chunk count' ' - corrupt_graph_and_verify $GRAPH_BYTE_CHUNK_COUNT "\02" \ + corrupt_graph_and_verify $GRAPH_BYTE_CHUNK_COUNT "\01" \ "missing the .* chunk" ' From 6141cdfdcbe9e3edf25467582c5f32658ae79f40 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?SZEDER=20G=C3=A1bor?= Date: Fri, 5 Jun 2020 13:00:25 +0000 Subject: [PATCH 03/18] commit-graph-format.txt: all multi-byte numbers are in network byte order MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The commit-graph format specifies that "All 4-byte numbers are in network order", but the commit-graph contains 8-byte integers as well (file offsets in the Chunk Lookup table), and their byte order is unspecified. Clarify that all multi-byte integers are in network byte order. Signed-off-by: SZEDER Gábor Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- Documentation/technical/commit-graph-format.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/technical/commit-graph-format.txt b/Documentation/technical/commit-graph-format.txt index 1beef17182..440541045d 100644 --- a/Documentation/technical/commit-graph-format.txt +++ b/Documentation/technical/commit-graph-format.txt @@ -32,7 +32,7 @@ the body into "chunks" and provide a binary lookup table at the beginning of the body. The header includes certain values, such as number of chunks and hash type. -All 4-byte numbers are in network order. +All multi-byte numbers are in network byte order. HEADER: From 1df15f8dee5c637c9013d11d5b8e72e189a04f06 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?SZEDER=20G=C3=A1bor?= Date: Fri, 5 Jun 2020 13:00:26 +0000 Subject: [PATCH 04/18] commit-slab: add a function to deep free entries on the slab MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit clear_##slabname() frees only the memory allocated for a commit slab itself, but entries in the commit slab might own additional memory outside the slab that should be freed as well. We already have (at least) one such commit slab, and this patch series is about to add one more. To free all additional memory owned by entries on the commit slab the user of such a slab could iterate over all commits it knows about, peek whether there is a valid entry associated with each commit, and free the additional memory, if any. Or it could rely on intimate knowledge about the internals of the commit slab implementation, and could itself iterate directly through all entries in the slab, and free the additional memory. Or it could just leak the additional memory... Introduce deep_clear_##slabname() to allow releasing memory owned by commit slab entries by invoking the 'void free_fn(elemtype *ptr)' function specified as parameter for each entry in the slab. Use it in get_shallow_commits() in 'shallow.c' to replace an open-coded iteration over a commit slab's entries. Signed-off-by: SZEDER Gábor Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- commit-slab-decl.h | 1 + commit-slab-impl.h | 13 +++++++++++++ commit-slab.h | 10 ++++++++++ shallow.c | 14 +++++--------- 4 files changed, 29 insertions(+), 9 deletions(-) diff --git a/commit-slab-decl.h b/commit-slab-decl.h index adc7b46c83..286164b7e2 100644 --- a/commit-slab-decl.h +++ b/commit-slab-decl.h @@ -32,6 +32,7 @@ struct slabname { \ void init_ ##slabname## _with_stride(struct slabname *s, unsigned stride); \ void init_ ##slabname(struct slabname *s); \ void clear_ ##slabname(struct slabname *s); \ +void deep_clear_ ##slabname(struct slabname *s, void (*free_fn)(elemtype *ptr)); \ elemtype *slabname## _at_peek(struct slabname *s, const struct commit *c, int add_if_missing); \ elemtype *slabname## _at(struct slabname *s, const struct commit *c); \ elemtype *slabname## _peek(struct slabname *s, const struct commit *c) diff --git a/commit-slab-impl.h b/commit-slab-impl.h index 5c0eb91a5d..557738df27 100644 --- a/commit-slab-impl.h +++ b/commit-slab-impl.h @@ -38,6 +38,19 @@ scope void clear_ ##slabname(struct slabname *s) \ FREE_AND_NULL(s->slab); \ } \ \ +scope void deep_clear_ ##slabname(struct slabname *s, void (*free_fn)(elemtype *)) \ +{ \ + unsigned int i; \ + for (i = 0; i < s->slab_count; i++) { \ + unsigned int j; \ + if (!s->slab[i]) \ + continue; \ + for (j = 0; j < s->slab_size; j++) \ + free_fn(&s->slab[i][j * s->stride]); \ + } \ + clear_ ##slabname(s); \ +} \ + \ scope elemtype *slabname## _at_peek(struct slabname *s, \ const struct commit *c, \ int add_if_missing) \ diff --git a/commit-slab.h b/commit-slab.h index 05b3f2804e..8e72a30536 100644 --- a/commit-slab.h +++ b/commit-slab.h @@ -47,6 +47,16 @@ * * Call this function before the slab falls out of scope to avoid * leaking memory. + * + * - void deep_clear_indegree(struct indegree *, void (*free_fn)(int*)) + * + * Empties the slab, similar to clear_indegree(), but in addition it + * calls the given 'free_fn' for each slab entry to release any + * additional memory that might be owned by the entry (but not the + * entry itself!). + * Note that 'free_fn' might be called even for entries for which no + * indegree_at() call has been made; in this case 'free_fn' is invoked + * with a pointer to a zero-initialized location. */ #define define_commit_slab(slabname, elemtype) \ diff --git a/shallow.c b/shallow.c index 7fd04afed1..c4ac8a7327 100644 --- a/shallow.c +++ b/shallow.c @@ -84,6 +84,10 @@ int is_repository_shallow(struct repository *r) * supports a "valid" flag. */ define_commit_slab(commit_depth, int *); +static void free_depth_in_slab(int **ptr) +{ + FREE_AND_NULL(*ptr); +} struct commit_list *get_shallow_commits(struct object_array *heads, int depth, int shallow_flag, int not_shallow_flag) { @@ -150,15 +154,7 @@ struct commit_list *get_shallow_commits(struct object_array *heads, int depth, } } } - for (i = 0; i < depths.slab_count; i++) { - int j; - - if (!depths.slab[i]) - continue; - for (j = 0; j < depths.slab_size; j++) - free(depths.slab[i][j]); - } - clear_commit_depth(&depths); + deep_clear_commit_depth(&depths, free_depth_in_slab); return result; } From 0ee3cb888dc4da900b2e701ef8106205d8963ffb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?SZEDER=20G=C3=A1bor?= Date: Fri, 5 Jun 2020 13:00:27 +0000 Subject: [PATCH 05/18] diff.h: drop diff_tree_oid() & friends' return value MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ll_diff_tree_oid() has only ever returned 0 [1], so it's return value is basically useless. It's only caller diff_tree_oid() has only ever returned the return value of ll_diff_tree_oid() as-is [2], so its return value is just as useless. Most of diff_tree_oid()'s callers simply ignore its return value, except: - diff_root_tree_oid() is a thin wrapper around diff_tree_oid() and returns with its return value, but all of diff_root_tree_oid()'s callers ignore its return value. - rev_compare_tree() and rev_same_tree_as_empty() do look at the return value in a condition, but, since the return value is always 0, the former's < 0 condition is never fulfilled, while the latter's >= 0 condition is always fulfilled. So let's drop the return value of ll_diff_tree_oid(), diff_tree_oid() and diff_root_tree_oid(), and drop those conditions from rev_compare_tree() and rev_same_tree_as_empty() as well. [1] ll_diff_tree_oid() and its ancestors have been returning only 0 ever since it was introduced as diff_tree() in 9174026cfe (Add "diff-tree" program to show which files have changed between two trees., 2005-04-09). [2] diff_tree_oid() traces back to diff-tree.c:main() in 9174026cfe as well. Signed-off-by: SZEDER Gábor Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- diff.h | 10 +++++----- revision.c | 9 +++------ tree-diff.c | 30 ++++++++++++++---------------- 3 files changed, 22 insertions(+), 27 deletions(-) diff --git a/diff.h b/diff.h index 9443dc1b00..e0c0af6286 100644 --- a/diff.h +++ b/diff.h @@ -431,11 +431,11 @@ struct combine_diff_path *diff_tree_paths( struct combine_diff_path *p, const struct object_id *oid, const struct object_id **parents_oid, int nparent, struct strbuf *base, struct diff_options *opt); -int diff_tree_oid(const struct object_id *old_oid, - const struct object_id *new_oid, - const char *base, struct diff_options *opt); -int diff_root_tree_oid(const struct object_id *new_oid, const char *base, - struct diff_options *opt); +void diff_tree_oid(const struct object_id *old_oid, + const struct object_id *new_oid, + const char *base, struct diff_options *opt); +void diff_root_tree_oid(const struct object_id *new_oid, const char *base, + struct diff_options *opt); struct combine_diff_path { struct combine_diff_path *next; diff --git a/revision.c b/revision.c index cbf4b61aa6..c644c66091 100644 --- a/revision.c +++ b/revision.c @@ -791,9 +791,7 @@ static int rev_compare_tree(struct rev_info *revs, tree_difference = REV_TREE_SAME; revs->pruning.flags.has_changes = 0; - if (diff_tree_oid(&t1->object.oid, &t2->object.oid, "", - &revs->pruning) < 0) - return REV_TREE_DIFFERENT; + diff_tree_oid(&t1->object.oid, &t2->object.oid, "", &revs->pruning); if (!nth_parent) if (bloom_ret == 1 && tree_difference == REV_TREE_SAME) @@ -804,7 +802,6 @@ static int rev_compare_tree(struct rev_info *revs, static int rev_same_tree_as_empty(struct rev_info *revs, struct commit *commit) { - int retval; struct tree *t1 = get_commit_tree(commit); if (!t1) @@ -812,9 +809,9 @@ static int rev_same_tree_as_empty(struct rev_info *revs, struct commit *commit) tree_difference = REV_TREE_SAME; revs->pruning.flags.has_changes = 0; - retval = diff_tree_oid(NULL, &t1->object.oid, "", &revs->pruning); + diff_tree_oid(NULL, &t1->object.oid, "", &revs->pruning); - return retval >= 0 && (tree_difference == REV_TREE_SAME); + return tree_difference == REV_TREE_SAME; } struct treesame_state { diff --git a/tree-diff.c b/tree-diff.c index f3d303c6e5..6ebad1a46f 100644 --- a/tree-diff.c +++ b/tree-diff.c @@ -29,9 +29,9 @@ static struct combine_diff_path *ll_diff_tree_paths( struct combine_diff_path *p, const struct object_id *oid, const struct object_id **parents_oid, int nparent, struct strbuf *base, struct diff_options *opt); -static int ll_diff_tree_oid(const struct object_id *old_oid, - const struct object_id *new_oid, - struct strbuf *base, struct diff_options *opt); +static void ll_diff_tree_oid(const struct object_id *old_oid, + const struct object_id *new_oid, + struct strbuf *base, struct diff_options *opt); /* * Compare two tree entries, taking into account only path/S_ISDIR(mode), @@ -679,9 +679,9 @@ static void try_to_follow_renames(const struct object_id *old_oid, q->nr = 1; } -static int ll_diff_tree_oid(const struct object_id *old_oid, - const struct object_id *new_oid, - struct strbuf *base, struct diff_options *opt) +static void ll_diff_tree_oid(const struct object_id *old_oid, + const struct object_id *new_oid, + struct strbuf *base, struct diff_options *opt) { struct combine_diff_path phead, *p; pathchange_fn_t pathchange_old = opt->pathchange; @@ -697,29 +697,27 @@ static int ll_diff_tree_oid(const struct object_id *old_oid, } opt->pathchange = pathchange_old; - return 0; } -int diff_tree_oid(const struct object_id *old_oid, - const struct object_id *new_oid, - const char *base_str, struct diff_options *opt) +void diff_tree_oid(const struct object_id *old_oid, + const struct object_id *new_oid, + const char *base_str, struct diff_options *opt) { struct strbuf base; - int retval; strbuf_init(&base, PATH_MAX); strbuf_addstr(&base, base_str); - retval = ll_diff_tree_oid(old_oid, new_oid, &base, opt); + ll_diff_tree_oid(old_oid, new_oid, &base, opt); if (!*base_str && opt->flags.follow_renames && diff_might_be_rename()) try_to_follow_renames(old_oid, new_oid, &base, opt); strbuf_release(&base); - - return retval; } -int diff_root_tree_oid(const struct object_id *new_oid, const char *base, struct diff_options *opt) +void diff_root_tree_oid(const struct object_id *new_oid, + const char *base, + struct diff_options *opt) { - return diff_tree_oid(NULL, new_oid, base, opt); + diff_tree_oid(NULL, new_oid, base, opt); } From fa7965309e709e6dcd59c38fc6743d17d82d538b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?SZEDER=20G=C3=A1bor?= Date: Fri, 5 Jun 2020 13:00:28 +0000 Subject: [PATCH 06/18] commit-graph: clean up #includes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Our CodingGuidelines says that it's sufficient to include one of 'git-compat-util.h' and 'cache.h', but both 'commit-graph.c' and 'commit-graph.h' include both. Let's include only 'git-compat-util.h' to loose a bunch of unnecessary dependencies; but include 'hash.h', because 'commit-graph.h' does require the definition of 'struct object_id'. 'commit-graph.h' explicitly includes 'repository.h' and 'string-list.h', but only needs the declaration of a few structs from them. Drop these includes and forward-declare the necessary structs instead. 'commit-graph.c' includes 'dir.h', but doesn't actually use anything from there, so let's drop that #include as well. Signed-off-by: SZEDER Gábor Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- commit-graph.c | 4 +--- commit-graph.h | 6 +++--- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/commit-graph.c b/commit-graph.c index 7807d94562..6ed649388d 100644 --- a/commit-graph.c +++ b/commit-graph.c @@ -1,7 +1,5 @@ -#include "cache.h" -#include "config.h" -#include "dir.h" #include "git-compat-util.h" +#include "config.h" #include "lockfile.h" #include "pack.h" #include "packfile.h" diff --git a/commit-graph.h b/commit-graph.h index 39484482cc..881c9b46e5 100644 --- a/commit-graph.h +++ b/commit-graph.h @@ -2,9 +2,6 @@ #define COMMIT_GRAPH_H #include "git-compat-util.h" -#include "repository.h" -#include "string-list.h" -#include "cache.h" #include "object-store.h" #define GIT_TEST_COMMIT_GRAPH "GIT_TEST_COMMIT_GRAPH" @@ -22,6 +19,9 @@ void git_test_write_commit_graph_or_die(void); struct commit; struct bloom_filter_settings; +struct repository; +struct raw_object_store; +struct string_list; char *get_commit_graph_filename(struct object_directory *odb); int open_commit_graph(const char *graph_file, int *fd, struct stat *st); From 2ad4f1a7c43cac4ba52cff56f9090d3322c67224 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?SZEDER=20G=C3=A1bor?= Date: Fri, 5 Jun 2020 13:00:29 +0000 Subject: [PATCH 07/18] commit-graph: simplify parse_commit_graph() #1 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit While we iterate over all entries of the Chunk Lookup table we make sure that we don't attempt to read past the end of the mmap-ed commit-graph file, and check in each iteration that the chunk ID and offset we are about to read is still within the mmap-ed memory region. However, these checks in each iteration are not really necessary, because the number of chunks in the commit-graph file is already known before this loop from the just parsed commit-graph header. So let's check that the commit-graph file is large enough for all entries in the Chunk Lookup table before we start iterating over those entries, and drop those per-iteration checks. While at it, take into account the size of everything that is necessary to have a valid commit-graph file, i.e. the size of the header, the size of the mandatory OID Fanout chunk, and the size of the signature in the trailer as well. Note that this necessitates the change of the error message as well, and, consequently, have to update the 'detect incorrect chunk count' test in 't5318-commit-graph.sh' as well. Signed-off-by: SZEDER Gábor Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- commit-graph.c | 16 +++++++++------- t/t5318-commit-graph.sh | 3 ++- 2 files changed, 11 insertions(+), 8 deletions(-) diff --git a/commit-graph.c b/commit-graph.c index 6ed649388d..9927762f18 100644 --- a/commit-graph.c +++ b/commit-graph.c @@ -272,6 +272,15 @@ struct commit_graph *parse_commit_graph(void *graph_map, int fd, graph->data = graph_map; graph->data_len = graph_size; + if (graph_size < GRAPH_HEADER_SIZE + + (graph->num_chunks + 1) * GRAPH_CHUNKLOOKUP_WIDTH + + GRAPH_FANOUT_SIZE + the_hash_algo->rawsz) { + error(_("commit-graph file is too small to hold %u chunks"), + graph->num_chunks); + free(graph); + return NULL; + } + last_chunk_id = 0; last_chunk_offset = 8; chunk_lookup = data + 8; @@ -280,13 +289,6 @@ struct commit_graph *parse_commit_graph(void *graph_map, int fd, uint64_t chunk_offset; int chunk_repeated = 0; - if (data + graph_size - chunk_lookup < - GRAPH_CHUNKLOOKUP_WIDTH) { - error(_("commit-graph chunk lookup table entry missing; file may be incomplete")); - free(graph); - return NULL; - } - chunk_id = get_be32(chunk_lookup + 0); chunk_offset = get_be64(chunk_lookup + 4); diff --git a/t/t5318-commit-graph.sh b/t/t5318-commit-graph.sh index 79e7fbcd40..1073f9e3cf 100755 --- a/t/t5318-commit-graph.sh +++ b/t/t5318-commit-graph.sh @@ -574,7 +574,8 @@ test_expect_success 'detect invalid checksum hash' ' test_expect_success 'detect incorrect chunk count' ' corrupt_graph_and_verify $GRAPH_BYTE_CHUNK_COUNT "\377" \ - "chunk lookup table entry missing" $GRAPH_CHUNK_LOOKUP_OFFSET + "commit-graph file is too small to hold [0-9]* chunks" \ + $GRAPH_CHUNK_LOOKUP_OFFSET ' test_expect_success 'git fsck (checks commit-graph)' ' From 5cfa438a76f7f67f34f6f758b62943261ded1fb5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?SZEDER=20G=C3=A1bor?= Date: Fri, 5 Jun 2020 13:00:30 +0000 Subject: [PATCH 08/18] commit-graph: simplify parse_commit_graph() #2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The Chunk Lookup table stores the chunks' starting offset in the commit-graph file, not their sizes. Consequently, the size of a chunk can only be calculated by subtracting its offset from the offset of the subsequent chunk (or that of the terminating label). This is currenly implemented in a bit complicated way: as we iterate over the entries of the Chunk Lookup table, we check the id of each chunk and store its starting offset, then we check the id of the last seen chunk and calculate its size using its previously saved offset. At the moment there is only one chunk for which we calculate its size, but this patch series will add more, and the repeated chunk id checks are not that pretty. Instead let's read ahead the offset of the next chunk on each iteration, so we can calculate the size of each chunk right away, right where we store its starting offset. Signed-off-by: SZEDER Gábor Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- commit-graph.c | 26 +++++++++----------------- 1 file changed, 9 insertions(+), 17 deletions(-) diff --git a/commit-graph.c b/commit-graph.c index 9927762f18..84206f0f51 100644 --- a/commit-graph.c +++ b/commit-graph.c @@ -230,8 +230,7 @@ struct commit_graph *parse_commit_graph(void *graph_map, int fd, const unsigned char *data, *chunk_lookup; uint32_t i; struct commit_graph *graph; - uint64_t last_chunk_offset; - uint32_t last_chunk_id; + uint64_t next_chunk_offset; uint32_t graph_signature; unsigned char graph_version, hash_version; @@ -281,18 +280,17 @@ struct commit_graph *parse_commit_graph(void *graph_map, int fd, return NULL; } - last_chunk_id = 0; - last_chunk_offset = 8; chunk_lookup = data + 8; - for (i = 0; i <= graph->num_chunks; i++) { + next_chunk_offset = get_be64(chunk_lookup + 4); + for (i = 0; i < graph->num_chunks; i++) { uint32_t chunk_id; - uint64_t chunk_offset; + uint64_t chunk_offset = next_chunk_offset; int chunk_repeated = 0; chunk_id = get_be32(chunk_lookup + 0); - chunk_offset = get_be64(chunk_lookup + 4); chunk_lookup += GRAPH_CHUNKLOOKUP_WIDTH; + next_chunk_offset = get_be64(chunk_lookup + 4); if (chunk_offset > graph_size - the_hash_algo->rawsz) { error(_("commit-graph improper chunk offset %08x%08x"), (uint32_t)(chunk_offset >> 32), @@ -312,8 +310,11 @@ struct commit_graph *parse_commit_graph(void *graph_map, int fd, case GRAPH_CHUNKID_OIDLOOKUP: if (graph->chunk_oid_lookup) chunk_repeated = 1; - else + else { graph->chunk_oid_lookup = data + chunk_offset; + graph->num_commits = (next_chunk_offset - chunk_offset) + / graph->hash_len; + } break; case GRAPH_CHUNKID_DATA: @@ -368,15 +369,6 @@ struct commit_graph *parse_commit_graph(void *graph_map, int fd, free(graph); return NULL; } - - if (last_chunk_id == GRAPH_CHUNKID_OIDLOOKUP) - { - graph->num_commits = (chunk_offset - last_chunk_offset) - / graph->hash_len; - } - - last_chunk_id = chunk_id; - last_chunk_offset = chunk_offset; } if (graph->chunk_bloom_indexes && graph->chunk_bloom_data) { From bb4d60e5d536009e0c97bfa7f464416c29c04c81 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?SZEDER=20G=C3=A1bor?= Date: Fri, 5 Jun 2020 13:00:31 +0000 Subject: [PATCH 09/18] commit-graph: simplify write_commit_graph_file() #1 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In write_commit_graph_file() one block of code fills the array of chunk IDs, another block of code fills the array of chunk offsets, then the chunk IDs and offsets are written to the Chunk Lookup table, and finally a third block of code writes the actual chunks. In case of optional chunks like Extra Edge List and Base Graphs List there is also a condition checking whether that chunk is necessary/desired, and that same condition is repeated in all those three blocks of code. This patch series is about to add more optional chunks, so there would be even more repeated conditions. Those chunk offsets are relative to the beginning of the file, so they inherently depend on the size of the Chunk Lookup table, which in turn depends on the number of chunks that are to be written to the commit-graph file. IOW at the time we set the first chunk's ID we can't yet know its offset, because we don't yet know how many chunks there are. Simplify this by initially filling an array of chunk sizes, not offsets, and calculate the offsets based on the chunk sizes only later, while we are writing the Chunk Lookup table. This way we can fill the arrays of chunk IDs and sizes in one go, eliminating one set of repeated conditions. Signed-off-by: SZEDER Gábor Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- commit-graph.c | 46 +++++++++++++++++----------------------------- 1 file changed, 17 insertions(+), 29 deletions(-) diff --git a/commit-graph.c b/commit-graph.c index 84206f0f51..79cddabcd1 100644 --- a/commit-graph.c +++ b/commit-graph.c @@ -1529,10 +1529,11 @@ static int write_commit_graph_file(struct write_commit_graph_context *ctx) struct hashfile *f; struct lock_file lk = LOCK_INIT; uint32_t chunk_ids[MAX_NUM_CHUNKS + 1]; - uint64_t chunk_offsets[MAX_NUM_CHUNKS + 1]; + uint64_t chunk_sizes[MAX_NUM_CHUNKS + 1]; const unsigned hashsz = the_hash_algo->rawsz; struct strbuf progress_title = STRBUF_INIT; int num_chunks = 3; + uint64_t chunk_offset; struct object_id file_hash; const struct bloom_filter_settings bloom_settings = DEFAULT_BLOOM_FILTER_SETTINGS; @@ -1573,50 +1574,34 @@ static int write_commit_graph_file(struct write_commit_graph_context *ctx) } chunk_ids[0] = GRAPH_CHUNKID_OIDFANOUT; + chunk_sizes[0] = GRAPH_FANOUT_SIZE; chunk_ids[1] = GRAPH_CHUNKID_OIDLOOKUP; + chunk_sizes[1] = hashsz * ctx->commits.nr; chunk_ids[2] = GRAPH_CHUNKID_DATA; + chunk_sizes[2] = (hashsz + 16) * ctx->commits.nr; + if (ctx->num_extra_edges) { chunk_ids[num_chunks] = GRAPH_CHUNKID_EXTRAEDGES; + chunk_sizes[num_chunks] = 4 * ctx->num_extra_edges; num_chunks++; } if (ctx->changed_paths) { chunk_ids[num_chunks] = GRAPH_CHUNKID_BLOOMINDEXES; + chunk_sizes[num_chunks] = sizeof(uint32_t) * ctx->commits.nr; num_chunks++; chunk_ids[num_chunks] = GRAPH_CHUNKID_BLOOMDATA; + chunk_sizes[num_chunks] = sizeof(uint32_t) * 3 + + ctx->total_bloom_filter_data_size; num_chunks++; } if (ctx->num_commit_graphs_after > 1) { chunk_ids[num_chunks] = GRAPH_CHUNKID_BASE; + chunk_sizes[num_chunks] = hashsz * (ctx->num_commit_graphs_after - 1); num_chunks++; } chunk_ids[num_chunks] = 0; - - chunk_offsets[0] = 8 + (num_chunks + 1) * GRAPH_CHUNKLOOKUP_WIDTH; - chunk_offsets[1] = chunk_offsets[0] + GRAPH_FANOUT_SIZE; - chunk_offsets[2] = chunk_offsets[1] + hashsz * ctx->commits.nr; - chunk_offsets[3] = chunk_offsets[2] + (hashsz + 16) * ctx->commits.nr; - - num_chunks = 3; - if (ctx->num_extra_edges) { - chunk_offsets[num_chunks + 1] = chunk_offsets[num_chunks] + - 4 * ctx->num_extra_edges; - num_chunks++; - } - if (ctx->changed_paths) { - chunk_offsets[num_chunks + 1] = chunk_offsets[num_chunks] + - sizeof(uint32_t) * ctx->commits.nr; - num_chunks++; - - chunk_offsets[num_chunks + 1] = chunk_offsets[num_chunks] + - sizeof(uint32_t) * 3 + ctx->total_bloom_filter_data_size; - num_chunks++; - } - if (ctx->num_commit_graphs_after > 1) { - chunk_offsets[num_chunks + 1] = chunk_offsets[num_chunks] + - hashsz * (ctx->num_commit_graphs_after - 1); - num_chunks++; - } + chunk_sizes[num_chunks] = 0; hashwrite_be32(f, GRAPH_SIGNATURE); @@ -1625,13 +1610,16 @@ static int write_commit_graph_file(struct write_commit_graph_context *ctx) hashwrite_u8(f, num_chunks); hashwrite_u8(f, ctx->num_commit_graphs_after - 1); + chunk_offset = 8 + (num_chunks + 1) * GRAPH_CHUNKLOOKUP_WIDTH; for (i = 0; i <= num_chunks; i++) { uint32_t chunk_write[3]; chunk_write[0] = htonl(chunk_ids[i]); - chunk_write[1] = htonl(chunk_offsets[i] >> 32); - chunk_write[2] = htonl(chunk_offsets[i] & 0xffffffff); + chunk_write[1] = htonl(chunk_offset >> 32); + chunk_write[2] = htonl(chunk_offset & 0xffffffff); hashwrite(f, chunk_write, 12); + + chunk_offset += chunk_sizes[i]; } if (ctx->report_progress) { From 7fbfe07ab4d4e58c0971dac73001b89f180a0af3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?SZEDER=20G=C3=A1bor?= Date: Fri, 5 Jun 2020 13:00:32 +0000 Subject: [PATCH 10/18] commit-graph: simplify write_commit_graph_file() #2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Unify the 'chunk_ids' and 'chunk_sizes' arrays into an array of 'struct chunk_info'. This will allow more cleanups in the following patches. Signed-off-by: SZEDER Gábor Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- commit-graph.c | 45 ++++++++++++++++++++++++--------------------- 1 file changed, 24 insertions(+), 21 deletions(-) diff --git a/commit-graph.c b/commit-graph.c index 79cddabcd1..887837e882 100644 --- a/commit-graph.c +++ b/commit-graph.c @@ -1522,14 +1522,18 @@ static int write_graph_chunk_base(struct hashfile *f, return 0; } +struct chunk_info { + uint32_t id; + uint64_t size; +}; + static int write_commit_graph_file(struct write_commit_graph_context *ctx) { uint32_t i; int fd; struct hashfile *f; struct lock_file lk = LOCK_INIT; - uint32_t chunk_ids[MAX_NUM_CHUNKS + 1]; - uint64_t chunk_sizes[MAX_NUM_CHUNKS + 1]; + struct chunk_info chunks[MAX_NUM_CHUNKS + 1]; const unsigned hashsz = the_hash_algo->rawsz; struct strbuf progress_title = STRBUF_INIT; int num_chunks = 3; @@ -1573,35 +1577,34 @@ static int write_commit_graph_file(struct write_commit_graph_context *ctx) f = hashfd(lk.tempfile->fd, lk.tempfile->filename.buf); } - chunk_ids[0] = GRAPH_CHUNKID_OIDFANOUT; - chunk_sizes[0] = GRAPH_FANOUT_SIZE; - chunk_ids[1] = GRAPH_CHUNKID_OIDLOOKUP; - chunk_sizes[1] = hashsz * ctx->commits.nr; - chunk_ids[2] = GRAPH_CHUNKID_DATA; - chunk_sizes[2] = (hashsz + 16) * ctx->commits.nr; - + chunks[0].id = GRAPH_CHUNKID_OIDFANOUT; + chunks[0].size = GRAPH_FANOUT_SIZE; + chunks[1].id = GRAPH_CHUNKID_OIDLOOKUP; + chunks[1].size = hashsz * ctx->commits.nr; + chunks[2].id = GRAPH_CHUNKID_DATA; + chunks[2].size = (hashsz + 16) * ctx->commits.nr; if (ctx->num_extra_edges) { - chunk_ids[num_chunks] = GRAPH_CHUNKID_EXTRAEDGES; - chunk_sizes[num_chunks] = 4 * ctx->num_extra_edges; + chunks[num_chunks].id = GRAPH_CHUNKID_EXTRAEDGES; + chunks[num_chunks].size = 4 * ctx->num_extra_edges; num_chunks++; } if (ctx->changed_paths) { - chunk_ids[num_chunks] = GRAPH_CHUNKID_BLOOMINDEXES; - chunk_sizes[num_chunks] = sizeof(uint32_t) * ctx->commits.nr; + chunks[num_chunks].id = GRAPH_CHUNKID_BLOOMINDEXES; + chunks[num_chunks].size = sizeof(uint32_t) * ctx->commits.nr; num_chunks++; - chunk_ids[num_chunks] = GRAPH_CHUNKID_BLOOMDATA; - chunk_sizes[num_chunks] = sizeof(uint32_t) * 3 + chunks[num_chunks].id = GRAPH_CHUNKID_BLOOMDATA; + chunks[num_chunks].size = sizeof(uint32_t) * 3 + ctx->total_bloom_filter_data_size; num_chunks++; } if (ctx->num_commit_graphs_after > 1) { - chunk_ids[num_chunks] = GRAPH_CHUNKID_BASE; - chunk_sizes[num_chunks] = hashsz * (ctx->num_commit_graphs_after - 1); + chunks[num_chunks].id = GRAPH_CHUNKID_BASE; + chunks[num_chunks].size = hashsz * (ctx->num_commit_graphs_after - 1); num_chunks++; } - chunk_ids[num_chunks] = 0; - chunk_sizes[num_chunks] = 0; + chunks[num_chunks].id = 0; + chunks[num_chunks].size = 0; hashwrite_be32(f, GRAPH_SIGNATURE); @@ -1614,12 +1617,12 @@ static int write_commit_graph_file(struct write_commit_graph_context *ctx) for (i = 0; i <= num_chunks; i++) { uint32_t chunk_write[3]; - chunk_write[0] = htonl(chunk_ids[i]); + chunk_write[0] = htonl(chunks[i].id); chunk_write[1] = htonl(chunk_offset >> 32); chunk_write[2] = htonl(chunk_offset & 0xffffffff); hashwrite(f, chunk_write, 12); - chunk_offset += chunk_sizes[i]; + chunk_offset += chunks[i].size; } if (ctx->report_progress) { From 0f8ee206951ad25ea652ac9b00455df22e825c45 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Mon, 15 Jun 2020 20:14:46 +0000 Subject: [PATCH 11/18] commit-graph: place bloom_settings in context MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Place an instance of struct bloom_settings into the struct write_commit_graph_context. This allows simplifying the function prototype of write_graph_chunk_bloom_data(). This will allow us to combine the function prototypes and use function pointers to simplify write_commit_graph_file(). Reported-by: SZEDER Gábor Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- commit-graph.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/commit-graph.c b/commit-graph.c index 887837e882..05b7035d8d 100644 --- a/commit-graph.c +++ b/commit-graph.c @@ -882,6 +882,7 @@ struct write_commit_graph_context { const struct split_commit_graph_opts *split_opts; size_t total_bloom_filter_data_size; + struct bloom_filter_settings bloom_settings; }; static void write_graph_chunk_fanout(struct hashfile *f, @@ -1103,8 +1104,7 @@ static void write_graph_chunk_bloom_indexes(struct hashfile *f, } static void write_graph_chunk_bloom_data(struct hashfile *f, - struct write_commit_graph_context *ctx, - const struct bloom_filter_settings *settings) + struct write_commit_graph_context *ctx) { struct commit **list = ctx->commits.list; struct commit **last = ctx->commits.list + ctx->commits.nr; @@ -1116,9 +1116,9 @@ static void write_graph_chunk_bloom_data(struct hashfile *f, _("Writing changed paths Bloom filters data"), ctx->commits.nr); - hashwrite_be32(f, settings->hash_version); - hashwrite_be32(f, settings->num_hashes); - hashwrite_be32(f, settings->bits_per_entry); + hashwrite_be32(f, ctx->bloom_settings.hash_version); + hashwrite_be32(f, ctx->bloom_settings.num_hashes); + hashwrite_be32(f, ctx->bloom_settings.bits_per_entry); while (list < last) { struct bloom_filter *filter = get_bloom_filter(ctx->r, *list, 0); @@ -1541,6 +1541,8 @@ static int write_commit_graph_file(struct write_commit_graph_context *ctx) struct object_id file_hash; const struct bloom_filter_settings bloom_settings = DEFAULT_BLOOM_FILTER_SETTINGS; + ctx->bloom_settings = bloom_settings; + if (ctx->split) { struct strbuf tmp_file = STRBUF_INIT; @@ -1642,7 +1644,7 @@ static int write_commit_graph_file(struct write_commit_graph_context *ctx) write_graph_chunk_extra_edges(f, ctx); if (ctx->changed_paths) { write_graph_chunk_bloom_indexes(f, ctx); - write_graph_chunk_bloom_data(f, ctx, &bloom_settings); + write_graph_chunk_bloom_data(f, ctx); } if (ctx->num_commit_graphs_after > 1 && write_graph_chunk_base(f, ctx)) { From 7ea38d79b7987704013e55b3e9e2ff104dfb812c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?SZEDER=20G=C3=A1bor?= Date: Mon, 15 Jun 2020 20:14:47 +0000 Subject: [PATCH 12/18] commit-graph: unify the signatures of all write_graph_chunk_*() functions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Update the write_graph_chunk_*() helper functions to have the same signature: - Return an int error code from all these functions. write_graph_chunk_base() already has an int error code, now the others will have one, too, but since they don't indicate any error, they will always return 0. - Drop the hash size parameter of write_graph_chunk_oids() and write_graph_chunk_data(); its value can be read directly from 'the_hash_algo' inside these functions as well. This opens up the possibility for further cleanups and foolproofing in the following two patches. Signed-off-by: SZEDER Gábor Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- commit-graph.c | 42 ++++++++++++++++++++++++++---------------- 1 file changed, 26 insertions(+), 16 deletions(-) diff --git a/commit-graph.c b/commit-graph.c index 05b7035d8d..3bae1e52ed 100644 --- a/commit-graph.c +++ b/commit-graph.c @@ -885,8 +885,8 @@ struct write_commit_graph_context { struct bloom_filter_settings bloom_settings; }; -static void write_graph_chunk_fanout(struct hashfile *f, - struct write_commit_graph_context *ctx) +static int write_graph_chunk_fanout(struct hashfile *f, + struct write_commit_graph_context *ctx) { int i, count = 0; struct commit **list = ctx->commits.list; @@ -907,17 +907,21 @@ static void write_graph_chunk_fanout(struct hashfile *f, hashwrite_be32(f, count); } + + return 0; } -static void write_graph_chunk_oids(struct hashfile *f, int hash_len, - struct write_commit_graph_context *ctx) +static int write_graph_chunk_oids(struct hashfile *f, + struct write_commit_graph_context *ctx) { struct commit **list = ctx->commits.list; int count; for (count = 0; count < ctx->commits.nr; count++, list++) { display_progress(ctx->progress, ++ctx->progress_cnt); - hashwrite(f, (*list)->object.oid.hash, (int)hash_len); + hashwrite(f, (*list)->object.oid.hash, (int)the_hash_algo->rawsz); } + + return 0; } static const unsigned char *commit_to_sha1(size_t index, void *table) @@ -926,8 +930,8 @@ static const unsigned char *commit_to_sha1(size_t index, void *table) return commits[index]->object.oid.hash; } -static void write_graph_chunk_data(struct hashfile *f, int hash_len, - struct write_commit_graph_context *ctx) +static int write_graph_chunk_data(struct hashfile *f, + struct write_commit_graph_context *ctx) { struct commit **list = ctx->commits.list; struct commit **last = ctx->commits.list + ctx->commits.nr; @@ -944,7 +948,7 @@ static void write_graph_chunk_data(struct hashfile *f, int hash_len, die(_("unable to parse commit %s"), oid_to_hex(&(*list)->object.oid)); tree = get_commit_tree_oid(*list); - hashwrite(f, tree->hash, hash_len); + hashwrite(f, tree->hash, the_hash_algo->rawsz); parent = (*list)->parents; @@ -1024,10 +1028,12 @@ static void write_graph_chunk_data(struct hashfile *f, int hash_len, list++; } + + return 0; } -static void write_graph_chunk_extra_edges(struct hashfile *f, - struct write_commit_graph_context *ctx) +static int write_graph_chunk_extra_edges(struct hashfile *f, + struct write_commit_graph_context *ctx) { struct commit **list = ctx->commits.list; struct commit **last = ctx->commits.list + ctx->commits.nr; @@ -1076,10 +1082,12 @@ static void write_graph_chunk_extra_edges(struct hashfile *f, list++; } + + return 0; } -static void write_graph_chunk_bloom_indexes(struct hashfile *f, - struct write_commit_graph_context *ctx) +static int write_graph_chunk_bloom_indexes(struct hashfile *f, + struct write_commit_graph_context *ctx) { struct commit **list = ctx->commits.list; struct commit **last = ctx->commits.list + ctx->commits.nr; @@ -1101,10 +1109,11 @@ static void write_graph_chunk_bloom_indexes(struct hashfile *f, } stop_progress(&progress); + return 0; } -static void write_graph_chunk_bloom_data(struct hashfile *f, - struct write_commit_graph_context *ctx) +static int write_graph_chunk_bloom_data(struct hashfile *f, + struct write_commit_graph_context *ctx) { struct commit **list = ctx->commits.list; struct commit **last = ctx->commits.list + ctx->commits.nr; @@ -1128,6 +1137,7 @@ static void write_graph_chunk_bloom_data(struct hashfile *f, } stop_progress(&progress); + return 0; } static int oid_compare(const void *_a, const void *_b) @@ -1638,8 +1648,8 @@ static int write_commit_graph_file(struct write_commit_graph_context *ctx) num_chunks * ctx->commits.nr); } write_graph_chunk_fanout(f, ctx); - write_graph_chunk_oids(f, hashsz, ctx); - write_graph_chunk_data(f, hashsz, ctx); + write_graph_chunk_oids(f, ctx); + write_graph_chunk_data(f, ctx); if (ctx->num_extra_edges) write_graph_chunk_extra_edges(f, ctx); if (ctx->changed_paths) { From de726f5c785f3a7c7467bc789bb7e7caa08c55de Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?SZEDER=20G=C3=A1bor?= Date: Mon, 15 Jun 2020 20:14:48 +0000 Subject: [PATCH 13/18] commit-graph: simplify chunk writes into loop MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In write_commit_graph_file() we now have one block of code filling the array of 'struct chunk_info' with the IDs and sizes of chunks to be written, and an other block of code calling the functions responsible for writing individual chunks. In case of optional chunks like Extra Edge List an Base Graphs List there is also a condition checking whether that chunk is necessary/desired, and that same condition is repeated in both blocks of code. Other, newer chunks have similar optional conditions. Eliminate these repeated conditions by storing the function pointers responsible for writing individual chunks in the 'struct chunk_info' array as well, and calling them in a loop to write the commit-graph file. This will open up the possibility for a bit of foolproofing in the following patch. Signed-off-by: SZEDER Gábor Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- commit-graph.c | 31 +++++++++++++++++++------------ 1 file changed, 19 insertions(+), 12 deletions(-) diff --git a/commit-graph.c b/commit-graph.c index 3bae1e52ed..78e023be66 100644 --- a/commit-graph.c +++ b/commit-graph.c @@ -1532,9 +1532,13 @@ static int write_graph_chunk_base(struct hashfile *f, return 0; } +typedef int (*chunk_write_fn)(struct hashfile *f, + struct write_commit_graph_context *ctx); + struct chunk_info { uint32_t id; uint64_t size; + chunk_write_fn write_fn; }; static int write_commit_graph_file(struct write_commit_graph_context *ctx) @@ -1591,27 +1595,34 @@ static int write_commit_graph_file(struct write_commit_graph_context *ctx) chunks[0].id = GRAPH_CHUNKID_OIDFANOUT; chunks[0].size = GRAPH_FANOUT_SIZE; + chunks[0].write_fn = write_graph_chunk_fanout; chunks[1].id = GRAPH_CHUNKID_OIDLOOKUP; chunks[1].size = hashsz * ctx->commits.nr; + chunks[1].write_fn = write_graph_chunk_oids; chunks[2].id = GRAPH_CHUNKID_DATA; chunks[2].size = (hashsz + 16) * ctx->commits.nr; + chunks[2].write_fn = write_graph_chunk_data; if (ctx->num_extra_edges) { chunks[num_chunks].id = GRAPH_CHUNKID_EXTRAEDGES; chunks[num_chunks].size = 4 * ctx->num_extra_edges; + chunks[num_chunks].write_fn = write_graph_chunk_extra_edges; num_chunks++; } if (ctx->changed_paths) { chunks[num_chunks].id = GRAPH_CHUNKID_BLOOMINDEXES; chunks[num_chunks].size = sizeof(uint32_t) * ctx->commits.nr; + chunks[num_chunks].write_fn = write_graph_chunk_bloom_indexes; num_chunks++; chunks[num_chunks].id = GRAPH_CHUNKID_BLOOMDATA; chunks[num_chunks].size = sizeof(uint32_t) * 3 + ctx->total_bloom_filter_data_size; + chunks[num_chunks].write_fn = write_graph_chunk_bloom_data; num_chunks++; } if (ctx->num_commit_graphs_after > 1) { chunks[num_chunks].id = GRAPH_CHUNKID_BASE; chunks[num_chunks].size = hashsz * (ctx->num_commit_graphs_after - 1); + chunks[num_chunks].write_fn = write_graph_chunk_base; num_chunks++; } @@ -1647,19 +1658,15 @@ static int write_commit_graph_file(struct write_commit_graph_context *ctx) progress_title.buf, num_chunks * ctx->commits.nr); } - write_graph_chunk_fanout(f, ctx); - write_graph_chunk_oids(f, ctx); - write_graph_chunk_data(f, ctx); - if (ctx->num_extra_edges) - write_graph_chunk_extra_edges(f, ctx); - if (ctx->changed_paths) { - write_graph_chunk_bloom_indexes(f, ctx); - write_graph_chunk_bloom_data(f, ctx); - } - if (ctx->num_commit_graphs_after > 1 && - write_graph_chunk_base(f, ctx)) { - return -1; + + for (i = 0; i < num_chunks; i++) { + if (chunks[i].write_fn(f, ctx)) { + error(_("failed writing chunk with id %"PRIx32""), + chunks[i].id); + return -1; + } } + stop_progress(&ctx->progress); strbuf_release(&progress_title); From bd74c1d6fd4d91ecb026384b9f2c9f55c054e011 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?SZEDER=20G=C3=A1bor?= Date: Mon, 15 Jun 2020 20:14:49 +0000 Subject: [PATCH 14/18] commit-graph: check chunk sizes after writing MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In my experience while experimenting with new commit-graph chunks, early versions of the corresponding new write_commit_graph_my_chunk() functions are, sadly but not surprisingly, often buggy, and write more or less data than they are supposed to, especially if the chunk size is not directly proportional to the number of commits. This then causes all kinds of issues when reading such a bogus commit-graph file, raising the question of whether the writing or the reading part happens to be buggy this time. Let's catch such issues early, already when writing the commit-graph file, and check that each write_graph_chunk_*() function wrote the amount of data that it was expected to, and what has been encoded in the Chunk Lookup table. Now that all commit-graph chunks are written in a loop we can do this check in a single place for all chunks, and any chunks added in the future will get checked as well. Signed-off-by: SZEDER Gábor Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- commit-graph.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/commit-graph.c b/commit-graph.c index 78e023be66..5c8f210cad 100644 --- a/commit-graph.c +++ b/commit-graph.c @@ -1659,12 +1659,21 @@ static int write_commit_graph_file(struct write_commit_graph_context *ctx) num_chunks * ctx->commits.nr); } + chunk_offset = f->total + f->offset; for (i = 0; i < num_chunks; i++) { + uint64_t end_offset; + if (chunks[i].write_fn(f, ctx)) { error(_("failed writing chunk with id %"PRIx32""), chunks[i].id); return -1; } + + end_offset = f->total + f->offset; + if (end_offset - chunk_offset != chunks[i].size) + BUG("expected to write %"PRId64" bytes to chunk %"PRIx32", but wrote %"PRId64" instead", + chunks[i].size, chunks[i].id, end_offset - chunk_offset); + chunk_offset = end_offset; } stop_progress(&ctx->progress); From d83d1650bc3eeace6ed4d4e0d24d305e8fcb397b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?SZEDER=20G=C3=A1bor?= Date: Mon, 15 Jun 2020 20:14:50 +0000 Subject: [PATCH 15/18] commit-graph: check all leading directories in changed path Bloom filters MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The file 'dir/subdir/file' can only be modified if its leading directories 'dir' and 'dir/subdir' are modified as well. So when checking modified path Bloom filters looking for commits modifying a path with multiple path components, then check not only the full path in the Bloom filters, but all its leading directories as well. Take care to check these paths in "deepest first" order, because it's the full path that is least likely to be modified, and the Bloom filter queries can short circuit sooner. This can significantly reduce the average false positive rate, by about an order of magnitude or three(!), and can further speed up pathspec-limited revision walks. The table below compares the average false positive rate and runtime of git rev-list HEAD -- "$path" before and after this change for 5000+ randomly* selected paths from each repository: Average false Average Average positive rate runtime runtime before after before after difference ------------------------------------------------------------------ git 3.220% 0.7853% 0.0558s 0.0387s -30.6% linux 2.453% 0.0296% 0.1046s 0.0766s -26.8% tensorflow 2.536% 0.6977% 0.0594s 0.0420s -29.2% *Path selection was done with the following pipeline: git ls-tree -r --name-only HEAD | sort -R | head -n 5000 The improvements in runtime are much smaller than the improvements in average false positive rate, as we are clearly reaching diminishing returns here. However, all these timings depend on that accessing tree objects is reasonably fast (warm caches). If we had a partial clone and the tree objects had to be fetched from a promisor remote, e.g.: $ git clone --filter=tree:0 --bare file://.../webkit.git webkit.notrees.git $ git -C webkit.git -c core.modifiedPathBloomFilters=1 \ commit-graph write --reachable $ cp webkit.git/objects/info/commit-graph webkit.notrees.git/objects/info/ $ git -C webkit.notrees.git -c core.modifiedPathBloomFilters=1 \ rev-list HEAD -- "$path" then checking all leading path component can reduce the runtime from over an hour to a few seconds (and this is with the clone and the promisor on the same machine). This adjusts the tracing values in t4216-log-bloom.sh, which provides a concrete way to notice the improvement. Signed-off-by: SZEDER Gábor Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- revision.c | 35 ++++++++++++++++++++++++++--------- revision.h | 6 ++++-- t/t4216-log-bloom.sh | 2 +- 3 files changed, 31 insertions(+), 12 deletions(-) diff --git a/revision.c b/revision.c index c644c66091..027ae3982b 100644 --- a/revision.c +++ b/revision.c @@ -670,9 +670,10 @@ static void prepare_to_use_bloom_filter(struct rev_info *revs) { struct pathspec_item *pi; char *path_alloc = NULL; - const char *path; + const char *path, *p; int last_index; - int len; + size_t len; + int path_component_nr = 0, j; if (!revs->commits) return; @@ -705,8 +706,22 @@ static void prepare_to_use_bloom_filter(struct rev_info *revs) len = strlen(path); - revs->bloom_key = xmalloc(sizeof(struct bloom_key)); - fill_bloom_key(path, len, revs->bloom_key, revs->bloom_filter_settings); + p = path; + do { + p = strchrnul(p + 1, '/'); + path_component_nr++; + } while (p - path < len); + + revs->bloom_keys_nr = path_component_nr; + ALLOC_ARRAY(revs->bloom_keys, revs->bloom_keys_nr); + + p = path; + for (j = 0; j < revs->bloom_keys_nr; j++) { + p = strchrnul(p + 1, '/'); + + fill_bloom_key(path, p - path, &revs->bloom_keys[j], + revs->bloom_filter_settings); + } if (trace2_is_enabled() && !bloom_filter_atexit_registered) { atexit(trace2_bloom_filter_statistics_atexit); @@ -720,7 +735,7 @@ static int check_maybe_different_in_bloom_filter(struct rev_info *revs, struct commit *commit) { struct bloom_filter *filter; - int result; + int result = 1, j; if (!revs->repo->objects->commit_graph) return -1; @@ -740,9 +755,11 @@ static int check_maybe_different_in_bloom_filter(struct rev_info *revs, return -1; } - result = bloom_filter_contains(filter, - revs->bloom_key, - revs->bloom_filter_settings); + for (j = 0; result && j < revs->bloom_keys_nr; j++) { + result = bloom_filter_contains(filter, + &revs->bloom_keys[j], + revs->bloom_filter_settings); + } if (result) count_bloom_filter_maybe++; @@ -782,7 +799,7 @@ static int rev_compare_tree(struct rev_info *revs, return REV_TREE_SAME; } - if (revs->bloom_key && !nth_parent) { + if (revs->bloom_keys_nr && !nth_parent) { bloom_ret = check_maybe_different_in_bloom_filter(revs, commit); if (bloom_ret == 0) diff --git a/revision.h b/revision.h index 7c026fe41f..abbfb4ab59 100644 --- a/revision.h +++ b/revision.h @@ -295,8 +295,10 @@ struct rev_info { struct topo_walk_info *topo_walk_info; /* Commit graph bloom filter fields */ - /* The bloom filter key for the pathspec */ - struct bloom_key *bloom_key; + /* The bloom filter key(s) for the pathspec */ + struct bloom_key *bloom_keys; + int bloom_keys_nr; + /* * The bloom filter settings used to generate the key. * This is loaded from the commit-graph being used. diff --git a/t/t4216-log-bloom.sh b/t/t4216-log-bloom.sh index c7011f33e2..c13b97d3bd 100755 --- a/t/t4216-log-bloom.sh +++ b/t/t4216-log-bloom.sh @@ -142,7 +142,7 @@ test_expect_success 'setup - add commit-graph to the chain with Bloom filters' ' test_bloom_filters_used_when_some_filters_are_missing () { log_args=$1 - bloom_trace_prefix="statistics:{\"filter_not_present\":3,\"zero_length_filter\":0,\"maybe\":8,\"definitely_not\":6" + bloom_trace_prefix="statistics:{\"filter_not_present\":3,\"zero_length_filter\":0,\"maybe\":6,\"definitely_not\":8" setup "$log_args" && grep -q "$bloom_trace_prefix" "$TRASH_DIRECTORY/trace.perf" && test_cmp log_wo_bloom log_w_bloom From 488ae8cf265fe61b3158d8dcdd16bbaab7577cdc Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Mon, 15 Jun 2020 20:14:51 +0000 Subject: [PATCH 16/18] bloom: enforce a minimum size of 8 bytes The original design of changed-path Bloom filters included an 8-byte block size for filter lengths. This was changed mid-way through the submission process, and now the length stored in the commit-graph has one-byte granularity. This can cause some issues for very small filters. The analysis for false positive rates assume large filters, so rounding errors become less important at that scale. When there are only a few paths changed, a filter that has size only a few bytes could have very different behavior. In fact, this is evidenced in the Git repository due to the code organization and careful patch creation that leads to many commits with very small filters. These small filters frequently have false-positive rates in the 8-10% range or higher. The previous change improved the false-positive rate using multiple Bloom keys when the path has multiple directory components. However, that does not help at all for files at root. It is typical to have several commits that change only the README at root, and those commits would be likely to have these artificially high false-positive rates. Correct this issue by creating a minimum filters size of 8 bytes. This requires the very small commits (with fewer than six changes, including non-root directories) to have a larger filter. In principle, this violates the bits_per_entry value of struct bloom_filter_settings. However, it does not actually create a functional problem. As for compatibility, this only affects new versions writing filters for commits that do not yet have a filter. Old version will write the smaller filters and this version will persist and properly read that data. Now, the new files will be generated slightly larger. Bytes before Bytes after Difference -------------------------------------------------- git 4,021,078 4,275,311 +6.32% linux 72,212,101 73,909,286 +2.35% tensorflow 7,596,359 7,691,646 +1.25% This has a measurable improvement in the false-positive rate and the end-to-end run time for these repos. The table below compares the average false-positive rate and runtime of git rev-list HEAD -- "$path" before and after this change for 5000+ randomly* selected paths from each repository: Average false Average Average positive rate runtime runtime before after before after difference ------------------------------------------------------------------ git 0.786% 0.227% 0.0387s 0.0289s -25.5% linux 0.0296% 0.0174% 0.0766s 0.0706s -7.8% tensorflow 0.6977% 0.0268% 0.0420s 0.0384s -8.5% *Path selection was done with the following pipeline: git ls-tree -r --name-only HEAD | sort -R | head -n 5000 These relatively-small increases in file size appear to be a fair price to pay for these performance improvements. Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- bloom.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/bloom.c b/bloom.c index c38d1cff0c..875e3853c2 100644 --- a/bloom.c +++ b/bloom.c @@ -258,6 +258,10 @@ struct bloom_filter *get_bloom_filter(struct repository *r, } filter->len = (hashmap_get_size(&pathmap) * settings.bits_per_entry + BITS_PER_WORD - 1) / BITS_PER_WORD; + + if (filter->len && filter->len < 8) + filter->len = 8; + filter->data = xcalloc(filter->len, sizeof(unsigned char)); hashmap_for_each_entry(&pathmap, &iter, e, entry) { From ef713d4bea7db709b08a569d394fac7c16547713 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Mon, 15 Jun 2020 20:14:52 +0000 Subject: [PATCH 17/18] commit-graph: change test to die on parse, not load 43d3561 (commit-graph write: don't die if the existing graph is corrupt, 2019-03-25) introduced the GIT_TEST_COMMIT_GRAPH_DIE_ON_LOAD environment variable. This was created to verify that commit-graph was not loaded when writing a new non-incremental commit-graph. An upcoming change wants to load a commit-graph in some valuable cases, but we want to maintain that we don't trust the commit-graph data when writing our new file. Instead of dying on load, instead die if we ever try to parse a commit from the commit-graph. This functionally verifies the same intended behavior, but allows a more advanced feature in the next change. Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- commit-graph.c | 12 ++++++++---- commit-graph.h | 2 +- t/t5318-commit-graph.sh | 2 +- 3 files changed, 10 insertions(+), 6 deletions(-) diff --git a/commit-graph.c b/commit-graph.c index 5c8f210cad..3a64e3b382 100644 --- a/commit-graph.c +++ b/commit-graph.c @@ -564,10 +564,6 @@ static int prepare_commit_graph(struct repository *r) return !!r->objects->commit_graph; r->objects->commit_graph_attempted = 1; - if (git_env_bool(GIT_TEST_COMMIT_GRAPH_DIE_ON_LOAD, 0)) - die("dying as requested by the '%s' variable on commit-graph load!", - GIT_TEST_COMMIT_GRAPH_DIE_ON_LOAD); - prepare_repo_settings(r); if (!git_env_bool(GIT_TEST_COMMIT_GRAPH, 0) && @@ -790,6 +786,14 @@ static int parse_commit_in_graph_one(struct repository *r, int parse_commit_in_graph(struct repository *r, struct commit *item) { + static int checked_env = 0; + + if (!checked_env && + git_env_bool(GIT_TEST_COMMIT_GRAPH_DIE_ON_PARSE, 0)) + die("dying as requested by the '%s' variable on commit-graph parse!", + GIT_TEST_COMMIT_GRAPH_DIE_ON_PARSE); + checked_env = 1; + if (!prepare_commit_graph(r)) return 0; return parse_commit_in_graph_one(r, r->objects->commit_graph, item); diff --git a/commit-graph.h b/commit-graph.h index 881c9b46e5..f0fb13e3f2 100644 --- a/commit-graph.h +++ b/commit-graph.h @@ -5,7 +5,7 @@ #include "object-store.h" #define GIT_TEST_COMMIT_GRAPH "GIT_TEST_COMMIT_GRAPH" -#define GIT_TEST_COMMIT_GRAPH_DIE_ON_LOAD "GIT_TEST_COMMIT_GRAPH_DIE_ON_LOAD" +#define GIT_TEST_COMMIT_GRAPH_DIE_ON_PARSE "GIT_TEST_COMMIT_GRAPH_DIE_ON_PARSE" #define GIT_TEST_COMMIT_GRAPH_CHANGED_PATHS "GIT_TEST_COMMIT_GRAPH_CHANGED_PATHS" /* diff --git a/t/t5318-commit-graph.sh b/t/t5318-commit-graph.sh index 1073f9e3cf..5ec01abdaa 100755 --- a/t/t5318-commit-graph.sh +++ b/t/t5318-commit-graph.sh @@ -436,7 +436,7 @@ corrupt_graph_verify() { cp $objdir/info/commit-graph commit-graph-pre-write-test fi && git status --short && - GIT_TEST_COMMIT_GRAPH_DIE_ON_LOAD=true git commit-graph write && + GIT_TEST_COMMIT_GRAPH_DIE_ON_PARSE=true git commit-graph write && git commit-graph verify } From 744d4961e66819ff3ea8857b286d0adf3e3c5f24 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Mon, 15 Jun 2020 20:14:53 +0000 Subject: [PATCH 18/18] commit-graph: persist existence of changed-paths The changed-path Bloom filters were released in v2.27.0, but have a significant drawback. A user can opt-in to writing the changed-path filters using the "--changed-paths" option to "git commit-graph write" but the next write will drop the filters unless that option is specified. This becomes even more important when considering the interaction with gc.writeCommitGraph (on by default) or fetch.writeCommitGraph (part of features.experimental). These config options trigger commit-graph writes that the user did not signal, and hence there is no --changed-paths option available. Allow a user that opts-in to the changed-path filters to persist the property of "my commit-graph has changed-path filters" automatically. A user can drop filters using the --no-changed-paths option. Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- Documentation/git-commit-graph.txt | 5 ++++- builtin/commit-graph.c | 5 ++++- commit-graph.c | 12 +++++++++++- commit-graph.h | 1 + t/t4216-log-bloom.sh | 2 +- 5 files changed, 21 insertions(+), 4 deletions(-) diff --git a/Documentation/git-commit-graph.txt b/Documentation/git-commit-graph.txt index f4b13c005b..369b222b08 100644 --- a/Documentation/git-commit-graph.txt +++ b/Documentation/git-commit-graph.txt @@ -60,7 +60,10 @@ existing commit-graph file. With the `--changed-paths` option, compute and write information about the paths changed between a commit and it's first parent. This operation can take a while on large repositories. It provides significant performance gains -for getting history of a directory or a file with `git log -- `. +for getting history of a directory or a file with `git log -- `. If +this option is given, future commit-graph writes will automatically assume +that this option was intended. Use `--no-changed-paths` to stop storing this +data. + With the `--split` option, write the commit-graph as a chain of multiple commit-graph files stored in `/info/commit-graphs`. The new commits diff --git a/builtin/commit-graph.c b/builtin/commit-graph.c index 59009837dc..ff7b177c33 100644 --- a/builtin/commit-graph.c +++ b/builtin/commit-graph.c @@ -151,6 +151,7 @@ static int graph_write(int argc, const char **argv) }; opts.progress = isatty(2); + opts.enable_changed_paths = -1; split_opts.size_multiple = 2; split_opts.max_commits = 0; split_opts.expire_time = 0; @@ -171,7 +172,9 @@ static int graph_write(int argc, const char **argv) flags |= COMMIT_GRAPH_WRITE_SPLIT; if (opts.progress) flags |= COMMIT_GRAPH_WRITE_PROGRESS; - if (opts.enable_changed_paths || + if (!opts.enable_changed_paths) + flags |= COMMIT_GRAPH_NO_WRITE_BLOOM_FILTERS; + if (opts.enable_changed_paths == 1 || git_env_bool(GIT_TEST_COMMIT_GRAPH_CHANGED_PATHS, 0)) flags |= COMMIT_GRAPH_WRITE_BLOOM_FILTERS; diff --git a/commit-graph.c b/commit-graph.c index 3a64e3b382..04eea72523 100644 --- a/commit-graph.c +++ b/commit-graph.c @@ -1996,9 +1996,19 @@ int write_commit_graph(struct object_directory *odb, ctx->split = flags & COMMIT_GRAPH_WRITE_SPLIT ? 1 : 0; ctx->check_oids = flags & COMMIT_GRAPH_WRITE_CHECK_OIDS ? 1 : 0; ctx->split_opts = split_opts; - ctx->changed_paths = flags & COMMIT_GRAPH_WRITE_BLOOM_FILTERS ? 1 : 0; ctx->total_bloom_filter_data_size = 0; + if (flags & COMMIT_GRAPH_WRITE_BLOOM_FILTERS) + ctx->changed_paths = 1; + else if (!(flags & COMMIT_GRAPH_NO_WRITE_BLOOM_FILTERS)) { + prepare_commit_graph_one(ctx->r, ctx->odb); + + /* We have changed-paths already. Keep them in the next graph */ + if (ctx->r->objects->commit_graph && + ctx->r->objects->commit_graph->chunk_bloom_data) + ctx->changed_paths = 1; + } + if (ctx->split) { struct commit_graph *g; prepare_commit_graph(ctx->r); diff --git a/commit-graph.h b/commit-graph.h index f0fb13e3f2..45b1e5bca3 100644 --- a/commit-graph.h +++ b/commit-graph.h @@ -96,6 +96,7 @@ enum commit_graph_write_flags { /* Make sure that each OID in the input is a valid commit OID. */ COMMIT_GRAPH_WRITE_CHECK_OIDS = (1 << 3), COMMIT_GRAPH_WRITE_BLOOM_FILTERS = (1 << 4), + COMMIT_GRAPH_NO_WRITE_BLOOM_FILTERS = (1 << 5), }; struct split_commit_graph_opts { diff --git a/t/t4216-log-bloom.sh b/t/t4216-log-bloom.sh index c13b97d3bd..30c8d9562e 100755 --- a/t/t4216-log-bloom.sh +++ b/t/t4216-log-bloom.sh @@ -126,7 +126,7 @@ test_expect_success 'setup - add commit-graph to the chain without Bloom filters test_commit c14 A/anotherFile2 && test_commit c15 A/B/anotherFile2 && test_commit c16 A/B/C/anotherFile2 && - GIT_TEST_COMMIT_GRAPH_CHANGED_PATHS=0 git commit-graph write --reachable --split && + git commit-graph write --reachable --split --no-changed-paths && test_line_count = 2 .git/objects/info/commit-graphs/commit-graph-chain '