From 868c991155b3392e0b1a6aab8daab89bb93400f4 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Mon, 21 Aug 2023 17:34:34 -0400 Subject: [PATCH 1/4] commit-graph: introduce `commit_graph_generation_from_graph()` In 2ee11f7261 (commit-graph: return generation from memory, 2023-03-20), the `commit_graph_generation()` function stopped returning zeros when asked to locate the generation number of a given commit. This was done at the time to prepare for a later change which set generation values in memory, meaning that we could no longer rely on `graph_pos` alone to tell us whether or not to trust the generation number returned by this function. In 2ee11f7261, it was noted that this change only impacted very old commit-graphs, which were written with all commits having generation number 0. Indeed, zero is not a valid generation number, so we should never expect to see that value outside of the aforementioned case. The test fallout in 2ee11f7261 indicated that we were no longer able to fsck a specific old case of commit-graph corruption, where we see a non-zero generation number after having seen a generation number of 0 earlier. Introduce a variant of `commit_graph_generation()` which behaves like that function did prior to 2ee11f7261, known as `commit_graph_generation_from_graph()`. Then use this function in the context of `verify_one_commit_graph()`, where we only want to trust the values from the graph. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- commit-graph.c | 14 ++++++++++++-- t/t5318-commit-graph.sh | 2 +- 2 files changed, 13 insertions(+), 3 deletions(-) diff --git a/commit-graph.c b/commit-graph.c index 0aa1640d15..c68f5c6b3a 100644 --- a/commit-graph.c +++ b/commit-graph.c @@ -128,6 +128,16 @@ timestamp_t commit_graph_generation(const struct commit *c) return GENERATION_NUMBER_INFINITY; } +static timestamp_t commit_graph_generation_from_graph(const struct commit *c) +{ + struct commit_graph_data *data = + commit_graph_data_slab_peek(&commit_graph_data_slab, c); + + if (!data || data->graph_pos == COMMIT_NOT_FROM_GRAPH) + return GENERATION_NUMBER_INFINITY; + return data->generation; +} + static struct commit_graph_data *commit_graph_data_at(const struct commit *c) { unsigned int i, nth_slab; @@ -2659,7 +2669,7 @@ static int verify_one_commit_graph(struct repository *r, oid_to_hex(&graph_parents->item->object.oid), oid_to_hex(&odb_parents->item->object.oid)); - generation = commit_graph_generation(graph_parents->item); + generation = commit_graph_generation_from_graph(graph_parents->item); if (generation > max_generation) max_generation = generation; @@ -2671,7 +2681,7 @@ static int verify_one_commit_graph(struct repository *r, graph_report(_("commit-graph parent list for commit %s terminates early"), oid_to_hex(&cur_oid)); - if (!commit_graph_generation(graph_commit)) { + if (!commit_graph_generation_from_graph(graph_commit)) { if (generation_zero == GENERATION_NUMBER_EXISTS) graph_report(_("commit-graph has generation number zero for commit %s, but non-zero elsewhere"), oid_to_hex(&cur_oid)); diff --git a/t/t5318-commit-graph.sh b/t/t5318-commit-graph.sh index 4df76173a8..4e70820c74 100755 --- a/t/t5318-commit-graph.sh +++ b/t/t5318-commit-graph.sh @@ -598,7 +598,7 @@ test_expect_success 'detect incorrect generation number' ' test_expect_success 'detect incorrect generation number' ' corrupt_graph_and_verify $GRAPH_BYTE_COMMIT_GENERATION "\01" \ - "commit-graph generation for commit" + "but zero elsewhere" ' test_expect_success 'detect incorrect commit date' ' From cc9c9a00a50dc479ef59c7d2d03d1e3fcc8752a3 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Mon, 21 Aug 2023 17:34:37 -0400 Subject: [PATCH 2/4] commit-graph: verify swapped zero/non-zero generation cases In verify_one_commit_graph(), we have code that complains when a commit is found with a generation number of zero, and then later with a non-zero number. It works like this: 1. When we see an entry with generation zero, we set the generation_zero flag to GENERATION_ZERO_EXISTS. 2. When we later see an entry with a non-zero generation, we complain if the flag is GENERATION_ZERO_EXISTS. There's a matching GENERATION_NUMBER_EXISTS value, which in theory would be used to find the case that we see the entries in the opposite order: 1. When we see an entry with a non-zero generation, we set the generation_zero flag to GENERATION_NUMBER_EXISTS. 2. When we later see an entry with a zero generation, we complain if the flag is GENERATION_NUMBER_EXISTS. But that doesn't work; step 2 is implemented, but there is no step 1. We never use NUMBER_EXISTS at all, and Coverity rightly complains that step 2 is dead code. We can fix that by implementing that step 1. Signed-off-by: Jeff King Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- commit-graph.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/commit-graph.c b/commit-graph.c index c68f5c6b3a..acca753ce8 100644 --- a/commit-graph.c +++ b/commit-graph.c @@ -2686,9 +2686,12 @@ static int verify_one_commit_graph(struct repository *r, graph_report(_("commit-graph has generation number zero for commit %s, but non-zero elsewhere"), oid_to_hex(&cur_oid)); generation_zero = GENERATION_ZERO_EXISTS; - } else if (generation_zero == GENERATION_ZERO_EXISTS) - graph_report(_("commit-graph has non-zero generation number for commit %s, but zero elsewhere"), - oid_to_hex(&cur_oid)); + } else { + if (generation_zero == GENERATION_ZERO_EXISTS) + graph_report(_("commit-graph has non-zero generation number for commit %s, but zero elsewhere"), + oid_to_hex(&cur_oid)); + generation_zero = GENERATION_NUMBER_EXISTS; + } if (generation_zero == GENERATION_ZERO_EXISTS) continue; From ce7629a315459c12451ee9071db22f8b0e26a4eb Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Mon, 21 Aug 2023 17:34:40 -0400 Subject: [PATCH 3/4] t/t5318-commit-graph.sh: test generation zero transitions during fsck The second test called "detect incorrect generation number" asserts that we correctly warn during an fsck when we see a non-zero generation number after seeing a zero beforehand. The other transition (going from non-zero to zero) was previously untested. Test both directions, and rename the existing test to make clear which direction it is exercising. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- t/t5318-commit-graph.sh | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/t/t5318-commit-graph.sh b/t/t5318-commit-graph.sh index 4e70820c74..8e96471b34 100755 --- a/t/t5318-commit-graph.sh +++ b/t/t5318-commit-graph.sh @@ -450,14 +450,15 @@ GRAPH_BYTE_FANOUT2=$(($GRAPH_FANOUT_OFFSET + 4 * 255)) GRAPH_OID_LOOKUP_OFFSET=$(($GRAPH_FANOUT_OFFSET + 4 * 256)) GRAPH_BYTE_OID_LOOKUP_ORDER=$(($GRAPH_OID_LOOKUP_OFFSET + $HASH_LEN * 8)) GRAPH_BYTE_OID_LOOKUP_MISSING=$(($GRAPH_OID_LOOKUP_OFFSET + $HASH_LEN * 4 + 10)) +GRAPH_COMMIT_DATA_WIDTH=$(($HASH_LEN + 16)) GRAPH_COMMIT_DATA_OFFSET=$(($GRAPH_OID_LOOKUP_OFFSET + $HASH_LEN * $NUM_COMMITS)) GRAPH_BYTE_COMMIT_TREE=$GRAPH_COMMIT_DATA_OFFSET GRAPH_BYTE_COMMIT_PARENT=$(($GRAPH_COMMIT_DATA_OFFSET + $HASH_LEN)) GRAPH_BYTE_COMMIT_EXTRA_PARENT=$(($GRAPH_COMMIT_DATA_OFFSET + $HASH_LEN + 4)) GRAPH_BYTE_COMMIT_WRONG_PARENT=$(($GRAPH_COMMIT_DATA_OFFSET + $HASH_LEN + 3)) GRAPH_BYTE_COMMIT_GENERATION=$(($GRAPH_COMMIT_DATA_OFFSET + $HASH_LEN + 11)) +GRAPH_BYTE_COMMIT_GENERATION_LAST=$(($GRAPH_BYTE_COMMIT_GENERATION + $(($NUM_COMMITS - 1)) * $GRAPH_COMMIT_DATA_WIDTH)) GRAPH_BYTE_COMMIT_DATE=$(($GRAPH_COMMIT_DATA_OFFSET + $HASH_LEN + 12)) -GRAPH_COMMIT_DATA_WIDTH=$(($HASH_LEN + 16)) GRAPH_OCTOPUS_DATA_OFFSET=$(($GRAPH_COMMIT_DATA_OFFSET + \ $GRAPH_COMMIT_DATA_WIDTH * $NUM_COMMITS)) GRAPH_BYTE_OCTOPUS=$(($GRAPH_OCTOPUS_DATA_OFFSET + 4)) @@ -596,11 +597,6 @@ test_expect_success 'detect incorrect generation number' ' "generation for commit" ' -test_expect_success 'detect incorrect generation number' ' - corrupt_graph_and_verify $GRAPH_BYTE_COMMIT_GENERATION "\01" \ - "but zero elsewhere" -' - test_expect_success 'detect incorrect commit date' ' corrupt_graph_and_verify $GRAPH_BYTE_COMMIT_DATE "\01" \ "commit date" @@ -622,6 +618,16 @@ test_expect_success 'detect incorrect chunk count' ' $GRAPH_CHUNK_LOOKUP_OFFSET ' +test_expect_success 'detect mixed generation numbers (non-zero to zero)' ' + corrupt_graph_and_verify $GRAPH_BYTE_COMMIT_GENERATION_LAST "\0\0\0\0" \ + "but non-zero elsewhere" +' + +test_expect_success 'detect mixed generation numbers (zero to non-zero)' ' + corrupt_graph_and_verify $GRAPH_BYTE_COMMIT_GENERATION "\0\0\0\0" \ + "but zero elsewhere" +' + test_expect_success 'git fsck (checks commit-graph when config set to true)' ' git -C full fsck && corrupt_graph_and_verify $GRAPH_BYTE_FOOTER "\00" \ From db6044d76261a996c03ce8f1e08240f326a42e15 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Mon, 21 Aug 2023 17:34:42 -0400 Subject: [PATCH 4/4] commit-graph: avoid repeated mixed generation number warnings When validating that a commit-graph has either all zero, or all non-zero generation numbers, we emit a warning on both the rising and falling edge of transitioning between the two. So if we are unfortunate enough to see a commit-graph which has a repeating sequence of zero, then non-zero generation numbers, we'll generate many warnings that contain more or less the same information. Avoid this by keeping track of a single example for a commit with zero- and non-zero generation, and emit a single warning at the end of verification if both are non-NULL. Co-authored-by: Jeff King Signed-off-by: Jeff King Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- commit-graph.c | 29 +++++++++++++---------------- t/t5318-commit-graph.sh | 4 ++-- 2 files changed, 15 insertions(+), 18 deletions(-) diff --git a/commit-graph.c b/commit-graph.c index acca753ce8..9e6eaa8a46 100644 --- a/commit-graph.c +++ b/commit-graph.c @@ -2560,9 +2560,6 @@ static void graph_report(const char *fmt, ...) va_end(ap); } -#define GENERATION_ZERO_EXISTS 1 -#define GENERATION_NUMBER_EXISTS 2 - static int commit_graph_checksum_valid(struct commit_graph *g) { return hashfile_checksum_valid(g->data, g->data_len); @@ -2575,7 +2572,8 @@ static int verify_one_commit_graph(struct repository *r, { uint32_t i, cur_fanout_pos = 0; struct object_id prev_oid, cur_oid; - int generation_zero = 0; + struct commit *seen_gen_zero = NULL; + struct commit *seen_gen_non_zero = NULL; verify_commit_graph_error = verify_commit_graph_lite(g); if (verify_commit_graph_error) @@ -2681,19 +2679,12 @@ static int verify_one_commit_graph(struct repository *r, graph_report(_("commit-graph parent list for commit %s terminates early"), oid_to_hex(&cur_oid)); - if (!commit_graph_generation_from_graph(graph_commit)) { - if (generation_zero == GENERATION_NUMBER_EXISTS) - graph_report(_("commit-graph has generation number zero for commit %s, but non-zero elsewhere"), - oid_to_hex(&cur_oid)); - generation_zero = GENERATION_ZERO_EXISTS; - } else { - if (generation_zero == GENERATION_ZERO_EXISTS) - graph_report(_("commit-graph has non-zero generation number for commit %s, but zero elsewhere"), - oid_to_hex(&cur_oid)); - generation_zero = GENERATION_NUMBER_EXISTS; - } + if (commit_graph_generation_from_graph(graph_commit)) + seen_gen_non_zero = graph_commit; + else + seen_gen_zero = graph_commit; - if (generation_zero == GENERATION_ZERO_EXISTS) + if (seen_gen_zero) continue; /* @@ -2719,6 +2710,12 @@ static int verify_one_commit_graph(struct repository *r, odb_commit->date); } + if (seen_gen_zero && seen_gen_non_zero) + graph_report(_("commit-graph has both zero and non-zero " + "generations (e.g., commits '%s' and '%s')"), + oid_to_hex(&seen_gen_zero->object.oid), + oid_to_hex(&seen_gen_non_zero->object.oid)); + return verify_commit_graph_error; } diff --git a/t/t5318-commit-graph.sh b/t/t5318-commit-graph.sh index 8e96471b34..ba65f17dd9 100755 --- a/t/t5318-commit-graph.sh +++ b/t/t5318-commit-graph.sh @@ -620,12 +620,12 @@ test_expect_success 'detect incorrect chunk count' ' test_expect_success 'detect mixed generation numbers (non-zero to zero)' ' corrupt_graph_and_verify $GRAPH_BYTE_COMMIT_GENERATION_LAST "\0\0\0\0" \ - "but non-zero elsewhere" + "both zero and non-zero generations" ' test_expect_success 'detect mixed generation numbers (zero to non-zero)' ' corrupt_graph_and_verify $GRAPH_BYTE_COMMIT_GENERATION "\0\0\0\0" \ - "but zero elsewhere" + "both zero and non-zero generations" ' test_expect_success 'git fsck (checks commit-graph when config set to true)' '