From c4cc08316944d17fc214bdad47bb4e92c31d0751 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Mon, 1 Feb 2021 17:15:03 +0000 Subject: [PATCH 1/6] commit-graph: use repo_parse_commit The write_commit_graph_context has a repository pointer, so use it. Signed-off-by: Derrick Stolee Reviewed-by: Taylor Blau Signed-off-by: Junio C Hamano --- commit-graph.c | 10 +++++----- commit.h | 5 +++-- 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/commit-graph.c b/commit-graph.c index f3bde2ad95..03e5a98796 100644 --- a/commit-graph.c +++ b/commit-graph.c @@ -1098,7 +1098,7 @@ static int write_graph_chunk_data(struct hashfile *f, uint32_t packedDate[2]; display_progress(ctx->progress, ++ctx->progress_cnt); - if (parse_commit_no_graph(*list)) + if (repo_parse_commit_no_graph(ctx->r, *list)) die(_("unable to parse commit %s"), oid_to_hex(&(*list)->object.oid)); tree = get_commit_tree_oid(*list); @@ -1411,11 +1411,11 @@ static void close_reachable(struct write_commit_graph_context *ctx) if (!commit) continue; if (ctx->split) { - if ((!parse_commit(commit) && + if ((!repo_parse_commit(ctx->r, commit) && commit_graph_position(commit) == COMMIT_NOT_FROM_GRAPH) || flags == COMMIT_GRAPH_SPLIT_REPLACE) add_missing_parents(ctx, commit); - } else if (!parse_commit_no_graph(commit)) + } else if (!repo_parse_commit_no_graph(ctx->r, commit)) add_missing_parents(ctx, commit); } stop_progress(&ctx->progress); @@ -1710,9 +1710,9 @@ static void copy_oids_to_commits(struct write_commit_graph_context *ctx) continue; if (ctx->split && flags == COMMIT_GRAPH_SPLIT_REPLACE) - parse_commit(ctx->commits.list[ctx->commits.nr]); + repo_parse_commit(ctx->r, ctx->commits.list[ctx->commits.nr]); else - parse_commit_no_graph(ctx->commits.list[ctx->commits.nr]); + repo_parse_commit_no_graph(ctx->r, ctx->commits.list[ctx->commits.nr]); num_parents = commit_list_count(ctx->commits.list[ctx->commits.nr]->parents); if (num_parents > 2) diff --git a/commit.h b/commit.h index 251d877fcf..b05ab558ce 100644 --- a/commit.h +++ b/commit.h @@ -89,9 +89,10 @@ static inline int repo_parse_commit(struct repository *r, struct commit *item) return repo_parse_commit_gently(r, item, 0); } -static inline int parse_commit_no_graph(struct commit *commit) +static inline int repo_parse_commit_no_graph(struct repository *r, + struct commit *commit) { - return repo_parse_commit_internal(the_repository, commit, 0, 0); + return repo_parse_commit_internal(r, commit, 0, 0); } #ifndef NO_THE_REPOSITORY_COMPATIBILITY_MACROS From 90cb1c47c7cbf6cdd5152c1371a2732af59911a4 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Mon, 1 Feb 2021 17:15:04 +0000 Subject: [PATCH 2/6] commit-graph: always parse before commit_graph_data_at() There is a subtle failure happening when computing corrected commit dates with --split enabled. It requires a base layer needing the generation_data_overflow chunk. Then, the next layer on top erroneously thinks it needs an overflow chunk due to a bug leading to recalculating all reachable generation numbers. The output of the failure is BUG: commit-graph.c:1912: expected to write 8 bytes to chunk 47444f56, but wrote 0 instead These "expected" 8 bytes are due to re-computing the corrected commit date for the lower layer but the new layer does not need any overflow. Add a test to t5318-commit-graph.sh that demonstrates this bug. However, it does not trigger consistently with the existing code. The generation number data is stored in a slab and accessed by commit_graph_data_at(). This data is initialized when parsing a commit, but is otherwise used assuming it has been populated. The loop in compute_generation_numbers() did not enforce that all reachable commits were parsed and had correct values. This could lead to some problems when writing a commit-graph with corrected commit dates based on a commit-graph without them. It has been difficult to identify the issue here because it was so hard to reproduce. It relies on this uninitialized data having a non-zero value, but also on specifically in a way that overwrites the existing data. This patch adds the extra parse to ensure the data is filled before we compute the generation number of a commit. This triggers the new test to fail because the generation number overflow count does not match between this computation and the write for that chunk. The actual fix will follow as the next few changes. Signed-off-by: Derrick Stolee Reviewed-by: Taylor Blau Signed-off-by: Junio C Hamano --- commit-graph.c | 16 ++++++++++++---- t/t5318-commit-graph.sh | 21 +++++++++++++++++++++ 2 files changed, 33 insertions(+), 4 deletions(-) diff --git a/commit-graph.c b/commit-graph.c index 03e5a98796..edbb3a0f2c 100644 --- a/commit-graph.c +++ b/commit-graph.c @@ -1193,7 +1193,9 @@ static int write_graph_chunk_generation_data(struct hashfile *f, for (i = 0; i < ctx->commits.nr; i++) { struct commit *c = ctx->commits.list[i]; - timestamp_t offset = commit_graph_data_at(c)->generation - c->date; + timestamp_t offset; + repo_parse_commit(ctx->r, c); + offset = commit_graph_data_at(c)->generation - c->date; display_progress(ctx->progress, ++ctx->progress_cnt); if (offset > GENERATION_NUMBER_V2_OFFSET_MAX) { @@ -1444,15 +1446,20 @@ static void compute_generation_numbers(struct write_commit_graph_context *ctx) _("Computing commit graph generation numbers"), ctx->commits.nr); for (i = 0; i < ctx->commits.nr; i++) { - uint32_t level = *topo_level_slab_at(ctx->topo_levels, ctx->commits.list[i]); - timestamp_t corrected_commit_date = commit_graph_data_at(ctx->commits.list[i])->generation; + struct commit *c = ctx->commits.list[i]; + uint32_t level; + timestamp_t corrected_commit_date; + + repo_parse_commit(ctx->r, c); + level = *topo_level_slab_at(ctx->topo_levels, c); + corrected_commit_date = commit_graph_data_at(c)->generation; display_progress(ctx->progress, i + 1); if (level != GENERATION_NUMBER_ZERO && corrected_commit_date != GENERATION_NUMBER_ZERO) continue; - commit_list_insert(ctx->commits.list[i], &list); + commit_list_insert(c, &list); while (list) { struct commit *current = list->item; struct commit_list *parent; @@ -1461,6 +1468,7 @@ static void compute_generation_numbers(struct write_commit_graph_context *ctx) timestamp_t max_corrected_commit_date = 0; for (parent = current->parents; parent; parent = parent->next) { + repo_parse_commit(ctx->r, parent->item); level = *topo_level_slab_at(ctx->topo_levels, parent->item); corrected_commit_date = commit_graph_data_at(parent->item)->generation; diff --git a/t/t5318-commit-graph.sh b/t/t5318-commit-graph.sh index fa27df579a..2cf29f425a 100755 --- a/t/t5318-commit-graph.sh +++ b/t/t5318-commit-graph.sh @@ -446,6 +446,27 @@ test_expect_success 'warn on improper hash version' ' ) ' +test_expect_failure 'lower layers have overflow chunk' ' + cd "$TRASH_DIRECTORY/full" && + UNIX_EPOCH_ZERO="@0 +0000" && + FUTURE_DATE="@2147483646 +0000" && + rm -f .git/objects/info/commit-graph && + test_commit --date "$FUTURE_DATE" future-1 && + test_commit --date "$UNIX_EPOCH_ZERO" old-1 && + git commit-graph write --reachable && + test_commit --date "$FUTURE_DATE" future-2 && + test_commit --date "$UNIX_EPOCH_ZERO" old-2 && + git commit-graph write --reachable --split=no-merge && + test_commit extra && + git commit-graph write --reachable --split=no-merge && + git commit-graph write --reachable && + graph_read_expect 16 "generation_data generation_data_overflow extra_edges" && + mv .git/objects/info/commit-graph commit-graph-upgraded && + git commit-graph write --reachable && + graph_read_expect 16 "generation_data generation_data_overflow extra_edges" && + test_cmp .git/objects/info/commit-graph commit-graph-upgraded +' + # the verify tests below expect the commit-graph to contain # exactly the commits reachable from the commits/8 branch. # If the file changes the set of commits in the list, then the From 448a39e65dedb5f9a58d40e22ec7c0cc3be54173 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Tue, 2 Feb 2021 03:01:20 +0000 Subject: [PATCH 3/6] commit-graph: validate layers for generation data We need to be extra careful that we don't use corrected commit dates from any layer of a commit-graph chain if there is a single commit-graph file that is missing the generation_data chunk. Update validate_mixed_generation_chain() to correctly update each layer to ignore the generation_data chunk in this case. It now also returns 1 if all layers have a generation_data chunk. This return value will be used in the next change. Signed-off-by: Derrick Stolee Reviewed-by: Taylor Blau Signed-off-by: Junio C Hamano --- commit-graph.c | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/commit-graph.c b/commit-graph.c index edbb3a0f2c..b3f7c3bbcb 100644 --- a/commit-graph.c +++ b/commit-graph.c @@ -614,19 +614,29 @@ static struct commit_graph *load_commit_graph_chain(struct repository *r, return graph_chain; } -static void validate_mixed_generation_chain(struct commit_graph *g) +/* + * returns 1 if and only if all graphs in the chain have + * corrected commit dates stored in the generation_data chunk. + */ +static int validate_mixed_generation_chain(struct commit_graph *g) { - int read_generation_data; + int read_generation_data = 1; + struct commit_graph *p = g; - if (!g) - return; + while (read_generation_data && p) { + read_generation_data = p->read_generation_data; + p = p->base_graph; + } - read_generation_data = !!g->chunk_generation_data; + if (read_generation_data) + return 1; while (g) { - g->read_generation_data = read_generation_data; + g->read_generation_data = 0; g = g->base_graph; } + + return 0; } struct commit_graph *read_commit_graph_one(struct repository *r, From 9c2c0a82560a49b6b491a88cbaeaaf30378ec6bb Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Tue, 2 Feb 2021 03:01:21 +0000 Subject: [PATCH 4/6] commit-graph: compute generations separately The compute_generation_numbers() method was introduced by 3258c663 (commit-graph: compute generation numbers, 2018-05-01) to compute what is now known as "topological levels". These are still stored in the commit-graph file for compatibility sake while c1a09119 (commit-graph: implement corrected commit date, 2021-01-16) updated the method to also compute the new version of generation numbers: corrected commit date. It makes sense why these are grouped. They perform very similar walks of the necessary commits and compute similar maximums over each parent. However, having these two together conflates them in subtle ways that is hard to separate. In particular, the topo_level slab is used to store the topological levels in all cases, but the commit_graph_data_at(c)->generation member stores different values depending on the state of the existing commit-graph file. * If the existing commit-graph file has a "GDAT" chunk, then these values represent corrected commit dates. * If the existing commit-graph file doesn't have a "GDAT" chunk, then these values are actually the topological levels. This issue only occurs only when upgrading an existing commit-graph file into one that has the "GDAT" chunk. The current change does not resolve this upgrade problem, but splitting the implementation into two pieces here helps with that process, which will follow in the next change. The important thing this helps with is the case where the num_generation_data_overflows was being incremented incorrectly, triggering a write of the overflow chunk. Signed-off-by: Derrick Stolee Reviewed-by: Taylor Blau Signed-off-by: Junio C Hamano --- commit-graph.c | 74 +++++++++++++++++++++++++++++++++++++++----------- 1 file changed, 58 insertions(+), 16 deletions(-) diff --git a/commit-graph.c b/commit-graph.c index b3f7c3bbcb..2790f70d11 100644 --- a/commit-graph.c +++ b/commit-graph.c @@ -1446,6 +1446,59 @@ static void close_reachable(struct write_commit_graph_context *ctx) stop_progress(&ctx->progress); } +static void compute_topological_levels(struct write_commit_graph_context *ctx) +{ + int i; + struct commit_list *list = NULL; + + if (ctx->report_progress) + ctx->progress = start_delayed_progress( + _("Computing commit graph topological levels"), + ctx->commits.nr); + for (i = 0; i < ctx->commits.nr; i++) { + struct commit *c = ctx->commits.list[i]; + uint32_t level; + + repo_parse_commit(ctx->r, c); + level = *topo_level_slab_at(ctx->topo_levels, c); + + display_progress(ctx->progress, i + 1); + if (level != GENERATION_NUMBER_ZERO) + continue; + + commit_list_insert(c, &list); + while (list) { + struct commit *current = list->item; + struct commit_list *parent; + int all_parents_computed = 1; + uint32_t max_level = 0; + + for (parent = current->parents; parent; parent = parent->next) { + repo_parse_commit(ctx->r, parent->item); + level = *topo_level_slab_at(ctx->topo_levels, parent->item); + + if (level == GENERATION_NUMBER_ZERO) { + all_parents_computed = 0; + commit_list_insert(parent->item, &list); + break; + } + + if (level > max_level) + max_level = level; + } + + if (all_parents_computed) { + pop_commit(&list); + + if (max_level > GENERATION_NUMBER_V1_MAX - 1) + max_level = GENERATION_NUMBER_V1_MAX - 1; + *topo_level_slab_at(ctx->topo_levels, current) = max_level + 1; + } + } + } + stop_progress(&ctx->progress); +} + static void compute_generation_numbers(struct write_commit_graph_context *ctx) { int i; @@ -1457,16 +1510,13 @@ static void compute_generation_numbers(struct write_commit_graph_context *ctx) ctx->commits.nr); for (i = 0; i < ctx->commits.nr; i++) { struct commit *c = ctx->commits.list[i]; - uint32_t level; timestamp_t corrected_commit_date; repo_parse_commit(ctx->r, c); - level = *topo_level_slab_at(ctx->topo_levels, c); corrected_commit_date = commit_graph_data_at(c)->generation; display_progress(ctx->progress, i + 1); - if (level != GENERATION_NUMBER_ZERO && - corrected_commit_date != GENERATION_NUMBER_ZERO) + if (corrected_commit_date != GENERATION_NUMBER_ZERO) continue; commit_list_insert(c, &list); @@ -1474,24 +1524,18 @@ static void compute_generation_numbers(struct write_commit_graph_context *ctx) struct commit *current = list->item; struct commit_list *parent; int all_parents_computed = 1; - uint32_t max_level = 0; timestamp_t max_corrected_commit_date = 0; for (parent = current->parents; parent; parent = parent->next) { repo_parse_commit(ctx->r, parent->item); - level = *topo_level_slab_at(ctx->topo_levels, parent->item); corrected_commit_date = commit_graph_data_at(parent->item)->generation; - if (level == GENERATION_NUMBER_ZERO || - corrected_commit_date == GENERATION_NUMBER_ZERO) { + if (corrected_commit_date == GENERATION_NUMBER_ZERO) { all_parents_computed = 0; commit_list_insert(parent->item, &list); break; } - if (level > max_level) - max_level = level; - if (corrected_commit_date > max_corrected_commit_date) max_corrected_commit_date = corrected_commit_date; } @@ -1499,10 +1543,6 @@ static void compute_generation_numbers(struct write_commit_graph_context *ctx) if (all_parents_computed) { pop_commit(&list); - if (max_level > GENERATION_NUMBER_V1_MAX - 1) - max_level = GENERATION_NUMBER_V1_MAX - 1; - *topo_level_slab_at(ctx->topo_levels, current) = max_level + 1; - if (current->date && current->date > max_corrected_commit_date) max_corrected_commit_date = current->date - 1; commit_graph_data_at(current)->generation = max_corrected_commit_date + 1; @@ -2401,7 +2441,9 @@ int write_commit_graph(struct object_directory *odb, validate_mixed_generation_chain(ctx->r->objects->commit_graph); - compute_generation_numbers(ctx); + compute_topological_levels(ctx); + if (ctx->write_generation_data) + compute_generation_numbers(ctx); if (ctx->changed_paths) compute_bloom_filters(ctx); From fde55b0906552537c8cbcbf654f8e9dd64414637 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Tue, 2 Feb 2021 03:01:22 +0000 Subject: [PATCH 5/6] commit-graph: be extra careful about mixed generations When upgrading to a commit-graph with corrected commit dates from one without, there are a few things that need to be considered. When computing generation numbers for the new commit-graph file that expects to add the generation_data chunk with corrected commit dates, we need to ensure that the 'generation' member of the commit_graph_data struct is set to zero for these commits. Unfortunately, the fallback to use topological level for generation number when corrected commit dates are not available are causing us harm here: parsing commits notices that read_generation_data is false and populates 'generation' with the topological level. The solution is to iterate through the commits, parse the commits to populate initial values, then reset the generation values to zero to trigger recalculation. This loop only occurs when the existing commit-graph data has no corrected commit dates. While this improves our situation somewhat, we have not completely solved the issue for correctly computing generation numbers for mixed layers. That follows in the next change. Signed-off-by: Derrick Stolee Reviewed-by: Taylor Blau Signed-off-by: Junio C Hamano --- commit-graph.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/commit-graph.c b/commit-graph.c index 2790f70d11..ee8d5a0cdb 100644 --- a/commit-graph.c +++ b/commit-graph.c @@ -1036,7 +1036,8 @@ struct write_commit_graph_context { split:1, changed_paths:1, order_by_pack:1, - write_generation_data:1; + write_generation_data:1, + trust_generation_numbers:1; struct topo_level_slab *topo_levels; const struct commit_graph_opts *opts; @@ -1508,6 +1509,15 @@ static void compute_generation_numbers(struct write_commit_graph_context *ctx) ctx->progress = start_delayed_progress( _("Computing commit graph generation numbers"), ctx->commits.nr); + + if (!ctx->trust_generation_numbers) { + for (i = 0; i < ctx->commits.nr; i++) { + struct commit *c = ctx->commits.list[i]; + repo_parse_commit(ctx->r, c); + commit_graph_data_at(c)->generation = GENERATION_NUMBER_ZERO; + } + } + for (i = 0; i < ctx->commits.nr; i++) { struct commit *c = ctx->commits.list[i]; timestamp_t corrected_commit_date; @@ -2439,7 +2449,7 @@ int write_commit_graph(struct object_directory *odb, } else ctx->num_commit_graphs_after = 1; - validate_mixed_generation_chain(ctx->r->objects->commit_graph); + ctx->trust_generation_numbers = validate_mixed_generation_chain(ctx->r->objects->commit_graph); compute_topological_levels(ctx); if (ctx->write_generation_data) From bc50d6c91f4002b4197a9f5ea5dfdc3c9f105a1c Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Tue, 2 Feb 2021 03:01:23 +0000 Subject: [PATCH 6/6] commit-graph: prepare commit graph Before checking if the repository has a commit-graph loaded, be sure to run prepare_commit_graph(). This is necessary because otherwise the topo_levels slab is not initialized. As we compute topo_levels for the new commits, we iterate further into the lower layers since the first visit to each commit looks as though the topo_level is not populated. By properly initializing the topo_slab, we fix the previously broken case of a split commit graph where a base layer has the generation_data_overflow chunk. Signed-off-by: Derrick Stolee Reviewed-by: Taylor Blau Signed-off-by: Junio C Hamano --- commit-graph.c | 10 ++-------- t/t5318-commit-graph.sh | 2 +- 2 files changed, 3 insertions(+), 9 deletions(-) diff --git a/commit-graph.c b/commit-graph.c index ee8d5a0cdb..94d3ab4a04 100644 --- a/commit-graph.c +++ b/commit-graph.c @@ -2348,6 +2348,7 @@ int write_commit_graph(struct object_directory *odb, init_topo_level_slab(&topo_levels); ctx->topo_levels = &topo_levels; + prepare_commit_graph(ctx->r); if (ctx->r->objects->commit_graph) { struct commit_graph *g = ctx->r->objects->commit_graph; @@ -2361,7 +2362,6 @@ int write_commit_graph(struct object_directory *odb, ctx->changed_paths = 1; if (!(flags & COMMIT_GRAPH_NO_WRITE_BLOOM_FILTERS)) { struct commit_graph *g; - prepare_commit_graph_one(ctx->r, ctx->odb); g = ctx->r->objects->commit_graph; @@ -2373,10 +2373,7 @@ int write_commit_graph(struct object_directory *odb, } if (ctx->split) { - struct commit_graph *g; - prepare_commit_graph(ctx->r); - - g = ctx->r->objects->commit_graph; + struct commit_graph *g = ctx->r->objects->commit_graph; while (g) { ctx->num_commit_graphs_before++; @@ -2400,9 +2397,6 @@ int write_commit_graph(struct object_directory *odb, ctx->approx_nr_objects = approximate_object_count(); - if (ctx->append) - prepare_commit_graph_one(ctx->r, ctx->odb); - if (ctx->append && ctx->r->objects->commit_graph) { struct commit_graph *g = ctx->r->objects->commit_graph; for (i = 0; i < g->num_commits; i++) { diff --git a/t/t5318-commit-graph.sh b/t/t5318-commit-graph.sh index 2cf29f425a..5b15f1aa0f 100755 --- a/t/t5318-commit-graph.sh +++ b/t/t5318-commit-graph.sh @@ -446,7 +446,7 @@ test_expect_success 'warn on improper hash version' ' ) ' -test_expect_failure 'lower layers have overflow chunk' ' +test_expect_success 'lower layers have overflow chunk' ' cd "$TRASH_DIRECTORY/full" && UNIX_EPOCH_ZERO="@0 +0000" && FUTURE_DATE="@2147483646 +0000" &&