From 638e1702d7cf3ca000f355432b7c12bd4a27b244 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Tue, 25 Jun 2024 13:40:00 -0400 Subject: [PATCH] commit-graph: unconditionally load Bloom filters In an earlier commit, we began ignoring the Bloom data ("BDAT") chunk for commit-graphs whose Bloom filters were computed using a hash version incompatible with the value of `commitGraph.changedPathVersion`. Now that the Bloom API has been hardened to discard these incompatible filters (with the exception of low-level APIs), we can safely load these Bloom filters unconditionally. We no longer want to return early from `graph_read_bloom_data()`, and similarly do not want to set the bloom_settings' `hash_version` field as a side-effect. The latter is because we want to wait until we know which Bloom settings we're using (either the defaults, from the GIT_TEST variables, or from the previous commit-graph layer) before deciding what hash_version to use. If we detect an existing BDAT chunk, we'll infer the rest of the settings (e.g., number of hashes, bits per entry, and maximum number of changed paths) from the earlier graph layer. The hash_version will be inferred from the previous layer as well, unless one has already been specified via configuration. Once all of that is done, we normalize the value of the hash_version to either "1" or "2". Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- commit-graph.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/commit-graph.c b/commit-graph.c index 91c98ebc6c..d6fb714f32 100644 --- a/commit-graph.c +++ b/commit-graph.c @@ -358,9 +358,6 @@ static int graph_read_bloom_data(const unsigned char *chunk_start, g->chunk_bloom_data_size = chunk_size; hash_version = get_be32(chunk_start); - if (hash_version != 1) - return 0; - g->bloom_filter_settings = xmalloc(sizeof(struct bloom_filter_settings)); g->bloom_filter_settings->hash_version = hash_version; g->bloom_filter_settings->num_hashes = get_be32(chunk_start + 4); @@ -2513,6 +2510,7 @@ int write_commit_graph(struct object_directory *odb, ctx->write_generation_data = (get_configured_generation_version(r) == 2); ctx->num_generation_data_overflows = 0; + bloom_settings.hash_version = r->settings.commit_graph_changed_paths_version; bloom_settings.bits_per_entry = git_env_ulong("GIT_TEST_BLOOM_SETTINGS_BITS_PER_ENTRY", bloom_settings.bits_per_entry); bloom_settings.num_hashes = git_env_ulong("GIT_TEST_BLOOM_SETTINGS_NUM_HASHES", @@ -2544,10 +2542,18 @@ int write_commit_graph(struct object_directory *odb, /* We have changed-paths already. Keep them in the next graph */ if (g && g->chunk_bloom_data) { ctx->changed_paths = 1; - ctx->bloom_settings = g->bloom_filter_settings; + + /* don't propagate the hash_version unless unspecified */ + if (bloom_settings.hash_version == -1) + bloom_settings.hash_version = g->bloom_filter_settings->hash_version; + bloom_settings.bits_per_entry = g->bloom_filter_settings->bits_per_entry; + bloom_settings.num_hashes = g->bloom_filter_settings->num_hashes; + bloom_settings.max_changed_paths = g->bloom_filter_settings->max_changed_paths; } } + bloom_settings.hash_version = bloom_settings.hash_version == 2 ? 2 : 1; + if (ctx->split) { struct commit_graph *g = ctx->r->objects->commit_graph;