Merge branch 'tb/path-filter-fix'

The Bloom filter used for path limited history traversal was broken
on systems whose "char" is unsigned; update the implementation and
bump the format version to 2.

* tb/path-filter-fix:
  bloom: introduce `deinit_bloom_filters()`
  commit-graph: reuse existing Bloom filters where possible
  object.h: fix mis-aligned flag bits table
  commit-graph: new Bloom filter version that fixes murmur3
  commit-graph: unconditionally load Bloom filters
  bloom: prepare to discard incompatible Bloom filters
  bloom: annotate filters with hash version
  repo-settings: introduce commitgraph.changedPathsVersion
  t4216: test changed path filters with high bit paths
  t/helper/test-read-graph: implement `bloom-filters` mode
  bloom.h: make `load_bloom_filter_from_graph()` public
  t/helper/test-read-graph.c: extract `dump_graph_info()`
  gitformat-commit-graph: describe version 2 of BDAT
  commit-graph: ensure Bloom filters are read with consistent settings
  revision.c: consult Bloom filters for root commits
  t/t4216-log-bloom.sh: harden `test_bloom_filters_not_used()`
This commit is contained in:
Junio C Hamano
2024-07-08 14:53:09 -07:00
14 changed files with 736 additions and 58 deletions

View File

@ -346,7 +346,6 @@ static int graph_read_bloom_data(const unsigned char *chunk_start,
size_t chunk_size, void *data)
{
struct commit_graph *g = data;
uint32_t hash_version;
if (chunk_size < BLOOMDATA_CHUNK_HEADER_SIZE) {
warning(_("ignoring too-small changed-path chunk"
@ -358,13 +357,9 @@ static int graph_read_bloom_data(const unsigned char *chunk_start,
g->chunk_bloom_data = chunk_start;
g->chunk_bloom_data_size = chunk_size;
hash_version = get_be32(chunk_start);
if (hash_version != 1)
return 0;
g->bloom_filter_settings = xmalloc(sizeof(struct bloom_filter_settings));
g->bloom_filter_settings->hash_version = hash_version;
g->bloom_filter_settings->hash_version = get_be32(chunk_start);
g->bloom_filter_settings->num_hashes = get_be32(chunk_start + 4);
g->bloom_filter_settings->bits_per_entry = get_be32(chunk_start + 8);
g->bloom_filter_settings->max_changed_paths = DEFAULT_BLOOM_MAX_CHANGES;
@ -461,7 +456,7 @@ struct commit_graph *parse_commit_graph(struct repo_settings *s,
graph->read_generation_data = 1;
}
if (s->commit_graph_read_changed_paths) {
if (s->commit_graph_changed_paths_version) {
read_chunk(cf, GRAPH_CHUNKID_BLOOMINDEXES,
graph_read_bloom_index, graph);
read_chunk(cf, GRAPH_CHUNKID_BLOOMDATA,
@ -546,6 +541,31 @@ static int validate_mixed_generation_chain(struct commit_graph *g)
return 0;
}
static void validate_mixed_bloom_settings(struct commit_graph *g)
{
struct bloom_filter_settings *settings = NULL;
for (; g; g = g->base_graph) {
if (!g->bloom_filter_settings)
continue;
if (!settings) {
settings = g->bloom_filter_settings;
continue;
}
if (g->bloom_filter_settings->bits_per_entry != settings->bits_per_entry ||
g->bloom_filter_settings->num_hashes != settings->num_hashes ||
g->bloom_filter_settings->hash_version != settings->hash_version) {
g->chunk_bloom_indexes = NULL;
g->chunk_bloom_data = NULL;
FREE_AND_NULL(g->bloom_filter_settings);
warning(_("disabling Bloom filters for commit-graph "
"layer '%s' due to incompatible settings"),
oid_to_hex(&g->oid));
}
}
}
static int add_graph_to_chain(struct commit_graph *g,
struct commit_graph *chain,
struct object_id *oids,
@ -670,6 +690,7 @@ struct commit_graph *load_commit_graph_chain_fd_st(struct repository *r,
}
validate_mixed_generation_chain(graph_chain);
validate_mixed_bloom_settings(graph_chain);
free(oids);
fclose(fp);
@ -814,6 +835,7 @@ void close_commit_graph(struct raw_object_store *o)
return;
clear_commit_graph_data_slab(&commit_graph_data_slab);
deinit_bloom_filters();
free_commit_graph(o->commit_graph);
o->commit_graph = NULL;
}
@ -1152,6 +1174,7 @@ struct write_commit_graph_context {
int count_bloom_filter_not_computed;
int count_bloom_filter_trunc_empty;
int count_bloom_filter_trunc_large;
int count_bloom_filter_upgraded;
};
static int write_graph_chunk_fanout(struct hashfile *f,
@ -1759,6 +1782,8 @@ static void trace2_bloom_filter_write_statistics(struct write_commit_graph_conte
ctx->count_bloom_filter_trunc_empty);
trace2_data_intmax("commit-graph", ctx->r, "filter-trunc-large",
ctx->count_bloom_filter_trunc_large);
trace2_data_intmax("commit-graph", ctx->r, "filter-upgraded",
ctx->count_bloom_filter_upgraded);
}
static void compute_bloom_filters(struct write_commit_graph_context *ctx)
@ -1800,6 +1825,8 @@ static void compute_bloom_filters(struct write_commit_graph_context *ctx)
ctx->count_bloom_filter_trunc_empty++;
if (computed & BLOOM_TRUNC_LARGE)
ctx->count_bloom_filter_trunc_large++;
} else if (computed & BLOOM_UPGRADED) {
ctx->count_bloom_filter_upgraded++;
} else if (computed & BLOOM_NOT_COMPUTED)
ctx->count_bloom_filter_not_computed++;
ctx->total_bloom_filter_data_size += filter
@ -2481,6 +2508,13 @@ int write_commit_graph(struct object_directory *odb,
}
if (!commit_graph_compatible(r))
return 0;
if (r->settings.commit_graph_changed_paths_version < -1
|| r->settings.commit_graph_changed_paths_version > 2) {
warning(_("attempting to write a commit-graph, but "
"'commitGraph.changedPathsVersion' (%d) is not supported"),
r->settings.commit_graph_changed_paths_version);
return 0;
}
CALLOC_ARRAY(ctx, 1);
ctx->r = r;
@ -2493,6 +2527,7 @@ int write_commit_graph(struct object_directory *odb,
ctx->write_generation_data = (get_configured_generation_version(r) == 2);
ctx->num_generation_data_overflows = 0;
bloom_settings.hash_version = r->settings.commit_graph_changed_paths_version;
bloom_settings.bits_per_entry = git_env_ulong("GIT_TEST_BLOOM_SETTINGS_BITS_PER_ENTRY",
bloom_settings.bits_per_entry);
bloom_settings.num_hashes = git_env_ulong("GIT_TEST_BLOOM_SETTINGS_NUM_HASHES",
@ -2522,12 +2557,20 @@ int write_commit_graph(struct object_directory *odb,
g = ctx->r->objects->commit_graph;
/* We have changed-paths already. Keep them in the next graph */
if (g && g->chunk_bloom_data) {
if (g && g->bloom_filter_settings) {
ctx->changed_paths = 1;
ctx->bloom_settings = g->bloom_filter_settings;
/* don't propagate the hash_version unless unspecified */
if (bloom_settings.hash_version == -1)
bloom_settings.hash_version = g->bloom_filter_settings->hash_version;
bloom_settings.bits_per_entry = g->bloom_filter_settings->bits_per_entry;
bloom_settings.num_hashes = g->bloom_filter_settings->num_hashes;
bloom_settings.max_changed_paths = g->bloom_filter_settings->max_changed_paths;
}
}
bloom_settings.hash_version = bloom_settings.hash_version == 2 ? 2 : 1;
if (ctx->split) {
struct commit_graph *g = ctx->r->objects->commit_graph;
@ -2611,6 +2654,9 @@ int write_commit_graph(struct object_directory *odb,
res = write_commit_graph_file(ctx);
if (ctx->changed_paths)
deinit_bloom_filters();
if (ctx->split)
mark_commit_graphs(ctx);