Merge branch 'tb/path-filter-fix'
The Bloom filter used for path limited history traversal was broken on systems whose "char" is unsigned; update the implementation and bump the format version to 2. * tb/path-filter-fix: bloom: introduce `deinit_bloom_filters()` commit-graph: reuse existing Bloom filters where possible object.h: fix mis-aligned flag bits table commit-graph: new Bloom filter version that fixes murmur3 commit-graph: unconditionally load Bloom filters bloom: prepare to discard incompatible Bloom filters bloom: annotate filters with hash version repo-settings: introduce commitgraph.changedPathsVersion t4216: test changed path filters with high bit paths t/helper/test-read-graph: implement `bloom-filters` mode bloom.h: make `load_bloom_filter_from_graph()` public t/helper/test-read-graph.c: extract `dump_graph_info()` gitformat-commit-graph: describe version 2 of BDAT commit-graph: ensure Bloom filters are read with consistent settings revision.c: consult Bloom filters for root commits t/t4216-log-bloom.sh: harden `test_bloom_filters_not_used()`
This commit is contained in:
@ -346,7 +346,6 @@ static int graph_read_bloom_data(const unsigned char *chunk_start,
|
||||
size_t chunk_size, void *data)
|
||||
{
|
||||
struct commit_graph *g = data;
|
||||
uint32_t hash_version;
|
||||
|
||||
if (chunk_size < BLOOMDATA_CHUNK_HEADER_SIZE) {
|
||||
warning(_("ignoring too-small changed-path chunk"
|
||||
@ -358,13 +357,9 @@ static int graph_read_bloom_data(const unsigned char *chunk_start,
|
||||
|
||||
g->chunk_bloom_data = chunk_start;
|
||||
g->chunk_bloom_data_size = chunk_size;
|
||||
hash_version = get_be32(chunk_start);
|
||||
|
||||
if (hash_version != 1)
|
||||
return 0;
|
||||
|
||||
g->bloom_filter_settings = xmalloc(sizeof(struct bloom_filter_settings));
|
||||
g->bloom_filter_settings->hash_version = hash_version;
|
||||
g->bloom_filter_settings->hash_version = get_be32(chunk_start);
|
||||
g->bloom_filter_settings->num_hashes = get_be32(chunk_start + 4);
|
||||
g->bloom_filter_settings->bits_per_entry = get_be32(chunk_start + 8);
|
||||
g->bloom_filter_settings->max_changed_paths = DEFAULT_BLOOM_MAX_CHANGES;
|
||||
@ -461,7 +456,7 @@ struct commit_graph *parse_commit_graph(struct repo_settings *s,
|
||||
graph->read_generation_data = 1;
|
||||
}
|
||||
|
||||
if (s->commit_graph_read_changed_paths) {
|
||||
if (s->commit_graph_changed_paths_version) {
|
||||
read_chunk(cf, GRAPH_CHUNKID_BLOOMINDEXES,
|
||||
graph_read_bloom_index, graph);
|
||||
read_chunk(cf, GRAPH_CHUNKID_BLOOMDATA,
|
||||
@ -546,6 +541,31 @@ static int validate_mixed_generation_chain(struct commit_graph *g)
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void validate_mixed_bloom_settings(struct commit_graph *g)
|
||||
{
|
||||
struct bloom_filter_settings *settings = NULL;
|
||||
for (; g; g = g->base_graph) {
|
||||
if (!g->bloom_filter_settings)
|
||||
continue;
|
||||
if (!settings) {
|
||||
settings = g->bloom_filter_settings;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (g->bloom_filter_settings->bits_per_entry != settings->bits_per_entry ||
|
||||
g->bloom_filter_settings->num_hashes != settings->num_hashes ||
|
||||
g->bloom_filter_settings->hash_version != settings->hash_version) {
|
||||
g->chunk_bloom_indexes = NULL;
|
||||
g->chunk_bloom_data = NULL;
|
||||
FREE_AND_NULL(g->bloom_filter_settings);
|
||||
|
||||
warning(_("disabling Bloom filters for commit-graph "
|
||||
"layer '%s' due to incompatible settings"),
|
||||
oid_to_hex(&g->oid));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static int add_graph_to_chain(struct commit_graph *g,
|
||||
struct commit_graph *chain,
|
||||
struct object_id *oids,
|
||||
@ -670,6 +690,7 @@ struct commit_graph *load_commit_graph_chain_fd_st(struct repository *r,
|
||||
}
|
||||
|
||||
validate_mixed_generation_chain(graph_chain);
|
||||
validate_mixed_bloom_settings(graph_chain);
|
||||
|
||||
free(oids);
|
||||
fclose(fp);
|
||||
@ -814,6 +835,7 @@ void close_commit_graph(struct raw_object_store *o)
|
||||
return;
|
||||
|
||||
clear_commit_graph_data_slab(&commit_graph_data_slab);
|
||||
deinit_bloom_filters();
|
||||
free_commit_graph(o->commit_graph);
|
||||
o->commit_graph = NULL;
|
||||
}
|
||||
@ -1152,6 +1174,7 @@ struct write_commit_graph_context {
|
||||
int count_bloom_filter_not_computed;
|
||||
int count_bloom_filter_trunc_empty;
|
||||
int count_bloom_filter_trunc_large;
|
||||
int count_bloom_filter_upgraded;
|
||||
};
|
||||
|
||||
static int write_graph_chunk_fanout(struct hashfile *f,
|
||||
@ -1759,6 +1782,8 @@ static void trace2_bloom_filter_write_statistics(struct write_commit_graph_conte
|
||||
ctx->count_bloom_filter_trunc_empty);
|
||||
trace2_data_intmax("commit-graph", ctx->r, "filter-trunc-large",
|
||||
ctx->count_bloom_filter_trunc_large);
|
||||
trace2_data_intmax("commit-graph", ctx->r, "filter-upgraded",
|
||||
ctx->count_bloom_filter_upgraded);
|
||||
}
|
||||
|
||||
static void compute_bloom_filters(struct write_commit_graph_context *ctx)
|
||||
@ -1800,6 +1825,8 @@ static void compute_bloom_filters(struct write_commit_graph_context *ctx)
|
||||
ctx->count_bloom_filter_trunc_empty++;
|
||||
if (computed & BLOOM_TRUNC_LARGE)
|
||||
ctx->count_bloom_filter_trunc_large++;
|
||||
} else if (computed & BLOOM_UPGRADED) {
|
||||
ctx->count_bloom_filter_upgraded++;
|
||||
} else if (computed & BLOOM_NOT_COMPUTED)
|
||||
ctx->count_bloom_filter_not_computed++;
|
||||
ctx->total_bloom_filter_data_size += filter
|
||||
@ -2481,6 +2508,13 @@ int write_commit_graph(struct object_directory *odb,
|
||||
}
|
||||
if (!commit_graph_compatible(r))
|
||||
return 0;
|
||||
if (r->settings.commit_graph_changed_paths_version < -1
|
||||
|| r->settings.commit_graph_changed_paths_version > 2) {
|
||||
warning(_("attempting to write a commit-graph, but "
|
||||
"'commitGraph.changedPathsVersion' (%d) is not supported"),
|
||||
r->settings.commit_graph_changed_paths_version);
|
||||
return 0;
|
||||
}
|
||||
|
||||
CALLOC_ARRAY(ctx, 1);
|
||||
ctx->r = r;
|
||||
@ -2493,6 +2527,7 @@ int write_commit_graph(struct object_directory *odb,
|
||||
ctx->write_generation_data = (get_configured_generation_version(r) == 2);
|
||||
ctx->num_generation_data_overflows = 0;
|
||||
|
||||
bloom_settings.hash_version = r->settings.commit_graph_changed_paths_version;
|
||||
bloom_settings.bits_per_entry = git_env_ulong("GIT_TEST_BLOOM_SETTINGS_BITS_PER_ENTRY",
|
||||
bloom_settings.bits_per_entry);
|
||||
bloom_settings.num_hashes = git_env_ulong("GIT_TEST_BLOOM_SETTINGS_NUM_HASHES",
|
||||
@ -2522,12 +2557,20 @@ int write_commit_graph(struct object_directory *odb,
|
||||
g = ctx->r->objects->commit_graph;
|
||||
|
||||
/* We have changed-paths already. Keep them in the next graph */
|
||||
if (g && g->chunk_bloom_data) {
|
||||
if (g && g->bloom_filter_settings) {
|
||||
ctx->changed_paths = 1;
|
||||
ctx->bloom_settings = g->bloom_filter_settings;
|
||||
|
||||
/* don't propagate the hash_version unless unspecified */
|
||||
if (bloom_settings.hash_version == -1)
|
||||
bloom_settings.hash_version = g->bloom_filter_settings->hash_version;
|
||||
bloom_settings.bits_per_entry = g->bloom_filter_settings->bits_per_entry;
|
||||
bloom_settings.num_hashes = g->bloom_filter_settings->num_hashes;
|
||||
bloom_settings.max_changed_paths = g->bloom_filter_settings->max_changed_paths;
|
||||
}
|
||||
}
|
||||
|
||||
bloom_settings.hash_version = bloom_settings.hash_version == 2 ? 2 : 1;
|
||||
|
||||
if (ctx->split) {
|
||||
struct commit_graph *g = ctx->r->objects->commit_graph;
|
||||
|
||||
@ -2611,6 +2654,9 @@ int write_commit_graph(struct object_directory *odb,
|
||||
|
||||
res = write_commit_graph_file(ctx);
|
||||
|
||||
if (ctx->changed_paths)
|
||||
deinit_bloom_filters();
|
||||
|
||||
if (ctx->split)
|
||||
mark_commit_graphs(ctx);
|
||||
|
||||
|
Reference in New Issue
Block a user