Merge branch 'tb/path-filter-fix'

The Bloom filter used for path-limited history traversal was broken
on systems whose "char" is unsigned; update the implementation and
bump the format version to 2.

* tb/path-filter-fix:
  bloom: introduce `deinit_bloom_filters()`
  commit-graph: reuse existing Bloom filters where possible
  object.h: fix mis-aligned flag bits table
  commit-graph: new Bloom filter version that fixes murmur3
  commit-graph: unconditionally load Bloom filters
  bloom: prepare to discard incompatible Bloom filters
  bloom: annotate filters with hash version
  repo-settings: introduce commitgraph.changedPathsVersion
  t4216: test changed path filters with high bit paths
  t/helper/test-read-graph: implement `bloom-filters` mode
  bloom.h: make `load_bloom_filter_from_graph()` public
  t/helper/test-read-graph.c: extract `dump_graph_info()`
  gitformat-commit-graph: describe version 2 of BDAT
  commit-graph: ensure Bloom filters are read with consistent settings
  revision.c: consult Bloom filters for root commits
  t/t4216-log-bloom.sh: harden `test_bloom_filters_not_used()`
This commit is contained in:
Junio C Hamano
2024-07-08 14:53:09 -07:00
14 changed files with 736 additions and 58 deletions

View File

@ -51,6 +51,7 @@ static void get_bloom_filter_for_commit(const struct object_id *commit_oid)
static const char *bloom_usage = "\n"
" test-tool bloom get_murmur3 <string>\n"
" test-tool bloom get_murmur3_seven_highbit\n"
" test-tool bloom generate_filter <string> [<string>...]\n"
" test-tool bloom get_filter_for_commit <commit-hex>\n";
@ -65,7 +66,13 @@ int cmd__bloom(int argc, const char **argv)
uint32_t hashed;
if (argc < 3)
usage(bloom_usage);
hashed = murmur3_seeded(0, argv[2], strlen(argv[2]));
hashed = murmur3_seeded_v2(0, argv[2], strlen(argv[2]));
printf("Murmur3 Hash with seed=0:0x%08x\n", hashed);
}
if (!strcmp(argv[1], "get_murmur3_seven_highbit")) {
uint32_t hashed;
hashed = murmur3_seeded_v2(0, "\x99\xaa\xbb\xcc\xdd\xee\xff", 7);
printf("Murmur3 Hash with seed=0:0x%08x\n", hashed);
}

View File

@ -7,20 +7,8 @@
#include "bloom.h"
#include "setup.h"
int cmd__read_graph(int argc UNUSED, const char **argv UNUSED)
static void dump_graph_info(struct commit_graph *graph)
{
struct commit_graph *graph = NULL;
struct object_directory *odb;
setup_git_directory();
odb = the_repository->objects->odb;
prepare_repo_settings(the_repository);
graph = read_commit_graph_one(the_repository, odb);
if (!graph)
return 1;
printf("header: %08x %d %d %d %d\n",
ntohl(*(uint32_t*)graph->data),
*(unsigned char*)(graph->data + 4),
@ -59,8 +47,57 @@ int cmd__read_graph(int argc UNUSED, const char **argv UNUSED)
if (graph->topo_levels)
printf(" topo_levels");
printf("\n");
}
static void dump_graph_bloom_filters(struct commit_graph *graph)
{
uint32_t i;
for (i = 0; i < graph->num_commits + graph->num_commits_in_base; i++) {
struct bloom_filter filter = { 0 };
size_t j;
if (load_bloom_filter_from_graph(graph, &filter, i) < 0) {
fprintf(stderr, "missing Bloom filter for graph "
"position %"PRIu32"\n", i);
continue;
}
for (j = 0; j < filter.len; j++)
printf("%02x", filter.data[j]);
if (filter.len)
printf("\n");
}
}
/*
 * Entry point of the read-graph test helper.
 *
 * Reads the commit-graph of the_repository's object directory and then
 * dispatches on argv[1]:
 *   - no sub-command (argc <= 1): dump_graph_info()
 *   - "bloom-filters":            dump_graph_bloom_filters()
 *   - anything else:              report the unknown sub-command on stderr
 *
 * Returns 0 on success, and 1 when no commit-graph could be read or the
 * sub-command is not recognized.
 */
int cmd__read_graph(int argc, const char **argv)
{
	struct commit_graph *graph = NULL;
	struct object_directory *odb;
	int ret = 0;

	setup_git_directory();
	odb = the_repository->objects->odb;

	prepare_repo_settings(the_repository);

	graph = read_commit_graph_one(the_repository, odb);
	if (!graph) {
		ret = 1;
		goto done;
	}

	if (argc <= 1)
		dump_graph_info(graph);
	else if (!strcmp(argv[1], "bloom-filters"))
		dump_graph_bloom_filters(graph);
	else {
		fprintf(stderr, "unknown sub-command: '%s'\n", argv[1]);
		ret = 1;
	}

done:
	/* The graph is handed to UNLEAK() rather than freed here. */
	UNLEAK(graph);
	/*
	 * Propagate the recorded status; an unconditional "return 0" here
	 * would hide both failure paths above from the caller.
	 */
	return ret;
}