Merge branch 'ds/commit-graph-bloom-updates' into master

Updates to the changed-paths bloom filter.

* ds/commit-graph-bloom-updates:
  commit-graph: check all leading directories in changed path Bloom filters
  revision: empty pathspecs should not use Bloom filters
  revision.c: fix whitespace
  commit-graph: check chunk sizes after writing
  commit-graph: simplify chunk writes into loop
  commit-graph: unify the signatures of all write_graph_chunk_*() functions
  commit-graph: persist existence of changed-paths
  bloom: fix logic in get_bloom_filter()
  commit-graph: change test to die on parse, not load
  commit-graph: place bloom_settings in context
This commit is contained in:
Junio C Hamano
2020-07-30 13:20:31 -07:00
9 changed files with 217 additions and 74 deletions

View File

@ -633,7 +633,6 @@ static unsigned int count_bloom_filter_maybe;
static unsigned int count_bloom_filter_definitely_not;
static unsigned int count_bloom_filter_false_positive;
static unsigned int count_bloom_filter_not_present;
static unsigned int count_bloom_filter_length_zero;
static void trace2_bloom_filter_statistics_atexit(void)
{
@ -641,7 +640,6 @@ static void trace2_bloom_filter_statistics_atexit(void)
jw_object_begin(&jw, 0);
jw_object_intmax(&jw, "filter_not_present", count_bloom_filter_not_present);
jw_object_intmax(&jw, "zero_length_filter", count_bloom_filter_length_zero);
jw_object_intmax(&jw, "maybe", count_bloom_filter_maybe);
jw_object_intmax(&jw, "definitely_not", count_bloom_filter_definitely_not);
jw_object_intmax(&jw, "false_positive", count_bloom_filter_false_positive);
@ -670,9 +668,10 @@ static void prepare_to_use_bloom_filter(struct rev_info *revs)
{
struct pathspec_item *pi;
char *path_alloc = NULL;
const char *path;
const char *path, *p;
int last_index;
int len;
size_t len;
int path_component_nr = 1;
if (!revs->commits)
return;
@ -697,16 +696,45 @@ static void prepare_to_use_bloom_filter(struct rev_info *revs)
/* remove single trailing slash from path, if needed */
if (pi->match[last_index] == '/') {
path_alloc = xstrdup(pi->match);
path_alloc[last_index] = '\0';
path = path_alloc;
path_alloc = xstrdup(pi->match);
path_alloc[last_index] = '\0';
path = path_alloc;
} else
path = pi->match;
path = pi->match;
len = strlen(path);
if (!len) {
revs->bloom_filter_settings = NULL;
return;
}
revs->bloom_key = xmalloc(sizeof(struct bloom_key));
fill_bloom_key(path, len, revs->bloom_key, revs->bloom_filter_settings);
p = path;
while (*p) {
/*
* At this point, the path is normalized to use Unix-style
* path separators. This is required due to how the
* changed-path Bloom filters store the paths.
*/
if (*p == '/')
path_component_nr++;
p++;
}
revs->bloom_keys_nr = path_component_nr;
ALLOC_ARRAY(revs->bloom_keys, revs->bloom_keys_nr);
fill_bloom_key(path, len, &revs->bloom_keys[0],
revs->bloom_filter_settings);
path_component_nr = 1;
p = path + len - 1;
while (p > path) {
if (*p == '/')
fill_bloom_key(path, p - path,
&revs->bloom_keys[path_component_nr++],
revs->bloom_filter_settings);
p--;
}
if (trace2_is_enabled() && !bloom_filter_atexit_registered) {
atexit(trace2_bloom_filter_statistics_atexit);
@ -720,7 +748,7 @@ static int check_maybe_different_in_bloom_filter(struct rev_info *revs,
struct commit *commit)
{
struct bloom_filter *filter;
int result;
int result = 1, j;
if (!revs->repo->objects->commit_graph)
return -1;
@ -735,15 +763,12 @@ static int check_maybe_different_in_bloom_filter(struct rev_info *revs,
return -1;
}
if (!filter->len) {
count_bloom_filter_length_zero++;
return -1;
for (j = 0; result && j < revs->bloom_keys_nr; j++) {
result = bloom_filter_contains(filter,
&revs->bloom_keys[j],
revs->bloom_filter_settings);
}
result = bloom_filter_contains(filter,
revs->bloom_key,
revs->bloom_filter_settings);
if (result)
count_bloom_filter_maybe++;
else
@ -782,7 +807,7 @@ static int rev_compare_tree(struct rev_info *revs,
return REV_TREE_SAME;
}
if (revs->bloom_key && !nth_parent) {
if (revs->bloom_keys_nr && !nth_parent) {
bloom_ret = check_maybe_different_in_bloom_filter(revs, commit);
if (bloom_ret == 0)