Merge branch 'tb/bloom-improvements'

"git commit-graph write" learned to limit the number of bloom
filters that are computed from scratch with the --max-new-filters
option.

* tb/bloom-improvements:
  commit-graph: introduce 'commitGraph.maxNewFilters'
  builtin/commit-graph.c: introduce '--max-new-filters=<n>'
  commit-graph: rename 'split_commit_graph_opts'
  bloom: encode out-of-bounds filters as non-empty
  bloom/diff: properly short-circuit on max_changes
  bloom: use provided 'struct bloom_filter_settings'
  bloom: split 'get_bloom_filter()' in two
  commit-graph.c: store maximum changed paths
  commit-graph: respect 'commitGraph.readChangedPaths'
  t/helper/test-read-graph.c: prepare repo settings
  commit-graph: pass a 'struct repository *' in more places
  t4216: use an '&&'-chain
  commit-graph: introduce 'get_bloom_filter_settings()'
This commit is contained in:
Junio C Hamano
2020-09-29 14:01:20 -07:00
22 changed files with 508 additions and 123 deletions

View File

@ -340,6 +340,8 @@ include::config/column.txt[]
include::config/commit.txt[] include::config/commit.txt[]
include::config/commitgraph.txt[]
include::config/credential.txt[] include::config/credential.txt[]
include::config/completion.txt[] include::config/completion.txt[]

View File

@ -0,0 +1,8 @@
commitGraph.maxNewFilters::
Specifies the default value for the `--max-new-filters` option of `git
commit-graph write` (cf. linkgit:git-commit-graph[1]).
commitGraph.readChangedPaths::
If true, then git will use the changed-path Bloom filters in the
commit-graph file (if it exists, and they are present). Defaults to
true. See linkgit:git-commit-graph[1] for more information.

View File

@ -67,6 +67,13 @@ this option is given, future commit-graph writes will automatically assume
that this option was intended. Use `--no-changed-paths` to stop storing this that this option was intended. Use `--no-changed-paths` to stop storing this
data. data.
+ +
With the `--max-new-filters=<n>` option, generate at most `n` new Bloom
filters (if `--changed-paths` is specified). If `n` is `-1`, no limit is
enforced. Only commits present in the new layer count against this
limit. To retroactively compute Bloom filters over earlier layers, it is
advised to use `--split=replace`. Overrides the `commitGraph.maxNewFilters`
configuration.
+
With the `--split[=<strategy>]` option, write the commit-graph as a With the `--split[=<strategy>]` option, write the commit-graph as a
chain of multiple commit-graph files stored in chain of multiple commit-graph files stored in
`<dir>/info/commit-graphs`. Commit-graph layers are merged based on the `<dir>/info/commit-graphs`. Commit-graph layers are merged based on the

View File

@ -125,7 +125,7 @@ CHUNK DATA:
* The rest of the chunk is the concatenation of all the computed Bloom * The rest of the chunk is the concatenation of all the computed Bloom
filters for the commits in lexicographic order. filters for the commits in lexicographic order.
* Note: Commits with no changes or more than 512 changes have Bloom filters * Note: Commits with no changes or more than 512 changes have Bloom filters
of length zero. of length one, with either all bits set to zero or one respectively.
* The BDAT chunk is present if and only if BIDX is present. * The BDAT chunk is present if and only if BIDX is present.
Base Graphs List (ID: {'B', 'A', 'S', 'E'}) [Optional] Base Graphs List (ID: {'B', 'A', 'S', 'E'}) [Optional]

View File

@ -1276,7 +1276,7 @@ static int maybe_changed_path(struct repository *r,
if (commit_graph_generation(origin->commit) == GENERATION_NUMBER_INFINITY) if (commit_graph_generation(origin->commit) == GENERATION_NUMBER_INFINITY)
return 1; return 1;
filter = get_bloom_filter(r, origin->commit, 0); filter = get_bloom_filter(r, origin->commit);
if (!filter) if (!filter)
return 1; return 1;
@ -2892,16 +2892,18 @@ void setup_blame_bloom_data(struct blame_scoreboard *sb,
const char *path) const char *path)
{ {
struct blame_bloom_data *bd; struct blame_bloom_data *bd;
struct bloom_filter_settings *bs;
if (!sb->repo->objects->commit_graph) if (!sb->repo->objects->commit_graph)
return; return;
if (!sb->repo->objects->commit_graph->bloom_filter_settings) bs = get_bloom_filter_settings(sb->repo);
if (!bs)
return; return;
bd = xmalloc(sizeof(struct blame_bloom_data)); bd = xmalloc(sizeof(struct blame_bloom_data));
bd->settings = sb->repo->objects->commit_graph->bloom_filter_settings; bd->settings = bs;
bd->alloc = 4; bd->alloc = 4;
bd->nr = 0; bd->nr = 0;

57
bloom.c
View File

@ -38,7 +38,7 @@ static int load_bloom_filter_from_graph(struct commit_graph *g,
while (graph_pos < g->num_commits_in_base) while (graph_pos < g->num_commits_in_base)
g = g->base_graph; g = g->base_graph;
/* The commit graph commit 'c' lives in doesn't carry bloom filters. */ /* The commit graph commit 'c' lives in doesn't carry Bloom filters. */
if (!g->chunk_bloom_indexes) if (!g->chunk_bloom_indexes)
return 0; return 0;
@ -177,15 +177,25 @@ static int pathmap_cmp(const void *hashmap_cmp_fn_data,
return strcmp(e1->path, e2->path); return strcmp(e1->path, e2->path);
} }
struct bloom_filter *get_bloom_filter(struct repository *r, static void init_truncated_large_filter(struct bloom_filter *filter)
{
filter->data = xmalloc(1);
filter->data[0] = 0xFF;
filter->len = 1;
}
struct bloom_filter *get_or_compute_bloom_filter(struct repository *r,
struct commit *c, struct commit *c,
int compute_if_not_present) int compute_if_not_present,
const struct bloom_filter_settings *settings,
enum bloom_filter_computed *computed)
{ {
struct bloom_filter *filter; struct bloom_filter *filter;
struct bloom_filter_settings settings = DEFAULT_BLOOM_FILTER_SETTINGS;
int i; int i;
struct diff_options diffopt; struct diff_options diffopt;
int max_changes = 512;
if (computed)
*computed = BLOOM_NOT_COMPUTED;
if (!bloom_filters.slab_size) if (!bloom_filters.slab_size)
return NULL; return NULL;
@ -194,12 +204,11 @@ struct bloom_filter *get_bloom_filter(struct repository *r,
if (!filter->data) { if (!filter->data) {
load_commit_graph_info(r, c); load_commit_graph_info(r, c);
if (commit_graph_position(c) != COMMIT_NOT_FROM_GRAPH && if (commit_graph_position(c) != COMMIT_NOT_FROM_GRAPH)
r->objects->commit_graph->chunk_bloom_indexes)
load_bloom_filter_from_graph(r->objects->commit_graph, filter, c); load_bloom_filter_from_graph(r->objects->commit_graph, filter, c);
} }
if (filter->data) if (filter->data && filter->len)
return filter; return filter;
if (!compute_if_not_present) if (!compute_if_not_present)
return NULL; return NULL;
@ -207,7 +216,7 @@ struct bloom_filter *get_bloom_filter(struct repository *r,
repo_diff_setup(r, &diffopt); repo_diff_setup(r, &diffopt);
diffopt.flags.recursive = 1; diffopt.flags.recursive = 1;
diffopt.detect_rename = 0; diffopt.detect_rename = 0;
diffopt.max_changes = max_changes; diffopt.max_changes = settings->max_changed_paths;
diff_setup_done(&diffopt); diff_setup_done(&diffopt);
/* ensure commit is parsed so we have parent information */ /* ensure commit is parsed so we have parent information */
@ -219,7 +228,7 @@ struct bloom_filter *get_bloom_filter(struct repository *r,
diff_tree_oid(NULL, &c->object.oid, "", &diffopt); diff_tree_oid(NULL, &c->object.oid, "", &diffopt);
diffcore_std(&diffopt); diffcore_std(&diffopt);
if (diffopt.num_changes <= max_changes) { if (diff_queued_diff.nr <= settings->max_changed_paths) {
struct hashmap pathmap; struct hashmap pathmap;
struct pathmap_hash_entry *e; struct pathmap_hash_entry *e;
struct hashmap_iter iter; struct hashmap_iter iter;
@ -256,23 +265,41 @@ struct bloom_filter *get_bloom_filter(struct repository *r,
diff_free_filepair(diff_queued_diff.queue[i]); diff_free_filepair(diff_queued_diff.queue[i]);
} }
filter->len = (hashmap_get_size(&pathmap) * settings.bits_per_entry + BITS_PER_WORD - 1) / BITS_PER_WORD; if (hashmap_get_size(&pathmap) > settings->max_changed_paths) {
init_truncated_large_filter(filter);
if (computed)
*computed |= BLOOM_TRUNC_LARGE;
goto cleanup;
}
filter->len = (hashmap_get_size(&pathmap) * settings->bits_per_entry + BITS_PER_WORD - 1) / BITS_PER_WORD;
if (!filter->len) {
if (computed)
*computed |= BLOOM_TRUNC_EMPTY;
filter->len = 1;
}
filter->data = xcalloc(filter->len, sizeof(unsigned char)); filter->data = xcalloc(filter->len, sizeof(unsigned char));
hashmap_for_each_entry(&pathmap, &iter, e, entry) { hashmap_for_each_entry(&pathmap, &iter, e, entry) {
struct bloom_key key; struct bloom_key key;
fill_bloom_key(e->path, strlen(e->path), &key, &settings); fill_bloom_key(e->path, strlen(e->path), &key, settings);
add_key_to_filter(&key, filter, &settings); add_key_to_filter(&key, filter, settings);
} }
cleanup:
hashmap_free_entries(&pathmap, struct pathmap_hash_entry, entry); hashmap_free_entries(&pathmap, struct pathmap_hash_entry, entry);
} else { } else {
for (i = 0; i < diff_queued_diff.nr; i++) for (i = 0; i < diff_queued_diff.nr; i++)
diff_free_filepair(diff_queued_diff.queue[i]); diff_free_filepair(diff_queued_diff.queue[i]);
filter->data = NULL; init_truncated_large_filter(filter);
filter->len = 0;
if (computed)
*computed |= BLOOM_TRUNC_LARGE;
} }
if (computed)
*computed |= BLOOM_COMPUTED;
free(diff_queued_diff.queue); free(diff_queued_diff.queue);
DIFF_QUEUE_CLEAR(&diff_queued_diff); DIFF_QUEUE_CLEAR(&diff_queued_diff);

27
bloom.h
View File

@ -28,9 +28,18 @@ struct bloom_filter_settings {
* that contain n*b bits. * that contain n*b bits.
*/ */
uint32_t bits_per_entry; uint32_t bits_per_entry;
/*
* The maximum number of changed paths per commit
* before declaring a Bloom filter to be too-large.
*
* Not written to the commit-graph file.
*/
uint32_t max_changed_paths;
}; };
#define DEFAULT_BLOOM_FILTER_SETTINGS { 1, 7, 10 } #define DEFAULT_BLOOM_MAX_CHANGES 512
#define DEFAULT_BLOOM_FILTER_SETTINGS { 1, 7, 10, DEFAULT_BLOOM_MAX_CHANGES }
#define BITS_PER_WORD 8 #define BITS_PER_WORD 8
#define BLOOMDATA_CHUNK_HEADER_SIZE 3 * sizeof(uint32_t) #define BLOOMDATA_CHUNK_HEADER_SIZE 3 * sizeof(uint32_t)
@ -80,9 +89,21 @@ void add_key_to_filter(const struct bloom_key *key,
void init_bloom_filters(void); void init_bloom_filters(void);
struct bloom_filter *get_bloom_filter(struct repository *r, enum bloom_filter_computed {
BLOOM_NOT_COMPUTED = (1 << 0),
BLOOM_COMPUTED = (1 << 1),
BLOOM_TRUNC_LARGE = (1 << 2),
BLOOM_TRUNC_EMPTY = (1 << 3),
};
struct bloom_filter *get_or_compute_bloom_filter(struct repository *r,
struct commit *c, struct commit *c,
int compute_if_not_present); int compute_if_not_present,
const struct bloom_filter_settings *settings,
enum bloom_filter_computed *computed);
/*
 * Convenience wrapper around get_or_compute_bloom_filter() that passes 0
 * for 'compute_if_not_present' and NULL for both 'settings' and
 * 'computed'.
 */
#define get_bloom_filter(r, c) get_or_compute_bloom_filter( \
(r), (c), 0, NULL, NULL)
int bloom_filter_contains(const struct bloom_filter *filter, int bloom_filter_contains(const struct bloom_filter *filter,
const struct bloom_key *key, const struct bloom_key *key,

View File

@ -13,7 +13,8 @@ static char const * const builtin_commit_graph_usage[] = {
N_("git commit-graph verify [--object-dir <objdir>] [--shallow] [--[no-]progress]"), N_("git commit-graph verify [--object-dir <objdir>] [--shallow] [--[no-]progress]"),
N_("git commit-graph write [--object-dir <objdir>] [--append] " N_("git commit-graph write [--object-dir <objdir>] [--append] "
"[--split[=<strategy>]] [--reachable|--stdin-packs|--stdin-commits] " "[--split[=<strategy>]] [--reachable|--stdin-packs|--stdin-commits] "
"[--changed-paths] [--[no-]progress] <split options>"), "[--changed-paths] [--[no-]max-new-filters <n>] [--[no-]progress] "
"<split options>"),
NULL NULL
}; };
@ -25,7 +26,8 @@ static const char * const builtin_commit_graph_verify_usage[] = {
static const char * const builtin_commit_graph_write_usage[] = { static const char * const builtin_commit_graph_write_usage[] = {
N_("git commit-graph write [--object-dir <objdir>] [--append] " N_("git commit-graph write [--object-dir <objdir>] [--append] "
"[--split[=<strategy>]] [--reachable|--stdin-packs|--stdin-commits] " "[--split[=<strategy>]] [--reachable|--stdin-packs|--stdin-commits] "
"[--changed-paths] [--[no-]progress] <split options>"), "[--changed-paths] [--[no-]max-new-filters <n>] [--[no-]progress] "
"<split options>"),
NULL NULL
}; };
@ -106,7 +108,7 @@ static int graph_verify(int argc, const char **argv)
FREE_AND_NULL(graph_name); FREE_AND_NULL(graph_name);
if (open_ok) if (open_ok)
graph = load_commit_graph_one_fd_st(fd, &st, odb); graph = load_commit_graph_one_fd_st(the_repository, fd, &st, odb);
else else
graph = read_commit_graph_one(the_repository, odb); graph = read_commit_graph_one(the_repository, odb);
@ -119,7 +121,7 @@ static int graph_verify(int argc, const char **argv)
} }
extern int read_replace_refs; extern int read_replace_refs;
static struct split_commit_graph_opts split_opts; static struct commit_graph_opts write_opts;
static int write_option_parse_split(const struct option *opt, const char *arg, static int write_option_parse_split(const struct option *opt, const char *arg,
int unset) int unset)
@ -162,6 +164,35 @@ static int read_one_commit(struct oidset *commits, struct progress *progress,
return 0; return 0;
} }
/*
 * parse-options callback for '--max-new-filters'.
 *
 * 'opt->value' is treated as a pointer to the int that receives the limit.
 * When 'unset' is non-zero the int is set to -1.  Otherwise 'arg' is parsed
 * as a base-10 integer; input that is empty, has trailing non-numeric
 * characters, or does not fit in an int is rejected via error() and the
 * destination is left untouched.
 *
 * Returns 0 on success, or the value returned by error() on bad input.
 *
 * NOTE(review): assumes 'arg' is non-NULL whenever 'unset' is zero --
 * confirm against the parse-options contract for this option's flags.
 */
static int write_option_max_new_filters(const struct option *opt,
					const char *arg,
					int unset)
{
	int *to = opt->value;
	char *end;
	long parsed;

	if (unset) {
		*to = -1;
		return 0;
	}

	parsed = strtol(arg, &end, 10);
	/*
	 * 'end == arg' catches input with no digits at all (including the
	 * empty string), which a bare '*end' check would accept as 0.  The
	 * round-trip comparison rejects values that would be altered by
	 * narrowing from long to int.
	 */
	if (end == arg || *end || parsed != (int)parsed)
		return error(_("%s expects a numerical value"),
			     optname(opt, opt->flags));

	*to = (int)parsed;
	return 0;
}
/*
 * Config callback for 'git commit-graph write': stores the integer value of
 * 'commitgraph.maxnewfilters' in write_opts.max_new_filters.  Every other
 * key is ignored.  Always returns 0.
 *
 * There is deliberately no fall-back to 'git_default_config' here, since
 * that was already called in 'cmd_commit_graph()'.
 */
static int git_commit_graph_write_config(const char *var, const char *value,
					 void *cb)
{
	if (strcmp(var, "commitgraph.maxnewfilters") == 0) {
		write_opts.max_new_filters = git_config_int(var, value);
	}

	return 0;
}
static int graph_write(int argc, const char **argv) static int graph_write(int argc, const char **argv)
{ {
struct string_list pack_indexes = STRING_LIST_INIT_NODUP; struct string_list pack_indexes = STRING_LIST_INIT_NODUP;
@ -187,27 +218,33 @@ static int graph_write(int argc, const char **argv)
OPT_BOOL(0, "changed-paths", &opts.enable_changed_paths, OPT_BOOL(0, "changed-paths", &opts.enable_changed_paths,
N_("enable computation for changed paths")), N_("enable computation for changed paths")),
OPT_BOOL(0, "progress", &opts.progress, N_("force progress reporting")), OPT_BOOL(0, "progress", &opts.progress, N_("force progress reporting")),
OPT_CALLBACK_F(0, "split", &split_opts.flags, NULL, OPT_CALLBACK_F(0, "split", &write_opts.split_flags, NULL,
N_("allow writing an incremental commit-graph file"), N_("allow writing an incremental commit-graph file"),
PARSE_OPT_OPTARG | PARSE_OPT_NONEG, PARSE_OPT_OPTARG | PARSE_OPT_NONEG,
write_option_parse_split), write_option_parse_split),
OPT_INTEGER(0, "max-commits", &split_opts.max_commits, OPT_INTEGER(0, "max-commits", &write_opts.max_commits,
N_("maximum number of commits in a non-base split commit-graph")), N_("maximum number of commits in a non-base split commit-graph")),
OPT_INTEGER(0, "size-multiple", &split_opts.size_multiple, OPT_INTEGER(0, "size-multiple", &write_opts.size_multiple,
N_("maximum ratio between two levels of a split commit-graph")), N_("maximum ratio between two levels of a split commit-graph")),
OPT_EXPIRY_DATE(0, "expire-time", &split_opts.expire_time, OPT_EXPIRY_DATE(0, "expire-time", &write_opts.expire_time,
N_("only expire files older than a given date-time")), N_("only expire files older than a given date-time")),
OPT_CALLBACK_F(0, "max-new-filters", &write_opts.max_new_filters,
NULL, N_("maximum number of changed-path Bloom filters to compute"),
0, write_option_max_new_filters),
OPT_END(), OPT_END(),
}; };
opts.progress = isatty(2); opts.progress = isatty(2);
opts.enable_changed_paths = -1; opts.enable_changed_paths = -1;
split_opts.size_multiple = 2; write_opts.size_multiple = 2;
split_opts.max_commits = 0; write_opts.max_commits = 0;
split_opts.expire_time = 0; write_opts.expire_time = 0;
write_opts.max_new_filters = -1;
trace2_cmd_mode("write"); trace2_cmd_mode("write");
git_config(git_commit_graph_write_config, &opts);
argc = parse_options(argc, argv, NULL, argc = parse_options(argc, argv, NULL,
builtin_commit_graph_write_options, builtin_commit_graph_write_options,
builtin_commit_graph_write_usage, 0); builtin_commit_graph_write_usage, 0);
@ -232,7 +269,7 @@ static int graph_write(int argc, const char **argv)
odb = find_odb(the_repository, opts.obj_dir); odb = find_odb(the_repository, opts.obj_dir);
if (opts.reachable) { if (opts.reachable) {
if (write_commit_graph_reachable(odb, flags, &split_opts)) if (write_commit_graph_reachable(odb, flags, &write_opts))
return 1; return 1;
return 0; return 0;
} }
@ -261,7 +298,7 @@ static int graph_write(int argc, const char **argv)
opts.stdin_packs ? &pack_indexes : NULL, opts.stdin_packs ? &pack_indexes : NULL,
opts.stdin_commits ? &commits : NULL, opts.stdin_commits ? &commits : NULL,
flags, flags,
&split_opts)) &write_opts))
result = 1; result = 1;
cleanup: cleanup:

View File

@ -231,7 +231,8 @@ int open_commit_graph(const char *graph_file, int *fd, struct stat *st)
return 1; return 1;
} }
struct commit_graph *load_commit_graph_one_fd_st(int fd, struct stat *st, struct commit_graph *load_commit_graph_one_fd_st(struct repository *r,
int fd, struct stat *st,
struct object_directory *odb) struct object_directory *odb)
{ {
void *graph_map; void *graph_map;
@ -247,7 +248,7 @@ struct commit_graph *load_commit_graph_one_fd_st(int fd, struct stat *st,
} }
graph_map = xmmap(NULL, graph_size, PROT_READ, MAP_PRIVATE, fd, 0); graph_map = xmmap(NULL, graph_size, PROT_READ, MAP_PRIVATE, fd, 0);
close(fd); close(fd);
ret = parse_commit_graph(graph_map, graph_size); ret = parse_commit_graph(r, graph_map, graph_size);
if (ret) if (ret)
ret->odb = odb; ret->odb = odb;
@ -287,7 +288,8 @@ static int verify_commit_graph_lite(struct commit_graph *g)
return 0; return 0;
} }
struct commit_graph *parse_commit_graph(void *graph_map, size_t graph_size) struct commit_graph *parse_commit_graph(struct repository *r,
void *graph_map, size_t graph_size)
{ {
const unsigned char *data, *chunk_lookup; const unsigned char *data, *chunk_lookup;
uint32_t i; uint32_t i;
@ -325,6 +327,8 @@ struct commit_graph *parse_commit_graph(void *graph_map, size_t graph_size)
return NULL; return NULL;
} }
prepare_repo_settings(r);
graph = alloc_commit_graph(); graph = alloc_commit_graph();
graph->hash_len = the_hash_algo->rawsz; graph->hash_len = the_hash_algo->rawsz;
@ -401,14 +405,14 @@ struct commit_graph *parse_commit_graph(void *graph_map, size_t graph_size)
case GRAPH_CHUNKID_BLOOMINDEXES: case GRAPH_CHUNKID_BLOOMINDEXES:
if (graph->chunk_bloom_indexes) if (graph->chunk_bloom_indexes)
chunk_repeated = 1; chunk_repeated = 1;
else else if (r->settings.commit_graph_read_changed_paths)
graph->chunk_bloom_indexes = data + chunk_offset; graph->chunk_bloom_indexes = data + chunk_offset;
break; break;
case GRAPH_CHUNKID_BLOOMDATA: case GRAPH_CHUNKID_BLOOMDATA:
if (graph->chunk_bloom_data) if (graph->chunk_bloom_data)
chunk_repeated = 1; chunk_repeated = 1;
else { else if (r->settings.commit_graph_read_changed_paths) {
uint32_t hash_version; uint32_t hash_version;
graph->chunk_bloom_data = data + chunk_offset; graph->chunk_bloom_data = data + chunk_offset;
hash_version = get_be32(data + chunk_offset); hash_version = get_be32(data + chunk_offset);
@ -420,6 +424,7 @@ struct commit_graph *parse_commit_graph(void *graph_map, size_t graph_size)
graph->bloom_filter_settings->hash_version = hash_version; graph->bloom_filter_settings->hash_version = hash_version;
graph->bloom_filter_settings->num_hashes = get_be32(data + chunk_offset + 4); graph->bloom_filter_settings->num_hashes = get_be32(data + chunk_offset + 4);
graph->bloom_filter_settings->bits_per_entry = get_be32(data + chunk_offset + 8); graph->bloom_filter_settings->bits_per_entry = get_be32(data + chunk_offset + 8);
graph->bloom_filter_settings->max_changed_paths = DEFAULT_BLOOM_MAX_CHANGES;
} }
break; break;
} }
@ -452,7 +457,8 @@ free_and_return:
return NULL; return NULL;
} }
static struct commit_graph *load_commit_graph_one(const char *graph_file, static struct commit_graph *load_commit_graph_one(struct repository *r,
const char *graph_file,
struct object_directory *odb) struct object_directory *odb)
{ {
@ -464,7 +470,7 @@ static struct commit_graph *load_commit_graph_one(const char *graph_file,
if (!open_ok) if (!open_ok)
return NULL; return NULL;
g = load_commit_graph_one_fd_st(fd, &st, odb); g = load_commit_graph_one_fd_st(r, fd, &st, odb);
if (g) if (g)
g->filename = xstrdup(graph_file); g->filename = xstrdup(graph_file);
@ -476,7 +482,7 @@ static struct commit_graph *load_commit_graph_v1(struct repository *r,
struct object_directory *odb) struct object_directory *odb)
{ {
char *graph_name = get_commit_graph_filename(odb); char *graph_name = get_commit_graph_filename(odb);
struct commit_graph *g = load_commit_graph_one(graph_name, odb); struct commit_graph *g = load_commit_graph_one(r, graph_name, odb);
free(graph_name); free(graph_name);
return g; return g;
@ -557,7 +563,7 @@ static struct commit_graph *load_commit_graph_chain(struct repository *r,
valid = 0; valid = 0;
for (odb = r->objects->odb; odb; odb = odb->next) { for (odb = r->objects->odb; odb; odb = odb->next) {
char *graph_name = get_split_graph_filename(odb, line.buf); char *graph_name = get_split_graph_filename(odb, line.buf);
struct commit_graph *g = load_commit_graph_one(graph_name, odb); struct commit_graph *g = load_commit_graph_one(r, graph_name, odb);
free(graph_name); free(graph_name);
@ -667,6 +673,17 @@ int generation_numbers_enabled(struct repository *r)
return !!first_generation; return !!first_generation;
} }
/*
 * Walk the commit-graph chain, starting at r->objects->commit_graph and
 * following 'base_graph' links, and return the 'bloom_filter_settings' of
 * the first layer that has one.  Returns NULL when there is no commit-graph
 * or no layer carries settings.
 */
struct bloom_filter_settings *get_bloom_filter_settings(struct repository *r)
{
	struct commit_graph *layer;

	for (layer = r->objects->commit_graph; layer; layer = layer->base_graph) {
		if (layer->bloom_filter_settings)
			return layer->bloom_filter_settings;
	}

	return NULL;
}
static void close_commit_graph_one(struct commit_graph *g) static void close_commit_graph_one(struct commit_graph *g)
{ {
if (!g) if (!g)
@ -952,9 +969,14 @@ struct write_commit_graph_context {
changed_paths:1, changed_paths:1,
order_by_pack:1; order_by_pack:1;
const struct split_commit_graph_opts *split_opts; const struct commit_graph_opts *opts;
size_t total_bloom_filter_data_size; size_t total_bloom_filter_data_size;
const struct bloom_filter_settings *bloom_settings; const struct bloom_filter_settings *bloom_settings;
int count_bloom_filter_computed;
int count_bloom_filter_not_computed;
int count_bloom_filter_trunc_empty;
int count_bloom_filter_trunc_large;
}; };
static int write_graph_chunk_fanout(struct hashfile *f, static int write_graph_chunk_fanout(struct hashfile *f,
@ -1166,7 +1188,7 @@ static int write_graph_chunk_bloom_indexes(struct hashfile *f,
uint32_t cur_pos = 0; uint32_t cur_pos = 0;
while (list < last) { while (list < last) {
struct bloom_filter *filter = get_bloom_filter(ctx->r, *list, 0); struct bloom_filter *filter = get_bloom_filter(ctx->r, *list);
size_t len = filter ? filter->len : 0; size_t len = filter ? filter->len : 0;
cur_pos += len; cur_pos += len;
display_progress(ctx->progress, ++ctx->progress_cnt); display_progress(ctx->progress, ++ctx->progress_cnt);
@ -1185,6 +1207,7 @@ static void trace2_bloom_filter_settings(struct write_commit_graph_context *ctx)
jw_object_intmax(&jw, "hash_version", ctx->bloom_settings->hash_version); jw_object_intmax(&jw, "hash_version", ctx->bloom_settings->hash_version);
jw_object_intmax(&jw, "num_hashes", ctx->bloom_settings->num_hashes); jw_object_intmax(&jw, "num_hashes", ctx->bloom_settings->num_hashes);
jw_object_intmax(&jw, "bits_per_entry", ctx->bloom_settings->bits_per_entry); jw_object_intmax(&jw, "bits_per_entry", ctx->bloom_settings->bits_per_entry);
jw_object_intmax(&jw, "max_changed_paths", ctx->bloom_settings->max_changed_paths);
jw_end(&jw); jw_end(&jw);
trace2_data_json("bloom", ctx->r, "settings", &jw); trace2_data_json("bloom", ctx->r, "settings", &jw);
@ -1205,7 +1228,7 @@ static int write_graph_chunk_bloom_data(struct hashfile *f,
hashwrite_be32(f, ctx->bloom_settings->bits_per_entry); hashwrite_be32(f, ctx->bloom_settings->bits_per_entry);
while (list < last) { while (list < last) {
struct bloom_filter *filter = get_bloom_filter(ctx->r, *list, 0); struct bloom_filter *filter = get_bloom_filter(ctx->r, *list);
size_t len = filter ? filter->len : 0; size_t len = filter ? filter->len : 0;
display_progress(ctx->progress, ++ctx->progress_cnt); display_progress(ctx->progress, ++ctx->progress_cnt);
@ -1270,8 +1293,8 @@ static void close_reachable(struct write_commit_graph_context *ctx)
{ {
int i; int i;
struct commit *commit; struct commit *commit;
enum commit_graph_split_flags flags = ctx->split_opts ? enum commit_graph_split_flags flags = ctx->opts ?
ctx->split_opts->flags : COMMIT_GRAPH_SPLIT_UNSPECIFIED; ctx->opts->split_flags : COMMIT_GRAPH_SPLIT_UNSPECIFIED;
if (ctx->report_progress) if (ctx->report_progress)
ctx->progress = start_delayed_progress( ctx->progress = start_delayed_progress(
@ -1375,11 +1398,24 @@ static void compute_generation_numbers(struct write_commit_graph_context *ctx)
stop_progress(&ctx->progress); stop_progress(&ctx->progress);
} }
/*
 * Report the four Bloom filter counters accumulated in 'ctx' (computed,
 * not-computed, truncated-empty, truncated-large) as trace2 data under the
 * "commit-graph" category.
 */
static void trace2_bloom_filter_write_statistics(struct write_commit_graph_context *ctx)
{
	const char *category = "commit-graph";

	trace2_data_intmax(category, ctx->r,
			   "filter-computed",
			   ctx->count_bloom_filter_computed);
	trace2_data_intmax(category, ctx->r,
			   "filter-not-computed",
			   ctx->count_bloom_filter_not_computed);
	trace2_data_intmax(category, ctx->r,
			   "filter-trunc-empty",
			   ctx->count_bloom_filter_trunc_empty);
	trace2_data_intmax(category, ctx->r,
			   "filter-trunc-large",
			   ctx->count_bloom_filter_trunc_large);
}
static void compute_bloom_filters(struct write_commit_graph_context *ctx) static void compute_bloom_filters(struct write_commit_graph_context *ctx)
{ {
int i; int i;
struct progress *progress = NULL; struct progress *progress = NULL;
struct commit **sorted_commits; struct commit **sorted_commits;
int max_new_filters;
init_bloom_filters(); init_bloom_filters();
@ -1396,13 +1432,34 @@ static void compute_bloom_filters(struct write_commit_graph_context *ctx)
else else
QSORT(sorted_commits, ctx->commits.nr, commit_gen_cmp); QSORT(sorted_commits, ctx->commits.nr, commit_gen_cmp);
max_new_filters = ctx->opts && ctx->opts->max_new_filters >= 0 ?
ctx->opts->max_new_filters : ctx->commits.nr;
for (i = 0; i < ctx->commits.nr; i++) { for (i = 0; i < ctx->commits.nr; i++) {
enum bloom_filter_computed computed = 0;
struct commit *c = sorted_commits[i]; struct commit *c = sorted_commits[i];
struct bloom_filter *filter = get_bloom_filter(ctx->r, c, 1); struct bloom_filter *filter = get_or_compute_bloom_filter(
ctx->total_bloom_filter_data_size += sizeof(unsigned char) * filter->len; ctx->r,
c,
ctx->count_bloom_filter_computed < max_new_filters,
ctx->bloom_settings,
&computed);
if (computed & BLOOM_COMPUTED) {
ctx->count_bloom_filter_computed++;
if (computed & BLOOM_TRUNC_EMPTY)
ctx->count_bloom_filter_trunc_empty++;
if (computed & BLOOM_TRUNC_LARGE)
ctx->count_bloom_filter_trunc_large++;
} else if (computed & BLOOM_NOT_COMPUTED)
ctx->count_bloom_filter_not_computed++;
ctx->total_bloom_filter_data_size += filter
? sizeof(unsigned char) * filter->len : 0;
display_progress(progress, i + 1); display_progress(progress, i + 1);
} }
if (trace2_is_enabled())
trace2_bloom_filter_write_statistics(ctx);
free(sorted_commits); free(sorted_commits);
stop_progress(&progress); stop_progress(&progress);
} }
@ -1431,7 +1488,7 @@ static int add_ref_to_set(const char *refname,
int write_commit_graph_reachable(struct object_directory *odb, int write_commit_graph_reachable(struct object_directory *odb,
enum commit_graph_write_flags flags, enum commit_graph_write_flags flags,
const struct split_commit_graph_opts *split_opts) const struct commit_graph_opts *opts)
{ {
struct oidset commits = OIDSET_INIT; struct oidset commits = OIDSET_INIT;
struct refs_cb_data data; struct refs_cb_data data;
@ -1448,7 +1505,7 @@ int write_commit_graph_reachable(struct object_directory *odb,
stop_progress(&data.progress); stop_progress(&data.progress);
result = write_commit_graph(odb, NULL, &commits, result = write_commit_graph(odb, NULL, &commits,
flags, split_opts); flags, opts);
oidset_clear(&commits); oidset_clear(&commits);
return result; return result;
@ -1563,8 +1620,8 @@ static uint32_t count_distinct_commits(struct write_commit_graph_context *ctx)
static void copy_oids_to_commits(struct write_commit_graph_context *ctx) static void copy_oids_to_commits(struct write_commit_graph_context *ctx)
{ {
uint32_t i; uint32_t i;
enum commit_graph_split_flags flags = ctx->split_opts ? enum commit_graph_split_flags flags = ctx->opts ?
ctx->split_opts->flags : COMMIT_GRAPH_SPLIT_UNSPECIFIED; ctx->opts->split_flags : COMMIT_GRAPH_SPLIT_UNSPECIFIED;
ctx->num_extra_edges = 0; ctx->num_extra_edges = 0;
if (ctx->report_progress) if (ctx->report_progress)
@ -1646,15 +1703,6 @@ static int write_commit_graph_file(struct write_commit_graph_context *ctx)
int num_chunks = 3; int num_chunks = 3;
uint64_t chunk_offset; uint64_t chunk_offset;
struct object_id file_hash; struct object_id file_hash;
struct bloom_filter_settings bloom_settings = DEFAULT_BLOOM_FILTER_SETTINGS;
if (!ctx->bloom_settings) {
bloom_settings.bits_per_entry = git_env_ulong("GIT_TEST_BLOOM_SETTINGS_BITS_PER_ENTRY",
bloom_settings.bits_per_entry);
bloom_settings.num_hashes = git_env_ulong("GIT_TEST_BLOOM_SETTINGS_NUM_HASHES",
bloom_settings.num_hashes);
ctx->bloom_settings = &bloom_settings;
}
if (ctx->split) { if (ctx->split) {
struct strbuf tmp_file = STRBUF_INIT; struct strbuf tmp_file = STRBUF_INIT;
@ -1858,13 +1906,13 @@ static void split_graph_merge_strategy(struct write_commit_graph_context *ctx)
int max_commits = 0; int max_commits = 0;
int size_mult = 2; int size_mult = 2;
if (ctx->split_opts) { if (ctx->opts) {
max_commits = ctx->split_opts->max_commits; max_commits = ctx->opts->max_commits;
if (ctx->split_opts->size_multiple) if (ctx->opts->size_multiple)
size_mult = ctx->split_opts->size_multiple; size_mult = ctx->opts->size_multiple;
flags = ctx->split_opts->flags; flags = ctx->opts->split_flags;
} }
g = ctx->r->objects->commit_graph; g = ctx->r->objects->commit_graph;
@ -2042,8 +2090,8 @@ static void expire_commit_graphs(struct write_commit_graph_context *ctx)
size_t dirnamelen; size_t dirnamelen;
timestamp_t expire_time = time(NULL); timestamp_t expire_time = time(NULL);
if (ctx->split_opts && ctx->split_opts->expire_time) if (ctx->opts && ctx->opts->expire_time)
expire_time = ctx->split_opts->expire_time; expire_time = ctx->opts->expire_time;
if (!ctx->split) { if (!ctx->split) {
char *chain_file_name = get_commit_graph_chain_filename(ctx->odb); char *chain_file_name = get_commit_graph_chain_filename(ctx->odb);
unlink(chain_file_name); unlink(chain_file_name);
@ -2094,12 +2142,13 @@ int write_commit_graph(struct object_directory *odb,
struct string_list *pack_indexes, struct string_list *pack_indexes,
struct oidset *commits, struct oidset *commits,
enum commit_graph_write_flags flags, enum commit_graph_write_flags flags,
const struct split_commit_graph_opts *split_opts) const struct commit_graph_opts *opts)
{ {
struct write_commit_graph_context *ctx; struct write_commit_graph_context *ctx;
uint32_t i, count_distinct = 0; uint32_t i, count_distinct = 0;
int res = 0; int res = 0;
int replace = 0; int replace = 0;
struct bloom_filter_settings bloom_settings = DEFAULT_BLOOM_FILTER_SETTINGS;
if (!commit_graph_compatible(the_repository)) if (!commit_graph_compatible(the_repository))
return 0; return 0;
@ -2110,9 +2159,17 @@ int write_commit_graph(struct object_directory *odb,
ctx->append = flags & COMMIT_GRAPH_WRITE_APPEND ? 1 : 0; ctx->append = flags & COMMIT_GRAPH_WRITE_APPEND ? 1 : 0;
ctx->report_progress = flags & COMMIT_GRAPH_WRITE_PROGRESS ? 1 : 0; ctx->report_progress = flags & COMMIT_GRAPH_WRITE_PROGRESS ? 1 : 0;
ctx->split = flags & COMMIT_GRAPH_WRITE_SPLIT ? 1 : 0; ctx->split = flags & COMMIT_GRAPH_WRITE_SPLIT ? 1 : 0;
ctx->split_opts = split_opts; ctx->opts = opts;
ctx->total_bloom_filter_data_size = 0; ctx->total_bloom_filter_data_size = 0;
bloom_settings.bits_per_entry = git_env_ulong("GIT_TEST_BLOOM_SETTINGS_BITS_PER_ENTRY",
bloom_settings.bits_per_entry);
bloom_settings.num_hashes = git_env_ulong("GIT_TEST_BLOOM_SETTINGS_NUM_HASHES",
bloom_settings.num_hashes);
bloom_settings.max_changed_paths = git_env_ulong("GIT_TEST_BLOOM_SETTINGS_MAX_CHANGED_PATHS",
bloom_settings.max_changed_paths);
ctx->bloom_settings = &bloom_settings;
if (flags & COMMIT_GRAPH_WRITE_BLOOM_FILTERS) if (flags & COMMIT_GRAPH_WRITE_BLOOM_FILTERS)
ctx->changed_paths = 1; ctx->changed_paths = 1;
if (!(flags & COMMIT_GRAPH_NO_WRITE_BLOOM_FILTERS)) { if (!(flags & COMMIT_GRAPH_NO_WRITE_BLOOM_FILTERS)) {
@ -2150,15 +2207,15 @@ int write_commit_graph(struct object_directory *odb,
} }
} }
if (ctx->split_opts) if (ctx->opts)
replace = ctx->split_opts->flags & COMMIT_GRAPH_SPLIT_REPLACE; replace = ctx->opts->split_flags & COMMIT_GRAPH_SPLIT_REPLACE;
} }
ctx->approx_nr_objects = approximate_object_count(); ctx->approx_nr_objects = approximate_object_count();
ctx->oids.alloc = ctx->approx_nr_objects / 32; ctx->oids.alloc = ctx->approx_nr_objects / 32;
if (ctx->split && split_opts && ctx->oids.alloc > split_opts->max_commits) if (ctx->split && opts && ctx->oids.alloc > opts->max_commits)
ctx->oids.alloc = split_opts->max_commits; ctx->oids.alloc = opts->max_commits;
if (ctx->append) { if (ctx->append) {
prepare_commit_graph_one(ctx->r, ctx->odb); prepare_commit_graph_one(ctx->r, ctx->odb);

View File

@ -76,11 +76,13 @@ struct commit_graph {
struct bloom_filter_settings *bloom_filter_settings; struct bloom_filter_settings *bloom_filter_settings;
}; };
struct commit_graph *load_commit_graph_one_fd_st(int fd, struct stat *st, struct commit_graph *load_commit_graph_one_fd_st(struct repository *r,
int fd, struct stat *st,
struct object_directory *odb); struct object_directory *odb);
struct commit_graph *read_commit_graph_one(struct repository *r, struct commit_graph *read_commit_graph_one(struct repository *r,
struct object_directory *odb); struct object_directory *odb);
struct commit_graph *parse_commit_graph(void *graph_map, size_t graph_size); struct commit_graph *parse_commit_graph(struct repository *r,
void *graph_map, size_t graph_size);
/* /*
* Return 1 if and only if the repository has a commit-graph * Return 1 if and only if the repository has a commit-graph
@ -88,6 +90,8 @@ struct commit_graph *parse_commit_graph(void *graph_map, size_t graph_size);
*/ */
int generation_numbers_enabled(struct repository *r); int generation_numbers_enabled(struct repository *r);
struct bloom_filter_settings *get_bloom_filter_settings(struct repository *r);
enum commit_graph_write_flags { enum commit_graph_write_flags {
COMMIT_GRAPH_WRITE_APPEND = (1 << 0), COMMIT_GRAPH_WRITE_APPEND = (1 << 0),
COMMIT_GRAPH_WRITE_PROGRESS = (1 << 1), COMMIT_GRAPH_WRITE_PROGRESS = (1 << 1),
@ -102,11 +106,12 @@ enum commit_graph_split_flags {
COMMIT_GRAPH_SPLIT_REPLACE = 2 COMMIT_GRAPH_SPLIT_REPLACE = 2
}; };
struct split_commit_graph_opts { struct commit_graph_opts {
int size_multiple; int size_multiple;
int max_commits; int max_commits;
timestamp_t expire_time; timestamp_t expire_time;
enum commit_graph_split_flags flags; enum commit_graph_split_flags split_flags;
int max_new_filters;
}; };
/* /*
@ -117,12 +122,12 @@ struct split_commit_graph_opts {
*/ */
int write_commit_graph_reachable(struct object_directory *odb, int write_commit_graph_reachable(struct object_directory *odb,
enum commit_graph_write_flags flags, enum commit_graph_write_flags flags,
const struct split_commit_graph_opts *split_opts); const struct commit_graph_opts *opts);
int write_commit_graph(struct object_directory *odb, int write_commit_graph(struct object_directory *odb,
struct string_list *pack_indexes, struct string_list *pack_indexes,
struct oidset *commits, struct oidset *commits,
enum commit_graph_write_flags flags, enum commit_graph_write_flags flags,
const struct split_commit_graph_opts *split_opts); const struct commit_graph_opts *opts);
#define COMMIT_GRAPH_VERIFY_SHALLOW (1 << 0) #define COMMIT_GRAPH_VERIFY_SHALLOW (1 << 0)

2
diff.h
View File

@ -287,8 +287,6 @@ struct diff_options {
/* If non-zero, then stop computing after this many changes. */ /* If non-zero, then stop computing after this many changes. */
int max_changes; int max_changes;
/* For internal use only. */
int num_changes;
int ita_invisible_in_index; int ita_invisible_in_index;
/* white-space error highlighting */ /* white-space error highlighting */

View File

@ -1,7 +1,8 @@
#include "commit-graph.h" #include "commit-graph.h"
#include "repository.h" #include "repository.h"
struct commit_graph *parse_commit_graph(void *graph_map, size_t graph_size); struct commit_graph *parse_commit_graph(struct repository *r,
void *graph_map, size_t graph_size);
int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size); int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size);
@ -10,7 +11,7 @@ int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size)
struct commit_graph *g; struct commit_graph *g;
initialize_the_repository(); initialize_the_repository();
g = parse_commit_graph((void *)data, size); g = parse_commit_graph(the_repository, (void *)data, size);
repo_clear(the_repository); repo_clear(the_repository);
free_commit_graph(g); free_commit_graph(g);

View File

@ -1159,7 +1159,7 @@ static int bloom_filter_check(struct rev_info *rev,
return 1; return 1;
if (!rev->bloom_filter_settings || if (!rev->bloom_filter_settings ||
!(filter = get_bloom_filter(rev->repo, commit, 0))) !(filter = get_bloom_filter(rev->repo, commit)))
return 1; return 1;
if (!range) if (!range)

View File

@ -17,9 +17,12 @@ void prepare_repo_settings(struct repository *r)
if (!repo_config_get_bool(r, "core.commitgraph", &value)) if (!repo_config_get_bool(r, "core.commitgraph", &value))
r->settings.core_commit_graph = value; r->settings.core_commit_graph = value;
if (!repo_config_get_bool(r, "commitgraph.readchangedpaths", &value))
r->settings.commit_graph_read_changed_paths = value;
if (!repo_config_get_bool(r, "gc.writecommitgraph", &value)) if (!repo_config_get_bool(r, "gc.writecommitgraph", &value))
r->settings.gc_write_commit_graph = value; r->settings.gc_write_commit_graph = value;
UPDATE_DEFAULT_BOOL(r->settings.core_commit_graph, 1); UPDATE_DEFAULT_BOOL(r->settings.core_commit_graph, 1);
UPDATE_DEFAULT_BOOL(r->settings.commit_graph_read_changed_paths, 1);
UPDATE_DEFAULT_BOOL(r->settings.gc_write_commit_graph, 1); UPDATE_DEFAULT_BOOL(r->settings.gc_write_commit_graph, 1);
if (!repo_config_get_int(r, "index.version", &value)) if (!repo_config_get_int(r, "index.version", &value))

View File

@ -30,6 +30,7 @@ struct repo_settings {
int initialized; int initialized;
int core_commit_graph; int core_commit_graph;
int commit_graph_read_changed_paths;
int gc_write_commit_graph; int gc_write_commit_graph;
int fetch_write_commit_graph; int fetch_write_commit_graph;

View File

@ -681,10 +681,7 @@ static void prepare_to_use_bloom_filter(struct rev_info *revs)
repo_parse_commit(revs->repo, revs->commits->item); repo_parse_commit(revs->repo, revs->commits->item);
if (!revs->repo->objects->commit_graph) revs->bloom_filter_settings = get_bloom_filter_settings(revs->repo);
return;
revs->bloom_filter_settings = revs->repo->objects->commit_graph->bloom_filter_settings;
if (!revs->bloom_filter_settings) if (!revs->bloom_filter_settings)
return; return;
@ -755,7 +752,7 @@ static int check_maybe_different_in_bloom_filter(struct rev_info *revs,
if (commit_graph_generation(commit) == GENERATION_NUMBER_INFINITY) if (commit_graph_generation(commit) == GENERATION_NUMBER_INFINITY)
return -1; return -1;
filter = get_bloom_filter(revs->repo, commit, 0); filter = get_bloom_filter(revs->repo, commit);
if (!filter) { if (!filter) {
count_bloom_filter_not_present++; count_bloom_filter_not_present++;

View File

@ -39,7 +39,9 @@ static void get_bloom_filter_for_commit(const struct object_id *commit_oid)
struct bloom_filter *filter; struct bloom_filter *filter;
setup_git_directory(); setup_git_directory();
c = lookup_commit(the_repository, commit_oid); c = lookup_commit(the_repository, commit_oid);
filter = get_bloom_filter(the_repository, c, 1); filter = get_or_compute_bloom_filter(the_repository, c, 1,
&settings,
NULL);
print_bloom_filter(filter); print_bloom_filter(filter);
} }

View File

@ -12,11 +12,12 @@ int cmd__read_graph(int argc, const char **argv)
setup_git_directory(); setup_git_directory();
odb = the_repository->objects->odb; odb = the_repository->objects->odb;
prepare_repo_settings(the_repository);
graph = read_commit_graph_one(the_repository, odb); graph = read_commit_graph_one(the_repository, odb);
if (!graph) if (!graph)
return 1; return 1;
printf("header: %08x %d %d %d %d\n", printf("header: %08x %d %d %d %d\n",
ntohl(*(uint32_t*)graph->data), ntohl(*(uint32_t*)graph->data),
*(unsigned char*)(graph->data + 4), *(unsigned char*)(graph->data + 4),

View File

@ -71,8 +71,8 @@ test_expect_success 'get bloom filters for commit with no changes' '
git init && git init &&
git commit --allow-empty -m "c0" && git commit --allow-empty -m "c0" &&
cat >expect <<-\EOF && cat >expect <<-\EOF &&
Filter_Length:0 Filter_Length:1
Filter_Data: Filter_Data:00|
EOF EOF
test-tool bloom get_filter_for_commit "$(git rev-parse HEAD)" >actual && test-tool bloom get_filter_for_commit "$(git rev-parse HEAD)" >actual &&
test_cmp expect actual test_cmp expect actual
@ -107,8 +107,8 @@ test_expect_success EXPENSIVE 'get bloom filter for commit with 513 changes' '
git add bigDir && git add bigDir &&
git commit -m "commit with 513 changes" && git commit -m "commit with 513 changes" &&
cat >expect <<-\EOF && cat >expect <<-\EOF &&
Filter_Length:0 Filter_Length:1
Filter_Data: Filter_Data:ff|
EOF EOF
test-tool bloom get_filter_for_commit "$(git rev-parse HEAD)" >actual && test-tool bloom get_filter_for_commit "$(git rev-parse HEAD)" >actual &&
test_cmp expect actual test_cmp expect actual

View File

@ -30,6 +30,7 @@ test_expect_success 'setup test - repo, commits, commit graph, log outputs' '
rm file_to_be_deleted && rm file_to_be_deleted &&
git add . && git add . &&
git commit -m "file removed" && git commit -m "file removed" &&
git commit --allow-empty -m "empty" &&
git commit-graph write --reachable --changed-paths && git commit-graph write --reachable --changed-paths &&
test_oid_cache <<-EOF test_oid_cache <<-EOF
@ -37,6 +38,7 @@ test_expect_success 'setup test - repo, commits, commit graph, log outputs' '
oid_version sha256:2 oid_version sha256:2
EOF EOF
' '
graph_read_expect () { graph_read_expect () {
NUM_CHUNKS=5 NUM_CHUNKS=5
cat >expect <<- EOF cat >expect <<- EOF
@ -49,7 +51,7 @@ graph_read_expect () {
} }
test_expect_success 'commit-graph write wrote out the bloom chunks' ' test_expect_success 'commit-graph write wrote out the bloom chunks' '
graph_read_expect 15 graph_read_expect 16
' '
# Turn off any inherited trace2 settings for this test. # Turn off any inherited trace2 settings for this test.
@ -58,14 +60,14 @@ sane_unset GIT_TRACE2_PERF_BRIEF
sane_unset GIT_TRACE2_CONFIG_PARAMS sane_unset GIT_TRACE2_CONFIG_PARAMS
setup () { setup () {
rm "$TRASH_DIRECTORY/trace.perf" rm -f "$TRASH_DIRECTORY/trace.perf" &&
git -c core.commitGraph=false log --pretty="format:%s" $1 >log_wo_bloom && git -c core.commitGraph=false log --pretty="format:%s" $1 >log_wo_bloom &&
GIT_TRACE2_PERF="$TRASH_DIRECTORY/trace.perf" git -c core.commitGraph=true log --pretty="format:%s" $1 >log_w_bloom GIT_TRACE2_PERF="$TRASH_DIRECTORY/trace.perf" git -c core.commitGraph=true log --pretty="format:%s" $1 >log_w_bloom
} }
test_bloom_filters_used () { test_bloom_filters_used () {
log_args=$1 log_args=$1
bloom_trace_prefix="statistics:{\"filter_not_present\":0,\"maybe\"" bloom_trace_prefix="statistics:{\"filter_not_present\":${2:-0},\"maybe\""
setup "$log_args" && setup "$log_args" &&
grep -q "$bloom_trace_prefix" "$TRASH_DIRECTORY/trace.perf" && grep -q "$bloom_trace_prefix" "$TRASH_DIRECTORY/trace.perf" &&
test_cmp log_wo_bloom log_w_bloom && test_cmp log_wo_bloom log_w_bloom &&
@ -95,7 +97,9 @@ do
"--ancestry-path side..master" "--ancestry-path side..master"
do do
test_expect_success "git log option: $option for path: $path" ' test_expect_success "git log option: $option for path: $path" '
test_bloom_filters_used "$option -- $path" test_bloom_filters_used "$option -- $path" &&
test_config commitgraph.readChangedPaths false &&
test_bloom_filters_not_used "$option -- $path"
' '
done done
done done
@ -139,8 +143,11 @@ test_expect_success 'setup - add commit-graph to the chain without Bloom filters
test_line_count = 2 .git/objects/info/commit-graphs/commit-graph-chain test_line_count = 2 .git/objects/info/commit-graphs/commit-graph-chain
' '
test_expect_success 'Do not use Bloom filters if the latest graph does not have Bloom filters.' ' test_expect_success 'use Bloom filters even if the latest graph does not have Bloom filters' '
test_bloom_filters_not_used "-- A/B" # Ensure that the number of empty filters is equal to the number of
# filters in the latest graph layer to prove that they are loaded (and
# ignored).
test_bloom_filters_used "-- A/B" 3
' '
test_expect_success 'setup - add commit-graph to the chain with Bloom filters' ' test_expect_success 'setup - add commit-graph to the chain with Bloom filters' '
@ -151,7 +158,7 @@ test_expect_success 'setup - add commit-graph to the chain with Bloom filters' '
test_bloom_filters_used_when_some_filters_are_missing () { test_bloom_filters_used_when_some_filters_are_missing () {
log_args=$1 log_args=$1
bloom_trace_prefix="statistics:{\"filter_not_present\":3,\"maybe\":6,\"definitely_not\":8" bloom_trace_prefix="statistics:{\"filter_not_present\":3,\"maybe\":6,\"definitely_not\":9"
setup "$log_args" && setup "$log_args" &&
grep -q "$bloom_trace_prefix" "$TRASH_DIRECTORY/trace.perf" && grep -q "$bloom_trace_prefix" "$TRASH_DIRECTORY/trace.perf" &&
test_cmp log_wo_bloom log_w_bloom test_cmp log_wo_bloom log_w_bloom
@ -169,31 +176,230 @@ test_expect_success 'persist filter settings' '
GIT_TEST_BLOOM_SETTINGS_NUM_HASHES=9 \ GIT_TEST_BLOOM_SETTINGS_NUM_HASHES=9 \
GIT_TEST_BLOOM_SETTINGS_BITS_PER_ENTRY=15 \ GIT_TEST_BLOOM_SETTINGS_BITS_PER_ENTRY=15 \
git commit-graph write --reachable --changed-paths && git commit-graph write --reachable --changed-paths &&
grep "{\"hash_version\":1,\"num_hashes\":9,\"bits_per_entry\":15}" trace2.txt && grep "{\"hash_version\":1,\"num_hashes\":9,\"bits_per_entry\":15,\"max_changed_paths\":512" trace2.txt &&
GIT_TRACE2_EVENT="$(pwd)/trace2-auto.txt" \ GIT_TRACE2_EVENT="$(pwd)/trace2-auto.txt" \
GIT_TRACE2_EVENT_NESTING=5 \ GIT_TRACE2_EVENT_NESTING=5 \
git commit-graph write --reachable --changed-paths && git commit-graph write --reachable --changed-paths &&
grep "{\"hash_version\":1,\"num_hashes\":9,\"bits_per_entry\":15}" trace2-auto.txt grep "{\"hash_version\":1,\"num_hashes\":9,\"bits_per_entry\":15,\"max_changed_paths\":512" trace2-auto.txt
' '
# Succeed if the trace file named by $2 contains the text
# "max_changed_paths":<$1>.
#   $1 - expected max_changed_paths value
#   $2 - path of the trace file to search
test_max_changed_paths () {
	# Quote the file name so a path containing whitespace or glob
	# characters is passed to grep as a single argument.
	grep "\"max_changed_paths\":$1" "$2"
}
# Succeed if the trace file named by $2 contains an entry with key
# "filter-not-computed" whose value is $1.
#   $1 - expected counter value
#   $2 - path of the trace file to search
test_filter_not_computed () {
	# Quote the file name so a path containing whitespace or glob
	# characters is passed to grep as a single argument.
	grep "\"key\":\"filter-not-computed\",\"value\":\"$1\"" "$2"
}
# Succeed if the trace file named by $2 contains an entry with key
# "filter-computed" whose value is $1.
#   $1 - expected counter value
#   $2 - path of the trace file to search
test_filter_computed () {
	# Quote the file name so a path containing whitespace or glob
	# characters is passed to grep as a single argument.
	grep "\"key\":\"filter-computed\",\"value\":\"$1\"" "$2"
}
# Succeed if the trace file named by $2 contains an entry with key
# "filter-trunc-empty" whose value is $1.
#   $1 - expected counter value
#   $2 - path of the trace file to search
test_filter_trunc_empty () {
	# Quote the file name so a path containing whitespace or glob
	# characters is passed to grep as a single argument.
	grep "\"key\":\"filter-trunc-empty\",\"value\":\"$1\"" "$2"
}
# Succeed if the trace file named by $2 contains an entry with key
# "filter-trunc-large" whose value is $1.
#   $1 - expected counter value
#   $2 - path of the trace file to search
test_filter_trunc_large () {
	# Quote the file name so a path containing whitespace or glob
	# characters is passed to grep as a single argument.
	grep "\"key\":\"filter-trunc-large\",\"value\":\"$1\"" "$2"
}
test_expect_success 'correctly report changes over limit' ' test_expect_success 'correctly report changes over limit' '
git init 513changes && git init limits &&
( (
cd 513changes && cd limits &&
for i in $(test_seq 1 513) mkdir d &&
mkdir d/e &&
for i in $(test_seq 1 2)
do do
echo $i >file$i.txt || return 1 printf $i >d/file$i.txt &&
printf $i >d/e/file$i.txt || return 1
done && done &&
git add . &&
mkdir mode &&
printf bash >mode/script.sh &&
mkdir foo &&
touch foo/bar &&
touch foo.txt &&
git add d foo foo.txt mode &&
git commit -m "files" && git commit -m "files" &&
# Commit has 7 file and 4 directory adds
GIT_TEST_BLOOM_SETTINGS_MAX_CHANGED_PATHS=10 \
GIT_TRACE2_EVENT="$(pwd)/trace" \
git commit-graph write --reachable --changed-paths && git commit-graph write --reachable --changed-paths &&
for i in $(test_seq 1 513) test_max_changed_paths 10 trace &&
test_filter_computed 1 trace &&
test_filter_trunc_large 1 trace &&
for path in $(git ls-tree -r --name-only HEAD)
do do
git -c core.commitGraph=false log -- file$i.txt >expect && git -c commitGraph.readChangedPaths=false log \
git log -- file$i.txt >actual && -- $path >expect &&
git log -- $path >actual &&
test_cmp expect actual || return 1
done &&
# Make a variety of path changes
printf new1 >d/e/file1.txt &&
printf new2 >d/file2.txt &&
rm d/e/file2.txt &&
rm -r foo &&
printf text >foo &&
mkdir f &&
printf new1 >f/file1.txt &&
# including a mode-only change (counts as modified)
git update-index --chmod=+x mode/script.sh &&
git add foo d f &&
git commit -m "complicated" &&
# start from scratch and rebuild
rm -f .git/objects/info/commit-graph &&
GIT_TEST_BLOOM_SETTINGS_MAX_CHANGED_PATHS=10 \
GIT_TRACE2_EVENT="$(pwd)/trace-edit" \
git commit-graph write --reachable --changed-paths &&
test_max_changed_paths 10 trace-edit &&
test_filter_computed 2 trace-edit &&
test_filter_trunc_large 2 trace-edit &&
for path in $(git ls-tree -r --name-only HEAD)
do
git -c commitGraph.readChangedPaths=false log \
-- $path >expect &&
git log -- $path >actual &&
test_cmp expect actual || return 1
done &&
# start from scratch and rebuild
rm -f .git/objects/info/commit-graph &&
GIT_TEST_BLOOM_SETTINGS_MAX_CHANGED_PATHS=11 \
GIT_TRACE2_EVENT="$(pwd)/trace-update" \
git commit-graph write --reachable --changed-paths &&
test_max_changed_paths 11 trace-update &&
test_filter_computed 2 trace-update &&
test_filter_trunc_large 0 trace-update &&
for path in $(git ls-tree -r --name-only HEAD)
do
git -c commitGraph.readChangedPaths=false log \
-- $path >expect &&
git log -- $path >actual &&
test_cmp expect actual || return 1 test_cmp expect actual || return 1
done done
) )
' '
# In a fresh repository holding a single --allow-empty commit, write a
# commit-graph with --changed-paths and check the counters recorded in the
# trace2 event file: 1 filter computed, 0 not computed, 1 "trunc-empty"
# and 0 "trunc-large". The repository directory is removed when the test
# finishes.
test_expect_success 'correctly report commits with no changed paths' '
git init empty &&
test_when_finished "rm -fr empty" &&
(
cd empty &&
git commit --allow-empty -m "initial commit" &&
GIT_TRACE2_EVENT="$(pwd)/trace.event" \
git commit-graph write --reachable --changed-paths &&
test_filter_computed 1 trace.event &&
test_filter_not_computed 0 trace.event &&
test_filter_trunc_empty 1 trace.event &&
test_filter_trunc_large 0 trace.event
)
'
# Inside the existing "limits" repository, add three commits (c2, c3, c4)
# and rewrite the commit-graph with --split=replace --changed-paths
# --max-new-filters=2. The trace2 event file must then report 2 filters
# computed, 3 not computed, and no truncated filters.
test_expect_success 'Bloom generation is limited by --max-new-filters' '
(
cd limits &&
test_commit c2 filter &&
test_commit c3 filter &&
test_commit c4 no-filter &&
rm -f trace.event &&
GIT_TRACE2_EVENT="$(pwd)/trace.event" \
git commit-graph write --reachable --split=replace \
--changed-paths --max-new-filters=2 &&
test_filter_computed 2 trace.event &&
test_filter_not_computed 3 trace.event &&
test_filter_trunc_empty 0 trace.event &&
test_filter_trunc_large 0 trace.event
)
'
# Set commitGraph.maxNewFilters=1 in the "limits" repository and rewrite the
# commit-graph without passing --max-new-filters on the command line. The
# trace2 event file must report 1 filter computed and 4 not computed, with
# no truncated filters.
test_expect_success 'Bloom generation backfills previously-skipped filters' '
# Check specifying commitGraph.maxNewFilters over "git config" works.
test_config -C limits commitGraph.maxNewFilters 1 &&
(
cd limits &&
rm -f trace.event &&
GIT_TRACE2_EVENT="$(pwd)/trace.event" \
git commit-graph write --reachable --changed-paths \
--split=replace &&
test_filter_computed 1 trace.event &&
test_filter_not_computed 4 trace.event &&
test_filter_trunc_empty 0 trace.event &&
test_filter_trunc_large 0 trace.event
)
'
# In a fresh "override" repository with two commits, set
# commitGraph.maxNewFilters=2 but pass --max-new-filters=1 on the command
# line. The trace2 event file must report 1 filter computed and 1 not
# computed, i.e. the counts that match the command-line limit rather than
# the configured one. The repository directory is removed when the test
# finishes.
test_expect_success '--max-new-filters overrides configuration' '
git init override &&
test_when_finished "rm -fr override" &&
test_config -C override commitGraph.maxNewFilters 2 &&
(
cd override &&
test_commit one &&
test_commit two &&
rm -f trace.event &&
GIT_TRACE2_EVENT="$(pwd)/trace.event" \
git commit-graph write --reachable --changed-paths \
--max-new-filters=1 &&
test_filter_computed 1 trace.event &&
test_filter_not_computed 1 trace.event &&
test_filter_trunc_empty 0 trace.event &&
test_filter_trunc_large 0 trace.event
)
'
# Create six empty commits in a fresh "empty" repository, then write the
# commit-graph three times with --changed-paths --max-new-filters=2 and
# check the trace2 counters after each write (2 computed, 4 not computed,
# 2 trunc-empty, 0 trunc-large). A fourth write must compute no new filters.
# The repository directory is removed when the test finishes.
test_expect_success 'Bloom generation backfills empty commits' '
	git init empty &&
	test_when_finished "rm -fr empty" &&
	(
		cd empty &&
		for i in $(test_seq 1 6)
		do
			# A loop only yields the status of its last iteration, so
			# bail out explicitly; "exit" is used because this runs
			# inside a subshell.
			git commit --allow-empty -m "$i" || exit 1
		done &&

		# Generate Bloom filters for empty commits 1-6, two at a time.
		for i in $(test_seq 1 3)
		do
			rm -f trace.event &&
			GIT_TRACE2_EVENT="$(pwd)/trace.event" \
				git commit-graph write --reachable \
					--changed-paths --max-new-filters=2 &&
			test_filter_computed 2 trace.event &&
			test_filter_not_computed 4 trace.event &&
			test_filter_trunc_empty 2 trace.event &&
			test_filter_trunc_large 0 trace.event ||
			# Propagate a failed check from any iteration, not only
			# the last one.
			exit 1
		done &&

		# Finally, make sure that once all commits have filters, that
		# none are subsequently recomputed.
		rm -f trace.event &&
		GIT_TRACE2_EVENT="$(pwd)/trace.event" \
			git commit-graph write --reachable \
				--changed-paths --max-new-filters=2 &&
		test_filter_computed 0 trace.event &&
		test_filter_not_computed 6 trace.event &&
		test_filter_trunc_empty 0 trace.event &&
		test_filter_trunc_large 0 trace.event
	)
'
test_done test_done

View File

@ -427,4 +427,17 @@ done <<\EOF
0600 -r-------- 0600 -r--------
EOF EOF
# Remove any existing commit-graph data, then write two layers with
# --split=no-merge: the first (commit from file "a") with --changed-paths,
# the second (commit from file "b") without. A final --split=replace write
# with --changed-paths must leave exactly one graph-*.graph file and a
# chain whose files all exist.
# NOTE(review): the final write reads its commit list from a file named
# "c", which this test does not create; presumably it is left behind by an
# earlier test in this script, or "b" was intended -- confirm.
test_expect_success '--split=replace with partial Bloom data' '
rm -rf $graphdir $infodir/commit-graph &&
git reset --hard commits/3 &&
git rev-list -1 HEAD~2 >a &&
git rev-list -1 HEAD~1 >b &&
git commit-graph write --split=no-merge --stdin-commits --changed-paths <a &&
git commit-graph write --split=no-merge --stdin-commits <b &&
git commit-graph write --split=replace --stdin-commits --changed-paths <c &&
ls $graphdir/graph-*.graph >graph-files &&
test_line_count = 1 graph-files &&
verify_chain_files_exist $graphdir
'
test_done test_done

View File

@ -434,7 +434,7 @@ static struct combine_diff_path *ll_diff_tree_paths(
if (diff_can_quit_early(opt)) if (diff_can_quit_early(opt))
break; break;
if (opt->max_changes && opt->num_changes > opt->max_changes) if (opt->max_changes && diff_queued_diff.nr > opt->max_changes)
break; break;
if (opt->pathspec.nr) { if (opt->pathspec.nr) {
@ -521,7 +521,6 @@ static struct combine_diff_path *ll_diff_tree_paths(
/* t↓ */ /* t↓ */
update_tree_entry(&t); update_tree_entry(&t);
opt->num_changes++;
} }
/* t > p[imin] */ /* t > p[imin] */
@ -539,7 +538,6 @@ static struct combine_diff_path *ll_diff_tree_paths(
skip_emit_tp: skip_emit_tp:
/* ∀ pi=p[imin] pi↓ */ /* ∀ pi=p[imin] pi↓ */
update_tp_entries(tp, nparent); update_tp_entries(tp, nparent);
opt->num_changes++;
} }
} }
@ -557,7 +555,6 @@ struct combine_diff_path *diff_tree_paths(
const struct object_id **parents_oid, int nparent, const struct object_id **parents_oid, int nparent,
struct strbuf *base, struct diff_options *opt) struct strbuf *base, struct diff_options *opt)
{ {
opt->num_changes = 0;
p = ll_diff_tree_paths(p, oid, parents_oid, nparent, base, opt); p = ll_diff_tree_paths(p, oid, parents_oid, nparent, base, opt);
/* /*