Merge branch 'ds/commit-graph-bloom-updates' into master
Updates to the changed-paths bloom filter. * ds/commit-graph-bloom-updates: commit-graph: check all leading directories in changed path Bloom filters revision: empty pathspecs should not use Bloom filters revision.c: fix whitespace commit-graph: check chunk sizes after writing commit-graph: simplify chunk writes into loop commit-graph: unify the signatures of all write_graph_chunk_*() functions commit-graph: persist existence of changed-paths bloom: fix logic in get_bloom_filter() commit-graph: change test to die on parse, not load commit-graph: place bloom_settings in context
This commit is contained in:
148
commit-graph.c
148
commit-graph.c
@ -17,6 +17,8 @@
|
||||
#include "bloom.h"
|
||||
#include "commit-slab.h"
|
||||
#include "shallow.h"
|
||||
#include "json-writer.h"
|
||||
#include "trace2.h"
|
||||
|
||||
void git_test_write_commit_graph_or_die(void)
|
||||
{
|
||||
@ -617,10 +619,6 @@ static int prepare_commit_graph(struct repository *r)
|
||||
return !!r->objects->commit_graph;
|
||||
r->objects->commit_graph_attempted = 1;
|
||||
|
||||
if (git_env_bool(GIT_TEST_COMMIT_GRAPH_DIE_ON_LOAD, 0))
|
||||
die("dying as requested by the '%s' variable on commit-graph load!",
|
||||
GIT_TEST_COMMIT_GRAPH_DIE_ON_LOAD);
|
||||
|
||||
prepare_repo_settings(r);
|
||||
|
||||
if (!git_env_bool(GIT_TEST_COMMIT_GRAPH, 0) &&
|
||||
@ -849,6 +847,14 @@ static int parse_commit_in_graph_one(struct repository *r,
|
||||
|
||||
int parse_commit_in_graph(struct repository *r, struct commit *item)
|
||||
{
|
||||
static int checked_env = 0;
|
||||
|
||||
if (!checked_env &&
|
||||
git_env_bool(GIT_TEST_COMMIT_GRAPH_DIE_ON_PARSE, 0))
|
||||
die("dying as requested by the '%s' variable on commit-graph parse!",
|
||||
GIT_TEST_COMMIT_GRAPH_DIE_ON_PARSE);
|
||||
checked_env = 1;
|
||||
|
||||
if (!prepare_commit_graph(r))
|
||||
return 0;
|
||||
return parse_commit_in_graph_one(r, r->objects->commit_graph, item);
|
||||
@ -941,10 +947,11 @@ struct write_commit_graph_context {
|
||||
|
||||
const struct split_commit_graph_opts *split_opts;
|
||||
size_t total_bloom_filter_data_size;
|
||||
const struct bloom_filter_settings *bloom_settings;
|
||||
};
|
||||
|
||||
static void write_graph_chunk_fanout(struct hashfile *f,
|
||||
struct write_commit_graph_context *ctx)
|
||||
static int write_graph_chunk_fanout(struct hashfile *f,
|
||||
struct write_commit_graph_context *ctx)
|
||||
{
|
||||
int i, count = 0;
|
||||
struct commit **list = ctx->commits.list;
|
||||
@ -965,17 +972,21 @@ static void write_graph_chunk_fanout(struct hashfile *f,
|
||||
|
||||
hashwrite_be32(f, count);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void write_graph_chunk_oids(struct hashfile *f, int hash_len,
|
||||
struct write_commit_graph_context *ctx)
|
||||
static int write_graph_chunk_oids(struct hashfile *f,
|
||||
struct write_commit_graph_context *ctx)
|
||||
{
|
||||
struct commit **list = ctx->commits.list;
|
||||
int count;
|
||||
for (count = 0; count < ctx->commits.nr; count++, list++) {
|
||||
display_progress(ctx->progress, ++ctx->progress_cnt);
|
||||
hashwrite(f, (*list)->object.oid.hash, (int)hash_len);
|
||||
hashwrite(f, (*list)->object.oid.hash, the_hash_algo->rawsz);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static const unsigned char *commit_to_sha1(size_t index, void *table)
|
||||
@ -984,8 +995,8 @@ static const unsigned char *commit_to_sha1(size_t index, void *table)
|
||||
return commits[index]->object.oid.hash;
|
||||
}
|
||||
|
||||
static void write_graph_chunk_data(struct hashfile *f, int hash_len,
|
||||
struct write_commit_graph_context *ctx)
|
||||
static int write_graph_chunk_data(struct hashfile *f,
|
||||
struct write_commit_graph_context *ctx)
|
||||
{
|
||||
struct commit **list = ctx->commits.list;
|
||||
struct commit **last = ctx->commits.list + ctx->commits.nr;
|
||||
@ -1002,7 +1013,7 @@ static void write_graph_chunk_data(struct hashfile *f, int hash_len,
|
||||
die(_("unable to parse commit %s"),
|
||||
oid_to_hex(&(*list)->object.oid));
|
||||
tree = get_commit_tree_oid(*list);
|
||||
hashwrite(f, tree->hash, hash_len);
|
||||
hashwrite(f, tree->hash, the_hash_algo->rawsz);
|
||||
|
||||
parent = (*list)->parents;
|
||||
|
||||
@ -1082,10 +1093,12 @@ static void write_graph_chunk_data(struct hashfile *f, int hash_len,
|
||||
|
||||
list++;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void write_graph_chunk_extra_edges(struct hashfile *f,
|
||||
struct write_commit_graph_context *ctx)
|
||||
static int write_graph_chunk_extra_edges(struct hashfile *f,
|
||||
struct write_commit_graph_context *ctx)
|
||||
{
|
||||
struct commit **list = ctx->commits.list;
|
||||
struct commit **last = ctx->commits.list + ctx->commits.nr;
|
||||
@ -1134,10 +1147,12 @@ static void write_graph_chunk_extra_edges(struct hashfile *f,
|
||||
|
||||
list++;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void write_graph_chunk_bloom_indexes(struct hashfile *f,
|
||||
struct write_commit_graph_context *ctx)
|
||||
static int write_graph_chunk_bloom_indexes(struct hashfile *f,
|
||||
struct write_commit_graph_context *ctx)
|
||||
{
|
||||
struct commit **list = ctx->commits.list;
|
||||
struct commit **last = ctx->commits.list + ctx->commits.nr;
|
||||
@ -1145,30 +1160,54 @@ static void write_graph_chunk_bloom_indexes(struct hashfile *f,
|
||||
|
||||
while (list < last) {
|
||||
struct bloom_filter *filter = get_bloom_filter(ctx->r, *list, 0);
|
||||
cur_pos += filter->len;
|
||||
size_t len = filter ? filter->len : 0;
|
||||
cur_pos += len;
|
||||
display_progress(ctx->progress, ++ctx->progress_cnt);
|
||||
hashwrite_be32(f, cur_pos);
|
||||
list++;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void write_graph_chunk_bloom_data(struct hashfile *f,
|
||||
struct write_commit_graph_context *ctx,
|
||||
const struct bloom_filter_settings *settings)
|
||||
static void trace2_bloom_filter_settings(struct write_commit_graph_context *ctx)
|
||||
{
|
||||
struct json_writer jw = JSON_WRITER_INIT;
|
||||
|
||||
jw_object_begin(&jw, 0);
|
||||
jw_object_intmax(&jw, "hash_version", ctx->bloom_settings->hash_version);
|
||||
jw_object_intmax(&jw, "num_hashes", ctx->bloom_settings->num_hashes);
|
||||
jw_object_intmax(&jw, "bits_per_entry", ctx->bloom_settings->bits_per_entry);
|
||||
jw_end(&jw);
|
||||
|
||||
trace2_data_json("bloom", ctx->r, "settings", &jw);
|
||||
|
||||
jw_release(&jw);
|
||||
}
|
||||
|
||||
static int write_graph_chunk_bloom_data(struct hashfile *f,
|
||||
struct write_commit_graph_context *ctx)
|
||||
{
|
||||
struct commit **list = ctx->commits.list;
|
||||
struct commit **last = ctx->commits.list + ctx->commits.nr;
|
||||
|
||||
hashwrite_be32(f, settings->hash_version);
|
||||
hashwrite_be32(f, settings->num_hashes);
|
||||
hashwrite_be32(f, settings->bits_per_entry);
|
||||
trace2_bloom_filter_settings(ctx);
|
||||
|
||||
hashwrite_be32(f, ctx->bloom_settings->hash_version);
|
||||
hashwrite_be32(f, ctx->bloom_settings->num_hashes);
|
||||
hashwrite_be32(f, ctx->bloom_settings->bits_per_entry);
|
||||
|
||||
while (list < last) {
|
||||
struct bloom_filter *filter = get_bloom_filter(ctx->r, *list, 0);
|
||||
size_t len = filter ? filter->len : 0;
|
||||
|
||||
display_progress(ctx->progress, ++ctx->progress_cnt);
|
||||
hashwrite(f, filter->data, filter->len * sizeof(unsigned char));
|
||||
if (len)
|
||||
hashwrite(f, filter->data, len * sizeof(unsigned char));
|
||||
list++;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int oid_compare(const void *_a, const void *_b)
|
||||
@ -1579,9 +1618,13 @@ static int write_graph_chunk_base(struct hashfile *f,
|
||||
return 0;
|
||||
}
|
||||
|
||||
typedef int (*chunk_write_fn)(struct hashfile *f,
|
||||
struct write_commit_graph_context *ctx);
|
||||
|
||||
struct chunk_info {
|
||||
uint32_t id;
|
||||
uint64_t size;
|
||||
chunk_write_fn write_fn;
|
||||
};
|
||||
|
||||
static int write_commit_graph_file(struct write_commit_graph_context *ctx)
|
||||
@ -1596,7 +1639,15 @@ static int write_commit_graph_file(struct write_commit_graph_context *ctx)
|
||||
int num_chunks = 3;
|
||||
uint64_t chunk_offset;
|
||||
struct object_id file_hash;
|
||||
const struct bloom_filter_settings bloom_settings = DEFAULT_BLOOM_FILTER_SETTINGS;
|
||||
struct bloom_filter_settings bloom_settings = DEFAULT_BLOOM_FILTER_SETTINGS;
|
||||
|
||||
if (!ctx->bloom_settings) {
|
||||
bloom_settings.bits_per_entry = git_env_ulong("GIT_TEST_BLOOM_SETTINGS_BITS_PER_ENTRY",
|
||||
bloom_settings.bits_per_entry);
|
||||
bloom_settings.num_hashes = git_env_ulong("GIT_TEST_BLOOM_SETTINGS_NUM_HASHES",
|
||||
bloom_settings.num_hashes);
|
||||
ctx->bloom_settings = &bloom_settings;
|
||||
}
|
||||
|
||||
if (ctx->split) {
|
||||
struct strbuf tmp_file = STRBUF_INIT;
|
||||
@ -1644,27 +1695,34 @@ static int write_commit_graph_file(struct write_commit_graph_context *ctx)
|
||||
|
||||
chunks[0].id = GRAPH_CHUNKID_OIDFANOUT;
|
||||
chunks[0].size = GRAPH_FANOUT_SIZE;
|
||||
chunks[0].write_fn = write_graph_chunk_fanout;
|
||||
chunks[1].id = GRAPH_CHUNKID_OIDLOOKUP;
|
||||
chunks[1].size = hashsz * ctx->commits.nr;
|
||||
chunks[1].write_fn = write_graph_chunk_oids;
|
||||
chunks[2].id = GRAPH_CHUNKID_DATA;
|
||||
chunks[2].size = (hashsz + 16) * ctx->commits.nr;
|
||||
chunks[2].write_fn = write_graph_chunk_data;
|
||||
if (ctx->num_extra_edges) {
|
||||
chunks[num_chunks].id = GRAPH_CHUNKID_EXTRAEDGES;
|
||||
chunks[num_chunks].size = 4 * ctx->num_extra_edges;
|
||||
chunks[num_chunks].write_fn = write_graph_chunk_extra_edges;
|
||||
num_chunks++;
|
||||
}
|
||||
if (ctx->changed_paths) {
|
||||
chunks[num_chunks].id = GRAPH_CHUNKID_BLOOMINDEXES;
|
||||
chunks[num_chunks].size = sizeof(uint32_t) * ctx->commits.nr;
|
||||
chunks[num_chunks].write_fn = write_graph_chunk_bloom_indexes;
|
||||
num_chunks++;
|
||||
chunks[num_chunks].id = GRAPH_CHUNKID_BLOOMDATA;
|
||||
chunks[num_chunks].size = sizeof(uint32_t) * 3
|
||||
+ ctx->total_bloom_filter_data_size;
|
||||
chunks[num_chunks].write_fn = write_graph_chunk_bloom_data;
|
||||
num_chunks++;
|
||||
}
|
||||
if (ctx->num_commit_graphs_after > 1) {
|
||||
chunks[num_chunks].id = GRAPH_CHUNKID_BASE;
|
||||
chunks[num_chunks].size = hashsz * (ctx->num_commit_graphs_after - 1);
|
||||
chunks[num_chunks].write_fn = write_graph_chunk_base;
|
||||
num_chunks++;
|
||||
}
|
||||
|
||||
@ -1700,19 +1758,19 @@ static int write_commit_graph_file(struct write_commit_graph_context *ctx)
|
||||
progress_title.buf,
|
||||
num_chunks * ctx->commits.nr);
|
||||
}
|
||||
write_graph_chunk_fanout(f, ctx);
|
||||
write_graph_chunk_oids(f, hashsz, ctx);
|
||||
write_graph_chunk_data(f, hashsz, ctx);
|
||||
if (ctx->num_extra_edges)
|
||||
write_graph_chunk_extra_edges(f, ctx);
|
||||
if (ctx->changed_paths) {
|
||||
write_graph_chunk_bloom_indexes(f, ctx);
|
||||
write_graph_chunk_bloom_data(f, ctx, &bloom_settings);
|
||||
}
|
||||
if (ctx->num_commit_graphs_after > 1 &&
|
||||
write_graph_chunk_base(f, ctx)) {
|
||||
return -1;
|
||||
|
||||
for (i = 0; i < num_chunks; i++) {
|
||||
uint64_t start_offset = f->total + f->offset;
|
||||
|
||||
if (chunks[i].write_fn(f, ctx))
|
||||
return -1;
|
||||
|
||||
if (f->total + f->offset != start_offset + chunks[i].size)
|
||||
BUG("expected to write %"PRId64" bytes to chunk %"PRIx32", but wrote %"PRId64" instead",
|
||||
chunks[i].size, chunks[i].id,
|
||||
f->total + f->offset - start_offset);
|
||||
}
|
||||
|
||||
stop_progress(&ctx->progress);
|
||||
strbuf_release(&progress_title);
|
||||
|
||||
@ -2046,9 +2104,23 @@ int write_commit_graph(struct object_directory *odb,
|
||||
ctx->report_progress = flags & COMMIT_GRAPH_WRITE_PROGRESS ? 1 : 0;
|
||||
ctx->split = flags & COMMIT_GRAPH_WRITE_SPLIT ? 1 : 0;
|
||||
ctx->split_opts = split_opts;
|
||||
ctx->changed_paths = flags & COMMIT_GRAPH_WRITE_BLOOM_FILTERS ? 1 : 0;
|
||||
ctx->total_bloom_filter_data_size = 0;
|
||||
|
||||
if (flags & COMMIT_GRAPH_WRITE_BLOOM_FILTERS)
|
||||
ctx->changed_paths = 1;
|
||||
if (!(flags & COMMIT_GRAPH_NO_WRITE_BLOOM_FILTERS)) {
|
||||
struct commit_graph *g;
|
||||
prepare_commit_graph_one(ctx->r, ctx->odb);
|
||||
|
||||
g = ctx->r->objects->commit_graph;
|
||||
|
||||
/* We have changed-paths already. Keep them in the next graph */
|
||||
if (g && g->chunk_bloom_data) {
|
||||
ctx->changed_paths = 1;
|
||||
ctx->bloom_settings = g->bloom_filter_settings;
|
||||
}
|
||||
}
|
||||
|
||||
if (ctx->split) {
|
||||
struct commit_graph *g;
|
||||
prepare_commit_graph(ctx->r);
|
||||
|
Reference in New Issue
Block a user