Merge branch 'sg/commit-graph-cleanups' into pu

The changed-path Bloom filter is improved using ideas from an
independent implementation.

* sg/commit-graph-cleanups:
  commit-graph: persist existence of changed-paths
  commit-graph: change test to die on parse, not load
  bloom: enforce a minimum size of 8 bytes
  commit-graph: check all leading directories in changed path Bloom filters
  commit-graph: check chunk sizes after writing
  commit-graph: simplify chunk writes into loop
  commit-graph: unify the signatures of all write_graph_chunk_*() functions
  commit-graph: place bloom_settings in context
  commit-graph: simplify write_commit_graph_file() #2
  commit-graph: simplify write_commit_graph_file() #1
  commit-graph: simplify parse_commit_graph() #2
  commit-graph: simplify parse_commit_graph() #1
  commit-graph: clean up #includes
  diff.h: drop diff_tree_oid() & friends' return value
  commit-slab: add a function to deep free entries on the slab
  commit-graph-format.txt: all multi-byte numbers are in network byte order
  commit-graph: fix parsing the Chunk Lookup table
  tree-walk.c: don't match submodule entries for 'submod/anything'
This commit is contained in:
Junio C Hamano
2020-06-19 14:52:42 -07:00
18 changed files with 241 additions and 160 deletions

View File

@ -62,7 +62,10 @@ existing commit-graph file.
With the `--changed-paths` option, compute and write information about the With the `--changed-paths` option, compute and write information about the
paths changed between a commit and its first parent. This operation can paths changed between a commit and its first parent. This operation can
take a while on large repositories. It provides significant performance gains take a while on large repositories. It provides significant performance gains
for getting history of a directory or a file with `git log -- <path>`. for getting history of a directory or a file with `git log -- <path>`. If
this option is given, future commit-graph writes will automatically assume
that this option was intended. Use `--no-changed-paths` to stop storing this
data.
+ +
With the `--split[=<strategy>]` option, write the commit-graph as a With the `--split[=<strategy>]` option, write the commit-graph as a
chain of multiple commit-graph files stored in chain of multiple commit-graph files stored in

View File

@ -32,7 +32,7 @@ the body into "chunks" and provide a binary lookup table at the beginning
of the body. The header includes certain values, such as number of chunks of the body. The header includes certain values, such as number of chunks
and hash type. and hash type.
All 4-byte numbers are in network order. All multi-byte numbers are in network byte order.
HEADER: HEADER:

View File

@ -259,6 +259,10 @@ struct bloom_filter *get_bloom_filter(struct repository *r,
} }
filter->len = (hashmap_get_size(&pathmap) * settings.bits_per_entry + BITS_PER_WORD - 1) / BITS_PER_WORD; filter->len = (hashmap_get_size(&pathmap) * settings.bits_per_entry + BITS_PER_WORD - 1) / BITS_PER_WORD;
if (filter->len && filter->len < 8)
filter->len = 8;
filter->data = xcalloc(filter->len, sizeof(unsigned char)); filter->data = xcalloc(filter->len, sizeof(unsigned char));
hashmap_for_each_entry(&pathmap, &iter, e, entry) { hashmap_for_each_entry(&pathmap, &iter, e, entry) {

View File

@ -201,6 +201,7 @@ static int graph_write(int argc, const char **argv)
}; };
opts.progress = isatty(2); opts.progress = isatty(2);
opts.enable_changed_paths = -1;
split_opts.size_multiple = 2; split_opts.size_multiple = 2;
split_opts.max_commits = 0; split_opts.max_commits = 0;
split_opts.expire_time = 0; split_opts.expire_time = 0;
@ -221,7 +222,9 @@ static int graph_write(int argc, const char **argv)
flags |= COMMIT_GRAPH_WRITE_SPLIT; flags |= COMMIT_GRAPH_WRITE_SPLIT;
if (opts.progress) if (opts.progress)
flags |= COMMIT_GRAPH_WRITE_PROGRESS; flags |= COMMIT_GRAPH_WRITE_PROGRESS;
if (opts.enable_changed_paths || if (!opts.enable_changed_paths)
flags |= COMMIT_GRAPH_NO_WRITE_BLOOM_FILTERS;
if (opts.enable_changed_paths == 1 ||
git_env_bool(GIT_TEST_COMMIT_GRAPH_CHANGED_PATHS, 0)) git_env_bool(GIT_TEST_COMMIT_GRAPH_CHANGED_PATHS, 0))
flags |= COMMIT_GRAPH_WRITE_BLOOM_FILTERS; flags |= COMMIT_GRAPH_WRITE_BLOOM_FILTERS;

View File

@ -1,7 +1,5 @@
#include "cache.h"
#include "config.h"
#include "dir.h"
#include "git-compat-util.h" #include "git-compat-util.h"
#include "config.h"
#include "lockfile.h" #include "lockfile.h"
#include "pack.h" #include "pack.h"
#include "packfile.h" #include "packfile.h"
@ -284,8 +282,7 @@ struct commit_graph *parse_commit_graph(void *graph_map, size_t graph_size)
const unsigned char *data, *chunk_lookup; const unsigned char *data, *chunk_lookup;
uint32_t i; uint32_t i;
struct commit_graph *graph; struct commit_graph *graph;
uint64_t last_chunk_offset; uint64_t next_chunk_offset;
uint32_t last_chunk_id;
uint32_t graph_signature; uint32_t graph_signature;
unsigned char graph_version, hash_version; unsigned char graph_version, hash_version;
@ -325,24 +322,26 @@ struct commit_graph *parse_commit_graph(void *graph_map, size_t graph_size)
graph->data = graph_map; graph->data = graph_map;
graph->data_len = graph_size; graph->data_len = graph_size;
last_chunk_id = 0; if (graph_size < GRAPH_HEADER_SIZE +
last_chunk_offset = 8; (graph->num_chunks + 1) * GRAPH_CHUNKLOOKUP_WIDTH +
chunk_lookup = data + 8; GRAPH_FANOUT_SIZE + the_hash_algo->rawsz) {
for (i = 0; i < graph->num_chunks; i++) { error(_("commit-graph file is too small to hold %u chunks"),
uint32_t chunk_id; graph->num_chunks);
uint64_t chunk_offset; free(graph);
int chunk_repeated = 0; return NULL;
if (data + graph_size - chunk_lookup <
GRAPH_CHUNKLOOKUP_WIDTH) {
error(_("commit-graph chunk lookup table entry missing; file may be incomplete"));
goto free_and_return;
} }
chunk_lookup = data + 8;
next_chunk_offset = get_be64(chunk_lookup + 4);
for (i = 0; i < graph->num_chunks; i++) {
uint32_t chunk_id;
uint64_t chunk_offset = next_chunk_offset;
int chunk_repeated = 0;
chunk_id = get_be32(chunk_lookup + 0); chunk_id = get_be32(chunk_lookup + 0);
chunk_offset = get_be64(chunk_lookup + 4);
chunk_lookup += GRAPH_CHUNKLOOKUP_WIDTH; chunk_lookup += GRAPH_CHUNKLOOKUP_WIDTH;
next_chunk_offset = get_be64(chunk_lookup + 4);
if (chunk_offset > graph_size - the_hash_algo->rawsz) { if (chunk_offset > graph_size - the_hash_algo->rawsz) {
error(_("commit-graph improper chunk offset %08x%08x"), (uint32_t)(chunk_offset >> 32), error(_("commit-graph improper chunk offset %08x%08x"), (uint32_t)(chunk_offset >> 32),
@ -361,8 +360,11 @@ struct commit_graph *parse_commit_graph(void *graph_map, size_t graph_size)
case GRAPH_CHUNKID_OIDLOOKUP: case GRAPH_CHUNKID_OIDLOOKUP:
if (graph->chunk_oid_lookup) if (graph->chunk_oid_lookup)
chunk_repeated = 1; chunk_repeated = 1;
else else {
graph->chunk_oid_lookup = data + chunk_offset; graph->chunk_oid_lookup = data + chunk_offset;
graph->num_commits = (next_chunk_offset - chunk_offset)
/ graph->hash_len;
}
break; break;
case GRAPH_CHUNKID_DATA: case GRAPH_CHUNKID_DATA:
@ -416,15 +418,6 @@ struct commit_graph *parse_commit_graph(void *graph_map, size_t graph_size)
error(_("commit-graph chunk id %08x appears multiple times"), chunk_id); error(_("commit-graph chunk id %08x appears multiple times"), chunk_id);
goto free_and_return; goto free_and_return;
} }
if (last_chunk_id == GRAPH_CHUNKID_OIDLOOKUP)
{
graph->num_commits = (chunk_offset - last_chunk_offset)
/ graph->hash_len;
}
last_chunk_id = chunk_id;
last_chunk_offset = chunk_offset;
} }
if (graph->chunk_bloom_indexes && graph->chunk_bloom_data) { if (graph->chunk_bloom_indexes && graph->chunk_bloom_data) {
@ -623,10 +616,6 @@ static int prepare_commit_graph(struct repository *r)
return !!r->objects->commit_graph; return !!r->objects->commit_graph;
r->objects->commit_graph_attempted = 1; r->objects->commit_graph_attempted = 1;
if (git_env_bool(GIT_TEST_COMMIT_GRAPH_DIE_ON_LOAD, 0))
die("dying as requested by the '%s' variable on commit-graph load!",
GIT_TEST_COMMIT_GRAPH_DIE_ON_LOAD);
prepare_repo_settings(r); prepare_repo_settings(r);
if (!git_env_bool(GIT_TEST_COMMIT_GRAPH, 0) && if (!git_env_bool(GIT_TEST_COMMIT_GRAPH, 0) &&
@ -855,6 +844,14 @@ static int parse_commit_in_graph_one(struct repository *r,
int parse_commit_in_graph(struct repository *r, struct commit *item) int parse_commit_in_graph(struct repository *r, struct commit *item)
{ {
static int checked_env = 0;
if (!checked_env &&
git_env_bool(GIT_TEST_COMMIT_GRAPH_DIE_ON_PARSE, 0))
die("dying as requested by the '%s' variable on commit-graph parse!",
GIT_TEST_COMMIT_GRAPH_DIE_ON_PARSE);
checked_env = 1;
if (!prepare_commit_graph(r)) if (!prepare_commit_graph(r))
return 0; return 0;
return parse_commit_in_graph_one(r, r->objects->commit_graph, item); return parse_commit_in_graph_one(r, r->objects->commit_graph, item);
@ -947,9 +944,10 @@ struct write_commit_graph_context {
const struct split_commit_graph_opts *split_opts; const struct split_commit_graph_opts *split_opts;
size_t total_bloom_filter_data_size; size_t total_bloom_filter_data_size;
struct bloom_filter_settings bloom_settings;
}; };
static void write_graph_chunk_fanout(struct hashfile *f, static int write_graph_chunk_fanout(struct hashfile *f,
struct write_commit_graph_context *ctx) struct write_commit_graph_context *ctx)
{ {
int i, count = 0; int i, count = 0;
@ -971,17 +969,21 @@ static void write_graph_chunk_fanout(struct hashfile *f,
hashwrite_be32(f, count); hashwrite_be32(f, count);
} }
return 0;
} }
static void write_graph_chunk_oids(struct hashfile *f, int hash_len, static int write_graph_chunk_oids(struct hashfile *f,
struct write_commit_graph_context *ctx) struct write_commit_graph_context *ctx)
{ {
struct commit **list = ctx->commits.list; struct commit **list = ctx->commits.list;
int count; int count;
for (count = 0; count < ctx->commits.nr; count++, list++) { for (count = 0; count < ctx->commits.nr; count++, list++) {
display_progress(ctx->progress, ++ctx->progress_cnt); display_progress(ctx->progress, ++ctx->progress_cnt);
hashwrite(f, (*list)->object.oid.hash, (int)hash_len); hashwrite(f, (*list)->object.oid.hash, (int)the_hash_algo->rawsz);
} }
return 0;
} }
static const unsigned char *commit_to_sha1(size_t index, void *table) static const unsigned char *commit_to_sha1(size_t index, void *table)
@ -990,7 +992,7 @@ static const unsigned char *commit_to_sha1(size_t index, void *table)
return commits[index]->object.oid.hash; return commits[index]->object.oid.hash;
} }
static void write_graph_chunk_data(struct hashfile *f, int hash_len, static int write_graph_chunk_data(struct hashfile *f,
struct write_commit_graph_context *ctx) struct write_commit_graph_context *ctx)
{ {
struct commit **list = ctx->commits.list; struct commit **list = ctx->commits.list;
@ -1008,7 +1010,7 @@ static void write_graph_chunk_data(struct hashfile *f, int hash_len,
die(_("unable to parse commit %s"), die(_("unable to parse commit %s"),
oid_to_hex(&(*list)->object.oid)); oid_to_hex(&(*list)->object.oid));
tree = get_commit_tree_oid(*list); tree = get_commit_tree_oid(*list);
hashwrite(f, tree->hash, hash_len); hashwrite(f, tree->hash, the_hash_algo->rawsz);
parent = (*list)->parents; parent = (*list)->parents;
@ -1088,9 +1090,11 @@ static void write_graph_chunk_data(struct hashfile *f, int hash_len,
list++; list++;
} }
return 0;
} }
static void write_graph_chunk_extra_edges(struct hashfile *f, static int write_graph_chunk_extra_edges(struct hashfile *f,
struct write_commit_graph_context *ctx) struct write_commit_graph_context *ctx)
{ {
struct commit **list = ctx->commits.list; struct commit **list = ctx->commits.list;
@ -1140,9 +1144,11 @@ static void write_graph_chunk_extra_edges(struct hashfile *f,
list++; list++;
} }
return 0;
} }
static void write_graph_chunk_bloom_indexes(struct hashfile *f, static int write_graph_chunk_bloom_indexes(struct hashfile *f,
struct write_commit_graph_context *ctx) struct write_commit_graph_context *ctx)
{ {
struct commit **list = ctx->commits.list; struct commit **list = ctx->commits.list;
@ -1165,11 +1171,11 @@ static void write_graph_chunk_bloom_indexes(struct hashfile *f,
} }
stop_progress(&progress); stop_progress(&progress);
return 0;
} }
static void write_graph_chunk_bloom_data(struct hashfile *f, static int write_graph_chunk_bloom_data(struct hashfile *f,
struct write_commit_graph_context *ctx, struct write_commit_graph_context *ctx)
const struct bloom_filter_settings *settings)
{ {
struct commit **list = ctx->commits.list; struct commit **list = ctx->commits.list;
struct commit **last = ctx->commits.list + ctx->commits.nr; struct commit **last = ctx->commits.list + ctx->commits.nr;
@ -1181,9 +1187,9 @@ static void write_graph_chunk_bloom_data(struct hashfile *f,
_("Writing changed paths Bloom filters data"), _("Writing changed paths Bloom filters data"),
ctx->commits.nr); ctx->commits.nr);
hashwrite_be32(f, settings->hash_version); hashwrite_be32(f, ctx->bloom_settings.hash_version);
hashwrite_be32(f, settings->num_hashes); hashwrite_be32(f, ctx->bloom_settings.num_hashes);
hashwrite_be32(f, settings->bits_per_entry); hashwrite_be32(f, ctx->bloom_settings.bits_per_entry);
while (list < last) { while (list < last) {
struct bloom_filter *filter = get_bloom_filter(ctx->r, *list, 0); struct bloom_filter *filter = get_bloom_filter(ctx->r, *list, 0);
@ -1193,6 +1199,7 @@ static void write_graph_chunk_bloom_data(struct hashfile *f,
} }
stop_progress(&progress); stop_progress(&progress);
return 0;
} }
static int oid_compare(const void *_a, const void *_b) static int oid_compare(const void *_a, const void *_b)
@ -1602,20 +1609,31 @@ static int write_graph_chunk_base(struct hashfile *f,
return 0; return 0;
} }
/*
 * Common signature of the functions that write one chunk of the
 * commit-graph file to 'f', so that they can be stored in a table and
 * invoked from a loop. A non-zero return value is treated by
 * write_commit_graph_file() as a failure to write the chunk.
 */
typedef int (*chunk_write_fn)(struct hashfile *f,
struct write_commit_graph_context *ctx);
/*
 * Describes one chunk of the commit-graph file that
 * write_commit_graph_file() is about to write.
 */
struct chunk_info {
/* Chunk identifier written to the chunk lookup table; 0 marks the terminating entry. */
uint32_t id;
/*
 * Number of bytes the chunk is expected to occupy. It is used to
 * compute the offsets in the chunk lookup table and is compared with
 * the number of bytes actually written once write_fn has returned.
 */
uint64_t size;
/* Function that writes the contents of this chunk. */
chunk_write_fn write_fn;
};
static int write_commit_graph_file(struct write_commit_graph_context *ctx) static int write_commit_graph_file(struct write_commit_graph_context *ctx)
{ {
uint32_t i; uint32_t i;
int fd; int fd;
struct hashfile *f; struct hashfile *f;
struct lock_file lk = LOCK_INIT; struct lock_file lk = LOCK_INIT;
uint32_t chunk_ids[MAX_NUM_CHUNKS + 1]; struct chunk_info chunks[MAX_NUM_CHUNKS + 1];
uint64_t chunk_offsets[MAX_NUM_CHUNKS + 1];
const unsigned hashsz = the_hash_algo->rawsz; const unsigned hashsz = the_hash_algo->rawsz;
struct strbuf progress_title = STRBUF_INIT; struct strbuf progress_title = STRBUF_INIT;
int num_chunks = 3; int num_chunks = 3;
uint64_t chunk_offset;
struct object_id file_hash; struct object_id file_hash;
const struct bloom_filter_settings bloom_settings = DEFAULT_BLOOM_FILTER_SETTINGS; const struct bloom_filter_settings bloom_settings = DEFAULT_BLOOM_FILTER_SETTINGS;
ctx->bloom_settings = bloom_settings;
if (ctx->split) { if (ctx->split) {
struct strbuf tmp_file = STRBUF_INIT; struct strbuf tmp_file = STRBUF_INIT;
@ -1660,51 +1678,41 @@ static int write_commit_graph_file(struct write_commit_graph_context *ctx)
f = hashfd(lk.tempfile->fd, lk.tempfile->filename.buf); f = hashfd(lk.tempfile->fd, lk.tempfile->filename.buf);
} }
chunk_ids[0] = GRAPH_CHUNKID_OIDFANOUT; chunks[0].id = GRAPH_CHUNKID_OIDFANOUT;
chunk_ids[1] = GRAPH_CHUNKID_OIDLOOKUP; chunks[0].size = GRAPH_FANOUT_SIZE;
chunk_ids[2] = GRAPH_CHUNKID_DATA; chunks[0].write_fn = write_graph_chunk_fanout;
chunks[1].id = GRAPH_CHUNKID_OIDLOOKUP;
chunks[1].size = hashsz * ctx->commits.nr;
chunks[1].write_fn = write_graph_chunk_oids;
chunks[2].id = GRAPH_CHUNKID_DATA;
chunks[2].size = (hashsz + 16) * ctx->commits.nr;
chunks[2].write_fn = write_graph_chunk_data;
if (ctx->num_extra_edges) { if (ctx->num_extra_edges) {
chunk_ids[num_chunks] = GRAPH_CHUNKID_EXTRAEDGES; chunks[num_chunks].id = GRAPH_CHUNKID_EXTRAEDGES;
chunks[num_chunks].size = 4 * ctx->num_extra_edges;
chunks[num_chunks].write_fn = write_graph_chunk_extra_edges;
num_chunks++; num_chunks++;
} }
if (ctx->changed_paths) { if (ctx->changed_paths) {
chunk_ids[num_chunks] = GRAPH_CHUNKID_BLOOMINDEXES; chunks[num_chunks].id = GRAPH_CHUNKID_BLOOMINDEXES;
chunks[num_chunks].size = sizeof(uint32_t) * ctx->commits.nr;
chunks[num_chunks].write_fn = write_graph_chunk_bloom_indexes;
num_chunks++; num_chunks++;
chunk_ids[num_chunks] = GRAPH_CHUNKID_BLOOMDATA; chunks[num_chunks].id = GRAPH_CHUNKID_BLOOMDATA;
chunks[num_chunks].size = sizeof(uint32_t) * 3
+ ctx->total_bloom_filter_data_size;
chunks[num_chunks].write_fn = write_graph_chunk_bloom_data;
num_chunks++; num_chunks++;
} }
if (ctx->num_commit_graphs_after > 1) { if (ctx->num_commit_graphs_after > 1) {
chunk_ids[num_chunks] = GRAPH_CHUNKID_BASE; chunks[num_chunks].id = GRAPH_CHUNKID_BASE;
chunks[num_chunks].size = hashsz * (ctx->num_commit_graphs_after - 1);
chunks[num_chunks].write_fn = write_graph_chunk_base;
num_chunks++; num_chunks++;
} }
chunk_ids[num_chunks] = 0; chunks[num_chunks].id = 0;
chunks[num_chunks].size = 0;
chunk_offsets[0] = 8 + (num_chunks + 1) * GRAPH_CHUNKLOOKUP_WIDTH;
chunk_offsets[1] = chunk_offsets[0] + GRAPH_FANOUT_SIZE;
chunk_offsets[2] = chunk_offsets[1] + hashsz * ctx->commits.nr;
chunk_offsets[3] = chunk_offsets[2] + (hashsz + 16) * ctx->commits.nr;
num_chunks = 3;
if (ctx->num_extra_edges) {
chunk_offsets[num_chunks + 1] = chunk_offsets[num_chunks] +
4 * ctx->num_extra_edges;
num_chunks++;
}
if (ctx->changed_paths) {
chunk_offsets[num_chunks + 1] = chunk_offsets[num_chunks] +
sizeof(uint32_t) * ctx->commits.nr;
num_chunks++;
chunk_offsets[num_chunks + 1] = chunk_offsets[num_chunks] +
sizeof(uint32_t) * 3 + ctx->total_bloom_filter_data_size;
num_chunks++;
}
if (ctx->num_commit_graphs_after > 1) {
chunk_offsets[num_chunks + 1] = chunk_offsets[num_chunks] +
hashsz * (ctx->num_commit_graphs_after - 1);
num_chunks++;
}
hashwrite_be32(f, GRAPH_SIGNATURE); hashwrite_be32(f, GRAPH_SIGNATURE);
@ -1713,13 +1721,16 @@ static int write_commit_graph_file(struct write_commit_graph_context *ctx)
hashwrite_u8(f, num_chunks); hashwrite_u8(f, num_chunks);
hashwrite_u8(f, ctx->num_commit_graphs_after - 1); hashwrite_u8(f, ctx->num_commit_graphs_after - 1);
chunk_offset = 8 + (num_chunks + 1) * GRAPH_CHUNKLOOKUP_WIDTH;
for (i = 0; i <= num_chunks; i++) { for (i = 0; i <= num_chunks; i++) {
uint32_t chunk_write[3]; uint32_t chunk_write[3];
chunk_write[0] = htonl(chunk_ids[i]); chunk_write[0] = htonl(chunks[i].id);
chunk_write[1] = htonl(chunk_offsets[i] >> 32); chunk_write[1] = htonl(chunk_offset >> 32);
chunk_write[2] = htonl(chunk_offsets[i] & 0xffffffff); chunk_write[2] = htonl(chunk_offset & 0xffffffff);
hashwrite(f, chunk_write, 12); hashwrite(f, chunk_write, 12);
chunk_offset += chunks[i].size;
} }
if (ctx->report_progress) { if (ctx->report_progress) {
@ -1732,19 +1743,24 @@ static int write_commit_graph_file(struct write_commit_graph_context *ctx)
progress_title.buf, progress_title.buf,
num_chunks * ctx->commits.nr); num_chunks * ctx->commits.nr);
} }
write_graph_chunk_fanout(f, ctx);
write_graph_chunk_oids(f, hashsz, ctx); chunk_offset = f->total + f->offset;
write_graph_chunk_data(f, hashsz, ctx); for (i = 0; i < num_chunks; i++) {
if (ctx->num_extra_edges) uint64_t end_offset;
write_graph_chunk_extra_edges(f, ctx);
if (ctx->changed_paths) { if (chunks[i].write_fn(f, ctx)) {
write_graph_chunk_bloom_indexes(f, ctx); error(_("failed writing chunk with id %"PRIx32""),
write_graph_chunk_bloom_data(f, ctx, &bloom_settings); chunks[i].id);
}
if (ctx->num_commit_graphs_after > 1 &&
write_graph_chunk_base(f, ctx)) {
return -1; return -1;
} }
end_offset = f->total + f->offset;
if (end_offset - chunk_offset != chunks[i].size)
BUG("expected to write %"PRId64" bytes to chunk %"PRIx32", but wrote %"PRId64" instead",
chunks[i].size, chunks[i].id, end_offset - chunk_offset);
chunk_offset = end_offset;
}
stop_progress(&ctx->progress); stop_progress(&ctx->progress);
strbuf_release(&progress_title); strbuf_release(&progress_title);
@ -2078,9 +2094,19 @@ int write_commit_graph(struct object_directory *odb,
ctx->report_progress = flags & COMMIT_GRAPH_WRITE_PROGRESS ? 1 : 0; ctx->report_progress = flags & COMMIT_GRAPH_WRITE_PROGRESS ? 1 : 0;
ctx->split = flags & COMMIT_GRAPH_WRITE_SPLIT ? 1 : 0; ctx->split = flags & COMMIT_GRAPH_WRITE_SPLIT ? 1 : 0;
ctx->split_opts = split_opts; ctx->split_opts = split_opts;
ctx->changed_paths = flags & COMMIT_GRAPH_WRITE_BLOOM_FILTERS ? 1 : 0;
ctx->total_bloom_filter_data_size = 0; ctx->total_bloom_filter_data_size = 0;
if (flags & COMMIT_GRAPH_WRITE_BLOOM_FILTERS)
ctx->changed_paths = 1;
else if (!(flags & COMMIT_GRAPH_NO_WRITE_BLOOM_FILTERS)) {
prepare_commit_graph_one(ctx->r, ctx->odb);
/* We have changed-paths already. Keep them in the next graph */
if (ctx->r->objects->commit_graph &&
ctx->r->objects->commit_graph->chunk_bloom_data)
ctx->changed_paths = 1;
}
if (ctx->split) { if (ctx->split) {
struct commit_graph *g; struct commit_graph *g;
prepare_commit_graph(ctx->r); prepare_commit_graph(ctx->r);

View File

@ -2,14 +2,11 @@
#define COMMIT_GRAPH_H #define COMMIT_GRAPH_H
#include "git-compat-util.h" #include "git-compat-util.h"
#include "repository.h"
#include "string-list.h"
#include "cache.h"
#include "object-store.h" #include "object-store.h"
#include "oidset.h" #include "oidset.h"
#define GIT_TEST_COMMIT_GRAPH "GIT_TEST_COMMIT_GRAPH" #define GIT_TEST_COMMIT_GRAPH "GIT_TEST_COMMIT_GRAPH"
#define GIT_TEST_COMMIT_GRAPH_DIE_ON_LOAD "GIT_TEST_COMMIT_GRAPH_DIE_ON_LOAD" #define GIT_TEST_COMMIT_GRAPH_DIE_ON_PARSE "GIT_TEST_COMMIT_GRAPH_DIE_ON_PARSE"
#define GIT_TEST_COMMIT_GRAPH_CHANGED_PATHS "GIT_TEST_COMMIT_GRAPH_CHANGED_PATHS" #define GIT_TEST_COMMIT_GRAPH_CHANGED_PATHS "GIT_TEST_COMMIT_GRAPH_CHANGED_PATHS"
/* /*
@ -23,6 +20,9 @@ void git_test_write_commit_graph_or_die(void);
struct commit; struct commit;
struct bloom_filter_settings; struct bloom_filter_settings;
struct repository;
struct raw_object_store;
struct string_list;
char *get_commit_graph_filename(struct object_directory *odb); char *get_commit_graph_filename(struct object_directory *odb);
int open_commit_graph(const char *graph_file, int *fd, struct stat *st); int open_commit_graph(const char *graph_file, int *fd, struct stat *st);
@ -92,6 +92,7 @@ enum commit_graph_write_flags {
COMMIT_GRAPH_WRITE_PROGRESS = (1 << 1), COMMIT_GRAPH_WRITE_PROGRESS = (1 << 1),
COMMIT_GRAPH_WRITE_SPLIT = (1 << 2), COMMIT_GRAPH_WRITE_SPLIT = (1 << 2),
COMMIT_GRAPH_WRITE_BLOOM_FILTERS = (1 << 3), COMMIT_GRAPH_WRITE_BLOOM_FILTERS = (1 << 3),
COMMIT_GRAPH_NO_WRITE_BLOOM_FILTERS = (1 << 4),
}; };
enum commit_graph_split_flags { enum commit_graph_split_flags {

View File

@ -32,6 +32,7 @@ struct slabname { \
void init_ ##slabname## _with_stride(struct slabname *s, unsigned stride); \ void init_ ##slabname## _with_stride(struct slabname *s, unsigned stride); \
void init_ ##slabname(struct slabname *s); \ void init_ ##slabname(struct slabname *s); \
void clear_ ##slabname(struct slabname *s); \ void clear_ ##slabname(struct slabname *s); \
void deep_clear_ ##slabname(struct slabname *s, void (*free_fn)(elemtype *ptr)); \
elemtype *slabname## _at_peek(struct slabname *s, const struct commit *c, int add_if_missing); \ elemtype *slabname## _at_peek(struct slabname *s, const struct commit *c, int add_if_missing); \
elemtype *slabname## _at(struct slabname *s, const struct commit *c); \ elemtype *slabname## _at(struct slabname *s, const struct commit *c); \
elemtype *slabname## _peek(struct slabname *s, const struct commit *c) elemtype *slabname## _peek(struct slabname *s, const struct commit *c)

View File

@ -38,6 +38,19 @@ scope void clear_ ##slabname(struct slabname *s) \
FREE_AND_NULL(s->slab); \ FREE_AND_NULL(s->slab); \
} \ } \
\ \
/* Call free_fn on every entry of every allocated slab, then release the slabs with clear_<slabname>(). */ \
scope void deep_clear_ ##slabname(struct slabname *s, void (*free_fn)(elemtype *)) \
{ \
unsigned int i; \
for (i = 0; i < s->slab_count; i++) { \
unsigned int j; \
/* A slab that was never allocated has no entries to visit. */ \
if (!s->slab[i]) \
continue; \
/* Consecutive entries are s->stride elements apart. */ \
for (j = 0; j < s->slab_size; j++) \
free_fn(&s->slab[i][j * s->stride]); \
} \
clear_ ##slabname(s); \
} \
\
scope elemtype *slabname## _at_peek(struct slabname *s, \ scope elemtype *slabname## _at_peek(struct slabname *s, \
const struct commit *c, \ const struct commit *c, \
int add_if_missing) \ int add_if_missing) \

View File

@ -47,6 +47,16 @@
* *
* Call this function before the slab falls out of scope to avoid * Call this function before the slab falls out of scope to avoid
* leaking memory. * leaking memory.
*
* - void deep_clear_indegree(struct indegree *, void (*free_fn)(int*))
*
* Empties the slab, similar to clear_indegree(), but in addition it
* calls the given 'free_fn' for each slab entry to release any
* additional memory that might be owned by the entry (but not the
* entry itself!).
* Note that 'free_fn' might be called even for entries for which no
* indegree_at() call has been made; in this case 'free_fn' is invoked
* with a pointer to a zero-initialized location.
*/ */
#define define_commit_slab(slabname, elemtype) \ #define define_commit_slab(slabname, elemtype) \

4
diff.h
View File

@ -431,10 +431,10 @@ struct combine_diff_path *diff_tree_paths(
struct combine_diff_path *p, const struct object_id *oid, struct combine_diff_path *p, const struct object_id *oid,
const struct object_id **parents_oid, int nparent, const struct object_id **parents_oid, int nparent,
struct strbuf *base, struct diff_options *opt); struct strbuf *base, struct diff_options *opt);
int diff_tree_oid(const struct object_id *old_oid, void diff_tree_oid(const struct object_id *old_oid,
const struct object_id *new_oid, const struct object_id *new_oid,
const char *base, struct diff_options *opt); const char *base, struct diff_options *opt);
int diff_root_tree_oid(const struct object_id *new_oid, const char *base, void diff_root_tree_oid(const struct object_id *new_oid, const char *base,
struct diff_options *opt); struct diff_options *opt);
struct combine_diff_path { struct combine_diff_path {

View File

@ -670,9 +670,10 @@ static void prepare_to_use_bloom_filter(struct rev_info *revs)
{ {
struct pathspec_item *pi; struct pathspec_item *pi;
char *path_alloc = NULL; char *path_alloc = NULL;
const char *path; const char *path, *p;
int last_index; int last_index;
int len; size_t len;
int path_component_nr = 0, j;
if (!revs->commits) if (!revs->commits)
return; return;
@ -705,8 +706,22 @@ static void prepare_to_use_bloom_filter(struct rev_info *revs)
len = strlen(path); len = strlen(path);
revs->bloom_key = xmalloc(sizeof(struct bloom_key)); p = path;
fill_bloom_key(path, len, revs->bloom_key, revs->bloom_filter_settings); do {
p = strchrnul(p + 1, '/');
path_component_nr++;
} while (p - path < len);
revs->bloom_keys_nr = path_component_nr;
ALLOC_ARRAY(revs->bloom_keys, revs->bloom_keys_nr);
p = path;
for (j = 0; j < revs->bloom_keys_nr; j++) {
p = strchrnul(p + 1, '/');
fill_bloom_key(path, p - path, &revs->bloom_keys[j],
revs->bloom_filter_settings);
}
if (trace2_is_enabled() && !bloom_filter_atexit_registered) { if (trace2_is_enabled() && !bloom_filter_atexit_registered) {
atexit(trace2_bloom_filter_statistics_atexit); atexit(trace2_bloom_filter_statistics_atexit);
@ -720,7 +735,7 @@ static int check_maybe_different_in_bloom_filter(struct rev_info *revs,
struct commit *commit) struct commit *commit)
{ {
struct bloom_filter *filter; struct bloom_filter *filter;
int result; int result = 1, j;
if (!revs->repo->objects->commit_graph) if (!revs->repo->objects->commit_graph)
return -1; return -1;
@ -740,9 +755,11 @@ static int check_maybe_different_in_bloom_filter(struct rev_info *revs,
return -1; return -1;
} }
for (j = 0; result && j < revs->bloom_keys_nr; j++) {
result = bloom_filter_contains(filter, result = bloom_filter_contains(filter,
revs->bloom_key, &revs->bloom_keys[j],
revs->bloom_filter_settings); revs->bloom_filter_settings);
}
if (result) if (result)
count_bloom_filter_maybe++; count_bloom_filter_maybe++;
@ -782,7 +799,7 @@ static int rev_compare_tree(struct rev_info *revs,
return REV_TREE_SAME; return REV_TREE_SAME;
} }
if (revs->bloom_key && !nth_parent) { if (revs->bloom_keys_nr && !nth_parent) {
bloom_ret = check_maybe_different_in_bloom_filter(revs, commit); bloom_ret = check_maybe_different_in_bloom_filter(revs, commit);
if (bloom_ret == 0) if (bloom_ret == 0)
@ -791,9 +808,7 @@ static int rev_compare_tree(struct rev_info *revs,
tree_difference = REV_TREE_SAME; tree_difference = REV_TREE_SAME;
revs->pruning.flags.has_changes = 0; revs->pruning.flags.has_changes = 0;
if (diff_tree_oid(&t1->object.oid, &t2->object.oid, "", diff_tree_oid(&t1->object.oid, &t2->object.oid, "", &revs->pruning);
&revs->pruning) < 0)
return REV_TREE_DIFFERENT;
if (!nth_parent) if (!nth_parent)
if (bloom_ret == 1 && tree_difference == REV_TREE_SAME) if (bloom_ret == 1 && tree_difference == REV_TREE_SAME)
@ -804,7 +819,6 @@ static int rev_compare_tree(struct rev_info *revs,
static int rev_same_tree_as_empty(struct rev_info *revs, struct commit *commit) static int rev_same_tree_as_empty(struct rev_info *revs, struct commit *commit)
{ {
int retval;
struct tree *t1 = get_commit_tree(commit); struct tree *t1 = get_commit_tree(commit);
if (!t1) if (!t1)
@ -812,9 +826,9 @@ static int rev_same_tree_as_empty(struct rev_info *revs, struct commit *commit)
tree_difference = REV_TREE_SAME; tree_difference = REV_TREE_SAME;
revs->pruning.flags.has_changes = 0; revs->pruning.flags.has_changes = 0;
retval = diff_tree_oid(NULL, &t1->object.oid, "", &revs->pruning); diff_tree_oid(NULL, &t1->object.oid, "", &revs->pruning);
return retval >= 0 && (tree_difference == REV_TREE_SAME); return tree_difference == REV_TREE_SAME;
} }
struct treesame_state { struct treesame_state {

View File

@ -300,8 +300,10 @@ struct rev_info {
struct topo_walk_info *topo_walk_info; struct topo_walk_info *topo_walk_info;
/* Commit graph bloom filter fields */ /* Commit graph bloom filter fields */
/* The bloom filter key for the pathspec */ /* The bloom filter key(s) for the pathspec */
struct bloom_key *bloom_key; struct bloom_key *bloom_keys;
int bloom_keys_nr;
/* /*
* The bloom filter settings used to generate the key. * The bloom filter settings used to generate the key.
* This is loaded from the commit-graph being used. * This is loaded from the commit-graph being used.

View File

@ -110,6 +110,10 @@ void rollback_shallow_file(struct repository *r, struct shallow_lock *lk)
* supports a "valid" flag. * supports a "valid" flag.
*/ */
define_commit_slab(commit_depth, int *); define_commit_slab(commit_depth, int *);
/*
 * Per-entry callback handed to deep_clear_commit_depth(): releases the
 * 'int' allocation that a commit_depth slab entry points to (via
 * FREE_AND_NULL, which is expected to also reset the entry to NULL).
 * 'ptr' is the address of the slab entry, not the allocation itself.
 */
static void free_depth_in_slab(int **ptr)
{
FREE_AND_NULL(*ptr);
}
struct commit_list *get_shallow_commits(struct object_array *heads, int depth, struct commit_list *get_shallow_commits(struct object_array *heads, int depth,
int shallow_flag, int not_shallow_flag) int shallow_flag, int not_shallow_flag)
{ {
@ -176,15 +180,7 @@ struct commit_list *get_shallow_commits(struct object_array *heads, int depth,
} }
} }
} }
for (i = 0; i < depths.slab_count; i++) { deep_clear_commit_depth(&depths, free_depth_in_slab);
int j;
if (!depths.slab[i])
continue;
for (j = 0; j < depths.slab_size; j++)
free(depths.slab[i][j]);
}
clear_commit_depth(&depths);
return result; return result;
} }

View File

@ -125,7 +125,9 @@ test_expect_success 'setup submodules' '
test_expect_success 'diff-tree ignores trailing slash on submodule path' ' test_expect_success 'diff-tree ignores trailing slash on submodule path' '
git diff --name-only HEAD^ HEAD submod >expect && git diff --name-only HEAD^ HEAD submod >expect &&
git diff --name-only HEAD^ HEAD submod/ >actual && git diff --name-only HEAD^ HEAD submod/ >actual &&
test_cmp expect actual test_cmp expect actual &&
git diff --name-only HEAD^ HEAD -- submod/whatever >actual &&
test_must_be_empty actual
' '
test_expect_success 'diff multiple wildcard pathspecs' ' test_expect_success 'diff multiple wildcard pathspecs' '

View File

@ -126,7 +126,7 @@ test_expect_success 'setup - add commit-graph to the chain without Bloom filters
test_commit c14 A/anotherFile2 && test_commit c14 A/anotherFile2 &&
test_commit c15 A/B/anotherFile2 && test_commit c15 A/B/anotherFile2 &&
test_commit c16 A/B/C/anotherFile2 && test_commit c16 A/B/C/anotherFile2 &&
GIT_TEST_COMMIT_GRAPH_CHANGED_PATHS=0 git commit-graph write --reachable --split && git commit-graph write --reachable --split --no-changed-paths &&
test_line_count = 2 .git/objects/info/commit-graphs/commit-graph-chain test_line_count = 2 .git/objects/info/commit-graphs/commit-graph-chain
' '
@ -142,7 +142,7 @@ test_expect_success 'setup - add commit-graph to the chain with Bloom filters' '
test_bloom_filters_used_when_some_filters_are_missing () { test_bloom_filters_used_when_some_filters_are_missing () {
log_args=$1 log_args=$1
bloom_trace_prefix="statistics:{\"filter_not_present\":3,\"zero_length_filter\":0,\"maybe\":8,\"definitely_not\":6" bloom_trace_prefix="statistics:{\"filter_not_present\":3,\"zero_length_filter\":0,\"maybe\":6,\"definitely_not\":8"
setup "$log_args" && setup "$log_args" &&
grep -q "$bloom_trace_prefix" "$TRASH_DIRECTORY/trace.perf" && grep -q "$bloom_trace_prefix" "$TRASH_DIRECTORY/trace.perf" &&
test_cmp log_wo_bloom log_w_bloom test_cmp log_wo_bloom log_w_bloom

View File

@ -476,7 +476,7 @@ corrupt_graph_verify() {
cp $objdir/info/commit-graph commit-graph-pre-write-test cp $objdir/info/commit-graph commit-graph-pre-write-test
fi && fi &&
git status --short && git status --short &&
GIT_TEST_COMMIT_GRAPH_DIE_ON_LOAD=true git commit-graph write && GIT_TEST_COMMIT_GRAPH_DIE_ON_PARSE=true git commit-graph write &&
chmod u+w $objdir/info/commit-graph && chmod u+w $objdir/info/commit-graph &&
git commit-graph verify git commit-graph verify
} }
@ -529,7 +529,7 @@ test_expect_success 'detect bad hash version' '
' '
test_expect_success 'detect low chunk count' ' test_expect_success 'detect low chunk count' '
corrupt_graph_and_verify $GRAPH_BYTE_CHUNK_COUNT "\02" \ corrupt_graph_and_verify $GRAPH_BYTE_CHUNK_COUNT "\01" \
"missing the .* chunk" "missing the .* chunk"
' '
@ -615,7 +615,8 @@ test_expect_success 'detect invalid checksum hash' '
test_expect_success 'detect incorrect chunk count' ' test_expect_success 'detect incorrect chunk count' '
corrupt_graph_and_verify $GRAPH_BYTE_CHUNK_COUNT "\377" \ corrupt_graph_and_verify $GRAPH_BYTE_CHUNK_COUNT "\377" \
"chunk lookup table entry missing" $GRAPH_CHUNK_LOOKUP_OFFSET "commit-graph file is too small to hold [0-9]* chunks" \
$GRAPH_CHUNK_LOOKUP_OFFSET
' '
test_expect_success 'git fsck (checks commit-graph)' ' test_expect_success 'git fsck (checks commit-graph)' '

View File

@ -29,7 +29,7 @@ static struct combine_diff_path *ll_diff_tree_paths(
struct combine_diff_path *p, const struct object_id *oid, struct combine_diff_path *p, const struct object_id *oid,
const struct object_id **parents_oid, int nparent, const struct object_id **parents_oid, int nparent,
struct strbuf *base, struct diff_options *opt); struct strbuf *base, struct diff_options *opt);
static int ll_diff_tree_oid(const struct object_id *old_oid, static void ll_diff_tree_oid(const struct object_id *old_oid,
const struct object_id *new_oid, const struct object_id *new_oid,
struct strbuf *base, struct diff_options *opt); struct strbuf *base, struct diff_options *opt);
@ -679,7 +679,7 @@ static void try_to_follow_renames(const struct object_id *old_oid,
q->nr = 1; q->nr = 1;
} }
static int ll_diff_tree_oid(const struct object_id *old_oid, static void ll_diff_tree_oid(const struct object_id *old_oid,
const struct object_id *new_oid, const struct object_id *new_oid,
struct strbuf *base, struct diff_options *opt) struct strbuf *base, struct diff_options *opt)
{ {
@ -697,29 +697,27 @@ static int ll_diff_tree_oid(const struct object_id *old_oid,
} }
opt->pathchange = pathchange_old; opt->pathchange = pathchange_old;
return 0;
} }
int diff_tree_oid(const struct object_id *old_oid, void diff_tree_oid(const struct object_id *old_oid,
const struct object_id *new_oid, const struct object_id *new_oid,
const char *base_str, struct diff_options *opt) const char *base_str, struct diff_options *opt)
{ {
struct strbuf base; struct strbuf base;
int retval;
strbuf_init(&base, PATH_MAX); strbuf_init(&base, PATH_MAX);
strbuf_addstr(&base, base_str); strbuf_addstr(&base, base_str);
retval = ll_diff_tree_oid(old_oid, new_oid, &base, opt); ll_diff_tree_oid(old_oid, new_oid, &base, opt);
if (!*base_str && opt->flags.follow_renames && diff_might_be_rename()) if (!*base_str && opt->flags.follow_renames && diff_might_be_rename())
try_to_follow_renames(old_oid, new_oid, &base, opt); try_to_follow_renames(old_oid, new_oid, &base, opt);
strbuf_release(&base); strbuf_release(&base);
return retval;
} }
int diff_root_tree_oid(const struct object_id *new_oid, const char *base, struct diff_options *opt) void diff_root_tree_oid(const struct object_id *new_oid,
const char *base,
struct diff_options *opt)
{ {
return diff_tree_oid(NULL, new_oid, base, opt); diff_tree_oid(NULL, new_oid, base, opt);
} }

View File

@ -851,7 +851,14 @@ static int match_entry(const struct pathspec_item *item,
if (matchlen > pathlen) { if (matchlen > pathlen) {
if (match[pathlen] != '/') if (match[pathlen] != '/')
return 0; return 0;
if (!S_ISDIR(entry->mode) && !S_ISGITLINK(entry->mode)) /*
* Reject non-directories as partial pathnames, except
* when match is a submodule with a trailing slash and
* nothing else (to handle 'submod/' and 'submod'
* uniformly).
*/
if (!S_ISDIR(entry->mode) &&
(!S_ISGITLINK(entry->mode) || matchlen > pathlen + 1))
return 0; return 0;
} }