diff --git a/Documentation/git-commit-graph.txt b/Documentation/git-commit-graph.txt index 8ca1764d3d..17405c73a9 100644 --- a/Documentation/git-commit-graph.txt +++ b/Documentation/git-commit-graph.txt @@ -62,7 +62,10 @@ existing commit-graph file. With the `--changed-paths` option, compute and write information about the paths changed between a commit and its first parent. This operation can take a while on large repositories. It provides significant performance gains -for getting history of a directory or a file with `git log -- <path>`. +for getting history of a directory or a file with `git log -- <path>`. If +this option is given, future commit-graph writes will automatically assume +that this option was intended. Use `--no-changed-paths` to stop storing this +data. + With the `--split[=<strategy>]` option, write the commit-graph as a chain of multiple commit-graph files stored in diff --git a/Documentation/technical/commit-graph-format.txt b/Documentation/technical/commit-graph-format.txt index 1beef17182..440541045d 100644 --- a/Documentation/technical/commit-graph-format.txt +++ b/Documentation/technical/commit-graph-format.txt @@ -32,7 +32,7 @@ the body into "chunks" and provide a binary lookup table at the beginning of the body. The header includes certain values, such as number of chunks and hash type. -All 4-byte numbers are in network order. +All multi-byte numbers are in network byte order. 
HEADER: diff --git a/bloom.c b/bloom.c index 6a7f2f2bdc..d43f676e09 100644 --- a/bloom.c +++ b/bloom.c @@ -259,6 +259,10 @@ struct bloom_filter *get_bloom_filter(struct repository *r, } filter->len = (hashmap_get_size(&pathmap) * settings.bits_per_entry + BITS_PER_WORD - 1) / BITS_PER_WORD; + + if (filter->len && filter->len < 8) + filter->len = 8; + filter->data = xcalloc(filter->len, sizeof(unsigned char)); hashmap_for_each_entry(&pathmap, &iter, e, entry) { diff --git a/builtin/commit-graph.c b/builtin/commit-graph.c index f6797e2a9f..ca5e7b4c67 100644 --- a/builtin/commit-graph.c +++ b/builtin/commit-graph.c @@ -201,6 +201,7 @@ static int graph_write(int argc, const char **argv) }; opts.progress = isatty(2); + opts.enable_changed_paths = -1; split_opts.size_multiple = 2; split_opts.max_commits = 0; split_opts.expire_time = 0; @@ -221,7 +222,9 @@ static int graph_write(int argc, const char **argv) flags |= COMMIT_GRAPH_WRITE_SPLIT; if (opts.progress) flags |= COMMIT_GRAPH_WRITE_PROGRESS; - if (opts.enable_changed_paths || + if (!opts.enable_changed_paths) + flags |= COMMIT_GRAPH_NO_WRITE_BLOOM_FILTERS; + if (opts.enable_changed_paths == 1 || git_env_bool(GIT_TEST_COMMIT_GRAPH_CHANGED_PATHS, 0)) flags |= COMMIT_GRAPH_WRITE_BLOOM_FILTERS; diff --git a/commit-graph.c b/commit-graph.c index fdd1c4fa7c..bc73b3c918 100644 --- a/commit-graph.c +++ b/commit-graph.c @@ -1,7 +1,5 @@ -#include "cache.h" -#include "config.h" -#include "dir.h" #include "git-compat-util.h" +#include "config.h" #include "lockfile.h" #include "pack.h" #include "packfile.h" @@ -284,8 +282,7 @@ struct commit_graph *parse_commit_graph(void *graph_map, size_t graph_size) const unsigned char *data, *chunk_lookup; uint32_t i; struct commit_graph *graph; - uint64_t last_chunk_offset; - uint32_t last_chunk_id; + uint64_t next_chunk_offset; uint32_t graph_signature; unsigned char graph_version, hash_version; @@ -325,24 +322,26 @@ struct commit_graph *parse_commit_graph(void *graph_map, size_t 
graph_size) graph->data = graph_map; graph->data_len = graph_size; - last_chunk_id = 0; - last_chunk_offset = 8; + if (graph_size < GRAPH_HEADER_SIZE + + (graph->num_chunks + 1) * GRAPH_CHUNKLOOKUP_WIDTH + + GRAPH_FANOUT_SIZE + the_hash_algo->rawsz) { + error(_("commit-graph file is too small to hold %u chunks"), + graph->num_chunks); + free(graph); + return NULL; + } + chunk_lookup = data + 8; + next_chunk_offset = get_be64(chunk_lookup + 4); for (i = 0; i < graph->num_chunks; i++) { uint32_t chunk_id; - uint64_t chunk_offset; + uint64_t chunk_offset = next_chunk_offset; int chunk_repeated = 0; - if (data + graph_size - chunk_lookup < - GRAPH_CHUNKLOOKUP_WIDTH) { - error(_("commit-graph chunk lookup table entry missing; file may be incomplete")); - goto free_and_return; - } - chunk_id = get_be32(chunk_lookup + 0); - chunk_offset = get_be64(chunk_lookup + 4); chunk_lookup += GRAPH_CHUNKLOOKUP_WIDTH; + next_chunk_offset = get_be64(chunk_lookup + 4); if (chunk_offset > graph_size - the_hash_algo->rawsz) { error(_("commit-graph improper chunk offset %08x%08x"), (uint32_t)(chunk_offset >> 32), @@ -361,8 +360,11 @@ struct commit_graph *parse_commit_graph(void *graph_map, size_t graph_size) case GRAPH_CHUNKID_OIDLOOKUP: if (graph->chunk_oid_lookup) chunk_repeated = 1; - else + else { graph->chunk_oid_lookup = data + chunk_offset; + graph->num_commits = (next_chunk_offset - chunk_offset) + / graph->hash_len; + } break; case GRAPH_CHUNKID_DATA: @@ -416,15 +418,6 @@ struct commit_graph *parse_commit_graph(void *graph_map, size_t graph_size) error(_("commit-graph chunk id %08x appears multiple times"), chunk_id); goto free_and_return; } - - if (last_chunk_id == GRAPH_CHUNKID_OIDLOOKUP) - { - graph->num_commits = (chunk_offset - last_chunk_offset) - / graph->hash_len; - } - - last_chunk_id = chunk_id; - last_chunk_offset = chunk_offset; } if (graph->chunk_bloom_indexes && graph->chunk_bloom_data) { @@ -623,10 +616,6 @@ static int prepare_commit_graph(struct repository *r) 
return !!r->objects->commit_graph; r->objects->commit_graph_attempted = 1; - if (git_env_bool(GIT_TEST_COMMIT_GRAPH_DIE_ON_LOAD, 0)) - die("dying as requested by the '%s' variable on commit-graph load!", - GIT_TEST_COMMIT_GRAPH_DIE_ON_LOAD); - prepare_repo_settings(r); if (!git_env_bool(GIT_TEST_COMMIT_GRAPH, 0) && @@ -855,6 +844,14 @@ static int parse_commit_in_graph_one(struct repository *r, int parse_commit_in_graph(struct repository *r, struct commit *item) { + static int checked_env = 0; + + if (!checked_env && + git_env_bool(GIT_TEST_COMMIT_GRAPH_DIE_ON_PARSE, 0)) + die("dying as requested by the '%s' variable on commit-graph parse!", + GIT_TEST_COMMIT_GRAPH_DIE_ON_PARSE); + checked_env = 1; + if (!prepare_commit_graph(r)) return 0; return parse_commit_in_graph_one(r, r->objects->commit_graph, item); @@ -947,10 +944,11 @@ struct write_commit_graph_context { const struct split_commit_graph_opts *split_opts; size_t total_bloom_filter_data_size; + struct bloom_filter_settings bloom_settings; }; -static void write_graph_chunk_fanout(struct hashfile *f, - struct write_commit_graph_context *ctx) +static int write_graph_chunk_fanout(struct hashfile *f, + struct write_commit_graph_context *ctx) { int i, count = 0; struct commit **list = ctx->commits.list; @@ -971,17 +969,21 @@ static void write_graph_chunk_fanout(struct hashfile *f, hashwrite_be32(f, count); } + + return 0; } -static void write_graph_chunk_oids(struct hashfile *f, int hash_len, - struct write_commit_graph_context *ctx) +static int write_graph_chunk_oids(struct hashfile *f, + struct write_commit_graph_context *ctx) { struct commit **list = ctx->commits.list; int count; for (count = 0; count < ctx->commits.nr; count++, list++) { display_progress(ctx->progress, ++ctx->progress_cnt); - hashwrite(f, (*list)->object.oid.hash, (int)hash_len); + hashwrite(f, (*list)->object.oid.hash, (int)the_hash_algo->rawsz); } + + return 0; } static const unsigned char *commit_to_sha1(size_t index, void *table) @@ -990,8 
+992,8 @@ static const unsigned char *commit_to_sha1(size_t index, void *table) return commits[index]->object.oid.hash; } -static void write_graph_chunk_data(struct hashfile *f, int hash_len, - struct write_commit_graph_context *ctx) +static int write_graph_chunk_data(struct hashfile *f, + struct write_commit_graph_context *ctx) { struct commit **list = ctx->commits.list; struct commit **last = ctx->commits.list + ctx->commits.nr; @@ -1008,7 +1010,7 @@ static void write_graph_chunk_data(struct hashfile *f, int hash_len, die(_("unable to parse commit %s"), oid_to_hex(&(*list)->object.oid)); tree = get_commit_tree_oid(*list); - hashwrite(f, tree->hash, hash_len); + hashwrite(f, tree->hash, the_hash_algo->rawsz); parent = (*list)->parents; @@ -1088,10 +1090,12 @@ static void write_graph_chunk_data(struct hashfile *f, int hash_len, list++; } + + return 0; } -static void write_graph_chunk_extra_edges(struct hashfile *f, - struct write_commit_graph_context *ctx) +static int write_graph_chunk_extra_edges(struct hashfile *f, + struct write_commit_graph_context *ctx) { struct commit **list = ctx->commits.list; struct commit **last = ctx->commits.list + ctx->commits.nr; @@ -1140,10 +1144,12 @@ static void write_graph_chunk_extra_edges(struct hashfile *f, list++; } + + return 0; } -static void write_graph_chunk_bloom_indexes(struct hashfile *f, - struct write_commit_graph_context *ctx) +static int write_graph_chunk_bloom_indexes(struct hashfile *f, + struct write_commit_graph_context *ctx) { struct commit **list = ctx->commits.list; struct commit **last = ctx->commits.list + ctx->commits.nr; @@ -1165,11 +1171,11 @@ static void write_graph_chunk_bloom_indexes(struct hashfile *f, } stop_progress(&progress); + return 0; } -static void write_graph_chunk_bloom_data(struct hashfile *f, - struct write_commit_graph_context *ctx, - const struct bloom_filter_settings *settings) +static int write_graph_chunk_bloom_data(struct hashfile *f, + struct write_commit_graph_context *ctx) { 
struct commit **list = ctx->commits.list; struct commit **last = ctx->commits.list + ctx->commits.nr; @@ -1181,9 +1187,9 @@ static void write_graph_chunk_bloom_data(struct hashfile *f, _("Writing changed paths Bloom filters data"), ctx->commits.nr); - hashwrite_be32(f, settings->hash_version); - hashwrite_be32(f, settings->num_hashes); - hashwrite_be32(f, settings->bits_per_entry); + hashwrite_be32(f, ctx->bloom_settings.hash_version); + hashwrite_be32(f, ctx->bloom_settings.num_hashes); + hashwrite_be32(f, ctx->bloom_settings.bits_per_entry); while (list < last) { struct bloom_filter *filter = get_bloom_filter(ctx->r, *list, 0); @@ -1193,6 +1199,7 @@ static void write_graph_chunk_bloom_data(struct hashfile *f, } stop_progress(&progress); + return 0; } static int oid_compare(const void *_a, const void *_b) @@ -1602,20 +1609,31 @@ static int write_graph_chunk_base(struct hashfile *f, return 0; } +typedef int (*chunk_write_fn)(struct hashfile *f, + struct write_commit_graph_context *ctx); + +struct chunk_info { + uint32_t id; + uint64_t size; + chunk_write_fn write_fn; +}; + static int write_commit_graph_file(struct write_commit_graph_context *ctx) { uint32_t i; int fd; struct hashfile *f; struct lock_file lk = LOCK_INIT; - uint32_t chunk_ids[MAX_NUM_CHUNKS + 1]; - uint64_t chunk_offsets[MAX_NUM_CHUNKS + 1]; + struct chunk_info chunks[MAX_NUM_CHUNKS + 1]; const unsigned hashsz = the_hash_algo->rawsz; struct strbuf progress_title = STRBUF_INIT; int num_chunks = 3; + uint64_t chunk_offset; struct object_id file_hash; const struct bloom_filter_settings bloom_settings = DEFAULT_BLOOM_FILTER_SETTINGS; + ctx->bloom_settings = bloom_settings; + if (ctx->split) { struct strbuf tmp_file = STRBUF_INIT; @@ -1660,51 +1678,41 @@ static int write_commit_graph_file(struct write_commit_graph_context *ctx) f = hashfd(lk.tempfile->fd, lk.tempfile->filename.buf); } - chunk_ids[0] = GRAPH_CHUNKID_OIDFANOUT; - chunk_ids[1] = GRAPH_CHUNKID_OIDLOOKUP; - chunk_ids[2] = GRAPH_CHUNKID_DATA; + 
chunks[0].id = GRAPH_CHUNKID_OIDFANOUT; + chunks[0].size = GRAPH_FANOUT_SIZE; + chunks[0].write_fn = write_graph_chunk_fanout; + chunks[1].id = GRAPH_CHUNKID_OIDLOOKUP; + chunks[1].size = hashsz * ctx->commits.nr; + chunks[1].write_fn = write_graph_chunk_oids; + chunks[2].id = GRAPH_CHUNKID_DATA; + chunks[2].size = (hashsz + 16) * ctx->commits.nr; + chunks[2].write_fn = write_graph_chunk_data; if (ctx->num_extra_edges) { - chunk_ids[num_chunks] = GRAPH_CHUNKID_EXTRAEDGES; + chunks[num_chunks].id = GRAPH_CHUNKID_EXTRAEDGES; + chunks[num_chunks].size = 4 * ctx->num_extra_edges; + chunks[num_chunks].write_fn = write_graph_chunk_extra_edges; num_chunks++; } if (ctx->changed_paths) { - chunk_ids[num_chunks] = GRAPH_CHUNKID_BLOOMINDEXES; + chunks[num_chunks].id = GRAPH_CHUNKID_BLOOMINDEXES; + chunks[num_chunks].size = sizeof(uint32_t) * ctx->commits.nr; + chunks[num_chunks].write_fn = write_graph_chunk_bloom_indexes; num_chunks++; - chunk_ids[num_chunks] = GRAPH_CHUNKID_BLOOMDATA; + chunks[num_chunks].id = GRAPH_CHUNKID_BLOOMDATA; + chunks[num_chunks].size = sizeof(uint32_t) * 3 + + ctx->total_bloom_filter_data_size; + chunks[num_chunks].write_fn = write_graph_chunk_bloom_data; num_chunks++; } if (ctx->num_commit_graphs_after > 1) { - chunk_ids[num_chunks] = GRAPH_CHUNKID_BASE; + chunks[num_chunks].id = GRAPH_CHUNKID_BASE; + chunks[num_chunks].size = hashsz * (ctx->num_commit_graphs_after - 1); + chunks[num_chunks].write_fn = write_graph_chunk_base; num_chunks++; } - chunk_ids[num_chunks] = 0; - - chunk_offsets[0] = 8 + (num_chunks + 1) * GRAPH_CHUNKLOOKUP_WIDTH; - chunk_offsets[1] = chunk_offsets[0] + GRAPH_FANOUT_SIZE; - chunk_offsets[2] = chunk_offsets[1] + hashsz * ctx->commits.nr; - chunk_offsets[3] = chunk_offsets[2] + (hashsz + 16) * ctx->commits.nr; - - num_chunks = 3; - if (ctx->num_extra_edges) { - chunk_offsets[num_chunks + 1] = chunk_offsets[num_chunks] + - 4 * ctx->num_extra_edges; - num_chunks++; - } - if (ctx->changed_paths) { - chunk_offsets[num_chunks + 
1] = chunk_offsets[num_chunks] + - sizeof(uint32_t) * ctx->commits.nr; - num_chunks++; - - chunk_offsets[num_chunks + 1] = chunk_offsets[num_chunks] + - sizeof(uint32_t) * 3 + ctx->total_bloom_filter_data_size; - num_chunks++; - } - if (ctx->num_commit_graphs_after > 1) { - chunk_offsets[num_chunks + 1] = chunk_offsets[num_chunks] + - hashsz * (ctx->num_commit_graphs_after - 1); - num_chunks++; - } + chunks[num_chunks].id = 0; + chunks[num_chunks].size = 0; hashwrite_be32(f, GRAPH_SIGNATURE); @@ -1713,13 +1721,16 @@ static int write_commit_graph_file(struct write_commit_graph_context *ctx) hashwrite_u8(f, num_chunks); hashwrite_u8(f, ctx->num_commit_graphs_after - 1); + chunk_offset = 8 + (num_chunks + 1) * GRAPH_CHUNKLOOKUP_WIDTH; for (i = 0; i <= num_chunks; i++) { uint32_t chunk_write[3]; - chunk_write[0] = htonl(chunk_ids[i]); - chunk_write[1] = htonl(chunk_offsets[i] >> 32); - chunk_write[2] = htonl(chunk_offsets[i] & 0xffffffff); + chunk_write[0] = htonl(chunks[i].id); + chunk_write[1] = htonl(chunk_offset >> 32); + chunk_write[2] = htonl(chunk_offset & 0xffffffff); hashwrite(f, chunk_write, 12); + + chunk_offset += chunks[i].size; } if (ctx->report_progress) { @@ -1732,19 +1743,24 @@ static int write_commit_graph_file(struct write_commit_graph_context *ctx) progress_title.buf, num_chunks * ctx->commits.nr); } - write_graph_chunk_fanout(f, ctx); - write_graph_chunk_oids(f, hashsz, ctx); - write_graph_chunk_data(f, hashsz, ctx); - if (ctx->num_extra_edges) - write_graph_chunk_extra_edges(f, ctx); - if (ctx->changed_paths) { - write_graph_chunk_bloom_indexes(f, ctx); - write_graph_chunk_bloom_data(f, ctx, &bloom_settings); - } - if (ctx->num_commit_graphs_after > 1 && - write_graph_chunk_base(f, ctx)) { - return -1; + + chunk_offset = f->total + f->offset; + for (i = 0; i < num_chunks; i++) { + uint64_t end_offset; + + if (chunks[i].write_fn(f, ctx)) { + error(_("failed writing chunk with id %"PRIx32""), + chunks[i].id); + return -1; + } + + end_offset = 
f->total + f->offset; + if (end_offset - chunk_offset != chunks[i].size) + BUG("expected to write %"PRId64" bytes to chunk %"PRIx32", but wrote %"PRId64" instead", + chunks[i].size, chunks[i].id, end_offset - chunk_offset); + chunk_offset = end_offset; } + stop_progress(&ctx->progress); strbuf_release(&progress_title); @@ -2078,9 +2094,19 @@ int write_commit_graph(struct object_directory *odb, ctx->report_progress = flags & COMMIT_GRAPH_WRITE_PROGRESS ? 1 : 0; ctx->split = flags & COMMIT_GRAPH_WRITE_SPLIT ? 1 : 0; ctx->split_opts = split_opts; - ctx->changed_paths = flags & COMMIT_GRAPH_WRITE_BLOOM_FILTERS ? 1 : 0; ctx->total_bloom_filter_data_size = 0; + if (flags & COMMIT_GRAPH_WRITE_BLOOM_FILTERS) + ctx->changed_paths = 1; + else if (!(flags & COMMIT_GRAPH_NO_WRITE_BLOOM_FILTERS)) { + prepare_commit_graph_one(ctx->r, ctx->odb); + + /* We have changed-paths already. Keep them in the next graph */ + if (ctx->r->objects->commit_graph && + ctx->r->objects->commit_graph->chunk_bloom_data) + ctx->changed_paths = 1; + } + if (ctx->split) { struct commit_graph *g; prepare_commit_graph(ctx->r); diff --git a/commit-graph.h b/commit-graph.h index 28f89cdf3e..09a97030dc 100644 --- a/commit-graph.h +++ b/commit-graph.h @@ -2,14 +2,11 @@ #define COMMIT_GRAPH_H #include "git-compat-util.h" -#include "repository.h" -#include "string-list.h" -#include "cache.h" #include "object-store.h" #include "oidset.h" #define GIT_TEST_COMMIT_GRAPH "GIT_TEST_COMMIT_GRAPH" -#define GIT_TEST_COMMIT_GRAPH_DIE_ON_LOAD "GIT_TEST_COMMIT_GRAPH_DIE_ON_LOAD" +#define GIT_TEST_COMMIT_GRAPH_DIE_ON_PARSE "GIT_TEST_COMMIT_GRAPH_DIE_ON_PARSE" #define GIT_TEST_COMMIT_GRAPH_CHANGED_PATHS "GIT_TEST_COMMIT_GRAPH_CHANGED_PATHS" /* @@ -23,6 +20,9 @@ void git_test_write_commit_graph_or_die(void); struct commit; struct bloom_filter_settings; +struct repository; +struct raw_object_store; +struct string_list; char *get_commit_graph_filename(struct object_directory *odb); int open_commit_graph(const char 
*graph_file, int *fd, struct stat *st); @@ -92,6 +92,7 @@ enum commit_graph_write_flags { COMMIT_GRAPH_WRITE_PROGRESS = (1 << 1), COMMIT_GRAPH_WRITE_SPLIT = (1 << 2), COMMIT_GRAPH_WRITE_BLOOM_FILTERS = (1 << 3), + COMMIT_GRAPH_NO_WRITE_BLOOM_FILTERS = (1 << 4), }; enum commit_graph_split_flags { diff --git a/commit-slab-decl.h b/commit-slab-decl.h index bfbed1516a..98de2c970c 100644 --- a/commit-slab-decl.h +++ b/commit-slab-decl.h @@ -32,6 +32,7 @@ struct slabname { \ void init_ ##slabname## _with_stride(struct slabname *s, unsigned stride); \ void init_ ##slabname(struct slabname *s); \ void clear_ ##slabname(struct slabname *s); \ +void deep_clear_ ##slabname(struct slabname *s, void (*free_fn)(elemtype *ptr)); \ elemtype *slabname## _at_peek(struct slabname *s, const struct commit *c, int add_if_missing); \ elemtype *slabname## _at(struct slabname *s, const struct commit *c); \ elemtype *slabname## _peek(struct slabname *s, const struct commit *c) diff --git a/commit-slab-impl.h b/commit-slab-impl.h index 5c0eb91a5d..557738df27 100644 --- a/commit-slab-impl.h +++ b/commit-slab-impl.h @@ -38,6 +38,19 @@ scope void clear_ ##slabname(struct slabname *s) \ FREE_AND_NULL(s->slab); \ } \ \ +scope void deep_clear_ ##slabname(struct slabname *s, void (*free_fn)(elemtype *)) \ +{ \ + unsigned int i; \ + for (i = 0; i < s->slab_count; i++) { \ + unsigned int j; \ + if (!s->slab[i]) \ + continue; \ + for (j = 0; j < s->slab_size; j++) \ + free_fn(&s->slab[i][j * s->stride]); \ + } \ + clear_ ##slabname(s); \ +} \ + \ scope elemtype *slabname## _at_peek(struct slabname *s, \ const struct commit *c, \ int add_if_missing) \ diff --git a/commit-slab.h b/commit-slab.h index 05b3f2804e..8e72a30536 100644 --- a/commit-slab.h +++ b/commit-slab.h @@ -47,6 +47,16 @@ * * Call this function before the slab falls out of scope to avoid * leaking memory. 
+ * + * - void deep_clear_indegree(struct indegree *, void (*free_fn)(int*)) + * + * Empties the slab, similar to clear_indegree(), but in addition it + * calls the given 'free_fn' for each slab entry to release any + * additional memory that might be owned by the entry (but not the + * entry itself!). + * Note that 'free_fn' might be called even for entries for which no + * indegree_at() call has been made; in this case 'free_fn' is invoked + * with a pointer to a zero-initialized location. */ #define define_commit_slab(slabname, elemtype) \ diff --git a/diff.h b/diff.h index 9443dc1b00..e0c0af6286 100644 --- a/diff.h +++ b/diff.h @@ -431,11 +431,11 @@ struct combine_diff_path *diff_tree_paths( struct combine_diff_path *p, const struct object_id *oid, const struct object_id **parents_oid, int nparent, struct strbuf *base, struct diff_options *opt); -int diff_tree_oid(const struct object_id *old_oid, - const struct object_id *new_oid, - const char *base, struct diff_options *opt); -int diff_root_tree_oid(const struct object_id *new_oid, const char *base, - struct diff_options *opt); +void diff_tree_oid(const struct object_id *old_oid, + const struct object_id *new_oid, + const char *base, struct diff_options *opt); +void diff_root_tree_oid(const struct object_id *new_oid, const char *base, + struct diff_options *opt); struct combine_diff_path { struct combine_diff_path *next; diff --git a/revision.c b/revision.c index 32be93f404..8bd383b1dd 100644 --- a/revision.c +++ b/revision.c @@ -670,9 +670,10 @@ static void prepare_to_use_bloom_filter(struct rev_info *revs) { struct pathspec_item *pi; char *path_alloc = NULL; - const char *path; + const char *path, *p; int last_index; - int len; + size_t len; + int path_component_nr = 0, j; if (!revs->commits) return; @@ -705,8 +706,22 @@ static void prepare_to_use_bloom_filter(struct rev_info *revs) len = strlen(path); - revs->bloom_key = xmalloc(sizeof(struct bloom_key)); - fill_bloom_key(path, len, revs->bloom_key, 
revs->bloom_filter_settings); + p = path; + do { + p = strchrnul(p + 1, '/'); + path_component_nr++; + } while (p - path < len); + + revs->bloom_keys_nr = path_component_nr; + ALLOC_ARRAY(revs->bloom_keys, revs->bloom_keys_nr); + + p = path; + for (j = 0; j < revs->bloom_keys_nr; j++) { + p = strchrnul(p + 1, '/'); + + fill_bloom_key(path, p - path, &revs->bloom_keys[j], + revs->bloom_filter_settings); + } if (trace2_is_enabled() && !bloom_filter_atexit_registered) { atexit(trace2_bloom_filter_statistics_atexit); @@ -720,7 +735,7 @@ static int check_maybe_different_in_bloom_filter(struct rev_info *revs, struct commit *commit) { struct bloom_filter *filter; - int result; + int result = 1, j; if (!revs->repo->objects->commit_graph) return -1; @@ -740,9 +755,11 @@ static int check_maybe_different_in_bloom_filter(struct rev_info *revs, return -1; } - result = bloom_filter_contains(filter, - revs->bloom_key, - revs->bloom_filter_settings); + for (j = 0; result && j < revs->bloom_keys_nr; j++) { + result = bloom_filter_contains(filter, + &revs->bloom_keys[j], + revs->bloom_filter_settings); + } if (result) count_bloom_filter_maybe++; @@ -782,7 +799,7 @@ static int rev_compare_tree(struct rev_info *revs, return REV_TREE_SAME; } - if (revs->bloom_key && !nth_parent) { + if (revs->bloom_keys_nr && !nth_parent) { bloom_ret = check_maybe_different_in_bloom_filter(revs, commit); if (bloom_ret == 0) @@ -791,9 +808,7 @@ static int rev_compare_tree(struct rev_info *revs, tree_difference = REV_TREE_SAME; revs->pruning.flags.has_changes = 0; - if (diff_tree_oid(&t1->object.oid, &t2->object.oid, "", - &revs->pruning) < 0) - return REV_TREE_DIFFERENT; + diff_tree_oid(&t1->object.oid, &t2->object.oid, "", &revs->pruning); if (!nth_parent) if (bloom_ret == 1 && tree_difference == REV_TREE_SAME) @@ -804,7 +819,6 @@ static int rev_compare_tree(struct rev_info *revs, static int rev_same_tree_as_empty(struct rev_info *revs, struct commit *commit) { - int retval; struct tree *t1 = 
get_commit_tree(commit); if (!t1) @@ -812,9 +826,9 @@ static int rev_same_tree_as_empty(struct rev_info *revs, struct commit *commit) tree_difference = REV_TREE_SAME; revs->pruning.flags.has_changes = 0; - retval = diff_tree_oid(NULL, &t1->object.oid, "", &revs->pruning); + diff_tree_oid(NULL, &t1->object.oid, "", &revs->pruning); - return retval >= 0 && (tree_difference == REV_TREE_SAME); + return tree_difference == REV_TREE_SAME; } struct treesame_state { diff --git a/revision.h b/revision.h index 93491b79d4..d8ac585123 100644 --- a/revision.h +++ b/revision.h @@ -300,8 +300,10 @@ struct rev_info { struct topo_walk_info *topo_walk_info; /* Commit graph bloom filter fields */ - /* The bloom filter key for the pathspec */ - struct bloom_key *bloom_key; + /* The bloom filter key(s) for the pathspec */ + struct bloom_key *bloom_keys; + int bloom_keys_nr; + /* * The bloom filter settings used to generate the key. * This is loaded from the commit-graph being used. diff --git a/shallow.c b/shallow.c index b826de9b67..91b9e1073c 100644 --- a/shallow.c +++ b/shallow.c @@ -110,6 +110,10 @@ void rollback_shallow_file(struct repository *r, struct shallow_lock *lk) * supports a "valid" flag. 
*/ define_commit_slab(commit_depth, int *); +static void free_depth_in_slab(int **ptr) +{ + FREE_AND_NULL(*ptr); +} struct commit_list *get_shallow_commits(struct object_array *heads, int depth, int shallow_flag, int not_shallow_flag) { @@ -176,15 +180,7 @@ struct commit_list *get_shallow_commits(struct object_array *heads, int depth, } } } - for (i = 0; i < depths.slab_count; i++) { - int j; - - if (!depths.slab[i]) - continue; - for (j = 0; j < depths.slab_size; j++) - free(depths.slab[i][j]); - } - clear_commit_depth(&depths); + deep_clear_commit_depth(&depths, free_depth_in_slab); return result; } diff --git a/t/t4010-diff-pathspec.sh b/t/t4010-diff-pathspec.sh index e5ca359edf..65cc703c65 100755 --- a/t/t4010-diff-pathspec.sh +++ b/t/t4010-diff-pathspec.sh @@ -125,7 +125,9 @@ test_expect_success 'setup submodules' ' test_expect_success 'diff-tree ignores trailing slash on submodule path' ' git diff --name-only HEAD^ HEAD submod >expect && git diff --name-only HEAD^ HEAD submod/ >actual && - test_cmp expect actual + test_cmp expect actual && + git diff --name-only HEAD^ HEAD -- submod/whatever >actual && + test_must_be_empty actual ' test_expect_success 'diff multiple wildcard pathspecs' ' diff --git a/t/t4216-log-bloom.sh b/t/t4216-log-bloom.sh index c855bcd3e7..89839e5ee1 100755 --- a/t/t4216-log-bloom.sh +++ b/t/t4216-log-bloom.sh @@ -126,7 +126,7 @@ test_expect_success 'setup - add commit-graph to the chain without Bloom filters test_commit c14 A/anotherFile2 && test_commit c15 A/B/anotherFile2 && test_commit c16 A/B/C/anotherFile2 && - GIT_TEST_COMMIT_GRAPH_CHANGED_PATHS=0 git commit-graph write --reachable --split && + git commit-graph write --reachable --split --no-changed-paths && test_line_count = 2 .git/objects/info/commit-graphs/commit-graph-chain ' @@ -142,7 +142,7 @@ test_expect_success 'setup - add commit-graph to the chain with Bloom filters' ' test_bloom_filters_used_when_some_filters_are_missing () { log_args=$1 - 
bloom_trace_prefix="statistics:{\"filter_not_present\":3,\"zero_length_filter\":0,\"maybe\":8,\"definitely_not\":6" + bloom_trace_prefix="statistics:{\"filter_not_present\":3,\"zero_length_filter\":0,\"maybe\":6,\"definitely_not\":8" setup "$log_args" && grep -q "$bloom_trace_prefix" "$TRASH_DIRECTORY/trace.perf" && test_cmp log_wo_bloom log_w_bloom diff --git a/t/t5318-commit-graph.sh b/t/t5318-commit-graph.sh index 26f332d6a3..2804b0dd45 100755 --- a/t/t5318-commit-graph.sh +++ b/t/t5318-commit-graph.sh @@ -476,7 +476,7 @@ corrupt_graph_verify() { cp $objdir/info/commit-graph commit-graph-pre-write-test fi && git status --short && - GIT_TEST_COMMIT_GRAPH_DIE_ON_LOAD=true git commit-graph write && + GIT_TEST_COMMIT_GRAPH_DIE_ON_PARSE=true git commit-graph write && chmod u+w $objdir/info/commit-graph && git commit-graph verify } @@ -529,7 +529,7 @@ test_expect_success 'detect bad hash version' ' ' test_expect_success 'detect low chunk count' ' - corrupt_graph_and_verify $GRAPH_BYTE_CHUNK_COUNT "\02" \ + corrupt_graph_and_verify $GRAPH_BYTE_CHUNK_COUNT "\01" \ "missing the .* chunk" ' @@ -615,7 +615,8 @@ test_expect_success 'detect invalid checksum hash' ' test_expect_success 'detect incorrect chunk count' ' corrupt_graph_and_verify $GRAPH_BYTE_CHUNK_COUNT "\377" \ - "chunk lookup table entry missing" $GRAPH_CHUNK_LOOKUP_OFFSET + "commit-graph file is too small to hold [0-9]* chunks" \ + $GRAPH_CHUNK_LOOKUP_OFFSET ' test_expect_success 'git fsck (checks commit-graph)' ' diff --git a/tree-diff.c b/tree-diff.c index f3d303c6e5..6ebad1a46f 100644 --- a/tree-diff.c +++ b/tree-diff.c @@ -29,9 +29,9 @@ static struct combine_diff_path *ll_diff_tree_paths( struct combine_diff_path *p, const struct object_id *oid, const struct object_id **parents_oid, int nparent, struct strbuf *base, struct diff_options *opt); -static int ll_diff_tree_oid(const struct object_id *old_oid, - const struct object_id *new_oid, - struct strbuf *base, struct diff_options *opt); +static void 
ll_diff_tree_oid(const struct object_id *old_oid, + const struct object_id *new_oid, + struct strbuf *base, struct diff_options *opt); /* * Compare two tree entries, taking into account only path/S_ISDIR(mode), @@ -679,9 +679,9 @@ static void try_to_follow_renames(const struct object_id *old_oid, q->nr = 1; } -static int ll_diff_tree_oid(const struct object_id *old_oid, - const struct object_id *new_oid, - struct strbuf *base, struct diff_options *opt) +static void ll_diff_tree_oid(const struct object_id *old_oid, + const struct object_id *new_oid, + struct strbuf *base, struct diff_options *opt) { struct combine_diff_path phead, *p; pathchange_fn_t pathchange_old = opt->pathchange; @@ -697,29 +697,27 @@ static int ll_diff_tree_oid(const struct object_id *old_oid, } opt->pathchange = pathchange_old; - return 0; } -int diff_tree_oid(const struct object_id *old_oid, - const struct object_id *new_oid, - const char *base_str, struct diff_options *opt) +void diff_tree_oid(const struct object_id *old_oid, + const struct object_id *new_oid, + const char *base_str, struct diff_options *opt) { struct strbuf base; - int retval; strbuf_init(&base, PATH_MAX); strbuf_addstr(&base, base_str); - retval = ll_diff_tree_oid(old_oid, new_oid, &base, opt); + ll_diff_tree_oid(old_oid, new_oid, &base, opt); if (!*base_str && opt->flags.follow_renames && diff_might_be_rename()) try_to_follow_renames(old_oid, new_oid, &base, opt); strbuf_release(&base); - - return retval; } -int diff_root_tree_oid(const struct object_id *new_oid, const char *base, struct diff_options *opt) +void diff_root_tree_oid(const struct object_id *new_oid, + const char *base, + struct diff_options *opt) { - return diff_tree_oid(NULL, new_oid, base, opt); + diff_tree_oid(NULL, new_oid, base, opt); } diff --git a/tree-walk.c b/tree-walk.c index bb0ad34c54..0160294712 100644 --- a/tree-walk.c +++ b/tree-walk.c @@ -851,7 +851,14 @@ static int match_entry(const struct pathspec_item *item, if (matchlen > pathlen) { if 
(match[pathlen] != '/') return 0; - if (!S_ISDIR(entry->mode) && !S_ISGITLINK(entry->mode)) + /* + * Reject non-directories as partial pathnames, except + * when match is a submodule with a trailing slash and + * nothing else (to handle 'submod/' and 'submod' + * uniformly). + */ + if (!S_ISDIR(entry->mode) && + (!S_ISGITLINK(entry->mode) || matchlen > pathlen + 1)) return 0; }