Merge branch 'sg/commit-graph-cleanups' into pu

The changed-path Bloom filter is improved using ideas from an
independent implementation.

* sg/commit-graph-cleanups:
  commit-graph: persist existence of changed-paths
  commit-graph: change test to die on parse, not load
  bloom: enforce a minimum size of 8 bytes
  commit-graph: check all leading directories in changed path Bloom filters
  commit-graph: check chunk sizes after writing
  commit-graph: simplify chunk writes into loop
  commit-graph: unify the signatures of all write_graph_chunk_*() functions
  commit-graph: place bloom_settings in context
  commit-graph: simplify write_commit_graph_file() #2
  commit-graph: simplify write_commit_graph_file() #1
  commit-graph: simplify parse_commit_graph() #2
  commit-graph: simplify parse_commit_graph() #1
  commit-graph: clean up #includes
  diff.h: drop diff_tree_oid() & friends' return value
  commit-slab: add a function to deep free entries on the slab
  commit-graph-format.txt: all multi-byte numbers are in network byte order
  commit-graph: fix parsing the Chunk Lookup table
  tree-walk.c: don't match submodule entries for 'submod/anything'
This commit is contained in:
Junio C Hamano
2020-06-19 14:52:42 -07:00
18 changed files with 241 additions and 160 deletions

View File

@ -62,7 +62,10 @@ existing commit-graph file.
With the `--changed-paths` option, compute and write information about the
paths changed between a commit and its first parent. This operation can
take a while on large repositories. It provides significant performance gains
for getting history of a directory or a file with `git log -- <path>`.
for getting history of a directory or a file with `git log -- <path>`. If
this option is given, future commit-graph writes will automatically assume
that this option was intended. Use `--no-changed-paths` to stop storing this
data.
+
With the `--split[=<strategy>]` option, write the commit-graph as a
chain of multiple commit-graph files stored in

View File

@ -32,7 +32,7 @@ the body into "chunks" and provide a binary lookup table at the beginning
of the body. The header includes certain values, such as number of chunks
and hash type.
All 4-byte numbers are in network order.
All multi-byte numbers are in network byte order.
HEADER:

View File

@ -259,6 +259,10 @@ struct bloom_filter *get_bloom_filter(struct repository *r,
}
filter->len = (hashmap_get_size(&pathmap) * settings.bits_per_entry + BITS_PER_WORD - 1) / BITS_PER_WORD;
if (filter->len && filter->len < 8)
filter->len = 8;
filter->data = xcalloc(filter->len, sizeof(unsigned char));
hashmap_for_each_entry(&pathmap, &iter, e, entry) {

View File

@ -201,6 +201,7 @@ static int graph_write(int argc, const char **argv)
};
opts.progress = isatty(2);
opts.enable_changed_paths = -1;
split_opts.size_multiple = 2;
split_opts.max_commits = 0;
split_opts.expire_time = 0;
@ -221,7 +222,9 @@ static int graph_write(int argc, const char **argv)
flags |= COMMIT_GRAPH_WRITE_SPLIT;
if (opts.progress)
flags |= COMMIT_GRAPH_WRITE_PROGRESS;
if (opts.enable_changed_paths ||
if (!opts.enable_changed_paths)
flags |= COMMIT_GRAPH_NO_WRITE_BLOOM_FILTERS;
if (opts.enable_changed_paths == 1 ||
git_env_bool(GIT_TEST_COMMIT_GRAPH_CHANGED_PATHS, 0))
flags |= COMMIT_GRAPH_WRITE_BLOOM_FILTERS;

View File

@ -1,7 +1,5 @@
#include "cache.h"
#include "config.h"
#include "dir.h"
#include "git-compat-util.h"
#include "config.h"
#include "lockfile.h"
#include "pack.h"
#include "packfile.h"
@ -284,8 +282,7 @@ struct commit_graph *parse_commit_graph(void *graph_map, size_t graph_size)
const unsigned char *data, *chunk_lookup;
uint32_t i;
struct commit_graph *graph;
uint64_t last_chunk_offset;
uint32_t last_chunk_id;
uint64_t next_chunk_offset;
uint32_t graph_signature;
unsigned char graph_version, hash_version;
@ -325,24 +322,26 @@ struct commit_graph *parse_commit_graph(void *graph_map, size_t graph_size)
graph->data = graph_map;
graph->data_len = graph_size;
last_chunk_id = 0;
last_chunk_offset = 8;
if (graph_size < GRAPH_HEADER_SIZE +
(graph->num_chunks + 1) * GRAPH_CHUNKLOOKUP_WIDTH +
GRAPH_FANOUT_SIZE + the_hash_algo->rawsz) {
error(_("commit-graph file is too small to hold %u chunks"),
graph->num_chunks);
free(graph);
return NULL;
}
chunk_lookup = data + 8;
next_chunk_offset = get_be64(chunk_lookup + 4);
for (i = 0; i < graph->num_chunks; i++) {
uint32_t chunk_id;
uint64_t chunk_offset;
uint64_t chunk_offset = next_chunk_offset;
int chunk_repeated = 0;
if (data + graph_size - chunk_lookup <
GRAPH_CHUNKLOOKUP_WIDTH) {
error(_("commit-graph chunk lookup table entry missing; file may be incomplete"));
goto free_and_return;
}
chunk_id = get_be32(chunk_lookup + 0);
chunk_offset = get_be64(chunk_lookup + 4);
chunk_lookup += GRAPH_CHUNKLOOKUP_WIDTH;
next_chunk_offset = get_be64(chunk_lookup + 4);
if (chunk_offset > graph_size - the_hash_algo->rawsz) {
error(_("commit-graph improper chunk offset %08x%08x"), (uint32_t)(chunk_offset >> 32),
@ -361,8 +360,11 @@ struct commit_graph *parse_commit_graph(void *graph_map, size_t graph_size)
case GRAPH_CHUNKID_OIDLOOKUP:
if (graph->chunk_oid_lookup)
chunk_repeated = 1;
else
else {
graph->chunk_oid_lookup = data + chunk_offset;
graph->num_commits = (next_chunk_offset - chunk_offset)
/ graph->hash_len;
}
break;
case GRAPH_CHUNKID_DATA:
@ -416,15 +418,6 @@ struct commit_graph *parse_commit_graph(void *graph_map, size_t graph_size)
error(_("commit-graph chunk id %08x appears multiple times"), chunk_id);
goto free_and_return;
}
if (last_chunk_id == GRAPH_CHUNKID_OIDLOOKUP)
{
graph->num_commits = (chunk_offset - last_chunk_offset)
/ graph->hash_len;
}
last_chunk_id = chunk_id;
last_chunk_offset = chunk_offset;
}
if (graph->chunk_bloom_indexes && graph->chunk_bloom_data) {
@ -623,10 +616,6 @@ static int prepare_commit_graph(struct repository *r)
return !!r->objects->commit_graph;
r->objects->commit_graph_attempted = 1;
if (git_env_bool(GIT_TEST_COMMIT_GRAPH_DIE_ON_LOAD, 0))
die("dying as requested by the '%s' variable on commit-graph load!",
GIT_TEST_COMMIT_GRAPH_DIE_ON_LOAD);
prepare_repo_settings(r);
if (!git_env_bool(GIT_TEST_COMMIT_GRAPH, 0) &&
@ -855,6 +844,14 @@ static int parse_commit_in_graph_one(struct repository *r,
int parse_commit_in_graph(struct repository *r, struct commit *item)
{
static int checked_env = 0;
if (!checked_env &&
git_env_bool(GIT_TEST_COMMIT_GRAPH_DIE_ON_PARSE, 0))
die("dying as requested by the '%s' variable on commit-graph parse!",
GIT_TEST_COMMIT_GRAPH_DIE_ON_PARSE);
checked_env = 1;
if (!prepare_commit_graph(r))
return 0;
return parse_commit_in_graph_one(r, r->objects->commit_graph, item);
@ -947,10 +944,11 @@ struct write_commit_graph_context {
const struct split_commit_graph_opts *split_opts;
size_t total_bloom_filter_data_size;
struct bloom_filter_settings bloom_settings;
};
static void write_graph_chunk_fanout(struct hashfile *f,
struct write_commit_graph_context *ctx)
static int write_graph_chunk_fanout(struct hashfile *f,
struct write_commit_graph_context *ctx)
{
int i, count = 0;
struct commit **list = ctx->commits.list;
@ -971,17 +969,21 @@ static void write_graph_chunk_fanout(struct hashfile *f,
hashwrite_be32(f, count);
}
return 0;
}
static void write_graph_chunk_oids(struct hashfile *f, int hash_len,
struct write_commit_graph_context *ctx)
static int write_graph_chunk_oids(struct hashfile *f,
struct write_commit_graph_context *ctx)
{
struct commit **list = ctx->commits.list;
int count;
for (count = 0; count < ctx->commits.nr; count++, list++) {
display_progress(ctx->progress, ++ctx->progress_cnt);
hashwrite(f, (*list)->object.oid.hash, (int)hash_len);
hashwrite(f, (*list)->object.oid.hash, (int)the_hash_algo->rawsz);
}
return 0;
}
static const unsigned char *commit_to_sha1(size_t index, void *table)
@ -990,8 +992,8 @@ static const unsigned char *commit_to_sha1(size_t index, void *table)
return commits[index]->object.oid.hash;
}
static void write_graph_chunk_data(struct hashfile *f, int hash_len,
struct write_commit_graph_context *ctx)
static int write_graph_chunk_data(struct hashfile *f,
struct write_commit_graph_context *ctx)
{
struct commit **list = ctx->commits.list;
struct commit **last = ctx->commits.list + ctx->commits.nr;
@ -1008,7 +1010,7 @@ static void write_graph_chunk_data(struct hashfile *f, int hash_len,
die(_("unable to parse commit %s"),
oid_to_hex(&(*list)->object.oid));
tree = get_commit_tree_oid(*list);
hashwrite(f, tree->hash, hash_len);
hashwrite(f, tree->hash, the_hash_algo->rawsz);
parent = (*list)->parents;
@ -1088,10 +1090,12 @@ static void write_graph_chunk_data(struct hashfile *f, int hash_len,
list++;
}
return 0;
}
static void write_graph_chunk_extra_edges(struct hashfile *f,
struct write_commit_graph_context *ctx)
static int write_graph_chunk_extra_edges(struct hashfile *f,
struct write_commit_graph_context *ctx)
{
struct commit **list = ctx->commits.list;
struct commit **last = ctx->commits.list + ctx->commits.nr;
@ -1140,10 +1144,12 @@ static void write_graph_chunk_extra_edges(struct hashfile *f,
list++;
}
return 0;
}
static void write_graph_chunk_bloom_indexes(struct hashfile *f,
struct write_commit_graph_context *ctx)
static int write_graph_chunk_bloom_indexes(struct hashfile *f,
struct write_commit_graph_context *ctx)
{
struct commit **list = ctx->commits.list;
struct commit **last = ctx->commits.list + ctx->commits.nr;
@ -1165,11 +1171,11 @@ static void write_graph_chunk_bloom_indexes(struct hashfile *f,
}
stop_progress(&progress);
return 0;
}
static void write_graph_chunk_bloom_data(struct hashfile *f,
struct write_commit_graph_context *ctx,
const struct bloom_filter_settings *settings)
static int write_graph_chunk_bloom_data(struct hashfile *f,
struct write_commit_graph_context *ctx)
{
struct commit **list = ctx->commits.list;
struct commit **last = ctx->commits.list + ctx->commits.nr;
@ -1181,9 +1187,9 @@ static void write_graph_chunk_bloom_data(struct hashfile *f,
_("Writing changed paths Bloom filters data"),
ctx->commits.nr);
hashwrite_be32(f, settings->hash_version);
hashwrite_be32(f, settings->num_hashes);
hashwrite_be32(f, settings->bits_per_entry);
hashwrite_be32(f, ctx->bloom_settings.hash_version);
hashwrite_be32(f, ctx->bloom_settings.num_hashes);
hashwrite_be32(f, ctx->bloom_settings.bits_per_entry);
while (list < last) {
struct bloom_filter *filter = get_bloom_filter(ctx->r, *list, 0);
@ -1193,6 +1199,7 @@ static void write_graph_chunk_bloom_data(struct hashfile *f,
}
stop_progress(&progress);
return 0;
}
static int oid_compare(const void *_a, const void *_b)
@ -1602,20 +1609,31 @@ static int write_graph_chunk_base(struct hashfile *f,
return 0;
}
/*
 * Common signature of the per-chunk writer callbacks used by
 * write_commit_graph_file(). The callback writes one chunk's payload
 * to 'f' using the state in 'ctx'. A non-zero return value is treated
 * by the caller as a failure to write the chunk.
 */
typedef int (*chunk_write_fn)(struct hashfile *f,
struct write_commit_graph_context *ctx);
/*
 * Describes one chunk of the commit-graph file being written.
 * write_commit_graph_file() fills an array of these and terminates it
 * with an entry whose 'id' and 'size' are both 0.
 */
struct chunk_info {
/* Chunk identifier, emitted in the chunk lookup table. */
uint32_t id;
/*
 * Expected size of the chunk in bytes. Used to compute the offsets
 * stored in the chunk lookup table, and compared against the number
 * of bytes 'write_fn' actually wrote.
 */
uint64_t size;
/* Callback that writes this chunk's contents. */
chunk_write_fn write_fn;
};
static int write_commit_graph_file(struct write_commit_graph_context *ctx)
{
uint32_t i;
int fd;
struct hashfile *f;
struct lock_file lk = LOCK_INIT;
uint32_t chunk_ids[MAX_NUM_CHUNKS + 1];
uint64_t chunk_offsets[MAX_NUM_CHUNKS + 1];
struct chunk_info chunks[MAX_NUM_CHUNKS + 1];
const unsigned hashsz = the_hash_algo->rawsz;
struct strbuf progress_title = STRBUF_INIT;
int num_chunks = 3;
uint64_t chunk_offset;
struct object_id file_hash;
const struct bloom_filter_settings bloom_settings = DEFAULT_BLOOM_FILTER_SETTINGS;
ctx->bloom_settings = bloom_settings;
if (ctx->split) {
struct strbuf tmp_file = STRBUF_INIT;
@ -1660,51 +1678,41 @@ static int write_commit_graph_file(struct write_commit_graph_context *ctx)
f = hashfd(lk.tempfile->fd, lk.tempfile->filename.buf);
}
chunk_ids[0] = GRAPH_CHUNKID_OIDFANOUT;
chunk_ids[1] = GRAPH_CHUNKID_OIDLOOKUP;
chunk_ids[2] = GRAPH_CHUNKID_DATA;
chunks[0].id = GRAPH_CHUNKID_OIDFANOUT;
chunks[0].size = GRAPH_FANOUT_SIZE;
chunks[0].write_fn = write_graph_chunk_fanout;
chunks[1].id = GRAPH_CHUNKID_OIDLOOKUP;
chunks[1].size = hashsz * ctx->commits.nr;
chunks[1].write_fn = write_graph_chunk_oids;
chunks[2].id = GRAPH_CHUNKID_DATA;
chunks[2].size = (hashsz + 16) * ctx->commits.nr;
chunks[2].write_fn = write_graph_chunk_data;
if (ctx->num_extra_edges) {
chunk_ids[num_chunks] = GRAPH_CHUNKID_EXTRAEDGES;
chunks[num_chunks].id = GRAPH_CHUNKID_EXTRAEDGES;
chunks[num_chunks].size = 4 * ctx->num_extra_edges;
chunks[num_chunks].write_fn = write_graph_chunk_extra_edges;
num_chunks++;
}
if (ctx->changed_paths) {
chunk_ids[num_chunks] = GRAPH_CHUNKID_BLOOMINDEXES;
chunks[num_chunks].id = GRAPH_CHUNKID_BLOOMINDEXES;
chunks[num_chunks].size = sizeof(uint32_t) * ctx->commits.nr;
chunks[num_chunks].write_fn = write_graph_chunk_bloom_indexes;
num_chunks++;
chunk_ids[num_chunks] = GRAPH_CHUNKID_BLOOMDATA;
chunks[num_chunks].id = GRAPH_CHUNKID_BLOOMDATA;
chunks[num_chunks].size = sizeof(uint32_t) * 3
+ ctx->total_bloom_filter_data_size;
chunks[num_chunks].write_fn = write_graph_chunk_bloom_data;
num_chunks++;
}
if (ctx->num_commit_graphs_after > 1) {
chunk_ids[num_chunks] = GRAPH_CHUNKID_BASE;
chunks[num_chunks].id = GRAPH_CHUNKID_BASE;
chunks[num_chunks].size = hashsz * (ctx->num_commit_graphs_after - 1);
chunks[num_chunks].write_fn = write_graph_chunk_base;
num_chunks++;
}
chunk_ids[num_chunks] = 0;
chunk_offsets[0] = 8 + (num_chunks + 1) * GRAPH_CHUNKLOOKUP_WIDTH;
chunk_offsets[1] = chunk_offsets[0] + GRAPH_FANOUT_SIZE;
chunk_offsets[2] = chunk_offsets[1] + hashsz * ctx->commits.nr;
chunk_offsets[3] = chunk_offsets[2] + (hashsz + 16) * ctx->commits.nr;
num_chunks = 3;
if (ctx->num_extra_edges) {
chunk_offsets[num_chunks + 1] = chunk_offsets[num_chunks] +
4 * ctx->num_extra_edges;
num_chunks++;
}
if (ctx->changed_paths) {
chunk_offsets[num_chunks + 1] = chunk_offsets[num_chunks] +
sizeof(uint32_t) * ctx->commits.nr;
num_chunks++;
chunk_offsets[num_chunks + 1] = chunk_offsets[num_chunks] +
sizeof(uint32_t) * 3 + ctx->total_bloom_filter_data_size;
num_chunks++;
}
if (ctx->num_commit_graphs_after > 1) {
chunk_offsets[num_chunks + 1] = chunk_offsets[num_chunks] +
hashsz * (ctx->num_commit_graphs_after - 1);
num_chunks++;
}
chunks[num_chunks].id = 0;
chunks[num_chunks].size = 0;
hashwrite_be32(f, GRAPH_SIGNATURE);
@ -1713,13 +1721,16 @@ static int write_commit_graph_file(struct write_commit_graph_context *ctx)
hashwrite_u8(f, num_chunks);
hashwrite_u8(f, ctx->num_commit_graphs_after - 1);
chunk_offset = 8 + (num_chunks + 1) * GRAPH_CHUNKLOOKUP_WIDTH;
for (i = 0; i <= num_chunks; i++) {
uint32_t chunk_write[3];
chunk_write[0] = htonl(chunk_ids[i]);
chunk_write[1] = htonl(chunk_offsets[i] >> 32);
chunk_write[2] = htonl(chunk_offsets[i] & 0xffffffff);
chunk_write[0] = htonl(chunks[i].id);
chunk_write[1] = htonl(chunk_offset >> 32);
chunk_write[2] = htonl(chunk_offset & 0xffffffff);
hashwrite(f, chunk_write, 12);
chunk_offset += chunks[i].size;
}
if (ctx->report_progress) {
@ -1732,19 +1743,24 @@ static int write_commit_graph_file(struct write_commit_graph_context *ctx)
progress_title.buf,
num_chunks * ctx->commits.nr);
}
write_graph_chunk_fanout(f, ctx);
write_graph_chunk_oids(f, hashsz, ctx);
write_graph_chunk_data(f, hashsz, ctx);
if (ctx->num_extra_edges)
write_graph_chunk_extra_edges(f, ctx);
if (ctx->changed_paths) {
write_graph_chunk_bloom_indexes(f, ctx);
write_graph_chunk_bloom_data(f, ctx, &bloom_settings);
}
if (ctx->num_commit_graphs_after > 1 &&
write_graph_chunk_base(f, ctx)) {
return -1;
chunk_offset = f->total + f->offset;
for (i = 0; i < num_chunks; i++) {
uint64_t end_offset;
if (chunks[i].write_fn(f, ctx)) {
error(_("failed writing chunk with id %"PRIx32""),
chunks[i].id);
return -1;
}
end_offset = f->total + f->offset;
if (end_offset - chunk_offset != chunks[i].size)
BUG("expected to write %"PRId64" bytes to chunk %"PRIx32", but wrote %"PRId64" instead",
chunks[i].size, chunks[i].id, end_offset - chunk_offset);
chunk_offset = end_offset;
}
stop_progress(&ctx->progress);
strbuf_release(&progress_title);
@ -2078,9 +2094,19 @@ int write_commit_graph(struct object_directory *odb,
ctx->report_progress = flags & COMMIT_GRAPH_WRITE_PROGRESS ? 1 : 0;
ctx->split = flags & COMMIT_GRAPH_WRITE_SPLIT ? 1 : 0;
ctx->split_opts = split_opts;
ctx->changed_paths = flags & COMMIT_GRAPH_WRITE_BLOOM_FILTERS ? 1 : 0;
ctx->total_bloom_filter_data_size = 0;
if (flags & COMMIT_GRAPH_WRITE_BLOOM_FILTERS)
ctx->changed_paths = 1;
else if (!(flags & COMMIT_GRAPH_NO_WRITE_BLOOM_FILTERS)) {
prepare_commit_graph_one(ctx->r, ctx->odb);
/* We have changed-paths already. Keep them in the next graph */
if (ctx->r->objects->commit_graph &&
ctx->r->objects->commit_graph->chunk_bloom_data)
ctx->changed_paths = 1;
}
if (ctx->split) {
struct commit_graph *g;
prepare_commit_graph(ctx->r);

View File

@ -2,14 +2,11 @@
#define COMMIT_GRAPH_H
#include "git-compat-util.h"
#include "repository.h"
#include "string-list.h"
#include "cache.h"
#include "object-store.h"
#include "oidset.h"
#define GIT_TEST_COMMIT_GRAPH "GIT_TEST_COMMIT_GRAPH"
#define GIT_TEST_COMMIT_GRAPH_DIE_ON_LOAD "GIT_TEST_COMMIT_GRAPH_DIE_ON_LOAD"
#define GIT_TEST_COMMIT_GRAPH_DIE_ON_PARSE "GIT_TEST_COMMIT_GRAPH_DIE_ON_PARSE"
#define GIT_TEST_COMMIT_GRAPH_CHANGED_PATHS "GIT_TEST_COMMIT_GRAPH_CHANGED_PATHS"
/*
@ -23,6 +20,9 @@ void git_test_write_commit_graph_or_die(void);
struct commit;
struct bloom_filter_settings;
struct repository;
struct raw_object_store;
struct string_list;
char *get_commit_graph_filename(struct object_directory *odb);
int open_commit_graph(const char *graph_file, int *fd, struct stat *st);
@ -92,6 +92,7 @@ enum commit_graph_write_flags {
COMMIT_GRAPH_WRITE_PROGRESS = (1 << 1),
COMMIT_GRAPH_WRITE_SPLIT = (1 << 2),
COMMIT_GRAPH_WRITE_BLOOM_FILTERS = (1 << 3),
COMMIT_GRAPH_NO_WRITE_BLOOM_FILTERS = (1 << 4),
};
enum commit_graph_split_flags {

View File

@ -32,6 +32,7 @@ struct slabname { \
void init_ ##slabname## _with_stride(struct slabname *s, unsigned stride); \
void init_ ##slabname(struct slabname *s); \
void clear_ ##slabname(struct slabname *s); \
void deep_clear_ ##slabname(struct slabname *s, void (*free_fn)(elemtype *ptr)); \
elemtype *slabname## _at_peek(struct slabname *s, const struct commit *c, int add_if_missing); \
elemtype *slabname## _at(struct slabname *s, const struct commit *c); \
elemtype *slabname## _peek(struct slabname *s, const struct commit *c)

View File

@ -38,6 +38,19 @@ scope void clear_ ##slabname(struct slabname *s) \
FREE_AND_NULL(s->slab); \
} \
\
scope void deep_clear_ ##slabname(struct slabname *s, void (*free_fn)(elemtype *)) \
{ \
unsigned int i; \
for (i = 0; i < s->slab_count; i++) { \
unsigned int j; \
if (!s->slab[i]) \
continue; \
for (j = 0; j < s->slab_size; j++) \
free_fn(&s->slab[i][j * s->stride]); \
} \
clear_ ##slabname(s); \
} \
\
scope elemtype *slabname## _at_peek(struct slabname *s, \
const struct commit *c, \
int add_if_missing) \

View File

@ -47,6 +47,16 @@
*
* Call this function before the slab falls out of scope to avoid
* leaking memory.
*
* - void deep_clear_indegree(struct indegree *, void (*free_fn)(int*))
*
* Empties the slab, similar to clear_indegree(), but in addition it
* calls the given 'free_fn' for each slab entry to release any
* additional memory that might be owned by the entry (but not the
* entry itself!).
* Note that 'free_fn' might be called even for entries for which no
* indegree_at() call has been made; in this case 'free_fn' is invoked
* with a pointer to a zero-initialized location.
*/
#define define_commit_slab(slabname, elemtype) \

10
diff.h
View File

@ -431,11 +431,11 @@ struct combine_diff_path *diff_tree_paths(
struct combine_diff_path *p, const struct object_id *oid,
const struct object_id **parents_oid, int nparent,
struct strbuf *base, struct diff_options *opt);
int diff_tree_oid(const struct object_id *old_oid,
const struct object_id *new_oid,
const char *base, struct diff_options *opt);
int diff_root_tree_oid(const struct object_id *new_oid, const char *base,
struct diff_options *opt);
void diff_tree_oid(const struct object_id *old_oid,
const struct object_id *new_oid,
const char *base, struct diff_options *opt);
void diff_root_tree_oid(const struct object_id *new_oid, const char *base,
struct diff_options *opt);
struct combine_diff_path {
struct combine_diff_path *next;

View File

@ -670,9 +670,10 @@ static void prepare_to_use_bloom_filter(struct rev_info *revs)
{
struct pathspec_item *pi;
char *path_alloc = NULL;
const char *path;
const char *path, *p;
int last_index;
int len;
size_t len;
int path_component_nr = 0, j;
if (!revs->commits)
return;
@ -705,8 +706,22 @@ static void prepare_to_use_bloom_filter(struct rev_info *revs)
len = strlen(path);
revs->bloom_key = xmalloc(sizeof(struct bloom_key));
fill_bloom_key(path, len, revs->bloom_key, revs->bloom_filter_settings);
p = path;
do {
p = strchrnul(p + 1, '/');
path_component_nr++;
} while (p - path < len);
revs->bloom_keys_nr = path_component_nr;
ALLOC_ARRAY(revs->bloom_keys, revs->bloom_keys_nr);
p = path;
for (j = 0; j < revs->bloom_keys_nr; j++) {
p = strchrnul(p + 1, '/');
fill_bloom_key(path, p - path, &revs->bloom_keys[j],
revs->bloom_filter_settings);
}
if (trace2_is_enabled() && !bloom_filter_atexit_registered) {
atexit(trace2_bloom_filter_statistics_atexit);
@ -720,7 +735,7 @@ static int check_maybe_different_in_bloom_filter(struct rev_info *revs,
struct commit *commit)
{
struct bloom_filter *filter;
int result;
int result = 1, j;
if (!revs->repo->objects->commit_graph)
return -1;
@ -740,9 +755,11 @@ static int check_maybe_different_in_bloom_filter(struct rev_info *revs,
return -1;
}
result = bloom_filter_contains(filter,
revs->bloom_key,
revs->bloom_filter_settings);
for (j = 0; result && j < revs->bloom_keys_nr; j++) {
result = bloom_filter_contains(filter,
&revs->bloom_keys[j],
revs->bloom_filter_settings);
}
if (result)
count_bloom_filter_maybe++;
@ -782,7 +799,7 @@ static int rev_compare_tree(struct rev_info *revs,
return REV_TREE_SAME;
}
if (revs->bloom_key && !nth_parent) {
if (revs->bloom_keys_nr && !nth_parent) {
bloom_ret = check_maybe_different_in_bloom_filter(revs, commit);
if (bloom_ret == 0)
@ -791,9 +808,7 @@ static int rev_compare_tree(struct rev_info *revs,
tree_difference = REV_TREE_SAME;
revs->pruning.flags.has_changes = 0;
if (diff_tree_oid(&t1->object.oid, &t2->object.oid, "",
&revs->pruning) < 0)
return REV_TREE_DIFFERENT;
diff_tree_oid(&t1->object.oid, &t2->object.oid, "", &revs->pruning);
if (!nth_parent)
if (bloom_ret == 1 && tree_difference == REV_TREE_SAME)
@ -804,7 +819,6 @@ static int rev_compare_tree(struct rev_info *revs,
static int rev_same_tree_as_empty(struct rev_info *revs, struct commit *commit)
{
int retval;
struct tree *t1 = get_commit_tree(commit);
if (!t1)
@ -812,9 +826,9 @@ static int rev_same_tree_as_empty(struct rev_info *revs, struct commit *commit)
tree_difference = REV_TREE_SAME;
revs->pruning.flags.has_changes = 0;
retval = diff_tree_oid(NULL, &t1->object.oid, "", &revs->pruning);
diff_tree_oid(NULL, &t1->object.oid, "", &revs->pruning);
return retval >= 0 && (tree_difference == REV_TREE_SAME);
return tree_difference == REV_TREE_SAME;
}
struct treesame_state {

View File

@ -300,8 +300,10 @@ struct rev_info {
struct topo_walk_info *topo_walk_info;
/* Commit graph bloom filter fields */
/* The bloom filter key for the pathspec */
struct bloom_key *bloom_key;
/* The bloom filter key(s) for the pathspec */
struct bloom_key *bloom_keys;
int bloom_keys_nr;
/*
* The bloom filter settings used to generate the key.
* This is loaded from the commit-graph being used.

View File

@ -110,6 +110,10 @@ void rollback_shallow_file(struct repository *r, struct shallow_lock *lk)
* supports a "valid" flag.
*/
define_commit_slab(commit_depth, int *);
/*
 * Per-entry callback handed to deep_clear_commit_depth(): 'ptr' points
 * at one 'int *' element stored in the commit_depth slab. Releases the
 * memory that element points to and resets the element to NULL; the
 * slab storage itself is not freed here.
 */
static void free_depth_in_slab(int **ptr)
{
FREE_AND_NULL(*ptr);
}
struct commit_list *get_shallow_commits(struct object_array *heads, int depth,
int shallow_flag, int not_shallow_flag)
{
@ -176,15 +180,7 @@ struct commit_list *get_shallow_commits(struct object_array *heads, int depth,
}
}
}
for (i = 0; i < depths.slab_count; i++) {
int j;
if (!depths.slab[i])
continue;
for (j = 0; j < depths.slab_size; j++)
free(depths.slab[i][j]);
}
clear_commit_depth(&depths);
deep_clear_commit_depth(&depths, free_depth_in_slab);
return result;
}

View File

@ -125,7 +125,9 @@ test_expect_success 'setup submodules' '
test_expect_success 'diff-tree ignores trailing slash on submodule path' '
git diff --name-only HEAD^ HEAD submod >expect &&
git diff --name-only HEAD^ HEAD submod/ >actual &&
test_cmp expect actual
test_cmp expect actual &&
git diff --name-only HEAD^ HEAD -- submod/whatever >actual &&
test_must_be_empty actual
'
test_expect_success 'diff multiple wildcard pathspecs' '

View File

@ -126,7 +126,7 @@ test_expect_success 'setup - add commit-graph to the chain without Bloom filters
test_commit c14 A/anotherFile2 &&
test_commit c15 A/B/anotherFile2 &&
test_commit c16 A/B/C/anotherFile2 &&
GIT_TEST_COMMIT_GRAPH_CHANGED_PATHS=0 git commit-graph write --reachable --split &&
git commit-graph write --reachable --split --no-changed-paths &&
test_line_count = 2 .git/objects/info/commit-graphs/commit-graph-chain
'
@ -142,7 +142,7 @@ test_expect_success 'setup - add commit-graph to the chain with Bloom filters' '
test_bloom_filters_used_when_some_filters_are_missing () {
log_args=$1
bloom_trace_prefix="statistics:{\"filter_not_present\":3,\"zero_length_filter\":0,\"maybe\":8,\"definitely_not\":6"
bloom_trace_prefix="statistics:{\"filter_not_present\":3,\"zero_length_filter\":0,\"maybe\":6,\"definitely_not\":8"
setup "$log_args" &&
grep -q "$bloom_trace_prefix" "$TRASH_DIRECTORY/trace.perf" &&
test_cmp log_wo_bloom log_w_bloom

View File

@ -476,7 +476,7 @@ corrupt_graph_verify() {
cp $objdir/info/commit-graph commit-graph-pre-write-test
fi &&
git status --short &&
GIT_TEST_COMMIT_GRAPH_DIE_ON_LOAD=true git commit-graph write &&
GIT_TEST_COMMIT_GRAPH_DIE_ON_PARSE=true git commit-graph write &&
chmod u+w $objdir/info/commit-graph &&
git commit-graph verify
}
@ -529,7 +529,7 @@ test_expect_success 'detect bad hash version' '
'
test_expect_success 'detect low chunk count' '
corrupt_graph_and_verify $GRAPH_BYTE_CHUNK_COUNT "\02" \
corrupt_graph_and_verify $GRAPH_BYTE_CHUNK_COUNT "\01" \
"missing the .* chunk"
'
@ -615,7 +615,8 @@ test_expect_success 'detect invalid checksum hash' '
test_expect_success 'detect incorrect chunk count' '
corrupt_graph_and_verify $GRAPH_BYTE_CHUNK_COUNT "\377" \
"chunk lookup table entry missing" $GRAPH_CHUNK_LOOKUP_OFFSET
"commit-graph file is too small to hold [0-9]* chunks" \
$GRAPH_CHUNK_LOOKUP_OFFSET
'
test_expect_success 'git fsck (checks commit-graph)' '

View File

@ -29,9 +29,9 @@ static struct combine_diff_path *ll_diff_tree_paths(
struct combine_diff_path *p, const struct object_id *oid,
const struct object_id **parents_oid, int nparent,
struct strbuf *base, struct diff_options *opt);
static int ll_diff_tree_oid(const struct object_id *old_oid,
const struct object_id *new_oid,
struct strbuf *base, struct diff_options *opt);
static void ll_diff_tree_oid(const struct object_id *old_oid,
const struct object_id *new_oid,
struct strbuf *base, struct diff_options *opt);
/*
* Compare two tree entries, taking into account only path/S_ISDIR(mode),
@ -679,9 +679,9 @@ static void try_to_follow_renames(const struct object_id *old_oid,
q->nr = 1;
}
static int ll_diff_tree_oid(const struct object_id *old_oid,
const struct object_id *new_oid,
struct strbuf *base, struct diff_options *opt)
static void ll_diff_tree_oid(const struct object_id *old_oid,
const struct object_id *new_oid,
struct strbuf *base, struct diff_options *opt)
{
struct combine_diff_path phead, *p;
pathchange_fn_t pathchange_old = opt->pathchange;
@ -697,29 +697,27 @@ static int ll_diff_tree_oid(const struct object_id *old_oid,
}
opt->pathchange = pathchange_old;
return 0;
}
int diff_tree_oid(const struct object_id *old_oid,
const struct object_id *new_oid,
const char *base_str, struct diff_options *opt)
void diff_tree_oid(const struct object_id *old_oid,
const struct object_id *new_oid,
const char *base_str, struct diff_options *opt)
{
struct strbuf base;
int retval;
strbuf_init(&base, PATH_MAX);
strbuf_addstr(&base, base_str);
retval = ll_diff_tree_oid(old_oid, new_oid, &base, opt);
ll_diff_tree_oid(old_oid, new_oid, &base, opt);
if (!*base_str && opt->flags.follow_renames && diff_might_be_rename())
try_to_follow_renames(old_oid, new_oid, &base, opt);
strbuf_release(&base);
return retval;
}
int diff_root_tree_oid(const struct object_id *new_oid, const char *base, struct diff_options *opt)
void diff_root_tree_oid(const struct object_id *new_oid,
const char *base,
struct diff_options *opt)
{
return diff_tree_oid(NULL, new_oid, base, opt);
diff_tree_oid(NULL, new_oid, base, opt);
}

View File

@ -851,7 +851,14 @@ static int match_entry(const struct pathspec_item *item,
if (matchlen > pathlen) {
if (match[pathlen] != '/')
return 0;
if (!S_ISDIR(entry->mode) && !S_ISGITLINK(entry->mode))
/*
* Reject non-directories as partial pathnames, except
* when match is a submodule with a trailing slash and
* nothing else (to handle 'submod/' and 'submod'
* uniformly).
*/
if (!S_ISDIR(entry->mode) &&
(!S_ISGITLINK(entry->mode) || matchlen > pathlen + 1))
return 0;
}