Merge branch 'ak/corrected-commit-date'

The commit-graph learned to use corrected commit dates instead of
the generation number to help topological revision traversal.

* ak/corrected-commit-date:
  doc: add corrected commit date info
  commit-reach: use corrected commit dates in paint_down_to_common()
  commit-graph: use generation v2 only if entire chain does
  commit-graph: implement generation data chunk
  commit-graph: implement corrected commit date
  commit-graph: return 64-bit generation number
  commit-graph: add a slab to store topological levels
  t6600-test-reach: generalize *_three_modes
  commit-graph: consolidate fill_commit_graph_info
  revision: parse parent in indegree_walk_step()
  commit-graph: fix regression when computing Bloom filters
This commit is contained in:
Junio C Hamano
2021-02-17 17:21:40 -08:00
19 changed files with 668 additions and 155 deletions

View File

@ -38,11 +38,13 @@ void git_test_write_commit_graph_or_die(void)
#define GRAPH_CHUNKID_OIDFANOUT 0x4f494446 /* "OIDF" */
#define GRAPH_CHUNKID_OIDLOOKUP 0x4f49444c /* "OIDL" */
#define GRAPH_CHUNKID_DATA 0x43444154 /* "CDAT" */
#define GRAPH_CHUNKID_GENERATION_DATA 0x47444154 /* "GDAT" */
#define GRAPH_CHUNKID_GENERATION_DATA_OVERFLOW 0x47444f56 /* "GDOV" */
#define GRAPH_CHUNKID_EXTRAEDGES 0x45444745 /* "EDGE" */
#define GRAPH_CHUNKID_BLOOMINDEXES 0x42494458 /* "BIDX" */
#define GRAPH_CHUNKID_BLOOMDATA 0x42444154 /* "BDAT" */
#define GRAPH_CHUNKID_BASE 0x42415345 /* "BASE" */
#define MAX_NUM_CHUNKS 7
#define MAX_NUM_CHUNKS 9
#define GRAPH_DATA_WIDTH (the_hash_algo->rawsz + 16)
@ -61,9 +63,13 @@ void git_test_write_commit_graph_or_die(void)
#define GRAPH_MIN_SIZE (GRAPH_HEADER_SIZE + 4 * GRAPH_CHUNKLOOKUP_WIDTH \
+ GRAPH_FANOUT_SIZE + the_hash_algo->rawsz)
#define CORRECTED_COMMIT_DATE_OFFSET_OVERFLOW (1ULL << 31)
/* Remember to update object flag allocation in object.h */
#define REACHABLE (1u<<15)
define_commit_slab(topo_level_slab, uint32_t);
/* Keep track of the order in which commits are added to our list. */
define_commit_slab(commit_pos, int);
static struct commit_pos commit_pos = COMMIT_SLAB_INIT(1, commit_pos);
@ -99,7 +105,7 @@ uint32_t commit_graph_position(const struct commit *c)
return data ? data->graph_pos : COMMIT_NOT_FROM_GRAPH;
}
uint32_t commit_graph_generation(const struct commit *c)
timestamp_t commit_graph_generation(const struct commit *c)
{
struct commit_graph_data *data =
commit_graph_data_slab_peek(&commit_graph_data_slab, c);
@ -139,13 +145,17 @@ static struct commit_graph_data *commit_graph_data_at(const struct commit *c)
return data;
}
/*
* Should be used only while writing commit-graph as it compares
* generation value of commits by directly accessing commit-slab.
*/
static int commit_gen_cmp(const void *va, const void *vb)
{
const struct commit *a = *(const struct commit **)va;
const struct commit *b = *(const struct commit **)vb;
uint32_t generation_a = commit_graph_generation(a);
uint32_t generation_b = commit_graph_generation(b);
const timestamp_t generation_a = commit_graph_data_at(a)->generation;
const timestamp_t generation_b = commit_graph_data_at(b)->generation;
/* lower generation commits first */
if (generation_a < generation_b)
return -1;
@ -388,6 +398,20 @@ struct commit_graph *parse_commit_graph(struct repository *r,
graph->chunk_commit_data = data + chunk_offset;
break;
case GRAPH_CHUNKID_GENERATION_DATA:
if (graph->chunk_generation_data)
chunk_repeated = 1;
else
graph->chunk_generation_data = data + chunk_offset;
break;
case GRAPH_CHUNKID_GENERATION_DATA_OVERFLOW:
if (graph->chunk_generation_data_overflow)
chunk_repeated = 1;
else
graph->chunk_generation_data_overflow = data + chunk_offset;
break;
case GRAPH_CHUNKID_EXTRAEDGES:
if (graph->chunk_extra_edges)
chunk_repeated = 1;
@ -590,6 +614,21 @@ static struct commit_graph *load_commit_graph_chain(struct repository *r,
return graph_chain;
}
static void validate_mixed_generation_chain(struct commit_graph *g)
{
int read_generation_data;
if (!g)
return;
read_generation_data = !!g->chunk_generation_data;
while (g) {
g->read_generation_data = read_generation_data;
g = g->base_graph;
}
}
struct commit_graph *read_commit_graph_one(struct repository *r,
struct object_directory *odb)
{
@ -598,6 +637,8 @@ struct commit_graph *read_commit_graph_one(struct repository *r,
if (!g)
g = load_commit_graph_chain(r, odb);
validate_mixed_generation_chain(g);
return g;
}
@ -673,6 +714,20 @@ int generation_numbers_enabled(struct repository *r)
return !!first_generation;
}
int corrected_commit_dates_enabled(struct repository *r)
{
struct commit_graph *g;
if (!prepare_commit_graph(r))
return 0;
g = r->objects->commit_graph;
if (!g->num_commits)
return 0;
return g->read_generation_data;
}
struct bloom_filter_settings *get_bloom_filter_settings(struct repository *r)
{
struct commit_graph *g = r->objects->commit_graph;
@ -748,17 +803,41 @@ static void fill_commit_graph_info(struct commit *item, struct commit_graph *g,
{
const unsigned char *commit_data;
struct commit_graph_data *graph_data;
uint32_t lex_index;
uint32_t lex_index, offset_pos;
uint64_t date_high, date_low, offset;
while (pos < g->num_commits_in_base)
g = g->base_graph;
if (pos >= g->num_commits + g->num_commits_in_base)
die(_("invalid commit position. commit-graph is likely corrupt"));
lex_index = pos - g->num_commits_in_base;
commit_data = g->chunk_commit_data + GRAPH_DATA_WIDTH * lex_index;
graph_data = commit_graph_data_at(item);
graph_data->graph_pos = pos;
graph_data->generation = get_be32(commit_data + g->hash_len + 8) >> 2;
date_high = get_be32(commit_data + g->hash_len + 8) & 0x3;
date_low = get_be32(commit_data + g->hash_len + 12);
item->date = (timestamp_t)((date_high << 32) | date_low);
if (g->read_generation_data) {
offset = (timestamp_t)get_be32(g->chunk_generation_data + sizeof(uint32_t) * lex_index);
if (offset & CORRECTED_COMMIT_DATE_OFFSET_OVERFLOW) {
if (!g->chunk_generation_data_overflow)
die(_("commit-graph requires overflow generation data but has none"));
offset_pos = offset ^ CORRECTED_COMMIT_DATE_OFFSET_OVERFLOW;
graph_data->generation = get_be64(g->chunk_generation_data_overflow + 8 * offset_pos);
} else
graph_data->generation = item->date + offset;
} else
graph_data->generation = get_be32(commit_data + g->hash_len + 8) >> 2;
if (g->topo_levels)
*topo_level_slab_at(g->topo_levels, item) = get_be32(commit_data + g->hash_len + 8) >> 2;
}
static inline void set_commit_tree(struct commit *c, struct tree *t)
@ -772,38 +851,22 @@ static int fill_commit_in_graph(struct repository *r,
{
uint32_t edge_value;
uint32_t *parent_data_ptr;
uint64_t date_low, date_high;
struct commit_list **pptr;
struct commit_graph_data *graph_data;
const unsigned char *commit_data;
uint32_t lex_index;
while (pos < g->num_commits_in_base)
g = g->base_graph;
if (pos >= g->num_commits + g->num_commits_in_base)
die(_("invalid commit position. commit-graph is likely corrupt"));
fill_commit_graph_info(item, g, pos);
/*
* Store the "full" position, but then use the
* "local" position for the rest of the calculation.
*/
graph_data = commit_graph_data_at(item);
graph_data->graph_pos = pos;
lex_index = pos - g->num_commits_in_base;
commit_data = g->chunk_commit_data + (g->hash_len + 16) * lex_index;
item->object.parsed = 1;
set_commit_tree(item, NULL);
date_high = get_be32(commit_data + g->hash_len + 8) & 0x3;
date_low = get_be32(commit_data + g->hash_len + 12);
item->date = (timestamp_t)((date_high << 32) | date_low);
graph_data->generation = get_be32(commit_data + g->hash_len + 8) >> 2;
pptr = &item->parents;
edge_value = get_be32(commit_data + g->hash_len);
@ -943,6 +1006,7 @@ struct write_commit_graph_context {
struct oid_array oids;
struct packed_commit_list commits;
int num_extra_edges;
int num_generation_data_overflows;
unsigned long approx_nr_objects;
struct progress *progress;
int progress_done;
@ -961,8 +1025,10 @@ struct write_commit_graph_context {
report_progress:1,
split:1,
changed_paths:1,
order_by_pack:1;
order_by_pack:1,
write_generation_data:1;
struct topo_level_slab *topo_levels;
const struct commit_graph_opts *opts;
size_t total_bloom_filter_data_size;
const struct bloom_filter_settings *bloom_settings;
@ -1109,7 +1175,7 @@ static int write_graph_chunk_data(struct hashfile *f,
else
packedDate[0] = 0;
packedDate[0] |= htonl(commit_graph_data_at(*list)->generation << 2);
packedDate[0] |= htonl(*topo_level_slab_at(ctx->topo_levels, *list) << 2);
packedDate[1] = htonl((*list)->date);
hashwrite(f, packedDate, 8);
@ -1120,6 +1186,45 @@ static int write_graph_chunk_data(struct hashfile *f,
return 0;
}
static int write_graph_chunk_generation_data(struct hashfile *f,
struct write_commit_graph_context *ctx)
{
int i, num_generation_data_overflows = 0;
for (i = 0; i < ctx->commits.nr; i++) {
struct commit *c = ctx->commits.list[i];
timestamp_t offset = commit_graph_data_at(c)->generation - c->date;
display_progress(ctx->progress, ++ctx->progress_cnt);
if (offset > GENERATION_NUMBER_V2_OFFSET_MAX) {
offset = CORRECTED_COMMIT_DATE_OFFSET_OVERFLOW | num_generation_data_overflows;
num_generation_data_overflows++;
}
hashwrite_be32(f, offset);
}
return 0;
}
static int write_graph_chunk_generation_data_overflow(struct hashfile *f,
struct write_commit_graph_context *ctx)
{
int i;
for (i = 0; i < ctx->commits.nr; i++) {
struct commit *c = ctx->commits.list[i];
timestamp_t offset = commit_graph_data_at(c)->generation - c->date;
display_progress(ctx->progress, ++ctx->progress_cnt);
if (offset > GENERATION_NUMBER_V2_OFFSET_MAX) {
hashwrite_be32(f, offset >> 32);
hashwrite_be32(f, (uint32_t) offset);
}
}
return 0;
}
static int write_graph_chunk_extra_edges(struct hashfile *f,
struct write_commit_graph_context *ctx)
{
@ -1339,11 +1444,12 @@ static void compute_generation_numbers(struct write_commit_graph_context *ctx)
_("Computing commit graph generation numbers"),
ctx->commits.nr);
for (i = 0; i < ctx->commits.nr; i++) {
uint32_t generation = commit_graph_data_at(ctx->commits.list[i])->generation;
uint32_t level = *topo_level_slab_at(ctx->topo_levels, ctx->commits.list[i]);
timestamp_t corrected_commit_date = commit_graph_data_at(ctx->commits.list[i])->generation;
display_progress(ctx->progress, i + 1);
if (generation != GENERATION_NUMBER_INFINITY &&
generation != GENERATION_NUMBER_ZERO)
if (level != GENERATION_NUMBER_ZERO &&
corrected_commit_date != GENERATION_NUMBER_ZERO)
continue;
commit_list_insert(ctx->commits.list[i], &list);
@ -1351,29 +1457,40 @@ static void compute_generation_numbers(struct write_commit_graph_context *ctx)
struct commit *current = list->item;
struct commit_list *parent;
int all_parents_computed = 1;
uint32_t max_generation = 0;
uint32_t max_level = 0;
timestamp_t max_corrected_commit_date = 0;
for (parent = current->parents; parent; parent = parent->next) {
generation = commit_graph_data_at(parent->item)->generation;
level = *topo_level_slab_at(ctx->topo_levels, parent->item);
corrected_commit_date = commit_graph_data_at(parent->item)->generation;
if (generation == GENERATION_NUMBER_INFINITY ||
generation == GENERATION_NUMBER_ZERO) {
if (level == GENERATION_NUMBER_ZERO ||
corrected_commit_date == GENERATION_NUMBER_ZERO) {
all_parents_computed = 0;
commit_list_insert(parent->item, &list);
break;
} else if (generation > max_generation) {
max_generation = generation;
}
if (level > max_level)
max_level = level;
if (corrected_commit_date > max_corrected_commit_date)
max_corrected_commit_date = corrected_commit_date;
}
if (all_parents_computed) {
struct commit_graph_data *data = commit_graph_data_at(current);
data->generation = max_generation + 1;
pop_commit(&list);
if (data->generation > GENERATION_NUMBER_MAX)
data->generation = GENERATION_NUMBER_MAX;
if (max_level > GENERATION_NUMBER_V1_MAX - 1)
max_level = GENERATION_NUMBER_V1_MAX - 1;
*topo_level_slab_at(ctx->topo_levels, current) = max_level + 1;
if (current->date && current->date > max_corrected_commit_date)
max_corrected_commit_date = current->date - 1;
commit_graph_data_at(current)->generation = max_corrected_commit_date + 1;
if (commit_graph_data_at(current)->generation - current->date > GENERATION_NUMBER_V2_OFFSET_MAX)
ctx->num_generation_data_overflows++;
}
}
}
@ -1707,6 +1824,21 @@ static int write_commit_graph_file(struct write_commit_graph_context *ctx)
chunks[2].id = GRAPH_CHUNKID_DATA;
chunks[2].size = (hashsz + 16) * ctx->commits.nr;
chunks[2].write_fn = write_graph_chunk_data;
if (git_env_bool(GIT_TEST_COMMIT_GRAPH_NO_GDAT, 0))
ctx->write_generation_data = 0;
if (ctx->write_generation_data) {
chunks[num_chunks].id = GRAPH_CHUNKID_GENERATION_DATA;
chunks[num_chunks].size = sizeof(uint32_t) * ctx->commits.nr;
chunks[num_chunks].write_fn = write_graph_chunk_generation_data;
num_chunks++;
}
if (ctx->num_generation_data_overflows) {
chunks[num_chunks].id = GRAPH_CHUNKID_GENERATION_DATA_OVERFLOW;
chunks[num_chunks].size = sizeof(timestamp_t) * ctx->num_generation_data_overflows;
chunks[num_chunks].write_fn = write_graph_chunk_generation_data_overflow;
num_chunks++;
}
if (ctx->num_extra_edges) {
chunks[num_chunks].id = GRAPH_CHUNKID_EXTRAEDGES;
chunks[num_chunks].size = 4 * ctx->num_extra_edges;
@ -1918,6 +2050,13 @@ static void split_graph_merge_strategy(struct write_commit_graph_context *ctx)
if (i < ctx->num_commit_graphs_after)
ctx->commit_graph_hash_after[i] = xstrdup(oid_to_hex(&g->oid));
/*
* If the topmost remaining layer has generation data chunk, the
* resultant layer also has generation data chunk.
*/
if (i == ctx->num_commit_graphs_after - 2)
ctx->write_generation_data = !!g->chunk_generation_data;
i--;
g = g->base_graph;
}
@ -2109,6 +2248,7 @@ int write_commit_graph(struct object_directory *odb,
int res = 0;
int replace = 0;
struct bloom_filter_settings bloom_settings = DEFAULT_BLOOM_FILTER_SETTINGS;
struct topo_level_slab topo_levels;
prepare_repo_settings(the_repository);
if (!the_repository->settings.core_commit_graph) {
@ -2126,6 +2266,8 @@ int write_commit_graph(struct object_directory *odb,
ctx->split = flags & COMMIT_GRAPH_WRITE_SPLIT ? 1 : 0;
ctx->opts = opts;
ctx->total_bloom_filter_data_size = 0;
ctx->write_generation_data = 1;
ctx->num_generation_data_overflows = 0;
bloom_settings.bits_per_entry = git_env_ulong("GIT_TEST_BLOOM_SETTINGS_BITS_PER_ENTRY",
bloom_settings.bits_per_entry);
@ -2135,6 +2277,18 @@ int write_commit_graph(struct object_directory *odb,
bloom_settings.max_changed_paths);
ctx->bloom_settings = &bloom_settings;
init_topo_level_slab(&topo_levels);
ctx->topo_levels = &topo_levels;
if (ctx->r->objects->commit_graph) {
struct commit_graph *g = ctx->r->objects->commit_graph;
while (g) {
g->topo_levels = &topo_levels;
g = g->base_graph;
}
}
if (flags & COMMIT_GRAPH_WRITE_BLOOM_FILTERS)
ctx->changed_paths = 1;
if (!(flags & COMMIT_GRAPH_NO_WRITE_BLOOM_FILTERS)) {
@ -2227,6 +2381,8 @@ int write_commit_graph(struct object_directory *odb,
} else
ctx->num_commit_graphs_after = 1;
validate_mixed_generation_chain(ctx->r->objects->commit_graph);
compute_generation_numbers(ctx);
if (ctx->changed_paths)
@ -2355,8 +2511,8 @@ int verify_commit_graph(struct repository *r, struct commit_graph *g, int flags)
for (i = 0; i < g->num_commits; i++) {
struct commit *graph_commit, *odb_commit;
struct commit_list *graph_parents, *odb_parents;
uint32_t max_generation = 0;
uint32_t generation;
timestamp_t max_generation = 0;
timestamp_t generation;
display_progress(progress, i + 1);
hashcpy(cur_oid.hash, g->chunk_oid_lookup + g->hash_len * i);
@ -2420,16 +2576,17 @@ int verify_commit_graph(struct repository *r, struct commit_graph *g, int flags)
continue;
/*
* If one of our parents has generation GENERATION_NUMBER_MAX, then
* our generation is also GENERATION_NUMBER_MAX. Decrement to avoid
* extra logic in the following condition.
* If we are using topological level and one of our parents has
* generation GENERATION_NUMBER_V1_MAX, then our generation is
* also GENERATION_NUMBER_V1_MAX. Decrement to avoid extra logic
* in the following condition.
*/
if (max_generation == GENERATION_NUMBER_MAX)
if (!g->read_generation_data && max_generation == GENERATION_NUMBER_V1_MAX)
max_generation--;
generation = commit_graph_generation(graph_commit);
if (generation != max_generation + 1)
graph_report(_("commit-graph generation for commit %s is %u != %u"),
if (generation < max_generation + 1)
graph_report(_("commit-graph generation for commit %s is %"PRItime" < %"PRItime),
oid_to_hex(&cur_oid),
generation,
max_generation + 1);