commit-graph: use chunk-format read API

Instead of parsing the table of contents directly, use the chunk-format
API methods read_table_of_contents() and pair_chunk(). Note that this
change drops the duplicate-chunk detection that the hand-written parser
performed; that detection will be added back in a future change.

Signed-off-by: Derrick Stolee <dstolee@microsoft.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
This commit is contained in:
Derrick Stolee
2021-02-18 14:07:35 +00:00
committed by Junio C Hamano
parent 5f0879f54b
commit 2692c2f6fd
2 changed files with 55 additions and 106 deletions

View File

@ -59,8 +59,7 @@ void git_test_write_commit_graph_or_die(void)
#define GRAPH_HEADER_SIZE 8 #define GRAPH_HEADER_SIZE 8
#define GRAPH_FANOUT_SIZE (4 * 256) #define GRAPH_FANOUT_SIZE (4 * 256)
#define GRAPH_CHUNKLOOKUP_WIDTH 12 #define GRAPH_MIN_SIZE (GRAPH_HEADER_SIZE + 4 * CHUNK_TOC_ENTRY_SIZE \
#define GRAPH_MIN_SIZE (GRAPH_HEADER_SIZE + 4 * GRAPH_CHUNKLOOKUP_WIDTH \
+ GRAPH_FANOUT_SIZE + the_hash_algo->rawsz) + GRAPH_FANOUT_SIZE + the_hash_algo->rawsz)
#define CORRECTED_COMMIT_DATE_OFFSET_OVERFLOW (1ULL << 31) #define CORRECTED_COMMIT_DATE_OFFSET_OVERFLOW (1ULL << 31)
@ -298,15 +297,43 @@ static int verify_commit_graph_lite(struct commit_graph *g)
return 0; return 0;
} }
/*
 * Chunk callback for the OID lookup chunk.
 *
 * "data" is the struct commit_graph being filled in. Records where the
 * chunk starts and derives the commit count from the chunk size, one
 * entry per hash_len bytes. Always returns 0.
 */
static int graph_read_oid_lookup(const unsigned char *chunk_start,
				 size_t chunk_size, void *data)
{
	struct commit_graph *graph = data;

	/* Number of whole hash_len-sized entries contained in the chunk. */
	graph->num_commits = chunk_size / graph->hash_len;
	graph->chunk_oid_lookup = chunk_start;

	return 0;
}
/*
 * Chunk callback for the Bloom filter data chunk.
 *
 * "data" is the struct commit_graph being filled in. The chunk begins
 * with three big-endian 32-bit fields, read here in this order: hash
 * version, number of hashes, bits per entry.
 *
 * Returns -1 without touching the graph when the chunk is too small to
 * hold those three fields, and 0 otherwise. When the hash version is
 * not 1, the chunk pointer is still recorded but no settings are
 * allocated.
 */
static int graph_read_bloom_data(const unsigned char *chunk_start,
				 size_t chunk_size, void *data)
{
	struct commit_graph *g = data;
	uint32_t hash_version;

	/*
	 * The fields read below span the first 12 bytes of the chunk.
	 * Reject a shorter (truncated or corrupt) chunk instead of reading
	 * past its end.
	 */
	if (chunk_size < 3 * sizeof(uint32_t))
		return -1;

	g->chunk_bloom_data = chunk_start;
	hash_version = get_be32(chunk_start);

	/* Only hash version 1 is handled; leave the settings unset otherwise. */
	if (hash_version != 1)
		return 0;

	g->bloom_filter_settings = xmalloc(sizeof(*g->bloom_filter_settings));
	g->bloom_filter_settings->hash_version = hash_version;
	g->bloom_filter_settings->num_hashes = get_be32(chunk_start + 4);
	g->bloom_filter_settings->bits_per_entry = get_be32(chunk_start + 8);
	/* This limit is not read from the chunk; the default is used. */
	g->bloom_filter_settings->max_changed_paths = DEFAULT_BLOOM_MAX_CHANGES;

	return 0;
}
struct commit_graph *parse_commit_graph(struct repository *r, struct commit_graph *parse_commit_graph(struct repository *r,
void *graph_map, size_t graph_size) void *graph_map, size_t graph_size)
{ {
const unsigned char *data, *chunk_lookup; const unsigned char *data;
uint32_t i;
struct commit_graph *graph; struct commit_graph *graph;
uint64_t next_chunk_offset;
uint32_t graph_signature; uint32_t graph_signature;
unsigned char graph_version, hash_version; unsigned char graph_version, hash_version;
struct chunkfile *cf = NULL;
if (!graph_map) if (!graph_map)
return NULL; return NULL;
@ -347,7 +374,7 @@ struct commit_graph *parse_commit_graph(struct repository *r,
graph->data_len = graph_size; graph->data_len = graph_size;
if (graph_size < GRAPH_HEADER_SIZE + if (graph_size < GRAPH_HEADER_SIZE +
(graph->num_chunks + 1) * GRAPH_CHUNKLOOKUP_WIDTH + (graph->num_chunks + 1) * CHUNK_TOC_ENTRY_SIZE +
GRAPH_FANOUT_SIZE + the_hash_algo->rawsz) { GRAPH_FANOUT_SIZE + the_hash_algo->rawsz) {
error(_("commit-graph file is too small to hold %u chunks"), error(_("commit-graph file is too small to hold %u chunks"),
graph->num_chunks); graph->num_chunks);
@ -355,108 +382,28 @@ struct commit_graph *parse_commit_graph(struct repository *r,
return NULL; return NULL;
} }
chunk_lookup = data + 8; cf = init_chunkfile(NULL);
next_chunk_offset = get_be64(chunk_lookup + 4);
for (i = 0; i < graph->num_chunks; i++) {
uint32_t chunk_id;
uint64_t chunk_offset = next_chunk_offset;
int chunk_repeated = 0;
chunk_id = get_be32(chunk_lookup + 0); if (read_table_of_contents(cf, graph->data, graph_size,
GRAPH_HEADER_SIZE, graph->num_chunks))
chunk_lookup += GRAPH_CHUNKLOOKUP_WIDTH;
next_chunk_offset = get_be64(chunk_lookup + 4);
if (chunk_offset > graph_size - the_hash_algo->rawsz) {
error(_("commit-graph improper chunk offset %08x%08x"), (uint32_t)(chunk_offset >> 32),
(uint32_t)chunk_offset);
goto free_and_return; goto free_and_return;
}
switch (chunk_id) { pair_chunk(cf, GRAPH_CHUNKID_OIDFANOUT,
case GRAPH_CHUNKID_OIDFANOUT: (const unsigned char **)&graph->chunk_oid_fanout);
if (graph->chunk_oid_fanout) read_chunk(cf, GRAPH_CHUNKID_OIDLOOKUP, graph_read_oid_lookup, graph);
chunk_repeated = 1; pair_chunk(cf, GRAPH_CHUNKID_DATA, &graph->chunk_commit_data);
else pair_chunk(cf, GRAPH_CHUNKID_EXTRAEDGES, &graph->chunk_extra_edges);
graph->chunk_oid_fanout = (uint32_t*)(data + chunk_offset); pair_chunk(cf, GRAPH_CHUNKID_BASE, &graph->chunk_base_graphs);
break; pair_chunk(cf, GRAPH_CHUNKID_GENERATION_DATA,
&graph->chunk_generation_data);
pair_chunk(cf, GRAPH_CHUNKID_GENERATION_DATA_OVERFLOW,
&graph->chunk_generation_data_overflow);
case GRAPH_CHUNKID_OIDLOOKUP: if (r->settings.commit_graph_read_changed_paths) {
if (graph->chunk_oid_lookup) pair_chunk(cf, GRAPH_CHUNKID_BLOOMINDEXES,
chunk_repeated = 1; &graph->chunk_bloom_indexes);
else { read_chunk(cf, GRAPH_CHUNKID_BLOOMDATA,
graph->chunk_oid_lookup = data + chunk_offset; graph_read_bloom_data, graph);
graph->num_commits = (next_chunk_offset - chunk_offset)
/ graph->hash_len;
}
break;
case GRAPH_CHUNKID_DATA:
if (graph->chunk_commit_data)
chunk_repeated = 1;
else
graph->chunk_commit_data = data + chunk_offset;
break;
case GRAPH_CHUNKID_GENERATION_DATA:
if (graph->chunk_generation_data)
chunk_repeated = 1;
else
graph->chunk_generation_data = data + chunk_offset;
break;
case GRAPH_CHUNKID_GENERATION_DATA_OVERFLOW:
if (graph->chunk_generation_data_overflow)
chunk_repeated = 1;
else
graph->chunk_generation_data_overflow = data + chunk_offset;
break;
case GRAPH_CHUNKID_EXTRAEDGES:
if (graph->chunk_extra_edges)
chunk_repeated = 1;
else
graph->chunk_extra_edges = data + chunk_offset;
break;
case GRAPH_CHUNKID_BASE:
if (graph->chunk_base_graphs)
chunk_repeated = 1;
else
graph->chunk_base_graphs = data + chunk_offset;
break;
case GRAPH_CHUNKID_BLOOMINDEXES:
if (graph->chunk_bloom_indexes)
chunk_repeated = 1;
else if (r->settings.commit_graph_read_changed_paths)
graph->chunk_bloom_indexes = data + chunk_offset;
break;
case GRAPH_CHUNKID_BLOOMDATA:
if (graph->chunk_bloom_data)
chunk_repeated = 1;
else if (r->settings.commit_graph_read_changed_paths) {
uint32_t hash_version;
graph->chunk_bloom_data = data + chunk_offset;
hash_version = get_be32(data + chunk_offset);
if (hash_version != 1)
break;
graph->bloom_filter_settings = xmalloc(sizeof(struct bloom_filter_settings));
graph->bloom_filter_settings->hash_version = hash_version;
graph->bloom_filter_settings->num_hashes = get_be32(data + chunk_offset + 4);
graph->bloom_filter_settings->bits_per_entry = get_be32(data + chunk_offset + 8);
graph->bloom_filter_settings->max_changed_paths = DEFAULT_BLOOM_MAX_CHANGES;
}
break;
}
if (chunk_repeated) {
error(_("commit-graph chunk id %08x appears multiple times"), chunk_id);
goto free_and_return;
}
} }
if (graph->chunk_bloom_indexes && graph->chunk_bloom_data) { if (graph->chunk_bloom_indexes && graph->chunk_bloom_data) {
@ -473,9 +420,11 @@ struct commit_graph *parse_commit_graph(struct repository *r,
if (verify_commit_graph_lite(graph)) if (verify_commit_graph_lite(graph))
goto free_and_return; goto free_and_return;
free_chunkfile(cf);
return graph; return graph;
free_and_return: free_and_return:
free_chunkfile(cf);
free(graph->bloom_filter_settings); free(graph->bloom_filter_settings);
free(graph); free(graph);
return NULL; return NULL;

View File

@ -564,7 +564,7 @@ test_expect_success 'detect bad hash version' '
test_expect_success 'detect low chunk count' ' test_expect_success 'detect low chunk count' '
corrupt_graph_and_verify $GRAPH_BYTE_CHUNK_COUNT "\01" \ corrupt_graph_and_verify $GRAPH_BYTE_CHUNK_COUNT "\01" \
"missing the .* chunk" "final chunk has non-zero id"
' '
test_expect_success 'detect missing OID fanout chunk' ' test_expect_success 'detect missing OID fanout chunk' '