midx: support reading incremental MIDX chains

Now that the MIDX machinery's internals have been taught to understand
incremental MIDXs over the previous handful of commits, the MIDX
machinery itself can begin reading incremental MIDXs.

(Note that while the on-disk format for incremental MIDXs has been
defined, the writing end has not been implemented. This will take place
in the commit after next.)

The core of this change involves following the order specified in the
MIDX chain in reverse and opening up MIDXs in the chain one-by-one,
adding them to the previous layer's `->base_midx` pointer at each step.

In order to implement this, the `load_multi_pack_index()` function is
taught to call a new `load_multi_pack_index_chain()` function if loading
a non-incremental MIDX via `load_multi_pack_index_one()` fails.

When loading a MIDX chain, `load_midx_chain_fd_st()` reads each line in
the file one-by-one and dispatches calls to
`load_multi_pack_index_one()` to read each layer of the MIDX chain. When
a layer is successfully read, it is added to the MIDX chain by calling
`add_midx_to_chain()` which validates the contents of the `BASE` chunk,
performs some bounds checks on the number of combined packs and objects,
and attaches the new MIDX by assigning its `base_midx` pointer to the
existing part of the chain.

As a supplement to this, introduce a new mode in the test-read-midx
test-tool which allows us to read the information for a specific MIDX in
the chain by specifying its trailing checksum via the command-line
arguments like so:

    $ test-tool read-midx .git/objects [checksum]

Signed-off-by: Taylor Blau <me@ttaylorr.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
This commit is contained in:
Taylor Blau
2024-08-06 11:37:55 -04:00
committed by Junio C Hamano
parent 97fd770ea1
commit b80236d0e3
4 changed files with 201 additions and 19 deletions

184
midx.c
View File

@ -91,7 +91,9 @@ static int midx_read_object_offsets(const unsigned char *chunk_start,
#define MIDX_MIN_SIZE (MIDX_HEADER_SIZE + the_hash_algo->rawsz)
struct multi_pack_index *load_multi_pack_index(const char *object_dir, int local)
static struct multi_pack_index *load_multi_pack_index_one(const char *object_dir,
const char *midx_name,
int local)
{
struct multi_pack_index *m = NULL;
int fd;
@ -99,31 +101,26 @@ struct multi_pack_index *load_multi_pack_index(const char *object_dir, int local
size_t midx_size;
void *midx_map = NULL;
uint32_t hash_version;
struct strbuf midx_name = STRBUF_INIT;
uint32_t i;
const char *cur_pack_name;
struct chunkfile *cf = NULL;
get_midx_filename(&midx_name, object_dir);
fd = git_open(midx_name.buf);
fd = git_open(midx_name);
if (fd < 0)
goto cleanup_fail;
if (fstat(fd, &st)) {
error_errno(_("failed to read %s"), midx_name.buf);
error_errno(_("failed to read %s"), midx_name);
goto cleanup_fail;
}
midx_size = xsize_t(st.st_size);
if (midx_size < MIDX_MIN_SIZE) {
error(_("multi-pack-index file %s is too small"), midx_name.buf);
error(_("multi-pack-index file %s is too small"), midx_name);
goto cleanup_fail;
}
strbuf_release(&midx_name);
midx_map = xmmap(NULL, midx_size, PROT_READ, MAP_PRIVATE, fd, 0);
close(fd);
@ -213,7 +210,6 @@ struct multi_pack_index *load_multi_pack_index(const char *object_dir, int local
cleanup_fail:
free(m);
strbuf_release(&midx_name);
free_chunkfile(cf);
if (midx_map)
munmap(midx_map, midx_size);
@ -222,6 +218,173 @@ cleanup_fail:
return NULL;
}
/*
 * Append to `buf` the path of the directory that holds an incremental
 * MIDX chain for `object_dir`: "<object_dir>/pack/multi-pack-index.d".
 *
 * `buf` is appended to, not reset, so any existing contents are kept.
 */
void get_midx_chain_dirname(struct strbuf *buf, const char *object_dir)
{
strbuf_addf(buf, "%s/pack/multi-pack-index.d", object_dir);
}
/*
 * Append to `buf` the path of the MIDX chain file for `object_dir`: the
 * chain directory path (see get_midx_chain_dirname()) followed by
 * "/multi-pack-index-chain".
 *
 * `buf` is appended to, not reset, so any existing contents are kept.
 */
void get_midx_chain_filename(struct strbuf *buf, const char *object_dir)
{
get_midx_chain_dirname(buf, object_dir);
strbuf_addstr(buf, "/multi-pack-index-chain");
}
/*
 * Append to `buf` the path of a per-layer file inside the MIDX chain
 * directory of `object_dir`:
 *
 *     <chain dir>/multi-pack-index-<hex of hash>.<ext>
 *
 * `hash` is the raw hash identifying the layer; `ext` is the file
 * extension without the leading dot. `buf` is appended to, not reset.
 */
void get_split_midx_filename_ext(struct strbuf *buf, const char *object_dir,
				 const unsigned char *hash, const char *ext)
{
	const char *hex;

	get_midx_chain_dirname(buf, object_dir);

	/* Convert only after the directory has been appended, as before. */
	hex = hash_to_hex(hash);
	strbuf_addf(buf, "/multi-pack-index-%s.%s", hex, ext);
}
/*
 * Open the multi-pack-index chain file `chain_file` and stat it.
 *
 * Returns 1 on success, with the open descriptor stored in `*fd` and the
 * file's metadata in `*st`; the caller is then responsible for the
 * descriptor.
 *
 * Returns 0 on failure, in which case no descriptor is left open. errno
 * then describes the reason:
 *   - it is left untouched after a failing git_open(),
 *   - it holds the error reported by fstat() if that call failed,
 *   - ENOENT if the file is empty (treated the same as missing),
 *   - EINVAL if the file is non-empty but shorter than one hex hash
 *     (a warning is printed as well).
 */
static int open_multi_pack_index_chain(const char *chain_file,
				       int *fd, struct stat *st)
{
	*fd = git_open(chain_file);
	if (*fd < 0)
		return 0;

	if (fstat(*fd, st)) {
		/*
		 * close() may itself fail and overwrite errno; keep the
		 * error from fstat() so the caller sees the real cause.
		 */
		int saved_errno = errno;

		close(*fd);
		errno = saved_errno;
		return 0;
	}

	if (st->st_size < the_hash_algo->hexsz) {
		close(*fd);
		if (!st->st_size) {
			/* treat empty files the same as missing */
			errno = ENOENT;
		} else {
			warning(_("multi-pack-index chain file too small"));
			errno = EINVAL;
		}
		return 0;
	}

	return 1;
}
static int add_midx_to_chain(struct multi_pack_index *midx,
struct multi_pack_index *midx_chain,
struct object_id *oids,
int n)
{
if (midx_chain) {
if (unsigned_add_overflows(midx_chain->num_packs,
midx_chain->num_packs_in_base)) {
warning(_("pack count in base MIDX too high: %"PRIuMAX),
(uintmax_t)midx_chain->num_packs_in_base);
return 0;
}
if (unsigned_add_overflows(midx_chain->num_objects,
midx_chain->num_objects_in_base)) {
warning(_("object count in base MIDX too high: %"PRIuMAX),
(uintmax_t)midx_chain->num_objects_in_base);
return 0;
}
midx->num_packs_in_base = midx_chain->num_packs +
midx_chain->num_packs_in_base;
midx->num_objects_in_base = midx_chain->num_objects +
midx_chain->num_objects_in_base;
}
midx->base_midx = midx_chain;
midx->has_chain = 1;
return 1;
}
static struct multi_pack_index *load_midx_chain_fd_st(const char *object_dir,
int local,
int fd, struct stat *st,
int *incomplete_chain)
{
struct multi_pack_index *midx_chain = NULL;
struct strbuf buf = STRBUF_INIT;
struct object_id *layers = NULL;
int valid = 1;
uint32_t i, count;
FILE *fp = xfdopen(fd, "r");
count = st->st_size / (the_hash_algo->hexsz + 1);
CALLOC_ARRAY(layers, count);
for (i = 0; i < count; i++) {
struct multi_pack_index *m;
if (strbuf_getline_lf(&buf, fp) == EOF)
break;
if (get_oid_hex(buf.buf, &layers[i])) {
warning(_("invalid multi-pack-index chain: line '%s' "
"not a hash"),
buf.buf);
valid = 0;
break;
}
valid = 0;
strbuf_reset(&buf);
get_split_midx_filename_ext(&buf, object_dir, layers[i].hash,
MIDX_EXT_MIDX);
m = load_multi_pack_index_one(object_dir, buf.buf, local);
if (m) {
if (add_midx_to_chain(m, midx_chain, layers, i)) {
midx_chain = m;
valid = 1;
} else {
close_midx(m);
}
}
if (!valid) {
warning(_("unable to find all multi-pack index files"));
break;
}
}
free(layers);
fclose(fp);
strbuf_release(&buf);
*incomplete_chain = !valid;
return midx_chain;
}
static struct multi_pack_index *load_multi_pack_index_chain(const char *object_dir,
int local)
{
struct strbuf chain_file = STRBUF_INIT;
struct stat st;
int fd;
struct multi_pack_index *m = NULL;
get_midx_chain_filename(&chain_file, object_dir);
if (open_multi_pack_index_chain(chain_file.buf, &fd, &st)) {
int incomplete;
/* ownership of fd is taken over by load function */
m = load_midx_chain_fd_st(object_dir, local, fd, &st,
&incomplete);
}
strbuf_release(&chain_file);
return m;
}
struct multi_pack_index *load_multi_pack_index(const char *object_dir,
int local)
{
struct strbuf midx_name = STRBUF_INIT;
struct multi_pack_index *m;
get_midx_filename(&midx_name, object_dir);
m = load_multi_pack_index_one(object_dir, midx_name.buf, local);
if (!m)
m = load_multi_pack_index_chain(object_dir, local);
strbuf_release(&midx_name);
return m;
}
void close_midx(struct multi_pack_index *m)
{
uint32_t i;
@ -230,6 +393,7 @@ void close_midx(struct multi_pack_index *m)
return;
close_midx(m->next);
close_midx(m->base_midx);
munmap((unsigned char *)m->data, m->data_len);