Merge branch 'tb/midx-write-cleanup'
Code clean-up around writing the .midx files. * tb/midx-write-cleanup: pack-bitmap.c: reimplement `midx_bitmap_filename()` with helper midx: replace `get_midx_rev_filename()` with a generic helper midx-write.c: support reading an existing MIDX with `packs_to_include` midx-write.c: extract `fill_packs_from_midx()` midx-write.c: extract `should_include_pack()` midx-write.c: pass `start_pack` to `compute_sorted_entries()` midx-write.c: reduce argument count for `get_sorted_entries()` midx-write.c: tolerate `--preferred-pack` without bitmaps
This commit is contained in:
161
midx-write.c
161
midx-write.c
@ -100,6 +100,18 @@ struct write_midx_context {
|
||||
struct string_list *to_include;
|
||||
};
|
||||
|
||||
static int should_include_pack(const struct write_midx_context *ctx,
|
||||
const char *file_name,
|
||||
int exclude_from_midx)
|
||||
{
|
||||
if (exclude_from_midx && ctx->m && midx_contains_pack(ctx->m, file_name))
|
||||
return 0;
|
||||
if (ctx->to_include && !string_list_has_string(ctx->to_include,
|
||||
file_name))
|
||||
return 0;
|
||||
return 1;
|
||||
}
|
||||
|
||||
static void add_pack_to_midx(const char *full_path, size_t full_path_len,
|
||||
const char *file_name, void *data)
|
||||
{
|
||||
@ -108,29 +120,11 @@ static void add_pack_to_midx(const char *full_path, size_t full_path_len,
|
||||
|
||||
if (ends_with(file_name, ".idx")) {
|
||||
display_progress(ctx->progress, ++ctx->pack_paths_checked);
|
||||
/*
|
||||
* Note that at most one of ctx->m and ctx->to_include are set,
|
||||
* so we are testing midx_contains_pack() and
|
||||
* string_list_has_string() independently (guarded by the
|
||||
* appropriate NULL checks).
|
||||
*
|
||||
* We could support passing to_include while reusing an existing
|
||||
* MIDX, but don't currently since the reuse process drags
|
||||
* forward all packs from an existing MIDX (without checking
|
||||
* whether or not they appear in the to_include list).
|
||||
*
|
||||
* If we added support for that, these next two conditional
|
||||
* should be performed independently (likely checking
|
||||
* to_include before the existing MIDX).
|
||||
*/
|
||||
if (ctx->m && midx_contains_pack(ctx->m, file_name))
|
||||
return;
|
||||
else if (ctx->to_include &&
|
||||
!string_list_has_string(ctx->to_include, file_name))
|
||||
|
||||
if (!should_include_pack(ctx, file_name, 1))
|
||||
return;
|
||||
|
||||
ALLOC_GROW(ctx->info, ctx->nr + 1, ctx->alloc);
|
||||
|
||||
p = add_packed_git(full_path, full_path_len, 0);
|
||||
if (!p) {
|
||||
warning(_("failed to add packfile '%s'"),
|
||||
@ -299,21 +293,16 @@ static void midx_fanout_add_pack_fanout(struct midx_fanout *fanout,
|
||||
* Copy only the de-duplicated entries (selected by most-recent modified time
|
||||
* of a packfile containing the object).
|
||||
*/
|
||||
static struct pack_midx_entry *get_sorted_entries(struct multi_pack_index *m,
|
||||
struct pack_info *info,
|
||||
uint32_t nr_packs,
|
||||
size_t *nr_objects,
|
||||
int preferred_pack)
|
||||
static void compute_sorted_entries(struct write_midx_context *ctx,
|
||||
uint32_t start_pack)
|
||||
{
|
||||
uint32_t cur_fanout, cur_pack, cur_object;
|
||||
size_t alloc_objects, total_objects = 0;
|
||||
struct midx_fanout fanout = { 0 };
|
||||
struct pack_midx_entry *deduplicated_entries = NULL;
|
||||
uint32_t start_pack = m ? m->num_packs : 0;
|
||||
|
||||
for (cur_pack = start_pack; cur_pack < nr_packs; cur_pack++)
|
||||
for (cur_pack = start_pack; cur_pack < ctx->nr; cur_pack++)
|
||||
total_objects = st_add(total_objects,
|
||||
info[cur_pack].p->num_objects);
|
||||
ctx->info[cur_pack].p->num_objects);
|
||||
|
||||
/*
|
||||
* As we de-duplicate by fanout value, we expect the fanout
|
||||
@ -323,26 +312,26 @@ static struct pack_midx_entry *get_sorted_entries(struct multi_pack_index *m,
|
||||
alloc_objects = fanout.alloc = total_objects > 3200 ? total_objects / 200 : 16;
|
||||
|
||||
ALLOC_ARRAY(fanout.entries, fanout.alloc);
|
||||
ALLOC_ARRAY(deduplicated_entries, alloc_objects);
|
||||
*nr_objects = 0;
|
||||
ALLOC_ARRAY(ctx->entries, alloc_objects);
|
||||
ctx->entries_nr = 0;
|
||||
|
||||
for (cur_fanout = 0; cur_fanout < 256; cur_fanout++) {
|
||||
fanout.nr = 0;
|
||||
|
||||
if (m)
|
||||
midx_fanout_add_midx_fanout(&fanout, m, cur_fanout,
|
||||
preferred_pack);
|
||||
if (ctx->m)
|
||||
midx_fanout_add_midx_fanout(&fanout, ctx->m, cur_fanout,
|
||||
ctx->preferred_pack_idx);
|
||||
|
||||
for (cur_pack = start_pack; cur_pack < nr_packs; cur_pack++) {
|
||||
int preferred = cur_pack == preferred_pack;
|
||||
for (cur_pack = start_pack; cur_pack < ctx->nr; cur_pack++) {
|
||||
int preferred = cur_pack == ctx->preferred_pack_idx;
|
||||
midx_fanout_add_pack_fanout(&fanout,
|
||||
info, cur_pack,
|
||||
ctx->info, cur_pack,
|
||||
preferred, cur_fanout);
|
||||
}
|
||||
|
||||
if (-1 < preferred_pack && preferred_pack < start_pack)
|
||||
midx_fanout_add_pack_fanout(&fanout, info,
|
||||
preferred_pack, 1,
|
||||
if (-1 < ctx->preferred_pack_idx && ctx->preferred_pack_idx < start_pack)
|
||||
midx_fanout_add_pack_fanout(&fanout, ctx->info,
|
||||
ctx->preferred_pack_idx, 1,
|
||||
cur_fanout);
|
||||
|
||||
midx_fanout_sort(&fanout);
|
||||
@ -356,17 +345,16 @@ static struct pack_midx_entry *get_sorted_entries(struct multi_pack_index *m,
|
||||
&fanout.entries[cur_object].oid))
|
||||
continue;
|
||||
|
||||
ALLOC_GROW(deduplicated_entries, st_add(*nr_objects, 1),
|
||||
ALLOC_GROW(ctx->entries, st_add(ctx->entries_nr, 1),
|
||||
alloc_objects);
|
||||
memcpy(&deduplicated_entries[*nr_objects],
|
||||
memcpy(&ctx->entries[ctx->entries_nr],
|
||||
&fanout.entries[cur_object],
|
||||
sizeof(struct pack_midx_entry));
|
||||
(*nr_objects)++;
|
||||
ctx->entries_nr++;
|
||||
}
|
||||
}
|
||||
|
||||
free(fanout.entries);
|
||||
return deduplicated_entries;
|
||||
}
|
||||
|
||||
static int write_midx_pack_names(struct hashfile *f, void *data)
|
||||
@ -886,6 +874,43 @@ cleanup:
|
||||
return result;
|
||||
}
|
||||
|
||||
static int fill_packs_from_midx(struct write_midx_context *ctx,
|
||||
const char *preferred_pack_name, uint32_t flags)
|
||||
{
|
||||
uint32_t i;
|
||||
|
||||
for (i = 0; i < ctx->m->num_packs; i++) {
|
||||
if (!should_include_pack(ctx, ctx->m->pack_names[i], 0))
|
||||
continue;
|
||||
|
||||
ALLOC_GROW(ctx->info, ctx->nr + 1, ctx->alloc);
|
||||
|
||||
if (flags & MIDX_WRITE_REV_INDEX || preferred_pack_name) {
|
||||
/*
|
||||
* If generating a reverse index, need to have
|
||||
* packed_git's loaded to compare their
|
||||
* mtimes and object count.
|
||||
*
|
||||
*
|
||||
* If a preferred pack is specified, need to
|
||||
* have packed_git's loaded to ensure the chosen
|
||||
* preferred pack has a non-zero object count.
|
||||
*/
|
||||
if (prepare_midx_pack(the_repository, ctx->m, i))
|
||||
return error(_("could not load pack"));
|
||||
|
||||
if (open_pack_index(ctx->m->packs[i]))
|
||||
die(_("could not open index for %s"),
|
||||
ctx->m->packs[i]->pack_name);
|
||||
}
|
||||
|
||||
fill_pack_info(&ctx->info[ctx->nr++], ctx->m->packs[i],
|
||||
ctx->m->pack_names[i], i);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int write_midx_internal(const char *object_dir,
|
||||
struct string_list *packs_to_include,
|
||||
struct string_list *packs_to_drop,
|
||||
@ -895,7 +920,7 @@ static int write_midx_internal(const char *object_dir,
|
||||
{
|
||||
struct strbuf midx_name = STRBUF_INIT;
|
||||
unsigned char midx_hash[GIT_MAX_RAWSZ];
|
||||
uint32_t i;
|
||||
uint32_t i, start_pack;
|
||||
struct hashfile *f = NULL;
|
||||
struct lock_file lk;
|
||||
struct write_midx_context ctx = { 0 };
|
||||
@ -912,15 +937,7 @@ static int write_midx_internal(const char *object_dir,
|
||||
die_errno(_("unable to create leading directories of %s"),
|
||||
midx_name.buf);
|
||||
|
||||
if (!packs_to_include) {
|
||||
/*
|
||||
* Only reference an existing MIDX when not filtering which
|
||||
* packs to include, since all packs and objects are copied
|
||||
* blindly from an existing MIDX if one is present.
|
||||
*/
|
||||
ctx.m = lookup_multi_pack_index(the_repository, object_dir);
|
||||
}
|
||||
|
||||
ctx.m = lookup_multi_pack_index(the_repository, object_dir);
|
||||
if (ctx.m && !midx_checksum_valid(ctx.m)) {
|
||||
warning(_("ignoring existing multi-pack-index; checksum mismatch"));
|
||||
ctx.m = NULL;
|
||||
@ -929,42 +946,23 @@ static int write_midx_internal(const char *object_dir,
|
||||
ctx.nr = 0;
|
||||
ctx.alloc = ctx.m ? ctx.m->num_packs : 16;
|
||||
ctx.info = NULL;
|
||||
ctx.to_include = packs_to_include;
|
||||
ALLOC_ARRAY(ctx.info, ctx.alloc);
|
||||
|
||||
if (ctx.m) {
|
||||
for (i = 0; i < ctx.m->num_packs; i++) {
|
||||
ALLOC_GROW(ctx.info, ctx.nr + 1, ctx.alloc);
|
||||
|
||||
if (flags & MIDX_WRITE_REV_INDEX) {
|
||||
/*
|
||||
* If generating a reverse index, need to have
|
||||
* packed_git's loaded to compare their
|
||||
* mtimes and object count.
|
||||
*/
|
||||
if (prepare_midx_pack(the_repository, ctx.m, i)) {
|
||||
error(_("could not load pack"));
|
||||
result = 1;
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
if (open_pack_index(ctx.m->packs[i]))
|
||||
die(_("could not open index for %s"),
|
||||
ctx.m->packs[i]->pack_name);
|
||||
}
|
||||
|
||||
fill_pack_info(&ctx.info[ctx.nr++], ctx.m->packs[i],
|
||||
ctx.m->pack_names[i], i);
|
||||
}
|
||||
if (ctx.m && fill_packs_from_midx(&ctx, preferred_pack_name,
|
||||
flags) < 0) {
|
||||
result = 1;
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
start_pack = ctx.nr;
|
||||
|
||||
ctx.pack_paths_checked = 0;
|
||||
if (flags & MIDX_PROGRESS)
|
||||
ctx.progress = start_delayed_progress(_("Adding packfiles to multi-pack-index"), 0);
|
||||
else
|
||||
ctx.progress = NULL;
|
||||
|
||||
ctx.to_include = packs_to_include;
|
||||
|
||||
for_each_file_in_pack_dir(object_dir, add_pack_to_midx, &ctx);
|
||||
stop_progress(&ctx.progress);
|
||||
|
||||
@ -1054,8 +1052,7 @@ static int write_midx_internal(const char *object_dir,
|
||||
}
|
||||
}
|
||||
|
||||
ctx.entries = get_sorted_entries(ctx.m, ctx.info, ctx.nr, &ctx.entries_nr,
|
||||
ctx.preferred_pack_idx);
|
||||
compute_sorted_entries(&ctx, start_pack);
|
||||
|
||||
ctx.large_offsets_needed = 0;
|
||||
for (i = 0; i < ctx.entries_nr; i++) {
|
||||
|
10
midx.c
10
midx.c
@ -25,13 +25,15 @@ const unsigned char *get_midx_checksum(struct multi_pack_index *m)
|
||||
|
||||
void get_midx_filename(struct strbuf *out, const char *object_dir)
|
||||
{
|
||||
strbuf_addf(out, "%s/pack/multi-pack-index", object_dir);
|
||||
get_midx_filename_ext(out, object_dir, NULL, NULL);
|
||||
}
|
||||
|
||||
void get_midx_rev_filename(struct strbuf *out, struct multi_pack_index *m)
|
||||
void get_midx_filename_ext(struct strbuf *out, const char *object_dir,
|
||||
const unsigned char *hash, const char *ext)
|
||||
{
|
||||
get_midx_filename(out, m->object_dir);
|
||||
strbuf_addf(out, "-%s.rev", hash_to_hex(get_midx_checksum(m)));
|
||||
strbuf_addf(out, "%s/pack/multi-pack-index", object_dir);
|
||||
if (ext)
|
||||
strbuf_addf(out, "-%s.%s", hash_to_hex(hash), ext);
|
||||
}
|
||||
|
||||
static int midx_read_oid_fanout(const unsigned char *chunk_start,
|
||||
|
6
midx.h
6
midx.h
@ -74,9 +74,13 @@ struct multi_pack_index {
|
||||
#define MIDX_WRITE_BITMAP_HASH_CACHE (1 << 3)
|
||||
#define MIDX_WRITE_BITMAP_LOOKUP_TABLE (1 << 4)
|
||||
|
||||
#define MIDX_EXT_REV "rev"
|
||||
#define MIDX_EXT_BITMAP "bitmap"
|
||||
|
||||
const unsigned char *get_midx_checksum(struct multi_pack_index *m);
|
||||
void get_midx_filename(struct strbuf *out, const char *object_dir);
|
||||
void get_midx_rev_filename(struct strbuf *out, struct multi_pack_index *m);
|
||||
void get_midx_filename_ext(struct strbuf *out, const char *object_dir,
|
||||
const unsigned char *hash, const char *ext);
|
||||
|
||||
struct multi_pack_index *load_multi_pack_index(const char *object_dir, int local);
|
||||
int prepare_midx_pack(struct repository *r, struct multi_pack_index *m, uint32_t pack_int_id);
|
||||
|
@ -309,9 +309,8 @@ static int load_bitmap_entries_v1(struct bitmap_index *index)
|
||||
char *midx_bitmap_filename(struct multi_pack_index *midx)
|
||||
{
|
||||
struct strbuf buf = STRBUF_INIT;
|
||||
|
||||
get_midx_filename(&buf, midx->object_dir);
|
||||
strbuf_addf(&buf, "-%s.bitmap", hash_to_hex(get_midx_checksum(midx)));
|
||||
get_midx_filename_ext(&buf, midx->object_dir, get_midx_checksum(midx),
|
||||
MIDX_EXT_BITMAP);
|
||||
|
||||
return strbuf_detach(&buf, NULL);
|
||||
}
|
||||
|
@ -381,7 +381,8 @@ int load_midx_revindex(struct multi_pack_index *m)
|
||||
trace2_data_string("load_midx_revindex", the_repository,
|
||||
"source", "rev");
|
||||
|
||||
get_midx_rev_filename(&revindex_name, m);
|
||||
get_midx_filename_ext(&revindex_name, m->object_dir,
|
||||
get_midx_checksum(m), MIDX_EXT_REV);
|
||||
|
||||
ret = load_revindex_from_disk(revindex_name.buf,
|
||||
m->num_objects,
|
||||
|
@ -350,6 +350,29 @@ test_expect_success 'preferred packs must be non-empty' '
|
||||
)
|
||||
'
|
||||
|
||||
test_expect_success 'preferred pack from existing MIDX without bitmaps' '
|
||||
git init preferred-without-bitmaps &&
|
||||
(
|
||||
cd preferred-without-bitmaps &&
|
||||
|
||||
test_commit one &&
|
||||
pack="$(git pack-objects --all $objdir/pack/pack </dev/null)" &&
|
||||
|
||||
git multi-pack-index write &&
|
||||
|
||||
# make another pack so that the subsequent MIDX write
|
||||
# has something to do
|
||||
test_commit two &&
|
||||
git repack -d &&
|
||||
|
||||
# write a new MIDX without bitmaps reusing the singular
|
||||
# pack from the existing MIDX as the preferred pack in
|
||||
# the new MIDX
|
||||
git multi-pack-index write --preferred-pack=pack-$pack.pack
|
||||
)
|
||||
|
||||
'
|
||||
|
||||
test_expect_success 'verify multi-pack-index success' '
|
||||
git multi-pack-index verify --object-dir=$objdir
|
||||
'
|
||||
|
Reference in New Issue
Block a user