Merge branch 'tb/midx-write-cleanup'

Code clean-up around writing the .midx files.

* tb/midx-write-cleanup:
  pack-bitmap.c: reimplement `midx_bitmap_filename()` with helper
  midx: replace `get_midx_rev_filename()` with a generic helper
  midx-write.c: support reading an existing MIDX with `packs_to_include`
  midx-write.c: extract `fill_packs_from_midx()`
  midx-write.c: extract `should_include_pack()`
  midx-write.c: pass `start_pack` to `compute_sorted_entries()`
  midx-write.c: reduce argument count for `get_sorted_entries()`
  midx-write.c: tolerate `--preferred-pack` without bitmaps
This commit is contained in:
Junio C Hamano
2024-06-07 10:57:23 -07:00
6 changed files with 117 additions and 91 deletions

View File

@ -100,6 +100,18 @@ struct write_midx_context {
struct string_list *to_include; struct string_list *to_include;
}; };
static int should_include_pack(const struct write_midx_context *ctx,
const char *file_name,
int exclude_from_midx)
{
if (exclude_from_midx && ctx->m && midx_contains_pack(ctx->m, file_name))
return 0;
if (ctx->to_include && !string_list_has_string(ctx->to_include,
file_name))
return 0;
return 1;
}
static void add_pack_to_midx(const char *full_path, size_t full_path_len, static void add_pack_to_midx(const char *full_path, size_t full_path_len,
const char *file_name, void *data) const char *file_name, void *data)
{ {
@ -108,29 +120,11 @@ static void add_pack_to_midx(const char *full_path, size_t full_path_len,
if (ends_with(file_name, ".idx")) { if (ends_with(file_name, ".idx")) {
display_progress(ctx->progress, ++ctx->pack_paths_checked); display_progress(ctx->progress, ++ctx->pack_paths_checked);
/*
* Note that at most one of ctx->m and ctx->to_include are set, if (!should_include_pack(ctx, file_name, 1))
* so we are testing midx_contains_pack() and
* string_list_has_string() independently (guarded by the
* appropriate NULL checks).
*
* We could support passing to_include while reusing an existing
* MIDX, but don't currently since the reuse process drags
* forward all packs from an existing MIDX (without checking
* whether or not they appear in the to_include list).
*
* If we added support for that, these next two conditional
* should be performed independently (likely checking
* to_include before the existing MIDX).
*/
if (ctx->m && midx_contains_pack(ctx->m, file_name))
return;
else if (ctx->to_include &&
!string_list_has_string(ctx->to_include, file_name))
return; return;
ALLOC_GROW(ctx->info, ctx->nr + 1, ctx->alloc); ALLOC_GROW(ctx->info, ctx->nr + 1, ctx->alloc);
p = add_packed_git(full_path, full_path_len, 0); p = add_packed_git(full_path, full_path_len, 0);
if (!p) { if (!p) {
warning(_("failed to add packfile '%s'"), warning(_("failed to add packfile '%s'"),
@ -299,21 +293,16 @@ static void midx_fanout_add_pack_fanout(struct midx_fanout *fanout,
* Copy only the de-duplicated entries (selected by most-recent modified time * Copy only the de-duplicated entries (selected by most-recent modified time
* of a packfile containing the object). * of a packfile containing the object).
*/ */
static struct pack_midx_entry *get_sorted_entries(struct multi_pack_index *m, static void compute_sorted_entries(struct write_midx_context *ctx,
struct pack_info *info, uint32_t start_pack)
uint32_t nr_packs,
size_t *nr_objects,
int preferred_pack)
{ {
uint32_t cur_fanout, cur_pack, cur_object; uint32_t cur_fanout, cur_pack, cur_object;
size_t alloc_objects, total_objects = 0; size_t alloc_objects, total_objects = 0;
struct midx_fanout fanout = { 0 }; struct midx_fanout fanout = { 0 };
struct pack_midx_entry *deduplicated_entries = NULL;
uint32_t start_pack = m ? m->num_packs : 0;
for (cur_pack = start_pack; cur_pack < nr_packs; cur_pack++) for (cur_pack = start_pack; cur_pack < ctx->nr; cur_pack++)
total_objects = st_add(total_objects, total_objects = st_add(total_objects,
info[cur_pack].p->num_objects); ctx->info[cur_pack].p->num_objects);
/* /*
* As we de-duplicate by fanout value, we expect the fanout * As we de-duplicate by fanout value, we expect the fanout
@ -323,26 +312,26 @@ static struct pack_midx_entry *get_sorted_entries(struct multi_pack_index *m,
alloc_objects = fanout.alloc = total_objects > 3200 ? total_objects / 200 : 16; alloc_objects = fanout.alloc = total_objects > 3200 ? total_objects / 200 : 16;
ALLOC_ARRAY(fanout.entries, fanout.alloc); ALLOC_ARRAY(fanout.entries, fanout.alloc);
ALLOC_ARRAY(deduplicated_entries, alloc_objects); ALLOC_ARRAY(ctx->entries, alloc_objects);
*nr_objects = 0; ctx->entries_nr = 0;
for (cur_fanout = 0; cur_fanout < 256; cur_fanout++) { for (cur_fanout = 0; cur_fanout < 256; cur_fanout++) {
fanout.nr = 0; fanout.nr = 0;
if (m) if (ctx->m)
midx_fanout_add_midx_fanout(&fanout, m, cur_fanout, midx_fanout_add_midx_fanout(&fanout, ctx->m, cur_fanout,
preferred_pack); ctx->preferred_pack_idx);
for (cur_pack = start_pack; cur_pack < nr_packs; cur_pack++) { for (cur_pack = start_pack; cur_pack < ctx->nr; cur_pack++) {
int preferred = cur_pack == preferred_pack; int preferred = cur_pack == ctx->preferred_pack_idx;
midx_fanout_add_pack_fanout(&fanout, midx_fanout_add_pack_fanout(&fanout,
info, cur_pack, ctx->info, cur_pack,
preferred, cur_fanout); preferred, cur_fanout);
} }
if (-1 < preferred_pack && preferred_pack < start_pack) if (-1 < ctx->preferred_pack_idx && ctx->preferred_pack_idx < start_pack)
midx_fanout_add_pack_fanout(&fanout, info, midx_fanout_add_pack_fanout(&fanout, ctx->info,
preferred_pack, 1, ctx->preferred_pack_idx, 1,
cur_fanout); cur_fanout);
midx_fanout_sort(&fanout); midx_fanout_sort(&fanout);
@ -356,17 +345,16 @@ static struct pack_midx_entry *get_sorted_entries(struct multi_pack_index *m,
&fanout.entries[cur_object].oid)) &fanout.entries[cur_object].oid))
continue; continue;
ALLOC_GROW(deduplicated_entries, st_add(*nr_objects, 1), ALLOC_GROW(ctx->entries, st_add(ctx->entries_nr, 1),
alloc_objects); alloc_objects);
memcpy(&deduplicated_entries[*nr_objects], memcpy(&ctx->entries[ctx->entries_nr],
&fanout.entries[cur_object], &fanout.entries[cur_object],
sizeof(struct pack_midx_entry)); sizeof(struct pack_midx_entry));
(*nr_objects)++; ctx->entries_nr++;
} }
} }
free(fanout.entries); free(fanout.entries);
return deduplicated_entries;
} }
static int write_midx_pack_names(struct hashfile *f, void *data) static int write_midx_pack_names(struct hashfile *f, void *data)
@ -886,6 +874,43 @@ cleanup:
return result; return result;
} }
static int fill_packs_from_midx(struct write_midx_context *ctx,
const char *preferred_pack_name, uint32_t flags)
{
uint32_t i;
for (i = 0; i < ctx->m->num_packs; i++) {
if (!should_include_pack(ctx, ctx->m->pack_names[i], 0))
continue;
ALLOC_GROW(ctx->info, ctx->nr + 1, ctx->alloc);
if (flags & MIDX_WRITE_REV_INDEX || preferred_pack_name) {
/*
* If generating a reverse index, need to have
* packed_git's loaded to compare their
* mtimes and object count.
*
*
* If a preferred pack is specified, need to
* have packed_git's loaded to ensure the chosen
* preferred pack has a non-zero object count.
*/
if (prepare_midx_pack(the_repository, ctx->m, i))
return error(_("could not load pack"));
if (open_pack_index(ctx->m->packs[i]))
die(_("could not open index for %s"),
ctx->m->packs[i]->pack_name);
}
fill_pack_info(&ctx->info[ctx->nr++], ctx->m->packs[i],
ctx->m->pack_names[i], i);
}
return 0;
}
static int write_midx_internal(const char *object_dir, static int write_midx_internal(const char *object_dir,
struct string_list *packs_to_include, struct string_list *packs_to_include,
struct string_list *packs_to_drop, struct string_list *packs_to_drop,
@ -895,7 +920,7 @@ static int write_midx_internal(const char *object_dir,
{ {
struct strbuf midx_name = STRBUF_INIT; struct strbuf midx_name = STRBUF_INIT;
unsigned char midx_hash[GIT_MAX_RAWSZ]; unsigned char midx_hash[GIT_MAX_RAWSZ];
uint32_t i; uint32_t i, start_pack;
struct hashfile *f = NULL; struct hashfile *f = NULL;
struct lock_file lk; struct lock_file lk;
struct write_midx_context ctx = { 0 }; struct write_midx_context ctx = { 0 };
@ -912,15 +937,7 @@ static int write_midx_internal(const char *object_dir,
die_errno(_("unable to create leading directories of %s"), die_errno(_("unable to create leading directories of %s"),
midx_name.buf); midx_name.buf);
if (!packs_to_include) { ctx.m = lookup_multi_pack_index(the_repository, object_dir);
/*
* Only reference an existing MIDX when not filtering which
* packs to include, since all packs and objects are copied
* blindly from an existing MIDX if one is present.
*/
ctx.m = lookup_multi_pack_index(the_repository, object_dir);
}
if (ctx.m && !midx_checksum_valid(ctx.m)) { if (ctx.m && !midx_checksum_valid(ctx.m)) {
warning(_("ignoring existing multi-pack-index; checksum mismatch")); warning(_("ignoring existing multi-pack-index; checksum mismatch"));
ctx.m = NULL; ctx.m = NULL;
@ -929,42 +946,23 @@ static int write_midx_internal(const char *object_dir,
ctx.nr = 0; ctx.nr = 0;
ctx.alloc = ctx.m ? ctx.m->num_packs : 16; ctx.alloc = ctx.m ? ctx.m->num_packs : 16;
ctx.info = NULL; ctx.info = NULL;
ctx.to_include = packs_to_include;
ALLOC_ARRAY(ctx.info, ctx.alloc); ALLOC_ARRAY(ctx.info, ctx.alloc);
if (ctx.m) { if (ctx.m && fill_packs_from_midx(&ctx, preferred_pack_name,
for (i = 0; i < ctx.m->num_packs; i++) { flags) < 0) {
ALLOC_GROW(ctx.info, ctx.nr + 1, ctx.alloc); result = 1;
goto cleanup;
if (flags & MIDX_WRITE_REV_INDEX) {
/*
* If generating a reverse index, need to have
* packed_git's loaded to compare their
* mtimes and object count.
*/
if (prepare_midx_pack(the_repository, ctx.m, i)) {
error(_("could not load pack"));
result = 1;
goto cleanup;
}
if (open_pack_index(ctx.m->packs[i]))
die(_("could not open index for %s"),
ctx.m->packs[i]->pack_name);
}
fill_pack_info(&ctx.info[ctx.nr++], ctx.m->packs[i],
ctx.m->pack_names[i], i);
}
} }
start_pack = ctx.nr;
ctx.pack_paths_checked = 0; ctx.pack_paths_checked = 0;
if (flags & MIDX_PROGRESS) if (flags & MIDX_PROGRESS)
ctx.progress = start_delayed_progress(_("Adding packfiles to multi-pack-index"), 0); ctx.progress = start_delayed_progress(_("Adding packfiles to multi-pack-index"), 0);
else else
ctx.progress = NULL; ctx.progress = NULL;
ctx.to_include = packs_to_include;
for_each_file_in_pack_dir(object_dir, add_pack_to_midx, &ctx); for_each_file_in_pack_dir(object_dir, add_pack_to_midx, &ctx);
stop_progress(&ctx.progress); stop_progress(&ctx.progress);
@ -1054,8 +1052,7 @@ static int write_midx_internal(const char *object_dir,
} }
} }
ctx.entries = get_sorted_entries(ctx.m, ctx.info, ctx.nr, &ctx.entries_nr, compute_sorted_entries(&ctx, start_pack);
ctx.preferred_pack_idx);
ctx.large_offsets_needed = 0; ctx.large_offsets_needed = 0;
for (i = 0; i < ctx.entries_nr; i++) { for (i = 0; i < ctx.entries_nr; i++) {

10
midx.c
View File

@ -25,13 +25,15 @@ const unsigned char *get_midx_checksum(struct multi_pack_index *m)
void get_midx_filename(struct strbuf *out, const char *object_dir) void get_midx_filename(struct strbuf *out, const char *object_dir)
{ {
strbuf_addf(out, "%s/pack/multi-pack-index", object_dir); get_midx_filename_ext(out, object_dir, NULL, NULL);
} }
void get_midx_rev_filename(struct strbuf *out, struct multi_pack_index *m) void get_midx_filename_ext(struct strbuf *out, const char *object_dir,
const unsigned char *hash, const char *ext)
{ {
get_midx_filename(out, m->object_dir); strbuf_addf(out, "%s/pack/multi-pack-index", object_dir);
strbuf_addf(out, "-%s.rev", hash_to_hex(get_midx_checksum(m))); if (ext)
strbuf_addf(out, "-%s.%s", hash_to_hex(hash), ext);
} }
static int midx_read_oid_fanout(const unsigned char *chunk_start, static int midx_read_oid_fanout(const unsigned char *chunk_start,

6
midx.h
View File

@ -74,9 +74,13 @@ struct multi_pack_index {
#define MIDX_WRITE_BITMAP_HASH_CACHE (1 << 3) #define MIDX_WRITE_BITMAP_HASH_CACHE (1 << 3)
#define MIDX_WRITE_BITMAP_LOOKUP_TABLE (1 << 4) #define MIDX_WRITE_BITMAP_LOOKUP_TABLE (1 << 4)
#define MIDX_EXT_REV "rev"
#define MIDX_EXT_BITMAP "bitmap"
const unsigned char *get_midx_checksum(struct multi_pack_index *m); const unsigned char *get_midx_checksum(struct multi_pack_index *m);
void get_midx_filename(struct strbuf *out, const char *object_dir); void get_midx_filename(struct strbuf *out, const char *object_dir);
void get_midx_rev_filename(struct strbuf *out, struct multi_pack_index *m); void get_midx_filename_ext(struct strbuf *out, const char *object_dir,
const unsigned char *hash, const char *ext);
struct multi_pack_index *load_multi_pack_index(const char *object_dir, int local); struct multi_pack_index *load_multi_pack_index(const char *object_dir, int local);
int prepare_midx_pack(struct repository *r, struct multi_pack_index *m, uint32_t pack_int_id); int prepare_midx_pack(struct repository *r, struct multi_pack_index *m, uint32_t pack_int_id);

View File

@ -309,9 +309,8 @@ static int load_bitmap_entries_v1(struct bitmap_index *index)
char *midx_bitmap_filename(struct multi_pack_index *midx) char *midx_bitmap_filename(struct multi_pack_index *midx)
{ {
struct strbuf buf = STRBUF_INIT; struct strbuf buf = STRBUF_INIT;
get_midx_filename_ext(&buf, midx->object_dir, get_midx_checksum(midx),
get_midx_filename(&buf, midx->object_dir); MIDX_EXT_BITMAP);
strbuf_addf(&buf, "-%s.bitmap", hash_to_hex(get_midx_checksum(midx)));
return strbuf_detach(&buf, NULL); return strbuf_detach(&buf, NULL);
} }

View File

@ -381,7 +381,8 @@ int load_midx_revindex(struct multi_pack_index *m)
trace2_data_string("load_midx_revindex", the_repository, trace2_data_string("load_midx_revindex", the_repository,
"source", "rev"); "source", "rev");
get_midx_rev_filename(&revindex_name, m); get_midx_filename_ext(&revindex_name, m->object_dir,
get_midx_checksum(m), MIDX_EXT_REV);
ret = load_revindex_from_disk(revindex_name.buf, ret = load_revindex_from_disk(revindex_name.buf,
m->num_objects, m->num_objects,

View File

@ -350,6 +350,29 @@ test_expect_success 'preferred packs must be non-empty' '
) )
' '
test_expect_success 'preferred pack from existing MIDX without bitmaps' '
git init preferred-without-bitmaps &&
(
cd preferred-without-bitmaps &&
test_commit one &&
pack="$(git pack-objects --all $objdir/pack/pack </dev/null)" &&
git multi-pack-index write &&
# make another pack so that the subsequent MIDX write
# has something to do
test_commit two &&
git repack -d &&
# write a new MIDX without bitmaps reusing the singular
# pack from the existing MIDX as the preferred pack in
# the new MIDX
git multi-pack-index write --preferred-pack=pack-$pack.pack
)
'
test_expect_success 'verify multi-pack-index success' ' test_expect_success 'verify multi-pack-index success' '
git multi-pack-index verify --object-dir=$objdir git multi-pack-index verify --object-dir=$objdir
' '