midx: avoid duplicate packed_git entries
When we scan a pack directory to load the idx entries we find into the
packed_git list, we skip any of them that are contained in a midx. We
then load them later lazily if we actually need to access the
corresponding pack, referencing them both from the midx struct and the
packed_git list.
The lazy-load in the midx code checks to see if the midx already
mentions the pack, but doesn't otherwise check the packed_git list. This
makes sense, since we should have added any pack to both lists.
But there's a loophole! If we call close_object_store(), that frees the
midx entirely, but _not_ the packed_git structs, which we must keep
around for Reasons[1]. If we then try to look up more objects, we'll
auto-load the midx again, which won't realize that we've already loaded
those packs, and will create duplicate entries in the packed_git list.
This is possibly inefficient, because it means we may open and map the
pack redundantly. But it can also lead to weird user-visible behavior.
The case I found is in "git repack", which closes and reopens the midx
after repacking and then calls update_server_info(). We end up writing
the duplicate entries into objects/info/packs.
We could obviously de-dup them while writing that file, but it seems
like a violation of more core assumptions that we end up with these
duplicate structs at all.
We can avoid the duplicates reasonably efficiently by checking their
names in the pack_map hash. This annoyingly does require a little more
than a straight hash lookup due to the naming conventions, but it should
only happen when we are about to actually open a pack. I don't think one
extra malloc will be noticeable there.
[1] I'm not entirely sure of all the details, except that we generally
assume the packed_git structs never go away. We noted this
restriction in the comment added by 6f1e9394e2
(object: fix leaking
packfiles when closing object store, 2024-08-08), but it's somewhat
vague. At any rate, if you try freeing the structs in
close_object_store(), you can observe segfaults all over the test
suite. So it might be fixable, but it's not trivial.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Taylor Blau <me@ttaylorr.com>
This commit is contained in:
18
midx.c
18
midx.c
@ -445,6 +445,7 @@ int prepare_midx_pack(struct repository *r, struct multi_pack_index *m,
|
|||||||
uint32_t pack_int_id)
|
uint32_t pack_int_id)
|
||||||
{
|
{
|
||||||
struct strbuf pack_name = STRBUF_INIT;
|
struct strbuf pack_name = STRBUF_INIT;
|
||||||
|
struct strbuf key = STRBUF_INIT;
|
||||||
struct packed_git *p;
|
struct packed_git *p;
|
||||||
|
|
||||||
pack_int_id = midx_for_pack(&m, pack_int_id);
|
pack_int_id = midx_for_pack(&m, pack_int_id);
|
||||||
@ -455,16 +456,29 @@ int prepare_midx_pack(struct repository *r, struct multi_pack_index *m,
|
|||||||
strbuf_addf(&pack_name, "%s/pack/%s", m->object_dir,
|
strbuf_addf(&pack_name, "%s/pack/%s", m->object_dir,
|
||||||
m->pack_names[pack_int_id]);
|
m->pack_names[pack_int_id]);
|
||||||
|
|
||||||
|
/* pack_map holds the ".pack" name, but we have the .idx */
|
||||||
|
strbuf_addbuf(&key, &pack_name);
|
||||||
|
strbuf_strip_suffix(&key, ".idx");
|
||||||
|
strbuf_addstr(&key, ".pack");
|
||||||
|
p = hashmap_get_entry_from_hash(&r->objects->pack_map,
|
||||||
|
strhash(key.buf), key.buf,
|
||||||
|
struct packed_git, packmap_ent);
|
||||||
|
if (!p) {
|
||||||
p = add_packed_git(pack_name.buf, pack_name.len, m->local);
|
p = add_packed_git(pack_name.buf, pack_name.len, m->local);
|
||||||
|
if (p) {
|
||||||
|
install_packed_git(r, p);
|
||||||
|
list_add_tail(&p->mru, &r->objects->packed_git_mru);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
strbuf_release(&pack_name);
|
strbuf_release(&pack_name);
|
||||||
|
strbuf_release(&key);
|
||||||
|
|
||||||
if (!p)
|
if (!p)
|
||||||
return 1;
|
return 1;
|
||||||
|
|
||||||
p->multi_pack_index = 1;
|
p->multi_pack_index = 1;
|
||||||
m->packs[pack_int_id] = p;
|
m->packs[pack_int_id] = p;
|
||||||
install_packed_git(r, p);
|
|
||||||
list_add_tail(&p->mru, &r->objects->packed_git_mru);
|
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
@ -39,4 +39,12 @@ test_expect_success 'info/refs updates when changes are made' '
|
|||||||
! test_cmp a b
|
! test_cmp a b
|
||||||
'
|
'
|
||||||
|
|
||||||
|
test_expect_success 'midx does not create duplicate pack entries' '
|
||||||
|
git repack -d --write-midx &&
|
||||||
|
git repack -d &&
|
||||||
|
grep ^P .git/objects/info/packs >packs &&
|
||||||
|
uniq -d <packs >dups &&
|
||||||
|
test_must_be_empty dups
|
||||||
|
'
|
||||||
|
|
||||||
test_done
|
test_done
|
||||||
|
Reference in New Issue
Block a user