builtin/repack.c: avoid making cruft packs preferred
When doing a `--geometric` repack, we make sure that the preferred pack (if writing a MIDX) is the largest pack that we *didn't* repack. That has the effect of keeping the preferred pack in sync with the pack containing a majority of the repository's reachable objects. But if the repository happens to double in size, we'll repack everything. Here we don't specify any `--preferred-pack`, and instead let the MIDX code choose. In the past, that worked fine, since there would only be one pack to choose from: the one we just wrote. But it's no longer necessarily the case that there is one pack to choose from. It's possible that the repository also has a cruft pack, too. If the cruft pack happens to come earlier in lexical order (and has an earlier mtime than any non-cruft pack), we'll pick that pack as preferred. This makes it impossible to reuse chunks of the reachable pack verbatim from pack-objects, so is sub-optimal. Luckily, this is a somewhat rare circumstance to be in, since we would have to repack the entire repository during a `--geometric` repack, and the cruft pack would have to sort ahead of the pack we just created. Signed-off-by: Taylor Blau <me@ttaylorr.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
This commit is contained in:

committed by
Junio C Hamano

parent
37dc6d8104
commit
3c1e2c2113
@ -355,6 +355,18 @@ static struct generated_pack_data *populate_pack_exts(const char *name)
|
|||||||
return data;
|
return data;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static int has_pack_ext(const struct generated_pack_data *data,
|
||||||
|
const char *ext)
|
||||||
|
{
|
||||||
|
int i;
|
||||||
|
for (i = 0; i < ARRAY_SIZE(exts); i++) {
|
||||||
|
if (strcmp(exts[i].name, ext))
|
||||||
|
continue;
|
||||||
|
return !!data->tempfiles[i];
|
||||||
|
}
|
||||||
|
BUG("unknown pack extension: '%s'", ext);
|
||||||
|
}
|
||||||
|
|
||||||
static void repack_promisor_objects(const struct pack_objects_args *args,
|
static void repack_promisor_objects(const struct pack_objects_args *args,
|
||||||
struct string_list *names)
|
struct string_list *names)
|
||||||
{
|
{
|
||||||
@ -772,6 +784,7 @@ static void midx_included_packs(struct string_list *include,
|
|||||||
|
|
||||||
static int write_midx_included_packs(struct string_list *include,
|
static int write_midx_included_packs(struct string_list *include,
|
||||||
struct pack_geometry *geometry,
|
struct pack_geometry *geometry,
|
||||||
|
struct string_list *names,
|
||||||
const char *refs_snapshot,
|
const char *refs_snapshot,
|
||||||
int show_progress, int write_bitmaps)
|
int show_progress, int write_bitmaps)
|
||||||
{
|
{
|
||||||
@ -801,6 +814,38 @@ static int write_midx_included_packs(struct string_list *include,
|
|||||||
if (preferred)
|
if (preferred)
|
||||||
strvec_pushf(&cmd.args, "--preferred-pack=%s",
|
strvec_pushf(&cmd.args, "--preferred-pack=%s",
|
||||||
pack_basename(preferred));
|
pack_basename(preferred));
|
||||||
|
else if (names->nr) {
|
||||||
|
/* The largest pack was repacked, meaning that either
|
||||||
|
* one or two packs exist depending on whether the
|
||||||
|
* repository has a cruft pack or not.
|
||||||
|
*
|
||||||
|
* Select the non-cruft one as preferred to encourage
|
||||||
|
* pack-reuse among packs containing reachable objects
|
||||||
|
* over unreachable ones.
|
||||||
|
*
|
||||||
|
* (Note we could write multiple packs here if
|
||||||
|
* `--max-pack-size` was given, but any one of them
|
||||||
|
* will suffice, so pick the first one.)
|
||||||
|
*/
|
||||||
|
for_each_string_list_item(item, names) {
|
||||||
|
struct generated_pack_data *data = item->util;
|
||||||
|
if (has_pack_ext(data, ".mtimes"))
|
||||||
|
continue;
|
||||||
|
|
||||||
|
strvec_pushf(&cmd.args, "--preferred-pack=pack-%s.pack",
|
||||||
|
item->string);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
/*
|
||||||
|
* No packs were kept, and no packs were written. The
|
||||||
|
* only thing remaining are .keep packs (unless
|
||||||
|
* --pack-kept-objects was given).
|
||||||
|
*
|
||||||
|
* Set the `--preferred-pack` arbitrarily here.
|
||||||
|
*/
|
||||||
|
;
|
||||||
|
}
|
||||||
|
|
||||||
if (refs_snapshot)
|
if (refs_snapshot)
|
||||||
strvec_pushf(&cmd.args, "--refs-snapshot=%s", refs_snapshot);
|
strvec_pushf(&cmd.args, "--refs-snapshot=%s", refs_snapshot);
|
||||||
@ -1360,7 +1405,7 @@ int cmd_repack(int argc, const char **argv, const char *prefix)
|
|||||||
struct string_list include = STRING_LIST_INIT_NODUP;
|
struct string_list include = STRING_LIST_INIT_NODUP;
|
||||||
midx_included_packs(&include, &existing, &names, &geometry);
|
midx_included_packs(&include, &existing, &names, &geometry);
|
||||||
|
|
||||||
ret = write_midx_included_packs(&include, &geometry,
|
ret = write_midx_included_packs(&include, &geometry, &names,
|
||||||
refs_snapshot ? get_tempfile_path(refs_snapshot) : NULL,
|
refs_snapshot ? get_tempfile_path(refs_snapshot) : NULL,
|
||||||
show_progress, write_bitmaps > 0);
|
show_progress, write_bitmaps > 0);
|
||||||
|
|
||||||
|
@ -372,4 +372,43 @@ test_expect_success '--max-cruft-size ignores non-local packs' '
|
|||||||
)
|
)
|
||||||
'
|
'
|
||||||
|
|
||||||
|
test_expect_success 'reachable packs are preferred over cruft ones' '
|
||||||
|
repo="cruft-preferred-packs" &&
|
||||||
|
git init "$repo" &&
|
||||||
|
(
|
||||||
|
cd "$repo" &&
|
||||||
|
|
||||||
|
# This test needs to exercise careful control over when a MIDX
|
||||||
|
# is and is not written. Unset the corresponding TEST variable
|
||||||
|
# accordingly.
|
||||||
|
sane_unset GIT_TEST_MULTI_PACK_INDEX &&
|
||||||
|
|
||||||
|
test_commit base &&
|
||||||
|
test_commit --no-tag cruft &&
|
||||||
|
|
||||||
|
non_cruft="$(echo base | git pack-objects --revs $packdir/pack)" &&
|
||||||
|
# Write a cruft pack which both (a) sorts ahead of the non-cruft
|
||||||
|
# pack in lexical order, and (b) has an older mtime to appease
|
||||||
|
# the MIDX preferred pack selection routine.
|
||||||
|
cruft="$(echo pack-$non_cruft.pack | git pack-objects --cruft $packdir/pack-A)" &&
|
||||||
|
test-tool chmtime -1000 $packdir/pack-A-$cruft.pack &&
|
||||||
|
|
||||||
|
test_commit other &&
|
||||||
|
git repack -d &&
|
||||||
|
|
||||||
|
git repack --geometric 2 -d --write-midx --write-bitmap-index &&
|
||||||
|
|
||||||
|
# After repacking, there are two packs left: one reachable one
|
||||||
|
# (which is the result of combining both of the existing two
|
||||||
|
# non-cruft packs), and one cruft pack.
|
||||||
|
find .git/objects/pack -type f -name "*.pack" >packs &&
|
||||||
|
test_line_count = 2 packs &&
|
||||||
|
|
||||||
|
# Make sure that the pack we just wrote is marked as preferred,
|
||||||
|
# not the cruft one.
|
||||||
|
pack="$(test-tool read-midx --preferred-pack $objdir)" &&
|
||||||
|
test_path_is_missing "$packdir/$(basename "$pack" ".idx").mtimes"
|
||||||
|
)
|
||||||
|
'
|
||||||
|
|
||||||
test_done
|
test_done
|
||||||
|
Reference in New Issue
Block a user