diff --git a/Documentation/config/gc.txt b/Documentation/config/gc.txt index 466466d6cc..c6e3acc99d 100644 --- a/Documentation/config/gc.txt +++ b/Documentation/config/gc.txt @@ -86,6 +86,12 @@ gc.cruftPacks:: linkgit:git-repack[1]) instead of as loose objects. The default is `true`. +gc.maxCruftSize:: + Limit the size of new cruft packs when repacking. When + specified in addition to `--max-cruft-size`, the command line + option takes priority. See the `--max-cruft-size` option of + linkgit:git-repack[1]. + gc.pruneExpire:: When 'git gc' is run, it will call 'prune --expire 2.weeks.ago' (and 'repack --cruft --cruft-expiration 2.weeks.ago' if using diff --git a/Documentation/git-gc.txt b/Documentation/git-gc.txt index 90806fd26a..b5561c458a 100644 --- a/Documentation/git-gc.txt +++ b/Documentation/git-gc.txt @@ -59,6 +59,13 @@ be performed as well. cruft pack instead of storing them as loose objects. `--cruft` is on by default. +--max-cruft-size=:: + When packing unreachable objects into a cruft pack, limit the + size of new cruft packs to be at most `` bytes. Overrides any + value specified via the `gc.maxCruftSize` configuration. See + the `--max-cruft-size` option of linkgit:git-repack[1] for + more. + --prune=:: Prune loose objects older than date (default is 2 weeks ago, overridable by the config variable `gc.pruneExpire`). diff --git a/Documentation/git-repack.txt b/Documentation/git-repack.txt index 8545a32667..893b8a2fea 100644 --- a/Documentation/git-repack.txt +++ b/Documentation/git-repack.txt @@ -74,6 +74,17 @@ to the new separate pack will be written. immediately instead of waiting for the next `git gc` invocation. Only useful with `--cruft -d`. +--max-cruft-size=:: + Repack cruft objects into packs as large as `` bytes before + creating new packs. As long as there are enough cruft packs + smaller than ``, repacking will cause a new cruft pack to + be created containing objects from any combined cruft packs, + along with any new unreachable objects. Cruft packs larger than + `` will not be modified. When the new cruft pack is larger + than `` bytes, it will be split into multiple packs, all of + which are guaranteed to be at most `` bytes in size. Only + useful with `--cruft -d`. + --expire-to=:: Write a cruft pack containing pruned objects (if any) to the directory ``. This option is useful for keeping a copy of diff --git a/builtin/gc.c b/builtin/gc.c index 68ca8d45bf..7c11d5ebef 100644 --- a/builtin/gc.c +++ b/builtin/gc.c @@ -52,6 +52,7 @@ static const char * const builtin_gc_usage[] = { static int pack_refs = 1; static int prune_reflogs = 1; static int cruft_packs = 1; +static unsigned long max_cruft_size; static int aggressive_depth = 50; static int aggressive_window = 250; static int gc_auto_threshold = 6700; @@ -165,6 +166,7 @@ static void gc_config(void) git_config_get_int("gc.autopacklimit", &gc_auto_pack_limit); git_config_get_bool("gc.autodetach", &detach_auto); git_config_get_bool("gc.cruftpacks", &cruft_packs); + git_config_get_ulong("gc.maxcruftsize", &max_cruft_size); git_config_get_expiry("gc.pruneexpire", &prune_expire); git_config_get_expiry("gc.worktreepruneexpire", &prune_worktrees_expire); git_config_get_expiry("gc.logexpiry", &gc_log_expire); @@ -352,6 +354,9 @@ static void add_repack_all_option(struct string_list *keep_pack) strvec_push(&repack, "--cruft"); if (prune_expire) strvec_pushf(&repack, "--cruft-expiration=%s", prune_expire); + if (max_cruft_size) + strvec_pushf(&repack, "--max-cruft-size=%lu", + max_cruft_size); } else { strvec_push(&repack, "-A"); if (prune_expire) @@ -585,6 +590,8 @@ int cmd_gc(int argc, const char **argv, const char *prefix) N_("prune unreferenced objects"), PARSE_OPT_OPTARG, NULL, (intptr_t)prune_expire }, OPT_BOOL(0, "cruft", &cruft_packs, N_("pack unreferenced objects separately")), + OPT_MAGNITUDE(0, "max-cruft-size", &max_cruft_size, + N_("with --cruft, limit the size of new cruft packs")), OPT_BOOL(0, "aggressive", &aggressive, N_("be more thorough (increased runtime)")), OPT_BOOL_F(0, "auto", &auto_gc, N_("enable auto-gc mode"), PARSE_OPT_NOCOMPLETE), diff --git a/builtin/repack.c b/builtin/repack.c index db9277081d..edaee4dbec 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -28,6 +28,7 @@ #define PACK_CRUFT 4 #define DELETE_PACK 1 +#define RETAIN_PACK 2 static int pack_everything; static int delta_base_offset = 1; @@ -52,7 +53,7 @@ struct pack_objects_args { const char *window_memory; const char *depth; const char *threads; - const char *max_pack_size; + unsigned long max_pack_size; int no_reuse_delta; int no_reuse_object; int quiet; @@ -118,11 +119,26 @@ static void pack_mark_for_deletion(struct string_list_item *item) item->util = (void*)((uintptr_t)item->util | DELETE_PACK); } +static void pack_unmark_for_deletion(struct string_list_item *item) +{ + item->util = (void*)((uintptr_t)item->util & ~DELETE_PACK); +} + static int pack_is_marked_for_deletion(struct string_list_item *item) { return (uintptr_t)item->util & DELETE_PACK; } +static void pack_mark_retained(struct string_list_item *item) +{ + item->util = (void*)((uintptr_t)item->util | RETAIN_PACK); +} + +static int pack_is_retained(struct string_list_item *item) +{ + return (uintptr_t)item->util & RETAIN_PACK; +} + static void mark_packs_for_deletion_1(struct string_list *names, struct string_list *list) { @@ -135,17 +151,39 @@ static void mark_packs_for_deletion_1(struct string_list *names, if (len < hexsz) continue; sha1 = item->string + len - hexsz; - /* - * Mark this pack for deletion, which ensures that this - * pack won't be included in a MIDX (if `--write-midx` - * was given) and that we will actually delete this pack - * (if `-d` was given). - */ - if (!string_list_has_string(names, sha1)) + + if (pack_is_retained(item)) { + pack_unmark_for_deletion(item); + } else if (!string_list_has_string(names, sha1)) { + /* + * Mark this pack for deletion, which ensures + * that this pack won't be included in a MIDX + * (if `--write-midx` was given) and that we + * will actually delete this pack (if `-d` was + * given). + */ pack_mark_for_deletion(item); + } } } +static void retain_cruft_pack(struct existing_packs *existing, + struct packed_git *cruft) +{ + struct strbuf buf = STRBUF_INIT; + struct string_list_item *item; + + strbuf_addstr(&buf, pack_basename(cruft)); + strbuf_strip_suffix(&buf, ".pack"); + + item = string_list_lookup(&existing->cruft_packs, buf.buf); + if (!item) + BUG("could not find cruft pack '%s'", pack_basename(cruft)); + + pack_mark_retained(item); + strbuf_release(&buf); +} + static void mark_packs_for_deletion(struct existing_packs *existing, struct string_list *names) @@ -227,6 +265,8 @@ static void collect_pack_filenames(struct existing_packs *existing, } string_list_sort(&existing->kept_packs); + string_list_sort(&existing->non_kept_packs); + string_list_sort(&existing->cruft_packs); strbuf_release(&buf); } @@ -244,7 +284,7 @@ static void prepare_pack_objects(struct child_process *cmd, if (args->threads) strvec_pushf(&cmd->args, "--threads=%s", args->threads); if (args->max_pack_size) - strvec_pushf(&cmd->args, "--max-pack-size=%s", args->max_pack_size); + strvec_pushf(&cmd->args, "--max-pack-size=%lu", args->max_pack_size); if (args->no_reuse_delta) strvec_pushf(&cmd->args, "--no-reuse-delta"); if (args->no_reuse_object) @@ -317,6 +357,18 @@ static struct generated_pack_data *populate_pack_exts(const char *name) return data; } +static int has_pack_ext(const struct generated_pack_data *data, + const char *ext) +{ + int i; + for (i = 0; i < ARRAY_SIZE(exts); i++) { + if (strcmp(exts[i].name, ext)) + continue; + return !!data->tempfiles[i]; + } + BUG("unknown pack extension: '%s'", ext); +} + static void repack_promisor_objects(const struct pack_objects_args *args, struct string_list *names) { @@ -734,6 +786,7 @@ static void midx_included_packs(struct string_list *include, static int write_midx_included_packs(struct string_list *include, struct pack_geometry *geometry, + struct string_list *names, const char *refs_snapshot, int show_progress, int write_bitmaps) { @@ -763,6 +816,38 @@ static int write_midx_included_packs(struct string_list *include, if (preferred) strvec_pushf(&cmd.args, "--preferred-pack=%s", pack_basename(preferred)); + else if (names->nr) { + /* The largest pack was repacked, meaning that either + * one or two packs exist depending on whether the + * repository has a cruft pack or not. + * + * Select the non-cruft one as preferred to encourage + * pack-reuse among packs containing reachable objects + * over unreachable ones. + * + * (Note we could write multiple packs here if + * `--max-pack-size` was given, but any one of them + * will suffice, so pick the first one.) + */ + for_each_string_list_item(item, names) { + struct generated_pack_data *data = item->util; + if (has_pack_ext(data, ".mtimes")) + continue; + + strvec_pushf(&cmd.args, "--preferred-pack=pack-%s.pack", + item->string); + break; + } + } else { + /* + * No packs were kept, and no packs were written. The + * only thing remaining are .keep packs (unless + * --pack-kept-objects was given). + * + * Set the `--preferred-pack` arbitrarily here. + */ + ; + } if (refs_snapshot) strvec_pushf(&cmd.args, "--refs-snapshot=%s", refs_snapshot); @@ -888,6 +973,73 @@ static int write_filtered_pack(const struct pack_objects_args *args, return finish_pack_objects_cmd(&cmd, names, local); } +static int existing_cruft_pack_cmp(const void *va, const void *vb) +{ + struct packed_git *a = *(struct packed_git **)va; + struct packed_git *b = *(struct packed_git **)vb; + + if (a->pack_size < b->pack_size) + return -1; + if (a->pack_size > b->pack_size) + return 1; + return 0; +} + +static void collapse_small_cruft_packs(FILE *in, size_t max_size, + struct existing_packs *existing) +{ + struct packed_git **existing_cruft, *p; + struct strbuf buf = STRBUF_INIT; + size_t total_size = 0; + size_t existing_cruft_nr = 0; + size_t i; + + ALLOC_ARRAY(existing_cruft, existing->cruft_packs.nr); + + for (p = get_all_packs(the_repository); p; p = p->next) { + if (!(p->is_cruft && p->pack_local)) + continue; + + strbuf_reset(&buf); + strbuf_addstr(&buf, pack_basename(p)); + strbuf_strip_suffix(&buf, ".pack"); + + if (!string_list_has_string(&existing->cruft_packs, buf.buf)) + continue; + + if (existing_cruft_nr >= existing->cruft_packs.nr) + BUG("too many cruft packs (found %"PRIuMAX", but knew " + "of %"PRIuMAX")", + (uintmax_t)existing_cruft_nr + 1, + (uintmax_t)existing->cruft_packs.nr); + existing_cruft[existing_cruft_nr++] = p; + } + + QSORT(existing_cruft, existing_cruft_nr, existing_cruft_pack_cmp); + + for (i = 0; i < existing_cruft_nr; i++) { + size_t proposed; + + p = existing_cruft[i]; + proposed = st_add(total_size, p->pack_size); + + if (proposed <= max_size) { + total_size = proposed; + fprintf(in, "-%s\n", pack_basename(p)); + } else { + retain_cruft_pack(existing, p); + fprintf(in, "%s\n", pack_basename(p)); + } + } + + for (i = 0; i < existing->non_kept_packs.nr; i++) + fprintf(in, "-%s.pack\n", + existing->non_kept_packs.items[i].string); + + strbuf_release(&buf); + free(existing_cruft); +} + static int write_cruft_pack(const struct pack_objects_args *args, const char *destination, const char *pack_prefix, @@ -934,10 +1086,14 @@ static int write_cruft_pack(const struct pack_objects_args *args, in = xfdopen(cmd.in, "w"); for_each_string_list_item(item, names) fprintf(in, "%s-%s.pack\n", pack_prefix, item->string); - for_each_string_list_item(item, &existing->non_kept_packs) - fprintf(in, "-%s.pack\n", item->string); - for_each_string_list_item(item, &existing->cruft_packs) - fprintf(in, "-%s.pack\n", item->string); + if (args->max_pack_size && !cruft_expiration) { + collapse_small_cruft_packs(in, args->max_pack_size, existing); + } else { + for_each_string_list_item(item, &existing->non_kept_packs) + fprintf(in, "-%s.pack\n", item->string); + for_each_string_list_item(item, &existing->cruft_packs) + fprintf(in, "-%s.pack\n", item->string); + } for_each_string_list_item(item, &existing->kept_packs) fprintf(in, "%s.pack\n", item->string); fclose(in); @@ -990,6 +1146,8 @@ int cmd_repack(int argc, const char **argv, const char *prefix) PACK_CRUFT), OPT_STRING(0, "cruft-expiration", &cruft_expiration, N_("approxidate"), N_("with --cruft, expire objects older than this")), + OPT_MAGNITUDE(0, "max-cruft-size", &cruft_po_args.max_pack_size, + N_("with --cruft, limit the size of new cruft packs")), OPT_BOOL('d', NULL, &delete_redundant, N_("remove redundant packs, and run git-prune-packed")), OPT_BOOL('f', NULL, &po_args.no_reuse_delta, @@ -1017,7 +1175,7 @@ int cmd_repack(int argc, const char **argv, const char *prefix) N_("limits the maximum delta depth")), OPT_STRING(0, "threads", &po_args.threads, N_("n"), N_("limits the maximum number of threads")), - OPT_STRING(0, "max-pack-size", &po_args.max_pack_size, N_("bytes"), + OPT_MAGNITUDE(0, "max-pack-size", &po_args.max_pack_size, N_("maximum size of each packfile")), OPT_PARSE_LIST_OBJECTS_FILTER(&po_args.filter_options), OPT_BOOL(0, "pack-kept-objects", &pack_kept_objects, @@ -1327,7 +1485,7 @@ int cmd_repack(int argc, const char **argv, const char *prefix) struct string_list include = STRING_LIST_INIT_NODUP; midx_included_packs(&include, &existing, &names, &geometry); - ret = write_midx_included_packs(&include, &geometry, + ret = write_midx_included_packs(&include, &geometry, &names, refs_snapshot ? get_tempfile_path(refs_snapshot) : NULL, show_progress, write_bitmaps > 0); diff --git a/t/t6500-gc.sh b/t/t6500-gc.sh index e412cf8daf..04acf22d93 100755 --- a/t/t6500-gc.sh +++ b/t/t6500-gc.sh @@ -327,6 +327,33 @@ test_expect_success 'gc.bigPackThreshold ignores cruft packs' ' ) ' +cruft_max_size_opts="git repack -d -l --cruft --cruft-expiration=2.weeks.ago" + +test_expect_success 'setup for --max-cruft-size tests' ' + git init cruft--max-size && + ( + cd cruft--max-size && + prepare_cruft_history + ) +' + +test_expect_success '--max-cruft-size sets appropriate repack options' ' + GIT_TRACE2_EVENT=$(pwd)/trace2.txt git -C cruft--max-size \ + gc --cruft --max-cruft-size=1M && + test_subcommand $cruft_max_size_opts --max-cruft-size=1048576 moved.raw && - sort moved.raw >moved.want && - - git rev-list --all --objects --no-object-names >expect.raw && - sort expect.raw >expect && - - git checkout main && - git branch -D cruft && - git reflog expire --all --expire=all && - - git init --bare expired.git && - git repack -d \ - --cruft --cruft-expiration="now" \ - --expire-to="expired.git/objects/pack/pack" && - - expired="$(ls expired.git/objects/pack/pack-*.idx)" && - test_path_is_file "${expired%.idx}.mtimes" && - - # Since the `--cruft-expiration` is "now", the effective - # behavior is to move _all_ unreachable objects out to - # the location in `--expire-to`. - git show-index <$expired >expired.raw && - cut -d" " -f2 expired.raw | sort >expired.objects && - git rev-list --all --objects --no-object-names \ - >remaining.objects && - - # ...in other words, the combined contents of this - # repository and expired.git should be the same as the - # set of objects we started with. - cat expired.objects remaining.objects | sort >actual && - test_cmp expect actual && - - # The "moved" objects (i.e., those in expired.git) - # should be the same as the cruft objects which were - # expired in the previous step. - test_cmp moved.want expired.objects - ) -' - -test_expect_success '--expire-to stores pruned objects (5.minutes.ago)' ' - git init expire-to-5.minutes.ago && - ( - cd expire-to-5.minutes.ago && - - git branch -M main && - - test_commit base && - - # Create two classes of unreachable objects, one which - # is older than 5 minutes (stale), and another which is - # newer (recent). - for kind in stale recent - do - git checkout -b $kind main && - test_commit --no-tag $kind || return 1 - done && - - git rev-list --objects --no-object-names main..stale >in && - stale="$(git pack-objects $objdir/pack/pack expect.raw && - sort expect.raw >expect && - - # moved.want holds the set of objects we expect to find - # in expired.git - git rev-list --objects --no-object-names main..stale >out && - sort out >moved.want && - - git checkout main && - git branch -D stale recent && - git reflog expire --all --expire=all && - git prune-packed && - - git init --bare expired.git && - git repack -d \ - --cruft --cruft-expiration=5.minutes.ago \ - --expire-to="expired.git/objects/pack/pack" && - - # Some of the remaining objects in this repository are - # unreachable, so use `cat-file --batch-all-objects` - # instead of `rev-list` to get their names - git cat-file --batch-all-objects --batch-check="%(objectname)" \ - >remaining.objects && - sort remaining.objects >actual && - test_cmp expect actual && - - ( - cd expired.git && - - expired="$(ls objects/pack/pack-*.mtimes)" && - test-tool pack-mtimes $(basename $expired) >out && - cut -d" " -f1 out | sort >../moved.got && - - # Ensure that there are as many objects with the - # expected mtime as were moved to expired.git. - # - # In other words, ensure that the recorded - # mtimes of any moved objects was written - # correctly. - grep " $mtime$" out >matching && - test_line_count = $(wc -l <../moved.want) matching - ) && - test_cmp moved.want moved.got - ) -' - test_done diff --git a/t/t7704-repack-cruft.sh b/t/t7704-repack-cruft.sh new file mode 100755 index 0000000000..be3735dff0 --- /dev/null +++ b/t/t7704-repack-cruft.sh @@ -0,0 +1,414 @@ +#!/bin/sh + +test_description='git repack works correctly' + +. ./test-lib.sh + +objdir=.git/objects +packdir=$objdir/pack + +test_expect_success '--expire-to stores pruned objects (now)' ' + git init expire-to-now && + ( + cd expire-to-now && + + git branch -M main && + + test_commit base && + + git checkout -b cruft && + test_commit --no-tag cruft && + + git rev-list --objects --no-object-names main..cruft >moved.raw && + sort moved.raw >moved.want && + + git rev-list --all --objects --no-object-names >expect.raw && + sort expect.raw >expect && + + git checkout main && + git branch -D cruft && + git reflog expire --all --expire=all && + + git init --bare expired.git && + git repack -d \ + --cruft --cruft-expiration="now" \ + --expire-to="expired.git/objects/pack/pack" && + + expired="$(ls expired.git/objects/pack/pack-*.idx)" && + test_path_is_file "${expired%.idx}.mtimes" && + + # Since the `--cruft-expiration` is "now", the effective + # behavior is to move _all_ unreachable objects out to + # the location in `--expire-to`. + git show-index <$expired >expired.raw && + cut -d" " -f2 expired.raw | sort >expired.objects && + git rev-list --all --objects --no-object-names \ + >remaining.objects && + + # ...in other words, the combined contents of this + # repository and expired.git should be the same as the + # set of objects we started with. + cat expired.objects remaining.objects | sort >actual && + test_cmp expect actual && + + # The "moved" objects (i.e., those in expired.git) + # should be the same as the cruft objects which were + # expired in the previous step. + test_cmp moved.want expired.objects + ) +' + +test_expect_success '--expire-to stores pruned objects (5.minutes.ago)' ' + git init expire-to-5.minutes.ago && + ( + cd expire-to-5.minutes.ago && + + git branch -M main && + + test_commit base && + + # Create two classes of unreachable objects, one which + # is older than 5 minutes (stale), and another which is + # newer (recent). + for kind in stale recent + do + git checkout -b $kind main && + test_commit --no-tag $kind || return 1 + done && + + git rev-list --objects --no-object-names main..stale >in && + stale="$(git pack-objects $objdir/pack/pack expect.raw && + sort expect.raw >expect && + + # moved.want holds the set of objects we expect to find + # in expired.git + git rev-list --objects --no-object-names main..stale >out && + sort out >moved.want && + + git checkout main && + git branch -D stale recent && + git reflog expire --all --expire=all && + git prune-packed && + + git init --bare expired.git && + git repack -d \ + --cruft --cruft-expiration=5.minutes.ago \ + --expire-to="expired.git/objects/pack/pack" && + + # Some of the remaining objects in this repository are + # unreachable, so use `cat-file --batch-all-objects` + # instead of `rev-list` to get their names + git cat-file --batch-all-objects --batch-check="%(objectname)" \ + >remaining.objects && + sort remaining.objects >actual && + test_cmp expect actual && + + ( + cd expired.git && + + expired="$(ls objects/pack/pack-*.mtimes)" && + test-tool pack-mtimes $(basename $expired) >out && + cut -d" " -f1 out | sort >../moved.got && + + # Ensure that there are as many objects with the + # expected mtime as were moved to expired.git. + # + # In other words, ensure that the recorded + # mtimes of any moved objects was written + # correctly. + grep " $mtime$" out >matching && + test_line_count = $(wc -l <../moved.want) matching + ) && + test_cmp moved.want moved.got + ) +' + +generate_random_blob() { + test-tool genrandom "$@" >blob && + git hash-object -w -t blob blob && + rm blob +} + +pack_random_blob () { + generate_random_blob "$@" && + git repack -d -q >/dev/null +} + +generate_cruft_pack () { + pack_random_blob "$@" >/dev/null && + + ls $packdir/pack-*.pack | xargs -n 1 basename >in && + pack="$(git pack-objects --cruft $packdir/pack foo.objects && + test-tool pack-mtimes $(basename "$cruft_bar") >bar.objects && + + grep "^$foo" foo.objects && + test_line_count = 1 foo.objects && + grep "^$bar" bar.objects && + test_line_count = 1 bar.objects + ) +' + +test_expect_success '--max-cruft-size combines existing packs when below threshold' ' + git init max-cruft-size-small && + ( + cd max-cruft-size-small && + test_commit base && + + foo="$(pack_random_blob foo $((1*1024*1024)))" && + git repack --cruft -d && + + bar="$(pack_random_blob bar $((1*1024*1024)))" && + git repack --cruft -d --max-cruft-size=10M && + + cruft=$(ls $packdir/pack-*.mtimes) && + test-tool pack-mtimes $(basename "$cruft") >cruft.objects && + + grep "^$foo" cruft.objects && + grep "^$bar" cruft.objects && + test_line_count = 2 cruft.objects + ) +' + +test_expect_success '--max-cruft-size combines smaller packs first' ' + git init max-cruft-size-consume-small && + ( + cd max-cruft-size-consume-small && + + test_commit base && + git repack -ad && + + cruft_foo="$(generate_cruft_pack foo 524288)" && # 0.5 MiB + cruft_bar="$(generate_cruft_pack bar 524288)" && # 0.5 MiB + cruft_baz="$(generate_cruft_pack baz 1048576)" && # 1.0 MiB + cruft_quux="$(generate_cruft_pack quux 1572864)" && # 1.5 MiB + + test-tool pack-mtimes "$(basename $cruft_foo)" >expect.raw && + test-tool pack-mtimes "$(basename $cruft_bar)" >>expect.raw && + sort expect.raw >expect.objects && + + # repacking with `--max-cruft-size=2M` should combine + # both 0.5 MiB packs together, instead of, say, one of + # the 0.5 MiB packs with the 1.0 MiB pack + ls $packdir/pack-*.mtimes | sort >cruft.before && + git repack -d --cruft --max-cruft-size=2M && + ls $packdir/pack-*.mtimes | sort >cruft.after && + + comm -13 cruft.before cruft.after >cruft.new && + comm -23 cruft.before cruft.after >cruft.removed && + + test_line_count = 1 cruft.new && + test_line_count = 2 cruft.removed && + + # the two smaller packs should be rolled up first + printf "%s\n" $cruft_foo $cruft_bar | sort >expect.removed && + test_cmp expect.removed cruft.removed && + + # ...and contain the set of objects rolled up + test-tool pack-mtimes "$(basename $(cat cruft.new))" >actual.raw && + sort actual.raw >actual.objects && + + test_cmp expect.objects actual.objects + ) +' + +test_expect_success 'setup --max-cruft-size with freshened objects' ' + git init max-cruft-size-freshen && + ( + cd max-cruft-size-freshen && + + test_commit base && + git repack -ad && + + foo="$(generate_random_blob foo 64)" && + test-tool chmtime --get -10000 \ + "$objdir/$(test_oid_to_path "$foo")" >foo.mtime && + + git repack --cruft -d && + + cruft="$(ls $packdir/pack-*.mtimes)" && + test-tool pack-mtimes "$(basename $cruft)" >actual && + echo "$foo $(cat foo.mtime)" >expect && + test_cmp expect actual + ) +' + +test_expect_success '--max-cruft-size with freshened objects (loose)' ' + ( + cd max-cruft-size-freshen && + + # regenerate the object, setting its mtime to be more recent + foo="$(generate_random_blob foo 64)" && + test-tool chmtime --get -100 \ + "$objdir/$(test_oid_to_path "$foo")" >foo.mtime && + + git repack --cruft -d && + + cruft="$(ls $packdir/pack-*.mtimes)" && + test-tool pack-mtimes "$(basename $cruft)" >actual && + echo "$foo $(cat foo.mtime)" >expect && + test_cmp expect actual + ) +' + +test_expect_success '--max-cruft-size with freshened objects (packed)' ' + ( + cd max-cruft-size-freshen && + + # regenerate the object and store it in a packfile, + # setting its mtime to be more recent + # + # store it alongside another cruft object so that we + # do not create an identical copy of the existing + # cruft pack (which contains $foo). + foo="$(generate_random_blob foo 64)" && + bar="$(generate_random_blob bar 64)" && + foo_pack="$(printf "%s\n" $foo $bar | git pack-objects $packdir/pack)" && + git prune-packed && + + test-tool chmtime --get -10 \ + "$packdir/pack-$foo_pack.pack" >foo.mtime && + + git repack --cruft -d && + + cruft="$(ls $packdir/pack-*.mtimes)" && + test-tool pack-mtimes "$(basename $cruft)" >actual && + echo "$foo $(cat foo.mtime)" >expect.raw && + echo "$bar $(cat foo.mtime)" >>expect.raw && + sort expect.raw >expect && + test_cmp expect actual + ) +' + +test_expect_success '--max-cruft-size with pruning' ' + git init max-cruft-size-prune && + ( + cd max-cruft-size-prune && + + test_commit base && + foo="$(generate_random_blob foo $((1024*1024)))" && + bar="$(generate_random_blob bar $((1024*1024)))" && + baz="$(generate_random_blob baz $((1024*1024)))" && + + test-tool chmtime -10000 "$objdir/$(test_oid_to_path "$foo")" && + + git repack -d --cruft --max-cruft-size=1M && + + # backdate the mtimes of all cruft packs to validate + # that they were rewritten as a result of pruning + ls $packdir/pack-*.mtimes | sort >cruft.before && + for cruft in $(cat cruft.before) + do + mtime="$(test-tool chmtime --get -10000 "$cruft")" && + echo $cruft $mtime >>mtimes || return 1 + done && + + # repack (and prune) with a --max-cruft-size to ensure + # that we appropriately split the resulting set of packs + git repack -d --cruft --max-cruft-size=1M \ + --cruft-expiration=10.seconds.ago && + ls $packdir/pack-*.mtimes | sort >cruft.after && + + for cruft in $(cat cruft.after) + do + old_mtime="$(grep $cruft mtimes | cut -d" " -f2)" && + new_mtime="$(test-tool chmtime --get $cruft)" && + test $old_mtime -lt $new_mtime || return 1 + done && + + test_line_count = 3 cruft.before && + test_line_count = 2 cruft.after && + test_must_fail git cat-file -e $foo && + git cat-file -e $bar && + git cat-file -e $baz + ) +' + +test_expect_success '--max-cruft-size ignores non-local packs' ' + repo="max-cruft-size-non-local" && + git init $repo && + ( + cd $repo && + test_commit base && + generate_random_blob foo 64 && + git repack --cruft -d + ) && + + git clone --reference=$repo $repo $repo-alt && + ( + cd $repo-alt && + + test_commit other && + generate_random_blob bar 64 && + + # ensure that we do not attempt to pick up packs from + # the non-alternated repository, which would result in a + # crash + git repack --cruft --max-cruft-size=1M -d + ) +' + +test_expect_success 'reachable packs are preferred over cruft ones' ' + repo="cruft-preferred-packs" && + git init "$repo" && + ( + cd "$repo" && + + # This test needs to exercise careful control over when a MIDX + # is and is not written. Unset the corresponding TEST variable + # accordingly. + sane_unset GIT_TEST_MULTI_PACK_INDEX && + + test_commit base && + test_commit --no-tag cruft && + + non_cruft="$(echo base | git pack-objects --revs $packdir/pack)" && + # Write a cruft pack which both (a) sorts ahead of the non-cruft + # pack in lexical order, and (b) has an older mtime to appease + # the MIDX preferred pack selection routine. + cruft="$(echo pack-$non_cruft.pack | git pack-objects --cruft $packdir/pack-A)" && + test-tool chmtime -1000 $packdir/pack-A-$cruft.pack && + + test_commit other && + git repack -d && + + git repack --geometric 2 -d --write-midx --write-bitmap-index && + + # After repacking, there are two packs left: one reachable one + # (which is the result of combining both of the existing two + # non-cruft packs), and one cruft pack. + find .git/objects/pack -type f -name "*.pack" >packs && + test_line_count = 2 packs && + + # Make sure that the pack we just wrote is marked as preferred, + # not the cruft one. + pack="$(test-tool read-midx --preferred-pack $objdir)" && + test_path_is_missing "$packdir/$(basename "$pack" ".idx").mtimes" + ) +' + +test_done