From e741c078721f8232da769a1100433d96c4393b32 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Mon, 28 Aug 2023 18:49:04 -0400 Subject: [PATCH 1/4] builtin/pack-objects.c: remove unnecessary strbuf_reset() When reading input with the `--cruft` option, `git pack-objects` reads each line into a strbuf, and then moves it to either the list of discarded or fresh packs, depending on whether or not the input line starts with a '-' character. At the beginning of each loop iteration, the next line of input is read with `strbuf_getline()`, which calls `strbuf_reset()` (as a part of `strbuf_getwholeline()`) before reading the next line of input. Thus, the call to `strbuf_reset()` (added back in b757353676 (builtin/pack-objects.c: --cruft without expiration, 2022-05-20)) at the end of the loop is unnecessary, so let's remove it accordingly. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/pack-objects.c | 1 - 1 file changed, 1 deletion(-) diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c index d2a162d528..868efe7e0f 100644 --- a/builtin/pack-objects.c +++ b/builtin/pack-objects.c @@ -3603,7 +3603,6 @@ static void read_cruft_objects(void) string_list_append(&discard_packs, buf.buf + 1); else string_list_append(&fresh_packs, buf.buf); - strbuf_reset(&buf); } string_list_sort(&discard_packs); From 61568efa95608fdafffe67967a82e88bcd90fade Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Mon, 28 Aug 2023 18:49:07 -0400 Subject: [PATCH 2/4] builtin/pack-objects.c: support `--max-pack-size` with `--cruft` When pack-objects learned the `--cruft` option back in b757353676 (builtin/pack-objects.c: --cruft without expiration, 2022-05-20), we explicitly forbade `--cruft` with `--max-pack-size`. At the time, there was no specific rationale given in the patch for not supporting the `--max-pack-size` option with `--cruft`. (As best I can remember, it's because we were trying to push users towards only ever having a single cruft pack, but I cannot be sure). However, `--max-pack-size` is flexible enough that it already works with `--cruft` and can shard unreachable objects across multiple cruft packs, creating separate ".mtimes" files as appropriate. In fact, the `--max-pack-size` option worked with `--cruft` as far back as b757353676! This is because we overwrite the `written_list`, and pass down the appropriate length, i.e. the number of objects written in each pack shard. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- Documentation/git-pack-objects.txt | 4 +-- builtin/pack-objects.c | 4 +-- builtin/repack.c | 3 +- t/t5329-pack-objects-cruft.sh | 54 ++++++++++++++++++++++++------ 4 files changed, 48 insertions(+), 17 deletions(-) diff --git a/Documentation/git-pack-objects.txt b/Documentation/git-pack-objects.txt index a9995a932c..dea7eacb0f 100644 --- a/Documentation/git-pack-objects.txt +++ b/Documentation/git-pack-objects.txt @@ -116,9 +116,7 @@ unreachable object whose mtime is newer than the `--cruft-expiration`). + Incompatible with `--unpack-unreachable`, `--keep-unreachable`, `--pack-loose-unreachable`, `--stdin-packs`, as well as any other -options which imply `--revs`. Also incompatible with `--max-pack-size`; -when this option is set, the maximum pack size is not inferred from -`pack.packSizeLimit`. +options which imply `--revs`. --cruft-expiration=:: If specified, objects are eliminated from the cruft pack if they diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c index 868efe7e0f..72241bdca4 100644 --- a/builtin/pack-objects.c +++ b/builtin/pack-objects.c @@ -4382,7 +4382,7 @@ int cmd_pack_objects(int argc, const char **argv, const char *prefix) if (!HAVE_THREADS && delta_search_threads != 1) warning(_("no threads support, ignoring --threads")); - if (!pack_to_stdout && !pack_size_limit && !cruft) + if (!pack_to_stdout && !pack_size_limit) pack_size_limit = pack_size_limit_cfg; if (pack_to_stdout && pack_size_limit) die(_("--max-pack-size cannot be used to build a pack for transfer")); @@ -4414,8 +4414,6 @@ int cmd_pack_objects(int argc, const char **argv, const char *prefix) die(_("cannot use internal rev list with --cruft")); if (stdin_packs) die(_("cannot use --stdin-packs with --cruft")); - if (pack_size_limit) - die(_("cannot use --max-pack-size with --cruft")); } /* diff --git a/builtin/repack.c b/builtin/repack.c index 2b43a5be08..6943c5ba11 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -720,7 +720,6 @@ static int write_cruft_pack(const struct pack_objects_args *args, strvec_push(&cmd.args, "--honor-pack-keep"); strvec_push(&cmd.args, "--non-empty"); - strvec_push(&cmd.args, "--max-pack-size=0"); cmd.in = -1; @@ -1048,6 +1047,8 @@ int cmd_repack(int argc, const char **argv, const char *prefix) cruft_po_args.depth = po_args.depth; if (!cruft_po_args.threads) cruft_po_args.threads = po_args.threads; + if (!cruft_po_args.max_pack_size) + cruft_po_args.max_pack_size = po_args.max_pack_size; cruft_po_args.local = po_args.local; cruft_po_args.quiet = po_args.quiet; diff --git a/t/t5329-pack-objects-cruft.sh b/t/t5329-pack-objects-cruft.sh index 45667d4999..fc5fedbe9b 100755 --- a/t/t5329-pack-objects-cruft.sh +++ b/t/t5329-pack-objects-cruft.sh @@ -573,23 +573,54 @@ test_expect_success 'cruft repack with no reachable objects' ' ) ' -test_expect_success 'cruft repack ignores --max-pack-size' ' +write_blob () { + test-tool genrandom "$@" >in && + git hash-object -w -t blob in +} + +find_pack () { + for idx in $(ls $packdir/pack-*.idx) + do + git show-index <$idx >out && + if grep -q "$1" out + then + echo $idx + fi || return 1 + done +} + +test_expect_success 'cruft repack with --max-pack-size' ' git init max-pack-size && ( cd max-pack-size && test_commit base && + # two cruft objects which exceed the maximum pack size - test-tool genrandom foo 1048576 | git hash-object --stdin -w && - test-tool genrandom bar 1048576 | git hash-object --stdin -w && + foo=$(write_blob foo 1048576) && + bar=$(write_blob bar 1048576) && + test-tool chmtime --get -1000 \ + "$objdir/$(test_oid_to_path $foo)" >foo.mtime && + test-tool chmtime --get -2000 \ + "$objdir/$(test_oid_to_path $bar)" >bar.mtime && git repack --cruft --max-pack-size=1M && find $packdir -name "*.mtimes" >cruft && - test_line_count = 1 cruft && - test-tool pack-mtimes "$(basename "$(cat cruft)")" >objects && - test_line_count = 2 objects + test_line_count = 2 cruft && + + foo_mtimes="$(basename $(find_pack $foo) .idx).mtimes" && + bar_mtimes="$(basename $(find_pack $bar) .idx).mtimes" && + test-tool pack-mtimes $foo_mtimes >foo.actual && + test-tool pack-mtimes $bar_mtimes >bar.actual && + + echo "$foo $(cat foo.mtime)" >foo.expect && + echo "$bar $(cat bar.mtime)" >bar.expect && + + test_cmp foo.expect foo.actual && + test_cmp bar.expect bar.actual && + test "$foo_mtimes" != "$bar_mtimes" ) ' -test_expect_success 'cruft repack ignores pack.packSizeLimit' ' +test_expect_success 'cruft repack with pack.packSizeLimit' ' ( cd max-pack-size && # repack everything back together to remove the existing cruft @@ -599,9 +630,12 @@ test_expect_success 'cruft repack ignores pack.packSizeLimit' ' # ensure the same post condition is met when --max-pack-size # would otherwise be inferred from the configuration find $packdir -name "*.mtimes" >cruft && - test_line_count = 1 cruft && - test-tool pack-mtimes "$(basename "$(cat cruft)")" >objects && - test_line_count = 2 objects + test_line_count = 2 cruft && + for pack in $(cat cruft) + do + test-tool pack-mtimes "$(basename $pack)" >objects && + test_line_count = 1 objects || return 1 + done ) ' From 3843ef89312593a1e4d70bcdb29fd6ffa87134d6 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Mon, 28 Aug 2023 18:49:10 -0400 Subject: [PATCH 3/4] Documentation/gitformat-pack.txt: remove multi-cruft packs alternative This text, originally from 3d89a8c118 (Documentation/technical: add cruft-packs.txt, 2022-05-20) lists multiple cruft packs as a potential alternative to the design of cruft packs. We have always supported multiple cruft packs (i.e. we use the most recent mtime for a given object among all cruft packs which contain it, etc.), but haven't encouraged its use. We still aren't encouraging users to go out and generate multiple cruft packs, but let's take a step in that direction by dropping language that suggests we aren't capable of working with multiple cruft packs. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- Documentation/gitformat-pack.txt | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/Documentation/gitformat-pack.txt b/Documentation/gitformat-pack.txt index 0c1be2dbe8..49bb09d7df 100644 --- a/Documentation/gitformat-pack.txt +++ b/Documentation/gitformat-pack.txt @@ -618,21 +618,13 @@ understand cruft packs. Notable alternatives to this design include: - - The location of the per-object mtime data, and - - Storing unreachable objects in multiple cruft packs. + - The location of the per-object mtime data. On the location of mtime data, a new auxiliary file tied to the pack was chosen to avoid complicating the `.idx` format. If the `.idx` format were ever to gain support for optional chunks of data, it may make sense to consolidate the `.mtimes` format into the `.idx` itself. -Storing unreachable objects among multiple cruft packs (e.g., creating a new -cruft pack during each repacking operation including only unreachable objects -which aren't already stored in an earlier cruft pack) is significantly more -complicated to construct, and so aren't pursued here. The obvious drawback to -the current implementation is that the entire cruft pack must be re-written from -scratch. - GIT --- Part of the linkgit:git[1] suite From c0b5d46ded46bf6e2cf4bb5325e4bf43374dd1ed Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Mon, 28 Aug 2023 18:49:12 -0400 Subject: [PATCH 4/4] Documentation/gitformat-pack.txt: drop mixed version section This section was added in 3d89a8c118 (Documentation/technical: add cruft-packs.txt, 2022-05-20) to highlight a potential pitfall when deploying cruft packs in an environment where multiple versions of Git are GC-ing the same repository. Now that it has been more than a year since 3d89a8c118 was written, let's drop this section as it is no longer relevant. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- Documentation/gitformat-pack.txt | 26 -------------------------- 1 file changed, 26 deletions(-) diff --git a/Documentation/gitformat-pack.txt b/Documentation/gitformat-pack.txt index 49bb09d7df..870e00f298 100644 --- a/Documentation/gitformat-pack.txt +++ b/Documentation/gitformat-pack.txt @@ -588,32 +588,6 @@ later on. It is linkgit:git-gc[1] that is typically responsible for removing expired unreachable objects. -=== Caution for mixed-version environments - -Repositories that have cruft packs in them will continue to work with any older -version of Git. Note, however, that previous versions of Git which do not -understand the `.mtimes` file will use the cruft pack's mtime as the mtime for -all of the objects in it. In other words, do not expect older (pre-cruft pack) -versions of Git to interpret or even read the contents of the `.mtimes` file. - -Note that having mixed versions of Git GC-ing the same repository can lead to -unreachable objects never being completely pruned. This can happen under the -following circumstances: - - - An older version of Git running GC explodes the contents of an existing - cruft pack loose, using the cruft pack's mtime. - - A newer version running GC collects those loose objects into a cruft pack, - where the .mtime file reflects the loose object's actual mtimes, but the - cruft pack mtime is "now". - -Repeating this process will lead to unreachable objects not getting pruned as a -result of repeatedly resetting the objects' mtimes to the present time. - -If you are GC-ing repositories in a mixed version environment, consider omitting -the `--cruft` option when using linkgit:git-repack[1] and linkgit:git-gc[1], and -setting the `gc.cruftPacks` configuration to "false" until all writers -understand cruft packs. - === Alternatives Notable alternatives to this design include: