diff --git a/Documentation/config/gc.txt b/Documentation/config/gc.txt
index 7f95c866e1..ca47eb2008 100644
--- a/Documentation/config/gc.txt
+++ b/Documentation/config/gc.txt
@@ -130,6 +130,21 @@ or rebase occurring. Since these changes are not part of the current
 project most users will want to expire them sooner, which is why the
 default is more aggressive than `gc.reflogExpire`.
 
+gc.recentObjectsHook::
+	When considering whether or not to remove an object (either when
+	generating a cruft pack or storing unreachable objects as
+	loose), use the shell to execute the specified command(s).
+	Interpret their output as object IDs which Git will consider as
+	"recent", regardless of their age. Git treats the mtimes of
+	these objects as "now", so they (and their descendants) are
+	kept regardless of their true age.
++
+Output must contain exactly one hex object ID per line, and nothing
+else. Objects which cannot be found in the repository are ignored.
+Multiple hooks are supported, but all must exit successfully, else the
+operation (either generating a cruft pack or unpacking unreachable
+objects) will be halted.
+
 gc.rerereResolved::
 	Records of conflicted merge you resolved earlier are
 	kept for this many days when 'git rerere gc' is run.
diff --git a/reachable.c b/reachable.c
index 7a42da5d39..60a7336b87 100644
--- a/reachable.c
+++ b/reachable.c
@@ -16,6 +16,8 @@
 #include "object-store.h"
 #include "pack-bitmap.h"
 #include "pack-mtimes.h"
+#include "config.h"
+#include "run-command.h"
 
 struct connectivity_progress {
 	struct progress *progress;
@@ -67,12 +69,93 @@ struct recent_data {
 	timestamp_t timestamp;
 	report_recent_object_fn *cb;
 	int ignore_in_core_kept_packs;
+
+	struct oidset extra_recent_oids;
+	int extra_recent_oids_loaded;
 };
 
+/*
+ * Run one gc.recentObjectsHook command through the shell and insert
+ * every object ID it prints (one hex ID per line) into "set".
+ *
+ * Returns zero on success. Returns non-zero if the command cannot be
+ * started, if it prints a line that is not exactly one object ID, or
+ * if it does not exit successfully.
+ */
+static int run_one_gc_recent_objects_hook(struct oidset *set,
+					    const char *args)
+{
+	struct child_process cmd = CHILD_PROCESS_INIT;
+	struct strbuf buf = STRBUF_INIT;
+	FILE *out;
+	int ret = 0;
+
+	cmd.use_shell = 1;
+	cmd.out = -1;
+
+	strvec_push(&cmd.args, args);
+
+	if (start_command(&cmd))
+		return -1;
+
+	out = xfdopen(cmd.out, "r");
+	while (strbuf_getline(&buf, out) != EOF) {
+		struct object_id oid;
+		const char *rest;
+
+		if (parse_oid_hex(buf.buf, &oid, &rest) || *rest) {
+			ret = error(_("invalid extra cruft tip: '%s'"), buf.buf);
+			break;
+		}
+
+		oidset_insert(set, &oid);
+	}
+
+	fclose(out);
+	ret |= finish_command(&cmd);
+
+	strbuf_release(&buf);
+	return ret;
+}
+
+/*
+ * Run each command configured in gc.recentObjectsHook and gather the
+ * object IDs they print into data->extra_recent_oids, dying if any
+ * command fails. The "loaded" flag is set up front so that the hooks
+ * are not run again for the same "data".
+ */
+static void load_gc_recent_objects(struct recent_data *data)
+{
+	const struct string_list *programs;
+	int ret = 0;
+	size_t i;
+
+	data->extra_recent_oids_loaded = 1;
+
+	if (git_config_get_string_multi("gc.recentobjectshook", &programs))
+		return;
+
+	for (i = 0; i < programs->nr; i++) {
+		ret = run_one_gc_recent_objects_hook(&data->extra_recent_oids,
+						     programs->items[i].string);
+		if (ret)
+			die(_("unable to enumerate additional recent objects"));
+	}
+}
+
 static int obj_is_recent(const struct object_id *oid, timestamp_t mtime,
 			 struct recent_data *data)
 {
-	return mtime > data->timestamp;
+	if (mtime > data->timestamp)
+		return 1;
+
+	/*
+	 * Run the hooks lazily: only once an object is found whose
+	 * mtime alone does not make it recent.
+	 */
+	if (!data->extra_recent_oids_loaded)
+		load_gc_recent_objects(data);
+	return oidset_contains(&data->extra_recent_oids, oid);
 }
 
 static void add_recent_object(const struct object_id *oid,
@@ -199,16 +282,24 @@ int add_unseen_recent_objects_to_traversal(struct rev_info *revs,
 	data.cb = cb;
 	data.ignore_in_core_kept_packs = ignore_in_core_kept_packs;
 
+	oidset_init(&data.extra_recent_oids, 0);
+	data.extra_recent_oids_loaded = 0;
+
 	r = for_each_loose_object(add_recent_loose, &data,
 				  FOR_EACH_OBJECT_LOCAL_ONLY);
 	if (r)
-		return r;
+		goto done;
 
 	flags = FOR_EACH_OBJECT_LOCAL_ONLY | FOR_EACH_OBJECT_PACK_ORDER;
 	if (ignore_in_core_kept_packs)
 		flags |= FOR_EACH_OBJECT_SKIP_IN_CORE_KEPT_PACKS;
 
-	return for_each_packed_object(add_recent_packed, &data, flags);
+	r = for_each_packed_object(add_recent_packed, &data, flags);
+
+done:
+	oidset_clear(&data.extra_recent_oids);
+
+	return r;
 }
 
 static int mark_object_seen(const struct object_id *oid,
diff --git a/t/t5304-prune.sh b/t/t5304-prune.sh
index 662ae9b152..a635fe98f8 100755
--- a/t/t5304-prune.sh
+++ b/t/t5304-prune.sh
@@ -350,4 +350,18 @@ test_expect_success 'old reachable-from-recent retained with bitmaps' '
 	test_must_fail git cat-file -e $to_drop
 '
 
+test_expect_success 'gc.recentObjectsHook' '
+	add_blob &&
+	test-tool chmtime =-86500 $BLOB_FILE &&
+
+	write_script precious-objects <<-EOF &&
+	echo $BLOB
+	EOF
+	test_config gc.recentObjectsHook ./precious-objects &&
+
+	git prune --expire=now &&
+
+	git cat-file -p $BLOB
+'
+
 test_done
diff --git a/t/t5329-pack-objects-cruft.sh b/t/t5329-pack-objects-cruft.sh
index 303f7a5d84..45667d4999 100755
--- a/t/t5329-pack-objects-cruft.sh
+++ b/t/t5329-pack-objects-cruft.sh
@@ -739,4 +739,175 @@ test_expect_success 'cruft objects are freshend via loose' '
 	)
 '
 
+test_expect_success 'gc.recentObjectsHook' '
+	git init repo &&
+	test_when_finished "rm -fr repo" &&
+	(
+		cd repo &&
+
+		# Create a handful of objects.
+		#
+		#   - one reachable commit, "base", designated for the reachable
+		#     pack
+		#   - one unreachable commit, "cruft.discard", which is marked
+		#     for deletion
+		#   - one unreachable commit, "cruft.old", which would be marked
+		#     for deletion, but is rescued as an extra cruft tip
+		#   - one unreachable commit, "cruft.new", which is not marked
+		#     for deletion
+		test_commit base &&
+		git branch -M main &&
+
+		git checkout --orphan discard &&
+		git rm -fr . &&
+		test_commit --no-tag cruft.discard &&
+
+		git checkout --orphan old &&
+		git rm -fr . &&
+		test_commit --no-tag cruft.old &&
+		cruft_old="$(git rev-parse HEAD)" &&
+
+		git checkout --orphan new &&
+		git rm -fr . &&
+		test_commit --no-tag cruft.new &&
+		cruft_new="$(git rev-parse HEAD)" &&
+
+		git checkout main &&
+		git branch -D discard old new &&
+		git reflog expire --all --expire=all &&
+
+		# mark cruft.old with an mtime that is many minutes
+		# older than the expiration period, and mark cruft.new
+		# with an mtime that is in the future (and thus not
+		# eligible for pruning).
+		test-tool chmtime -2000 "$objdir/$(test_oid_to_path $cruft_old)" &&
+		test-tool chmtime +1000 "$objdir/$(test_oid_to_path $cruft_new)" &&
+
+		# Write the list of cruft objects we expect to
+		# accumulate, which is comprised of everything reachable
+		# from cruft.old and cruft.new, but not cruft.discard.
+		git rev-list --objects --no-object-names \
+			$cruft_old $cruft_new >cruft.raw &&
+		sort cruft.raw >cruft.expect &&
+
+		# Write the script to list extra tips, which are limited
+		# to cruft.old, in this case.
+		write_script extra-tips <<-EOF &&
+		echo $cruft_old
+		EOF
+		git config gc.recentObjectsHook ./extra-tips &&
+
+		git repack --cruft --cruft-expiration=now -d &&
+
+		mtimes="$(ls .git/objects/pack/pack-*.mtimes)" &&
+		git show-index <${mtimes%.mtimes}.idx >cruft &&
+		cut -d" " -f2 cruft | sort >cruft.actual &&
+		test_cmp cruft.expect cruft.actual &&
+
+		# Ensure that the "old" objects are removed after
+		# dropping the gc.recentObjectsHook hook.
+		git config --unset gc.recentObjectsHook &&
+		git repack --cruft --cruft-expiration=now -d &&
+
+		mtimes="$(ls .git/objects/pack/pack-*.mtimes)" &&
+		git show-index <${mtimes%.mtimes}.idx >cruft &&
+		cut -d" " -f2 cruft | sort >cruft.actual &&
+
+		git rev-list --objects --no-object-names $cruft_new >cruft.raw &&
+		cp cruft.expect cruft.old &&
+		sort cruft.raw >cruft.expect &&
+		test_cmp cruft.expect cruft.actual &&
+
+		# ensure objects which are no longer in the cruft pack were
+		# removed from the repository
+		for object in $(comm -13 cruft.expect cruft.old)
+		do
+			test_must_fail git cat-file -t $object || return 1
+		done
+	)
+'
+
+test_expect_success 'multi-valued gc.recentObjectsHook' '
+	git init repo &&
+	test_when_finished "rm -fr repo" &&
+	(
+		cd repo &&
+
+		test_commit base &&
+		git branch -M main &&
+
+		git checkout --orphan cruft.a &&
+		git rm -fr . &&
+		test_commit --no-tag cruft.a &&
+		cruft_a="$(git rev-parse HEAD)" &&
+
+		git checkout --orphan cruft.b &&
+		git rm -fr . &&
+		test_commit --no-tag cruft.b &&
+		cruft_b="$(git rev-parse HEAD)" &&
+
+		git checkout main &&
+		git branch -D cruft.a cruft.b &&
+		git reflog expire --all --expire=all &&
+
+		echo "echo $cruft_a" | write_script extra-tips.a &&
+		echo "echo $cruft_b" | write_script extra-tips.b &&
+		echo "false" | write_script extra-tips.c &&
+
+		git rev-list --objects --no-object-names $cruft_a $cruft_b \
+			>cruft.raw &&
+		sort cruft.raw >cruft.expect &&
+
+		# ensure that each extra cruft tip is saved by its
+		# respective hook
+		git config --add gc.recentObjectsHook ./extra-tips.a &&
+		git config --add gc.recentObjectsHook ./extra-tips.b &&
+		git repack --cruft --cruft-expiration=now -d &&
+
+		mtimes="$(ls .git/objects/pack/pack-*.mtimes)" &&
+		git show-index <${mtimes%.mtimes}.idx >cruft &&
+		cut -d" " -f2 cruft | sort >cruft.actual &&
+		test_cmp cruft.expect cruft.actual &&
+
+		# ensure that a dirty exit halts cruft pack generation
+		git config --add gc.recentObjectsHook ./extra-tips.c &&
+		test_must_fail git repack --cruft --cruft-expiration=now -d 2>err &&
+		grep "unable to enumerate additional recent objects" err &&
+
+		# and that the existing cruft pack is left alone
+		test_path_is_file "$mtimes"
+	)
+'
+
+test_expect_success 'additional cruft blobs via gc.recentObjectsHook' '
+	git init repo &&
+	test_when_finished "rm -fr repo" &&
+	(
+		cd repo &&
+
+		test_commit base &&
+
+		blob=$(echo "unreachable" | git hash-object -w --stdin) &&
+
+		# mark the unreachable blob we wrote above as having
+		# aged out of the retention period
+		test-tool chmtime -2000 "$objdir/$(test_oid_to_path $blob)" &&
+
+		# Write the script to list extra tips, which is just the
+		# extra blob as above.
+		write_script extra-tips <<-EOF &&
+		echo $blob
+		EOF
+		git config gc.recentObjectsHook ./extra-tips &&
+
+		git repack --cruft --cruft-expiration=now -d &&
+
+		mtimes="$(ls .git/objects/pack/pack-*.mtimes)" &&
+		git show-index <${mtimes%.mtimes}.idx >cruft &&
+		cut -d" " -f2 cruft >actual &&
+		echo $blob >expect &&
+		test_cmp expect actual
+	)
+'
+
 test_done
diff --git a/t/t7701-repack-unpack-unreachable.sh b/t/t7701-repack-unpack-unreachable.sh
index ebb267855f..ba428c18a8 100755
--- a/t/t7701-repack-unpack-unreachable.sh
+++ b/t/t7701-repack-unpack-unreachable.sh
@@ -113,6 +113,48 @@ test_expect_success 'do not bother loosening old objects' '
 	test_must_fail git cat-file -p $obj2
 '
 
+test_expect_success 'gc.recentObjectsHook' '
+	obj1=$(echo one | git hash-object -w --stdin) &&
+	obj2=$(echo two | git hash-object -w --stdin) &&
+	obj3=$(echo three | git hash-object -w --stdin) &&
+	pack1=$(echo $obj1 | git pack-objects .git/objects/pack/pack) &&
+	pack2=$(echo $obj2 | git pack-objects .git/objects/pack/pack) &&
+	pack3=$(echo $obj3 | git pack-objects .git/objects/pack/pack) &&
+	git prune-packed &&
+
+	git cat-file -p $obj1 &&
+	git cat-file -p $obj2 &&
+	git cat-file -p $obj3 &&
+
+	# make an unreachable annotated tag object to ensure we rescue objects
+	# which are reachable from non-pruned unreachable objects
+	obj2_tag="$(git mktag <<-EOF
+	object $obj2
+	type blob
+	tag obj2-tag
+	tagger T A Gger <tagger@example.com> 1234567890 -0000
+	EOF
+	)" &&
+
+	obj2_tag_pack="$(echo $obj2_tag | git pack-objects .git/objects/pack/pack)" &&
+	git prune-packed &&
+
+	write_script precious-objects <<-EOF &&
+	echo $obj2_tag
+	EOF
+	test_config gc.recentObjectsHook ./precious-objects &&
+
+	test-tool chmtime =-86400 .git/objects/pack/pack-$pack2.pack &&
+	test-tool chmtime =-86400 .git/objects/pack/pack-$pack3.pack &&
+	test-tool chmtime =-86400 .git/objects/pack/pack-$obj2_tag_pack.pack &&
+	git repack -A -d --unpack-unreachable=1.hour.ago &&
+
+	git cat-file -p $obj1 &&
+	git cat-file -p $obj2 &&
+	git cat-file -p $obj2_tag &&
+	test_must_fail git cat-file -p $obj3
+'
+
 test_expect_success 'keep packed objects found only in index' '
 	echo my-unique-content >file &&
 	git add file &&