Merge branch 'tb/gc-recent-object-hook'

"git pack-objects" learned to invoke a new hook program that enumerates extra objects to be used as anchoring points to keep otherwise unreachable objects in cruft packs.

* tb/gc-recent-object-hook:
  gc: introduce `gc.recentObjectsHook`
  reachable.c: extract `obj_is_recent()`
This commit is contained in:
@ -130,6 +130,21 @@ or rebase occurring. Since these changes are not part of the current
|
|||||||
project most users will want to expire them sooner, which is why the
|
project most users will want to expire them sooner, which is why the
|
||||||
default is more aggressive than `gc.reflogExpire`.
|
default is more aggressive than `gc.reflogExpire`.
|
||||||
|
|
||||||
|
gc.recentObjectsHook::
|
||||||
|
When considering whether or not to remove an object (either when
|
||||||
|
generating a cruft pack or storing unreachable objects as
|
||||||
|
loose), use the shell to execute the specified command(s).
|
||||||
|
Interpret their output as object IDs which Git will consider as
|
||||||
|
"recent", regardless of their age. By treating their mtimes as
|
||||||
|
"now", any objects (and their descendants) mentioned in the
|
||||||
|
output will be kept regardless of their true age.
|
||||||
|
+
|
||||||
|
Output must contain exactly one hex object ID per line, and nothing
|
||||||
|
else. Objects which cannot be found in the repository are ignored.
|
||||||
|
Multiple hooks are supported, but all must exit successfully, else the
|
||||||
|
operation (either generating a cruft pack or unpacking unreachable
|
||||||
|
objects) will be halted.
|
||||||
|
|
||||||
gc.rerereResolved::
|
gc.rerereResolved::
|
||||||
Records of conflicted merge you resolved earlier are
|
Records of conflicted merge you resolved earlier are
|
||||||
kept for this many days when 'git rerere gc' is run.
|
kept for this many days when 'git rerere gc' is run.
|
||||||
|
85
reachable.c
85
reachable.c
@ -16,6 +16,8 @@
|
|||||||
#include "object-store.h"
|
#include "object-store.h"
|
||||||
#include "pack-bitmap.h"
|
#include "pack-bitmap.h"
|
||||||
#include "pack-mtimes.h"
|
#include "pack-mtimes.h"
|
||||||
|
#include "config.h"
|
||||||
|
#include "run-command.h"
|
||||||
|
|
||||||
struct connectivity_progress {
|
struct connectivity_progress {
|
||||||
struct progress *progress;
|
struct progress *progress;
|
||||||
@ -67,8 +69,77 @@ struct recent_data {
|
|||||||
timestamp_t timestamp;
|
timestamp_t timestamp;
|
||||||
report_recent_object_fn *cb;
|
report_recent_object_fn *cb;
|
||||||
int ignore_in_core_kept_packs;
|
int ignore_in_core_kept_packs;
|
||||||
|
|
||||||
|
struct oidset extra_recent_oids;
|
||||||
|
int extra_recent_oids_loaded;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
static int run_one_gc_recent_objects_hook(struct oidset *set,
|
||||||
|
const char *args)
|
||||||
|
{
|
||||||
|
struct child_process cmd = CHILD_PROCESS_INIT;
|
||||||
|
struct strbuf buf = STRBUF_INIT;
|
||||||
|
FILE *out;
|
||||||
|
int ret = 0;
|
||||||
|
|
||||||
|
cmd.use_shell = 1;
|
||||||
|
cmd.out = -1;
|
||||||
|
|
||||||
|
strvec_push(&cmd.args, args);
|
||||||
|
|
||||||
|
if (start_command(&cmd))
|
||||||
|
return -1;
|
||||||
|
|
||||||
|
out = xfdopen(cmd.out, "r");
|
||||||
|
while (strbuf_getline(&buf, out) != EOF) {
|
||||||
|
struct object_id oid;
|
||||||
|
const char *rest;
|
||||||
|
|
||||||
|
if (parse_oid_hex(buf.buf, &oid, &rest) || *rest) {
|
||||||
|
ret = error(_("invalid extra cruft tip: '%s'"), buf.buf);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
oidset_insert(set, &oid);
|
||||||
|
}
|
||||||
|
|
||||||
|
fclose(out);
|
||||||
|
ret |= finish_command(&cmd);
|
||||||
|
|
||||||
|
strbuf_release(&buf);
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
static void load_gc_recent_objects(struct recent_data *data)
|
||||||
|
{
|
||||||
|
const struct string_list *programs;
|
||||||
|
int ret = 0;
|
||||||
|
size_t i;
|
||||||
|
|
||||||
|
data->extra_recent_oids_loaded = 1;
|
||||||
|
|
||||||
|
if (git_config_get_string_multi("gc.recentobjectshook", &programs))
|
||||||
|
return;
|
||||||
|
|
||||||
|
for (i = 0; i < programs->nr; i++) {
|
||||||
|
ret = run_one_gc_recent_objects_hook(&data->extra_recent_oids,
|
||||||
|
programs->items[i].string);
|
||||||
|
if (ret)
|
||||||
|
die(_("unable to enumerate additional recent objects"));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
 * Decide whether an object counts as "recent".
 *
 * An object is recent when its mtime is strictly greater than
 * data->timestamp, or when its ID is in data->extra_recent_oids.  The
 * extra set is populated via load_gc_recent_objects() the first time
 * an object fails the mtime check.
 *
 * Returns 1 for the mtime case, otherwise the result of
 * oidset_contains().
 */
static int obj_is_recent(const struct object_id *oid, timestamp_t mtime,
			 struct recent_data *data)
{
	if (mtime <= data->timestamp) {
		/* Too old by mtime: consult the hook-provided set. */
		if (!data->extra_recent_oids_loaded)
			load_gc_recent_objects(data);
		return oidset_contains(&data->extra_recent_oids, oid);
	}

	return 1;
}
|
||||||
|
|
||||||
static void add_recent_object(const struct object_id *oid,
|
static void add_recent_object(const struct object_id *oid,
|
||||||
struct packed_git *pack,
|
struct packed_git *pack,
|
||||||
off_t offset,
|
off_t offset,
|
||||||
@ -78,7 +149,7 @@ static void add_recent_object(const struct object_id *oid,
|
|||||||
struct object *obj;
|
struct object *obj;
|
||||||
enum object_type type;
|
enum object_type type;
|
||||||
|
|
||||||
if (mtime <= data->timestamp)
|
if (!obj_is_recent(oid, mtime, data))
|
||||||
return;
|
return;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@ -193,16 +264,24 @@ int add_unseen_recent_objects_to_traversal(struct rev_info *revs,
|
|||||||
data.cb = cb;
|
data.cb = cb;
|
||||||
data.ignore_in_core_kept_packs = ignore_in_core_kept_packs;
|
data.ignore_in_core_kept_packs = ignore_in_core_kept_packs;
|
||||||
|
|
||||||
|
oidset_init(&data.extra_recent_oids, 0);
|
||||||
|
data.extra_recent_oids_loaded = 0;
|
||||||
|
|
||||||
r = for_each_loose_object(add_recent_loose, &data,
|
r = for_each_loose_object(add_recent_loose, &data,
|
||||||
FOR_EACH_OBJECT_LOCAL_ONLY);
|
FOR_EACH_OBJECT_LOCAL_ONLY);
|
||||||
if (r)
|
if (r)
|
||||||
return r;
|
goto done;
|
||||||
|
|
||||||
flags = FOR_EACH_OBJECT_LOCAL_ONLY | FOR_EACH_OBJECT_PACK_ORDER;
|
flags = FOR_EACH_OBJECT_LOCAL_ONLY | FOR_EACH_OBJECT_PACK_ORDER;
|
||||||
if (ignore_in_core_kept_packs)
|
if (ignore_in_core_kept_packs)
|
||||||
flags |= FOR_EACH_OBJECT_SKIP_IN_CORE_KEPT_PACKS;
|
flags |= FOR_EACH_OBJECT_SKIP_IN_CORE_KEPT_PACKS;
|
||||||
|
|
||||||
return for_each_packed_object(add_recent_packed, &data, flags);
|
r = for_each_packed_object(add_recent_packed, &data, flags);
|
||||||
|
|
||||||
|
done:
|
||||||
|
oidset_clear(&data.extra_recent_oids);
|
||||||
|
|
||||||
|
return r;
|
||||||
}
|
}
|
||||||
|
|
||||||
static int mark_object_seen(const struct object_id *oid,
|
static int mark_object_seen(const struct object_id *oid,
|
||||||
|
@ -350,4 +350,18 @@ test_expect_success 'old reachable-from-recent retained with bitmaps' '
|
|||||||
test_must_fail git cat-file -e $to_drop
|
test_must_fail git cat-file -e $to_drop
|
||||||
'
|
'
|
||||||
|
|
||||||
|
test_expect_success 'gc.recentObjectsHook' '
|
||||||
|
add_blob &&
|
||||||
|
test-tool chmtime =-86500 $BLOB_FILE &&
|
||||||
|
|
||||||
|
write_script precious-objects <<-EOF &&
|
||||||
|
echo $BLOB
|
||||||
|
EOF
|
||||||
|
test_config gc.recentObjectsHook ./precious-objects &&
|
||||||
|
|
||||||
|
git prune --expire=now &&
|
||||||
|
|
||||||
|
git cat-file -p $BLOB
|
||||||
|
'
|
||||||
|
|
||||||
test_done
|
test_done
|
||||||
|
@ -739,4 +739,175 @@ test_expect_success 'cruft objects are freshend via loose' '
|
|||||||
)
|
)
|
||||||
'
|
'
|
||||||
|
|
||||||
|
test_expect_success 'gc.recentObjectsHook' '
|
||||||
|
git init repo &&
|
||||||
|
test_when_finished "rm -fr repo" &&
|
||||||
|
(
|
||||||
|
cd repo &&
|
||||||
|
|
||||||
|
# Create a handful of objects.
|
||||||
|
#
|
||||||
|
# - one reachable commit, "base", designated for the reachable
|
||||||
|
# pack
|
||||||
|
# - one unreachable commit, "cruft.discard", which is marked
|
||||||
|
# for deletion
|
||||||
|
# - one unreachable commit, "cruft.old", which would be marked
|
||||||
|
# for deletion, but is rescued as an extra cruft tip
|
||||||
|
# - one unreachable commit, "cruft.new", which is not marked
|
||||||
|
# for deletion
|
||||||
|
test_commit base &&
|
||||||
|
git branch -M main &&
|
||||||
|
|
||||||
|
git checkout --orphan discard &&
|
||||||
|
git rm -fr . &&
|
||||||
|
test_commit --no-tag cruft.discard &&
|
||||||
|
|
||||||
|
git checkout --orphan old &&
|
||||||
|
git rm -fr . &&
|
||||||
|
test_commit --no-tag cruft.old &&
|
||||||
|
cruft_old="$(git rev-parse HEAD)" &&
|
||||||
|
|
||||||
|
git checkout --orphan new &&
|
||||||
|
git rm -fr . &&
|
||||||
|
test_commit --no-tag cruft.new &&
|
||||||
|
cruft_new="$(git rev-parse HEAD)" &&
|
||||||
|
|
||||||
|
git checkout main &&
|
||||||
|
git branch -D discard old new &&
|
||||||
|
git reflog expire --all --expire=all &&
|
||||||
|
|
||||||
|
# mark cruft.old with an mtime that is many minutes
|
||||||
|
# older than the expiration period, and mark cruft.new
|
||||||
|
# with an mtime that is in the future (and thus not
|
||||||
|
# eligible for pruning).
|
||||||
|
test-tool chmtime -2000 "$objdir/$(test_oid_to_path $cruft_old)" &&
|
||||||
|
test-tool chmtime +1000 "$objdir/$(test_oid_to_path $cruft_new)" &&
|
||||||
|
|
||||||
|
# Write the list of cruft objects we expect to
|
||||||
|
# accumulate, which is comprised of everything reachable
|
||||||
|
# from cruft.old and cruft.new, but not cruft.discard.
|
||||||
|
git rev-list --objects --no-object-names \
|
||||||
|
$cruft_old $cruft_new >cruft.raw &&
|
||||||
|
sort cruft.raw >cruft.expect &&
|
||||||
|
|
||||||
|
# Write the script to list extra tips, which are limited
|
||||||
|
# to cruft.old, in this case.
|
||||||
|
write_script extra-tips <<-EOF &&
|
||||||
|
echo $cruft_old
|
||||||
|
EOF
|
||||||
|
git config gc.recentObjectsHook ./extra-tips &&
|
||||||
|
|
||||||
|
git repack --cruft --cruft-expiration=now -d &&
|
||||||
|
|
||||||
|
mtimes="$(ls .git/objects/pack/pack-*.mtimes)" &&
|
||||||
|
git show-index <${mtimes%.mtimes}.idx >cruft &&
|
||||||
|
cut -d" " -f2 cruft | sort >cruft.actual &&
|
||||||
|
test_cmp cruft.expect cruft.actual &&
|
||||||
|
|
||||||
|
# Ensure that the "old" objects are removed after
|
||||||
|
# dropping the gc.recentObjectsHook hook.
|
||||||
|
git config --unset gc.recentObjectsHook &&
|
||||||
|
git repack --cruft --cruft-expiration=now -d &&
|
||||||
|
|
||||||
|
mtimes="$(ls .git/objects/pack/pack-*.mtimes)" &&
|
||||||
|
git show-index <${mtimes%.mtimes}.idx >cruft &&
|
||||||
|
cut -d" " -f2 cruft | sort >cruft.actual &&
|
||||||
|
|
||||||
|
git rev-list --objects --no-object-names $cruft_new >cruft.raw &&
|
||||||
|
cp cruft.expect cruft.old &&
|
||||||
|
sort cruft.raw >cruft.expect &&
|
||||||
|
test_cmp cruft.expect cruft.actual &&
|
||||||
|
|
||||||
|
# ensure objects which are no longer in the cruft pack were
|
||||||
|
# removed from the repository
|
||||||
|
for object in $(comm -13 cruft.expect cruft.old)
|
||||||
|
do
|
||||||
|
test_must_fail git cat-file -t $object || return 1
|
||||||
|
done
|
||||||
|
)
|
||||||
|
'
|
||||||
|
|
||||||
|
test_expect_success 'multi-valued gc.recentObjectsHook' '
|
||||||
|
git init repo &&
|
||||||
|
test_when_finished "rm -fr repo" &&
|
||||||
|
(
|
||||||
|
cd repo &&
|
||||||
|
|
||||||
|
test_commit base &&
|
||||||
|
git branch -M main &&
|
||||||
|
|
||||||
|
git checkout --orphan cruft.a &&
|
||||||
|
git rm -fr . &&
|
||||||
|
test_commit --no-tag cruft.a &&
|
||||||
|
cruft_a="$(git rev-parse HEAD)" &&
|
||||||
|
|
||||||
|
git checkout --orphan cruft.b &&
|
||||||
|
git rm -fr . &&
|
||||||
|
test_commit --no-tag cruft.b &&
|
||||||
|
cruft_b="$(git rev-parse HEAD)" &&
|
||||||
|
|
||||||
|
git checkout main &&
|
||||||
|
git branch -D cruft.a cruft.b &&
|
||||||
|
git reflog expire --all --expire=all &&
|
||||||
|
|
||||||
|
echo "echo $cruft_a" | write_script extra-tips.a &&
|
||||||
|
echo "echo $cruft_b" | write_script extra-tips.b &&
|
||||||
|
echo "false" | write_script extra-tips.c &&
|
||||||
|
|
||||||
|
git rev-list --objects --no-object-names $cruft_a $cruft_b \
|
||||||
|
>cruft.raw &&
|
||||||
|
sort cruft.raw >cruft.expect &&
|
||||||
|
|
||||||
|
# ensure that each extra cruft tip is saved by its
|
||||||
|
# respective hook
|
||||||
|
git config --add gc.recentObjectsHook ./extra-tips.a &&
|
||||||
|
git config --add gc.recentObjectsHook ./extra-tips.b &&
|
||||||
|
git repack --cruft --cruft-expiration=now -d &&
|
||||||
|
|
||||||
|
mtimes="$(ls .git/objects/pack/pack-*.mtimes)" &&
|
||||||
|
git show-index <${mtimes%.mtimes}.idx >cruft &&
|
||||||
|
cut -d" " -f2 cruft | sort >cruft.actual &&
|
||||||
|
test_cmp cruft.expect cruft.actual &&
|
||||||
|
|
||||||
|
# ensure that a dirty exit halts cruft pack generation
|
||||||
|
git config --add gc.recentObjectsHook ./extra-tips.c &&
|
||||||
|
test_must_fail git repack --cruft --cruft-expiration=now -d 2>err &&
|
||||||
|
grep "unable to enumerate additional recent objects" err &&
|
||||||
|
|
||||||
|
# and that the existing cruft pack is left alone
|
||||||
|
test_path_is_file "$mtimes"
|
||||||
|
)
|
||||||
|
'
|
||||||
|
|
||||||
|
test_expect_success 'additional cruft blobs via gc.recentObjectsHook' '
|
||||||
|
git init repo &&
|
||||||
|
test_when_finished "rm -fr repo" &&
|
||||||
|
(
|
||||||
|
cd repo &&
|
||||||
|
|
||||||
|
test_commit base &&
|
||||||
|
|
||||||
|
blob=$(echo "unreachable" | git hash-object -w --stdin) &&
|
||||||
|
|
||||||
|
# mark the unreachable blob we wrote above as having
|
||||||
|
# aged out of the retention period
|
||||||
|
test-tool chmtime -2000 "$objdir/$(test_oid_to_path $blob)" &&
|
||||||
|
|
||||||
|
# Write the script to list extra tips, which is just the
|
||||||
|
# extra blob as above.
|
||||||
|
write_script extra-tips <<-EOF &&
|
||||||
|
echo $blob
|
||||||
|
EOF
|
||||||
|
git config gc.recentObjectsHook ./extra-tips &&
|
||||||
|
|
||||||
|
git repack --cruft --cruft-expiration=now -d &&
|
||||||
|
|
||||||
|
mtimes="$(ls .git/objects/pack/pack-*.mtimes)" &&
|
||||||
|
git show-index <${mtimes%.mtimes}.idx >cruft &&
|
||||||
|
cut -d" " -f2 cruft >actual &&
|
||||||
|
echo $blob >expect &&
|
||||||
|
test_cmp expect actual
|
||||||
|
)
|
||||||
|
'
|
||||||
|
|
||||||
test_done
|
test_done
|
||||||
|
@ -113,6 +113,37 @@ test_expect_success 'do not bother loosening old objects' '
|
|||||||
test_must_fail git cat-file -p $obj2
|
test_must_fail git cat-file -p $obj2
|
||||||
'
|
'
|
||||||
|
|
||||||
|
test_expect_success 'gc.recentObjectsHook' '
|
||||||
|
obj1=$(echo one | git hash-object -w --stdin) &&
|
||||||
|
obj2=$(echo two | git hash-object -w --stdin) &&
|
||||||
|
obj3=$(echo three | git hash-object -w --stdin) &&
|
||||||
|
pack1=$(echo $obj1 | git pack-objects .git/objects/pack/pack) &&
|
||||||
|
pack2=$(echo $obj2 | git pack-objects .git/objects/pack/pack) &&
|
||||||
|
pack3=$(echo $obj3 | git pack-objects .git/objects/pack/pack) &&
|
||||||
|
git prune-packed &&
|
||||||
|
|
||||||
|
git cat-file -p $obj1 &&
|
||||||
|
git cat-file -p $obj2 &&
|
||||||
|
git cat-file -p $obj3 &&
|
||||||
|
|
||||||
|
git tag -a -m tag obj2-tag $obj2 &&
|
||||||
|
obj2_tag="$(git rev-parse obj2-tag)" &&
|
||||||
|
|
||||||
|
write_script precious-objects <<-EOF &&
|
||||||
|
echo $obj2_tag
|
||||||
|
EOF
|
||||||
|
git config gc.recentObjectsHook ./precious-objects &&
|
||||||
|
|
||||||
|
test-tool chmtime =-86400 .git/objects/pack/pack-$pack2.pack &&
|
||||||
|
test-tool chmtime =-86400 .git/objects/pack/pack-$pack3.pack &&
|
||||||
|
git repack -A -d --unpack-unreachable=1.hour.ago &&
|
||||||
|
|
||||||
|
git cat-file -p $obj1 &&
|
||||||
|
git cat-file -p $obj2 &&
|
||||||
|
git cat-file -p $obj2_tag &&
|
||||||
|
test_must_fail git cat-file -p $obj3
|
||||||
|
'
|
||||||
|
|
||||||
test_expect_success 'keep packed objects found only in index' '
|
test_expect_success 'keep packed objects found only in index' '
|
||||||
echo my-unique-content >file &&
|
echo my-unique-content >file &&
|
||||||
git add file &&
|
git add file &&
|
||||||
|
Reference in New Issue
Block a user