Merge branch 'jt/repack-local-promisor'

"git gc" discards any objects that are outside promisor packs that
are referred to by an object in a promisor pack, and we do not
refetch them from the promisor at runtime, resulting an unusable
repository.  Work it around by including these objects in the
referring promisor pack at the receiving end of the fetch.

* jt/repack-local-promisor:
  index-pack: repack local links into promisor packs
  t5300: move --window clamp test next to unclamped
  t0410: use from-scratch server
  t0410: make test description clearer
This commit is contained in:
Junio C Hamano
2024-11-20 14:47:16 +09:00
6 changed files with 180 additions and 10 deletions

View File

@ -139,6 +139,11 @@ include::object-format-disclaimer.txt[]
written. If a `<message>` is provided, then that content will be written. If a `<message>` is provided, then that content will be
written to the .promisor file for future reference. See written to the .promisor file for future reference. See
link:technical/partial-clone.html[partial clone] for more information. link:technical/partial-clone.html[partial clone] for more information.
+
Also, if there are objects in the given pack that references non-promisor
objects (in the repo), repacks those non-promisor objects into a promisor
pack. This avoids a situation in which a repo has non-promisor objects that are
accessible through promisor objects.
NOTES NOTES
----- -----

View File

@ -9,6 +9,7 @@
#include "csum-file.h" #include "csum-file.h"
#include "blob.h" #include "blob.h"
#include "commit.h" #include "commit.h"
#include "tag.h"
#include "tree.h" #include "tree.h"
#include "progress.h" #include "progress.h"
#include "fsck.h" #include "fsck.h"
@ -20,9 +21,14 @@
#include "object-file.h" #include "object-file.h"
#include "object-store-ll.h" #include "object-store-ll.h"
#include "oid-array.h" #include "oid-array.h"
#include "oidset.h"
#include "path.h"
#include "replace-object.h" #include "replace-object.h"
#include "tree-walk.h"
#include "promisor-remote.h" #include "promisor-remote.h"
#include "run-command.h"
#include "setup.h" #include "setup.h"
#include "strvec.h"
static const char index_pack_usage[] = static const char index_pack_usage[] =
"git index-pack [-v] [-o <index-file>] [--keep | --keep=<msg>] [--[no-]rev-index] [--verify] [--strict[=<msg-id>=<severity>...]] [--fsck-objects[=<msg-id>=<severity>...]] (<pack-file> | --stdin [--fix-thin] [<pack-file>])"; "git index-pack [-v] [-o <index-file>] [--keep | --keep=<msg>] [--[no-]rev-index] [--verify] [--strict[=<msg-id>=<severity>...]] [--fsck-objects[=<msg-id>=<severity>...]] (<pack-file> | --stdin [--fix-thin] [<pack-file>])";
@ -148,6 +154,13 @@ static uint32_t input_crc32;
static int input_fd, output_fd; static int input_fd, output_fd;
static const char *curr_pack; static const char *curr_pack;
/*
* local_links is guarded by read_mutex, and record_local_links is read-only in
* a thread.
*/
static struct oidset local_links = OIDSET_INIT;
static int record_local_links;
static struct thread_local *thread_data; static struct thread_local *thread_data;
static int nr_dispatched; static int nr_dispatched;
static int threads_active; static int threads_active;
@ -799,6 +812,44 @@ static int check_collison(struct object_entry *entry)
return 0; return 0;
} }
static void record_if_local_object(const struct object_id *oid)
{
struct object_info info = OBJECT_INFO_INIT;
if (oid_object_info_extended(the_repository, oid, &info, 0))
/* Missing; assume it is a promisor object */
return;
if (info.whence == OI_PACKED && info.u.packed.pack->pack_promisor)
return;
oidset_insert(&local_links, oid);
}
static void do_record_local_links(struct object *obj)
{
if (obj->type == OBJ_TREE) {
struct tree *tree = (struct tree *)obj;
struct tree_desc desc;
struct name_entry entry;
if (init_tree_desc_gently(&desc, &tree->object.oid,
tree->buffer, tree->size, 0))
/*
* Error messages are given when packs are
* verified, so do not print any here.
*/
return;
while (tree_entry_gently(&desc, &entry))
record_if_local_object(&entry.oid);
} else if (obj->type == OBJ_COMMIT) {
struct commit *commit = (struct commit *) obj;
struct commit_list *parents = commit->parents;
for (; parents; parents = parents->next)
record_if_local_object(&parents->item->object.oid);
} else if (obj->type == OBJ_TAG) {
struct tag *tag = (struct tag *) obj;
record_if_local_object(get_tagged_oid(tag));
}
}
static void sha1_object(const void *data, struct object_entry *obj_entry, static void sha1_object(const void *data, struct object_entry *obj_entry,
unsigned long size, enum object_type type, unsigned long size, enum object_type type,
const struct object_id *oid) const struct object_id *oid)
@ -845,7 +896,7 @@ static void sha1_object(const void *data, struct object_entry *obj_entry,
free(has_data); free(has_data);
} }
if (strict || do_fsck_object) { if (strict || do_fsck_object || record_local_links) {
read_lock(); read_lock();
if (type == OBJ_BLOB) { if (type == OBJ_BLOB) {
struct blob *blob = lookup_blob(the_repository, oid); struct blob *blob = lookup_blob(the_repository, oid);
@ -877,6 +928,8 @@ static void sha1_object(const void *data, struct object_entry *obj_entry,
die(_("fsck error in packed object")); die(_("fsck error in packed object"));
if (strict && fsck_walk(obj, NULL, &fsck_options)) if (strict && fsck_walk(obj, NULL, &fsck_options))
die(_("Not all child objects of %s are reachable"), oid_to_hex(&obj->oid)); die(_("Not all child objects of %s are reachable"), oid_to_hex(&obj->oid));
if (record_local_links)
do_record_local_links(obj);
if (obj->type == OBJ_TREE) { if (obj->type == OBJ_TREE) {
struct tree *item = (struct tree *) obj; struct tree *item = (struct tree *) obj;
@ -1719,6 +1772,58 @@ static void show_pack_info(int stat_only)
free(chain_histogram); free(chain_histogram);
} }
static void repack_local_links(void)
{
struct child_process cmd = CHILD_PROCESS_INIT;
FILE *out;
struct strbuf line = STRBUF_INIT;
struct oidset_iter iter;
struct object_id *oid;
char *base_name;
if (!oidset_size(&local_links))
return;
base_name = mkpathdup("%s/pack/pack", repo_get_object_directory(the_repository));
strvec_push(&cmd.args, "pack-objects");
strvec_push(&cmd.args, "--exclude-promisor-objects-best-effort");
strvec_push(&cmd.args, base_name);
cmd.git_cmd = 1;
cmd.in = -1;
cmd.out = -1;
if (start_command(&cmd))
die(_("could not start pack-objects to repack local links"));
oidset_iter_init(&local_links, &iter);
while ((oid = oidset_iter_next(&iter))) {
if (write_in_full(cmd.in, oid_to_hex(oid), the_hash_algo->hexsz) < 0 ||
write_in_full(cmd.in, "\n", 1) < 0)
die(_("failed to feed local object to pack-objects"));
}
close(cmd.in);
out = xfdopen(cmd.out, "r");
while (strbuf_getline_lf(&line, out) != EOF) {
unsigned char binary[GIT_MAX_RAWSZ];
if (line.len != the_hash_algo->hexsz ||
!hex_to_bytes(binary, line.buf, line.len))
die(_("index-pack: Expecting full hex object ID lines only from pack-objects."));
/*
* pack-objects creates the .pack and .idx files, but not the
* .promisor file. Create the .promisor file, which is empty.
*/
write_special_file("promisor", "", NULL, binary, NULL);
}
fclose(out);
if (finish_command(&cmd))
die(_("could not finish pack-objects to repack local links"));
strbuf_release(&line);
free(base_name);
}
int cmd_index_pack(int argc, int cmd_index_pack(int argc,
const char **argv, const char **argv,
const char *prefix, const char *prefix,
@ -1794,7 +1899,7 @@ int cmd_index_pack(int argc,
} else if (skip_to_optional_arg(arg, "--keep", &keep_msg)) { } else if (skip_to_optional_arg(arg, "--keep", &keep_msg)) {
; /* nothing to do */ ; /* nothing to do */
} else if (skip_to_optional_arg(arg, "--promisor", &promisor_msg)) { } else if (skip_to_optional_arg(arg, "--promisor", &promisor_msg)) {
; /* already parsed */ record_local_links = 1;
} else if (starts_with(arg, "--threads=")) { } else if (starts_with(arg, "--threads=")) {
char *end; char *end;
nr_threads = strtoul(arg+10, &end, 0); nr_threads = strtoul(arg+10, &end, 0);
@ -1970,6 +2075,8 @@ int cmd_index_pack(int argc,
free((void *) curr_index); free((void *) curr_index);
free(curr_rev_index); free(curr_rev_index);
repack_local_links();
/* /*
* Let the caller know this pack is not self contained * Let the caller know this pack is not self contained
*/ */

View File

@ -239,6 +239,7 @@ static enum {
static uint16_t write_bitmap_options = BITMAP_OPT_HASH_CACHE; static uint16_t write_bitmap_options = BITMAP_OPT_HASH_CACHE;
static int exclude_promisor_objects; static int exclude_promisor_objects;
static int exclude_promisor_objects_best_effort;
static int use_delta_islands; static int use_delta_islands;
@ -4312,6 +4313,18 @@ static int option_parse_cruft_expiration(const struct option *opt UNUSED,
return 0; return 0;
} }
static int is_not_in_promisor_pack_obj(struct object *obj, void *data UNUSED)
{
struct object_info info = OBJECT_INFO_INIT;
if (oid_object_info_extended(the_repository, &obj->oid, &info, 0))
BUG("should_include_obj should only be called on existing objects");
return info.whence != OI_PACKED || !info.u.packed.pack->pack_promisor;
}
static int is_not_in_promisor_pack(struct commit *commit, void *data) {
return is_not_in_promisor_pack_obj((struct object *) commit, data);
}
int cmd_pack_objects(int argc, int cmd_pack_objects(int argc,
const char **argv, const char **argv,
const char *prefix, const char *prefix,
@ -4424,6 +4437,9 @@ int cmd_pack_objects(int argc,
option_parse_missing_action), option_parse_missing_action),
OPT_BOOL(0, "exclude-promisor-objects", &exclude_promisor_objects, OPT_BOOL(0, "exclude-promisor-objects", &exclude_promisor_objects,
N_("do not pack objects in promisor packfiles")), N_("do not pack objects in promisor packfiles")),
OPT_BOOL(0, "exclude-promisor-objects-best-effort",
&exclude_promisor_objects_best_effort,
N_("implies --missing=allow-any")),
OPT_BOOL(0, "delta-islands", &use_delta_islands, OPT_BOOL(0, "delta-islands", &use_delta_islands,
N_("respect islands during delta compression")), N_("respect islands during delta compression")),
OPT_STRING_LIST(0, "uri-protocol", &uri_protocols, OPT_STRING_LIST(0, "uri-protocol", &uri_protocols,
@ -4504,10 +4520,18 @@ int cmd_pack_objects(int argc,
strvec_push(&rp, "--unpacked"); strvec_push(&rp, "--unpacked");
} }
if (exclude_promisor_objects && exclude_promisor_objects_best_effort)
die(_("options '%s' and '%s' cannot be used together"),
"--exclude-promisor-objects", "--exclude-promisor-objects-best-effort");
if (exclude_promisor_objects) { if (exclude_promisor_objects) {
use_internal_rev_list = 1; use_internal_rev_list = 1;
fetch_if_missing = 0; fetch_if_missing = 0;
strvec_push(&rp, "--exclude-promisor-objects"); strvec_push(&rp, "--exclude-promisor-objects");
} else if (exclude_promisor_objects_best_effort) {
use_internal_rev_list = 1;
fetch_if_missing = 0;
option_parse_missing_action(NULL, "allow-any", 0);
/* revs configured below */
} }
if (unpack_unreachable || keep_unreachable || pack_loose_unreachable) if (unpack_unreachable || keep_unreachable || pack_loose_unreachable)
use_internal_rev_list = 1; use_internal_rev_list = 1;
@ -4627,6 +4651,10 @@ int cmd_pack_objects(int argc,
repo_init_revisions(the_repository, &revs, NULL); repo_init_revisions(the_repository, &revs, NULL);
list_objects_filter_copy(&revs.filter, &filter_options); list_objects_filter_copy(&revs.filter, &filter_options);
if (exclude_promisor_objects_best_effort) {
revs.include_check = is_not_in_promisor_pack;
revs.include_check_obj = is_not_in_promisor_pack_obj;
}
get_object_list(&revs, rp.nr, rp.v); get_object_list(&revs, rp.nr, rp.v);
release_revisions(&revs); release_revisions(&revs);
} }

View File

@ -241,7 +241,7 @@ test_expect_success 'fetching of missing objects works with ref-in-want enabled'
grep "fetch< fetch=.*ref-in-want" trace grep "fetch< fetch=.*ref-in-want" trace
' '
test_expect_success 'fetching of missing objects from another promisor remote' ' test_expect_success 'fetching from another promisor remote' '
git clone "file://$(pwd)/server" server2 && git clone "file://$(pwd)/server" server2 &&
test_commit -C server2 bar && test_commit -C server2 bar &&
git -C server2 repack -a -d --write-bitmap-index && git -C server2 repack -a -d --write-bitmap-index &&
@ -264,8 +264,8 @@ test_expect_success 'fetching of missing objects from another promisor remote' '
grep "$HASH2" out grep "$HASH2" out
' '
test_expect_success 'fetching of missing objects configures a promisor remote' ' test_expect_success 'fetching with --filter configures a promisor remote' '
git clone "file://$(pwd)/server" server3 && test_create_repo server3 &&
test_commit -C server3 baz && test_commit -C server3 baz &&
git -C server3 repack -a -d --write-bitmap-index && git -C server3 repack -a -d --write-bitmap-index &&
HASH3=$(git -C server3 rev-parse baz) && HASH3=$(git -C server3 rev-parse baz) &&

View File

@ -156,6 +156,11 @@ test_expect_success 'pack without delta' '
check_deltas stderr = 0 check_deltas stderr = 0
' '
test_expect_success 'negative window clamps to 0' '
git pack-objects --progress --window=-1 neg-window <obj-list 2>stderr &&
check_deltas stderr = 0
'
test_expect_success 'pack-objects with bogus arguments' ' test_expect_success 'pack-objects with bogus arguments' '
test_must_fail git pack-objects --window=0 test-1 blah blah <obj-list test_must_fail git pack-objects --window=0 test-1 blah blah <obj-list
' '
@ -630,11 +635,6 @@ test_expect_success 'prefetch objects' '
test_line_count = 1 donelines test_line_count = 1 donelines
' '
test_expect_success 'negative window clamps to 0' '
git pack-objects --progress --window=-1 neg-window <obj-list 2>stderr &&
check_deltas stderr = 0
'
for hash in sha1 sha256 for hash in sha1 sha256
do do
test_expect_success "verify-pack with $hash packfile" ' test_expect_success "verify-pack with $hash packfile" '

View File

@ -694,6 +694,36 @@ test_expect_success 'lazy-fetch in submodule succeeds' '
git -C client restore --recurse-submodules --source=HEAD^ :/ git -C client restore --recurse-submodules --source=HEAD^ :/
' '
test_expect_success 'after fetching descendants of non-promisor commits, gc works' '
# Setup
git init full &&
git -C full config uploadpack.allowfilter 1 &&
git -C full config uploadpack.allowanysha1inwant 1 &&
touch full/foo &&
git -C full add foo &&
git -C full commit -m "commit 1" &&
git -C full checkout --detach &&
# Partial clone and push commit to remote
git clone "file://$(pwd)/full" --filter=blob:none partial &&
echo "hello" > partial/foo &&
git -C partial commit -a -m "commit 2" &&
git -C partial push &&
# gc in partial repo
git -C partial gc --prune=now &&
# Create another commit in normal repo
git -C full checkout main &&
echo " world" >> full/foo &&
git -C full commit -a -m "commit 3" &&
# Pull from remote in partial repo, and run gc again
git -C partial pull &&
git -C partial gc --prune=now
'
. "$TEST_DIRECTORY"/lib-httpd.sh . "$TEST_DIRECTORY"/lib-httpd.sh
start_httpd start_httpd