clone: open a shortcut for connectivity check

In order to make sure the cloned repository is good, we run "rev-list
--objects --not --all $new_refs" on the repository. This is expensive
on large repositories. This patch attempts to mitigate the impact in
this special case.

In the "good" clone case, we only have one pack. If all of the
following conditions are met, we can be sure that all objects reachable
from the new refs exist, which is the intention of running "rev-list ...":

 - all refs point to an object in the pack
 - there are no dangling pointers in any object in the pack
 - no objects in the pack point to objects outside the pack

The second and third checks can be done with the help of index-pack as
a slight variation of the --strict check (which introduces a new
condition for the shortcut: pack transfer must be used and the number
of objects must be large enough to call index-pack). The first is
checked in check_everything_connected after we get an "ok" from
index-pack.

"index-pack + new checks" is still faster than the current "index-pack
+ rev-list", which is the whole point of this patch. If any of the
conditions fail, we fall back to the good old but expensive "rev-list
...". In that case it's even more expensive because we have to pay for
the new checks in index-pack. But that should only happen when the
other side is either buggy or malicious.

Cloning linux-2.6 over file://

        before         after
real    3m25.693s      2m53.050s
user    5m2.037s       4m42.396s
sys     0m13.750s      0m16.574s

A more realistic test with ssh:// over wireless

        before         after
real    11m26.629s     10m4.213s
user    5m43.196s      5m19.444s
sys     0m35.812s      0m37.630s

This shortcut is not applied to shallow clones, partly because shallow
clones should have no more objects than a usual fetch and the cost of
rev-list is acceptable, partly to avoid dealing with corner cases when
grafting is involved.

This shortcut does not apply to the unpack-objects code path either,
because the number of objects must be small in order to trigger that
code path.

Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
This commit is contained in:
Nguyễn Thái Ngọc Duy
2013-05-26 08:16:17 +07:00
committed by Junio C Hamano
parent 920734b069
commit c6807a40dc
9 changed files with 94 additions and 15 deletions

View File

@ -77,8 +77,10 @@ static int nr_threads;
static int from_stdin;
static int strict;
static int do_fsck_object;
static int verbose;
static int show_stat;
static int check_self_contained_and_connected;
static struct progress *progress;
@ -187,13 +189,13 @@ static int mark_link(struct object *obj, int type, void *data)
/* The content of each linked object must have been checked
or it must be already present in the object database */
static void check_object(struct object *obj)
static unsigned check_object(struct object *obj)
{
if (!obj)
return;
return 0;
if (!(obj->flags & FLAG_LINK))
return;
return 0;
if (!(obj->flags & FLAG_CHECKED)) {
unsigned long size;
@ -201,17 +203,20 @@ static void check_object(struct object *obj)
if (type != obj->type || type <= 0)
die(_("object of unexpected type"));
obj->flags |= FLAG_CHECKED;
return;
return 1;
}
return 0;
}
static void check_objects(void)
static unsigned check_objects(void)
{
unsigned i, max;
unsigned i, max, foreign_nr = 0;
max = get_max_object_index();
for (i = 0; i < max; i++)
check_object(get_indexed_object(i));
foreign_nr += check_object(get_indexed_object(i));
return foreign_nr;
}
@ -756,7 +761,8 @@ static void sha1_object(const void *data, struct object_entry *obj_entry,
obj = parse_object_buffer(sha1, type, size, buf, &eaten);
if (!obj)
die(_("invalid %s"), typename(type));
if (fsck_object(obj, 1, fsck_error_function))
if (do_fsck_object &&
fsck_object(obj, 1, fsck_error_function))
die(_("Error in object"));
if (fsck_walk(obj, mark_link, NULL))
die(_("Not all child objects of %s are reachable"), sha1_to_hex(obj->sha1));
@ -1490,6 +1496,7 @@ int cmd_index_pack(int argc, const char **argv, const char *prefix)
struct pack_idx_entry **idx_objects;
struct pack_idx_option opts;
unsigned char pack_sha1[20];
unsigned foreign_nr = 1; /* zero is a "good" value, assume bad */
if (argc == 2 && !strcmp(argv[1], "-h"))
usage(index_pack_usage);
@ -1511,6 +1518,10 @@ int cmd_index_pack(int argc, const char **argv, const char *prefix)
fix_thin_pack = 1;
} else if (!strcmp(arg, "--strict")) {
strict = 1;
do_fsck_object = 1;
} else if (!strcmp(arg, "--check-self-contained-and-connected")) {
strict = 1;
check_self_contained_and_connected = 1;
} else if (!strcmp(arg, "--verify")) {
verify = 1;
} else if (!strcmp(arg, "--verify-stat")) {
@ -1624,7 +1635,7 @@ int cmd_index_pack(int argc, const char **argv, const char *prefix)
conclude_pack(fix_thin_pack, curr_pack, pack_sha1);
free(deltas);
if (strict)
check_objects();
foreign_nr = check_objects();
if (show_stat)
show_pack_info(stat_only);
@ -1650,5 +1661,11 @@ int cmd_index_pack(int argc, const char **argv, const char *prefix)
if (index_name == NULL)
free((void *) curr_index);
/*
* Let the caller know this pack is not self contained
*/
if (check_self_contained_and_connected && foreign_nr)
return 1;
return 0;
}