Merge branch 'tb/pack-bitmap-traversal-with-boundary'
The object traversal using reachability bitmap done by "pack-object" has been tweaked to take advantage of the fact that using "boundary" commits as representative of all the uninteresting ones can save quite a lot of object enumeration. * tb/pack-bitmap-traversal-with-boundary: pack-bitmap.c: use commit boundary during bitmap traversal pack-bitmap.c: extract `fill_in_bitmap()` object: add object_array initializer helper function
This commit is contained in:
@ -14,6 +14,9 @@ feature.experimental::
|
||||
+
|
||||
* `fetch.negotiationAlgorithm=skipping` may improve fetch negotiation times by
|
||||
skipping more commits at a time, reducing the number of round trips.
|
||||
+
|
||||
* `pack.useBitmapBoundaryTraversal=true` may improve bitmap traversal times by
|
||||
walking fewer objects.
|
||||
|
||||
feature.manyFiles::
|
||||
Enable config options that optimize for repos with many files in the
|
||||
|
@ -123,6 +123,23 @@ pack.useBitmaps::
|
||||
true. You should not generally need to turn this off unless
|
||||
you are debugging pack bitmaps.
|
||||
|
||||
pack.useBitmapBoundaryTraversal::
|
||||
When true, Git will use an experimental algorithm for computing
|
||||
reachability queries with bitmaps. Instead of building up
|
||||
complete bitmaps for all of the negated tips and then OR-ing
|
||||
them together, consider negated tips with existing bitmaps as
|
||||
additive (i.e. OR-ing them into the result if they exist,
|
||||
ignoring them otherwise), and build up a bitmap at the boundary
|
||||
instead.
|
||||
+
|
||||
When using this algorithm, Git may include too many objects as a result
|
||||
of not opening up trees belonging to certain UNINTERESTING commits. This
|
||||
inexactness matches the non-bitmap traversal algorithm.
|
||||
+
|
||||
In many cases, this can provide a speed-up over the exact algorithm,
|
||||
particularly when there is poor bitmap coverage of the negated side of
|
||||
the query.
|
||||
|
||||
pack.useSparse::
|
||||
When true, git will default to using the '--sparse' option in
|
||||
'git pack-objects' when the '--revs' option is present. This
|
||||
|
@ -29,6 +29,7 @@ linux-TEST-vars)
|
||||
export GIT_TEST_DEFAULT_INITIAL_BRANCH_NAME=master
|
||||
export GIT_TEST_NO_WRITE_REV_INDEX=1
|
||||
export GIT_TEST_CHECKOUT_WORKERS=2
|
||||
export GIT_TEST_PACK_USE_BITMAP_BOUNDARY_TRAVERSAL=1
|
||||
;;
|
||||
linux-clang)
|
||||
export GIT_TEST_DEFAULT_HASH=sha1
|
||||
|
6
object.c
6
object.c
@ -356,6 +356,12 @@ void object_list_free(struct object_list **list)
|
||||
*/
|
||||
static char object_array_slopbuf[1];
|
||||
|
||||
void object_array_init(struct object_array *array)
|
||||
{
|
||||
struct object_array blank = OBJECT_ARRAY_INIT;
|
||||
memcpy(array, &blank, sizeof(*array));
|
||||
}
|
||||
|
||||
void add_object_array_with_path(struct object *obj, const char *name,
|
||||
struct object_array *array,
|
||||
unsigned mode, const char *path)
|
||||
|
2
object.h
2
object.h
@ -58,6 +58,8 @@ struct object_array {
|
||||
|
||||
#define OBJECT_ARRAY_INIT { 0 }
|
||||
|
||||
void object_array_init(struct object_array *array);
|
||||
|
||||
/*
|
||||
* object flag allocation:
|
||||
* revision.h: 0---------10 15 23------27
|
||||
|
241
pack-bitmap.c
241
pack-bitmap.c
@ -1043,6 +1043,160 @@ static int add_commit_to_bitmap(struct bitmap_index *bitmap_git,
|
||||
return 1;
|
||||
}
|
||||
|
||||
static struct bitmap *fill_in_bitmap(struct bitmap_index *bitmap_git,
|
||||
struct rev_info *revs,
|
||||
struct bitmap *base,
|
||||
struct bitmap *seen)
|
||||
{
|
||||
struct include_data incdata;
|
||||
struct bitmap_show_data show_data;
|
||||
|
||||
if (!base)
|
||||
base = bitmap_new();
|
||||
|
||||
incdata.bitmap_git = bitmap_git;
|
||||
incdata.base = base;
|
||||
incdata.seen = seen;
|
||||
|
||||
revs->include_check = should_include;
|
||||
revs->include_check_obj = should_include_obj;
|
||||
revs->include_check_data = &incdata;
|
||||
|
||||
if (prepare_revision_walk(revs))
|
||||
die(_("revision walk setup failed"));
|
||||
|
||||
show_data.bitmap_git = bitmap_git;
|
||||
show_data.base = base;
|
||||
|
||||
traverse_commit_list(revs, show_commit, show_object, &show_data);
|
||||
|
||||
revs->include_check = NULL;
|
||||
revs->include_check_obj = NULL;
|
||||
revs->include_check_data = NULL;
|
||||
|
||||
return base;
|
||||
}
|
||||
|
||||
struct bitmap_boundary_cb {
|
||||
struct bitmap_index *bitmap_git;
|
||||
struct bitmap *base;
|
||||
|
||||
struct object_array boundary;
|
||||
};
|
||||
|
||||
static void show_boundary_commit(struct commit *commit, void *_data)
|
||||
{
|
||||
struct bitmap_boundary_cb *data = _data;
|
||||
|
||||
if (commit->object.flags & BOUNDARY)
|
||||
add_object_array(&commit->object, "", &data->boundary);
|
||||
|
||||
if (commit->object.flags & UNINTERESTING) {
|
||||
if (bitmap_walk_contains(data->bitmap_git, data->base,
|
||||
&commit->object.oid))
|
||||
return;
|
||||
|
||||
add_commit_to_bitmap(data->bitmap_git, &data->base, commit);
|
||||
}
|
||||
}
|
||||
|
||||
static void show_boundary_object(struct object *object,
|
||||
const char *name, void *data)
|
||||
{
|
||||
BUG("should not be called");
|
||||
}
|
||||
|
||||
static struct bitmap *find_boundary_objects(struct bitmap_index *bitmap_git,
|
||||
struct rev_info *revs,
|
||||
struct object_list *roots)
|
||||
{
|
||||
struct bitmap_boundary_cb cb;
|
||||
struct object_list *root;
|
||||
unsigned int i;
|
||||
unsigned int tmp_blobs, tmp_trees, tmp_tags;
|
||||
int any_missing = 0;
|
||||
|
||||
cb.bitmap_git = bitmap_git;
|
||||
cb.base = bitmap_new();
|
||||
object_array_init(&cb.boundary);
|
||||
|
||||
revs->ignore_missing_links = 1;
|
||||
|
||||
/*
|
||||
* OR in any existing reachability bitmaps among `roots` into
|
||||
* `cb.base`.
|
||||
*/
|
||||
for (root = roots; root; root = root->next) {
|
||||
struct object *object = root->item;
|
||||
if (object->type != OBJ_COMMIT ||
|
||||
bitmap_walk_contains(bitmap_git, cb.base, &object->oid))
|
||||
continue;
|
||||
|
||||
if (add_commit_to_bitmap(bitmap_git, &cb.base,
|
||||
(struct commit *)object))
|
||||
continue;
|
||||
|
||||
any_missing = 1;
|
||||
}
|
||||
|
||||
if (!any_missing)
|
||||
goto cleanup;
|
||||
|
||||
tmp_blobs = revs->blob_objects;
|
||||
tmp_trees = revs->tree_objects;
|
||||
tmp_tags = revs->blob_objects;
|
||||
revs->blob_objects = 0;
|
||||
revs->tree_objects = 0;
|
||||
revs->tag_objects = 0;
|
||||
|
||||
/*
|
||||
* We didn't have complete coverage of the roots. First setup a
|
||||
* revision walk to (a) OR in any bitmaps that are UNINTERESTING
|
||||
* between the tips and boundary, and (b) record the boundary.
|
||||
*/
|
||||
trace2_region_enter("pack-bitmap", "boundary-prepare", the_repository);
|
||||
if (prepare_revision_walk(revs))
|
||||
die("revision walk setup failed");
|
||||
trace2_region_leave("pack-bitmap", "boundary-prepare", the_repository);
|
||||
|
||||
trace2_region_enter("pack-bitmap", "boundary-traverse", the_repository);
|
||||
revs->boundary = 1;
|
||||
traverse_commit_list_filtered(revs,
|
||||
show_boundary_commit,
|
||||
show_boundary_object,
|
||||
&cb, NULL);
|
||||
revs->boundary = 0;
|
||||
trace2_region_leave("pack-bitmap", "boundary-traverse", the_repository);
|
||||
|
||||
revs->blob_objects = tmp_blobs;
|
||||
revs->tree_objects = tmp_trees;
|
||||
revs->tag_objects = tmp_tags;
|
||||
|
||||
reset_revision_walk();
|
||||
clear_object_flags(UNINTERESTING);
|
||||
|
||||
/*
|
||||
* Then add the boundary commit(s) as fill-in traversal tips.
|
||||
*/
|
||||
trace2_region_enter("pack-bitmap", "boundary-fill-in", the_repository);
|
||||
for (i = 0; i < cb.boundary.nr; i++) {
|
||||
struct object *obj = cb.boundary.objects[i].item;
|
||||
if (bitmap_walk_contains(bitmap_git, cb.base, &obj->oid))
|
||||
obj->flags |= SEEN;
|
||||
else
|
||||
add_pending_object(revs, obj, "");
|
||||
}
|
||||
if (revs->pending.nr)
|
||||
cb.base = fill_in_bitmap(bitmap_git, revs, cb.base, NULL);
|
||||
trace2_region_leave("pack-bitmap", "boundary-fill-in", the_repository);
|
||||
|
||||
cleanup:
|
||||
object_array_clear(&cb.boundary);
|
||||
revs->ignore_missing_links = 0;
|
||||
|
||||
return cb.base;
|
||||
}
|
||||
|
||||
static struct bitmap *find_objects(struct bitmap_index *bitmap_git,
|
||||
struct rev_info *revs,
|
||||
struct object_list *roots,
|
||||
@ -1109,33 +1263,19 @@ static struct bitmap *find_objects(struct bitmap_index *bitmap_git,
|
||||
}
|
||||
|
||||
if (needs_walk) {
|
||||
struct include_data incdata;
|
||||
struct bitmap_show_data show_data;
|
||||
|
||||
if (!base)
|
||||
base = bitmap_new();
|
||||
|
||||
incdata.bitmap_git = bitmap_git;
|
||||
incdata.base = base;
|
||||
incdata.seen = seen;
|
||||
|
||||
revs->include_check = should_include;
|
||||
revs->include_check_obj = should_include_obj;
|
||||
revs->include_check_data = &incdata;
|
||||
|
||||
if (prepare_revision_walk(revs))
|
||||
die(_("revision walk setup failed"));
|
||||
|
||||
show_data.bitmap_git = bitmap_git;
|
||||
show_data.base = base;
|
||||
|
||||
traverse_commit_list(revs,
|
||||
show_commit, show_object,
|
||||
&show_data);
|
||||
|
||||
revs->include_check = NULL;
|
||||
revs->include_check_obj = NULL;
|
||||
revs->include_check_data = NULL;
|
||||
/*
|
||||
* This fill-in traversal may walk over some objects
|
||||
* again, since we have already traversed in order to
|
||||
* find the boundary.
|
||||
*
|
||||
* But this extra walk should be extremely cheap, since
|
||||
* all commit objects are loaded into memory, and
|
||||
* because we skip walking to parents that are
|
||||
* UNINTERESTING, since it will be marked in the haves
|
||||
* bitmap already (or it has an on-disk bitmap, since
|
||||
* OR-ing it in covers all of its ancestors).
|
||||
*/
|
||||
base = fill_in_bitmap(bitmap_git, revs, base, seen);
|
||||
}
|
||||
|
||||
return base;
|
||||
@ -1528,6 +1668,7 @@ struct bitmap_index *prepare_bitmap_walk(struct rev_info *revs,
|
||||
int filter_provided_objects)
|
||||
{
|
||||
unsigned int i;
|
||||
int use_boundary_traversal;
|
||||
|
||||
struct object_list *wants = NULL;
|
||||
struct object_list *haves = NULL;
|
||||
@ -1578,13 +1719,21 @@ struct bitmap_index *prepare_bitmap_walk(struct rev_info *revs,
|
||||
object_list_insert(object, &wants);
|
||||
}
|
||||
|
||||
/*
|
||||
* if we have a HAVES list, but none of those haves is contained
|
||||
* in the packfile that has a bitmap, we don't have anything to
|
||||
* optimize here
|
||||
*/
|
||||
if (haves && !in_bitmapped_pack(bitmap_git, haves))
|
||||
goto cleanup;
|
||||
use_boundary_traversal = git_env_bool(GIT_TEST_PACK_USE_BITMAP_BOUNDARY_TRAVERSAL, -1);
|
||||
if (use_boundary_traversal < 0) {
|
||||
prepare_repo_settings(revs->repo);
|
||||
use_boundary_traversal = revs->repo->settings.pack_use_bitmap_boundary_traversal;
|
||||
}
|
||||
|
||||
if (!use_boundary_traversal) {
|
||||
/*
|
||||
* if we have a HAVES list, but none of those haves is contained
|
||||
* in the packfile that has a bitmap, we don't have anything to
|
||||
* optimize here
|
||||
*/
|
||||
if (haves && !in_bitmapped_pack(bitmap_git, haves))
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
/* if we don't want anything, we're done here */
|
||||
if (!wants)
|
||||
@ -1598,18 +1747,32 @@ struct bitmap_index *prepare_bitmap_walk(struct rev_info *revs,
|
||||
if (load_bitmap(revs->repo, bitmap_git) < 0)
|
||||
goto cleanup;
|
||||
|
||||
object_array_clear(&revs->pending);
|
||||
if (!use_boundary_traversal)
|
||||
object_array_clear(&revs->pending);
|
||||
|
||||
if (haves) {
|
||||
revs->ignore_missing_links = 1;
|
||||
haves_bitmap = find_objects(bitmap_git, revs, haves, NULL);
|
||||
reset_revision_walk();
|
||||
revs->ignore_missing_links = 0;
|
||||
if (use_boundary_traversal) {
|
||||
trace2_region_enter("pack-bitmap", "haves/boundary", the_repository);
|
||||
haves_bitmap = find_boundary_objects(bitmap_git, revs, haves);
|
||||
trace2_region_leave("pack-bitmap", "haves/boundary", the_repository);
|
||||
} else {
|
||||
trace2_region_enter("pack-bitmap", "haves/classic", the_repository);
|
||||
revs->ignore_missing_links = 1;
|
||||
haves_bitmap = find_objects(bitmap_git, revs, haves, NULL);
|
||||
reset_revision_walk();
|
||||
revs->ignore_missing_links = 0;
|
||||
trace2_region_leave("pack-bitmap", "haves/classic", the_repository);
|
||||
}
|
||||
|
||||
if (!haves_bitmap)
|
||||
BUG("failed to perform bitmap walk");
|
||||
}
|
||||
|
||||
if (use_boundary_traversal) {
|
||||
object_array_clear(&revs->pending);
|
||||
reset_revision_walk();
|
||||
}
|
||||
|
||||
wants_bitmap = find_objects(bitmap_git, revs, wants, haves_bitmap);
|
||||
|
||||
if (!wants_bitmap)
|
||||
|
@ -62,6 +62,10 @@ void traverse_bitmap_commit_list(struct bitmap_index *,
|
||||
void test_bitmap_walk(struct rev_info *revs);
|
||||
int test_bitmap_commits(struct repository *r);
|
||||
int test_bitmap_hashes(struct repository *r);
|
||||
|
||||
#define GIT_TEST_PACK_USE_BITMAP_BOUNDARY_TRAVERSAL \
|
||||
"GIT_TEST_PACK_USE_BITMAP_BOUNDARY_TRAVERSAL"
|
||||
|
||||
struct bitmap_index *prepare_bitmap_walk(struct rev_info *revs,
|
||||
int filter_provided_objects);
|
||||
uint32_t midx_preferred_pack(struct bitmap_index *bitmap_git);
|
||||
|
@ -41,8 +41,10 @@ void prepare_repo_settings(struct repository *r)
|
||||
repo_cfg_bool(r, "feature.experimental", &experimental, 0);
|
||||
|
||||
/* Defaults modified by feature.* */
|
||||
if (experimental)
|
||||
if (experimental) {
|
||||
r->settings.fetch_negotiation_algorithm = FETCH_NEGOTIATION_SKIPPING;
|
||||
r->settings.pack_use_bitmap_boundary_traversal = 1;
|
||||
}
|
||||
if (manyfiles) {
|
||||
r->settings.index_version = 4;
|
||||
r->settings.index_skip_hash = 1;
|
||||
@ -62,6 +64,9 @@ void prepare_repo_settings(struct repository *r)
|
||||
repo_cfg_bool(r, "index.sparse", &r->settings.sparse_index, 0);
|
||||
repo_cfg_bool(r, "index.skiphash", &r->settings.index_skip_hash, r->settings.index_skip_hash);
|
||||
repo_cfg_bool(r, "pack.readreverseindex", &r->settings.pack_read_reverse_index, 1);
|
||||
repo_cfg_bool(r, "pack.usebitmapboundarytraversal",
|
||||
&r->settings.pack_use_bitmap_boundary_traversal,
|
||||
r->settings.pack_use_bitmap_boundary_traversal);
|
||||
|
||||
/*
|
||||
* The GIT_TEST_MULTI_PACK_INDEX variable is special in that
|
||||
|
@ -37,6 +37,7 @@ struct repo_settings {
|
||||
int command_requires_full_index;
|
||||
int sparse_index;
|
||||
int pack_read_reverse_index;
|
||||
int pack_use_bitmap_boundary_traversal;
|
||||
|
||||
struct fsmonitor_settings *fsmonitor; /* lazily loaded */
|
||||
|
||||
|
4
t/README
4
t/README
@ -442,6 +442,10 @@ GIT_TEST_INDEX_VERSION=<n> exercises the index read/write code path
|
||||
for the index version specified. Can be set to any valid version
|
||||
(currently 2, 3, or 4).
|
||||
|
||||
GIT_TEST_PACK_USE_BITMAP_BOUNDARY_TRAVERSAL=<boolean> if enabled will
|
||||
use the boundary-based bitmap traversal algorithm. See the documentation
|
||||
of `pack.useBitmapBoundaryTraversal` for more details.
|
||||
|
||||
GIT_TEST_PACK_SPARSE=<boolean> if disabled will default the pack-objects
|
||||
builtin to use the non-sparse object walk. This can still be overridden by
|
||||
the --sparse command-line argument.
|
||||
|
@ -9,6 +9,10 @@ test_description='exercise basic bitmap functionality'
|
||||
# their place.
|
||||
GIT_TEST_MULTI_PACK_INDEX_WRITE_BITMAP=0
|
||||
|
||||
# Likewise, allow individual tests to control whether or not they use
|
||||
# the boundary-based traversal.
|
||||
sane_unset GIT_TEST_PACK_USE_BITMAP_BOUNDARY_TRAVERSAL
|
||||
|
||||
objpath () {
|
||||
echo ".git/objects/$(echo "$1" | sed -e 's|\(..\)|\1/|')"
|
||||
}
|
||||
@ -457,6 +461,13 @@ test_bitmap_cases () {
|
||||
|
||||
test_bitmap_cases
|
||||
|
||||
GIT_TEST_PACK_USE_BITMAP_BOUNDARY_TRAVERSAL=1
|
||||
export GIT_TEST_PACK_USE_BITMAP_BOUNDARY_TRAVERSAL
|
||||
|
||||
test_bitmap_cases
|
||||
|
||||
sane_unset GIT_TEST_PACK_USE_BITMAP_BOUNDARY_TRAVERSAL
|
||||
|
||||
test_expect_success 'incremental repack fails when bitmaps are requested' '
|
||||
test_commit more-1 &&
|
||||
test_must_fail git repack -d 2>err &&
|
||||
@ -468,6 +479,33 @@ test_expect_success 'incremental repack can disable bitmaps' '
|
||||
git repack -d --no-write-bitmap-index
|
||||
'
|
||||
|
||||
test_expect_success 'boundary-based traversal is used when requested' '
|
||||
git repack -a -d --write-bitmap-index &&
|
||||
|
||||
for argv in \
|
||||
"git -c pack.useBitmapBoundaryTraversal=true" \
|
||||
"git -c feature.experimental=true" \
|
||||
"GIT_TEST_PACK_USE_BITMAP_BOUNDARY_TRAVERSAL=1 git"
|
||||
do
|
||||
eval "GIT_TRACE2_EVENT=1 $argv rev-list --objects \
|
||||
--use-bitmap-index second..other 2>perf" &&
|
||||
grep "\"region_enter\".*\"label\":\"haves/boundary\"" perf ||
|
||||
return 1
|
||||
done &&
|
||||
|
||||
for argv in \
|
||||
"git -c pack.useBitmapBoundaryTraversal=false" \
|
||||
"git -c feature.experimental=true -c pack.useBitmapBoundaryTraversal=false" \
|
||||
"GIT_TEST_PACK_USE_BITMAP_BOUNDARY_TRAVERSAL=0 git -c pack.useBitmapBoundaryTraversal=true" \
|
||||
"GIT_TEST_PACK_USE_BITMAP_BOUNDARY_TRAVERSAL=0 git -c feature.experimental=true"
|
||||
do
|
||||
eval "GIT_TRACE2_EVENT=1 $argv rev-list --objects \
|
||||
--use-bitmap-index second..other 2>perf" &&
|
||||
grep "\"region_enter\".*\"label\":\"haves/classic\"" perf ||
|
||||
return 1
|
||||
done
|
||||
'
|
||||
|
||||
test_bitmap_cases "pack.writeBitmapLookupTable"
|
||||
|
||||
test_expect_success 'verify writing bitmap lookup table when enabled' '
|
||||
|
Reference in New Issue
Block a user