Merge branch 'tb/pack-bitmap-traversal-with-boundary'

The object traversal using reachability bitmap done by
"pack-object" has been tweaked to take advantage of the fact that
using "boundary" commits as representative of all the uninteresting
ones can save quite a lot of object enumeration.

* tb/pack-bitmap-traversal-with-boundary:
  pack-bitmap.c: use commit boundary during bitmap traversal
  pack-bitmap.c: extract `fill_in_bitmap()`
  object: add object_array initializer helper function
This commit is contained in:
Junio C Hamano
2023-06-22 16:29:05 -07:00
11 changed files with 284 additions and 40 deletions

View File

@ -14,6 +14,9 @@ feature.experimental::
+
* `fetch.negotiationAlgorithm=skipping` may improve fetch negotiation times by
skipping more commits at a time, reducing the number of round trips.
+
* `pack.useBitmapBoundaryTraversal=true` may improve bitmap traversal times by
walking fewer objects.
feature.manyFiles::
Enable config options that optimize for repos with many files in the

View File

@ -123,6 +123,23 @@ pack.useBitmaps::
true. You should not generally need to turn this off unless
you are debugging pack bitmaps.
pack.useBitmapBoundaryTraversal::
When true, Git will use an experimental algorithm for computing
reachability queries with bitmaps. Instead of building up
complete bitmaps for all of the negated tips and then OR-ing
them together, consider negated tips with existing bitmaps as
additive (i.e. OR-ing them into the result if they exist,
ignoring them otherwise), and build up a bitmap at the boundary
instead.
+
When using this algorithm, Git may include too many objects as a result
of not opening up trees belonging to certain UNINTERESTING commits. This
inexactness matches the non-bitmap traversal algorithm.
+
In many cases, this can provide a speed-up over the exact algorithm,
particularly when there is poor bitmap coverage of the negated side of
the query.
pack.useSparse::
When true, git will default to using the '--sparse' option in
'git pack-objects' when the '--revs' option is present. This

View File

@ -29,6 +29,7 @@ linux-TEST-vars)
export GIT_TEST_DEFAULT_INITIAL_BRANCH_NAME=master
export GIT_TEST_NO_WRITE_REV_INDEX=1
export GIT_TEST_CHECKOUT_WORKERS=2
export GIT_TEST_PACK_USE_BITMAP_BOUNDARY_TRAVERSAL=1
;;
linux-clang)
export GIT_TEST_DEFAULT_HASH=sha1

View File

@ -356,6 +356,12 @@ void object_list_free(struct object_list **list)
*/
static char object_array_slopbuf[1];
void object_array_init(struct object_array *array)
{
struct object_array blank = OBJECT_ARRAY_INIT;
memcpy(array, &blank, sizeof(*array));
}
void add_object_array_with_path(struct object *obj, const char *name,
struct object_array *array,
unsigned mode, const char *path)

View File

@ -58,6 +58,8 @@ struct object_array {
#define OBJECT_ARRAY_INIT { 0 }
void object_array_init(struct object_array *array);
/*
* object flag allocation:
* revision.h: 0---------10 15 23------27

View File

@ -1043,6 +1043,160 @@ static int add_commit_to_bitmap(struct bitmap_index *bitmap_git,
return 1;
}
static struct bitmap *fill_in_bitmap(struct bitmap_index *bitmap_git,
struct rev_info *revs,
struct bitmap *base,
struct bitmap *seen)
{
struct include_data incdata;
struct bitmap_show_data show_data;
if (!base)
base = bitmap_new();
incdata.bitmap_git = bitmap_git;
incdata.base = base;
incdata.seen = seen;
revs->include_check = should_include;
revs->include_check_obj = should_include_obj;
revs->include_check_data = &incdata;
if (prepare_revision_walk(revs))
die(_("revision walk setup failed"));
show_data.bitmap_git = bitmap_git;
show_data.base = base;
traverse_commit_list(revs, show_commit, show_object, &show_data);
revs->include_check = NULL;
revs->include_check_obj = NULL;
revs->include_check_data = NULL;
return base;
}
struct bitmap_boundary_cb {
struct bitmap_index *bitmap_git;
struct bitmap *base;
struct object_array boundary;
};
static void show_boundary_commit(struct commit *commit, void *_data)
{
struct bitmap_boundary_cb *data = _data;
if (commit->object.flags & BOUNDARY)
add_object_array(&commit->object, "", &data->boundary);
if (commit->object.flags & UNINTERESTING) {
if (bitmap_walk_contains(data->bitmap_git, data->base,
&commit->object.oid))
return;
add_commit_to_bitmap(data->bitmap_git, &data->base, commit);
}
}
static void show_boundary_object(struct object *object,
const char *name, void *data)
{
BUG("should not be called");
}
static struct bitmap *find_boundary_objects(struct bitmap_index *bitmap_git,
struct rev_info *revs,
struct object_list *roots)
{
struct bitmap_boundary_cb cb;
struct object_list *root;
unsigned int i;
unsigned int tmp_blobs, tmp_trees, tmp_tags;
int any_missing = 0;
cb.bitmap_git = bitmap_git;
cb.base = bitmap_new();
object_array_init(&cb.boundary);
revs->ignore_missing_links = 1;
/*
* OR in any existing reachability bitmaps among `roots` into
* `cb.base`.
*/
for (root = roots; root; root = root->next) {
struct object *object = root->item;
if (object->type != OBJ_COMMIT ||
bitmap_walk_contains(bitmap_git, cb.base, &object->oid))
continue;
if (add_commit_to_bitmap(bitmap_git, &cb.base,
(struct commit *)object))
continue;
any_missing = 1;
}
if (!any_missing)
goto cleanup;
tmp_blobs = revs->blob_objects;
tmp_trees = revs->tree_objects;
tmp_tags = revs->blob_objects;
revs->blob_objects = 0;
revs->tree_objects = 0;
revs->tag_objects = 0;
/*
* We didn't have complete coverage of the roots. First setup a
* revision walk to (a) OR in any bitmaps that are UNINTERESTING
* between the tips and boundary, and (b) record the boundary.
*/
trace2_region_enter("pack-bitmap", "boundary-prepare", the_repository);
if (prepare_revision_walk(revs))
die("revision walk setup failed");
trace2_region_leave("pack-bitmap", "boundary-prepare", the_repository);
trace2_region_enter("pack-bitmap", "boundary-traverse", the_repository);
revs->boundary = 1;
traverse_commit_list_filtered(revs,
show_boundary_commit,
show_boundary_object,
&cb, NULL);
revs->boundary = 0;
trace2_region_leave("pack-bitmap", "boundary-traverse", the_repository);
revs->blob_objects = tmp_blobs;
revs->tree_objects = tmp_trees;
revs->tag_objects = tmp_tags;
reset_revision_walk();
clear_object_flags(UNINTERESTING);
/*
* Then add the boundary commit(s) as fill-in traversal tips.
*/
trace2_region_enter("pack-bitmap", "boundary-fill-in", the_repository);
for (i = 0; i < cb.boundary.nr; i++) {
struct object *obj = cb.boundary.objects[i].item;
if (bitmap_walk_contains(bitmap_git, cb.base, &obj->oid))
obj->flags |= SEEN;
else
add_pending_object(revs, obj, "");
}
if (revs->pending.nr)
cb.base = fill_in_bitmap(bitmap_git, revs, cb.base, NULL);
trace2_region_leave("pack-bitmap", "boundary-fill-in", the_repository);
cleanup:
object_array_clear(&cb.boundary);
revs->ignore_missing_links = 0;
return cb.base;
}
static struct bitmap *find_objects(struct bitmap_index *bitmap_git,
struct rev_info *revs,
struct object_list *roots,
@ -1109,33 +1263,19 @@ static struct bitmap *find_objects(struct bitmap_index *bitmap_git,
}
if (needs_walk) {
struct include_data incdata;
struct bitmap_show_data show_data;
if (!base)
base = bitmap_new();
incdata.bitmap_git = bitmap_git;
incdata.base = base;
incdata.seen = seen;
revs->include_check = should_include;
revs->include_check_obj = should_include_obj;
revs->include_check_data = &incdata;
if (prepare_revision_walk(revs))
die(_("revision walk setup failed"));
show_data.bitmap_git = bitmap_git;
show_data.base = base;
traverse_commit_list(revs,
show_commit, show_object,
&show_data);
revs->include_check = NULL;
revs->include_check_obj = NULL;
revs->include_check_data = NULL;
/*
* This fill-in traversal may walk over some objects
* again, since we have already traversed in order to
* find the boundary.
*
* But this extra walk should be extremely cheap, since
* all commit objects are loaded into memory, and
* because we skip walking to parents that are
* UNINTERESTING, since it will be marked in the haves
* bitmap already (or it has an on-disk bitmap, since
* OR-ing it in covers all of its ancestors).
*/
base = fill_in_bitmap(bitmap_git, revs, base, seen);
}
return base;
@ -1528,6 +1668,7 @@ struct bitmap_index *prepare_bitmap_walk(struct rev_info *revs,
int filter_provided_objects)
{
unsigned int i;
int use_boundary_traversal;
struct object_list *wants = NULL;
struct object_list *haves = NULL;
@ -1578,13 +1719,21 @@ struct bitmap_index *prepare_bitmap_walk(struct rev_info *revs,
object_list_insert(object, &wants);
}
/*
* if we have a HAVES list, but none of those haves is contained
* in the packfile that has a bitmap, we don't have anything to
* optimize here
*/
if (haves && !in_bitmapped_pack(bitmap_git, haves))
goto cleanup;
use_boundary_traversal = git_env_bool(GIT_TEST_PACK_USE_BITMAP_BOUNDARY_TRAVERSAL, -1);
if (use_boundary_traversal < 0) {
prepare_repo_settings(revs->repo);
use_boundary_traversal = revs->repo->settings.pack_use_bitmap_boundary_traversal;
}
if (!use_boundary_traversal) {
/*
* if we have a HAVES list, but none of those haves is contained
* in the packfile that has a bitmap, we don't have anything to
* optimize here
*/
if (haves && !in_bitmapped_pack(bitmap_git, haves))
goto cleanup;
}
/* if we don't want anything, we're done here */
if (!wants)
@ -1598,18 +1747,32 @@ struct bitmap_index *prepare_bitmap_walk(struct rev_info *revs,
if (load_bitmap(revs->repo, bitmap_git) < 0)
goto cleanup;
object_array_clear(&revs->pending);
if (!use_boundary_traversal)
object_array_clear(&revs->pending);
if (haves) {
revs->ignore_missing_links = 1;
haves_bitmap = find_objects(bitmap_git, revs, haves, NULL);
reset_revision_walk();
revs->ignore_missing_links = 0;
if (use_boundary_traversal) {
trace2_region_enter("pack-bitmap", "haves/boundary", the_repository);
haves_bitmap = find_boundary_objects(bitmap_git, revs, haves);
trace2_region_leave("pack-bitmap", "haves/boundary", the_repository);
} else {
trace2_region_enter("pack-bitmap", "haves/classic", the_repository);
revs->ignore_missing_links = 1;
haves_bitmap = find_objects(bitmap_git, revs, haves, NULL);
reset_revision_walk();
revs->ignore_missing_links = 0;
trace2_region_leave("pack-bitmap", "haves/classic", the_repository);
}
if (!haves_bitmap)
BUG("failed to perform bitmap walk");
}
if (use_boundary_traversal) {
object_array_clear(&revs->pending);
reset_revision_walk();
}
wants_bitmap = find_objects(bitmap_git, revs, wants, haves_bitmap);
if (!wants_bitmap)

View File

@ -62,6 +62,10 @@ void traverse_bitmap_commit_list(struct bitmap_index *,
void test_bitmap_walk(struct rev_info *revs);
int test_bitmap_commits(struct repository *r);
int test_bitmap_hashes(struct repository *r);
#define GIT_TEST_PACK_USE_BITMAP_BOUNDARY_TRAVERSAL \
"GIT_TEST_PACK_USE_BITMAP_BOUNDARY_TRAVERSAL"
struct bitmap_index *prepare_bitmap_walk(struct rev_info *revs,
int filter_provided_objects);
uint32_t midx_preferred_pack(struct bitmap_index *bitmap_git);

View File

@ -41,8 +41,10 @@ void prepare_repo_settings(struct repository *r)
repo_cfg_bool(r, "feature.experimental", &experimental, 0);
/* Defaults modified by feature.* */
if (experimental)
if (experimental) {
r->settings.fetch_negotiation_algorithm = FETCH_NEGOTIATION_SKIPPING;
r->settings.pack_use_bitmap_boundary_traversal = 1;
}
if (manyfiles) {
r->settings.index_version = 4;
r->settings.index_skip_hash = 1;
@ -62,6 +64,9 @@ void prepare_repo_settings(struct repository *r)
repo_cfg_bool(r, "index.sparse", &r->settings.sparse_index, 0);
repo_cfg_bool(r, "index.skiphash", &r->settings.index_skip_hash, r->settings.index_skip_hash);
repo_cfg_bool(r, "pack.readreverseindex", &r->settings.pack_read_reverse_index, 1);
repo_cfg_bool(r, "pack.usebitmapboundarytraversal",
&r->settings.pack_use_bitmap_boundary_traversal,
r->settings.pack_use_bitmap_boundary_traversal);
/*
* The GIT_TEST_MULTI_PACK_INDEX variable is special in that

View File

@ -37,6 +37,7 @@ struct repo_settings {
int command_requires_full_index;
int sparse_index;
int pack_read_reverse_index;
int pack_use_bitmap_boundary_traversal;
struct fsmonitor_settings *fsmonitor; /* lazily loaded */

View File

@ -442,6 +442,10 @@ GIT_TEST_INDEX_VERSION=<n> exercises the index read/write code path
for the index version specified. Can be set to any valid version
(currently 2, 3, or 4).
GIT_TEST_PACK_USE_BITMAP_BOUNDARY_TRAVERSAL=<boolean> if enabled will
use the boundary-based bitmap traversal algorithm. See the documentation
of `pack.useBitmapBoundaryTraversal` for more details.
GIT_TEST_PACK_SPARSE=<boolean> if disabled will default the pack-objects
builtin to use the non-sparse object walk. This can still be overridden by
the --sparse command-line argument.

View File

@ -9,6 +9,10 @@ test_description='exercise basic bitmap functionality'
# their place.
GIT_TEST_MULTI_PACK_INDEX_WRITE_BITMAP=0
# Likewise, allow individual tests to control whether or not they use
# the boundary-based traversal.
sane_unset GIT_TEST_PACK_USE_BITMAP_BOUNDARY_TRAVERSAL
objpath () {
echo ".git/objects/$(echo "$1" | sed -e 's|\(..\)|\1/|')"
}
@ -457,6 +461,13 @@ test_bitmap_cases () {
test_bitmap_cases
GIT_TEST_PACK_USE_BITMAP_BOUNDARY_TRAVERSAL=1
export GIT_TEST_PACK_USE_BITMAP_BOUNDARY_TRAVERSAL
test_bitmap_cases
sane_unset GIT_TEST_PACK_USE_BITMAP_BOUNDARY_TRAVERSAL
test_expect_success 'incremental repack fails when bitmaps are requested' '
test_commit more-1 &&
test_must_fail git repack -d 2>err &&
@ -468,6 +479,33 @@ test_expect_success 'incremental repack can disable bitmaps' '
git repack -d --no-write-bitmap-index
'
test_expect_success 'boundary-based traversal is used when requested' '
git repack -a -d --write-bitmap-index &&
for argv in \
"git -c pack.useBitmapBoundaryTraversal=true" \
"git -c feature.experimental=true" \
"GIT_TEST_PACK_USE_BITMAP_BOUNDARY_TRAVERSAL=1 git"
do
eval "GIT_TRACE2_EVENT=1 $argv rev-list --objects \
--use-bitmap-index second..other 2>perf" &&
grep "\"region_enter\".*\"label\":\"haves/boundary\"" perf ||
return 1
done &&
for argv in \
"git -c pack.useBitmapBoundaryTraversal=false" \
"git -c feature.experimental=true -c pack.useBitmapBoundaryTraversal=false" \
"GIT_TEST_PACK_USE_BITMAP_BOUNDARY_TRAVERSAL=0 git -c pack.useBitmapBoundaryTraversal=true" \
"GIT_TEST_PACK_USE_BITMAP_BOUNDARY_TRAVERSAL=0 git -c feature.experimental=true"
do
eval "GIT_TRACE2_EVENT=1 $argv rev-list --objects \
--use-bitmap-index second..other 2>perf" &&
grep "\"region_enter\".*\"label\":\"haves/classic\"" perf ||
return 1
done
'
test_bitmap_cases "pack.writeBitmapLookupTable"
test_expect_success 'verify writing bitmap lookup table when enabled' '