Merge branch 'tb/pack-bitmap-traversal-with-boundary'

The object traversal using reachability bitmaps done by
"pack-objects" has been tweaked to take advantage of the fact that
using "boundary" commits as representatives of all the uninteresting
ones can save quite a lot of object enumeration.

* tb/pack-bitmap-traversal-with-boundary:
  pack-bitmap.c: use commit boundary during bitmap traversal
  pack-bitmap.c: extract `fill_in_bitmap()`
  object: add object_array initializer helper function
This commit is contained in:
Junio C Hamano
2023-06-22 16:29:05 -07:00
11 changed files with 284 additions and 40 deletions

View File

@ -14,6 +14,9 @@ feature.experimental::
+ +
* `fetch.negotiationAlgorithm=skipping` may improve fetch negotiation times by * `fetch.negotiationAlgorithm=skipping` may improve fetch negotiation times by
skipping more commits at a time, reducing the number of round trips. skipping more commits at a time, reducing the number of round trips.
+
* `pack.useBitmapBoundaryTraversal=true` may improve bitmap traversal times by
walking fewer objects.
feature.manyFiles:: feature.manyFiles::
Enable config options that optimize for repos with many files in the Enable config options that optimize for repos with many files in the

View File

@ -123,6 +123,23 @@ pack.useBitmaps::
true. You should not generally need to turn this off unless true. You should not generally need to turn this off unless
you are debugging pack bitmaps. you are debugging pack bitmaps.
pack.useBitmapBoundaryTraversal::
When true, Git will use an experimental algorithm for computing
reachability queries with bitmaps. Instead of building up
complete bitmaps for all of the negated tips and then OR-ing
them together, consider negated tips with existing bitmaps as
additive (i.e. OR-ing them into the result if they exist,
ignoring them otherwise), and build up a bitmap at the boundary
instead.
+
When using this algorithm, Git may include too many objects as a result
of not opening up trees belonging to certain UNINTERESTING commits. This
inexactness matches the non-bitmap traversal algorithm.
+
In many cases, this can provide a speed-up over the exact algorithm,
particularly when there is poor bitmap coverage of the negated side of
the query.
pack.useSparse:: pack.useSparse::
When true, git will default to using the '--sparse' option in When true, git will default to using the '--sparse' option in
'git pack-objects' when the '--revs' option is present. This 'git pack-objects' when the '--revs' option is present. This

View File

@ -29,6 +29,7 @@ linux-TEST-vars)
export GIT_TEST_DEFAULT_INITIAL_BRANCH_NAME=master export GIT_TEST_DEFAULT_INITIAL_BRANCH_NAME=master
export GIT_TEST_NO_WRITE_REV_INDEX=1 export GIT_TEST_NO_WRITE_REV_INDEX=1
export GIT_TEST_CHECKOUT_WORKERS=2 export GIT_TEST_CHECKOUT_WORKERS=2
export GIT_TEST_PACK_USE_BITMAP_BOUNDARY_TRAVERSAL=1
;; ;;
linux-clang) linux-clang)
export GIT_TEST_DEFAULT_HASH=sha1 export GIT_TEST_DEFAULT_HASH=sha1

View File

@ -356,6 +356,12 @@ void object_list_free(struct object_list **list)
*/ */
static char object_array_slopbuf[1]; static char object_array_slopbuf[1];
void object_array_init(struct object_array *array)
{
struct object_array blank = OBJECT_ARRAY_INIT;
memcpy(array, &blank, sizeof(*array));
}
void add_object_array_with_path(struct object *obj, const char *name, void add_object_array_with_path(struct object *obj, const char *name,
struct object_array *array, struct object_array *array,
unsigned mode, const char *path) unsigned mode, const char *path)

View File

@ -58,6 +58,8 @@ struct object_array {
#define OBJECT_ARRAY_INIT { 0 } #define OBJECT_ARRAY_INIT { 0 }
void object_array_init(struct object_array *array);
/* /*
* object flag allocation: * object flag allocation:
* revision.h: 0---------10 15 23------27 * revision.h: 0---------10 15 23------27

View File

@ -1043,6 +1043,160 @@ static int add_commit_to_bitmap(struct bitmap_index *bitmap_git,
return 1; return 1;
} }
static struct bitmap *fill_in_bitmap(struct bitmap_index *bitmap_git,
struct rev_info *revs,
struct bitmap *base,
struct bitmap *seen)
{
struct include_data incdata;
struct bitmap_show_data show_data;
if (!base)
base = bitmap_new();
incdata.bitmap_git = bitmap_git;
incdata.base = base;
incdata.seen = seen;
revs->include_check = should_include;
revs->include_check_obj = should_include_obj;
revs->include_check_data = &incdata;
if (prepare_revision_walk(revs))
die(_("revision walk setup failed"));
show_data.bitmap_git = bitmap_git;
show_data.base = base;
traverse_commit_list(revs, show_commit, show_object, &show_data);
revs->include_check = NULL;
revs->include_check_obj = NULL;
revs->include_check_data = NULL;
return base;
}
/*
 * Callback state shared between find_boundary_objects(), which sets it
 * up and owns it, and show_boundary_commit(), which updates it for each
 * commit reported by the boundary walk.
 */
struct bitmap_boundary_cb {
/* bitmap index passed to bitmap_walk_contains() and add_commit_to_bitmap() */
struct bitmap_index *bitmap_git;
/* result bitmap that UNINTERESTING commits are added to */
struct bitmap *base;
/* commits that were reported with the BOUNDARY flag set */
struct object_array boundary;
};
static void show_boundary_commit(struct commit *commit, void *_data)
{
struct bitmap_boundary_cb *data = _data;
if (commit->object.flags & BOUNDARY)
add_object_array(&commit->object, "", &data->boundary);
if (commit->object.flags & UNINTERESTING) {
if (bitmap_walk_contains(data->bitmap_git, data->base,
&commit->object.oid))
return;
add_commit_to_bitmap(data->bitmap_git, &data->base, commit);
}
}
/*
 * Object callback for the boundary walk in find_boundary_objects().
 *
 * That function clears revs->blob_objects, revs->tree_objects and
 * revs->tag_objects before traversing, which is presumably why no
 * non-commit object is expected to be reported; getting here is
 * treated as a programming error.  All parameters are ignored.
 */
static void show_boundary_object(struct object *object,
const char *name, void *data)
{
BUG("should not be called");
}
/*
 * Build the bitmap for the negated side of a query (`roots`) using the
 * commit boundary instead of fully expanding every negated tip.
 *
 * The work is done in up to three steps:
 *
 *  1. OR the bitmap of every commit in `roots` into the result via
 *     add_commit_to_bitmap().  If no root is left uncovered, we are
 *     done.
 *  2. Otherwise run a commits-only revision walk with `revs->boundary`
 *     set, letting show_boundary_commit() add UNINTERESTING commits to
 *     the result and collect the BOUNDARY commits.
 *  3. Queue every boundary commit that the result does not yet contain
 *     as a pending tip and let fill_in_bitmap() complete the result.
 *
 * `revs->ignore_missing_links` is forced to 1 for the duration of the
 * call and set back to 0 on return.  The blob/tree/tag object settings
 * of `revs` are saved before the boundary walk and restored after it.
 *
 * Dies if the boundary revision walk cannot be prepared.  Returns the
 * accumulated bitmap.
 */
static struct bitmap *find_boundary_objects(struct bitmap_index *bitmap_git,
					    struct rev_info *revs,
					    struct object_list *roots)
{
	struct bitmap_boundary_cb cb;
	struct object_list *root;
	unsigned int i;
	unsigned int tmp_blobs, tmp_trees, tmp_tags;
	int any_missing = 0;

	cb.bitmap_git = bitmap_git;
	cb.base = bitmap_new();
	object_array_init(&cb.boundary);

	revs->ignore_missing_links = 1;

	/*
	 * OR in any existing reachability bitmaps among `roots` into
	 * `cb.base`.
	 */
	for (root = roots; root; root = root->next) {
		struct object *object = root->item;

		if (object->type != OBJ_COMMIT ||
		    bitmap_walk_contains(bitmap_git, cb.base, &object->oid))
			continue;

		/* A non-zero return means the root was OR-ed in. */
		if (add_commit_to_bitmap(bitmap_git, &cb.base,
					 (struct commit *)object))
			continue;

		any_missing = 1;
	}

	if (!any_missing)
		goto cleanup;

	/*
	 * The boundary walk below only cares about commits.  Remember the
	 * caller's object settings so they can be restored afterwards;
	 * each temporary must be saved from the field it is later
	 * restored into.
	 */
	tmp_blobs = revs->blob_objects;
	tmp_trees = revs->tree_objects;
	tmp_tags = revs->tag_objects;

	revs->blob_objects = 0;
	revs->tree_objects = 0;
	revs->tag_objects = 0;

	/*
	 * We didn't have complete coverage of the roots. First setup a
	 * revision walk to (a) OR in any bitmaps that are UNINTERESTING
	 * between the tips and boundary, and (b) record the boundary.
	 */
	trace2_region_enter("pack-bitmap", "boundary-prepare", the_repository);
	if (prepare_revision_walk(revs))
		die(_("revision walk setup failed"));
	trace2_region_leave("pack-bitmap", "boundary-prepare", the_repository);

	trace2_region_enter("pack-bitmap", "boundary-traverse", the_repository);
	revs->boundary = 1;
	traverse_commit_list_filtered(revs,
				      show_boundary_commit,
				      show_boundary_object,
				      &cb, NULL);
	revs->boundary = 0;
	trace2_region_leave("pack-bitmap", "boundary-traverse", the_repository);

	revs->blob_objects = tmp_blobs;
	revs->tree_objects = tmp_trees;
	revs->tag_objects = tmp_tags;

	reset_revision_walk();
	clear_object_flags(UNINTERESTING);

	/*
	 * Then add the boundary commit(s) as fill-in traversal tips.
	 */
	trace2_region_enter("pack-bitmap", "boundary-fill-in", the_repository);
	for (i = 0; i < cb.boundary.nr; i++) {
		struct object *obj = cb.boundary.objects[i].item;

		/* Already covered by the result: mark it SEEN, don't queue it. */
		if (bitmap_walk_contains(bitmap_git, cb.base, &obj->oid))
			obj->flags |= SEEN;
		else
			add_pending_object(revs, obj, "");
	}
	if (revs->pending.nr)
		cb.base = fill_in_bitmap(bitmap_git, revs, cb.base, NULL);
	trace2_region_leave("pack-bitmap", "boundary-fill-in", the_repository);

cleanup:
	object_array_clear(&cb.boundary);
	revs->ignore_missing_links = 0;

	return cb.base;
}
static struct bitmap *find_objects(struct bitmap_index *bitmap_git, static struct bitmap *find_objects(struct bitmap_index *bitmap_git,
struct rev_info *revs, struct rev_info *revs,
struct object_list *roots, struct object_list *roots,
@ -1109,33 +1263,19 @@ static struct bitmap *find_objects(struct bitmap_index *bitmap_git,
} }
if (needs_walk) { if (needs_walk) {
struct include_data incdata; /*
struct bitmap_show_data show_data; * This fill-in traversal may walk over some objects
* again, since we have already traversed in order to
if (!base) * find the boundary.
base = bitmap_new(); *
* But this extra walk should be extremely cheap, since
incdata.bitmap_git = bitmap_git; * all commit objects are loaded into memory, and
incdata.base = base; * because we skip walking to parents that are
incdata.seen = seen; * UNINTERESTING, since it will be marked in the haves
* bitmap already (or it has an on-disk bitmap, since
revs->include_check = should_include; * OR-ing it in covers all of its ancestors).
revs->include_check_obj = should_include_obj; */
revs->include_check_data = &incdata; base = fill_in_bitmap(bitmap_git, revs, base, seen);
if (prepare_revision_walk(revs))
die(_("revision walk setup failed"));
show_data.bitmap_git = bitmap_git;
show_data.base = base;
traverse_commit_list(revs,
show_commit, show_object,
&show_data);
revs->include_check = NULL;
revs->include_check_obj = NULL;
revs->include_check_data = NULL;
} }
return base; return base;
@ -1528,6 +1668,7 @@ struct bitmap_index *prepare_bitmap_walk(struct rev_info *revs,
int filter_provided_objects) int filter_provided_objects)
{ {
unsigned int i; unsigned int i;
int use_boundary_traversal;
struct object_list *wants = NULL; struct object_list *wants = NULL;
struct object_list *haves = NULL; struct object_list *haves = NULL;
@ -1578,13 +1719,21 @@ struct bitmap_index *prepare_bitmap_walk(struct rev_info *revs,
object_list_insert(object, &wants); object_list_insert(object, &wants);
} }
/* use_boundary_traversal = git_env_bool(GIT_TEST_PACK_USE_BITMAP_BOUNDARY_TRAVERSAL, -1);
* if we have a HAVES list, but none of those haves is contained if (use_boundary_traversal < 0) {
* in the packfile that has a bitmap, we don't have anything to prepare_repo_settings(revs->repo);
* optimize here use_boundary_traversal = revs->repo->settings.pack_use_bitmap_boundary_traversal;
*/ }
if (haves && !in_bitmapped_pack(bitmap_git, haves))
goto cleanup; if (!use_boundary_traversal) {
/*
* if we have a HAVES list, but none of those haves is contained
* in the packfile that has a bitmap, we don't have anything to
* optimize here
*/
if (haves && !in_bitmapped_pack(bitmap_git, haves))
goto cleanup;
}
/* if we don't want anything, we're done here */ /* if we don't want anything, we're done here */
if (!wants) if (!wants)
@ -1598,18 +1747,32 @@ struct bitmap_index *prepare_bitmap_walk(struct rev_info *revs,
if (load_bitmap(revs->repo, bitmap_git) < 0) if (load_bitmap(revs->repo, bitmap_git) < 0)
goto cleanup; goto cleanup;
object_array_clear(&revs->pending); if (!use_boundary_traversal)
object_array_clear(&revs->pending);
if (haves) { if (haves) {
revs->ignore_missing_links = 1; if (use_boundary_traversal) {
haves_bitmap = find_objects(bitmap_git, revs, haves, NULL); trace2_region_enter("pack-bitmap", "haves/boundary", the_repository);
reset_revision_walk(); haves_bitmap = find_boundary_objects(bitmap_git, revs, haves);
revs->ignore_missing_links = 0; trace2_region_leave("pack-bitmap", "haves/boundary", the_repository);
} else {
trace2_region_enter("pack-bitmap", "haves/classic", the_repository);
revs->ignore_missing_links = 1;
haves_bitmap = find_objects(bitmap_git, revs, haves, NULL);
reset_revision_walk();
revs->ignore_missing_links = 0;
trace2_region_leave("pack-bitmap", "haves/classic", the_repository);
}
if (!haves_bitmap) if (!haves_bitmap)
BUG("failed to perform bitmap walk"); BUG("failed to perform bitmap walk");
} }
if (use_boundary_traversal) {
object_array_clear(&revs->pending);
reset_revision_walk();
}
wants_bitmap = find_objects(bitmap_git, revs, wants, haves_bitmap); wants_bitmap = find_objects(bitmap_git, revs, wants, haves_bitmap);
if (!wants_bitmap) if (!wants_bitmap)

View File

@ -62,6 +62,10 @@ void traverse_bitmap_commit_list(struct bitmap_index *,
void test_bitmap_walk(struct rev_info *revs); void test_bitmap_walk(struct rev_info *revs);
int test_bitmap_commits(struct repository *r); int test_bitmap_commits(struct repository *r);
int test_bitmap_hashes(struct repository *r); int test_bitmap_hashes(struct repository *r);
#define GIT_TEST_PACK_USE_BITMAP_BOUNDARY_TRAVERSAL \
"GIT_TEST_PACK_USE_BITMAP_BOUNDARY_TRAVERSAL"
struct bitmap_index *prepare_bitmap_walk(struct rev_info *revs, struct bitmap_index *prepare_bitmap_walk(struct rev_info *revs,
int filter_provided_objects); int filter_provided_objects);
uint32_t midx_preferred_pack(struct bitmap_index *bitmap_git); uint32_t midx_preferred_pack(struct bitmap_index *bitmap_git);

View File

@ -41,8 +41,10 @@ void prepare_repo_settings(struct repository *r)
repo_cfg_bool(r, "feature.experimental", &experimental, 0); repo_cfg_bool(r, "feature.experimental", &experimental, 0);
/* Defaults modified by feature.* */ /* Defaults modified by feature.* */
if (experimental) if (experimental) {
r->settings.fetch_negotiation_algorithm = FETCH_NEGOTIATION_SKIPPING; r->settings.fetch_negotiation_algorithm = FETCH_NEGOTIATION_SKIPPING;
r->settings.pack_use_bitmap_boundary_traversal = 1;
}
if (manyfiles) { if (manyfiles) {
r->settings.index_version = 4; r->settings.index_version = 4;
r->settings.index_skip_hash = 1; r->settings.index_skip_hash = 1;
@ -62,6 +64,9 @@ void prepare_repo_settings(struct repository *r)
repo_cfg_bool(r, "index.sparse", &r->settings.sparse_index, 0); repo_cfg_bool(r, "index.sparse", &r->settings.sparse_index, 0);
repo_cfg_bool(r, "index.skiphash", &r->settings.index_skip_hash, r->settings.index_skip_hash); repo_cfg_bool(r, "index.skiphash", &r->settings.index_skip_hash, r->settings.index_skip_hash);
repo_cfg_bool(r, "pack.readreverseindex", &r->settings.pack_read_reverse_index, 1); repo_cfg_bool(r, "pack.readreverseindex", &r->settings.pack_read_reverse_index, 1);
repo_cfg_bool(r, "pack.usebitmapboundarytraversal",
&r->settings.pack_use_bitmap_boundary_traversal,
r->settings.pack_use_bitmap_boundary_traversal);
/* /*
* The GIT_TEST_MULTI_PACK_INDEX variable is special in that * The GIT_TEST_MULTI_PACK_INDEX variable is special in that

View File

@ -37,6 +37,7 @@ struct repo_settings {
int command_requires_full_index; int command_requires_full_index;
int sparse_index; int sparse_index;
int pack_read_reverse_index; int pack_read_reverse_index;
int pack_use_bitmap_boundary_traversal;
struct fsmonitor_settings *fsmonitor; /* lazily loaded */ struct fsmonitor_settings *fsmonitor; /* lazily loaded */

View File

@ -442,6 +442,10 @@ GIT_TEST_INDEX_VERSION=<n> exercises the index read/write code path
for the index version specified. Can be set to any valid version for the index version specified. Can be set to any valid version
(currently 2, 3, or 4). (currently 2, 3, or 4).
GIT_TEST_PACK_USE_BITMAP_BOUNDARY_TRAVERSAL=<boolean> if enabled will
use the boundary-based bitmap traversal algorithm. See the documentation
of `pack.useBitmapBoundaryTraversal` for more details.
GIT_TEST_PACK_SPARSE=<boolean> if disabled will default the pack-objects GIT_TEST_PACK_SPARSE=<boolean> if disabled will default the pack-objects
builtin to use the non-sparse object walk. This can still be overridden by builtin to use the non-sparse object walk. This can still be overridden by
the --sparse command-line argument. the --sparse command-line argument.

View File

@ -9,6 +9,10 @@ test_description='exercise basic bitmap functionality'
# their place. # their place.
GIT_TEST_MULTI_PACK_INDEX_WRITE_BITMAP=0 GIT_TEST_MULTI_PACK_INDEX_WRITE_BITMAP=0
# Likewise, allow individual tests to control whether or not they use
# the boundary-based traversal.
sane_unset GIT_TEST_PACK_USE_BITMAP_BOUNDARY_TRAVERSAL
objpath () { objpath () {
echo ".git/objects/$(echo "$1" | sed -e 's|\(..\)|\1/|')" echo ".git/objects/$(echo "$1" | sed -e 's|\(..\)|\1/|')"
} }
@ -457,6 +461,13 @@ test_bitmap_cases () {
test_bitmap_cases test_bitmap_cases
GIT_TEST_PACK_USE_BITMAP_BOUNDARY_TRAVERSAL=1
export GIT_TEST_PACK_USE_BITMAP_BOUNDARY_TRAVERSAL
test_bitmap_cases
sane_unset GIT_TEST_PACK_USE_BITMAP_BOUNDARY_TRAVERSAL
test_expect_success 'incremental repack fails when bitmaps are requested' ' test_expect_success 'incremental repack fails when bitmaps are requested' '
test_commit more-1 && test_commit more-1 &&
test_must_fail git repack -d 2>err && test_must_fail git repack -d 2>err &&
@ -468,6 +479,33 @@ test_expect_success 'incremental repack can disable bitmaps' '
git repack -d --no-write-bitmap-index git repack -d --no-write-bitmap-index
' '
# Verify which "haves" traversal is selected by inspecting the trace2
# event output of `rev-list --use-bitmap-index`:
#
#  - the "haves/boundary" region must appear when the feature is
#    enabled through pack.useBitmapBoundaryTraversal=true,
#    feature.experimental=true, or
#    GIT_TEST_PACK_USE_BITMAP_BOUNDARY_TRAVERSAL=1;
#  - the "haves/classic" region must appear when the config is
#    explicitly false (even with feature.experimental=true), or when
#    the environment variable is 0 (even with either config enabled).
test_expect_success 'boundary-based traversal is used when requested' '
git repack -a -d --write-bitmap-index &&
for argv in \
"git -c pack.useBitmapBoundaryTraversal=true" \
"git -c feature.experimental=true" \
"GIT_TEST_PACK_USE_BITMAP_BOUNDARY_TRAVERSAL=1 git"
do
eval "GIT_TRACE2_EVENT=1 $argv rev-list --objects \
--use-bitmap-index second..other 2>perf" &&
grep "\"region_enter\".*\"label\":\"haves/boundary\"" perf ||
return 1
done &&
for argv in \
"git -c pack.useBitmapBoundaryTraversal=false" \
"git -c feature.experimental=true -c pack.useBitmapBoundaryTraversal=false" \
"GIT_TEST_PACK_USE_BITMAP_BOUNDARY_TRAVERSAL=0 git -c pack.useBitmapBoundaryTraversal=true" \
"GIT_TEST_PACK_USE_BITMAP_BOUNDARY_TRAVERSAL=0 git -c feature.experimental=true"
do
eval "GIT_TRACE2_EVENT=1 $argv rev-list --objects \
--use-bitmap-index second..other 2>perf" &&
grep "\"region_enter\".*\"label\":\"haves/classic\"" perf ||
return 1
done
'
test_bitmap_cases "pack.writeBitmapLookupTable" test_bitmap_cases "pack.writeBitmapLookupTable"
test_expect_success 'verify writing bitmap lookup table when enabled' ' test_expect_success 'verify writing bitmap lookup table when enabled' '