Merge branch 'tb/pseudo-merge-reachability-bitmap'

Support for pseudo-merge reachability bitmaps has been added. They
allow reachability bitmaps to be stored more efficiently in
repositories that have a very large number of refs.

* tb/pseudo-merge-reachability-bitmap: (26 commits)
  pack-bitmap.c: ensure pseudo-merge offset reads are bounded
  Documentation/technical/bitmap-format.txt: add missing position table
  t/perf: implement performance tests for pseudo-merge bitmaps
  pseudo-merge: implement support for finding existing merges
  ewah: `bitmap_equals_ewah()`
  pack-bitmap: extra trace2 information
  pack-bitmap.c: use pseudo-merges during traversal
  t/test-lib-functions.sh: support `--notick` in `test_commit_bulk()`
  pack-bitmap: implement test helpers for pseudo-merge
  ewah: implement `ewah_bitmap_popcount()`
  pseudo-merge: implement support for reading pseudo-merge commits
  pack-bitmap.c: read pseudo-merge extension
  pseudo-merge: scaffolding for reads
  pack-bitmap: extract `read_bitmap()` function
  pack-bitmap-write.c: write pseudo-merge table
  pseudo-merge: implement support for selecting pseudo-merge commits
  config: introduce `git_config_double()`
  pack-bitmap: make `bitmap_writer_push_bitmapped_commit()` public
  pack-bitmap: implement `bitmap_writer_has_bitmapped_object_id()`
  pack-bitmap-write: support storing pseudo-merge commits
  ...
This commit is contained in:
Junio C Hamano
2024-06-24 16:39:13 -07:00
24 changed files with 2605 additions and 55 deletions

View File

@ -20,6 +20,7 @@
#include "list-objects-filter-options.h"
#include "midx.h"
#include "config.h"
#include "pseudo-merge.h"
/*
* An entry on the bitmap index, representing the bitmap for a given
@ -86,6 +87,9 @@ struct bitmap_index {
*/
unsigned char *table_lookup;
/* This contains the pseudo-merge cache within 'map' (if found). */
struct pseudo_merge_map pseudo_merges;
/*
* Extended index.
*
@ -110,6 +114,13 @@ struct bitmap_index {
unsigned int version;
};
/*
 * File-scope statistics, reported via trace2_data_intmax() under the
 * "bitmap" category near the end of prepare_bitmap_walk().
 */

/*
 * Running sum of the non-zero return values of
 * apply_pseudo_merges_for_commit() and cascade_pseudo_merges().
 */
static int pseudo_merges_satisfied_nr;
/* Number of cascade_pseudo_merges() calls that returned non-zero. */
static int pseudo_merges_cascades_nr;
/* Number of bitmap_for_commit() lookups that returned a bitmap. */
static int existing_bitmaps_hits_nr;
/* Number of bitmap_for_commit() lookups that returned nothing. */
static int existing_bitmaps_misses_nr;
/*
 * Roots that find_objects() marked SEEN instead of queueing for a walk.
 * NOTE(review): the deciding condition is outside the visible hunk;
 * presumably "already covered by the base bitmap" -- confirm.
 */
static int roots_with_bitmaps_nr;
/* Roots that find_objects() queued as pending objects for a walk. */
static int roots_without_bitmaps_nr;
static struct ewah_bitmap *lookup_stored_bitmap(struct stored_bitmap *st)
{
struct ewah_bitmap *parent;
@ -129,17 +140,13 @@ static struct ewah_bitmap *lookup_stored_bitmap(struct stored_bitmap *st)
return composed;
}
/*
* Read a bitmap from the current read position on the mmaped
* index, and increase the read position accordingly
*/
static struct ewah_bitmap *read_bitmap_1(struct bitmap_index *index)
struct ewah_bitmap *read_bitmap(const unsigned char *map,
size_t map_size, size_t *map_pos)
{
struct ewah_bitmap *b = ewah_pool_new();
ssize_t bitmap_size = ewah_read_mmap(b,
index->map + index->map_pos,
index->map_size - index->map_pos);
ssize_t bitmap_size = ewah_read_mmap(b, map + *map_pos,
map_size - *map_pos);
if (bitmap_size < 0) {
error(_("failed to load bitmap index (corrupted?)"));
@ -147,10 +154,20 @@ static struct ewah_bitmap *read_bitmap_1(struct bitmap_index *index)
return NULL;
}
index->map_pos += bitmap_size;
*map_pos += bitmap_size;
return b;
}
/*
 * Convenience wrapper around read_bitmap(): decode the bitmap located
 * at the index's current read position inside its mmap'd data. The
 * read position is passed by address so read_bitmap() can advance it.
 */
static struct ewah_bitmap *read_bitmap_1(struct bitmap_index *index)
{
	size_t *cursor = &index->map_pos;

	return read_bitmap(index->map, index->map_size, cursor);
}
static uint32_t bitmap_num_objects(struct bitmap_index *index)
{
if (index->midx)
@ -199,6 +216,46 @@ static int load_bitmap_header(struct bitmap_index *index)
index->table_lookup = (void *)(index_end - table_size);
index_end -= table_size;
}
if (flags & BITMAP_OPT_PSEUDO_MERGES) {
unsigned char *pseudo_merge_ofs;
size_t table_size;
uint32_t i;
if (sizeof(table_size) > index_end - index->map - header_size)
return error(_("corrupted bitmap index file (too short to fit pseudo-merge table header)"));
table_size = get_be64(index_end - 8);
if (table_size > index_end - index->map - header_size)
return error(_("corrupted bitmap index file (too short to fit pseudo-merge table)"));
if (git_env_bool("GIT_TEST_USE_PSEUDO_MERGES", 1)) {
const unsigned char *ext = (index_end - table_size);
index->pseudo_merges.map = index->map;
index->pseudo_merges.map_size = index->map_size;
index->pseudo_merges.commits = ext + get_be64(index_end - 16);
index->pseudo_merges.commits_nr = get_be32(index_end - 20);
index->pseudo_merges.nr = get_be32(index_end - 24);
if (st_add(st_mult(index->pseudo_merges.nr,
sizeof(uint64_t)),
24) > table_size)
return error(_("corrupted bitmap index file, pseudo-merge table too short"));
CALLOC_ARRAY(index->pseudo_merges.v,
index->pseudo_merges.nr);
pseudo_merge_ofs = index_end - 24 -
(index->pseudo_merges.nr * sizeof(uint64_t));
for (i = 0; i < index->pseudo_merges.nr; i++) {
index->pseudo_merges.v[i].at = get_be64(pseudo_merge_ofs);
pseudo_merge_ofs += sizeof(uint64_t);
}
}
index_end -= table_size;
}
}
index->entry_count = ntohl(header->entry_count);
@ -960,6 +1017,22 @@ static void show_commit(struct commit *commit UNUSED,
{
}
/*
 * Wrapper around apply_pseudo_merges_for_commit() that also accounts
 * for its result in the `pseudo_merges_satisfied_nr` statistic.
 *
 * Returns whatever apply_pseudo_merges_for_commit() returned.
 */
static unsigned apply_pseudo_merges_for_commit_1(struct bitmap_index *bitmap_git,
						 struct bitmap *result,
						 struct commit *commit,
						 uint32_t commit_pos)
{
	int applied = apply_pseudo_merges_for_commit(&bitmap_git->pseudo_merges,
						     result, commit,
						     commit_pos);

	/* Adding zero leaves the counter untouched, so no guard is needed. */
	pseudo_merges_satisfied_nr += applied;

	return applied;
}
static int add_to_include_set(struct bitmap_index *bitmap_git,
struct include_data *data,
struct commit *commit,
@ -975,11 +1048,19 @@ static int add_to_include_set(struct bitmap_index *bitmap_git,
partial = bitmap_for_commit(bitmap_git, commit);
if (partial) {
existing_bitmaps_hits_nr++;
bitmap_or_ewah(data->base, partial);
return 0;
}
existing_bitmaps_misses_nr++;
bitmap_set(data->base, bitmap_pos);
if (apply_pseudo_merges_for_commit_1(bitmap_git, data->base, commit,
bitmap_pos))
return 0;
return 1;
}
@ -1030,8 +1111,12 @@ static int add_commit_to_bitmap(struct bitmap_index *bitmap_git,
{
struct ewah_bitmap *or_with = bitmap_for_commit(bitmap_git, commit);
if (!or_with)
if (!or_with) {
existing_bitmaps_misses_nr++;
return 0;
}
existing_bitmaps_hits_nr++;
if (!*base)
*base = ewah_to_bitmap(or_with);
@ -1105,6 +1190,20 @@ static void show_boundary_object(struct object *object UNUSED,
BUG("should not be called");
}
/*
 * Wrapper around cascade_pseudo_merges() that keeps the trace
 * statistics up to date: a non-zero result counts as one cascade and
 * is added to the running total of satisfied pseudo-merges.
 *
 * Returns whatever cascade_pseudo_merges() returned.
 */
static unsigned cascade_pseudo_merges_1(struct bitmap_index *bitmap_git,
					struct bitmap *result,
					struct bitmap *roots)
{
	int satisfied = cascade_pseudo_merges(&bitmap_git->pseudo_merges,
					      result, roots);

	if (!satisfied)
		return 0;

	pseudo_merges_cascades_nr++;
	pseudo_merges_satisfied_nr += satisfied;

	return satisfied;
}
static struct bitmap *find_boundary_objects(struct bitmap_index *bitmap_git,
struct rev_info *revs,
struct object_list *roots)
@ -1114,6 +1213,7 @@ static struct bitmap *find_boundary_objects(struct bitmap_index *bitmap_git,
unsigned int i;
unsigned int tmp_blobs, tmp_trees, tmp_tags;
int any_missing = 0;
int existing_bitmaps = 0;
cb.bitmap_git = bitmap_git;
cb.base = bitmap_new();
@ -1121,6 +1221,25 @@ static struct bitmap *find_boundary_objects(struct bitmap_index *bitmap_git,
revs->ignore_missing_links = 1;
if (bitmap_git->pseudo_merges.nr) {
struct bitmap *roots_bitmap = bitmap_new();
struct object_list *objects = NULL;
for (objects = roots; objects; objects = objects->next) {
struct object *object = objects->item;
int pos;
pos = bitmap_position(bitmap_git, &object->oid);
if (pos < 0)
continue;
bitmap_set(roots_bitmap, pos);
}
if (!cascade_pseudo_merges_1(bitmap_git, cb.base, roots_bitmap))
bitmap_free(roots_bitmap);
}
/*
* OR in any existing reachability bitmaps among `roots` into
* `cb.base`.
@ -1132,8 +1251,10 @@ static struct bitmap *find_boundary_objects(struct bitmap_index *bitmap_git,
continue;
if (add_commit_to_bitmap(bitmap_git, &cb.base,
(struct commit *)object))
(struct commit *)object)) {
existing_bitmaps = 1;
continue;
}
any_missing = 1;
}
@ -1141,6 +1262,9 @@ static struct bitmap *find_boundary_objects(struct bitmap_index *bitmap_git,
if (!any_missing)
goto cleanup;
if (existing_bitmaps)
cascade_pseudo_merges_1(bitmap_git, cb.base, NULL);
tmp_blobs = revs->blob_objects;
tmp_trees = revs->tree_objects;
tmp_tags = revs->blob_objects;
@ -1196,6 +1320,44 @@ cleanup:
return cb.base;
}
struct ewah_bitmap *pseudo_merge_bitmap_for_commit(struct bitmap_index *bitmap_git,
struct commit *commit)
{
struct commit_list *p;
struct bitmap *parents;
struct pseudo_merge *match = NULL;
if (!bitmap_git->pseudo_merges.nr)
return NULL;
parents = bitmap_new();
for (p = commit->parents; p; p = p->next) {
int pos = bitmap_position(bitmap_git, &p->item->object.oid);
if (pos < 0 || pos >= bitmap_num_objects(bitmap_git))
goto done;
bitmap_set(parents, pos);
}
match = pseudo_merge_for_parents(&bitmap_git->pseudo_merges,
parents);
done:
bitmap_free(parents);
if (match)
return pseudo_merge_bitmap(&bitmap_git->pseudo_merges, match);
return NULL;
}
static void unsatisfy_all_pseudo_merges(struct bitmap_index *bitmap_git)
{
uint32_t i;
for (i = 0; i < bitmap_git->pseudo_merges.nr; i++)
bitmap_git->pseudo_merges.v[i].satisfied = 0;
}
static struct bitmap *find_objects(struct bitmap_index *bitmap_git,
struct rev_info *revs,
struct object_list *roots,
@ -1203,9 +1365,32 @@ static struct bitmap *find_objects(struct bitmap_index *bitmap_git,
{
struct bitmap *base = NULL;
int needs_walk = 0;
unsigned existing_bitmaps = 0;
struct object_list *not_mapped = NULL;
unsatisfy_all_pseudo_merges(bitmap_git);
if (bitmap_git->pseudo_merges.nr) {
struct bitmap *roots_bitmap = bitmap_new();
struct object_list *objects = NULL;
for (objects = roots; objects; objects = objects->next) {
struct object *object = objects->item;
int pos;
pos = bitmap_position(bitmap_git, &object->oid);
if (pos < 0)
continue;
bitmap_set(roots_bitmap, pos);
}
base = bitmap_new();
if (!cascade_pseudo_merges_1(bitmap_git, base, roots_bitmap))
bitmap_free(roots_bitmap);
}
/*
* Go through all the roots for the walk. The ones that have bitmaps
* on the bitmap index will be `or`ed together to form an initial
@ -1216,11 +1401,21 @@ static struct bitmap *find_objects(struct bitmap_index *bitmap_git,
*/
while (roots) {
struct object *object = roots->item;
roots = roots->next;
if (base) {
int pos = bitmap_position(bitmap_git, &object->oid);
if (pos > 0 && bitmap_get(base, pos)) {
object->flags |= SEEN;
continue;
}
}
if (object->type == OBJ_COMMIT &&
add_commit_to_bitmap(bitmap_git, &base, (struct commit *)object)) {
object->flags |= SEEN;
existing_bitmaps = 1;
continue;
}
@ -1236,6 +1431,9 @@ static struct bitmap *find_objects(struct bitmap_index *bitmap_git,
roots = not_mapped;
if (existing_bitmaps)
cascade_pseudo_merges_1(bitmap_git, base, NULL);
/*
* Let's iterate through all the roots that don't have bitmaps to
* check if we can determine them to be reachable from the existing
@ -1256,8 +1454,12 @@ static struct bitmap *find_objects(struct bitmap_index *bitmap_git,
object->flags &= ~UNINTERESTING;
add_pending_object(revs, object, "");
needs_walk = 1;
roots_without_bitmaps_nr++;
} else {
object->flags |= SEEN;
roots_with_bitmaps_nr++;
}
}
@ -1820,6 +2022,19 @@ struct bitmap_index *prepare_bitmap_walk(struct rev_info *revs,
object_list_free(&wants);
object_list_free(&haves);
trace2_data_intmax("bitmap", the_repository, "pseudo_merges_satisfied",
pseudo_merges_satisfied_nr);
trace2_data_intmax("bitmap", the_repository, "pseudo_merges_cascades",
pseudo_merges_cascades_nr);
trace2_data_intmax("bitmap", the_repository, "bitmap/hits",
existing_bitmaps_hits_nr);
trace2_data_intmax("bitmap", the_repository, "bitmap/misses",
existing_bitmaps_misses_nr);
trace2_data_intmax("bitmap", the_repository, "bitmap/roots_with_bitmap",
roots_with_bitmaps_nr);
trace2_data_intmax("bitmap", the_repository, "bitmap/roots_without_bitmap",
roots_without_bitmaps_nr);
return bitmap_git;
cleanup:
@ -2410,6 +2625,132 @@ cleanup:
return 0;
}
/*
 * Translate a bit position in a bitmap into an object ID, stored in
 * `oid`.
 *
 * The bit position is first converted to an index position, using the
 * MIDX when the bitmap is MIDX-based and the pack otherwise, and the
 * object ID is then fetched with nth_bitmap_object_oid().
 */
static void bit_pos_to_object_id(struct bitmap_index *bitmap_git,
				 uint32_t bit_pos,
				 struct object_id *oid)
{
	uint32_t index_pos = bitmap_is_midx(bitmap_git)
		? pack_pos_to_midx(bitmap_git->midx, bit_pos)
		: pack_pos_to_index(bitmap_git->pack, bit_pos);

	nth_bitmap_object_oid(bitmap_git, oid, index_pos);
}
int test_bitmap_pseudo_merges(struct repository *r)
{
struct bitmap_index *bitmap_git;
uint32_t i;
bitmap_git = prepare_bitmap_git(r);
if (!bitmap_git || !bitmap_git->pseudo_merges.nr)
goto cleanup;
for (i = 0; i < bitmap_git->pseudo_merges.nr; i++) {
struct pseudo_merge *merge;
struct ewah_bitmap *commits_bitmap, *merge_bitmap;
merge = use_pseudo_merge(&bitmap_git->pseudo_merges,
&bitmap_git->pseudo_merges.v[i]);
commits_bitmap = merge->commits;
merge_bitmap = pseudo_merge_bitmap(&bitmap_git->pseudo_merges,
merge);
printf("at=%"PRIuMAX", commits=%"PRIuMAX", objects=%"PRIuMAX"\n",
(uintmax_t)merge->at,
(uintmax_t)ewah_bitmap_popcount(commits_bitmap),
(uintmax_t)ewah_bitmap_popcount(merge_bitmap));
}
cleanup:
free_bitmap_index(bitmap_git);
return 0;
}
static void dump_ewah_object_ids(struct bitmap_index *bitmap_git,
struct ewah_bitmap *bitmap)
{
struct ewah_iterator it;
eword_t word;
uint32_t pos = 0;
ewah_iterator_init(&it, bitmap);
while (ewah_iterator_next(&word, &it)) {
struct object_id oid;
uint32_t offset;
for (offset = 0; offset < BITS_IN_EWORD; offset++) {
if (!(word >> offset))
break;
offset += ewah_bit_ctz64(word >> offset);
bit_pos_to_object_id(bitmap_git, pos + offset, &oid);
printf("%s\n", oid_to_hex(&oid));
}
pos += BITS_IN_EWORD;
}
}
int test_bitmap_pseudo_merge_commits(struct repository *r, uint32_t n)
{
struct bitmap_index *bitmap_git;
struct pseudo_merge *merge;
int ret = 0;
bitmap_git = prepare_bitmap_git(r);
if (!bitmap_git || !bitmap_git->pseudo_merges.nr)
goto cleanup;
if (n >= bitmap_git->pseudo_merges.nr) {
ret = error(_("pseudo-merge index out of range "
"(%"PRIu32" >= %"PRIuMAX")"),
n, (uintmax_t)bitmap_git->pseudo_merges.nr);
goto cleanup;
}
merge = use_pseudo_merge(&bitmap_git->pseudo_merges,
&bitmap_git->pseudo_merges.v[n]);
dump_ewah_object_ids(bitmap_git, merge->commits);
cleanup:
free_bitmap_index(bitmap_git);
return ret;
}
int test_bitmap_pseudo_merge_objects(struct repository *r, uint32_t n)
{
struct bitmap_index *bitmap_git;
struct pseudo_merge *merge;
int ret = 0;
bitmap_git = prepare_bitmap_git(r);
if (!bitmap_git || !bitmap_git->pseudo_merges.nr)
goto cleanup;
if (n >= bitmap_git->pseudo_merges.nr) {
ret = error(_("pseudo-merge index out of range "
"(%"PRIu32" >= %"PRIuMAX")"),
n, (uintmax_t)bitmap_git->pseudo_merges.nr);
goto cleanup;
}
merge = use_pseudo_merge(&bitmap_git->pseudo_merges,
&bitmap_git->pseudo_merges.v[n]);
dump_ewah_object_ids(bitmap_git,
pseudo_merge_bitmap(&bitmap_git->pseudo_merges,
merge));
cleanup:
free_bitmap_index(bitmap_git);
return ret;
}
int rebuild_bitmap(const uint32_t *reposition,
struct ewah_bitmap *source,
struct bitmap *dest)
@ -2516,6 +2857,7 @@ void free_bitmap_index(struct bitmap_index *b)
*/
close_midx_revindex(b->midx);
}
free_pseudo_merge_map(&b->pseudo_merges);
free(b);
}