midx: introduce bsearch_one_midx()

The `bsearch_midx()` function will be extended in a following commit to
search for the location of a given object ID across all MIDXs in a chain
(or the single non-chain MIDX if no chain is available).

While most callers will naturally want to use the updated
`bsearch_midx()` function, there are a handful of special cases that
will want finer control and will only want to search through a single
MIDX.

One example is the object abbreviation code, which cares about object
IDs near to where we'd expect to find a match in a MIDX. In that case,
we want to look at the nearby matches in each layer of the MIDX chain,
not just a single one.

Split the more fine-grained control out into a separate function called
`bsearch_one_midx()` which searches only a single MIDX.

At present both `bsearch_midx()` and `bsearch_one_midx()` have identical
behavior, but the following commit will rewrite the former to be aware
of incremental MIDXs for the remaining non-special-case callers.

Signed-off-by: Taylor Blau <me@ttaylorr.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
This commit is contained in:
Taylor Blau
2024-08-06 11:37:30 -04:00
committed by Junio C Hamano
parent 60750e1eb9
commit 3f5f1cff92
3 changed files with 71 additions and 50 deletions

17
midx.c
View File

@ -330,10 +330,21 @@ int nth_bitmapped_pack(struct repository *r, struct multi_pack_index *m,
return 0; return 0;
} }
int bsearch_midx(const struct object_id *oid, struct multi_pack_index *m, uint32_t *result) int bsearch_one_midx(const struct object_id *oid, struct multi_pack_index *m,
uint32_t *result)
{ {
return bsearch_hash(oid->hash, m->chunk_oid_fanout, m->chunk_oid_lookup, int ret = bsearch_hash(oid->hash, m->chunk_oid_fanout,
the_hash_algo->rawsz, result); m->chunk_oid_lookup, the_hash_algo->rawsz,
result);
if (result)
*result += m->num_objects_in_base;
return ret;
}
int bsearch_midx(const struct object_id *oid, struct multi_pack_index *m,
uint32_t *result)
{
return bsearch_one_midx(oid, m, result);
} }
struct object_id *nth_midxed_object_oid(struct object_id *oid, struct object_id *nth_midxed_object_oid(struct object_id *oid,

5
midx.h
View File

@ -90,7 +90,10 @@ struct multi_pack_index *load_multi_pack_index(const char *object_dir, int local
int prepare_midx_pack(struct repository *r, struct multi_pack_index *m, uint32_t pack_int_id); int prepare_midx_pack(struct repository *r, struct multi_pack_index *m, uint32_t pack_int_id);
int nth_bitmapped_pack(struct repository *r, struct multi_pack_index *m, int nth_bitmapped_pack(struct repository *r, struct multi_pack_index *m,
struct bitmapped_pack *bp, uint32_t pack_int_id); struct bitmapped_pack *bp, uint32_t pack_int_id);
int bsearch_midx(const struct object_id *oid, struct multi_pack_index *m, uint32_t *result); int bsearch_one_midx(const struct object_id *oid, struct multi_pack_index *m,
uint32_t *result);
int bsearch_midx(const struct object_id *oid, struct multi_pack_index *m,
uint32_t *result);
off_t nth_midxed_offset(struct multi_pack_index *m, uint32_t pos); off_t nth_midxed_offset(struct multi_pack_index *m, uint32_t pos);
uint32_t nth_midxed_pack_int_id(struct multi_pack_index *m, uint32_t pos); uint32_t nth_midxed_pack_int_id(struct multi_pack_index *m, uint32_t pos);
struct object_id *nth_midxed_object_oid(struct object_id *oid, struct object_id *nth_midxed_object_oid(struct object_id *oid,

View File

@ -134,28 +134,32 @@ static int match_hash(unsigned len, const unsigned char *a, const unsigned char
static void unique_in_midx(struct multi_pack_index *m, static void unique_in_midx(struct multi_pack_index *m,
struct disambiguate_state *ds) struct disambiguate_state *ds)
{ {
uint32_t num, i, first = 0; for (; m; m = m->base_midx) {
const struct object_id *current = NULL; uint32_t num, i, first = 0;
int len = ds->len > ds->repo->hash_algo->hexsz ? const struct object_id *current = NULL;
ds->repo->hash_algo->hexsz : ds->len; int len = ds->len > ds->repo->hash_algo->hexsz ?
num = m->num_objects; ds->repo->hash_algo->hexsz : ds->len;
if (!num) if (!m->num_objects)
return; continue;
bsearch_midx(&ds->bin_pfx, m, &first); num = m->num_objects + m->num_objects_in_base;
/* bsearch_one_midx(&ds->bin_pfx, m, &first);
* At this point, "first" is the location of the lowest object
* with an object name that could match "bin_pfx". See if we have /*
* 0, 1 or more objects that actually match(es). * At this point, "first" is the location of the lowest
*/ * object with an object name that could match
for (i = first; i < num && !ds->ambiguous; i++) { * "bin_pfx". See if we have 0, 1 or more objects that
struct object_id oid; * actually match(es).
current = nth_midxed_object_oid(&oid, m, i); */
if (!match_hash(len, ds->bin_pfx.hash, current->hash)) for (i = first; i < num && !ds->ambiguous; i++) {
break; struct object_id oid;
update_candidates(ds, current); current = nth_midxed_object_oid(&oid, m, i);
if (!match_hash(len, ds->bin_pfx.hash, current->hash))
break;
update_candidates(ds, current);
}
} }
} }
@ -708,37 +712,40 @@ static int repo_extend_abbrev_len(struct repository *r UNUSED,
static void find_abbrev_len_for_midx(struct multi_pack_index *m, static void find_abbrev_len_for_midx(struct multi_pack_index *m,
struct min_abbrev_data *mad) struct min_abbrev_data *mad)
{ {
int match = 0; for (; m; m = m->base_midx) {
uint32_t num, first = 0; int match = 0;
struct object_id oid; uint32_t num, first = 0;
const struct object_id *mad_oid; struct object_id oid;
const struct object_id *mad_oid;
if (!m->num_objects) if (!m->num_objects)
return; continue;
num = m->num_objects; num = m->num_objects + m->num_objects_in_base;
mad_oid = mad->oid; mad_oid = mad->oid;
match = bsearch_midx(mad_oid, m, &first); match = bsearch_one_midx(mad_oid, m, &first);
/* /*
* first is now the position in the packfile where we would insert * first is now the position in the packfile where we
* mad->hash if it does not exist (or the position of mad->hash if * would insert mad->hash if it does not exist (or the
* it does exist). Hence, we consider a maximum of two objects * position of mad->hash if it does exist). Hence, we
* nearby for the abbreviation length. * consider a maximum of two objects nearby for the
*/ * abbreviation length.
mad->init_len = 0; */
if (!match) { mad->init_len = 0;
if (nth_midxed_object_oid(&oid, m, first)) if (!match) {
extend_abbrev_len(&oid, mad); if (nth_midxed_object_oid(&oid, m, first))
} else if (first < num - 1) { extend_abbrev_len(&oid, mad);
if (nth_midxed_object_oid(&oid, m, first + 1)) } else if (first < num - 1) {
extend_abbrev_len(&oid, mad); if (nth_midxed_object_oid(&oid, m, first + 1))
extend_abbrev_len(&oid, mad);
}
if (first > 0) {
if (nth_midxed_object_oid(&oid, m, first - 1))
extend_abbrev_len(&oid, mad);
}
mad->init_len = mad->cur_len;
} }
if (first > 0) {
if (nth_midxed_object_oid(&oid, m, first - 1))
extend_abbrev_len(&oid, mad);
}
mad->init_len = mad->cur_len;
} }
static void find_abbrev_len_for_pack(struct packed_git *p, static void find_abbrev_len_for_pack(struct packed_git *p,