pack-revindex: read multi-pack reverse indexes
Implement reading for multi-pack reverse indexes, as described in the previous patch. Note that these functions don't yet have any callers, and won't until multi-pack reachability bitmaps are introduced in a later patch series. In the meantime, this patch implements some of the infrastructure necessary to support multi-pack bitmaps. There are three new functions exposed by the revindex API: - load_midx_revindex(): loads the reverse index corresponding to the given multi-pack index. - midx_to_pack_pos() and pack_pos_to_midx(): these convert between the multi-pack index and pseudo-pack order. load_midx_revindex() and pack_pos_to_midx() are both relatively straightforward. load_midx_revindex() needs a few functions to be exposed from the midx API. One to get the checksum of a midx, and another to get the .rev's filename. Similar to recent changes in the packed_git struct, three new fields are added to the multi_pack_index struct: one to keep track of the size, one to keep track of the mmap'd pointer, and another to point past the header and at the reverse index's data. pack_pos_to_midx() simply reads the corresponding entry out of the table. midx_to_pack_pos() is the trickiest, since it needs to find an object's position in the pseudo-pack order, but that order can only be recovered in the .rev file itself. This mapping can be implemented with a binary search, but note that the thing we're binary searching over isn't an array of values, but rather a permuted order of those values. So, when comparing two items, it's helpful to keep in mind the difference. Instead of a traditional binary search, where you are comparing two things directly, here we're comparing a (pack, offset) tuple with an index into the multi-pack index. That index describes another (pack, offset) tuple, and it is _those_ two tuples that are compared. Signed-off-by: Taylor Blau <me@ttaylorr.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
This commit is contained in:

committed by
Junio C Hamano

parent
b25fd24c00
commit
f894081dea
126
pack-revindex.c
126
pack-revindex.c
@ -3,6 +3,7 @@
|
||||
#include "object-store.h"
|
||||
#include "packfile.h"
|
||||
#include "config.h"
|
||||
#include "midx.h"
|
||||
|
||||
struct revindex_entry {
|
||||
off_t offset;
|
||||
@ -292,6 +293,43 @@ int load_pack_revindex(struct packed_git *p)
|
||||
return -1;
|
||||
}
|
||||
|
||||
int load_midx_revindex(struct multi_pack_index *m)
|
||||
{
|
||||
char *revindex_name;
|
||||
int ret;
|
||||
if (m->revindex_data)
|
||||
return 0;
|
||||
|
||||
revindex_name = get_midx_rev_filename(m);
|
||||
|
||||
ret = load_revindex_from_disk(revindex_name,
|
||||
m->num_objects,
|
||||
&m->revindex_map,
|
||||
&m->revindex_len);
|
||||
if (ret)
|
||||
goto cleanup;
|
||||
|
||||
m->revindex_data = (const uint32_t *)((const char *)m->revindex_map + RIDX_HEADER_SIZE);
|
||||
|
||||
cleanup:
|
||||
free(revindex_name);
|
||||
return ret;
|
||||
}
|
||||
|
||||
int close_midx_revindex(struct multi_pack_index *m)
|
||||
{
|
||||
if (!m || !m->revindex_map)
|
||||
return 0;
|
||||
|
||||
munmap((void*)m->revindex_map, m->revindex_len);
|
||||
|
||||
m->revindex_map = NULL;
|
||||
m->revindex_data = NULL;
|
||||
m->revindex_len = 0;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int offset_to_pack_pos(struct packed_git *p, off_t ofs, uint32_t *pos)
|
||||
{
|
||||
unsigned lo, hi;
|
||||
@ -346,3 +384,91 @@ off_t pack_pos_to_offset(struct packed_git *p, uint32_t pos)
|
||||
else
|
||||
return nth_packed_object_offset(p, pack_pos_to_index(p, pos));
|
||||
}
|
||||
|
||||
uint32_t pack_pos_to_midx(struct multi_pack_index *m, uint32_t pos)
|
||||
{
|
||||
if (!m->revindex_data)
|
||||
BUG("pack_pos_to_midx: reverse index not yet loaded");
|
||||
if (m->num_objects <= pos)
|
||||
BUG("pack_pos_to_midx: out-of-bounds object at %"PRIu32, pos);
|
||||
return get_be32(m->revindex_data + pos);
|
||||
}
|
||||
|
||||
/*
 * Search key handed to bsearch() by midx_to_pack_pos() and consumed by
 * midx_pack_order_cmp().
 */
struct midx_pack_key {
	/* Pack identifier and in-pack offset of the object being looked up. */
	uint32_t pack;
	off_t offset;

	/* Identifier of the pack that sorts first in the comparison. */
	uint32_t preferred_pack;
	/* Multi-pack index the comparator uses to resolve table entries. */
	struct multi_pack_index *midx;
};
|
||||
|
||||
static int midx_pack_order_cmp(const void *va, const void *vb)
|
||||
{
|
||||
const struct midx_pack_key *key = va;
|
||||
struct multi_pack_index *midx = key->midx;
|
||||
|
||||
uint32_t versus = pack_pos_to_midx(midx, (uint32_t*)vb - (const uint32_t *)midx->revindex_data);
|
||||
uint32_t versus_pack = nth_midxed_pack_int_id(midx, versus);
|
||||
off_t versus_offset;
|
||||
|
||||
uint32_t key_preferred = key->pack == key->preferred_pack;
|
||||
uint32_t versus_preferred = versus_pack == key->preferred_pack;
|
||||
|
||||
/*
|
||||
* First, compare the preferred-ness, noting that the preferred pack
|
||||
* comes first.
|
||||
*/
|
||||
if (key_preferred && !versus_preferred)
|
||||
return -1;
|
||||
else if (!key_preferred && versus_preferred)
|
||||
return 1;
|
||||
|
||||
/* Then, break ties first by comparing the pack IDs. */
|
||||
if (key->pack < versus_pack)
|
||||
return -1;
|
||||
else if (key->pack > versus_pack)
|
||||
return 1;
|
||||
|
||||
/* Finally, break ties by comparing offsets within a pack. */
|
||||
versus_offset = nth_midxed_offset(midx, versus);
|
||||
if (key->offset < versus_offset)
|
||||
return -1;
|
||||
else if (key->offset > versus_offset)
|
||||
return 1;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int midx_to_pack_pos(struct multi_pack_index *m, uint32_t at, uint32_t *pos)
|
||||
{
|
||||
struct midx_pack_key key;
|
||||
uint32_t *found;
|
||||
|
||||
if (!m->revindex_data)
|
||||
BUG("midx_to_pack_pos: reverse index not yet loaded");
|
||||
if (m->num_objects <= at)
|
||||
BUG("midx_to_pack_pos: out-of-bounds object at %"PRIu32, at);
|
||||
|
||||
key.pack = nth_midxed_pack_int_id(m, at);
|
||||
key.offset = nth_midxed_offset(m, at);
|
||||
key.midx = m;
|
||||
/*
|
||||
* The preferred pack sorts first, so determine its identifier by
|
||||
* looking at the first object in pseudo-pack order.
|
||||
*
|
||||
* Note that if no --preferred-pack is explicitly given when writing a
|
||||
* multi-pack index, then whichever pack has the lowest identifier
|
||||
* implicitly is preferred (and includes all its objects, since ties are
|
||||
* broken first by pack identifier).
|
||||
*/
|
||||
key.preferred_pack = nth_midxed_pack_int_id(m, pack_pos_to_midx(m, 0));
|
||||
|
||||
found = bsearch(&key, m->revindex_data, m->num_objects,
|
||||
sizeof(*m->revindex_data), midx_pack_order_cmp);
|
||||
|
||||
if (!found)
|
||||
return error("bad offset for revindex");
|
||||
|
||||
*pos = found - m->revindex_data;
|
||||
return 0;
|
||||
}
|
||||
|
Reference in New Issue
Block a user