Merge branch 'ps/reftable-iteration-perf'

The code to iterate over refs with the reftable backend has seen
some optimization.

* ps/reftable-iteration-perf:
  reftable/reader: add comments to `table_iter_next()`
  reftable/record: don't try to reallocate ref record name
  reftable/block: swap buffers instead of copying
  reftable/pq: allocation-less comparison of entry keys
  reftable/merged: skip comparison for records of the same subiter
  reftable/merged: allocation-less dropping of shadowed records
  reftable/record: introduce function to compare records by key
This commit is contained in:
Junio C Hamano
2024-02-26 18:10:24 -08:00
7 changed files with 102 additions and 39 deletions

View File

@ -339,8 +339,7 @@ int block_iter_next(struct block_iter *it, struct reftable_record *rec)
return -1;
string_view_consume(&in, n);
strbuf_reset(&it->last_key);
strbuf_addbuf(&it->last_key, &it->key);
strbuf_swap(&it->last_key, &it->key);
it->next_off += start.len - in.len;
return 0;
}

View File

@ -49,8 +49,6 @@ static void merged_iter_close(void *p)
for (size_t i = 0; i < mi->stack_len; i++)
reftable_iterator_destroy(&mi->stack[i]);
reftable_free(mi->stack);
strbuf_release(&mi->key);
strbuf_release(&mi->entry_key);
}
static int merged_iter_advance_nonnull_subiter(struct merged_iter *mi,
@ -105,14 +103,19 @@ static int merged_iter_next_entry(struct merged_iter *mi,
such a deployment, the loop below must be changed to collect all
entries for the same key, and return new the newest one.
*/
reftable_record_key(&entry.rec, &mi->entry_key);
while (!merged_iter_pqueue_is_empty(mi->pq)) {
struct pq_entry top = merged_iter_pqueue_top(mi->pq);
int cmp = 0;
int cmp;
reftable_record_key(&top.rec, &mi->key);
/*
* When the next entry comes from the same queue as the current
* entry then it must by definition be larger. This avoids a
* comparison in the most common case.
*/
if (top.index == entry.index)
break;
cmp = strbuf_cmp(&mi->key, &mi->entry_key);
cmp = reftable_record_cmp(&top.rec, &entry.rec);
if (cmp > 0)
break;
@ -243,8 +246,6 @@ static int merged_table_seek_record(struct reftable_merged_table *mt,
.typ = reftable_record_type(rec),
.hash_id = mt->hash_id,
.suppress_deletions = mt->suppress_deletions,
.key = STRBUF_INIT,
.entry_key = STRBUF_INIT,
};
struct merged_iter *p;
int err;

View File

@ -31,8 +31,6 @@ struct merged_iter {
uint8_t typ;
int suppress_deletions;
struct merged_iter_pqueue pq;
struct strbuf key;
struct strbuf entry_key;
};
void merged_table_release(struct reftable_merged_table *mt);

View File

@ -14,20 +14,9 @@ https://developers.google.com/open-source/licenses/bsd
int pq_less(struct pq_entry *a, struct pq_entry *b)
{
struct strbuf ak = STRBUF_INIT;
struct strbuf bk = STRBUF_INIT;
int cmp = 0;
reftable_record_key(&a->rec, &ak);
reftable_record_key(&b->rec, &bk);
cmp = strbuf_cmp(&ak, &bk);
strbuf_release(&ak);
strbuf_release(&bk);
int cmp = reftable_record_cmp(&a->rec, &b->rec);
if (cmp == 0)
return a->index > b->index;
return cmp < 0;
}

View File

@ -357,24 +357,32 @@ static int table_iter_next(struct table_iter *ti, struct reftable_record *rec)
while (1) {
struct table_iter next = TABLE_ITER_INIT;
int err = 0;
if (ti->is_finished) {
int err;
if (ti->is_finished)
return 1;
}
/*
* Check whether the current block still has more records. If
* so, return it. If the iterator returns positive then the
* current block has been exhausted.
*/
err = table_iter_next_in_block(ti, rec);
if (err <= 0) {
if (err <= 0)
return err;
/*
* Otherwise, we need to continue to the next block in the
* table and retry. If there are no more blocks then the
* iterator is drained.
*/
err = table_iter_next_block(&next, ti);
table_iter_block_done(ti);
if (err) {
ti->is_finished = 1;
return err;
}
err = table_iter_next_block(&next, ti);
if (err != 0) {
ti->is_finished = 1;
}
table_iter_block_done(ti);
if (err != 0) {
return err;
}
table_iter_copy_from(ti, &next);
block_iter_close(&next.bi);
}

View File

@ -377,10 +377,11 @@ static int reftable_ref_record_decode(void *rec, struct strbuf key,
assert(hash_size > 0);
r->refname = reftable_realloc(r->refname, key.len + 1);
r->refname = reftable_malloc(key.len + 1);
memcpy(r->refname, key.buf, key.len);
r->update_index = update_index;
r->refname[key.len] = 0;
r->update_index = update_index;
r->value_type = val_type;
switch (val_type) {
case REFTABLE_REF_VAL1:
@ -430,7 +431,6 @@ static int reftable_ref_record_is_deletion_void(const void *p)
(const struct reftable_ref_record *)p);
}
static int reftable_ref_record_equal_void(const void *a,
const void *b, int hash_size)
{
@ -439,6 +439,13 @@ static int reftable_ref_record_equal_void(const void *a,
return reftable_ref_record_equal(ra, rb, hash_size);
}
static int reftable_ref_record_cmp_void(const void *_a, const void *_b)
{
const struct reftable_ref_record *a = _a;
const struct reftable_ref_record *b = _b;
return strcmp(a->refname, b->refname);
}
static void reftable_ref_record_print_void(const void *rec,
int hash_size)
{
@ -455,6 +462,7 @@ static struct reftable_record_vtable reftable_ref_record_vtable = {
.release = &reftable_ref_record_release_void,
.is_deletion = &reftable_ref_record_is_deletion_void,
.equal = &reftable_ref_record_equal_void,
.cmp = &reftable_ref_record_cmp_void,
.print = &reftable_ref_record_print_void,
};
@ -627,6 +635,25 @@ static int reftable_obj_record_equal_void(const void *a, const void *b, int hash
return 1;
}
static int reftable_obj_record_cmp_void(const void *_a, const void *_b)
{
const struct reftable_obj_record *a = _a;
const struct reftable_obj_record *b = _b;
int cmp;
cmp = memcmp(a->hash_prefix, b->hash_prefix,
a->hash_prefix_len > b->hash_prefix_len ?
a->hash_prefix_len : b->hash_prefix_len);
if (cmp)
return cmp;
/*
* When the prefix is the same then the object record that is longer is
* considered to be bigger.
*/
return a->hash_prefix_len - b->hash_prefix_len;
}
static struct reftable_record_vtable reftable_obj_record_vtable = {
.key = &reftable_obj_record_key,
.type = BLOCK_TYPE_OBJ,
@ -637,6 +664,7 @@ static struct reftable_record_vtable reftable_obj_record_vtable = {
.release = &reftable_obj_record_release,
.is_deletion = &not_a_deletion,
.equal = &reftable_obj_record_equal_void,
.cmp = &reftable_obj_record_cmp_void,
.print = &reftable_obj_record_print,
};
@ -955,6 +983,22 @@ static int reftable_log_record_equal_void(const void *a,
hash_size);
}
static int reftable_log_record_cmp_void(const void *_a, const void *_b)
{
const struct reftable_log_record *a = _a;
const struct reftable_log_record *b = _b;
int cmp = strcmp(a->refname, b->refname);
if (cmp)
return cmp;
/*
* Note that the comparison here is reversed. This is because the
* update index is reversed when comparing keys. For reference, see how
* we handle this in reftable_log_record_key()`.
*/
return b->update_index - a->update_index;
}
int reftable_log_record_equal(const struct reftable_log_record *a,
const struct reftable_log_record *b, int hash_size)
{
@ -1004,6 +1048,7 @@ static struct reftable_record_vtable reftable_log_record_vtable = {
.release = &reftable_log_record_release_void,
.is_deletion = &reftable_log_record_is_deletion_void,
.equal = &reftable_log_record_equal_void,
.cmp = &reftable_log_record_cmp_void,
.print = &reftable_log_record_print_void,
};
@ -1079,6 +1124,13 @@ static int reftable_index_record_equal(const void *a, const void *b, int hash_si
return ia->offset == ib->offset && !strbuf_cmp(&ia->last_key, &ib->last_key);
}
static int reftable_index_record_cmp(const void *_a, const void *_b)
{
const struct reftable_index_record *a = _a;
const struct reftable_index_record *b = _b;
return strbuf_cmp(&a->last_key, &b->last_key);
}
static void reftable_index_record_print(const void *rec, int hash_size)
{
const struct reftable_index_record *idx = rec;
@ -1096,6 +1148,7 @@ static struct reftable_record_vtable reftable_index_record_vtable = {
.release = &reftable_index_record_release,
.is_deletion = &not_a_deletion,
.equal = &reftable_index_record_equal,
.cmp = &reftable_index_record_cmp,
.print = &reftable_index_record_print,
};
@ -1149,6 +1202,14 @@ int reftable_record_is_deletion(struct reftable_record *rec)
reftable_record_data(rec));
}
int reftable_record_cmp(struct reftable_record *a, struct reftable_record *b)
{
if (a->type != b->type)
BUG("cannot compare reftable records of different type");
return reftable_record_vtable(a)->cmp(
reftable_record_data(a), reftable_record_data(b));
}
int reftable_record_equal(struct reftable_record *a, struct reftable_record *b, int hash_size)
{
if (a->type != b->type)

View File

@ -62,6 +62,12 @@ struct reftable_record_vtable {
/* Are two records equal? This assumes they have the same type. Returns 0 for non-equal. */
int (*equal)(const void *a, const void *b, int hash_size);
/*
* Compare keys of two records with each other. The records must have
* the same type.
*/
int (*cmp)(const void *a, const void *b);
/* Print on stdout, for debugging. */
void (*print)(const void *rec, int hash_size);
};
@ -114,6 +120,7 @@ struct reftable_record {
void reftable_record_init(struct reftable_record *rec, uint8_t typ);
/* see struct record_vtable */
int reftable_record_cmp(struct reftable_record *a, struct reftable_record *b);
int reftable_record_equal(struct reftable_record *a, struct reftable_record *b, int hash_size);
void reftable_record_print(struct reftable_record *rec, int hash_size);
void reftable_record_key(struct reftable_record *rec, struct strbuf *dest);