Merge branch 'ps/reftable-iteration-perf'

The code to iterate over refs with the reftable backend has seen
some optimization.

* ps/reftable-iteration-perf:
  reftable/reader: add comments to `table_iter_next()`
  reftable/record: don't try to reallocate ref record name
  reftable/block: swap buffers instead of copying
  reftable/pq: allocation-less comparison of entry keys
  reftable/merged: skip comparison for records of the same subiter
  reftable/merged: allocation-less dropping of shadowed records
  reftable/record: introduce function to compare records by key
This commit is contained in:
Junio C Hamano
2024-02-26 18:10:24 -08:00
7 changed files with 102 additions and 39 deletions

View File

@ -339,8 +339,7 @@ int block_iter_next(struct block_iter *it, struct reftable_record *rec)
return -1; return -1;
string_view_consume(&in, n); string_view_consume(&in, n);
strbuf_reset(&it->last_key); strbuf_swap(&it->last_key, &it->key);
strbuf_addbuf(&it->last_key, &it->key);
it->next_off += start.len - in.len; it->next_off += start.len - in.len;
return 0; return 0;
} }

View File

@ -49,8 +49,6 @@ static void merged_iter_close(void *p)
for (size_t i = 0; i < mi->stack_len; i++) for (size_t i = 0; i < mi->stack_len; i++)
reftable_iterator_destroy(&mi->stack[i]); reftable_iterator_destroy(&mi->stack[i]);
reftable_free(mi->stack); reftable_free(mi->stack);
strbuf_release(&mi->key);
strbuf_release(&mi->entry_key);
} }
static int merged_iter_advance_nonnull_subiter(struct merged_iter *mi, static int merged_iter_advance_nonnull_subiter(struct merged_iter *mi,
@ -105,14 +103,19 @@ static int merged_iter_next_entry(struct merged_iter *mi,
such a deployment, the loop below must be changed to collect all such a deployment, the loop below must be changed to collect all
entries for the same key, and return new the newest one. entries for the same key, and return new the newest one.
*/ */
reftable_record_key(&entry.rec, &mi->entry_key);
while (!merged_iter_pqueue_is_empty(mi->pq)) { while (!merged_iter_pqueue_is_empty(mi->pq)) {
struct pq_entry top = merged_iter_pqueue_top(mi->pq); struct pq_entry top = merged_iter_pqueue_top(mi->pq);
int cmp = 0; int cmp;
reftable_record_key(&top.rec, &mi->key); /*
* When the next entry comes from the same queue as the current
* entry then it must by definition be larger. This avoids a
* comparison in the most common case.
*/
if (top.index == entry.index)
break;
cmp = strbuf_cmp(&mi->key, &mi->entry_key); cmp = reftable_record_cmp(&top.rec, &entry.rec);
if (cmp > 0) if (cmp > 0)
break; break;
@ -243,8 +246,6 @@ static int merged_table_seek_record(struct reftable_merged_table *mt,
.typ = reftable_record_type(rec), .typ = reftable_record_type(rec),
.hash_id = mt->hash_id, .hash_id = mt->hash_id,
.suppress_deletions = mt->suppress_deletions, .suppress_deletions = mt->suppress_deletions,
.key = STRBUF_INIT,
.entry_key = STRBUF_INIT,
}; };
struct merged_iter *p; struct merged_iter *p;
int err; int err;

View File

@ -31,8 +31,6 @@ struct merged_iter {
uint8_t typ; uint8_t typ;
int suppress_deletions; int suppress_deletions;
struct merged_iter_pqueue pq; struct merged_iter_pqueue pq;
struct strbuf key;
struct strbuf entry_key;
}; };
void merged_table_release(struct reftable_merged_table *mt); void merged_table_release(struct reftable_merged_table *mt);

View File

@ -14,20 +14,9 @@ https://developers.google.com/open-source/licenses/bsd
int pq_less(struct pq_entry *a, struct pq_entry *b) int pq_less(struct pq_entry *a, struct pq_entry *b)
{ {
struct strbuf ak = STRBUF_INIT; int cmp = reftable_record_cmp(&a->rec, &b->rec);
struct strbuf bk = STRBUF_INIT;
int cmp = 0;
reftable_record_key(&a->rec, &ak);
reftable_record_key(&b->rec, &bk);
cmp = strbuf_cmp(&ak, &bk);
strbuf_release(&ak);
strbuf_release(&bk);
if (cmp == 0) if (cmp == 0)
return a->index > b->index; return a->index > b->index;
return cmp < 0; return cmp < 0;
} }

View File

@ -357,24 +357,32 @@ static int table_iter_next(struct table_iter *ti, struct reftable_record *rec)
while (1) { while (1) {
struct table_iter next = TABLE_ITER_INIT; struct table_iter next = TABLE_ITER_INIT;
int err = 0; int err;
if (ti->is_finished) {
if (ti->is_finished)
return 1; return 1;
}
/*
* Check whether the current block still has more records. If
* so, return it. If the iterator returns positive then the
* current block has been exhausted.
*/
err = table_iter_next_in_block(ti, rec); err = table_iter_next_in_block(ti, rec);
if (err <= 0) { if (err <= 0)
return err;
/*
* Otherwise, we need to continue to the next block in the
* table and retry. If there are no more blocks then the
* iterator is drained.
*/
err = table_iter_next_block(&next, ti);
table_iter_block_done(ti);
if (err) {
ti->is_finished = 1;
return err; return err;
} }
err = table_iter_next_block(&next, ti);
if (err != 0) {
ti->is_finished = 1;
}
table_iter_block_done(ti);
if (err != 0) {
return err;
}
table_iter_copy_from(ti, &next); table_iter_copy_from(ti, &next);
block_iter_close(&next.bi); block_iter_close(&next.bi);
} }

View File

@ -377,10 +377,11 @@ static int reftable_ref_record_decode(void *rec, struct strbuf key,
assert(hash_size > 0); assert(hash_size > 0);
r->refname = reftable_realloc(r->refname, key.len + 1); r->refname = reftable_malloc(key.len + 1);
memcpy(r->refname, key.buf, key.len); memcpy(r->refname, key.buf, key.len);
r->update_index = update_index;
r->refname[key.len] = 0; r->refname[key.len] = 0;
r->update_index = update_index;
r->value_type = val_type; r->value_type = val_type;
switch (val_type) { switch (val_type) {
case REFTABLE_REF_VAL1: case REFTABLE_REF_VAL1:
@ -430,7 +431,6 @@ static int reftable_ref_record_is_deletion_void(const void *p)
(const struct reftable_ref_record *)p); (const struct reftable_ref_record *)p);
} }
static int reftable_ref_record_equal_void(const void *a, static int reftable_ref_record_equal_void(const void *a,
const void *b, int hash_size) const void *b, int hash_size)
{ {
@ -439,6 +439,13 @@ static int reftable_ref_record_equal_void(const void *a,
return reftable_ref_record_equal(ra, rb, hash_size); return reftable_ref_record_equal(ra, rb, hash_size);
} }
static int reftable_ref_record_cmp_void(const void *_a, const void *_b)
{
const struct reftable_ref_record *a = _a;
const struct reftable_ref_record *b = _b;
return strcmp(a->refname, b->refname);
}
static void reftable_ref_record_print_void(const void *rec, static void reftable_ref_record_print_void(const void *rec,
int hash_size) int hash_size)
{ {
@ -455,6 +462,7 @@ static struct reftable_record_vtable reftable_ref_record_vtable = {
.release = &reftable_ref_record_release_void, .release = &reftable_ref_record_release_void,
.is_deletion = &reftable_ref_record_is_deletion_void, .is_deletion = &reftable_ref_record_is_deletion_void,
.equal = &reftable_ref_record_equal_void, .equal = &reftable_ref_record_equal_void,
.cmp = &reftable_ref_record_cmp_void,
.print = &reftable_ref_record_print_void, .print = &reftable_ref_record_print_void,
}; };
@ -627,6 +635,25 @@ static int reftable_obj_record_equal_void(const void *a, const void *b, int hash
return 1; return 1;
} }
static int reftable_obj_record_cmp_void(const void *_a, const void *_b)
{
const struct reftable_obj_record *a = _a;
const struct reftable_obj_record *b = _b;
int cmp;
cmp = memcmp(a->hash_prefix, b->hash_prefix,
a->hash_prefix_len > b->hash_prefix_len ?
a->hash_prefix_len : b->hash_prefix_len);
if (cmp)
return cmp;
/*
* When the prefix is the same then the object record that is longer is
* considered to be bigger.
*/
return a->hash_prefix_len - b->hash_prefix_len;
}
static struct reftable_record_vtable reftable_obj_record_vtable = { static struct reftable_record_vtable reftable_obj_record_vtable = {
.key = &reftable_obj_record_key, .key = &reftable_obj_record_key,
.type = BLOCK_TYPE_OBJ, .type = BLOCK_TYPE_OBJ,
@ -637,6 +664,7 @@ static struct reftable_record_vtable reftable_obj_record_vtable = {
.release = &reftable_obj_record_release, .release = &reftable_obj_record_release,
.is_deletion = &not_a_deletion, .is_deletion = &not_a_deletion,
.equal = &reftable_obj_record_equal_void, .equal = &reftable_obj_record_equal_void,
.cmp = &reftable_obj_record_cmp_void,
.print = &reftable_obj_record_print, .print = &reftable_obj_record_print,
}; };
@ -955,6 +983,22 @@ static int reftable_log_record_equal_void(const void *a,
hash_size); hash_size);
} }
static int reftable_log_record_cmp_void(const void *_a, const void *_b)
{
const struct reftable_log_record *a = _a;
const struct reftable_log_record *b = _b;
int cmp = strcmp(a->refname, b->refname);
if (cmp)
return cmp;
/*
* Note that the comparison here is reversed. This is because the
* update index is reversed when comparing keys. For reference, see how
* we handle this in reftable_log_record_key()`.
*/
return b->update_index - a->update_index;
}
int reftable_log_record_equal(const struct reftable_log_record *a, int reftable_log_record_equal(const struct reftable_log_record *a,
const struct reftable_log_record *b, int hash_size) const struct reftable_log_record *b, int hash_size)
{ {
@ -1004,6 +1048,7 @@ static struct reftable_record_vtable reftable_log_record_vtable = {
.release = &reftable_log_record_release_void, .release = &reftable_log_record_release_void,
.is_deletion = &reftable_log_record_is_deletion_void, .is_deletion = &reftable_log_record_is_deletion_void,
.equal = &reftable_log_record_equal_void, .equal = &reftable_log_record_equal_void,
.cmp = &reftable_log_record_cmp_void,
.print = &reftable_log_record_print_void, .print = &reftable_log_record_print_void,
}; };
@ -1079,6 +1124,13 @@ static int reftable_index_record_equal(const void *a, const void *b, int hash_si
return ia->offset == ib->offset && !strbuf_cmp(&ia->last_key, &ib->last_key); return ia->offset == ib->offset && !strbuf_cmp(&ia->last_key, &ib->last_key);
} }
static int reftable_index_record_cmp(const void *_a, const void *_b)
{
const struct reftable_index_record *a = _a;
const struct reftable_index_record *b = _b;
return strbuf_cmp(&a->last_key, &b->last_key);
}
static void reftable_index_record_print(const void *rec, int hash_size) static void reftable_index_record_print(const void *rec, int hash_size)
{ {
const struct reftable_index_record *idx = rec; const struct reftable_index_record *idx = rec;
@ -1096,6 +1148,7 @@ static struct reftable_record_vtable reftable_index_record_vtable = {
.release = &reftable_index_record_release, .release = &reftable_index_record_release,
.is_deletion = &not_a_deletion, .is_deletion = &not_a_deletion,
.equal = &reftable_index_record_equal, .equal = &reftable_index_record_equal,
.cmp = &reftable_index_record_cmp,
.print = &reftable_index_record_print, .print = &reftable_index_record_print,
}; };
@ -1149,6 +1202,14 @@ int reftable_record_is_deletion(struct reftable_record *rec)
reftable_record_data(rec)); reftable_record_data(rec));
} }
int reftable_record_cmp(struct reftable_record *a, struct reftable_record *b)
{
if (a->type != b->type)
BUG("cannot compare reftable records of different type");
return reftable_record_vtable(a)->cmp(
reftable_record_data(a), reftable_record_data(b));
}
int reftable_record_equal(struct reftable_record *a, struct reftable_record *b, int hash_size) int reftable_record_equal(struct reftable_record *a, struct reftable_record *b, int hash_size)
{ {
if (a->type != b->type) if (a->type != b->type)

View File

@ -62,6 +62,12 @@ struct reftable_record_vtable {
/* Are two records equal? This assumes they have the same type. Returns 0 for non-equal. */ /* Are two records equal? This assumes they have the same type. Returns 0 for non-equal. */
int (*equal)(const void *a, const void *b, int hash_size); int (*equal)(const void *a, const void *b, int hash_size);
/*
* Compare keys of two records with each other. The records must have
* the same type.
*/
int (*cmp)(const void *a, const void *b);
/* Print on stdout, for debugging. */ /* Print on stdout, for debugging. */
void (*print)(const void *rec, int hash_size); void (*print)(const void *rec, int hash_size);
}; };
@ -114,6 +120,7 @@ struct reftable_record {
void reftable_record_init(struct reftable_record *rec, uint8_t typ); void reftable_record_init(struct reftable_record *rec, uint8_t typ);
/* see struct record_vtable */ /* see struct record_vtable */
int reftable_record_cmp(struct reftable_record *a, struct reftable_record *b);
int reftable_record_equal(struct reftable_record *a, struct reftable_record *b, int hash_size); int reftable_record_equal(struct reftable_record *a, struct reftable_record *b, int hash_size);
void reftable_record_print(struct reftable_record *rec, int hash_size); void reftable_record_print(struct reftable_record *rec, int hash_size);
void reftable_record_key(struct reftable_record *rec, struct strbuf *dest); void reftable_record_key(struct reftable_record *rec, struct strbuf *dest);