From adb5d2cbe9a0589540ca631fb42e1579e8ee2d60 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 12 Feb 2024 09:32:25 +0100 Subject: [PATCH 1/7] reftable/record: introduce function to compare records by key In some places we need to sort reftable records by their keys to determine their ordering. This is done by first formatting the keys into a `struct strbuf` and then using `strbuf_cmp()` to compare them. This logic is needlessly roundabout and can end up costing quite a bit of CPU cycles, both due to the allocation and formatting logic. Introduce a new `reftable_record_cmp()` function that knows how to compare two records with each other without requiring allocations. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- reftable/record.c | 62 ++++++++++++++++++++++++++++++++++++++++++++++- reftable/record.h | 7 ++++++ 2 files changed, 68 insertions(+), 1 deletion(-) diff --git a/reftable/record.c b/reftable/record.c index 5c3fbb7b2a..f1b6a5eac9 100644 --- a/reftable/record.c +++ b/reftable/record.c @@ -430,7 +430,6 @@ static int reftable_ref_record_is_deletion_void(const void *p) (const struct reftable_ref_record *)p); } - static int reftable_ref_record_equal_void(const void *a, const void *b, int hash_size) { @@ -439,6 +438,13 @@ static int reftable_ref_record_equal_void(const void *a, return reftable_ref_record_equal(ra, rb, hash_size); } +static int reftable_ref_record_cmp_void(const void *_a, const void *_b) +{ + const struct reftable_ref_record *a = _a; + const struct reftable_ref_record *b = _b; + return strcmp(a->refname, b->refname); +} + static void reftable_ref_record_print_void(const void *rec, int hash_size) { @@ -455,6 +461,7 @@ static struct reftable_record_vtable reftable_ref_record_vtable = { .release = &reftable_ref_record_release_void, .is_deletion = &reftable_ref_record_is_deletion_void, .equal = &reftable_ref_record_equal_void, + .cmp = &reftable_ref_record_cmp_void, .print = &reftable_ref_record_print_void, }; @@ 
-625,6 +632,25 @@ static int reftable_obj_record_equal_void(const void *a, const void *b, int hash return 1; } +static int reftable_obj_record_cmp_void(const void *_a, const void *_b) +{ + const struct reftable_obj_record *a = _a; + const struct reftable_obj_record *b = _b; + int cmp; + + cmp = memcmp(a->hash_prefix, b->hash_prefix, + a->hash_prefix_len > b->hash_prefix_len ? + a->hash_prefix_len : b->hash_prefix_len); + if (cmp) + return cmp; + + /* + * When the prefix is the same then the object record that is longer is + * considered to be bigger. + */ + return a->hash_prefix_len - b->hash_prefix_len; +} + static struct reftable_record_vtable reftable_obj_record_vtable = { .key = &reftable_obj_record_key, .type = BLOCK_TYPE_OBJ, @@ -635,6 +661,7 @@ static struct reftable_record_vtable reftable_obj_record_vtable = { .release = &reftable_obj_record_release, .is_deletion = ¬_a_deletion, .equal = &reftable_obj_record_equal_void, + .cmp = &reftable_obj_record_cmp_void, .print = &reftable_obj_record_print, }; @@ -953,6 +980,22 @@ static int reftable_log_record_equal_void(const void *a, hash_size); } +static int reftable_log_record_cmp_void(const void *_a, const void *_b) +{ + const struct reftable_log_record *a = _a; + const struct reftable_log_record *b = _b; + int cmp = strcmp(a->refname, b->refname); + if (cmp) + return cmp; + + /* + * Note that the comparison here is reversed. This is because the + * update index is reversed when comparing keys. For reference, see how + * we handle this in reftable_log_record_key()`. 
+ */ + return b->update_index - a->update_index; +} + int reftable_log_record_equal(const struct reftable_log_record *a, const struct reftable_log_record *b, int hash_size) { @@ -1002,6 +1045,7 @@ static struct reftable_record_vtable reftable_log_record_vtable = { .release = &reftable_log_record_release_void, .is_deletion = &reftable_log_record_is_deletion_void, .equal = &reftable_log_record_equal_void, + .cmp = &reftable_log_record_cmp_void, .print = &reftable_log_record_print_void, }; @@ -1077,6 +1121,13 @@ static int reftable_index_record_equal(const void *a, const void *b, int hash_si return ia->offset == ib->offset && !strbuf_cmp(&ia->last_key, &ib->last_key); } +static int reftable_index_record_cmp(const void *_a, const void *_b) +{ + const struct reftable_index_record *a = _a; + const struct reftable_index_record *b = _b; + return strbuf_cmp(&a->last_key, &b->last_key); +} + static void reftable_index_record_print(const void *rec, int hash_size) { const struct reftable_index_record *idx = rec; @@ -1094,6 +1145,7 @@ static struct reftable_record_vtable reftable_index_record_vtable = { .release = &reftable_index_record_release, .is_deletion = ¬_a_deletion, .equal = &reftable_index_record_equal, + .cmp = &reftable_index_record_cmp, .print = &reftable_index_record_print, }; @@ -1147,6 +1199,14 @@ int reftable_record_is_deletion(struct reftable_record *rec) reftable_record_data(rec)); } +int reftable_record_cmp(struct reftable_record *a, struct reftable_record *b) +{ + if (a->type != b->type) + BUG("cannot compare reftable records of different type"); + return reftable_record_vtable(a)->cmp( + reftable_record_data(a), reftable_record_data(b)); +} + int reftable_record_equal(struct reftable_record *a, struct reftable_record *b, int hash_size) { if (a->type != b->type) diff --git a/reftable/record.h b/reftable/record.h index fd80cd451d..0d96fbfd1b 100644 --- a/reftable/record.h +++ b/reftable/record.h @@ -62,6 +62,12 @@ struct reftable_record_vtable { /* Are two 
records equal? This assumes they have the same type. Returns 0 for non-equal. */ int (*equal)(const void *a, const void *b, int hash_size); + /* + * Compare keys of two records with each other. The records must have + * the same type. + */ + int (*cmp)(const void *a, const void *b); + /* Print on stdout, for debugging. */ void (*print)(const void *rec, int hash_size); }; @@ -114,6 +120,7 @@ struct reftable_record { }; /* see struct record_vtable */ +int reftable_record_cmp(struct reftable_record *a, struct reftable_record *b); int reftable_record_equal(struct reftable_record *a, struct reftable_record *b, int hash_size); void reftable_record_print(struct reftable_record *rec, int hash_size); void reftable_record_key(struct reftable_record *rec, struct strbuf *dest); From a96e9a20f312276e624a23c1ce97487e054a8093 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 12 Feb 2024 09:32:29 +0100 Subject: [PATCH 2/7] reftable/merged: allocation-less dropping of shadowed records MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The purpose of the merged reftable iterator is to iterate through all entries of a set of tables in the correct order. This is implemented by using a sub-iterator for each table, where the next entry of each of these iterators gets put into a priority queue. For each iteration, we do roughly the following steps: 1. Retrieve the top record of the priority queue. This is the entry we want to return to the caller. 2. Retrieve the next record of the sub-iterator that this record came from. If any, add it to the priority queue at the correct position. The position is determined by comparing the record keys, which e.g. corresponds to the refname for ref records. 3. Keep removing the top record of the priority queue until we hit the first entry whose key is larger than the returned record's key. This is required to drop "shadowed" records. 
The last step will lead to at least one comparison to the next entry, but may lead to many comparisons in case the reftable stack consists of many tables with shadowed records. It is thus part of the hot code path when iterating through records. The code to compare the entries with each other is quite inefficient though. Instead of comparing record keys with each other directly, we first format them into `struct strbuf`s and only then compare them with each other. While we already optimized this code path to reuse buffers in 829231dc20 (reftable/merged: reuse buffer to compute record keys, 2023-12-11), the cost to format the keys into the buffers still adds up quite significantly. Refactor the code to use `reftable_record_cmp()` instead, which has been introduced in the preceding commit. This function compares records with each other directly without requiring any memory allocations or copying and is thus way more efficient. The following benchmark uses git-show-ref(1) to print a single ref matching a pattern out of 1 million refs. This is the most direct way to exercise ref iteration speed as we remove all overhead of having to show the refs, too. 
Benchmark 1: show-ref: single matching ref (revision = HEAD~) Time (mean ± σ): 180.7 ms ± 4.7 ms [User: 177.1 ms, System: 3.4 ms] Range (min … max): 174.9 ms … 211.7 ms 1000 runs Benchmark 2: show-ref: single matching ref (revision = HEAD) Time (mean ± σ): 162.1 ms ± 4.4 ms [User: 158.5 ms, System: 3.4 ms] Range (min … max): 155.4 ms … 189.3 ms 1000 runs Summary show-ref: single matching ref (revision = HEAD) ran 1.11 ± 0.04 times faster than show-ref: single matching ref (revision = HEAD~) Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- reftable/merged.c | 11 ++--------- reftable/merged.h | 2 -- 2 files changed, 2 insertions(+), 11 deletions(-) diff --git a/reftable/merged.c b/reftable/merged.c index c258ce953e..fb9978d798 100644 --- a/reftable/merged.c +++ b/reftable/merged.c @@ -51,8 +51,6 @@ static void merged_iter_close(void *p) reftable_iterator_destroy(&mi->stack[i]); } reftable_free(mi->stack); - strbuf_release(&mi->key); - strbuf_release(&mi->entry_key); } static int merged_iter_advance_nonnull_subiter(struct merged_iter *mi, @@ -105,14 +103,11 @@ static int merged_iter_next_entry(struct merged_iter *mi, such a deployment, the loop below must be changed to collect all entries for the same key, and return new the newest one. 
*/ - reftable_record_key(&entry.rec, &mi->entry_key); while (!merged_iter_pqueue_is_empty(mi->pq)) { struct pq_entry top = merged_iter_pqueue_top(mi->pq); - int cmp = 0; + int cmp; - reftable_record_key(&top.rec, &mi->key); - - cmp = strbuf_cmp(&mi->key, &mi->entry_key); + cmp = reftable_record_cmp(&top.rec, &entry.rec); if (cmp > 0) break; @@ -246,8 +241,6 @@ static int merged_table_seek_record(struct reftable_merged_table *mt, .typ = reftable_record_type(rec), .hash_id = mt->hash_id, .suppress_deletions = mt->suppress_deletions, - .key = STRBUF_INIT, - .entry_key = STRBUF_INIT, }; int n = 0; int err = 0; diff --git a/reftable/merged.h b/reftable/merged.h index d5b39dfe7f..7d9f95d27e 100644 --- a/reftable/merged.h +++ b/reftable/merged.h @@ -31,8 +31,6 @@ struct merged_iter { uint8_t typ; int suppress_deletions; struct merged_iter_pqueue pq; - struct strbuf key; - struct strbuf entry_key; }; void merged_table_release(struct reftable_merged_table *mt); From 5730a9dccf1d26f4d98a6d3868614ec2bb0e05a0 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 12 Feb 2024 09:32:37 +0100 Subject: [PATCH 3/7] reftable/merged: skip comparison for records of the same subiter MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When retrieving the next entry of a merged iterator we need to drop all records of other sub-iterators that would be shadowed by the record that we are about to return. We do this by comparing record keys, dropping all keys that are smaller or equal to the key of the record we are about to return. There is an edge case here where we can skip that comparison: when the record in the priority queue comes from the same subiterator as the record we are about to return then we know that its key must be larger than the key of the record we are about to return. This property is guaranteed by the sub-iterators, and if it didn't hold then the whole merged iterator would return records in the wrong order, too. 
While this may seem like a very specific edge case it's in fact quite likely to happen. For most repositories out there you can assume that we will end up with one large table and several smaller ones on top of it. Thus, it is very likely that the next entry will sort towards the top of the priority queue. Special case this and break out of the loop in that case. The following benchmark uses git-show-ref(1) to print a single ref matching a pattern out of 1 million refs: Benchmark 1: show-ref: single matching ref (revision = HEAD~) Time (mean ± σ): 162.6 ms ± 4.5 ms [User: 159.0 ms, System: 3.5 ms] Range (min … max): 156.6 ms … 188.5 ms 1000 runs Benchmark 2: show-ref: single matching ref (revision = HEAD) Time (mean ± σ): 156.8 ms ± 4.7 ms [User: 153.0 ms, System: 3.6 ms] Range (min … max): 151.4 ms … 188.4 ms 1000 runs Summary show-ref: single matching ref (revision = HEAD) ran 1.04 ± 0.04 times faster than show-ref: single matching ref (revision = HEAD~) Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- reftable/merged.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/reftable/merged.c b/reftable/merged.c index fb9978d798..0f74a14a77 100644 --- a/reftable/merged.c +++ b/reftable/merged.c @@ -107,6 +107,14 @@ static int merged_iter_next_entry(struct merged_iter *mi, struct pq_entry top = merged_iter_pqueue_top(mi->pq); int cmp; + /* + * When the next entry comes from the same queue as the current + * entry then it must by definition be larger. This avoids a + * comparison in the most common case. 
+ */ + if (top.index == entry.index) + break; + cmp = reftable_record_cmp(&top.rec, &entry.rec); if (cmp > 0) break; From dbe4e8b3fdd11b96e3ae291ecd09ed6d763a44a1 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 12 Feb 2024 09:32:43 +0100 Subject: [PATCH 4/7] reftable/pq: allocation-less comparison of entry keys MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The priority queue is used by the merged iterator to iterate over reftable records from multiple tables in the correct order. The queue ends up having one record for each table that is being iterated over, with the record that is supposed to be shown next at the top. For example, the key of a ref record is equal to its name so that we end up sorting the priority queue lexicographically by ref name. To figure out the order we need to compare the reftable record keys with each other. This comparison is done by formatting them into a `struct strbuf` and then doing `strbuf_cmp()` on the result. We then discard the buffers immediately after the comparison. This ends up being very expensive. Because the priority queue usually contains as many records as we have tables, we call the comparison function `O(log($tablecount))` many times for every record we insert. Furthermore, when iterating over many refs, we will insert at least one record for every ref we are iterating over. So ultimately, this ends up being called `O($refcount * log($tablecount))` many times. Refactor the code to use the new `reftable_record_cmp()` function that has been implemented in a preceding commit. This function does not need to allocate memory and is thus significantly more efficient.
The following benchmark prints a single ref matching a specific pattern out of 1 million refs via git-show-ref(1), where the reftable stack consists of three tables: Benchmark 1: show-ref: single matching ref (revision = HEAD~) Time (mean ± σ): 224.4 ms ± 6.5 ms [User: 220.6 ms, System: 3.6 ms] Range (min … max): 216.5 ms … 261.1 ms 1000 runs Benchmark 2: show-ref: single matching ref (revision = HEAD) Time (mean ± σ): 172.9 ms ± 4.4 ms [User: 169.2 ms, System: 3.6 ms] Range (min … max): 166.5 ms … 204.6 ms 1000 runs Summary show-ref: single matching ref (revision = HEAD) ran 1.30 ± 0.05 times faster than show-ref: single matching ref (revision = HEAD~) Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- reftable/pq.c | 13 +------------ 1 file changed, 1 insertion(+), 12 deletions(-) diff --git a/reftable/pq.c b/reftable/pq.c index dcefeb793a..7220efc39a 100644 --- a/reftable/pq.c +++ b/reftable/pq.c @@ -14,20 +14,9 @@ https://developers.google.com/open-source/licenses/bsd int pq_less(struct pq_entry *a, struct pq_entry *b) { - struct strbuf ak = STRBUF_INIT; - struct strbuf bk = STRBUF_INIT; - int cmp = 0; - reftable_record_key(&a->rec, &ak); - reftable_record_key(&b->rec, &bk); - - cmp = strbuf_cmp(&ak, &bk); - - strbuf_release(&ak); - strbuf_release(&bk); - + int cmp = reftable_record_cmp(&a->rec, &b->rec); if (cmp == 0) return a->index > b->index; - return cmp < 0; } From 92fa3253c8423815e84be91a2d9a3509df20a646 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 12 Feb 2024 09:32:48 +0100 Subject: [PATCH 5/7] reftable/block: swap buffers instead of copying MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When iterating towards the next record in a reftable block we need to keep track of the key that the last record had. This is required because reftable records use prefix compression, where subsequent records may reuse parts of their preceding record's key. 
This key is stored in the `block_iter::last_key`, which we update after every call to `block_iter_next()`: we simply reset the buffer and then add the current key to it. This is a bit inefficient though because it requires us to copy over the key on every iteration, which adds up when iterating over many records. Instead, we can make use of the fact that the `block_iter::key` buffer is basically only a scratch buffer. So instead of copying over contents, we can just swap both buffers. The following benchmark prints a single ref matching a specific pattern out of 1 million refs via git-show-ref(1): Benchmark 1: show-ref: single matching ref (revision = HEAD~) Time (mean ± σ): 155.7 ms ± 5.0 ms [User: 152.1 ms, System: 3.4 ms] Range (min … max): 150.8 ms … 185.7 ms 1000 runs Benchmark 2: show-ref: single matching ref (revision = HEAD) Time (mean ± σ): 150.8 ms ± 4.2 ms [User: 147.1 ms, System: 3.5 ms] Range (min … max): 145.1 ms … 180.7 ms 1000 runs Summary show-ref: single matching ref (revision = HEAD) ran 1.03 ± 0.04 times faster than show-ref: single matching ref (revision = HEAD~) Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- reftable/block.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/reftable/block.c b/reftable/block.c index 1df3d8a0f0..44381ea6a3 100644 --- a/reftable/block.c +++ b/reftable/block.c @@ -342,8 +342,7 @@ int block_iter_next(struct block_iter *it, struct reftable_record *rec) return -1; string_view_consume(&in, n); - strbuf_reset(&it->last_key); - strbuf_addbuf(&it->last_key, &it->key); + strbuf_swap(&it->last_key, &it->key); it->next_off += start.len - in.len; return 0; } From a418a7abef72132eff23e783f80f8c88b3c63266 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 12 Feb 2024 09:32:53 +0100 Subject: [PATCH 6/7] reftable/record: don't try to reallocate ref record name MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When decoding reftable ref 
records we first release the pointer to the record passed to us and then use realloc(3P) to allocate the refname array. This is a bit misleading though as we know at that point that the refname will always be `NULL`, so we would always end up allocating a new char array anyway. Refactor the code to use `REFTABLE_ALLOC_ARRAY()` instead. As the following benchmark demonstrates this is a tiny bit more efficient. But the bigger selling point really is the gained clarity. Benchmark 1: show-ref: single matching ref (revision = HEAD~) Time (mean ± σ): 150.1 ms ± 4.1 ms [User: 146.6 ms, System: 3.3 ms] Range (min … max): 144.5 ms … 180.5 ms 1000 runs Benchmark 2: show-ref: single matching ref (revision = HEAD) Time (mean ± σ): 148.9 ms ± 4.5 ms [User: 145.2 ms, System: 3.4 ms] Range (min … max): 143.0 ms … 185.4 ms 1000 runs Summary show-ref: single matching ref (revision = HEAD) ran 1.01 ± 0.04 times faster than show-ref: single matching ref (revision = HEAD~) Ideally, we should try and reuse the memory of the old record instead of first freeing and then immediately reallocating it. This requires some more surgery though and is thus left for a future iteration. 
Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- reftable/record.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/reftable/record.c b/reftable/record.c index f1b6a5eac9..6465a7b8f4 100644 --- a/reftable/record.c +++ b/reftable/record.c @@ -377,10 +377,11 @@ static int reftable_ref_record_decode(void *rec, struct strbuf key, assert(hash_size > 0); - r->refname = reftable_realloc(r->refname, key.len + 1); + r->refname = reftable_malloc(key.len + 1); memcpy(r->refname, key.buf, key.len); - r->update_index = update_index; r->refname[key.len] = 0; + + r->update_index = update_index; r->value_type = val_type; switch (val_type) { case REFTABLE_REF_VAL1: From c68ca7abd30b22404ce59d5133566729c07ffe8f Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 12 Feb 2024 09:32:57 +0100 Subject: [PATCH 7/7] reftable/reader: add comments to `table_iter_next()` While working on the optimizations in the preceding patches I stumbled upon `table_iter_next()` multiple times. It is quite easy to miss the fact that we don't call `table_iter_next_in_block()` twice, but that the second call is in fact `table_iter_next_block()`. Add comments to explain what exactly is going on here to make things more obvious. While at it, touch up the code to conform to our code style better. Note that one of the refactorings merges two conditional blocks into one. Before, we had the following code: ``` err = table_iter_next_block(&next, ti); if (err != 0) { ti->is_finished = 1; } table_iter_block_done(ti); if (err != 0) { return err; } ``` As `table_iter_block_done()` does not care about `is_finished`, the conditional blocks can be merged into one block: ``` err = table_iter_next_block(&next, ti); table_iter_block_done(ti); if (err != 0) { ti->is_finished = 1; return err; } ``` This is both easier to reason about and more performant because we have one branch less. 
Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- reftable/reader.c | 32 ++++++++++++++++++++------------ 1 file changed, 20 insertions(+), 12 deletions(-) diff --git a/reftable/reader.c b/reftable/reader.c index 64dc366fb1..add7d57f0b 100644 --- a/reftable/reader.c +++ b/reftable/reader.c @@ -357,24 +357,32 @@ static int table_iter_next(struct table_iter *ti, struct reftable_record *rec) while (1) { struct table_iter next = TABLE_ITER_INIT; - int err = 0; - if (ti->is_finished) { + int err; + + if (ti->is_finished) return 1; - } + /* + * Check whether the current block still has more records. If + * so, return it. If the iterator returns positive then the + * current block has been exhausted. + */ err = table_iter_next_in_block(ti, rec); - if (err <= 0) { + if (err <= 0) + return err; + + /* + * Otherwise, we need to continue to the next block in the + * table and retry. If there are no more blocks then the + * iterator is drained. + */ + err = table_iter_next_block(&next, ti); + table_iter_block_done(ti); + if (err) { + ti->is_finished = 1; return err; } - err = table_iter_next_block(&next, ti); - if (err != 0) { - ti->is_finished = 1; - } - table_iter_block_done(ti); - if (err != 0) { - return err; - } table_iter_copy_from(ti, &next); block_iter_close(&next.bi); }