From adb5d2cbe9a0589540ca631fb42e1579e8ee2d60 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 12 Feb 2024 09:32:25 +0100 Subject: [PATCH 1/7] reftable/record: introduce function to compare records by key In some places we need to sort reftable records by their keys to determine their ordering. This is done by first formatting the keys into a `struct strbuf` and then using `strbuf_cmp()` to compare them. This logic is needlessly roundabout and can end up costing quite a bit of CPU cycles, both due to the allocation and formatting logic. Introduce a new `reftable_record_cmp()` function that knows how to compare two records with each other without requiring allocations. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- reftable/record.c | 62 ++++++++++++++++++++++++++++++++++++++++++++++- reftable/record.h | 7 ++++++ 2 files changed, 68 insertions(+), 1 deletion(-) diff --git a/reftable/record.c b/reftable/record.c index 5c3fbb7b2a..f1b6a5eac9 100644 --- a/reftable/record.c +++ b/reftable/record.c @@ -430,7 +430,6 @@ static int reftable_ref_record_is_deletion_void(const void *p) (const struct reftable_ref_record *)p); } - static int reftable_ref_record_equal_void(const void *a, const void *b, int hash_size) { @@ -439,6 +438,13 @@ static int reftable_ref_record_equal_void(const void *a, return reftable_ref_record_equal(ra, rb, hash_size); } +static int reftable_ref_record_cmp_void(const void *_a, const void *_b) +{ + const struct reftable_ref_record *a = _a; + const struct reftable_ref_record *b = _b; + return strcmp(a->refname, b->refname); +} + static void reftable_ref_record_print_void(const void *rec, int hash_size) { @@ -455,6 +461,7 @@ static struct reftable_record_vtable reftable_ref_record_vtable = { .release = &reftable_ref_record_release_void, .is_deletion = &reftable_ref_record_is_deletion_void, .equal = &reftable_ref_record_equal_void, + .cmp = &reftable_ref_record_cmp_void, .print = &reftable_ref_record_print_void, }; @@ 
-625,6 +632,25 @@ static int reftable_obj_record_equal_void(const void *a, const void *b, int hash return 1; } +static int reftable_obj_record_cmp_void(const void *_a, const void *_b) +{ + const struct reftable_obj_record *a = _a; + const struct reftable_obj_record *b = _b; + int cmp; + + cmp = memcmp(a->hash_prefix, b->hash_prefix, + a->hash_prefix_len > b->hash_prefix_len ? + a->hash_prefix_len : b->hash_prefix_len); + if (cmp) + return cmp; + + /* + * When the prefix is the same then the object record that is longer is + * considered to be bigger. + */ + return a->hash_prefix_len - b->hash_prefix_len; +} + static struct reftable_record_vtable reftable_obj_record_vtable = { .key = &reftable_obj_record_key, .type = BLOCK_TYPE_OBJ, @@ -635,6 +661,7 @@ static struct reftable_record_vtable reftable_obj_record_vtable = { .release = &reftable_obj_record_release, .is_deletion = ¬_a_deletion, .equal = &reftable_obj_record_equal_void, + .cmp = &reftable_obj_record_cmp_void, .print = &reftable_obj_record_print, }; @@ -953,6 +980,22 @@ static int reftable_log_record_equal_void(const void *a, hash_size); } +static int reftable_log_record_cmp_void(const void *_a, const void *_b) +{ + const struct reftable_log_record *a = _a; + const struct reftable_log_record *b = _b; + int cmp = strcmp(a->refname, b->refname); + if (cmp) + return cmp; + + /* + * Note that the comparison here is reversed. This is because the + * update index is reversed when comparing keys. For reference, see how + * we handle this in reftable_log_record_key()`. 
+ */ + return b->update_index - a->update_index; +} + int reftable_log_record_equal(const struct reftable_log_record *a, const struct reftable_log_record *b, int hash_size) { @@ -1002,6 +1045,7 @@ static struct reftable_record_vtable reftable_log_record_vtable = { .release = &reftable_log_record_release_void, .is_deletion = &reftable_log_record_is_deletion_void, .equal = &reftable_log_record_equal_void, + .cmp = &reftable_log_record_cmp_void, .print = &reftable_log_record_print_void, }; @@ -1077,6 +1121,13 @@ static int reftable_index_record_equal(const void *a, const void *b, int hash_si return ia->offset == ib->offset && !strbuf_cmp(&ia->last_key, &ib->last_key); } +static int reftable_index_record_cmp(const void *_a, const void *_b) +{ + const struct reftable_index_record *a = _a; + const struct reftable_index_record *b = _b; + return strbuf_cmp(&a->last_key, &b->last_key); +} + static void reftable_index_record_print(const void *rec, int hash_size) { const struct reftable_index_record *idx = rec; @@ -1094,6 +1145,7 @@ static struct reftable_record_vtable reftable_index_record_vtable = { .release = &reftable_index_record_release, .is_deletion = ¬_a_deletion, .equal = &reftable_index_record_equal, + .cmp = &reftable_index_record_cmp, .print = &reftable_index_record_print, }; @@ -1147,6 +1199,14 @@ int reftable_record_is_deletion(struct reftable_record *rec) reftable_record_data(rec)); } +int reftable_record_cmp(struct reftable_record *a, struct reftable_record *b) +{ + if (a->type != b->type) + BUG("cannot compare reftable records of different type"); + return reftable_record_vtable(a)->cmp( + reftable_record_data(a), reftable_record_data(b)); +} + int reftable_record_equal(struct reftable_record *a, struct reftable_record *b, int hash_size) { if (a->type != b->type) diff --git a/reftable/record.h b/reftable/record.h index fd80cd451d..0d96fbfd1b 100644 --- a/reftable/record.h +++ b/reftable/record.h @@ -62,6 +62,12 @@ struct reftable_record_vtable { /* Are two 
records equal? This assumes they have the same type. Returns 0 for non-equal. */ int (*equal)(const void *a, const void *b, int hash_size); + /* + * Compare keys of two records with each other. The records must have + * the same type. + */ + int (*cmp)(const void *a, const void *b); + /* Print on stdout, for debugging. */ void (*print)(const void *rec, int hash_size); }; @@ -114,6 +120,7 @@ struct reftable_record { }; /* see struct record_vtable */ +int reftable_record_cmp(struct reftable_record *a, struct reftable_record *b); int reftable_record_equal(struct reftable_record *a, struct reftable_record *b, int hash_size); void reftable_record_print(struct reftable_record *rec, int hash_size); void reftable_record_key(struct reftable_record *rec, struct strbuf *dest); From a96e9a20f312276e624a23c1ce97487e054a8093 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 12 Feb 2024 09:32:29 +0100 Subject: [PATCH 2/7] reftable/merged: allocation-less dropping of shadowed records MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The purpose of the merged reftable iterator is to iterate through all entries of a set of tables in the correct order. This is implemented by using a sub-iterator for each table, where the next entry of each of these iterators gets put into a priority queue. For each iteration, we do roughly the following steps: 1. Retrieve the top record of the priority queue. This is the entry we want to return to the caller. 2. Retrieve the next record of the sub-iterator that this record came from. If any, add it to the priority queue at the correct position. The position is determined by comparing the record keys, which e.g. corresponds to the refname for ref records. 3. Keep removing the top record of the priority queue until we hit the first entry whose key is larger than the returned record's key. This is required to drop "shadowed" records. 
The last step will lead to at least one comparison to the next entry, but may lead to many comparisons in case the reftable stack consists of many tables with shadowed records. It is thus part of the hot code path when iterating through records. The code to compare the entries with each other is quite inefficient though. Instead of comparing record keys with each other directly, we first format them into `struct strbuf`s and only then compare them with each other. While we already optimized this code path to reuse buffers in 829231dc20 (reftable/merged: reuse buffer to compute record keys, 2023-12-11), the cost to format the keys into the buffers still adds up quite significantly. Refactor the code to use `reftable_record_cmp()` instead, which has been introduced in the preceding commit. This function compares records with each other directly without requiring any memory allocations or copying and is thus way more efficient. The following benchmark uses git-show-ref(1) to print a single ref matching a pattern out of 1 million refs. This is the most direct way to exercise ref iteration speed as we remove all overhead of having to show the refs, too. 
Benchmark 1: show-ref: single matching ref (revision = HEAD~) Time (mean ± σ): 180.7 ms ± 4.7 ms [User: 177.1 ms, System: 3.4 ms] Range (min … max): 174.9 ms … 211.7 ms 1000 runs Benchmark 2: show-ref: single matching ref (revision = HEAD) Time (mean ± σ): 162.1 ms ± 4.4 ms [User: 158.5 ms, System: 3.4 ms] Range (min … max): 155.4 ms … 189.3 ms 1000 runs Summary show-ref: single matching ref (revision = HEAD) ran 1.11 ± 0.04 times faster than show-ref: single matching ref (revision = HEAD~) Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- reftable/merged.c | 11 ++--------- reftable/merged.h | 2 -- 2 files changed, 2 insertions(+), 11 deletions(-) diff --git a/reftable/merged.c b/reftable/merged.c index c258ce953e..fb9978d798 100644 --- a/reftable/merged.c +++ b/reftable/merged.c @@ -51,8 +51,6 @@ static void merged_iter_close(void *p) reftable_iterator_destroy(&mi->stack[i]); } reftable_free(mi->stack); - strbuf_release(&mi->key); - strbuf_release(&mi->entry_key); } static int merged_iter_advance_nonnull_subiter(struct merged_iter *mi, @@ -105,14 +103,11 @@ static int merged_iter_next_entry(struct merged_iter *mi, such a deployment, the loop below must be changed to collect all entries for the same key, and return new the newest one. 
*/ - reftable_record_key(&entry.rec, &mi->entry_key); while (!merged_iter_pqueue_is_empty(mi->pq)) { struct pq_entry top = merged_iter_pqueue_top(mi->pq); - int cmp = 0; + int cmp; - reftable_record_key(&top.rec, &mi->key); - - cmp = strbuf_cmp(&mi->key, &mi->entry_key); + cmp = reftable_record_cmp(&top.rec, &entry.rec); if (cmp > 0) break; @@ -246,8 +241,6 @@ static int merged_table_seek_record(struct reftable_merged_table *mt, .typ = reftable_record_type(rec), .hash_id = mt->hash_id, .suppress_deletions = mt->suppress_deletions, - .key = STRBUF_INIT, - .entry_key = STRBUF_INIT, }; int n = 0; int err = 0; diff --git a/reftable/merged.h b/reftable/merged.h index d5b39dfe7f..7d9f95d27e 100644 --- a/reftable/merged.h +++ b/reftable/merged.h @@ -31,8 +31,6 @@ struct merged_iter { uint8_t typ; int suppress_deletions; struct merged_iter_pqueue pq; - struct strbuf key; - struct strbuf entry_key; }; void merged_table_release(struct reftable_merged_table *mt); From 5730a9dccf1d26f4d98a6d3868614ec2bb0e05a0 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 12 Feb 2024 09:32:37 +0100 Subject: [PATCH 3/7] reftable/merged: skip comparison for records of the same subiter MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When retrieving the next entry of a merged iterator we need to drop all records of other sub-iterators that would be shadowed by the record that we are about to return. We do this by comparing record keys, dropping all keys that are smaller or equal to the key of the record we are about to return. There is an edge case here where we can skip that comparison: when the record in the priority queue comes from the same subiterator as the record we are about to return then we know that its key must be larger than the key of the record we are about to return. This property is guaranteed by the sub-iterators, and if it didn't hold then the whole merged iterator would return records in the wrong order, too. 
While this may seem like a very specific edge case it's in fact quite likely to happen. For most repositories out there you can assume that we will end up with one large table and several smaller ones on top of it. Thus, it is very likely that the next entry will sort towards the top of the priority queue. Special case this and break out of the loop in that case. The following benchmark uses git-show-ref(1) to print a single ref matching a pattern out of 1 million refs: Benchmark 1: show-ref: single matching ref (revision = HEAD~) Time (mean ± σ): 162.6 ms ± 4.5 ms [User: 159.0 ms, System: 3.5 ms] Range (min … max): 156.6 ms … 188.5 ms 1000 runs Benchmark 2: show-ref: single matching ref (revision = HEAD) Time (mean ± σ): 156.8 ms ± 4.7 ms [User: 153.0 ms, System: 3.6 ms] Range (min … max): 151.4 ms … 188.4 ms 1000 runs Summary show-ref: single matching ref (revision = HEAD) ran 1.04 ± 0.04 times faster than show-ref: single matching ref (revision = HEAD~) Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- reftable/merged.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/reftable/merged.c b/reftable/merged.c index fb9978d798..0f74a14a77 100644 --- a/reftable/merged.c +++ b/reftable/merged.c @@ -107,6 +107,14 @@ static int merged_iter_next_entry(struct merged_iter *mi, struct pq_entry top = merged_iter_pqueue_top(mi->pq); int cmp; + /* + * When the next entry comes from the same queue as the current + * entry then it must by definition be larger. This avoids a + * comparison in the most common case. 
+ */ + if (top.index == entry.index) + break; + cmp = reftable_record_cmp(&top.rec, &entry.rec); if (cmp > 0) break; From dbe4e8b3fdd11b96e3ae291ecd09ed6d763a44a1 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 12 Feb 2024 09:32:43 +0100 Subject: [PATCH 4/7] reftable/pq: allocation-less comparison of entry keys MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The priority queue is used by the merged iterator to iterate over reftable records from multiple tables in the correct order. The queue ends up having one record for each table that is being iterated over, with the record that is supposed to be shown next at the top. For example, the key of a ref record is equal to its name so that we end up sorting the priority queue lexicographically by ref name. To figure out the order we need to compare the reftable record keys with each other. This comparison is done by formatting them into a `struct strbuf` and then doing `strbuf_cmp()` on the result. We then discard the buffers immediately after the comparison. This ends up being very expensive. Because the priority queue usually contains as many records as we have tables, we call the comparison function `O(log($tablecount))` many times for every record we insert. Furthermore, when iterating over many refs, we will insert at least one record for every ref we are iterating over. So ultimately, this ends up being called `O($refcount * log($tablecount))` many times. Refactor the code to use the new `reftable_record_cmp()` function that has been implemented in a preceding commit. This function does not need to allocate memory and is thus significantly more efficient.
The following benchmark prints a single ref matching a specific pattern out of 1 million refs via git-show-ref(1), where the reftable stack consists of three tables: Benchmark 1: show-ref: single matching ref (revision = HEAD~) Time (mean ± σ): 224.4 ms ± 6.5 ms [User: 220.6 ms, System: 3.6 ms] Range (min … max): 216.5 ms … 261.1 ms 1000 runs Benchmark 2: show-ref: single matching ref (revision = HEAD) Time (mean ± σ): 172.9 ms ± 4.4 ms [User: 169.2 ms, System: 3.6 ms] Range (min … max): 166.5 ms … 204.6 ms 1000 runs Summary show-ref: single matching ref (revision = HEAD) ran 1.30 ± 0.05 times faster than show-ref: single matching ref (revision = HEAD~) Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- reftable/pq.c | 13 +------------ 1 file changed, 1 insertion(+), 12 deletions(-) diff --git a/reftable/pq.c b/reftable/pq.c index dcefeb793a..7220efc39a 100644 --- a/reftable/pq.c +++ b/reftable/pq.c @@ -14,20 +14,9 @@ https://developers.google.com/open-source/licenses/bsd int pq_less(struct pq_entry *a, struct pq_entry *b) { - struct strbuf ak = STRBUF_INIT; - struct strbuf bk = STRBUF_INIT; - int cmp = 0; - reftable_record_key(&a->rec, &ak); - reftable_record_key(&b->rec, &bk); - - cmp = strbuf_cmp(&ak, &bk); - - strbuf_release(&ak); - strbuf_release(&bk); - + int cmp = reftable_record_cmp(&a->rec, &b->rec); if (cmp == 0) return a->index > b->index; - return cmp < 0; } From 92fa3253c8423815e84be91a2d9a3509df20a646 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 12 Feb 2024 09:32:48 +0100 Subject: [PATCH 5/7] reftable/block: swap buffers instead of copying MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When iterating towards the next record in a reftable block we need to keep track of the key that the last record had. This is required because reftable records use prefix compression, where subsequent records may reuse parts of their preceding record's key. 
This key is stored in the `block_iter::last_key`, which we update after every call to `block_iter_next()`: we simply reset the buffer and then add the current key to it. This is a bit inefficient though because it requires us to copy over the key on every iteration, which adds up when iterating over many records. Instead, we can make use of the fact that the `block_iter::key` buffer is basically only a scratch buffer. So instead of copying over contents, we can just swap both buffers. The following benchmark prints a single ref matching a specific pattern out of 1 million refs via git-show-ref(1): Benchmark 1: show-ref: single matching ref (revision = HEAD~) Time (mean ± σ): 155.7 ms ± 5.0 ms [User: 152.1 ms, System: 3.4 ms] Range (min … max): 150.8 ms … 185.7 ms 1000 runs Benchmark 2: show-ref: single matching ref (revision = HEAD) Time (mean ± σ): 150.8 ms ± 4.2 ms [User: 147.1 ms, System: 3.5 ms] Range (min … max): 145.1 ms … 180.7 ms 1000 runs Summary show-ref: single matching ref (revision = HEAD) ran 1.03 ± 0.04 times faster than show-ref: single matching ref (revision = HEAD~) Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- reftable/block.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/reftable/block.c b/reftable/block.c index 1df3d8a0f0..44381ea6a3 100644 --- a/reftable/block.c +++ b/reftable/block.c @@ -342,8 +342,7 @@ int block_iter_next(struct block_iter *it, struct reftable_record *rec) return -1; string_view_consume(&in, n); - strbuf_reset(&it->last_key); - strbuf_addbuf(&it->last_key, &it->key); + strbuf_swap(&it->last_key, &it->key); it->next_off += start.len - in.len; return 0; } From a418a7abef72132eff23e783f80f8c88b3c63266 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 12 Feb 2024 09:32:53 +0100 Subject: [PATCH 6/7] reftable/record: don't try to reallocate ref record name MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When decoding reftable ref 
records we first release the pointer to the record passed to us and then use realloc(3P) to allocate the refname array. This is a bit misleading though as we know at that point that the refname will always be `NULL`, so we would always end up allocating a new char array anyway. Refactor the code to use `REFTABLE_ALLOC_ARRAY()` instead. As the following benchmark demonstrates this is a tiny bit more efficient. But the bigger selling point really is the gained clarity. Benchmark 1: show-ref: single matching ref (revision = HEAD~) Time (mean ± σ): 150.1 ms ± 4.1 ms [User: 146.6 ms, System: 3.3 ms] Range (min … max): 144.5 ms … 180.5 ms 1000 runs Benchmark 2: show-ref: single matching ref (revision = HEAD) Time (mean ± σ): 148.9 ms ± 4.5 ms [User: 145.2 ms, System: 3.4 ms] Range (min … max): 143.0 ms … 185.4 ms 1000 runs Summary show-ref: single matching ref (revision = HEAD) ran 1.01 ± 0.04 times faster than show-ref: single matching ref (revision = HEAD~) Ideally, we should try and reuse the memory of the old record instead of first freeing and then immediately reallocating it. This requires some more surgery though and is thus left for a future iteration. 
Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- reftable/record.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/reftable/record.c b/reftable/record.c index f1b6a5eac9..6465a7b8f4 100644 --- a/reftable/record.c +++ b/reftable/record.c @@ -377,10 +377,11 @@ static int reftable_ref_record_decode(void *rec, struct strbuf key, assert(hash_size > 0); - r->refname = reftable_realloc(r->refname, key.len + 1); + r->refname = reftable_malloc(key.len + 1); memcpy(r->refname, key.buf, key.len); - r->update_index = update_index; r->refname[key.len] = 0; + + r->update_index = update_index; r->value_type = val_type; switch (val_type) { case REFTABLE_REF_VAL1: From c68ca7abd30b22404ce59d5133566729c07ffe8f Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 12 Feb 2024 09:32:57 +0100 Subject: [PATCH 7/7] reftable/reader: add comments to `table_iter_next()` While working on the optimizations in the preceding patches I stumbled upon `table_iter_next()` multiple times. It is quite easy to miss the fact that we don't call `table_iter_next_in_block()` twice, but that the second call is in fact `table_iter_next_block()`. Add comments to explain what exactly is going on here to make things more obvious. While at it, touch up the code to conform to our code style better. Note that one of the refactorings merges two conditional blocks into one. Before, we had the following code: ``` err = table_iter_next_block(&next, ti); if (err != 0) { ti->is_finished = 1; } table_iter_block_done(ti); if (err != 0) { return err; } ``` As `table_iter_block_done()` does not care about `is_finished`, the conditional blocks can be merged into one block: ``` err = table_iter_next_block(&next, ti); table_iter_block_done(ti); if (err != 0) { ti->is_finished = 1; return err; } ``` This is both easier to reason about and more performant because we have one branch less. 
Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- reftable/reader.c | 32 ++++++++++++++++++++------------ 1 file changed, 20 insertions(+), 12 deletions(-) diff --git a/reftable/reader.c b/reftable/reader.c index 64dc366fb1..add7d57f0b 100644 --- a/reftable/reader.c +++ b/reftable/reader.c @@ -357,24 +357,32 @@ static int table_iter_next(struct table_iter *ti, struct reftable_record *rec) while (1) { struct table_iter next = TABLE_ITER_INIT; - int err = 0; - if (ti->is_finished) { + int err; + + if (ti->is_finished) return 1; - } + /* + * Check whether the current block still has more records. If + * so, return it. If the iterator returns positive then the + * current block has been exhausted. + */ err = table_iter_next_in_block(ti, rec); - if (err <= 0) { + if (err <= 0) + return err; + + /* + * Otherwise, we need to continue to the next block in the + * table and retry. If there are no more blocks then the + * iterator is drained. + */ + err = table_iter_next_block(&next, ti); + table_iter_block_done(ti); + if (err) { + ti->is_finished = 1; return err; } - err = table_iter_next_block(&next, ti); - if (err != 0) { - ti->is_finished = 1; - } - table_iter_block_done(ti); - if (err != 0) { - return err; - } table_iter_copy_from(ti, &next); block_iter_close(&next.bi); }