diff --git a/reftable/record.c b/reftable/record.c index 04429d23fe..a55ce76aeb 100644 --- a/reftable/record.c +++ b/reftable/record.c @@ -21,47 +21,49 @@ static void *reftable_record_data(struct reftable_record *rec); int get_var_int(uint64_t *dest, struct string_view *in) { - int ptr = 0; + const unsigned char *buf = in->buf; + unsigned char c; uint64_t val; - if (in->len == 0) + if (!in->len) return -1; - val = in->buf[ptr] & 0x7f; + c = *buf++; + val = c & 0x7f; - while (in->buf[ptr] & 0x80) { - ptr++; - if (ptr > in->len) { + while (c & 0x80) { + /* + * We use a micro-optimization here: whenever we see that the + * 0x80 bit is set, we know that the remainder of the value + * cannot be 0. The zero-values thus doesn't need to be encoded + * at all, which is why we subtract 1 when encoding and add 1 + * when decoding. + * + * This allows us to save a byte in some edge cases. + */ + val += 1; + if (!val || (val & (uint64_t)(~0ULL << (64 - 7)))) + return -1; /* overflow */ + if (buf >= in->buf + in->len) return -1; - } - val = (val + 1) << 7 | (uint64_t)(in->buf[ptr] & 0x7f); + c = *buf++; + val = (val << 7) + (c & 0x7f); } *dest = val; - return ptr + 1; + return buf - in->buf; } -int put_var_int(struct string_view *dest, uint64_t val) +int put_var_int(struct string_view *dest, uint64_t value) { - uint8_t buf[10] = { 0 }; - int i = 9; - int n = 0; - buf[i] = (uint8_t)(val & 0x7f); - i--; - while (1) { - val >>= 7; - if (!val) { - break; - } - val--; - buf[i] = 0x80 | (uint8_t)(val & 0x7f); - i--; - } - - n = sizeof(buf) - i - 1; - if (dest->len < n) + unsigned char varint[10]; + unsigned pos = sizeof(varint) - 1; + varint[pos] = value & 0x7f; + while (value >>= 7) + varint[--pos] = 0x80 | (--value & 0x7f); + if (dest->len < sizeof(varint) - pos) return -1; - memcpy(dest->buf, &buf[i + 1], n); - return n; + memcpy(dest->buf, varint + pos, sizeof(varint) - pos); + return sizeof(varint) - pos; } int reftable_is_block_type(uint8_t typ) diff --git a/reftable/record.h b/reftable/record.h index a24cb23bd4..0df950f401 100644 --- a/reftable/record.h +++ b/reftable/record.h @@ -32,8 +32,10 @@ static inline void string_view_consume(struct string_view *s, int n) s->len -= n; } -/* utilities for de/encoding varints */ - +/* + * Decode and encode a varint. Returns the number of bytes read/written, or a + * negative value in case encoding/decoding the varint has failed. + */ int get_var_int(uint64_t *dest, struct string_view *in); int put_var_int(struct string_view *dest, uint64_t val); diff --git a/t/unit-tests/t-reftable-record.c b/t/unit-tests/t-reftable-record.c index 42bc64cec8..6d912b9c8f 100644 --- a/t/unit-tests/t-reftable-record.c +++ b/t/unit-tests/t-reftable-record.c @@ -58,6 +58,22 @@ static void t_varint_roundtrip(void) } } +static void t_varint_overflow(void) +{ + unsigned char buf[] = { + 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0x00, + }; + struct string_view view = { + .buf = buf, + .len = sizeof(buf), + }; + uint64_t value; + int err = get_var_int(&value, &view); + check_int(err, ==, -1); +} + static void set_hash(uint8_t *h, int j) { for (int i = 0; i < hash_size(REFTABLE_HASH_SHA1); i++) @@ -544,6 +560,7 @@ int cmd_main(int argc UNUSED, const char *argv[] UNUSED) TEST(t_reftable_log_record_roundtrip(), "record operations work on log record"); TEST(t_reftable_ref_record_roundtrip(), "record operations work on ref record"); TEST(t_varint_roundtrip(), "put_var_int and get_var_int work"); + TEST(t_varint_overflow(), "get_var_int notices an integer overflow"); TEST(t_key_roundtrip(), "reftable_encode_key and reftable_decode_key work"); TEST(t_reftable_obj_record_roundtrip(), "record operations work on obj record"); TEST(t_reftable_index_record_roundtrip(), "record operations work on index record");