From eb8728d88ab92d866f454a5b981b7bbfe4e2235b Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 20 Jan 2025 17:17:19 +0100 Subject: [PATCH 01/10] meson: stop disabling -Wsign-compare In 4f9264b0cd (config.mak.dev: drop `-Wno-sign-compare`, 2024-12-06) we have started an effort to make our codebase compile with -Wsign-compare. But while we removed the -Wno-sign-compare flag from "config.mak.dev", we didn't adjust the Meson build instructions in the same way. Fix this. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- meson.build | 1 - 1 file changed, 1 deletion(-) diff --git a/meson.build b/meson.build index 0064eb64f5..07744c73b1 100644 --- a/meson.build +++ b/meson.build @@ -708,7 +708,6 @@ if get_option('warning_level') in ['2','3', 'everything'] and compiler.get_argum # These are disabled because we have these all over the place. '-Wno-empty-body', '-Wno-missing-field-initializers', - '-Wno-sign-compare', ] if compiler.has_argument(cflag) libgit_c_args += cflag From a204f92d1cb08f3a0450551b5e6759284bbab12a Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 20 Jan 2025 17:17:20 +0100 Subject: [PATCH 02/10] reftable/record: drop unused `print` function pointer In 42c424d69d (t/helper: inline printing of reftable records, 2024-08-22) we stopped using the `print` function of the reftable record vtable and instead moved its implementation into the single user of it. We didn't remove the function itself from the vtable though. Drop it. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- reftable/record.h | 3 --- 1 file changed, 3 deletions(-) diff --git a/reftable/record.h b/reftable/record.h index 25aa908c85..a24cb23bd4 100644 --- a/reftable/record.h +++ b/reftable/record.h @@ -73,9 +73,6 @@ struct reftable_record_vtable { * the same type. */ int (*cmp)(const void *a, const void *b); - - /* Print on stdout, for debugging. */ - void (*print)(const void *rec, int hash_size); }; /* returns true for recognized block types. Block start with the block type. */ From 072e3aa3a5c29ca1b68a7aaf570a0a8e7ab67127 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 20 Jan 2025 17:17:21 +0100 Subject: [PATCH 03/10] reftable/record: handle overflows when decoding varints The logic to decode varints isn't able to detect integer overflows: as long as the buffer still has more data available, and as long as the current byte has its 0x80 bit set, we'll continue to add up these values to the result. This will eventually cause the `uint64_t` to overflow, at which point we'll return an invalid result. Refactor the function so that it is able to detect such overflows. The implementation is basically copied from Git's own `decode_varint()`, which already knows to handle overflows. The only adjustment is that we also take into account the string view's length in order to not overrun it. The reftable documentation explicitly notes that those two encoding schemas are supposed to be the same: Varint encoding ^^^^^^^^^^^^^^^ Varint encoding is identical to the ofs-delta encoding method used within pack files. Decoder works as follows: .... val = buf[ptr] & 0x7f while (buf[ptr] & 0x80) { ptr++ val = ((val + 1) << 7) | (buf[ptr] & 0x7f) } .... While at it, refactor `put_var_int()` in the same way by copying over the implementation of `encode_varint()`. While `put_var_int()` doesn't have an issue with overflows, it generates warnings with -Wsign-compare. The implementation of `encode_varint()` doesn't, is battle-tested and at the same time way simpler than what we currently have. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- reftable/record.c | 60 +++++++++++++++++--------------- reftable/record.h | 6 ++-- t/unit-tests/t-reftable-record.c | 17 +++++++++ 3 files changed, 52 insertions(+), 31 deletions(-) diff --git a/reftable/record.c b/reftable/record.c index 04429d23fe..a55ce76aeb 100644 --- a/reftable/record.c +++ b/reftable/record.c @@ -21,47 +21,49 @@ static void *reftable_record_data(struct reftable_record *rec); int get_var_int(uint64_t *dest, struct string_view *in) { - int ptr = 0; + const unsigned char *buf = in->buf; + unsigned char c; uint64_t val; - if (in->len == 0) + if (!in->len) return -1; - val = in->buf[ptr] & 0x7f; + c = *buf++; + val = c & 0x7f; - while (in->buf[ptr] & 0x80) { - ptr++; - if (ptr > in->len) { + while (c & 0x80) { + /* + * We use a micro-optimization here: whenever we see that the + * 0x80 bit is set, we know that the remainder of the value + * cannot be 0. The zero-values thus doesn't need to be encoded + * at all, which is why we subtract 1 when encoding and add 1 + * when decoding. + * + * This allows us to save a byte in some edge cases. + */ + val += 1; + if (!val || (val & (uint64_t)(~0ULL << (64 - 7)))) + return -1; /* overflow */ + if (buf >= in->buf + in->len) return -1; - } - val = (val + 1) << 7 | (uint64_t)(in->buf[ptr] & 0x7f); + c = *buf++; + val = (val << 7) + (c & 0x7f); } *dest = val; - return ptr + 1; + return buf - in->buf; } -int put_var_int(struct string_view *dest, uint64_t val) +int put_var_int(struct string_view *dest, uint64_t value) { - uint8_t buf[10] = { 0 }; - int i = 9; - int n = 0; - buf[i] = (uint8_t)(val & 0x7f); - i--; - while (1) { - val >>= 7; - if (!val) { - break; - } - val--; - buf[i] = 0x80 | (uint8_t)(val & 0x7f); - i--; - } - - n = sizeof(buf) - i - 1; - if (dest->len < n) + unsigned char varint[10]; + unsigned pos = sizeof(varint) - 1; + varint[pos] = value & 0x7f; + while (value >>= 7) + varint[--pos] = 0x80 | (--value & 0x7f); + if (dest->len < sizeof(varint) - pos) return -1; - memcpy(dest->buf, &buf[i + 1], n); - return n; + memcpy(dest->buf, varint + pos, sizeof(varint) - pos); + return sizeof(varint) - pos; } int reftable_is_block_type(uint8_t typ) diff --git a/reftable/record.h b/reftable/record.h index a24cb23bd4..0df950f401 100644 --- a/reftable/record.h +++ b/reftable/record.h @@ -32,8 +32,10 @@ static inline void string_view_consume(struct string_view *s, int n) s->len -= n; } -/* utilities for de/encoding varints */ - +/* + * Decode and encode a varint. Returns the number of bytes read/written, or a + * negative value in case encoding/decoding the varint has failed. + */ int get_var_int(uint64_t *dest, struct string_view *in); int put_var_int(struct string_view *dest, uint64_t val); diff --git a/t/unit-tests/t-reftable-record.c b/t/unit-tests/t-reftable-record.c index 42bc64cec8..6d912b9c8f 100644 --- a/t/unit-tests/t-reftable-record.c +++ b/t/unit-tests/t-reftable-record.c @@ -58,6 +58,22 @@ static void t_varint_roundtrip(void) } } +static void t_varint_overflow(void) +{ + unsigned char buf[] = { + 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0x00, + }; + struct string_view view = { + .buf = buf, + .len = sizeof(buf), + }; + uint64_t value; + int err = get_var_int(&value, &view); + check_int(err, ==, -1); +} + static void set_hash(uint8_t *h, int j) { for (int i = 0; i < hash_size(REFTABLE_HASH_SHA1); i++) @@ -544,6 +560,7 @@ int cmd_main(int argc UNUSED, const char *argv[] UNUSED) TEST(t_reftable_log_record_roundtrip(), "record operations work on log record"); TEST(t_reftable_ref_record_roundtrip(), "record operations work on ref record"); TEST(t_varint_roundtrip(), "put_var_int and get_var_int work"); + TEST(t_varint_overflow(), "get_var_int notices an integer overflow"); TEST(t_key_roundtrip(), "reftable_encode_key and reftable_decode_key work"); TEST(t_reftable_obj_record_roundtrip(), "record operations work on obj record"); TEST(t_reftable_index_record_roundtrip(), "record operations work on index record"); From 5ac65f0d6b867ff031fda03779c2f2613f022b10 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 20 Jan 2025 17:17:22 +0100 Subject: [PATCH 04/10] reftable/basics: adjust `common_prefix_size()` to return `size_t` The `common_prefix_size()` function computes the length of the common prefix between two buffers. As such its return value will always be an unsigned integer, as the length cannot be negative. Regardless of that, the function returns a signed integer, which is nonsensical and causes a couple of -Wsign-compare warnings all over the place. Adjust the function to return a `size_t` instead. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- reftable/basics.c | 8 +++----- reftable/basics.h | 2 +- reftable/record.c | 4 ++-- reftable/writer.c | 7 +++---- t/unit-tests/t-reftable-basics.c | 2 +- 5 files changed, 10 insertions(+), 13 deletions(-) diff --git a/reftable/basics.c b/reftable/basics.c index fe2b83ff83..10b234ea55 100644 --- a/reftable/basics.c +++ b/reftable/basics.c @@ -263,14 +263,12 @@ int names_equal(const char **a, const char **b) return a[i] == b[i]; } -int common_prefix_size(struct reftable_buf *a, struct reftable_buf *b) +size_t common_prefix_size(struct reftable_buf *a, struct reftable_buf *b) { - int p = 0; - for (; p < a->len && p < b->len; p++) { + size_t p = 0; + for (; p < a->len && p < b->len; p++) if (a->buf[p] != b->buf[p]) break; - } - return p; } diff --git a/reftable/basics.h b/reftable/basics.h index 4bf71b0954..9ff81a68f8 100644 --- a/reftable/basics.h +++ b/reftable/basics.h @@ -169,7 +169,7 @@ static inline void *reftable_alloc_grow(void *p, size_t nelem, size_t elsize, #endif /* Find the longest shared prefix size of `a` and `b` */ -int common_prefix_size(struct reftable_buf *a, struct reftable_buf *b); +size_t common_prefix_size(struct reftable_buf *a, struct reftable_buf *b); int hash_size(enum reftable_hash id); diff --git a/reftable/record.c b/reftable/record.c index a55ce76aeb..4a3e019528 100644 --- a/reftable/record.c +++ b/reftable/record.c @@ -144,9 +144,9 @@ int reftable_encode_key(int *restart, struct string_view dest, uint8_t extra) { struct string_view start = dest; - int prefix_len = common_prefix_size(&prev_key, &key); + size_t prefix_len = common_prefix_size(&prev_key, &key); uint64_t suffix_len = key.len - prefix_len; - int n = put_var_int(&dest, (uint64_t)prefix_len); + int n = put_var_int(&dest, prefix_len); if (n < 0) return -1; string_view_consume(&dest, n); diff --git a/reftable/writer.c b/reftable/writer.c index 740c98038e..91d6629486 100644 --- a/reftable/writer.c +++ b/reftable/writer.c @@ -577,7 +577,7 @@ static int writer_finish_section(struct reftable_writer *w) struct common_prefix_arg { struct reftable_buf *last; - int max; + size_t max; }; static void update_common(void *void_arg, void *key) @@ -585,10 +585,9 @@ static void update_common(void *void_arg, void *key) struct common_prefix_arg *arg = void_arg; struct obj_index_tree_node *entry = key; if (arg->last) { - int n = common_prefix_size(&entry->hash, arg->last); - if (n > arg->max) { + size_t n = common_prefix_size(&entry->hash, arg->last); + if (n > arg->max) arg->max = n; - } } arg->last = &entry->hash; } diff --git a/t/unit-tests/t-reftable-basics.c b/t/unit-tests/t-reftable-basics.c index 1d640b280f..9ba7eb05ad 100644 --- a/t/unit-tests/t-reftable-basics.c +++ b/t/unit-tests/t-reftable-basics.c @@ -120,7 +120,7 @@ int cmd_main(int argc UNUSED, const char *argv[] UNUSED) for (size_t i = 0; i < ARRAY_SIZE(cases); i++) { check(!reftable_buf_addstr(&a, cases[i].a)); check(!reftable_buf_addstr(&b, cases[i].b)); - check_int(common_prefix_size(&a, &b), ==, cases[i].want); + check_uint(common_prefix_size(&a, &b), ==, cases[i].want); reftable_buf_reset(&a); reftable_buf_reset(&b); } From 57adf71b93efa9f9b4db5147e9fa1235f0a1d5ba Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 20 Jan 2025 17:17:23 +0100 Subject: [PATCH 05/10] reftable/basics: adjust `hash_size()` to return `uint32_t` The `hash_size()` function returns the number of bytes used by the hash function. Weirdly enough though, it returns a signed integer for its size even though the size obviously cannot ever be negative. The only case where it could be negative is if the function returned an error when asked for an unknown hash, but we assert(3p) instead. Adjust the type of `hash_size()` to be `uint32_t` and adapt all places that use signed integers for the hash size to follow suit. This also allows us to get rid of a couple asserts that we had which verified that the size was indeed positive, which further stresses the point that this refactoring makes sense. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- reftable/basics.c | 2 +- reftable/basics.h | 2 +- reftable/block.c | 4 +-- reftable/block.h | 10 +++--- reftable/reader.c | 2 +- reftable/record.c | 52 ++++++++++++++------------------ reftable/record.h | 16 +++++----- reftable/reftable-record.h | 4 +-- t/unit-tests/t-reftable-record.c | 2 +- 9 files changed, 44 insertions(+), 50 deletions(-) diff --git a/reftable/basics.c b/reftable/basics.c index 10b234ea55..3b5ea27bbd 100644 --- a/reftable/basics.c +++ b/reftable/basics.c @@ -272,7 +272,7 @@ size_t common_prefix_size(struct reftable_buf *a, struct reftable_buf *b) return p; } -int hash_size(enum reftable_hash id) +uint32_t hash_size(enum reftable_hash id) { if (!id) return REFTABLE_HASH_SIZE_SHA1; diff --git a/reftable/basics.h b/reftable/basics.h index 9ff81a68f8..a2a010a0e1 100644 --- a/reftable/basics.h +++ b/reftable/basics.h @@ -171,7 +171,7 @@ static inline void *reftable_alloc_grow(void *p, size_t nelem, size_t elsize, /* Find the longest shared prefix size of `a` and `b` */ size_t common_prefix_size(struct reftable_buf *a, struct reftable_buf *b); -int hash_size(enum reftable_hash id); +uint32_t hash_size(enum reftable_hash id); /* * Format IDs that identify the hash function used by a reftable. Note that diff --git a/reftable/block.c b/reftable/block.c index 9858bbc7c5..2380aabb2f 100644 --- a/reftable/block.c +++ b/reftable/block.c @@ -72,7 +72,7 @@ static int block_writer_register_restart(struct block_writer *w, int n, } int block_writer_init(struct block_writer *bw, uint8_t typ, uint8_t *block, - uint32_t block_size, uint32_t header_off, int hash_size) + uint32_t block_size, uint32_t header_off, uint32_t hash_size) { bw->block = block; bw->hash_size = hash_size; @@ -214,7 +214,7 @@ int block_writer_finish(struct block_writer *w) int block_reader_init(struct block_reader *br, struct reftable_block *block, uint32_t header_off, uint32_t table_block_size, - int hash_size) + uint32_t hash_size) { uint32_t full_block_size = table_block_size; uint8_t typ = block->data[header_off]; diff --git a/reftable/block.h b/reftable/block.h index 0431e8591f..5f67ed74c5 100644 --- a/reftable/block.h +++ b/reftable/block.h @@ -30,7 +30,7 @@ struct block_writer { /* How often to restart keys. */ uint16_t restart_interval; - int hash_size; + uint32_t hash_size; /* Offset of next uint8_t to write. */ uint32_t next; @@ -48,7 +48,7 @@ struct block_writer { * initializes the blockwriter to write `typ` entries, using `block` as temporary * storage. `block` is not owned by the block_writer. */ int block_writer_init(struct block_writer *bw, uint8_t typ, uint8_t *block, - uint32_t block_size, uint32_t header_off, int hash_size); + uint32_t block_size, uint32_t header_off, uint32_t hash_size); /* returns the block type (eg. 'r' for ref records. */ uint8_t block_writer_type(struct block_writer *bw); @@ -72,7 +72,7 @@ struct block_reader { /* the memory block */ struct reftable_block block; - int hash_size; + uint32_t hash_size; /* Uncompressed data for log entries. */ z_stream *zstream; @@ -92,7 +92,7 @@ struct block_reader { /* initializes a block reader. */ int block_reader_init(struct block_reader *br, struct reftable_block *bl, uint32_t header_off, uint32_t table_block_size, - int hash_size); + uint32_t hash_size); void block_reader_release(struct block_reader *br); @@ -108,7 +108,7 @@ struct block_iter { uint32_t next_off; const unsigned char *block; size_t block_len; - int hash_size; + uint32_t hash_size; /* key for last entry we read. */ struct reftable_buf last_key; diff --git a/reftable/reader.c b/reftable/reader.c index ea82955c9b..9df8a5ecb1 100644 --- a/reftable/reader.c +++ b/reftable/reader.c @@ -750,7 +750,7 @@ static int reftable_reader_refs_for_unindexed(struct reftable_reader *r, struct table_iter *ti; struct filtering_ref_iterator *filter = NULL; struct filtering_ref_iterator empty = FILTERING_REF_ITERATOR_INIT; - int oid_len = hash_size(r->hash_id); + uint32_t oid_len = hash_size(r->hash_id); int err; REFTABLE_ALLOC_ARRAY(ti, 1); diff --git a/reftable/record.c b/reftable/record.c index 4a3e019528..f7766a32ef 100644 --- a/reftable/record.c +++ b/reftable/record.c @@ -229,7 +229,7 @@ static int reftable_ref_record_key(const void *r, struct reftable_buf *dest) } static int reftable_ref_record_copy_from(void *rec, const void *src_rec, - int hash_size) + uint32_t hash_size) { struct reftable_ref_record *ref = rec; const struct reftable_ref_record *src = src_rec; @@ -237,8 +237,6 @@ static int reftable_ref_record_copy_from(void *rec, const void *src_rec, size_t refname_cap = 0; int err; - assert(hash_size > 0); - SWAP(refname, ref->refname); SWAP(refname_cap, ref->refname_cap); reftable_ref_record_release(ref); @@ -319,13 +317,12 @@ static uint8_t reftable_ref_record_val_type(const void *rec) } static int reftable_ref_record_encode(const void *rec, struct string_view s, - int hash_size) + uint32_t hash_size) { const struct reftable_ref_record *r = (const struct reftable_ref_record *)rec; struct string_view start = s; int n = put_var_int(&s, r->update_index); - assert(hash_size > 0); if (n < 0) return -1; string_view_consume(&s, n); @@ -365,7 +362,7 @@ static int reftable_ref_record_encode(const void *rec, struct string_view s, static int reftable_ref_record_decode(void *rec, struct reftable_buf key, uint8_t val_type, struct string_view in, - int hash_size, struct reftable_buf *scratch) + uint32_t hash_size, struct reftable_buf *scratch) { struct reftable_ref_record *r = rec; struct string_view start = in; @@ -374,8 +371,6 @@ static int reftable_ref_record_decode(void *rec, struct reftable_buf key, size_t refname_cap = 0; int n, err; - assert(hash_size > 0); - n = get_var_int(&update_index, &in); if (n < 0) return n; @@ -451,7 +446,7 @@ static int reftable_ref_record_is_deletion_void(const void *p) } static int reftable_ref_record_equal_void(const void *a, - const void *b, int hash_size) + const void *b, uint32_t hash_size) { struct reftable_ref_record *ra = (struct reftable_ref_record *) a; struct reftable_ref_record *rb = (struct reftable_ref_record *) b; @@ -495,7 +490,7 @@ static void reftable_obj_record_release(void *rec) } static int reftable_obj_record_copy_from(void *rec, const void *src_rec, - int hash_size UNUSED) + uint32_t hash_size UNUSED) { struct reftable_obj_record *obj = rec; const struct reftable_obj_record *src = src_rec; @@ -527,7 +522,7 @@ static uint8_t reftable_obj_record_val_type(const void *rec) } static int reftable_obj_record_encode(const void *rec, struct string_view s, - int hash_size UNUSED) + uint32_t hash_size UNUSED) { const struct reftable_obj_record *r = rec; struct string_view start = s; @@ -562,7 +557,7 @@ static int reftable_obj_record_encode(const void *rec, struct string_view s, static int reftable_obj_record_decode(void *rec, struct reftable_buf key, uint8_t val_type, struct string_view in, - int hash_size UNUSED, + uint32_t hash_size UNUSED, struct reftable_buf *scratch UNUSED) { struct string_view start = in; @@ -626,7 +621,7 @@ static int not_a_deletion(const void *p UNUSED) } static int reftable_obj_record_equal_void(const void *a, const void *b, - int hash_size UNUSED) + uint32_t hash_size UNUSED) { struct reftable_obj_record *ra = (struct reftable_obj_record *) a; struct reftable_obj_record *rb = (struct reftable_obj_record *) b; @@ -701,7 +696,7 @@ static int reftable_log_record_key(const void *r, struct reftable_buf *dest) } static int reftable_log_record_copy_from(void *rec, const void *src_rec, - int hash_size) + uint32_t hash_size) { struct reftable_log_record *dst = rec; const struct reftable_log_record *src = @@ -782,7 +777,7 @@ static uint8_t reftable_log_record_val_type(const void *rec) } static int reftable_log_record_encode(const void *rec, struct string_view s, - int hash_size) + uint32_t hash_size) { const struct reftable_log_record *r = rec; struct string_view start = s; @@ -830,7 +825,7 @@ static int reftable_log_record_encode(const void *rec, struct string_view s, static int reftable_log_record_decode(void *rec, struct reftable_buf key, uint8_t val_type, struct string_view in, - int hash_size, struct reftable_buf *scratch) + uint32_t hash_size, struct reftable_buf *scratch) { struct string_view start = in; struct reftable_log_record *r = rec; @@ -978,7 +973,7 @@ static int null_streq(const char *a, const char *b) } static int reftable_log_record_equal_void(const void *a, - const void *b, int hash_size) + const void *b, uint32_t hash_size) { return reftable_log_record_equal((struct reftable_log_record *) a, (struct reftable_log_record *) b, @@ -1002,7 +997,7 @@ static int reftable_log_record_cmp_void(const void *_a, const void *_b) } int reftable_log_record_equal(const struct reftable_log_record *a, - const struct reftable_log_record *b, int hash_size) + const struct reftable_log_record *b, uint32_t hash_size) { if (!(null_streq(a->refname, b->refname) && a->update_index == b->update_index && @@ -1056,7 +1051,7 @@ static int reftable_index_record_key(const void *r, struct reftable_buf *dest) } static int reftable_index_record_copy_from(void *rec, const void *src_rec, - int hash_size UNUSED) + uint32_t hash_size UNUSED) { struct reftable_index_record *dst = rec; const struct reftable_index_record *src = src_rec; @@ -1083,7 +1078,7 @@ static uint8_t reftable_index_record_val_type(const void *rec UNUSED) } static int reftable_index_record_encode(const void *rec, struct string_view out, - int hash_size UNUSED) + uint32_t hash_size UNUSED) { const struct reftable_index_record *r = (const struct reftable_index_record *)rec; @@ -1101,7 +1096,7 @@ static int reftable_index_record_encode(const void *rec, struct string_view out, static int reftable_index_record_decode(void *rec, struct reftable_buf key, uint8_t val_type UNUSED, struct string_view in, - int hash_size UNUSED, + uint32_t hash_size UNUSED, struct reftable_buf *scratch UNUSED) { struct string_view start = in; @@ -1122,7 +1117,7 @@ static int reftable_index_record_decode(void *rec, struct reftable_buf key, } static int reftable_index_record_equal(const void *a, const void *b, - int hash_size UNUSED) + uint32_t hash_size UNUSED) { struct reftable_index_record *ia = (struct reftable_index_record *) a; struct reftable_index_record *ib = (struct reftable_index_record *) b; @@ -1156,14 +1151,14 @@ int reftable_record_key(struct reftable_record *rec, struct reftable_buf *dest) } int reftable_record_encode(struct reftable_record *rec, struct string_view dest, - int hash_size) + uint32_t hash_size) { return reftable_record_vtable(rec)->encode(reftable_record_data(rec), dest, hash_size); } int reftable_record_copy_from(struct reftable_record *rec, - struct reftable_record *src, int hash_size) + struct reftable_record *src, uint32_t hash_size) { assert(src->type == rec->type); @@ -1178,7 +1173,7 @@ uint8_t reftable_record_val_type(struct reftable_record *rec) } int reftable_record_decode(struct reftable_record *rec, struct reftable_buf key, - uint8_t extra, struct string_view src, int hash_size, + uint8_t extra, struct string_view src, uint32_t hash_size, struct reftable_buf *scratch) { return reftable_record_vtable(rec)->decode(reftable_record_data(rec), @@ -1205,7 +1200,7 @@ int reftable_record_cmp(struct reftable_record *a, struct reftable_record *b) reftable_record_data(a), reftable_record_data(b)); } -int reftable_record_equal(struct reftable_record *a, struct reftable_record *b, int hash_size) +int reftable_record_equal(struct reftable_record *a, struct reftable_record *b, uint32_t hash_size) { if (a->type != b->type) return 0; @@ -1213,7 +1208,7 @@ int reftable_record_equal(struct reftable_record *a, struct reftable_record *b, reftable_record_data(a), reftable_record_data(b), hash_size); } -static int hash_equal(const unsigned char *a, const unsigned char *b, int hash_size) +static int hash_equal(const unsigned char *a, const unsigned char *b, uint32_t hash_size) { if (a && b) return !memcmp(a, b, hash_size); @@ -1222,9 +1217,8 @@ static int hash_equal(const unsigned char *a, const unsigned char *b, int hash_s } int reftable_ref_record_equal(const struct reftable_ref_record *a, - const struct reftable_ref_record *b, int hash_size) + const struct reftable_ref_record *b, uint32_t hash_size) { - assert(hash_size > 0); if (!null_streq(a->refname, b->refname)) return 0; diff --git a/reftable/record.h b/reftable/record.h index 0df950f401..c7755a4d75 100644 --- a/reftable/record.h +++ b/reftable/record.h @@ -47,18 +47,18 @@ struct reftable_record_vtable { /* The record type of ('r' for ref). */ uint8_t type; - int (*copy_from)(void *dest, const void *src, int hash_size); + int (*copy_from)(void *dest, const void *src, uint32_t hash_size); /* a value of [0..7], indicating record subvariants (eg. ref vs. symref * vs ref deletion) */ uint8_t (*val_type)(const void *rec); /* encodes rec into dest, returning how much space was used. */ - int (*encode)(const void *rec, struct string_view dest, int hash_size); + int (*encode)(const void *rec, struct string_view dest, uint32_t hash_size); /* decode data from `src` into the record. */ int (*decode)(void *rec, struct reftable_buf key, uint8_t extra, - struct string_view src, int hash_size, + struct string_view src, uint32_t hash_size, struct reftable_buf *scratch); /* deallocate and null the record. */ @@ -68,7 +68,7 @@ struct reftable_record_vtable { int (*is_deletion)(const void *rec); /* Are two records equal? This assumes they have the same type. Returns 0 for non-equal. */ - int (*equal)(const void *a, const void *b, int hash_size); + int (*equal)(const void *a, const void *b, uint32_t hash_size); /* * Compare keys of two records with each other. The records must have @@ -135,16 +135,16 @@ void reftable_record_init(struct reftable_record *rec, uint8_t typ); /* see struct record_vtable */ int reftable_record_cmp(struct reftable_record *a, struct reftable_record *b); -int reftable_record_equal(struct reftable_record *a, struct reftable_record *b, int hash_size); +int reftable_record_equal(struct reftable_record *a, struct reftable_record *b, uint32_t hash_size); int reftable_record_key(struct reftable_record *rec, struct reftable_buf *dest); int reftable_record_copy_from(struct reftable_record *rec, - struct reftable_record *src, int hash_size); + struct reftable_record *src, uint32_t hash_size); uint8_t reftable_record_val_type(struct reftable_record *rec); int reftable_record_encode(struct reftable_record *rec, struct string_view dest, - int hash_size); + uint32_t hash_size); int reftable_record_decode(struct reftable_record *rec, struct reftable_buf key, uint8_t extra, struct string_view src, - int hash_size, struct reftable_buf *scratch); + uint32_t hash_size, struct reftable_buf *scratch); int reftable_record_is_deletion(struct reftable_record *rec); static inline uint8_t reftable_record_type(struct reftable_record *rec) diff --git a/reftable/reftable-record.h b/reftable/reftable-record.h index ddd48eb579..931e594744 100644 --- a/reftable/reftable-record.h +++ b/reftable/reftable-record.h @@ -65,7 +65,7 @@ void reftable_ref_record_release(struct reftable_ref_record *ref); /* returns whether two reftable_ref_records are the same. Useful for testing. */ int reftable_ref_record_equal(const struct reftable_ref_record *a, - const struct reftable_ref_record *b, int hash_size); + const struct reftable_ref_record *b, uint32_t hash_size); /* reftable_log_record holds a reflog entry */ struct reftable_log_record { @@ -105,6 +105,6 @@ void reftable_log_record_release(struct reftable_log_record *log); /* returns whether two records are equal. Useful for testing. */ int reftable_log_record_equal(const struct reftable_log_record *a, - const struct reftable_log_record *b, int hash_size); + const struct reftable_log_record *b, uint32_t hash_size); #endif diff --git a/t/unit-tests/t-reftable-record.c b/t/unit-tests/t-reftable-record.c index 6d912b9c8f..d49d2a2729 100644 --- a/t/unit-tests/t-reftable-record.c +++ b/t/unit-tests/t-reftable-record.c @@ -76,7 +76,7 @@ static void t_varint_overflow(void) static void set_hash(uint8_t *h, int j) { - for (int i = 0; i < hash_size(REFTABLE_HASH_SHA1); i++) + for (size_t i = 0; i < hash_size(REFTABLE_HASH_SHA1); i++) h[i] = (j >> i) & 0xff; } From ffe664366890f252ad14e87c987c57e080182bca Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 20 Jan 2025 17:17:24 +0100 Subject: [PATCH 06/10] reftable/block: adapt header and footer size to return a `size_t` The functions `header_size()` and `footer_size()` return a positive integer representing the size of the header and footer, respectively, dependent on the version of the reftable format. Similar to the preceding commit, these functions return a signed integer though, which is nonsensical given that there is no way for these functions to return negative. Adapt the functions to return a `size_t` instead to fix a couple of sign comparison warnings. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- reftable/block.c | 4 ++-- reftable/block.h | 4 ++-- t/unit-tests/t-reftable-readwrite.c | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/reftable/block.c b/reftable/block.c index 2380aabb2f..1275085257 100644 --- a/reftable/block.c +++ b/reftable/block.c @@ -15,7 +15,7 @@ license that can be found in the LICENSE file or at #include "system.h" #include -int header_size(int version) +size_t header_size(int version) { switch (version) { case 1: @@ -26,7 +26,7 @@ int header_size(int version) abort(); } -int footer_size(int version) +size_t footer_size(int version) { switch (version) { case 1: diff --git a/reftable/block.h b/reftable/block.h index 5f67ed74c5..bef2b8a4c5 100644 --- a/reftable/block.h +++ b/reftable/block.h @@ -137,10 +137,10 @@ void block_iter_reset(struct block_iter *it); void block_iter_close(struct block_iter *it); /* size of file header, depending on format version */ -int header_size(int version); +size_t header_size(int version); /* size of file footer, depending on format version */ -int footer_size(int version); +size_t footer_size(int version); /* returns a block to its source. */ void reftable_block_done(struct reftable_block *ret); diff --git a/t/unit-tests/t-reftable-readwrite.c b/t/unit-tests/t-reftable-readwrite.c index 6b75a419b9..2e553154ea 100644 --- a/t/unit-tests/t-reftable-readwrite.c +++ b/t/unit-tests/t-reftable-readwrite.c @@ -643,7 +643,7 @@ static void t_write_empty_table(void) check_int(err, ==, REFTABLE_EMPTY_TABLE_ERROR); reftable_writer_free(w); - check_int(buf.len, ==, header_size(1) + footer_size(1)); + check_uint(buf.len, ==, header_size(1) + footer_size(1)); block_source_from_buf(&source, &buf); From b1e4b6f4dc27481e8c07acc2e7629ae206d25f6c Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 20 Jan 2025 17:17:25 +0100 Subject: [PATCH 07/10] reftable/block: adjust type of the restart length The restart length is tracked as a positive integer even though it cannot ever be negative. Furthermore, it is effectively capped via the MAX_RESTARTS variable. Adjust the type of the variable to be `uint32_t`. While this type is excessive given that MAX_RESTARTS fits into an `uint16_t`, other places already use 32 bit integers for restarts, so this type is being more consistent. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- reftable/block.c | 12 +++++------- reftable/reftable-writer.h | 2 +- 2 files changed, 6 insertions(+), 8 deletions(-) diff --git a/reftable/block.c b/reftable/block.c index 1275085257..8ac865ce78 100644 --- a/reftable/block.c +++ b/reftable/block.c @@ -40,16 +40,15 @@ size_t footer_size(int version) static int block_writer_register_restart(struct block_writer *w, int n, int is_restart, struct reftable_buf *key) { - int rlen, err; + uint32_t rlen; + int err; rlen = w->restart_len; - if (rlen >= MAX_RESTARTS) { + if (rlen >= MAX_RESTARTS) is_restart = 0; - } - if (is_restart) { + if (is_restart) rlen++; - } if (2 + 3 * rlen + n > w->block_size - w->next) return -1; if (is_restart) { @@ -148,8 +147,7 @@ int block_writer_add(struct block_writer *w, struct reftable_record *rec) int block_writer_finish(struct block_writer *w) { - int i; - for (i = 0; i < w->restart_len; i++) { + for (uint32_t i = 0; i < w->restart_len; i++) { put_be24(w->block + w->next, w->restarts[i]); w->next += 3; } diff --git a/reftable/reftable-writer.h b/reftable/reftable-writer.h index 5f9afa620b..bfef3b1721 100644 --- a/reftable/reftable-writer.h +++ b/reftable/reftable-writer.h @@ -84,7 +84,7 @@ struct reftable_block_stats { /* total number of entries written */ int entries; /* total number of key restarts */ - int restarts; + uint32_t restarts; /* total number of blocks */ int blocks; /* total number of index blocks */ From 1f054af72f509f1247687b91a360961b5d930869 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 20 Jan 2025 17:17:26 +0100 Subject: [PATCH 08/10] reftable/blocksource: adjust type of the block length The block length is used to track the number of bytes available in a specific block. As such, it is never set to a negative value, but is still represented by a signed integer. Adjust the type of the variable to be `size_t`. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- reftable/reftable-blocksource.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/reftable/reftable-blocksource.h b/reftable/reftable-blocksource.h index 5aa3990a57..f06ad52e0a 100644 --- a/reftable/reftable-blocksource.h +++ b/reftable/reftable-blocksource.h @@ -22,7 +22,7 @@ struct reftable_block_source { * so it can return itself into the pool. */ struct reftable_block { uint8_t *data; - int len; + size_t len; struct reftable_block_source source; }; From 7c4c1cbc0b94665d6a94ac7df385459346af5265 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 20 Jan 2025 17:17:27 +0100 Subject: [PATCH 09/10] reftable/blocksource: adjust `read_block()` to return `ssize_t` The `block_source_read_block()` function and its implementations return an integer as a result that reflects either the number of bytes read, or an error. As such its return type, a signed integer, isn't wrong, but it doesn't give the reader a good hint what it actually returns. Refactor the function to return an `ssize_t` instead, which is typical for functions similar to read(3p) and should thus give readers a better signal what they can expect as a result. Adjust callers to better handle the returned value to avoid warnings with -Wsign-compare. One of these callers is `reader_get_block()`, whose return value is only ever used by its callers to figure out whether or not the read was successful. So instead of bubbling up the `ssize_t` there, too, we adapt it to only indicate success or errors. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- reftable/blocksource.c | 8 ++++---- reftable/reader.c | 30 +++++++++++++++++------------- reftable/reader.h | 6 +++--- reftable/reftable-blocksource.h | 11 +++++++---- 4 files changed, 31 insertions(+), 24 deletions(-) diff --git a/reftable/blocksource.c b/reftable/blocksource.c index 52e0915a67..bba4a45b98 100644 --- a/reftable/blocksource.c +++ b/reftable/blocksource.c @@ -24,8 +24,8 @@ static void reftable_buf_close(void *b UNUSED) { } -static int reftable_buf_read_block(void *v, struct reftable_block *dest, - uint64_t off, uint32_t size) +static ssize_t reftable_buf_read_block(void *v, struct reftable_block *dest, + uint64_t off, uint32_t size) { struct reftable_buf *b = v; assert(off + size <= b->len); @@ -78,8 +78,8 @@ static void file_close(void *v) reftable_free(b); } -static int file_read_block(void *v, struct reftable_block *dest, uint64_t off, - uint32_t size) +static ssize_t file_read_block(void *v, struct reftable_block *dest, uint64_t off, + uint32_t size) { struct file_block_source *b = v; assert(off + size <= b->size); diff --git a/reftable/reader.c b/reftable/reader.c index 9df8a5ecb1..3f2e4b2800 100644 --- a/reftable/reader.c +++ b/reftable/reader.c @@ -20,11 +20,11 @@ uint64_t block_source_size(struct reftable_block_source *source) return source->ops->size(source->arg); } -int block_source_read_block(struct reftable_block_source *source, - struct reftable_block *dest, uint64_t off, - uint32_t size) +ssize_t block_source_read_block(struct reftable_block_source *source, + struct reftable_block *dest, uint64_t off, + uint32_t size) { - int result = source->ops->read_block(source->arg, dest, off, size); + ssize_t result = source->ops->read_block(source->arg, dest, off, size); dest->source = *source; return result; } @@ -57,14 +57,17 @@ static int reader_get_block(struct reftable_reader *r, struct reftable_block *dest, uint64_t off, uint32_t sz) { + ssize_t bytes_read; if (off >= r->size) return 0; - - if (off + sz > r->size) { + if (off + sz > r->size) sz = r->size - off; - } - return block_source_read_block(&r->source, dest, off, sz); + bytes_read = block_source_read_block(&r->source, dest, off, sz); + if (bytes_read < 0) + return (int)bytes_read; + + return 0; } enum reftable_hash reftable_reader_hash_id(struct reftable_reader *r) @@ -601,6 +604,7 @@ int reftable_reader_new(struct reftable_reader **out, struct reftable_reader *r; uint64_t file_size = block_source_size(source); uint32_t read_size; + ssize_t bytes_read; int err; REFTABLE_CALLOC_ARRAY(r, 1); @@ -619,8 +623,8 @@ int reftable_reader_new(struct reftable_reader **out, goto done; } - err = block_source_read_block(source, &header, 0, read_size); - if (err != read_size) { + bytes_read = block_source_read_block(source, &header, 0, read_size); + if (bytes_read < 0 || (size_t)bytes_read != read_size) { err = REFTABLE_IO_ERROR; goto done; } @@ -645,9 +649,9 @@ int reftable_reader_new(struct reftable_reader **out, r->hash_id = 0; r->refcount = 1; - err = block_source_read_block(source, &footer, r->size, - footer_size(r->version)); - if (err != footer_size(r->version)) { + bytes_read = block_source_read_block(source, &footer, r->size, + footer_size(r->version)); + if (bytes_read < 0 || (size_t)bytes_read != footer_size(r->version)) { err = REFTABLE_IO_ERROR; goto done; } diff --git a/reftable/reader.h b/reftable/reader.h index d2b48a4849..bb72108a6f 100644 --- a/reftable/reader.h +++ b/reftable/reader.h @@ -16,9 +16,9 @@ license that can be found in the LICENSE file or at uint64_t block_source_size(struct reftable_block_source *source); -int block_source_read_block(struct reftable_block_source *source, - struct reftable_block *dest, uint64_t off, - uint32_t size); +ssize_t block_source_read_block(struct reftable_block_source *source, + struct reftable_block *dest, uint64_t off, + uint32_t size); void block_source_close(struct reftable_block_source *source); /* metadata for a block type */ diff --git a/reftable/reftable-blocksource.h b/reftable/reftable-blocksource.h index f06ad52e0a..6b326aa5ea 100644 --- a/reftable/reftable-blocksource.h +++ b/reftable/reftable-blocksource.h @@ -31,10 +31,13 @@ struct reftable_block_source_vtable { /* returns the size of a block source */ uint64_t (*size)(void *source); - /* reads a segment from the block source. It is an error to read - beyond the end of the block */ - int (*read_block)(void *source, struct reftable_block *dest, - uint64_t off, uint32_t size); + /* + * Reads a segment from the block source. It is an error to read beyond + * the end of the block. + */ + ssize_t (*read_block)(void *source, struct reftable_block *dest, + uint64_t off, uint32_t size); + /* mark the block as read; may return the data back to malloc */ void (*return_block)(void *source, struct reftable_block *blockp); From 33319b0976ff9975e7509b6096887370146893f4 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 20 Jan 2025 17:17:28 +0100 Subject: [PATCH 10/10] reftable: address trivial -Wsign-compare warnings Address the last couple of trivial -Wsign-compare warnings in the reftable library and remove the DISABLE_SIGN_COMPARE_WARNINGS macro that we have in "reftable/system.h". Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- reftable/record.c | 7 ++----- reftable/stack.c | 12 +++++------- reftable/system.h | 2 -- 3 files changed, 7 insertions(+), 14 deletions(-) diff --git a/reftable/record.c b/reftable/record.c index f7766a32ef..8919df8a4d 100644 --- a/reftable/record.c +++ b/reftable/record.c @@ -126,7 +126,7 @@ static int decode_string(struct reftable_buf *dest, struct string_view in) static int encode_string(const char *str, struct string_view s) { struct string_view start = s; - int l = strlen(str); + size_t l = strlen(str); int n = put_var_int(&s, l); if (n < 0) return -1; @@ -565,7 +565,6 @@ static int reftable_obj_record_decode(void *rec, struct reftable_buf key, uint64_t count = val_type; int n = 0; uint64_t last; - int j; reftable_obj_record_release(r); @@ -600,8 +599,7 @@ static int reftable_obj_record_decode(void *rec, struct reftable_buf key, string_view_consume(&in, n); last = r->offsets[0]; - j = 1; - while (j < count) { + for (uint64_t j = 1; j < count; j++) { uint64_t delta = 0; int n = get_var_int(&delta, &in); if (n < 0) { @@ -610,7 +608,6 @@ static int reftable_obj_record_decode(void *rec, struct reftable_buf key, string_view_consume(&in, n); last = r->offsets[j] = (delta + last); - j++; } return start.len - in.len; } diff --git a/reftable/stack.c b/reftable/stack.c index 531660a49f..5c0d6273a7 100644 --- a/reftable/stack.c +++ b/reftable/stack.c @@ -220,9 +220,9 @@ void reftable_stack_destroy(struct reftable_stack *st) } if (st->readers) { - int i = 0; struct reftable_buf filename = REFTABLE_BUF_INIT; - for (i = 0; i < st->readers_len; i++) { + + for (size_t i = 0; i < st->readers_len; i++) { const char *name = reader_name(st->readers[i]); int try_unlinking = 1; @@ -238,6 +238,7 @@ void reftable_stack_destroy(struct reftable_stack *st) unlink(filename.buf); } } + reftable_buf_release(&filename); st->readers_len = 0; REFTABLE_FREE_AND_NULL(st->readers); @@ -568,7 +569,6 @@ static int stack_uptodate(struct reftable_stack *st) { char **names = NULL; int err; - int i = 0; /* * When we have cached stat information available then we use it to @@ -608,7 +608,7 @@ static int stack_uptodate(struct reftable_stack *st) if (err < 0) return err; - for (i = 0; i < st->readers_len; i++) { + for (size_t i = 0; i < st->readers_len; i++) { if (!names[i]) { err = 1; goto done; @@ -1767,14 +1767,12 @@ static int reftable_stack_clean_locked(struct reftable_stack *st) } while ((d = readdir(dir))) { - int i = 0; int found = 0; if (!is_table_name(d->d_name)) continue; - for (i = 0; !found && i < st->readers_len; i++) { + for (size_t i = 0; !found && i < st->readers_len; i++) found = !strcmp(reader_name(st->readers[i]), d->d_name); - } if (found) continue; diff --git a/reftable/system.h b/reftable/system.h index 5274eca1d0..7d5f803eeb 100644 --- a/reftable/system.h +++ b/reftable/system.h @@ -11,8 +11,6 @@ license that can be found in the LICENSE file or at /* This header glues the reftable library to the rest of Git */ -#define DISABLE_SIGN_COMPARE_WARNINGS - #include "git-compat-util.h" /*