From 2105064b10758c9032b94112276e8d3eb5718a2f Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Sun, 19 Jan 2025 08:23:08 -0500 Subject: [PATCH 1/5] bswap.h: squelch potential sparse -Wcast-truncate warnings In put_be32(), we right-shift a uint32_t value various amounts and then assign the low 8-bits to individual "unsigned char" bytes, throwing away the high bits. For shifts smaller than 24 bits, those thrown away bits will be arbitrary bits from the original uint32_t. This works exactly as we want, but if you feed a constant, then sparse complains. For example if we write this (which we plan to do in a future patch): put_be32(hdr, PACK_SIGNATURE); then "make sparse" produces: compat/bswap.h:175:22: error: cast truncates bits from constant value (5041 becomes 41) compat/bswap.h:176:22: error: cast truncates bits from constant value (504143 becomes 43) compat/bswap.h:177:22: error: cast truncates bits from constant value (5041434b becomes 4b) And the same issue exists in the other put_be*() functions, when used with a constant. We can silence this warning by explicitly masking off the truncated bits. The compiler is smart enough to know the result is the same, and the asm generated by gcc (with both -O0 and -O2) is identical. Curiously this line already exists: put_be32(&hdr_version, INDEX_EXTENSION_VERSION2); in the fsmonitor.c file, but it does not get flagged because the CPP macro expands to a small integer (2). Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- compat/bswap.h | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/compat/bswap.h b/compat/bswap.h index 512f6f4b99..b34054f2bd 100644 --- a/compat/bswap.h +++ b/compat/bswap.h @@ -171,23 +171,23 @@ static inline uint64_t get_be64(const void *ptr) static inline void put_be32(void *ptr, uint32_t value) { unsigned char *p = ptr; - p[0] = value >> 24; - p[1] = value >> 16; - p[2] = value >> 8; - p[3] = value >> 0; + p[0] = (value >> 24) & 0xff; + p[1] = (value >> 16) & 0xff; + p[2] = (value >> 8) & 0xff; + p[3] = (value >> 0) & 0xff; } static inline void put_be64(void *ptr, uint64_t value) { unsigned char *p = ptr; - p[0] = value >> 56; - p[1] = value >> 48; - p[2] = value >> 40; - p[3] = value >> 32; - p[4] = value >> 24; - p[5] = value >> 16; - p[6] = value >> 8; - p[7] = value >> 0; + p[0] = (value >> 56) & 0xff; + p[1] = (value >> 48) & 0xff; + p[2] = (value >> 40) & 0xff; + p[3] = (value >> 32) & 0xff; + p[4] = (value >> 24) & 0xff; + p[5] = (value >> 16) & 0xff; + p[6] = (value >> 8) & 0xff; + p[7] = (value >> 0) & 0xff; } #endif /* COMPAT_BSWAP_H */ From 798e0f451661f81f4568dce4033cf1c9076f224f Mon Sep 17 00:00:00 2001 From: Jeff King Date: Sun, 19 Jan 2025 08:23:37 -0500 Subject: [PATCH 2/5] packfile: factor out --pack_header argument parsing Both index-pack and unpack-objects accept a --pack_header argument. This is an undocumented internal argument used by receive-pack and fetch to pass along information about the header of the pack, which they've already read from the incoming stream. In preparation for a bugfix, let's factor the duplicated code into a common helper. The callers are still responsible for identifying the option. While this could likewise be factored out, it is more flexible this way (e.g., if they ever started using parse-options and wanted to handle both the stuck and unstuck forms). Likewise, the callers are responsible for reporting errors, though they both just call die(). I've tweaked unpack-objects to match index-pack in marking the error for translation. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- builtin/index-pack.c | 14 +++----------- builtin/unpack-objects.c | 16 ++++------------ packfile.c | 17 +++++++++++++++++ packfile.h | 6 ++++++ 4 files changed, 30 insertions(+), 23 deletions(-) diff --git a/builtin/index-pack.c b/builtin/index-pack.c index 763b01372a..bab42dfc2a 100644 --- a/builtin/index-pack.c +++ b/builtin/index-pack.c @@ -1801,18 +1801,10 @@ int cmd_index_pack(int argc, const char **argv, const char *prefix) nr_threads = 1; } } else if (starts_with(arg, "--pack_header=")) { - struct pack_header *hdr; - char *c; - - hdr = (struct pack_header *)input_buffer; - hdr->hdr_signature = htonl(PACK_SIGNATURE); - hdr->hdr_version = htonl(strtoul(arg + 14, &c, 10)); - if (*c != ',') + if (parse_pack_header_option(arg + 14, + input_buffer, + &input_len) < 0) die(_("bad %s"), arg); - hdr->hdr_entries = htonl(strtoul(c + 1, &c, 10)); - if (*c) - die(_("bad %s"), arg); - input_len = sizeof(*hdr); } else if (!strcmp(arg, "-v")) { verbose = 1; } else if (!strcmp(arg, "--progress-title")) { diff --git a/builtin/unpack-objects.c b/builtin/unpack-objects.c index 08fa2a7a74..3161447274 100644 --- a/builtin/unpack-objects.c +++ b/builtin/unpack-objects.c @@ -15,6 +15,7 @@ #include "progress.h" #include "decorate.h" #include "fsck.h" +#include "packfile.h" static int dry_run, quiet, recover, has_errors, strict; static const char unpack_usage[] = "git unpack-objects [-n] [-q] [-r] [--strict]"; @@ -639,18 +640,9 @@ int cmd_unpack_objects(int argc, const char **argv, const char *prefix UNUSED) continue; } if (starts_with(arg, "--pack_header=")) { - struct pack_header *hdr; - char *c; - - hdr = (struct pack_header *)buffer; - hdr->hdr_signature = htonl(PACK_SIGNATURE); - hdr->hdr_version = htonl(strtoul(arg + 14, &c, 10)); - if (*c != ',') - die("bad %s", arg); - hdr->hdr_entries = htonl(strtoul(c + 1, &c, 10)); - if (*c) - die("bad %s", arg); - len = sizeof(*hdr); + if (parse_pack_header_option(arg + 14, + buffer, &len) < 0) + die(_("bad %s"), arg); continue; } if (skip_prefix(arg, "--max-input-size=", &arg)) { diff --git a/packfile.c b/packfile.c index 813584646f..e2bdadc7cb 100644 --- a/packfile.c +++ b/packfile.c @@ -2294,3 +2294,20 @@ int is_promisor_object(const struct object_id *oid) } return oidset_contains(&promisor_objects, oid); } + +int parse_pack_header_option(const char *in, unsigned char *out, unsigned int *len) +{ + struct pack_header *hdr; + char *c; + + hdr = (struct pack_header *)out; + hdr->hdr_signature = htonl(PACK_SIGNATURE); + hdr->hdr_version = htonl(strtoul(in, &c, 10)); + if (*c != ',') + return -1; + hdr->hdr_entries = htonl(strtoul(c + 1, &c, 10)); + if (*c) + return -1; + *len = sizeof(*hdr); + return 0; +} diff --git a/packfile.h b/packfile.h index eb18ec15db..41f38b4832 100644 --- a/packfile.h +++ b/packfile.h @@ -210,4 +210,10 @@ int is_promisor_object(const struct object_id *oid); int load_idx(const char *path, const unsigned int hashsz, void *idx_map, size_t idx_size, struct packed_git *p); +/* + * Parse a --pack_header option as accepted by index-pack and unpack-objects, + * turning it into the matching bytes we'd find in a pack. + */ +int parse_pack_header_option(const char *in, unsigned char *out, unsigned int *len); + #endif From 4f02f4d68d8eefe728008974640839ef6e1b2182 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Sun, 19 Jan 2025 08:23:44 -0500 Subject: [PATCH 3/5] parse_pack_header_option(): avoid unaligned memory writes In order to recreate a pack header in our in-memory buffer, we cast the buffer to a "struct pack_header" and assign the individual fields. This is reported to cause SIGBUS on sparc64 due to alignment issues. We can work around this by using put_be32() which will write individual bytes into the buffer. Reported-by: Koakuma Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- packfile.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/packfile.c b/packfile.c index e2bdadc7cb..93f771ad95 100644 --- a/packfile.c +++ b/packfile.c @@ -2297,17 +2297,20 @@ int is_promisor_object(const struct object_id *oid) int parse_pack_header_option(const char *in, unsigned char *out, unsigned int *len) { - struct pack_header *hdr; + unsigned char *hdr; char *c; - hdr = (struct pack_header *)out; - hdr->hdr_signature = htonl(PACK_SIGNATURE); - hdr->hdr_version = htonl(strtoul(in, &c, 10)); + hdr = out; + put_be32(hdr, PACK_SIGNATURE); + hdr += 4; + put_be32(hdr, strtoul(in, &c, 10)); + hdr += 4; if (*c != ',') return -1; - hdr->hdr_entries = htonl(strtoul(c + 1, &c, 10)); + put_be32(hdr, strtoul(c + 1, &c, 10)); + hdr += 4; if (*c) return -1; - *len = sizeof(*hdr); + *len = hdr - out; return 0; } From f1299bff26a20b70bb5b8440526a2bd3c6de298a Mon Sep 17 00:00:00 2001 From: Jeff King Date: Sun, 19 Jan 2025 08:25:47 -0500 Subject: [PATCH 4/5] index-pack, unpack-objects: use get_be32() for reading pack header Both of these commands read the incoming pack into a static unsigned char buffer in BSS, and then parse it by casting the start of the buffer to a struct pack_header. This can result in SIGBUS on some platforms if the compiler doesn't place the buffer in a position that is properly aligned for 4-byte integers. This reportedly happens with unpack-objects (but not index-pack) on sparc64 when compiled with clang (but not gcc). But we are definitely in the wrong in both spots; since the buffer's type is unsigned char, we can't depend on larger alignment. When it works it is only because we are lucky. We'll fix this by switching to get_be32() to read the headers (just like the last few commits similarly switched us to put_be32() for writing into the same buffer). It would be nice to factor this out into a common helper function, but the interface ends up quite awkward. Either the caller needs to hardcode how many bytes we'll need, or it needs to pass us its fill()/use() functions as pointers. So I've just fixed both spots in the same way; this is not code that is likely to be repeated a third time (most of the pack reading code uses an mmap'd buffer, which should be properly aligned). I did make one tweak to the shared code: our pack_version_ok() macro expects us to pass the big-endian value we'd get by casting. We can introduce a "native" variant which uses the host integer ordering. Reported-by: Koakuma Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- builtin/index-pack.c | 12 +++++++----- builtin/unpack-objects.c | 13 +++++++------ pack.h | 3 ++- 3 files changed, 16 insertions(+), 12 deletions(-) diff --git a/builtin/index-pack.c b/builtin/index-pack.c index bab42dfc2a..5f0ff1ce04 100644 --- a/builtin/index-pack.c +++ b/builtin/index-pack.c @@ -363,16 +363,18 @@ static const char *open_pack_file(const char *pack_name) static void parse_pack_header(void) { - struct pack_header *hdr = fill(sizeof(struct pack_header)); + unsigned char *hdr = fill(sizeof(struct pack_header)); /* Header consistency check */ - if (hdr->hdr_signature != htonl(PACK_SIGNATURE)) + if (get_be32(hdr) != PACK_SIGNATURE) die(_("pack signature mismatch")); - if (!pack_version_ok(hdr->hdr_version)) + hdr += 4; + if (!pack_version_ok_native(get_be32(hdr))) die(_("pack version %"PRIu32" unsupported"), - ntohl(hdr->hdr_version)); + get_be32(hdr)); + hdr += 4; - nr_objects = ntohl(hdr->hdr_entries); + nr_objects = get_be32(hdr); use(sizeof(struct pack_header)); } diff --git a/builtin/unpack-objects.c b/builtin/unpack-objects.c index 3161447274..fc3de6dac8 100644 --- a/builtin/unpack-objects.c +++ b/builtin/unpack-objects.c @@ -576,15 +576,16 @@ static void unpack_one(unsigned nr) static void unpack_all(void) { int i; - struct pack_header *hdr = fill(sizeof(struct pack_header)); + unsigned char *hdr = fill(sizeof(struct pack_header)); - nr_objects = ntohl(hdr->hdr_entries); - - if (ntohl(hdr->hdr_signature) != PACK_SIGNATURE) + if (get_be32(hdr) != PACK_SIGNATURE) die("bad pack file"); - if (!pack_version_ok(hdr->hdr_version)) + hdr += 4; + if (!pack_version_ok_native(get_be32(hdr))) die("unknown pack file version %"PRIu32, - ntohl(hdr->hdr_version)); + get_be32(hdr)); + hdr += 4; + nr_objects = get_be32(hdr); use(sizeof(struct pack_header)); if (!quiet) diff --git a/pack.h b/pack.h index 3ab9e3f60c..33b42fdad5 100644 --- a/pack.h +++ b/pack.h @@ -13,7 +13,8 @@ struct repository; */ #define PACK_SIGNATURE 0x5041434b /* "PACK" */ #define PACK_VERSION 2 -#define pack_version_ok(v) ((v) == htonl(2) || (v) == htonl(3)) +#define pack_version_ok(v) pack_version_ok_native(ntohl(v)) +#define pack_version_ok_native(v) ((v) == 2 || (v) == 3) struct pack_header { uint32_t hdr_signature; uint32_t hdr_version; From 98046591b96a213e05d17569b1645e772df91b90 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Sun, 19 Jan 2025 08:25:53 -0500 Subject: [PATCH 5/5] index-pack, unpack-objects: use skip_prefix to avoid magic number When parsing --pack_header=, we manually skip 14 bytes to the data. Let's use skip_prefix() to do this automatically. Note that we overwrite our pointer to the front of the string, so we have to add more context to the error message. We could avoid this by declaring an extra pointer to hold the value, but I think the modified message is actually preferable; it should give translators a bit more context. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- builtin/index-pack.c | 6 +++--- builtin/unpack-objects.c | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/builtin/index-pack.c b/builtin/index-pack.c index 5f0ff1ce04..d80e054581 100644 --- a/builtin/index-pack.c +++ b/builtin/index-pack.c @@ -1802,11 +1802,11 @@ int cmd_index_pack(int argc, const char **argv, const char *prefix) warning(_("no threads support, ignoring %s"), arg); nr_threads = 1; } - } else if (starts_with(arg, "--pack_header=")) { - if (parse_pack_header_option(arg + 14, + } else if (skip_prefix(arg, "--pack_header=", &arg)) { + if (parse_pack_header_option(arg, input_buffer, &input_len) < 0) - die(_("bad %s"), arg); + die(_("bad --pack_header: %s"), arg); } else if (!strcmp(arg, "-v")) { verbose = 1; } else if (!strcmp(arg, "--progress-title")) { diff --git a/builtin/unpack-objects.c b/builtin/unpack-objects.c index fc3de6dac8..028cfe175e 100644 --- a/builtin/unpack-objects.c +++ b/builtin/unpack-objects.c @@ -640,10 +640,10 @@ int cmd_unpack_objects(int argc, const char **argv, const char *prefix UNUSED) fsck_set_msg_types(&fsck_options, arg); continue; } - if (starts_with(arg, "--pack_header=")) { - if (parse_pack_header_option(arg + 14, + if (skip_prefix(arg, "--pack_header=", &arg)) { + if (parse_pack_header_option(arg, buffer, &len) < 0) - die(_("bad %s"), arg); + die(_("bad --pack_header: %s"), arg); continue; } if (skip_prefix(arg, "--max-input-size=", &arg)) {