From d9213e4716ec8d0ac543d32a52a39c79818cb8ca Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Thu, 23 Jan 2025 12:34:19 -0500 Subject: [PATCH 1/8] t/helper/test-tool: implement sha1-unsafe helper With the new "unsafe" SHA-1 build knob, it is convenient to have a test-tool that can exercise Git's unsafe SHA-1 wrappers for testing, similar to 't/helper/test-tool sha1'. Implement that helper by altering the implementation of that test-tool (in cmd_hash_impl(), which is generic and parameterized over different hash functions) to conditionally run the unsafe variants of the chosen hash function, and expose the new behavior via a new 'sha1-unsafe' test helper. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- t/helper/test-hash.c | 17 +++++++++++++---- t/helper/test-sha1.c | 7 ++++++- t/helper/test-sha1.sh | 38 ++++++++++++++++++++++---------------- t/helper/test-sha256.c | 2 +- t/helper/test-tool.c | 1 + t/helper/test-tool.h | 3 ++- 6 files changed, 45 insertions(+), 23 deletions(-) diff --git a/t/helper/test-hash.c b/t/helper/test-hash.c index 45d829c908..d0ee668df9 100644 --- a/t/helper/test-hash.c +++ b/t/helper/test-hash.c @@ -1,7 +1,7 @@ #include "test-tool.h" #include "hex.h" -int cmd_hash_impl(int ac, const char **av, int algo) +int cmd_hash_impl(int ac, const char **av, int algo, int unsafe) { git_hash_ctx ctx; unsigned char hash[GIT_MAX_HEXSZ]; @@ -27,7 +27,10 @@ int cmd_hash_impl(int ac, const char **av, int algo) die("OOPS"); } - algop->init_fn(&ctx); + if (unsafe) + algop->unsafe_init_fn(&ctx); + else + algop->init_fn(&ctx); while (1) { ssize_t sz, this_sz; @@ -46,9 +49,15 @@ int cmd_hash_impl(int ac, const char **av, int algo) } if (this_sz == 0) break; - algop->update_fn(&ctx, buffer, this_sz); + if (unsafe) + algop->unsafe_update_fn(&ctx, buffer, this_sz); + else + algop->update_fn(&ctx, buffer, this_sz); } - algop->final_fn(hash, &ctx); + if (unsafe) + algop->unsafe_final_fn(hash, &ctx); + else + algop->final_fn(hash, &ctx); if (binary) fwrite(hash, 1, algop->rawsz, stdout); diff --git a/t/helper/test-sha1.c b/t/helper/test-sha1.c index e60d000c03..349540c4df 100644 --- a/t/helper/test-sha1.c +++ b/t/helper/test-sha1.c @@ -3,7 +3,7 @@ int cmd__sha1(int ac, const char **av) { - return cmd_hash_impl(ac, av, GIT_HASH_SHA1); + return cmd_hash_impl(ac, av, GIT_HASH_SHA1, 0); } int cmd__sha1_is_sha1dc(int argc UNUSED, const char **argv UNUSED) @@ -13,3 +13,8 @@ int cmd__sha1_is_sha1dc(int argc UNUSED, const char **argv UNUSED) #endif return 1; } + +int cmd__sha1_unsafe(int ac, const char **av) +{ + return cmd_hash_impl(ac, av, GIT_HASH_SHA1, 1); +} diff --git a/t/helper/test-sha1.sh b/t/helper/test-sha1.sh index 84594885c7..bf387d3db1 100755 --- a/t/helper/test-sha1.sh +++ b/t/helper/test-sha1.sh @@ -3,25 +3,31 @@ dd if=/dev/zero bs=1048576 count=100 2>/dev/null | /usr/bin/time t/helper/test-tool sha1 >/dev/null +dd if=/dev/zero bs=1048576 count=100 2>/dev/null | +/usr/bin/time t/helper/test-tool sha1-unsafe >/dev/null + while read expect cnt pfx do case "$expect" in '#'*) continue ;; esac - actual=$( - { - test -z "$pfx" || echo "$pfx" - dd if=/dev/zero bs=1048576 count=$cnt 2>/dev/null | - perl -pe 'y/\000/g/' - } | ./t/helper/test-tool sha1 $cnt - ) - if test "$expect" = "$actual" - then - echo "OK: $expect $cnt $pfx" - else - echo >&2 "OOPS: $cnt" - echo >&2 "expect: $expect" - echo >&2 "actual: $actual" - exit 1 - fi + for sha1 in sha1 sha1-unsafe + do + actual=$( + { + test -z "$pfx" || echo "$pfx" + dd if=/dev/zero bs=1048576 count=$cnt 2>/dev/null | + perl -pe 'y/\000/g/' + } | ./t/helper/test-tool $sha1 $cnt + ) + if test "$expect" = "$actual" + then + echo "OK ($sha1): $expect $cnt $pfx" + else + echo >&2 "OOPS ($sha1): $cnt" + echo >&2 "expect ($sha1): $expect" + echo >&2 "actual ($sha1): $actual" + exit 1 + fi + done done < Date: Thu, 23 Jan 2025 12:34:23 -0500 Subject: [PATCH 2/8] csum-file: store the hash algorithm as a struct field Throughout the hashfile API, we rely on a reference to 'the_hash_algo', and call its _unsafe function variants directly. Prepare for a future change where we may use a different 'git_hash_algo' pointer (instead of just relying on 'the_hash_algo' throughout) by making the 'git_hash_algo' pointer a member of the 'hashfile' structure itself. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- csum-file.c | 20 +++++++++++--------- csum-file.h | 1 + 2 files changed, 12 insertions(+), 9 deletions(-) diff --git a/csum-file.c b/csum-file.c index 5716016e12..b28cd047e3 100644 --- a/csum-file.c +++ b/csum-file.c @@ -50,7 +50,7 @@ void hashflush(struct hashfile *f) if (offset) { if (!f->skip_hash) - the_hash_algo->unsafe_update_fn(&f->ctx, f->buffer, offset); + f->algop->unsafe_update_fn(&f->ctx, f->buffer, offset); flush(f, f->buffer, offset); f->offset = 0; } @@ -71,14 +71,14 @@ int finalize_hashfile(struct hashfile *f, unsigned char *result, hashflush(f); if (f->skip_hash) - hashclr(f->buffer, the_repository->hash_algo); + hashclr(f->buffer, f->algop); else - the_hash_algo->unsafe_final_fn(f->buffer, &f->ctx); + f->algop->unsafe_final_fn(f->buffer, &f->ctx); if (result) - hashcpy(result, f->buffer, the_repository->hash_algo); + hashcpy(result, f->buffer, f->algop); if (flags & CSUM_HASH_IN_STREAM) - flush(f, f->buffer, the_hash_algo->rawsz); + flush(f, f->buffer, f->algop->rawsz); if (flags & CSUM_FSYNC) fsync_component_or_die(component, f->fd, f->name); if (flags & CSUM_CLOSE) { @@ -128,7 +128,7 @@ void hashwrite(struct hashfile *f, const void *buf, unsigned int count) * f->offset is necessarily zero. */ if (!f->skip_hash) - the_hash_algo->unsafe_update_fn(&f->ctx, buf, nr); + f->algop->unsafe_update_fn(&f->ctx, buf, nr); flush(f, buf, nr); } else { /* @@ -174,7 +174,9 @@ static struct hashfile *hashfd_internal(int fd, const char *name, f->name = name; f->do_crc = 0; f->skip_hash = 0; - the_hash_algo->unsafe_init_fn(&f->ctx); + + f->algop = the_hash_algo; + f->algop->unsafe_init_fn(&f->ctx); f->buffer_len = buffer_len; f->buffer = xmalloc(buffer_len); @@ -208,7 +210,7 @@ void hashfile_checkpoint(struct hashfile *f, struct hashfile_checkpoint *checkpo { hashflush(f); checkpoint->offset = f->total; - the_hash_algo->unsafe_clone_fn(&checkpoint->ctx, &f->ctx); + f->algop->unsafe_clone_fn(&checkpoint->ctx, &f->ctx); } int hashfile_truncate(struct hashfile *f, struct hashfile_checkpoint *checkpoint) @@ -219,7 +221,7 @@ int hashfile_truncate(struct hashfile *f, struct hashfile_checkpoint *checkpoint lseek(f->fd, offset, SEEK_SET) != offset) return -1; f->total = offset; - the_hash_algo->unsafe_clone_fn(&f->ctx, &checkpoint->ctx); + f->algop->unsafe_clone_fn(&f->ctx, &checkpoint->ctx); f->offset = 0; /* hashflush() was called in checkpoint */ return 0; } diff --git a/csum-file.h b/csum-file.h index 7c73da0a40..2b45f4673a 100644 --- a/csum-file.h +++ b/csum-file.h @@ -20,6 +20,7 @@ struct hashfile { size_t buffer_len; unsigned char *buffer; unsigned char *check_buffer; + const struct git_hash_algo *algop; /** * If non-zero, skip_hash indicates that we should From 5fcc683338e947d1226a9426174e7c48ce849c47 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Thu, 23 Jan 2025 12:34:26 -0500 Subject: [PATCH 3/8] csum-file.c: extract algop from hashfile_checksum_valid() Perform a similar transformation as in the previous commit, but focused instead on hashfile_checksum_valid(). This function does not work with a hashfile structure itself, and instead validates the raw contents of a file written using the hashfile API. We'll want to be prepared for a similar change to this function in the future, so prepare ourselves for that by extracting 'the_hash_algo' into its own field for use within this function. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- csum-file.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/csum-file.c b/csum-file.c index b28cd047e3..7a71121e34 100644 --- a/csum-file.c +++ b/csum-file.c @@ -242,14 +242,15 @@ int hashfile_checksum_valid(const unsigned char *data, size_t total_len) { unsigned char got[GIT_MAX_RAWSZ]; git_hash_ctx ctx; - size_t data_len = total_len - the_hash_algo->rawsz; + const struct git_hash_algo *algop = the_hash_algo; + size_t data_len = total_len - algop->rawsz; - if (total_len < the_hash_algo->rawsz) + if (total_len < algop->rawsz) return 0; /* say "too short"? */ - the_hash_algo->unsafe_init_fn(&ctx); - the_hash_algo->unsafe_update_fn(&ctx, data, data_len); - the_hash_algo->unsafe_final_fn(got, &ctx); + algop->unsafe_init_fn(&ctx); + algop->unsafe_update_fn(&ctx, data, data_len); + algop->unsafe_final_fn(got, &ctx); - return hasheq(got, data + data_len, the_repository->hash_algo); + return hasheq(got, data + data_len, algop); } From 7b081d2f70feb7eadd1e93f52146e5d68371451d Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Thu, 23 Jan 2025 12:34:29 -0500 Subject: [PATCH 4/8] hash.h: introduce `unsafe_hash_algo()` In 253ed9ecff (hash.h: scaffolding for _unsafe hashing variants, 2024-09-26), we introduced "unsafe" variants of the SHA-1 hashing functions by introducing new functions like "unsafe_init_fn()" and so on. This approach has a major shortcoming that callers must remember to consistently use one variant or the other. Failing to consistently use (or not use) the unsafe variants can lead to crashes at best, or subtle memory corruption issues at worst. In the hashfile API, this isn't difficult to achieve, but verifying that all callers consistently use the unsafe variants is somewhat of a chore given how spread out all of the callers are. In the sha1 and sha1-unsafe test helpers, all of the calls to various hash functions are guarded by an "if (unsafe)" conditional, which is repetitive and cumbersome. Address these issues by introducing a new pattern whereby one 'git_hash_algo' can return a pointer to another 'git_hash_algo' that represents the unsafe version of itself. So instead of having something like: if (unsafe) the_hash_algo->init_fn(...); the_hash_algo->update_fn(...); the_hash_algo->final_fn(...); else the_hash_algo->unsafe_init_fn(...); the_hash_algo->unsafe_update_fn(...); the_hash_algo->unsafe_final_fn(...); we can instead write: struct git_hash_algo *algop = the_hash_algo; if (unsafe) algop = unsafe_hash_algo(algop); algop->init_fn(...); algop->update_fn(...); algop->final_fn(...); This removes the existing shortcoming by no longer forcing the caller to "remember" which variant of the hash functions it wants to call, only to hold onto a 'struct git_hash_algo' pointer that is initialized once. Similarly, while there currently is still a way to "mix" safe and unsafe functions, this too will go away after subsequent commits remove all direct calls to the unsafe_ variants. Note that hash_algo_by_ptr() needs an adjustment to allow passing in the unsafe variant of a hash function. All other query functions on the hash_algos array will continue to return the safe variants of any function. Suggested-by: Jeff King Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- hash.h | 13 ++++++++++++- object-file.c | 26 ++++++++++++++++++++++++++ 2 files changed, 38 insertions(+), 1 deletion(-) diff --git a/hash.h b/hash.h index 756166ce5e..0bf63cedfa 100644 --- a/hash.h +++ b/hash.h @@ -305,6 +305,9 @@ struct git_hash_algo { /* The all-zeros OID. */ const struct object_id *null_oid; + + /* The unsafe variant of this hash function, if one exists. */ + const struct git_hash_algo *unsafe; }; extern const struct git_hash_algo hash_algos[GIT_HASH_NALGOS]; @@ -320,9 +323,17 @@ int hash_algo_by_length(int len); /* Identical, except for a pointer to struct git_hash_algo. */ static inline int hash_algo_by_ptr(const struct git_hash_algo *p) { - return p - hash_algos; + size_t i; + for (i = 0; i < GIT_HASH_NALGOS; i++) { + const struct git_hash_algo *algop = &hash_algos[i]; + if (p == algop) + return i; + } + return GIT_HASH_UNKNOWN; } +const struct git_hash_algo *unsafe_hash_algo(const struct git_hash_algo *algop); + const struct object_id *null_oid(void); static inline int hashcmp(const unsigned char *sha1, const unsigned char *sha2, const struct git_hash_algo *algop) diff --git a/object-file.c b/object-file.c index 5b792b3dd4..43efa4ca36 100644 --- a/object-file.c +++ b/object-file.c @@ -202,6 +202,22 @@ static void git_hash_unknown_final_oid(struct object_id *oid UNUSED, BUG("trying to finalize unknown hash"); } +static const struct git_hash_algo sha1_unsafe_algo = { + .name = "sha1", + .format_id = GIT_SHA1_FORMAT_ID, + .rawsz = GIT_SHA1_RAWSZ, + .hexsz = GIT_SHA1_HEXSZ, + .blksz = GIT_SHA1_BLKSZ, + .init_fn = git_hash_sha1_init_unsafe, + .clone_fn = git_hash_sha1_clone_unsafe, + .update_fn = git_hash_sha1_update_unsafe, + .final_fn = git_hash_sha1_final_unsafe, + .final_oid_fn = git_hash_sha1_final_oid_unsafe, + .empty_tree = &empty_tree_oid, + .empty_blob = &empty_blob_oid, + .null_oid = &null_oid_sha1, +}; + const struct git_hash_algo hash_algos[GIT_HASH_NALGOS] = { { .name = NULL, @@ -239,6 +255,7 @@ const struct git_hash_algo hash_algos[GIT_HASH_NALGOS] = { .unsafe_update_fn = git_hash_sha1_update_unsafe, .unsafe_final_fn = git_hash_sha1_final_unsafe, .unsafe_final_oid_fn = git_hash_sha1_final_oid_unsafe, + .unsafe = &sha1_unsafe_algo, .empty_tree = &empty_tree_oid, .empty_blob = &empty_blob_oid, .null_oid = &null_oid_sha1, @@ -305,6 +322,15 @@ int hash_algo_by_length(int len) return GIT_HASH_UNKNOWN; } +const struct git_hash_algo *unsafe_hash_algo(const struct git_hash_algo *algop) +{ + /* If we have a faster "unsafe" implementation, use that. */ + if (algop->unsafe) + return algop->unsafe; + /* Otherwise use the default one. */ + return algop; +} + /* * This is meant to hold a *small* number of objects that you would * want repo_read_object_file() to be able to return, but yet you do not want From f0c266af4ea7e4d9b84955f8fed8ee8cb009cbd8 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Thu, 23 Jan 2025 12:34:33 -0500 Subject: [PATCH 5/8] csum-file.c: use unsafe_hash_algo() Instead of calling the unsafe_ hash function variants directly, make use of the shared 'algop' pointer by initializing it to: f->algop = unsafe_hash_algo(the_hash_algo); , thus making all calls use the unsafe variants directly. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- csum-file.c | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/csum-file.c b/csum-file.c index 7a71121e34..ebffc80ef7 100644 --- a/csum-file.c +++ b/csum-file.c @@ -50,7 +50,7 @@ void hashflush(struct hashfile *f) if (offset) { if (!f->skip_hash) - f->algop->unsafe_update_fn(&f->ctx, f->buffer, offset); + f->algop->update_fn(&f->ctx, f->buffer, offset); flush(f, f->buffer, offset); f->offset = 0; } @@ -73,7 +73,7 @@ int finalize_hashfile(struct hashfile *f, unsigned char *result, if (f->skip_hash) hashclr(f->buffer, f->algop); else - f->algop->unsafe_final_fn(f->buffer, &f->ctx); + f->algop->final_fn(f->buffer, &f->ctx); if (result) hashcpy(result, f->buffer, f->algop); @@ -128,7 +128,7 @@ void hashwrite(struct hashfile *f, const void *buf, unsigned int count) * f->offset is necessarily zero. */ if (!f->skip_hash) - f->algop->unsafe_update_fn(&f->ctx, buf, nr); + f->algop->update_fn(&f->ctx, buf, nr); flush(f, buf, nr); } else { /* @@ -175,8 +175,8 @@ static struct hashfile *hashfd_internal(int fd, const char *name, f->do_crc = 0; f->skip_hash = 0; - f->algop = the_hash_algo; - f->algop->unsafe_init_fn(&f->ctx); + f->algop = unsafe_hash_algo(the_hash_algo); + f->algop->init_fn(&f->ctx); f->buffer_len = buffer_len; f->buffer = xmalloc(buffer_len); @@ -210,7 +210,7 @@ void hashfile_checkpoint(struct hashfile *f, struct hashfile_checkpoint *checkpo { hashflush(f); checkpoint->offset = f->total; - f->algop->unsafe_clone_fn(&checkpoint->ctx, &f->ctx); + f->algop->clone_fn(&checkpoint->ctx, &f->ctx); } int hashfile_truncate(struct hashfile *f, struct hashfile_checkpoint *checkpoint) @@ -221,7 +221,7 @@ int hashfile_truncate(struct hashfile *f, struct hashfile_checkpoint *checkpoint lseek(f->fd, offset, SEEK_SET) != offset) return -1; f->total = offset; - f->algop->unsafe_clone_fn(&f->ctx, &checkpoint->ctx); + f->algop->clone_fn(&f->ctx, &checkpoint->ctx); f->offset = 0; /* hashflush() was called in checkpoint */ return 0; } @@ -242,15 +242,15 @@ int hashfile_checksum_valid(const unsigned char *data, size_t total_len) { unsigned char got[GIT_MAX_RAWSZ]; git_hash_ctx ctx; - const struct git_hash_algo *algop = the_hash_algo; + const struct git_hash_algo *algop = unsafe_hash_algo(the_hash_algo); size_t data_len = total_len - algop->rawsz; if (total_len < algop->rawsz) return 0; /* say "too short"? */ - algop->unsafe_init_fn(&ctx); - algop->unsafe_update_fn(&ctx, data, data_len); - algop->unsafe_final_fn(got, &ctx); + algop->init_fn(&ctx); + algop->update_fn(&ctx, data, data_len); + algop->final_fn(got, &ctx); return hasheq(got, data + data_len, algop); } From 3339180b28da5138eacb6b64f89c03e21493a73d Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Thu, 23 Jan 2025 12:34:36 -0500 Subject: [PATCH 6/8] t/helper/test-hash.c: use unsafe_hash_algo() Remove a series of conditionals within the shared cmd_hash_impl() helper that powers the 'sha1' and 'sha1-unsafe' helpers. Instead, replace them with a single conditional that transforms the specified hash algorithm into its unsafe variant. Then all subsequent calls can directly use whatever function it wants to call without having to decide between the safe and unsafe variants. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- t/helper/test-hash.c | 17 +++++------------ 1 file changed, 5 insertions(+), 12 deletions(-) diff --git a/t/helper/test-hash.c b/t/helper/test-hash.c index d0ee668df9..aa82638c62 100644 --- a/t/helper/test-hash.c +++ b/t/helper/test-hash.c @@ -9,6 +9,8 @@ int cmd_hash_impl(int ac, const char **av, int algo, int unsafe) int binary = 0; char *buffer; const struct git_hash_algo *algop = &hash_algos[algo]; + if (unsafe) + algop = unsafe_hash_algo(algop); if (ac == 2) { if (!strcmp(av[1], "-b")) @@ -27,10 +29,7 @@ int cmd_hash_impl(int ac, const char **av, int algo, int unsafe) die("OOPS"); } - if (unsafe) - algop->unsafe_init_fn(&ctx); - else - algop->init_fn(&ctx); + algop->init_fn(&ctx); while (1) { ssize_t sz, this_sz; @@ -49,15 +48,9 @@ int cmd_hash_impl(int ac, const char **av, int algo, int unsafe) } if (this_sz == 0) break; - if (unsafe) - algop->unsafe_update_fn(&ctx, buffer, this_sz); - else - algop->update_fn(&ctx, buffer, this_sz); + algop->update_fn(&ctx, buffer, this_sz); } - if (unsafe) - algop->unsafe_final_fn(hash, &ctx); - else - algop->final_fn(hash, &ctx); + algop->final_fn(hash, &ctx); if (binary) fwrite(hash, 1, algop->rawsz, stdout); From a8dd3821fe4fcf1524537ef97e4f5e2cf68ce949 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Thu, 23 Jan 2025 12:34:39 -0500 Subject: [PATCH 7/8] csum-file: introduce hashfile_checkpoint_init() In 106140a99f (builtin/fast-import: fix segfault with unsafe SHA1 backend, 2024-12-30) and 9218c0bfe1 (bulk-checkin: fix segfault with unsafe SHA1 backend, 2024-12-30), we observed the effects of failing to initialize a hashfile_checkpoint with the same hash function implementation as is used by the hashfile it is used to checkpoint. While both 106140a99f and 9218c0bfe1 work around the immediate crash, changing the hash function implementation within the hashfile API to, for example, the non-unsafe variant would re-introduce the crash. This is a result of the tight coupling between initializing hashfiles and hashfile_checkpoints. Introduce and use a new function which ensures that both parts of a hashfile and hashfile_checkpoint pair use the same hash function implementation to avoid such crashes. A few things worth noting: - In the change to builtin/fast-import.c::stream_blob(), we can see that by removing the explicit reference to 'the_hash_algo->unsafe_init_fn()', we are hardened against the hashfile API changing away from the_hash_algo (or its unsafe variant) in the future. - The bulk-checkin code no longer needs to explicitly zero-initialize the hashfile_checkpoint, since it is now done as a result of calling 'hashfile_checkpoint_init()'. - Also in the bulk-checkin code, we add an additional call to prepare_to_stream() outside of the main loop in order to initialize 'state->f' so we know which hash function implementation to use when calling 'hashfile_checkpoint_init()'. This is OK, since subsequent 'prepare_to_stream()' calls are noops. However, we only need to call 'prepare_to_stream()' when we have the HASH_WRITE_OBJECT bit set in our flags. Without that bit, calling 'prepare_to_stream()' does not assign 'state->f', so we have nothing to initialize. - Other uses of the 'checkpoint' in 'deflate_blob_to_pack()' are appropriately guarded. Helped-by: Patrick Steinhardt Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/fast-import.c | 2 +- bulk-checkin.c | 9 ++++++--- csum-file.c | 7 +++++++ csum-file.h | 1 + 4 files changed, 15 insertions(+), 4 deletions(-) diff --git a/builtin/fast-import.c b/builtin/fast-import.c index 0f86392761..4a6c7ab52a 100644 --- a/builtin/fast-import.c +++ b/builtin/fast-import.c @@ -1106,7 +1106,7 @@ static void stream_blob(uintmax_t len, struct object_id *oidout, uintmax_t mark) || (pack_size + PACK_SIZE_THRESHOLD + len) < pack_size) cycle_packfile(); - the_hash_algo->unsafe_init_fn(&checkpoint.ctx); + hashfile_checkpoint_init(pack_file, &checkpoint); hashfile_checkpoint(pack_file, &checkpoint); offset = checkpoint.offset; diff --git a/bulk-checkin.c b/bulk-checkin.c index 433070a3bd..892176d23d 100644 --- a/bulk-checkin.c +++ b/bulk-checkin.c @@ -261,7 +261,7 @@ static int deflate_blob_to_pack(struct bulk_checkin_packfile *state, git_hash_ctx ctx; unsigned char obuf[16384]; unsigned header_len; - struct hashfile_checkpoint checkpoint = {0}; + struct hashfile_checkpoint checkpoint; struct pack_idx_entry *idx = NULL; seekback = lseek(fd, 0, SEEK_CUR); @@ -272,12 +272,15 @@ static int deflate_blob_to_pack(struct bulk_checkin_packfile *state, OBJ_BLOB, size); the_hash_algo->init_fn(&ctx); the_hash_algo->update_fn(&ctx, obuf, header_len); - the_hash_algo->unsafe_init_fn(&checkpoint.ctx); /* Note: idx is non-NULL when we are writing */ - if ((flags & HASH_WRITE_OBJECT) != 0) + if ((flags & HASH_WRITE_OBJECT) != 0) { CALLOC_ARRAY(idx, 1); + prepare_to_stream(state, flags); + hashfile_checkpoint_init(state->f, &checkpoint); + } + already_hashed_to = 0; while (1) { diff --git a/csum-file.c b/csum-file.c index ebffc80ef7..232121f415 100644 --- a/csum-file.c +++ b/csum-file.c @@ -206,6 +206,13 @@ struct hashfile *hashfd_throughput(int fd, const char *name, struct progress *tp return hashfd_internal(fd, name, tp, 8 * 1024); } +void hashfile_checkpoint_init(struct hashfile *f, + struct hashfile_checkpoint *checkpoint) +{ + memset(checkpoint, 0, sizeof(*checkpoint)); + f->algop->init_fn(&checkpoint->ctx); +} + void hashfile_checkpoint(struct hashfile *f, struct hashfile_checkpoint *checkpoint) { hashflush(f); diff --git a/csum-file.h b/csum-file.h index 2b45f4673a..b7475f16c2 100644 --- a/csum-file.h +++ b/csum-file.h @@ -36,6 +36,7 @@ struct hashfile_checkpoint { git_hash_ctx ctx; }; +void hashfile_checkpoint_init(struct hashfile *, struct hashfile_checkpoint *); void hashfile_checkpoint(struct hashfile *, struct hashfile_checkpoint *); int hashfile_truncate(struct hashfile *, struct hashfile_checkpoint *); From 04292c3796bb92664f6111326215d9c060ef71c8 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Thu, 23 Jan 2025 12:34:42 -0500 Subject: [PATCH 8/8] hash.h: drop unsafe_ function variants Now that all callers have been converted from: the_hash_algo->unsafe_init_fn(); to unsafe_hash_algo(the_hash_algo)->init_fn(); and similar, we can remove the scaffolding for the unsafe_ function variants and force callers to use the new unsafe_hash_algo() mechanic instead. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- hash.h | 15 --------------- object-file.c | 15 --------------- 2 files changed, 30 deletions(-) diff --git a/hash.h b/hash.h index 0bf63cedfa..ad2c919991 100644 --- a/hash.h +++ b/hash.h @@ -282,21 +282,6 @@ struct git_hash_algo { /* The hash finalization function for object IDs. */ git_hash_final_oid_fn final_oid_fn; - /* The non-cryptographic hash initialization function. */ - git_hash_init_fn unsafe_init_fn; - - /* The non-cryptographic hash context cloning function. */ - git_hash_clone_fn unsafe_clone_fn; - - /* The non-cryptographic hash update function. */ - git_hash_update_fn unsafe_update_fn; - - /* The non-cryptographic hash finalization function. */ - git_hash_final_fn unsafe_final_fn; - - /* The non-cryptographic hash finalization function. */ - git_hash_final_oid_fn unsafe_final_oid_fn; - /* The OID of the empty tree. */ const struct object_id *empty_tree; diff --git a/object-file.c b/object-file.c index 43efa4ca36..c4b42dd4be 100644 --- a/object-file.c +++ b/object-file.c @@ -230,11 +230,6 @@ const struct git_hash_algo hash_algos[GIT_HASH_NALGOS] = { .update_fn = git_hash_unknown_update, .final_fn = git_hash_unknown_final, .final_oid_fn = git_hash_unknown_final_oid, - .unsafe_init_fn = git_hash_unknown_init, - .unsafe_clone_fn = git_hash_unknown_clone, - .unsafe_update_fn = git_hash_unknown_update, - .unsafe_final_fn = git_hash_unknown_final, - .unsafe_final_oid_fn = git_hash_unknown_final_oid, .empty_tree = NULL, .empty_blob = NULL, .null_oid = NULL, @@ -250,11 +245,6 @@ const struct git_hash_algo hash_algos[GIT_HASH_NALGOS] = { .update_fn = git_hash_sha1_update, .final_fn = git_hash_sha1_final, .final_oid_fn = git_hash_sha1_final_oid, - .unsafe_init_fn = git_hash_sha1_init_unsafe, - .unsafe_clone_fn = git_hash_sha1_clone_unsafe, - .unsafe_update_fn = git_hash_sha1_update_unsafe, - .unsafe_final_fn = git_hash_sha1_final_unsafe, - .unsafe_final_oid_fn = git_hash_sha1_final_oid_unsafe, .unsafe = &sha1_unsafe_algo, .empty_tree = &empty_tree_oid, .empty_blob = &empty_blob_oid, @@ -271,11 +261,6 @@ const struct git_hash_algo hash_algos[GIT_HASH_NALGOS] = { .update_fn = git_hash_sha256_update, .final_fn = git_hash_sha256_final, .final_oid_fn = git_hash_sha256_final_oid, - .unsafe_init_fn = git_hash_sha256_init, - .unsafe_clone_fn = git_hash_sha256_clone, - .unsafe_update_fn = git_hash_sha256_update, - .unsafe_final_fn = git_hash_sha256_final, - .unsafe_final_oid_fn = git_hash_sha256_final_oid, .empty_tree = &empty_tree_oid_sha256, .empty_blob = &empty_blob_oid_sha256, .null_oid = &null_oid_sha256,