diff --git a/builtin/fast-import.c b/builtin/fast-import.c index 2da46fecdc..ccdada1810 100644 --- a/builtin/fast-import.c +++ b/builtin/fast-import.c @@ -1106,7 +1106,7 @@ static void stream_blob(uintmax_t len, struct object_id *oidout, uintmax_t mark) || (pack_size + PACK_SIZE_THRESHOLD + len) < pack_size) cycle_packfile(); - the_hash_algo->unsafe_init_fn(&checkpoint.ctx); + hashfile_checkpoint_init(pack_file, &checkpoint); hashfile_checkpoint(pack_file, &checkpoint); offset = checkpoint.offset; diff --git a/bulk-checkin.c b/bulk-checkin.c index 5044cb7fa0..f889fce734 100644 --- a/bulk-checkin.c +++ b/bulk-checkin.c @@ -261,7 +261,7 @@ static int deflate_blob_to_pack(struct bulk_checkin_packfile *state, git_hash_ctx ctx; unsigned char obuf[16384]; unsigned header_len; - struct hashfile_checkpoint checkpoint = {0}; + struct hashfile_checkpoint checkpoint; struct pack_idx_entry *idx = NULL; seekback = lseek(fd, 0, SEEK_CUR); @@ -272,12 +272,15 @@ static int deflate_blob_to_pack(struct bulk_checkin_packfile *state, OBJ_BLOB, size); the_hash_algo->init_fn(&ctx); the_hash_algo->update_fn(&ctx, obuf, header_len); - the_hash_algo->unsafe_init_fn(&checkpoint.ctx); /* Note: idx is non-NULL when we are writing */ - if ((flags & HASH_WRITE_OBJECT) != 0) + if ((flags & HASH_WRITE_OBJECT) != 0) { CALLOC_ARRAY(idx, 1); + prepare_to_stream(state, flags); + hashfile_checkpoint_init(state->f, &checkpoint); + } + already_hashed_to = 0; while (1) { diff --git a/csum-file.c b/csum-file.c index 5716016e12..232121f415 100644 --- a/csum-file.c +++ b/csum-file.c @@ -50,7 +50,7 @@ void hashflush(struct hashfile *f) if (offset) { if (!f->skip_hash) - the_hash_algo->unsafe_update_fn(&f->ctx, f->buffer, offset); + f->algop->update_fn(&f->ctx, f->buffer, offset); flush(f, f->buffer, offset); f->offset = 0; } @@ -71,14 +71,14 @@ int finalize_hashfile(struct hashfile *f, unsigned char *result, hashflush(f); if (f->skip_hash) - hashclr(f->buffer, the_repository->hash_algo); + hashclr(f->buffer, f->algop); else - the_hash_algo->unsafe_final_fn(f->buffer, &f->ctx); + f->algop->final_fn(f->buffer, &f->ctx); if (result) - hashcpy(result, f->buffer, the_repository->hash_algo); + hashcpy(result, f->buffer, f->algop); if (flags & CSUM_HASH_IN_STREAM) - flush(f, f->buffer, the_hash_algo->rawsz); + flush(f, f->buffer, f->algop->rawsz); if (flags & CSUM_FSYNC) fsync_component_or_die(component, f->fd, f->name); if (flags & CSUM_CLOSE) { @@ -128,7 +128,7 @@ void hashwrite(struct hashfile *f, const void *buf, unsigned int count) * f->offset is necessarily zero. */ if (!f->skip_hash) - the_hash_algo->unsafe_update_fn(&f->ctx, buf, nr); + f->algop->update_fn(&f->ctx, buf, nr); flush(f, buf, nr); } else { /* @@ -174,7 +174,9 @@ static struct hashfile *hashfd_internal(int fd, const char *name, f->name = name; f->do_crc = 0; f->skip_hash = 0; - the_hash_algo->unsafe_init_fn(&f->ctx); + + f->algop = unsafe_hash_algo(the_hash_algo); + f->algop->init_fn(&f->ctx); f->buffer_len = buffer_len; f->buffer = xmalloc(buffer_len); @@ -204,11 +206,18 @@ struct hashfile *hashfd_throughput(int fd, const char *name, struct progress *tp return hashfd_internal(fd, name, tp, 8 * 1024); } +void hashfile_checkpoint_init(struct hashfile *f, + struct hashfile_checkpoint *checkpoint) +{ + memset(checkpoint, 0, sizeof(*checkpoint)); + f->algop->init_fn(&checkpoint->ctx); +} + void hashfile_checkpoint(struct hashfile *f, struct hashfile_checkpoint *checkpoint) { hashflush(f); checkpoint->offset = f->total; - the_hash_algo->unsafe_clone_fn(&checkpoint->ctx, &f->ctx); + f->algop->clone_fn(&checkpoint->ctx, &f->ctx); } int hashfile_truncate(struct hashfile *f, struct hashfile_checkpoint *checkpoint) @@ -219,7 +228,7 @@ int hashfile_truncate(struct hashfile *f, struct hashfile_checkpoint *checkpoint lseek(f->fd, offset, SEEK_SET) != offset) return -1; f->total = offset; - the_hash_algo->unsafe_clone_fn(&f->ctx, &checkpoint->ctx); + f->algop->clone_fn(&f->ctx, &checkpoint->ctx); f->offset = 0; /* hashflush() was called in checkpoint */ return 0; } @@ -240,14 +249,15 @@ int hashfile_checksum_valid(const unsigned char *data, size_t total_len) { unsigned char got[GIT_MAX_RAWSZ]; git_hash_ctx ctx; - size_t data_len = total_len - the_hash_algo->rawsz; + const struct git_hash_algo *algop = unsafe_hash_algo(the_hash_algo); + size_t data_len = total_len - algop->rawsz; - if (total_len < the_hash_algo->rawsz) + if (total_len < algop->rawsz) return 0; /* say "too short"? */ - the_hash_algo->unsafe_init_fn(&ctx); - the_hash_algo->unsafe_update_fn(&ctx, data, data_len); - the_hash_algo->unsafe_final_fn(got, &ctx); + algop->init_fn(&ctx); + algop->update_fn(&ctx, data, data_len); + algop->final_fn(got, &ctx); - return hasheq(got, data + data_len, the_repository->hash_algo); + return hasheq(got, data + data_len, algop); } diff --git a/csum-file.h b/csum-file.h index 7c73da0a40..b7475f16c2 100644 --- a/csum-file.h +++ b/csum-file.h @@ -20,6 +20,7 @@ struct hashfile { size_t buffer_len; unsigned char *buffer; unsigned char *check_buffer; + const struct git_hash_algo *algop; /** * If non-zero, skip_hash indicates that we should @@ -35,6 +36,7 @@ struct hashfile_checkpoint { git_hash_ctx ctx; }; +void hashfile_checkpoint_init(struct hashfile *, struct hashfile_checkpoint *); void hashfile_checkpoint(struct hashfile *, struct hashfile_checkpoint *); int hashfile_truncate(struct hashfile *, struct hashfile_checkpoint *); diff --git a/hash.h b/hash.h index 756166ce5e..ad2c919991 100644 --- a/hash.h +++ b/hash.h @@ -282,21 +282,6 @@ struct git_hash_algo { /* The hash finalization function for object IDs. */ git_hash_final_oid_fn final_oid_fn; - /* The non-cryptographic hash initialization function. */ - git_hash_init_fn unsafe_init_fn; - - /* The non-cryptographic hash context cloning function. */ - git_hash_clone_fn unsafe_clone_fn; - - /* The non-cryptographic hash update function. */ - git_hash_update_fn unsafe_update_fn; - - /* The non-cryptographic hash finalization function. */ - git_hash_final_fn unsafe_final_fn; - - /* The non-cryptographic hash finalization function. */ - git_hash_final_oid_fn unsafe_final_oid_fn; - /* The OID of the empty tree. */ const struct object_id *empty_tree; @@ -305,6 +290,9 @@ struct git_hash_algo { /* The all-zeros OID. */ const struct object_id *null_oid; + + /* The unsafe variant of this hash function, if one exists. */ + const struct git_hash_algo *unsafe; }; extern const struct git_hash_algo hash_algos[GIT_HASH_NALGOS]; @@ -320,9 +308,17 @@ int hash_algo_by_length(int len); /* Identical, except for a pointer to struct git_hash_algo. */ static inline int hash_algo_by_ptr(const struct git_hash_algo *p) { - return p - hash_algos; + size_t i; + for (i = 0; i < GIT_HASH_NALGOS; i++) { + const struct git_hash_algo *algop = &hash_algos[i]; + if (p == algop) + return i; + } + return GIT_HASH_UNKNOWN; } +const struct git_hash_algo *unsafe_hash_algo(const struct git_hash_algo *algop); + const struct object_id *null_oid(void); static inline int hashcmp(const unsigned char *sha1, const unsigned char *sha2, const struct git_hash_algo *algop) diff --git a/object-file.c b/object-file.c index aeca61b8ae..6ce1caacae 100644 --- a/object-file.c +++ b/object-file.c @@ -202,6 +202,22 @@ static void git_hash_unknown_final_oid(struct object_id *oid UNUSED, BUG("trying to finalize unknown hash"); } +static const struct git_hash_algo sha1_unsafe_algo = { + .name = "sha1", + .format_id = GIT_SHA1_FORMAT_ID, + .rawsz = GIT_SHA1_RAWSZ, + .hexsz = GIT_SHA1_HEXSZ, + .blksz = GIT_SHA1_BLKSZ, + .init_fn = git_hash_sha1_init_unsafe, + .clone_fn = git_hash_sha1_clone_unsafe, + .update_fn = git_hash_sha1_update_unsafe, + .final_fn = git_hash_sha1_final_unsafe, + .final_oid_fn = git_hash_sha1_final_oid_unsafe, + .empty_tree = &empty_tree_oid, + .empty_blob = &empty_blob_oid, + .null_oid = &null_oid_sha1, +}; + const struct git_hash_algo hash_algos[GIT_HASH_NALGOS] = { { .name = NULL, @@ -214,11 +230,6 @@ const struct git_hash_algo hash_algos[GIT_HASH_NALGOS] = { .update_fn = git_hash_unknown_update, .final_fn = git_hash_unknown_final, .final_oid_fn = git_hash_unknown_final_oid, - .unsafe_init_fn = git_hash_unknown_init, - .unsafe_clone_fn = git_hash_unknown_clone, - .unsafe_update_fn = git_hash_unknown_update, - .unsafe_final_fn = git_hash_unknown_final, - .unsafe_final_oid_fn = git_hash_unknown_final_oid, .empty_tree = NULL, .empty_blob = NULL, .null_oid = NULL, @@ -234,11 +245,7 @@ const struct git_hash_algo hash_algos[GIT_HASH_NALGOS] = { .update_fn = git_hash_sha1_update, .final_fn = git_hash_sha1_final, .final_oid_fn = git_hash_sha1_final_oid, - .unsafe_init_fn = git_hash_sha1_init_unsafe, - .unsafe_clone_fn = git_hash_sha1_clone_unsafe, - .unsafe_update_fn = git_hash_sha1_update_unsafe, - .unsafe_final_fn = git_hash_sha1_final_unsafe, - .unsafe_final_oid_fn = git_hash_sha1_final_oid_unsafe, + .unsafe = &sha1_unsafe_algo, .empty_tree = &empty_tree_oid, .empty_blob = &empty_blob_oid, .null_oid = &null_oid_sha1, @@ -254,11 +261,6 @@ const struct git_hash_algo hash_algos[GIT_HASH_NALGOS] = { .update_fn = git_hash_sha256_update, .final_fn = git_hash_sha256_final, .final_oid_fn = git_hash_sha256_final_oid, - .unsafe_init_fn = git_hash_sha256_init, - .unsafe_clone_fn = git_hash_sha256_clone, - .unsafe_update_fn = git_hash_sha256_update, - .unsafe_final_fn = git_hash_sha256_final, - .unsafe_final_oid_fn = git_hash_sha256_final_oid, .empty_tree = &empty_tree_oid_sha256, .empty_blob = &empty_blob_oid_sha256, .null_oid = &null_oid_sha256, @@ -305,6 +307,15 @@ int hash_algo_by_length(int len) return GIT_HASH_UNKNOWN; } +const struct git_hash_algo *unsafe_hash_algo(const struct git_hash_algo *algop) +{ + /* If we have a faster "unsafe" implementation, use that. */ + if (algop->unsafe) + return algop->unsafe; + /* Otherwise use the default one. */ + return algop; +} + /* * This is meant to hold a *small* number of objects that you would * want repo_read_object_file() to be able to return, but yet you do not want diff --git a/t/helper/test-hash.c b/t/helper/test-hash.c index 45d829c908..aa82638c62 100644 --- a/t/helper/test-hash.c +++ b/t/helper/test-hash.c @@ -1,7 +1,7 @@ #include "test-tool.h" #include "hex.h" -int cmd_hash_impl(int ac, const char **av, int algo) +int cmd_hash_impl(int ac, const char **av, int algo, int unsafe) { git_hash_ctx ctx; unsigned char hash[GIT_MAX_HEXSZ]; @@ -9,6 +9,8 @@ int cmd_hash_impl(int ac, const char **av, int algo) int binary = 0; char *buffer; const struct git_hash_algo *algop = &hash_algos[algo]; + if (unsafe) + algop = unsafe_hash_algo(algop); if (ac == 2) { if (!strcmp(av[1], "-b")) diff --git a/t/helper/test-sha1.c b/t/helper/test-sha1.c index e60d000c03..349540c4df 100644 --- a/t/helper/test-sha1.c +++ b/t/helper/test-sha1.c @@ -3,7 +3,7 @@ int cmd__sha1(int ac, const char **av) { - return cmd_hash_impl(ac, av, GIT_HASH_SHA1); + return cmd_hash_impl(ac, av, GIT_HASH_SHA1, 0); } int cmd__sha1_is_sha1dc(int argc UNUSED, const char **argv UNUSED) @@ -13,3 +13,8 @@ int cmd__sha1_is_sha1dc(int argc UNUSED, const char **argv UNUSED) #endif return 1; } + +int cmd__sha1_unsafe(int ac, const char **av) +{ + return cmd_hash_impl(ac, av, GIT_HASH_SHA1, 1); +} diff --git a/t/helper/test-sha1.sh b/t/helper/test-sha1.sh index 84594885c7..bf387d3db1 100755 --- a/t/helper/test-sha1.sh +++ b/t/helper/test-sha1.sh @@ -3,25 +3,31 @@ dd if=/dev/zero bs=1048576 count=100 2>/dev/null | /usr/bin/time t/helper/test-tool sha1 >/dev/null +dd if=/dev/zero bs=1048576 count=100 2>/dev/null | +/usr/bin/time t/helper/test-tool sha1-unsafe >/dev/null + while read expect cnt pfx do case "$expect" in '#'*) continue ;; esac - actual=$( - { - test -z "$pfx" || echo "$pfx" - dd if=/dev/zero bs=1048576 count=$cnt 2>/dev/null | - perl -pe 'y/\000/g/' - } | ./t/helper/test-tool sha1 $cnt - ) - if test "$expect" = "$actual" - then - echo "OK: $expect $cnt $pfx" - else - echo >&2 "OOPS: $cnt" - echo >&2 "expect: $expect" - echo >&2 "actual: $actual" - exit 1 - fi + for sha1 in sha1 sha1-unsafe + do + actual=$( + { + test -z "$pfx" || echo "$pfx" + dd if=/dev/zero bs=1048576 count=$cnt 2>/dev/null | + perl -pe 'y/\000/g/' + } | ./t/helper/test-tool $sha1 $cnt + ) + if test "$expect" = "$actual" + then + echo "OK ($sha1): $expect $cnt $pfx" + else + echo >&2 "OOPS ($sha1): $cnt" + echo >&2 "expect ($sha1): $expect" + echo >&2 "actual ($sha1): $actual" + exit 1 + fi + done done <