diff --git a/Documentation/config/feature.txt b/Documentation/config/feature.txt index 95975e5091..e52bc6b858 100644 --- a/Documentation/config/feature.txt +++ b/Documentation/config/feature.txt @@ -23,6 +23,11 @@ feature.manyFiles:: working directory. With many files, commands such as `git status` and `git checkout` may be slow and these new defaults improve performance: + +* `index.skipHash=true` speeds up index writes by not computing a trailing + checksum. Note that this will cause Git versions earlier than 2.13.0 to + refuse to parse the index and Git versions earlier than 2.40.0 will report + a corrupted index during `git fsck`. ++ * `index.version=4` enables path-prefix compression in the index. + * `core.untrackedCache=true` enables the untracked cache. This setting assumes diff --git a/Documentation/config/index.txt b/Documentation/config/index.txt index 75f3a2d105..23c7985eb4 100644 --- a/Documentation/config/index.txt +++ b/Documentation/config/index.txt @@ -30,3 +30,14 @@ index.version:: Specify the version with which new index files should be initialized. This does not affect existing repositories. If `feature.manyFiles` is enabled, then the default is 4. + +index.skipHash:: + When enabled, do not compute the trailing hash for the index file. + This accelerates Git commands that manipulate the index, such as + `git add`, `git commit`, or `git status`. Instead of storing the + checksum, write a trailing set of bytes with value zero, indicating + that the computation was skipped. ++ +If you enable `index.skipHash`, then Git clients older than 2.13.0 will +refuse to parse the index and Git clients older than 2.40.0 will report an +error during `git fsck`. diff --git a/csum-file.c b/csum-file.c index 59ef3398ca..cce13c0f04 100644 --- a/csum-file.c +++ b/csum-file.c @@ -45,7 +45,8 @@ void hashflush(struct hashfile *f) unsigned offset = f->offset; if (offset) { - the_hash_algo->update_fn(&f->ctx, f->buffer, offset); + if (!f->skip_hash) + the_hash_algo->update_fn(&f->ctx, f->buffer, offset); flush(f, f->buffer, offset); f->offset = 0; } @@ -64,7 +65,12 @@ int finalize_hashfile(struct hashfile *f, unsigned char *result, int fd; hashflush(f); - the_hash_algo->final_fn(f->buffer, &f->ctx); + + if (f->skip_hash) + hashclr(f->buffer); + else + the_hash_algo->final_fn(f->buffer, &f->ctx); + if (result) hashcpy(result, f->buffer); if (flags & CSUM_HASH_IN_STREAM) @@ -108,7 +114,8 @@ void hashwrite(struct hashfile *f, const void *buf, unsigned int count) * the hashfile's buffer. In this block, * f->offset is necessarily zero. */ - the_hash_algo->update_fn(&f->ctx, buf, nr); + if (!f->skip_hash) + the_hash_algo->update_fn(&f->ctx, buf, nr); flush(f, buf, nr); } else { /* @@ -153,6 +160,7 @@ static struct hashfile *hashfd_internal(int fd, const char *name, f->tp = tp; f->name = name; f->do_crc = 0; + f->skip_hash = 0; the_hash_algo->init_fn(&f->ctx); f->buffer_len = buffer_len; diff --git a/csum-file.h b/csum-file.h index 0d29f528fb..793a59da12 100644 --- a/csum-file.h +++ b/csum-file.h @@ -20,6 +20,13 @@ struct hashfile { size_t buffer_len; unsigned char *buffer; unsigned char *check_buffer; + + /** + * If non-zero, skip_hash indicates that we should + * not actually compute the hash for this hashfile and + * instead only use it as a buffered write. + */ + int skip_hash; }; /* Checkpoint */ diff --git a/read-cache.c b/read-cache.c index 1ff518b2a7..cf87ad7097 100644 --- a/read-cache.c +++ b/read-cache.c @@ -1817,6 +1817,8 @@ static int verify_hdr(const struct cache_header *hdr, unsigned long size) git_hash_ctx c; unsigned char hash[GIT_MAX_RAWSZ]; int hdr_version; + unsigned char *start, *end; + struct object_id oid; if (hdr->hdr_signature != htonl(CACHE_SIGNATURE)) return error(_("bad signature 0x%08x"), hdr->hdr_signature); @@ -1827,10 +1829,16 @@ static int verify_hdr(const struct cache_header *hdr, unsigned long size) if (!verify_index_checksum) return 0; + end = (unsigned char *)hdr + size; + start = end - the_hash_algo->rawsz; + oidread(&oid, start); + if (oideq(&oid, null_oid())) + return 0; + the_hash_algo->init_fn(&c); the_hash_algo->update_fn(&c, hdr, size - the_hash_algo->rawsz); the_hash_algo->final_fn(hash, &c); - if (!hasheq(hash, (unsigned char *)hdr + size - the_hash_algo->rawsz)) + if (!hasheq(hash, start)) return error(_("bad index file sha1 signature")); return 0; } @@ -2920,9 +2928,13 @@ static int do_write_index(struct index_state *istate, struct tempfile *tempfile, int ieot_entries = 1; struct index_entry_offset_table *ieot = NULL; int nr, nr_threads; + struct repository *r = istate->repo ? istate->repo : the_repository; f = hashfd(tempfile->fd, tempfile->filename.buf); + prepare_repo_settings(r); + f->skip_hash = r->settings.index_skip_hash; + for (i = removed = extended = 0; i < entries; i++) { if (cache[i]->ce_flags & CE_REMOVE) removed++; diff --git a/repo-settings.c b/repo-settings.c index 3021921c53..3dbd3f0e2e 100644 --- a/repo-settings.c +++ b/repo-settings.c @@ -47,6 +47,7 @@ void prepare_repo_settings(struct repository *r) } if (manyfiles) { r->settings.index_version = 4; + r->settings.index_skip_hash = 1; r->settings.core_untracked_cache = UNTRACKED_CACHE_WRITE; } @@ -61,6 +62,7 @@ void prepare_repo_settings(struct repository *r) repo_cfg_bool(r, "pack.usesparse", &r->settings.pack_use_sparse, 1); repo_cfg_bool(r, "core.multipackindex", &r->settings.core_multi_pack_index, 1); repo_cfg_bool(r, "index.sparse", &r->settings.sparse_index, 0); + repo_cfg_bool(r, "index.skiphash", &r->settings.index_skip_hash, r->settings.index_skip_hash); /* * The GIT_TEST_MULTI_PACK_INDEX variable is special in that diff --git a/repository.h b/repository.h index 6c461c5b9d..e8c67ffe16 100644 --- a/repository.h +++ b/repository.h @@ -42,6 +42,7 @@ struct repo_settings { struct fsmonitor_settings *fsmonitor; /* lazily loaded */ int index_version; + int index_skip_hash; enum untracked_cache_setting core_untracked_cache; int pack_use_sparse; diff --git a/scalar.c b/scalar.c index 6c52243cdf..b49bb8c24e 100644 --- a/scalar.c +++ b/scalar.c @@ -143,6 +143,7 @@ static int set_recommended_config(int reconfigure) { "credential.validate", "false", 1 }, /* GCM4W-only */ { "gc.auto", "0", 1 }, { "gui.GCWarning", "false", 1 }, + { "index.skipHash", "false", 1 }, { "index.threads", "true", 1 }, { "index.version", "4", 1 }, { "merge.stat", "false", 1 }, diff --git a/t/t1600-index.sh b/t/t1600-index.sh index 010989f90e..0ebbae1305 100755 --- a/t/t1600-index.sh +++ b/t/t1600-index.sh @@ -65,6 +65,36 @@ test_expect_success 'out of bounds index.version issues warning' ' ) ' +test_expect_success 'index.skipHash config option' ' + rm -f .git/index && + git -c index.skipHash=true add a && + test_trailing_hash .git/index >hash && + echo $(test_oid zero) >expect && + test_cmp expect hash && + git fsck && + + rm -f .git/index && + git -c feature.manyFiles=true add a && + test_trailing_hash .git/index >hash && + cmp expect hash && + + rm -f .git/index && + git -c feature.manyFiles=true \ + -c index.skipHash=false add a && + test_trailing_hash .git/index >hash && + ! cmp expect hash && + + test_commit start && + git -c protocol.file.allow=always submodule add ./ sub && + git config index.skipHash false && + git -C sub config index.skipHash true && + >sub/file && + git -C sub add a && + test_trailing_hash .git/modules/sub/index >hash && + test_cmp expect hash && + git -C sub fsck +' + test_index_version () { INDEX_VERSION_CONFIG=$1 && FEATURE_MANY_FILES=$2 && diff --git a/t/test-lib-functions.sh b/t/test-lib-functions.sh index 54e74d5301..1b8d4344a5 100644 --- a/t/test-lib-functions.sh +++ b/t/test-lib-functions.sh @@ -1816,3 +1816,11 @@ test_cmp_config_output () { sort config-actual >sorted-actual && test_cmp sorted-expect sorted-actual } + +# Given a filename, extract its trailing hash as a hex string +test_trailing_hash () { + local file="$1" && + tail -c $(test_oid rawsz) "$file" | + test-tool hexdump | + sed "s/ //g" +}