From 99ce720c338ebeb37331bd98f724569837c74d0d Mon Sep 17 00:00:00 2001 From: Ben Peart Date: Mon, 29 Oct 2018 16:41:59 -0400 Subject: [PATCH 1/2] speed up refresh_index() by utilizing preload_index() Speed up refresh_index() by utilizing preload_index() to do most of the work spread across multiple threads. This works because most cache entries will get marked CE_UPTODATE so that refresh_cache_ent() can bail out early when called from within refresh_index(). On a Windows repo with ~200K files, this drops refresh times from 6.64 seconds to 2.87 seconds for a savings of 57%. Signed-off-by: Ben Peart Signed-off-by: Junio C Hamano --- cache.h | 3 +++ preload-index.c | 8 ++++---- read-cache.c | 6 ++++++ 3 files changed, 13 insertions(+), 4 deletions(-) diff --git a/cache.h b/cache.h index f7fabdde8f..883099db08 100644 --- a/cache.h +++ b/cache.h @@ -659,6 +659,9 @@ extern int daemonize(void); /* Initialize and use the cache information */ struct lock_file; extern int read_index(struct index_state *); +extern void preload_index(struct index_state *index, + const struct pathspec *pathspec, + unsigned int refresh_flags); extern int read_index_preload(struct index_state *, const struct pathspec *pathspec, unsigned int refresh_flags); diff --git a/preload-index.c b/preload-index.c index 9e7152ab14..222792ccbc 100644 --- a/preload-index.c +++ b/preload-index.c @@ -9,7 +9,7 @@ #include "progress.h" #ifdef NO_PTHREADS -static void preload_index(struct index_state *index, +void preload_index(struct index_state *index, const struct pathspec *pathspec, unsigned int refresh_flags) { @@ -100,9 +100,9 @@ static void *preload_thread(void *_data) return NULL; } -static void preload_index(struct index_state *index, - const struct pathspec *pathspec, - unsigned int refresh_flags) +void preload_index(struct index_state *index, + const struct pathspec *pathspec, + unsigned int refresh_flags) { int threads, i, work, offset; struct thread_data data[MAX_PARALLEL]; diff --git a/read-cache.c 
b/read-cache.c index d57958233e..53733d651d 100644 --- a/read-cache.c +++ b/read-cache.c @@ -1496,6 +1496,12 @@ int refresh_index(struct index_state *istate, unsigned int flags, typechange_fmt = (in_porcelain ? "T\t%s\n" : "%s needs update\n"); added_fmt = (in_porcelain ? "A\t%s\n" : "%s needs update\n"); unmerged_fmt = (in_porcelain ? "U\t%s\n" : "%s: needs merge\n"); + /* + * Use the multi-threaded preload_index() to refresh most of the + * cache entries quickly then in the single threaded loop below, + * we only have to do the special cases that are left. + */ + preload_index(istate, pathspec, 0); for (i = 0; i < istate->cache_nr; i++) { struct cache_entry *ce, *new_entry; int cache_errno = 0; From 6c5b7f55a845bccf7b3abda92a78c621898d838d Mon Sep 17 00:00:00 2001 From: Ben Peart Date: Mon, 5 Nov 2018 14:27:51 -0500 Subject: [PATCH 2/2] refresh_index: remove unnecessary calls to preload_index() Now that refresh_index() utilizes preload_index() to speed up its operation, there is no longer any benefit to having the caller preload the index first. Remove those unneeded calls by calling read_index() instead of the preload variant. This patch has no measurable performance impact: the second call to preload_index() bails out quickly, but there is no reason to call it twice. 
Signed-off-by: Ben Peart Signed-off-by: Junio C Hamano --- builtin/commit.c | 2 +- builtin/describe.c | 2 +- builtin/update-index.c | 2 +- sequencer.c | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/builtin/commit.c b/builtin/commit.c index 074bd9a551..96d336ec3d 100644 --- a/builtin/commit.c +++ b/builtin/commit.c @@ -1363,7 +1363,7 @@ int cmd_status(int argc, const char **argv, const char *prefix) if (status_format != STATUS_FORMAT_PORCELAIN && status_format != STATUS_FORMAT_PORCELAIN_V2) progress_flag = REFRESH_PROGRESS; - read_index_preload(&the_index, &s.pathspec, progress_flag); + read_index(&the_index); refresh_index(&the_index, REFRESH_QUIET|REFRESH_UNMERGED|progress_flag, &s.pathspec, NULL, NULL); diff --git a/builtin/describe.c b/builtin/describe.c index c48c34e866..cc118448ee 100644 --- a/builtin/describe.c +++ b/builtin/describe.c @@ -629,7 +629,7 @@ int cmd_describe(int argc, const char **argv, const char *prefix) struct argv_array args = ARGV_ARRAY_INIT; int fd, result; - read_cache_preload(NULL); + read_cache(); refresh_index(&the_index, REFRESH_QUIET|REFRESH_UNMERGED, NULL, NULL, NULL); fd = hold_locked_index(&index_lock, 0); diff --git a/builtin/update-index.c b/builtin/update-index.c index 07c10bcb7d..0e1dcf0438 100644 --- a/builtin/update-index.c +++ b/builtin/update-index.c @@ -782,7 +782,7 @@ struct refresh_params { static int refresh(struct refresh_params *o, unsigned int flag) { setup_work_tree(); - read_cache_preload(NULL); + read_cache(); *o->has_errors |= refresh_cache(o->flags | flag); return 0; } diff --git a/sequencer.c b/sequencer.c index 0c164d5f98..b46dd0fb63 100644 --- a/sequencer.c +++ b/sequencer.c @@ -1913,7 +1913,7 @@ static int read_and_refresh_cache(struct replay_opts *opts) { struct lock_file index_lock = LOCK_INIT; int index_fd = hold_locked_index(&index_lock, 0); - if (read_index_preload(&the_index, NULL, 0) < 0) { + if (read_index(&the_index) < 0) { rollback_lock_file(&index_lock); return error(_("git 
%s: failed to read the index"), _(action_name(opts)));