Merge branch 'mt/threaded-grep-in-object-store'

Traditionally, we avoided threaded grep while searching in objects (as opposed to files in the working tree) as access to the object layer is not thread-safe. This limitation is getting lifted. * mt/threaded-grep-in-object-store: grep: use no. of cores as the default no. of threads grep: move driver pre-load out of critical section grep: re-enable threads in non-worktree case grep: protect packed_git [re-]initialization grep: allow submodule functions to run in parallel submodule-config: add skip_if_read option to repo_read_gitmodules() grep: replace grep_read_mutex by internal obj read lock object-store: allow threaded access to object reading replace-object: make replace operations thread-safe grep: fix racy calls in grep_objects() grep: fix race conditions at grep_submodule() grep: fix race conditions on userdiff calls
2020-02-14 12:54:20 -08:00
parent 0da63da794 f1928f04b2
commit 56ceb64eb0
14 changed files with 236 additions and 101 deletions
--- a/.tsan-suppressions
+++ b/.tsan-suppressions
@ -8,3 +8,9 @@
 # in practice it (hopefully!) doesn't matter.
 race:^want_color$
 race:^transfer_debug$
 # A boolean value, which tells whether the replace_map has been initialized or
 # not, is read racily with an update. As this variable is written to only once,
 # and it's OK if the value changes right after reading it, this shouldn't be a
 # problem.
 race:^lookup_replace_object$
--- a/Documentation/git-grep.txt
+++ b/Documentation/git-grep.txt
@ -59,8 +59,8 @@ grep.extendedRegexp::
 	other than 'default'.
 grep.threads::
-	Number of grep worker threads to use.  If unset (or set to 0),
+	Number of grep worker threads to use. If unset (or set to 0), Git will
-	8 threads are used by default (for now).
+	use as many threads as the number of logical cores available.
 grep.fullName::
 	If set to true, enable `--full-name` option by default.
@ -348,6 +348,17 @@ EXAMPLES
 `git grep solution -- :^Documentation`::
 	Looks for `solution`, excluding files in `Documentation`.
 NOTES ON THREADS
 ----------------
 The `--threads` option (and the grep.threads configuration) will be ignored when
 `--open-files-in-pager` is used, forcing a single-threaded execution.
 When grepping the object store (with `--cached` or giving tree objects), running
 with multiple threads might perform slower than single-threaded if `--textconv`
 is given and there are too many text conversions. So if you experience low
 performance in this case, it might be desirable to use `--threads=1`.
 GIT
 ---
 Part of the linkgit:git[1] suite
--- a/builtin/grep.c
+++ b/builtin/grep.c
@ -24,6 +24,7 @@
 #include "submodule.h"
 #include "submodule-config.h"
 #include "object-store.h"
 #include "packfile.h"
 static char const * const grep_usage[] = {
 	N_("git grep [<options>] [-e] <pattern> [<rev>...] [[--] <path>...]"),
@ -32,7 +33,6 @@ static char const * const grep_usage[] = {
 static int recurse_submodules;
 #define GREP_NUM_THREADS_DEFAULT 8
 static int num_threads;
 static pthread_t *threads;
@ -91,8 +91,11 @@ static pthread_cond_t cond_result;
 static int skip_first_line;
-static void add_work(struct grep_opt *opt, const struct grep_source *gs)
+static void add_work(struct grep_opt *opt, struct grep_source *gs)
 {
 	if (opt->binary != GREP_BINARY_TEXT)
 		grep_source_load_driver(gs, opt->repo->index);
 	grep_lock();
 	while ((todo_end+1) % ARRAY_SIZE(todo) == todo_done) {
@ -100,9 +103,6 @@ static void add_work(struct grep_opt *opt, const struct grep_source *gs)
 	}
 	todo[todo_end].source = *gs;
 	if (opt->binary != GREP_BINARY_TEXT)
 		grep_source_load_driver(&todo[todo_end].source,
 					opt->repo->index);
 	todo[todo_end].done = 0;
 	strbuf_reset(&todo[todo_end].out);
 	todo_end = (todo_end + 1) % ARRAY_SIZE(todo);
@ -200,12 +200,12 @@ static void start_threads(struct grep_opt *opt)
 	int i;
 	pthread_mutex_init(&grep_mutex, NULL);
 	pthread_mutex_init(&grep_read_mutex, NULL);
 	pthread_mutex_init(&grep_attr_mutex, NULL);
 	pthread_cond_init(&cond_add, NULL);
 	pthread_cond_init(&cond_write, NULL);
 	pthread_cond_init(&cond_result, NULL);
 	grep_use_locks = 1;
 	enable_obj_read_lock();
 	for (i = 0; i < ARRAY_SIZE(todo); i++) {
 		strbuf_init(&todo[i].out, 0);
@ -257,12 +257,12 @@ static int wait_all(void)
 	free(threads);
 	pthread_mutex_destroy(&grep_mutex);
 	pthread_mutex_destroy(&grep_read_mutex);
 	pthread_mutex_destroy(&grep_attr_mutex);
 	pthread_cond_destroy(&cond_add);
 	pthread_cond_destroy(&cond_write);
 	pthread_cond_destroy(&cond_result);
 	grep_use_locks = 0;
 	disable_obj_read_lock();
 	return hit;
 }
@ -295,16 +295,6 @@ static int grep_cmd_config(const char *var, const char *value, void *cb)
 	return st;
 }
 static void *lock_and_read_oid_file(const struct object_id *oid, enum object_type *type, unsigned long *size)
 {
 	void *data;
 	grep_read_lock();
 	data = read_object_file(oid, type, size);
 	grep_read_unlock();
 	return data;
 }
 static int grep_oid(struct grep_opt *opt, const struct object_id *oid,
 		     const char *filename, int tree_name_len,
 		     const char *path)
@ -407,30 +397,28 @@ static int grep_submodule(struct grep_opt *opt,
 {
 	struct repository subrepo;
 	struct repository *superproject = opt->repo;
-	const struct submodule *sub = submodule_from_path(superproject,
+	const struct submodule *sub;
 							  &null_oid, path);
 	struct grep_opt subopt;
 	int hit;
 	sub = submodule_from_path(superproject, &null_oid, path);
 	if (!is_submodule_active(superproject, path))
 		return 0;
 	if (repo_submodule_init(&subrepo, superproject, sub))
 		return 0;
 	/*
-	 * NEEDSWORK: submodules functions need to be protected because they
+	 * NEEDSWORK: repo_read_gitmodules() might call
-	 * access the object store via config_from_gitmodules(): the latter
+	 * add_to_alternates_memory() via config_from_gitmodules(). This
-	 * uses get_oid() which, for now, relies on the global the_repository
+	 * operation causes a race condition with concurrent object readings
-	 * object.
+	 * performed by the worker threads. That's why we need obj_read_lock()
 	 * here. It should be removed once it's no longer necessary to add the
 	 * subrepo's odbs to the in-memory alternates list.
 	 */
-	grep_read_lock();
+	obj_read_lock();
-
+	repo_read_gitmodules(&subrepo, 0);
 	if (!is_submodule_active(superproject, path)) {
 		grep_read_unlock();
 		return 0;
 	}
 	if (repo_submodule_init(&subrepo, superproject, sub)) {
 		grep_read_unlock();
 		return 0;
 	}
 	repo_read_gitmodules(&subrepo);
 	/*
 	 * NEEDSWORK: This adds the submodule's object directory to the list of
@ -443,7 +431,7 @@ static int grep_submodule(struct grep_opt *opt,
 	 * object.
 	 */
 	add_to_alternates_memory(subrepo.objects->odb->path);
-	grep_read_unlock();
+	obj_read_unlock();
 	memcpy(&subopt, opt, sizeof(subopt));
 	subopt.repo = &subrepo;
@ -455,14 +443,12 @@ static int grep_submodule(struct grep_opt *opt,
 		unsigned long size;
 		struct strbuf base = STRBUF_INIT;
 		obj_read_lock();
 		object = parse_object_or_die(oid, oid_to_hex(oid));
-
+		obj_read_unlock();
 		grep_read_lock();
 		data = read_object_with_reference(&subrepo,
 						  &object->oid, tree_type,
 						  &size, NULL);
 		grep_read_unlock();
 		if (!data)
 			die(_("unable to read tree (%s)"), oid_to_hex(&object->oid));
@ -587,7 +573,7 @@ static int grep_tree(struct grep_opt *opt, const struct pathspec *pathspec,
 			void *data;
 			unsigned long size;
-			data = lock_and_read_oid_file(&entry.oid, &type, &size);
+			data = read_object_file(&entry.oid, &type, &size);
 			if (!data)
 				die(_("unable to read tree (%s)"),
 				    oid_to_hex(&entry.oid));
@ -625,12 +611,9 @@ static int grep_object(struct grep_opt *opt, const struct pathspec *pathspec,
 		struct strbuf base;
 		int hit, len;
 		grep_read_lock();
 		data = read_object_with_reference(opt->repo,
 						  &obj->oid, tree_type,
 						  &size, NULL);
 		grep_read_unlock();
 		if (!data)
 			die(_("unable to read tree (%s)"), oid_to_hex(&obj->oid));
@ -659,13 +642,18 @@ static int grep_objects(struct grep_opt *opt, const struct pathspec *pathspec,
 	for (i = 0; i < nr; i++) {
 		struct object *real_obj;
 		obj_read_lock();
 		real_obj = deref_tag(opt->repo, list->objects[i].item,
 				     NULL, 0);
 		obj_read_unlock();
 		/* load the gitmodules file for this rev */
 		if (recurse_submodules) {
 			submodule_free(opt->repo);
 			obj_read_lock();
 			gitmodules_config_oid(&real_obj->oid);
 			obj_read_unlock();
 		}
 		if (grep_object(opt, pathspec, real_obj, list->objects[i].name,
 				list->objects[i].path)) {
@ -1065,7 +1053,10 @@ int cmd_grep(int argc, const char **argv, const char *prefix)
 	pathspec.recursive = 1;
 	pathspec.recurse_submodules = !!recurse_submodules;
-	if (list.nr || cached || show_in_pager) {
+	if (recurse_submodules && untracked)
 		die(_("--untracked not supported with --recurse-submodules"));
 	if (show_in_pager) {
 		if (num_threads > 1)
 			warning(_("invalid option combination, ignoring --threads"));
 		num_threads = 1;
@ -1075,7 +1066,7 @@ int cmd_grep(int argc, const char **argv, const char *prefix)
 	} else if (num_threads < 0)
 		die(_("invalid number of threads specified (%d)"), num_threads);
 	else if (num_threads == 0)
-		num_threads = HAVE_THREADS ? GREP_NUM_THREADS_DEFAULT : 1;
+		num_threads = HAVE_THREADS ? online_cpus() : 1;
 	if (num_threads > 1) {
 		if (!HAVE_THREADS)
@ -1084,6 +1075,17 @@ int cmd_grep(int argc, const char **argv, const char *prefix)
 		    && (opt.pre_context || opt.post_context ||
 			opt.file_break || opt.funcbody))
 			skip_first_line = 1;
 		/*
 		 * Pre-read gitmodules (if not read already) and force eager
 		 * initialization of packed_git to prevent racy lazy
 		 * reading/initialization once worker threads are started.
 		 */
 		if (recurse_submodules)
 			repo_read_gitmodules(the_repository, 1);
 		if (startup_info->have_repository)
 			(void)get_packed_git(the_repository);
 		start_threads(&opt);
 	} else {
 		/*
@ -1118,9 +1120,6 @@ int cmd_grep(int argc, const char **argv, const char *prefix)
 		}
 	}
 	if (recurse_submodules && untracked)
 		die(_("--untracked not supported with --recurse-submodules"));
 	if (!show_in_pager && !opt.status_only)
 		setup_pager();
--- a/grep.c
+++ b/grep.c
@ -1540,11 +1540,6 @@ static inline void grep_attr_unlock(void)
 		pthread_mutex_unlock(&grep_attr_mutex);
 }
 /*
 * Same as git_attr_mutex, but protecting the thread-unsafe object db access.
 */
 pthread_mutex_t grep_read_mutex;
 static int match_funcname(struct grep_opt *opt, struct grep_source *gs, char *bol, char *eol)
 {
 	xdemitconf_t *xecfg = opt->priv;
@ -1741,13 +1736,20 @@ static int fill_textconv_grep(struct repository *r,
 	}
 	/*
-	 * fill_textconv is not remotely thread-safe; it may load objects
+	 * fill_textconv is not remotely thread-safe; it modifies the global
-	 * behind the scenes, and it modifies the global diff tempfile
+	 * diff tempfile structure, writes to the_repo's odb and might
-	 * structure.
+	 * internally call thread-unsafe functions such as the
 	 * prepare_packed_git() lazy-initializer. Because of the last two, we
 	 * must ensure mutual exclusion between this call and the object reading
 	 * API, thus we use obj_read_lock() here.
 	 *
 	 * TODO: allowing text conversion to run in parallel with object
 	 * reading operations might increase performance in the multithreaded
 	 * non-worktree git-grep with --textconv.
 	 */
-	grep_read_lock();
+	obj_read_lock();
 	size = fill_textconv(r, driver, df, &buf);
-	grep_read_unlock();
+	obj_read_unlock();
 	free_filespec(df);
 	/*
@ -1813,10 +1815,15 @@ static int grep_source_1(struct grep_opt *opt, struct grep_source *gs, int colle
 		grep_source_load_driver(gs, opt->repo->index);
 		/*
 		 * We might set up the shared textconv cache data here, which
-		 * is not thread-safe.
+		 * is not thread-safe. Also, get_oid_with_context() and
 		 * parse_object() might be internally called. As they are not
 		 * currently thread-safe and might be racy with object reading,
 		 * obj_read_lock() must be called.
 		 */
 		grep_attr_lock();
 		obj_read_lock();
 		textconv = userdiff_get_textconv(opt->repo, gs->driver);
 		obj_read_unlock();
 		grep_attr_unlock();
 	}
@ -2116,10 +2123,7 @@ static int grep_source_load_oid(struct grep_source *gs)
 {
 	enum object_type type;
 	grep_read_lock();
 	gs->buf = read_object_file(gs->identifier, &type, &gs->size);
 	grep_read_unlock();
 	if (!gs->buf)
 		return error(_("'%s': unable to read %s"),
 			     gs->name,
--- a/grep.h
+++ b/grep.h
@ -220,18 +220,5 @@ int grep_threads_ok(const struct grep_opt *opt);
 */
 extern int grep_use_locks;
 extern pthread_mutex_t grep_attr_mutex;
 extern pthread_mutex_t grep_read_mutex;
 static inline void grep_read_lock(void)
 {
 	if (grep_use_locks)
 		pthread_mutex_lock(&grep_read_mutex);
 }
 static inline void grep_read_unlock(void)
 {
 	if (grep_use_locks)
 		pthread_mutex_unlock(&grep_read_mutex);
 }
 #endif
--- a/object-store.h
+++ b/object-store.h
@ -6,6 +6,7 @@
 #include "list.h"
 #include "sha1-array.h"
 #include "strbuf.h"
 #include "thread-utils.h"
 struct object_directory {
 	struct object_directory *next;
@ -125,6 +126,8 @@ struct raw_object_store {
 	 * (see git-replace(1)).
 	 */
 	struct oidmap *replace_map;
 	unsigned replace_map_initialized : 1;
 	pthread_mutex_t replace_mutex; /* protect object replace functions */
 	struct commit_graph *commit_graph;
 	unsigned commit_graph_attempted : 1; /* if loading has been attempted */
@ -257,6 +260,40 @@ int has_loose_object_nonlocal(const struct object_id *);
 void assert_oid_type(const struct object_id *oid, enum object_type expect);
 /*
 * Enabling the object read lock allows multiple threads to safely call the
 * following functions in parallel: repo_read_object_file(), read_object_file(),
 * read_object_file_extended(), read_object_with_reference(), read_object(),
 * oid_object_info() and oid_object_info_extended().
 *
 * obj_read_lock() and obj_read_unlock() may also be used to protect other
 * sections which cannot execute in parallel with object reading. Since the used
 * lock is a recursive mutex, these sections can even contain calls to object
 * reading functions. However, beware that in these cases zlib inflation won't
 * be performed in parallel, losing performance.
 *
 * TODO: oid_object_info_extended()'s call stack has a recursive behavior. If
 * any of its callees end up calling it, this recursive call won't benefit from
 * parallel inflation.
 */
 void enable_obj_read_lock(void);
 void disable_obj_read_lock(void);
 extern int obj_read_use_lock;
 extern pthread_mutex_t obj_read_mutex;
 /*
  * Acquire obj_read_mutex. Does nothing unless the object read lock has
  * been switched on (obj_read_use_lock is non-zero).
  */
 static inline void obj_read_lock(void)
 {
 	if (!obj_read_use_lock)
 		return;
 	pthread_mutex_lock(&obj_read_mutex);
 }
 /*
  * Release obj_read_mutex. Does nothing unless the object read lock has
  * been switched on (obj_read_use_lock is non-zero).
  */
 static inline void obj_read_unlock(void)
 {
 	if (!obj_read_use_lock)
 		return;
 	pthread_mutex_unlock(&obj_read_mutex);
 }
 struct object_info {
 	/* Request */
 	enum object_type *typep;
--- a/object.c
+++ b/object.c
@ -480,6 +480,7 @@ struct raw_object_store *raw_object_store_new(void)
 	memset(o, 0, sizeof(*o));
 	INIT_LIST_HEAD(&o->packed_git_mru);
 	hashmap_init(&o->pack_map, pack_map_entry_cmp, NULL, 0);
 	pthread_mutex_init(&o->replace_mutex, NULL);
 	return o;
 }
@ -507,6 +508,7 @@ void raw_object_store_clear(struct raw_object_store *o)
 	oidmap_free(o->replace_map, 1);
 	FREE_AND_NULL(o->replace_map);
 	pthread_mutex_destroy(&o->replace_mutex);
 	free_commit_graph(o->commit_graph);
 	o->commit_graph = NULL;
--- a/packfile.c
+++ b/packfile.c
@ -1004,12 +1004,14 @@ void reprepare_packed_git(struct repository *r)
 {
 	struct object_directory *odb;
 	obj_read_lock();
 	for (odb = r->objects->odb; odb; odb = odb->next)
 		odb_clear_loose_cache(odb);
 	r->objects->approximate_object_count_valid = 0;
 	r->objects->packed_git_initialized = 0;
 	prepare_packed_git(r);
 	obj_read_unlock();
 }
 struct packed_git *get_packed_git(struct repository *r)
@ -1086,7 +1088,23 @@ unsigned long get_size_from_delta(struct packed_git *p,
 	do {
 		in = use_pack(p, w_curs, curpos, &stream.avail_in);
 		stream.next_in = in;
 		/*
 		 * Note: the window section returned by use_pack() must be
 		 * available throughout git_inflate()'s unlocked execution. To
 		 * ensure no other thread will modify the window in the
 		 * meantime, we rely on the packed_window.inuse_cnt. This
 		 * counter is incremented before window reading and checked
 		 * before window disposal.
 		 *
 		 * Other worrying sections could be the call to close_pack_fd(),
 		 * which can close packs even with in-use windows, and to
 		 * reprepare_packed_git(). Regarding the former, mmap doc says:
 		 * "closing the file descriptor does not unmap the region". And
 		 * for the latter, it won't re-open already available packs.
 		 */
 		obj_read_unlock();
 		st = git_inflate(&stream, Z_FINISH);
 		obj_read_lock();
 		curpos += stream.next_in - in;
 	} while ((st == Z_OK || st == Z_BUF_ERROR) &&
 		 stream.total_out < sizeof(delta_head));
@ -1445,6 +1463,14 @@ static void add_delta_base_cache(struct packed_git *p, off_t base_offset,
 	struct delta_base_cache_entry *ent = xmalloc(sizeof(*ent));
 	struct list_head *lru, *tmp;
 	/*
 	 * Check required to avoid redundant entries when more than one thread
 	 * is unpacking the same object, in unpack_entry() (since its phases I
 	 * and III might run concurrently across multiple threads).
 	 */
 	if (in_delta_base_cache(p, base_offset))
 		return;
 	delta_base_cached += base_size;
 	list_for_each_safe(lru, tmp, &delta_base_cache_lru) {
@ -1574,7 +1600,15 @@ static void *unpack_compressed_entry(struct packed_git *p,
 	do {
 		in = use_pack(p, w_curs, curpos, &stream.avail_in);
 		stream.next_in = in;
 		/*
 		 * Note: we must ensure the window section returned by
 		 * use_pack() will be available throughout git_inflate()'s
 		 * unlocked execution. Please refer to the comment at
 		 * get_size_from_delta() to see how this is done.
 		 */
 		obj_read_unlock();
 		st = git_inflate(&stream, Z_FINISH);
 		obj_read_lock();
 		if (!stream.avail_out)
 			break; /* the payload is larger than it should be */
 		curpos += stream.next_in - in;
--- a/replace-object.c
+++ b/replace-object.c
@ -34,14 +34,23 @@ static int register_replace_ref(struct repository *r,
 void prepare_replace_object(struct repository *r)
 {
-	if (r->objects->replace_map)
+	if (r->objects->replace_map_initialized)
 		return;
 	pthread_mutex_lock(&r->objects->replace_mutex);
 	if (r->objects->replace_map_initialized) {
 		pthread_mutex_unlock(&r->objects->replace_mutex);
 		return;
 	}
 	r->objects->replace_map =
 		xmalloc(sizeof(*r->objects->replace_map));
 	oidmap_init(r->objects->replace_map, 0);
 	for_each_replace_ref(r, register_replace_ref, NULL);
 	r->objects->replace_map_initialized = 1;
 	pthread_mutex_unlock(&r->objects->replace_mutex);
 }
 /* We allow "recursive" replacement. Only within reason, though */
--- a/replace-object.h
+++ b/replace-object.h
@ -24,12 +24,17 @@ const struct object_id *do_lookup_replace_object(struct repository *r,
 * name (replaced recursively, if necessary).  The return value is
 * either sha1 or a pointer to a permanently-allocated value.  When
 * object replacement is suppressed, always return sha1.
 *
 * Note: some thread debuggers might point out a data race on the
 * replace_map_initialized reading in this function. However, we know there's no
 * problem in the value being updated by one thread right after another one read
 * it here (and it should be written to only once, anyway).
 */
 static inline const struct object_id *lookup_replace_object(struct repository *r,
 							    const struct object_id *oid)
 {
 	if (!read_replace_refs ||
-	    (r->objects->replace_map &&
+	    (r->objects->replace_map_initialized &&
 	     r->objects->replace_map->map.tablesize == 0))
 		return oid;
 	return do_lookup_replace_object(r, oid);
--- a/sha1-file.c
+++ b/sha1-file.c
@ -1147,6 +1147,8 @@ static int unpack_loose_short_header(git_zstream *stream,
 				     unsigned char *map, unsigned long mapsize,
 				     void *buffer, unsigned long bufsiz)
 {
 	int ret;
 	/* Get the data stream */
 	memset(stream, 0, sizeof(*stream));
 	stream->next_in = map;
@ -1155,7 +1157,11 @@ static int unpack_loose_short_header(git_zstream *stream,
 	stream->avail_out = bufsiz;
 	git_inflate_init(stream);
-	return git_inflate(stream, 0);
+	obj_read_unlock();
 	ret = git_inflate(stream, 0);
 	obj_read_lock();
 	return ret;
 }
 int unpack_loose_header(git_zstream *stream,
@ -1200,7 +1206,9 @@ static int unpack_loose_header_to_strbuf(git_zstream *stream, unsigned char *map
 	stream->avail_out = bufsiz;
 	do {
 		obj_read_unlock();
 		status = git_inflate(stream, 0);
 		obj_read_lock();
 		strbuf_add(header, buffer, stream->next_out - (unsigned char *)buffer);
 		if (memchr(buffer, '\0', stream->next_out - (unsigned char *)buffer))
 			return 0;
@ -1240,8 +1248,11 @@ static void *unpack_loose_rest(git_zstream *stream,
 		 */
 		stream->next_out = buf + bytes;
 		stream->avail_out = size - bytes;
-		while (status == Z_OK)
+		while (status == Z_OK) {
 			obj_read_unlock();
 			status = git_inflate(stream, Z_FINISH);
 			obj_read_lock();
 		}
 	}
 	if (status == Z_STREAM_END && !stream->avail_in) {
 		git_inflate_end(stream);
@ -1411,9 +1422,31 @@ static int loose_object_info(struct repository *r,
 	return (status < 0) ? status : 0;
 }
 int obj_read_use_lock = 0;
 pthread_mutex_t obj_read_mutex;
 /*
  * Switch the object read lock on: raise obj_read_use_lock and set up
  * obj_read_mutex through init_recursive_mutex(). Calling this while the
  * lock is already enabled is a no-op.
  */
 void enable_obj_read_lock(void)
 {
 	if (!obj_read_use_lock) {
 		obj_read_use_lock = 1;
 		init_recursive_mutex(&obj_read_mutex);
 	}
 }
 /*
  * Switch the object read lock off: clear obj_read_use_lock and destroy
  * obj_read_mutex. Calling this while the lock is not enabled is a no-op.
  */
 void disable_obj_read_lock(void)
 {
 	if (obj_read_use_lock) {
 		obj_read_use_lock = 0;
 		pthread_mutex_destroy(&obj_read_mutex);
 	}
 }
 int fetch_if_missing = 1;
-int oid_object_info_extended(struct repository *r, const struct object_id *oid,
+static int do_oid_object_info_extended(struct repository *r,
 				       const struct object_id *oid,
 				       struct object_info *oi, unsigned flags)
 {
 	static struct object_info blank_oi = OBJECT_INFO_INIT;
@ -1423,6 +1456,7 @@ int oid_object_info_extended(struct repository *r, const struct object_id *oid,
 	const struct object_id *real = oid;
 	int already_retried = 0;
 	if (flags & OBJECT_INFO_LOOKUP_REPLACE)
 		real = lookup_replace_object(r, oid);
@ -1496,7 +1530,7 @@ int oid_object_info_extended(struct repository *r, const struct object_id *oid,
 	rtype = packed_object_info(r, e.p, e.offset, oi);
 	if (rtype < 0) {
 		mark_bad_packed_object(e.p, real->hash);
-		return oid_object_info_extended(r, real, oi, 0);
+		return do_oid_object_info_extended(r, real, oi, 0);
 	} else if (oi->whence == OI_PACKED) {
 		oi->u.packed.offset = e.offset;
 		oi->u.packed.pack = e.p;
@ -1507,6 +1541,17 @@ int oid_object_info_extended(struct repository *r, const struct object_id *oid,
 	return 0;
 }
 /*
  * Public entry point: runs do_oid_object_info_extended() between
  * obj_read_lock() and obj_read_unlock(), and hands back its return value
  * unchanged.
  */
 int oid_object_info_extended(struct repository *r, const struct object_id *oid,
 			     struct object_info *oi, unsigned flags)
 {
 	int result;

 	obj_read_lock();
 	result = do_oid_object_info_extended(r, oid, oi, flags);
 	obj_read_unlock();

 	return result;
 }
 /* returns enum object_type or negative */
 int oid_object_info(struct repository *r,
 		    const struct object_id *oid,
@ -1579,6 +1624,7 @@ void *read_object_file_extended(struct repository *r,
 	if (data)
 		return data;
 	obj_read_lock();
 	if (errno && errno != ENOENT)
 		die_errno(_("failed to read object %s"), oid_to_hex(oid));
@ -1594,6 +1640,7 @@ void *read_object_file_extended(struct repository *r,
 	if ((p = has_packed_and_bad(r, repl->hash)) != NULL)
 		die(_("packed object %s (stored in %s) is corrupt"),
 		    oid_to_hex(repl), p->pack_name);
 	obj_read_unlock();
 	return NULL;
 }
--- a/submodule-config.c
+++ b/submodule-config.c
@ -674,10 +674,13 @@ static int gitmodules_cb(const char *var, const char *value, void *data)
 	return parse_config(var, value, &parameter);
 }
-void repo_read_gitmodules(struct repository *repo)
+void repo_read_gitmodules(struct repository *repo, int skip_if_read)
 {
 	submodule_cache_check_init(repo);
 	if (repo->submodule_cache->gitmodules_read && skip_if_read)
 		return;
 	if (repo_read_index(repo) < 0)
 		return;
@ -703,20 +706,11 @@ void gitmodules_config_oid(const struct object_id *commit_oid)
 	the_repository->submodule_cache->gitmodules_read = 1;
 }
 static void gitmodules_read_check(struct repository *repo)
 {
 	submodule_cache_check_init(repo);
 	/* read the repo's .gitmodules file if it hasn't been already */
 	if (!repo->submodule_cache->gitmodules_read)
 		repo_read_gitmodules(repo);
 }
 const struct submodule *submodule_from_name(struct repository *r,
 					    const struct object_id *treeish_name,
 		const char *name)
 {
-	gitmodules_read_check(r);
+	repo_read_gitmodules(r, 1);
 	return config_from(r->submodule_cache, treeish_name, name, lookup_name);
 }
@ -724,7 +718,7 @@ const struct submodule *submodule_from_path(struct repository *r,
 					    const struct object_id *treeish_name,
 		const char *path)
 {
-	gitmodules_read_check(r);
+	repo_read_gitmodules(r, 1);
 	return config_from(r->submodule_cache, treeish_name, path, lookup_path);
 }
--- a/submodule-config.h
+++ b/submodule-config.h
@ -61,7 +61,7 @@ int option_fetch_parse_recurse_submodules(const struct option *opt,
 					  const char *arg, int unset);
 int parse_update_recurse_submodules_arg(const char *opt, const char *arg);
 int parse_push_recurse_submodules_arg(const char *opt, const char *arg);
-void repo_read_gitmodules(struct repository *repo);
+void repo_read_gitmodules(struct repository *repo, int skip_if_read);
 void gitmodules_config_oid(const struct object_id *commit_oid);
 /**
--- a/unpack-trees.c
+++ b/unpack-trees.c
@ -291,11 +291,11 @@ static void load_gitmodules_file(struct index_state *index,
 	if (pos >= 0) {
 		struct cache_entry *ce = index->cache[pos];
 		if (!state && ce->ce_flags & CE_WT_REMOVE) {
-			repo_read_gitmodules(the_repository);
+			repo_read_gitmodules(the_repository, 0);
 		} else if (state && (ce->ce_flags & CE_UPDATE)) {
 			submodule_free(the_repository);
 			checkout_entry(ce, state, NULL, NULL);
-			repo_read_gitmodules(the_repository);
+			repo_read_gitmodules(the_repository, 0);
 		}
 	}
 }