packfile.c: speed up loading lots of packfiles

When loading packfiles on start-up, we traverse the internal packfile
list once per file to avoid reloading packfiles that have already
been loaded. This check runs in quadratic time, so for poorly
maintained repos with a large number of packfiles, it can be pretty
slow.

Add a hashmap containing the packfile names as we load them so that
the average runtime cost of checking for already-loaded packs becomes
constant.

Add a perf test to p5303 to show speed-up.

The existing p5303 test runtimes are dominated by other factors and do
not show an appreciable speed-up. The new test in p5303 clearly exposes
a speed-up in bad cases. In this test we create 10,000 packfiles and
measure the start-up time of git rev-parse, which does little else
besides load in the packs.

Here are the numbers for the new p5303 test:

Test                         HEAD^             HEAD
---------------------------------------------------------------------
5303.12: load 10,000 packs   1.03(0.92+0.10)   0.12(0.02+0.09) -88.3%

Signed-off-by: Colin Stolley <cstolley@runbox.com>
Helped-by: Jeff King <peff@peff.net>
[jc: squashed the change to call hashmap in install_packed_git() by peff]
Signed-off-by: Junio C Hamano <gitster@pobox.com>
This commit is contained in:
committed by
Junio C Hamano
parent
d9f6f3b619
commit
ec48540fe8
@ -60,6 +60,7 @@ struct oid_array *odb_loose_cache(struct object_directory *odb,
|
||||
void odb_clear_loose_cache(struct object_directory *odb);
|
||||
|
||||
struct packed_git {
|
||||
struct hashmap_entry packmap_ent;
|
||||
struct packed_git *next;
|
||||
struct list_head mru;
|
||||
struct pack_window *windows;
|
||||
@ -88,6 +89,20 @@ struct packed_git {
|
||||
|
||||
struct multi_pack_index;
|
||||
|
||||
static inline int pack_map_entry_cmp(const void *unused_cmp_data,
|
||||
const struct hashmap_entry *entry,
|
||||
const struct hashmap_entry *entry2,
|
||||
const void *keydata)
|
||||
{
|
||||
const char *key = keydata;
|
||||
const struct packed_git *pg1, *pg2;
|
||||
|
||||
pg1 = container_of(entry, const struct packed_git, packmap_ent);
|
||||
pg2 = container_of(entry2, const struct packed_git, packmap_ent);
|
||||
|
||||
return strcmp(pg1->pack_name, key ? key : pg2->pack_name);
|
||||
}
|
||||
|
||||
struct raw_object_store {
|
||||
/*
|
||||
* Set of all object directories; the main directory is first (and
|
||||
@ -131,6 +146,12 @@ struct raw_object_store {
|
||||
/* A most-recently-used ordered version of the packed_git list. */
|
||||
struct list_head packed_git_mru;
|
||||
|
||||
/*
|
||||
* A map of packfile names to packed_git structs for tracking which
|
||||
* packs have been loaded already.
|
||||
*/
|
||||
struct hashmap pack_map;
|
||||
|
||||
/*
|
||||
* A fast, rough count of the number of objects in the repository.
|
||||
* These two fields are not meant for direct access. Use
|
||||
|
||||
Reference in New Issue
Block a user