Merge branch 'jk/pack-objects-optim-mru'

"git pack-objects" in a repository with many packfiles used to
spend a lot of time looking for/at objects in them; the accesses to
the packfiles are now optimized by checking the most-recently-used
packfile first.

* jk/pack-objects-optim-mru:
  pack-objects: use mru list when iterating over packs
  pack-objects: break delta cycles before delta-search phase
  sha1_file: make packed_object_info public
  provide an initializer for "struct object_info"
This commit is contained in:
Junio C Hamano
2016-10-10 14:03:46 -07:00
7 changed files with 227 additions and 12 deletions

View File

@ -23,6 +23,7 @@
#include "reachable.h"
#include "sha1-array.h"
#include "argv-array.h"
#include "mru.h"
static const char *pack_usage[] = {
N_("git pack-objects --stdout [<options>...] [< <ref-list> | < <object-list>]"),
@ -994,7 +995,7 @@ static int want_object_in_pack(const unsigned char *sha1,
struct packed_git **found_pack,
off_t *found_offset)
{
struct packed_git *p;
struct mru_entry *entry;
int want;
if (!exclude && local && has_loose_object_nonlocal(sha1))
@ -1011,7 +1012,8 @@ static int want_object_in_pack(const unsigned char *sha1,
return want;
}
for (p = packed_git; p; p = p->next) {
for (entry = packed_git_mru->head; entry; entry = entry->next) {
struct packed_git *p = entry->item;
off_t offset;
if (p == *found_pack)
@ -1027,6 +1029,8 @@ static int want_object_in_pack(const unsigned char *sha1,
*found_pack = p;
}
want = want_found_object(exclude, p);
if (!exclude && want > 0)
mru_mark(packed_git_mru, entry);
if (want != -1)
return want;
}
@ -1527,6 +1531,83 @@ static int pack_offset_sort(const void *_a, const void *_b)
(a->in_pack_offset > b->in_pack_offset);
}
/*
* Drop an on-disk delta we were planning to reuse. Naively, this would
* just involve blanking out the "delta" field, but we have to deal
* with some extra book-keeping:
*
* 1. Removing ourselves from the delta_sibling linked list.
*
* 2. Updating our size/type to the non-delta representation. These were
* either not recorded initially (size) or overwritten with the delta type
* (type) when check_object() decided to reuse the delta.
*/
static void drop_reused_delta(struct object_entry *entry)
{
struct object_entry **p = &entry->delta->delta_child;
struct object_info oi = OBJECT_INFO_INIT;
while (*p) {
if (*p == entry)
*p = (*p)->delta_sibling;
else
p = &(*p)->delta_sibling;
}
entry->delta = NULL;
oi.sizep = &entry->size;
oi.typep = &entry->type;
if (packed_object_info(entry->in_pack, entry->in_pack_offset, &oi) < 0) {
/*
* We failed to get the info from this pack for some reason;
* fall back to sha1_object_info, which may find another copy.
* And if that fails, the error will be recorded in entry->type
* and dealt with in prepare_pack().
*/
entry->type = sha1_object_info(entry->idx.sha1, &entry->size);
}
}
/*
* Follow the chain of deltas from this entry onward, throwing away any links
* that cause us to hit a cycle (as determined by the DFS state flags in
* the entries).
*/
static void break_delta_chains(struct object_entry *entry)
{
/* If it's not a delta, it can't be part of a cycle. */
if (!entry->delta) {
entry->dfs_state = DFS_DONE;
return;
}
switch (entry->dfs_state) {
case DFS_NONE:
/*
* This is the first time we've seen the object. We mark it as
* part of the active potential cycle and recurse.
*/
entry->dfs_state = DFS_ACTIVE;
break_delta_chains(entry->delta);
entry->dfs_state = DFS_DONE;
break;
case DFS_DONE:
/* object already examined, and not part of a cycle */
break;
case DFS_ACTIVE:
/*
* We found a cycle that needs broken. It would be correct to
* break any link in the chain, but it's convenient to
* break this one.
*/
drop_reused_delta(entry);
entry->dfs_state = DFS_DONE;
break;
}
}
static void get_object_details(void)
{
uint32_t i;
@ -1544,6 +1625,13 @@ static void get_object_details(void)
entry->no_try_delta = 1;
}
/*
* This must happen in a second pass, since we rely on the delta
* information for the whole list being completed.
*/
for (i = 0; i < to_pack.nr_objects; i++)
break_delta_chains(&to_pack.objects[i]);
free(sorted_by_offset);
}