Merge branch 'jh/add-index-entry-optim'

"git checkout" that handles a lot of paths has been optimized by
reducing the number of unnecessary checks of paths in the
has_dir_name() function.

* jh/add-index-entry-optim:
  read-cache: speed up has_dir_name (part 2)
  read-cache: speed up has_dir_name (part 1)
  read-cache: speed up add_index_entry during checkout
  p0006-read-tree-checkout: perf test to time read-tree
  read-cache: add strcmp_offset function
This commit is contained in:
Junio C Hamano
2017-04-26 15:39:07 +09:00
10 changed files with 446 additions and 2 deletions

View File

@ -887,9 +887,32 @@ static int has_file_name(struct index_state *istate,
return retval;
}
/*
* Like strcmp(), but also return the offset of the first change.
* If strings are equal, return the length.
*/
int strcmp_offset(const char *s1, const char *s2, size_t *first_change)
{
size_t k;
if (!first_change)
return strcmp(s1, s2);
for (k = 0; s1[k] == s2[k]; k++)
if (s1[k] == '\0')
break;
*first_change = k;
return (unsigned char)s1[k] - (unsigned char)s2[k];
}
/*
* Do we have another file with a pathname that is a proper
* subset of the name we're trying to add?
*
* That is, is there another file in the index with a path
* that matches a sub-directory in the given entry?
*/
static int has_dir_name(struct index_state *istate,
const struct cache_entry *ce, int pos, int ok_to_replace)
@ -898,9 +921,51 @@ static int has_dir_name(struct index_state *istate,
int stage = ce_stage(ce);
const char *name = ce->name;
const char *slash = name + ce_namelen(ce);
size_t len_eq_last;
int cmp_last = 0;
/*
* We are frequently called during an iteration on a sorted
* list of pathnames and while building a new index. Therefore,
* there is a high probability that this entry will eventually
* be appended to the index, rather than inserted in the middle.
* If we can confirm that, we can avoid binary searches on the
* components of the pathname.
*
* Compare the entry's full path with the last path in the index.
*/
if (istate->cache_nr > 0) {
cmp_last = strcmp_offset(name,
istate->cache[istate->cache_nr - 1]->name,
&len_eq_last);
if (cmp_last > 0) {
if (len_eq_last == 0) {
/*
* The entry sorts AFTER the last one in the
* index and their paths have no common prefix,
* so there cannot be a F/D conflict.
*/
return retval;
} else {
/*
* The entry sorts AFTER the last one in the
* index, but has a common prefix. Fall through
* to the loop below to disect the entry's path
* and see where the difference is.
*/
}
} else if (cmp_last == 0) {
/*
* The entry exactly matches the last one in the
* index, but because of multiple stage and CE_REMOVE
* items, we fall through and let the regular search
* code handle it.
*/
}
}
for (;;) {
int len;
size_t len;
for (;;) {
if (*--slash == '/')
@ -910,6 +975,67 @@ static int has_dir_name(struct index_state *istate,
}
len = slash - name;
if (cmp_last > 0) {
/*
* (len + 1) is a directory boundary (including
* the trailing slash). And since the loop is
* decrementing "slash", the first iteration is
* the longest directory prefix; subsequent
* iterations consider parent directories.
*/
if (len + 1 <= len_eq_last) {
/*
* The directory prefix (including the trailing
* slash) also appears as a prefix in the last
* entry, so the remainder cannot collide (because
* strcmp said the whole path was greater).
*
* EQ: last: xxx/A
* this: xxx/B
*
* LT: last: xxx/file_A
* this: xxx/file_B
*/
return retval;
}
if (len > len_eq_last) {
/*
* This part of the directory prefix (excluding
* the trailing slash) is longer than the known
* equal portions, so this sub-directory cannot
* collide with a file.
*
* GT: last: xxxA
* this: xxxB/file
*/
return retval;
}
if (istate->cache_nr > 0 &&
ce_namelen(istate->cache[istate->cache_nr - 1]) > len) {
/*
* The directory prefix lines up with part of
* a longer file or directory name, but sorts
* after it, so this sub-directory cannot
* collide with a file.
*
* last: xxx/yy-file (because '-' sorts before '/')
* this: xxx/yy/abc
*/
return retval;
}
/*
* This is a possible collision. Fall through and
* let the regular search code handle it.
*
* last: xxx
* this: xxx/file
*/
}
pos = index_name_stage_pos(istate, name, len, stage);
if (pos >= 0) {
/*
@ -1001,7 +1127,16 @@ static int add_index_entry_with_check(struct index_state *istate, struct cache_e
if (!(option & ADD_CACHE_KEEP_CACHE_TREE))
cache_tree_invalidate_path(istate, ce->name);
pos = index_name_stage_pos(istate, ce->name, ce_namelen(ce), ce_stage(ce));
/*
* If this entry's path sorts after the last entry in the index,
* we can avoid searching for it.
*/
if (istate->cache_nr > 0 &&
strcmp(ce->name, istate->cache[istate->cache_nr - 1]->name) > 0)
pos = -istate->cache_nr - 1;
else
pos = index_name_stage_pos(istate, ce->name, ce_namelen(ce), ce_stage(ce));
/* existing match? Just replace it. */
if (pos >= 0) {