Merge branch 'jh/add-index-entry-optim'
"git checkout" that handles a lot of paths has been optimized by reducing the number of unnecessary checks of paths in the has_dir_name() function.

* jh/add-index-entry-optim:
  read-cache: speed up has_dir_name (part 2)
  read-cache: speed up has_dir_name (part 1)
  read-cache: speed up add_index_entry during checkout
  p0006-read-tree-checkout: perf test to time read-tree
  read-cache: add strcmp_offset function
This commit is contained in:
139
read-cache.c
139
read-cache.c
@ -887,9 +887,32 @@ static int has_file_name(struct index_state *istate,
|
||||
return retval;
|
||||
}
|
||||
|
||||
|
||||
/*
 * Like strcmp(), but also report the offset of the first difference.
 *
 * Returns a value <0, 0 or >0 as s1 sorts before, equal to, or after s2,
 * with bytes compared as unsigned char.
 *
 * If first_change is non-NULL, it receives the index of the first byte at
 * which the two strings differ; if the strings are equal, it receives
 * their length.  If first_change is NULL, the call simply defers to
 * strcmp() and no offset is reported.
 */
int strcmp_offset(const char *s1, const char *s2, size_t *first_change)
{
	size_t k;

	if (!first_change)
		return strcmp(s1, s2);

	/* Walk the common prefix; stop at the shared NUL if the strings match. */
	for (k = 0; s1[k] == s2[k]; k++)
		if (s1[k] == '\0')
			break;

	*first_change = k;
	return (unsigned char)s1[k] - (unsigned char)s2[k];
}
|
||||
|
||||
/*
|
||||
* Do we have another file with a pathname that is a proper
|
||||
* subset of the name we're trying to add?
|
||||
*
|
||||
* That is, is there another file in the index with a path
|
||||
* that matches a sub-directory in the given entry?
|
||||
*/
|
||||
static int has_dir_name(struct index_state *istate,
|
||||
const struct cache_entry *ce, int pos, int ok_to_replace)
|
||||
@ -898,9 +921,51 @@ static int has_dir_name(struct index_state *istate,
|
||||
int stage = ce_stage(ce);
|
||||
const char *name = ce->name;
|
||||
const char *slash = name + ce_namelen(ce);
|
||||
size_t len_eq_last;
|
||||
int cmp_last = 0;
|
||||
|
||||
/*
|
||||
* We are frequently called during an iteration on a sorted
|
||||
* list of pathnames and while building a new index. Therefore,
|
||||
* there is a high probability that this entry will eventually
|
||||
* be appended to the index, rather than inserted in the middle.
|
||||
* If we can confirm that, we can avoid binary searches on the
|
||||
* components of the pathname.
|
||||
*
|
||||
* Compare the entry's full path with the last path in the index.
|
||||
*/
|
||||
if (istate->cache_nr > 0) {
|
||||
cmp_last = strcmp_offset(name,
|
||||
istate->cache[istate->cache_nr - 1]->name,
|
||||
&len_eq_last);
|
||||
if (cmp_last > 0) {
|
||||
if (len_eq_last == 0) {
|
||||
/*
|
||||
* The entry sorts AFTER the last one in the
|
||||
* index and their paths have no common prefix,
|
||||
* so there cannot be a F/D conflict.
|
||||
*/
|
||||
return retval;
|
||||
} else {
|
||||
/*
|
||||
* The entry sorts AFTER the last one in the
|
||||
* index, but has a common prefix. Fall through
|
||||
* to the loop below to dissect the entry's path
|
||||
* and see where the difference is.
|
||||
*/
|
||||
}
|
||||
} else if (cmp_last == 0) {
|
||||
/*
|
||||
* The entry exactly matches the last one in the
|
||||
* index, but because of multiple stage and CE_REMOVE
|
||||
* items, we fall through and let the regular search
|
||||
* code handle it.
|
||||
*/
|
||||
}
|
||||
}
|
||||
|
||||
for (;;) {
|
||||
int len;
|
||||
size_t len;
|
||||
|
||||
for (;;) {
|
||||
if (*--slash == '/')
|
||||
@ -910,6 +975,67 @@ static int has_dir_name(struct index_state *istate,
|
||||
}
|
||||
len = slash - name;
|
||||
|
||||
if (cmp_last > 0) {
|
||||
/*
|
||||
* (len + 1) is a directory boundary (including
|
||||
* the trailing slash). And since the loop is
|
||||
* decrementing "slash", the first iteration is
|
||||
* the longest directory prefix; subsequent
|
||||
* iterations consider parent directories.
|
||||
*/
|
||||
|
||||
if (len + 1 <= len_eq_last) {
|
||||
/*
|
||||
* The directory prefix (including the trailing
|
||||
* slash) also appears as a prefix in the last
|
||||
* entry, so the remainder cannot collide (because
|
||||
* strcmp said the whole path was greater).
|
||||
*
|
||||
* EQ: last: xxx/A
|
||||
* this: xxx/B
|
||||
*
|
||||
* LT: last: xxx/file_A
|
||||
* this: xxx/file_B
|
||||
*/
|
||||
return retval;
|
||||
}
|
||||
|
||||
if (len > len_eq_last) {
|
||||
/*
|
||||
* This part of the directory prefix (excluding
|
||||
* the trailing slash) is longer than the known
|
||||
* equal portions, so this sub-directory cannot
|
||||
* collide with a file.
|
||||
*
|
||||
* GT: last: xxxA
|
||||
* this: xxxB/file
|
||||
*/
|
||||
return retval;
|
||||
}
|
||||
|
||||
if (istate->cache_nr > 0 &&
|
||||
ce_namelen(istate->cache[istate->cache_nr - 1]) > len) {
|
||||
/*
|
||||
* The directory prefix lines up with part of
|
||||
* a longer file or directory name, but sorts
|
||||
* after it, so this sub-directory cannot
|
||||
* collide with a file.
|
||||
*
|
||||
* last: xxx/yy-file (because '-' sorts before '/')
|
||||
* this: xxx/yy/abc
|
||||
*/
|
||||
return retval;
|
||||
}
|
||||
|
||||
/*
|
||||
* This is a possible collision. Fall through and
|
||||
* let the regular search code handle it.
|
||||
*
|
||||
* last: xxx
|
||||
* this: xxx/file
|
||||
*/
|
||||
}
|
||||
|
||||
pos = index_name_stage_pos(istate, name, len, stage);
|
||||
if (pos >= 0) {
|
||||
/*
|
||||
@ -1001,7 +1127,16 @@ static int add_index_entry_with_check(struct index_state *istate, struct cache_e
|
||||
|
||||
if (!(option & ADD_CACHE_KEEP_CACHE_TREE))
|
||||
cache_tree_invalidate_path(istate, ce->name);
|
||||
pos = index_name_stage_pos(istate, ce->name, ce_namelen(ce), ce_stage(ce));
|
||||
|
||||
/*
|
||||
* If this entry's path sorts after the last entry in the index,
|
||||
* we can avoid searching for it.
|
||||
*/
|
||||
if (istate->cache_nr > 0 &&
|
||||
strcmp(ce->name, istate->cache[istate->cache_nr - 1]->name) > 0)
|
||||
pos = -istate->cache_nr - 1;
|
||||
else
|
||||
pos = index_name_stage_pos(istate, ce->name, ce_namelen(ce), ce_stage(ce));
|
||||
|
||||
/* existing match? Just replace it. */
|
||||
if (pos >= 0) {
|
||||
|
Reference in New Issue
Block a user