lstat_cache(): introduce has_symlink_or_noent_leading_path() function

In some cases, especially inside the unpack-trees.c file and inside
the verify_absent() function, we can avoid some unnecessary calls to
lstat() if the lstat_cache() function can also be told to keep track
of non-existing directories.

So we update the lstat_cache() function to handle this new fact and
introduce a new wrapper function.  As a result, when we call this new
wrapper of lstat_cache() instead of the old one, we save lots of
lstat() calls for a removed directory which previously contained lots
of files.

We do similar changes inside the unlink_entry() function, since if we
can already say that the leading directory component of a pathname
does not exist, it is not necessary to try to remove a pathname below
it!

Thanks to Junio C Hamano, Linus Torvalds and Rene Scharfe for valuable
comments to this patch!

Signed-off-by: Kjetil Barvik <barvik@broadpark.no>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
This commit is contained in:
Kjetil Barvik
2009-01-18 16:14:51 +01:00
committed by Junio C Hamano
parent 92604b4663
commit 09c9306658
3 changed files with 63 additions and 36 deletions

View File

@ -720,6 +720,7 @@ struct checkout {
extern int checkout_entry(struct cache_entry *ce, const struct checkout *state, char *topath); extern int checkout_entry(struct cache_entry *ce, const struct checkout *state, char *topath);
extern int has_symlink_leading_path(int len, const char *name); extern int has_symlink_leading_path(int len, const char *name);
extern int has_symlink_or_noent_leading_path(int len, const char *name);
extern struct alternate_object_database { extern struct alternate_object_database {
struct alternate_object_database *next; struct alternate_object_database *next;

View File

@ -4,6 +4,7 @@ static struct cache_def {
char path[PATH_MAX]; char path[PATH_MAX];
int len; int len;
int flags; int flags;
int track_flags;
} cache; } cache;
/* /*
@ -30,21 +31,23 @@ static inline int longest_match_lstat_cache(int len, const char *name)
return match_len; return match_len;
} }
static inline void reset_lstat_cache(void) static inline void reset_lstat_cache(int track_flags)
{ {
cache.path[0] = '\0'; cache.path[0] = '\0';
cache.len = 0; cache.len = 0;
cache.flags = 0; cache.flags = 0;
cache.track_flags = track_flags;
} }
#define FL_DIR (1 << 0) #define FL_DIR (1 << 0)
#define FL_SYMLINK (1 << 1) #define FL_NOENT (1 << 1)
#define FL_LSTATERR (1 << 2) #define FL_SYMLINK (1 << 2)
#define FL_ERR (1 << 3) #define FL_LSTATERR (1 << 3)
#define FL_ERR (1 << 4)
/* /*
* Check if name 'name' of length 'len' has a symlink leading * Check if name 'name' of length 'len' has a symlink leading
* component, or if the directory exists and is real. * component, or if the directory exists and is real, or not.
* *
* To speed up the check, some information is allowed to be cached. * To speed up the check, some information is allowed to be cached.
* This can be indicated by the 'track_flags' argument. * This can be indicated by the 'track_flags' argument.
@ -56,25 +59,35 @@ static int lstat_cache(int len, const char *name,
int match_flags, ret_flags, save_flags, max_len; int match_flags, ret_flags, save_flags, max_len;
struct stat st; struct stat st;
/* if (cache.track_flags != track_flags) {
* Check to see if we have a match from the cache for the /*
* symlink path type. * As a safeguard we clear the cache if the value of
*/ * track_flags does not match with the last supplied
match_len = last_slash = longest_match_lstat_cache(len, name); * value.
match_flags = cache.flags & track_flags & FL_SYMLINK; */
if (match_flags && match_len == cache.len) reset_lstat_cache(track_flags);
return match_flags; match_len = last_slash = 0;
/* } else {
* If we now have match_len > 0, we would know that the /*
* matched part will always be a directory. * Check to see if we have a match from the cache for
* * the 2 "excluding" path types.
* Also, if we are tracking directories and 'name' is a */
* substring of the cache on a path component basis, we can match_len = last_slash = longest_match_lstat_cache(len, name);
* return immediately. match_flags = cache.flags & track_flags & (FL_NOENT|FL_SYMLINK);
*/ if (match_flags && match_len == cache.len)
match_flags = track_flags & FL_DIR; return match_flags;
if (match_flags && len == match_len) /*
return match_flags; * If we now have match_len > 0, we would know that
* the matched part will always be a directory.
*
* Also, if we are tracking directories and 'name' is
* a substring of the cache on a path component basis,
* we can return immediately.
*/
match_flags = track_flags & FL_DIR;
if (match_flags && len == match_len)
return match_flags;
}
/* /*
* Okay, no match from the cache so far, so now we have to * Okay, no match from the cache so far, so now we have to
@ -95,6 +108,8 @@ static int lstat_cache(int len, const char *name,
if (lstat(cache.path, &st)) { if (lstat(cache.path, &st)) {
ret_flags = FL_LSTATERR; ret_flags = FL_LSTATERR;
if (errno == ENOENT)
ret_flags |= FL_NOENT;
} else if (S_ISDIR(st.st_mode)) { } else if (S_ISDIR(st.st_mode)) {
last_slash_dir = last_slash; last_slash_dir = last_slash;
continue; continue;
@ -107,11 +122,11 @@ static int lstat_cache(int len, const char *name,
} }
/* /*
* At the end update the cache. Note that max 2 different * At the end update the cache. Note that max 3 different
* path types, FL_SYMLINK and FL_DIR, can be cached for the * path types, FL_NOENT, FL_SYMLINK and FL_DIR, can be cached
* moment! * for the moment!
*/ */
save_flags = ret_flags & track_flags & FL_SYMLINK; save_flags = ret_flags & track_flags & (FL_NOENT|FL_SYMLINK);
if (save_flags && last_slash > 0 && last_slash < PATH_MAX) { if (save_flags && last_slash > 0 && last_slash < PATH_MAX) {
cache.path[last_slash] = '\0'; cache.path[last_slash] = '\0';
cache.len = last_slash; cache.len = last_slash;
@ -120,20 +135,20 @@ static int lstat_cache(int len, const char *name,
last_slash_dir > 0 && last_slash_dir < PATH_MAX) { last_slash_dir > 0 && last_slash_dir < PATH_MAX) {
/* /*
* We have a separate test for the directory case, * We have a separate test for the directory case,
* since it could be that we have found a symlink and * since it could be that we have found a symlink or a
* the track_flags says that we cannot cache this * non-existing directory and the track_flags says
* fact, so the cache would then have been left empty * that we cannot cache this fact, so the cache would
* in this case. * then have been left empty in this case.
* *
* But if we are allowed to track real directories, we * But if we are allowed to track real directories, we
* can still cache the path components before the last * can still cache the path components before the last
* one (the found symlink component). * one (the found symlink or non-existing component).
*/ */
cache.path[last_slash_dir] = '\0'; cache.path[last_slash_dir] = '\0';
cache.len = last_slash_dir; cache.len = last_slash_dir;
cache.flags = FL_DIR; cache.flags = FL_DIR;
} else { } else {
reset_lstat_cache(); reset_lstat_cache(track_flags);
} }
return ret_flags; return ret_flags;
} }
@ -147,3 +162,14 @@ int has_symlink_leading_path(int len, const char *name)
FL_SYMLINK|FL_DIR) & FL_SYMLINK|FL_DIR) &
FL_SYMLINK; FL_SYMLINK;
} }
/*
 * Return non-zero if path 'name' (of length 'len') has a leading
 * symlink component, or if some leading path component does not
 * exist.
 *
 * lstat_cache() is asked to track symlinks, non-existing components
 * and directories; only the symlink and non-existing bits of its
 * answer are passed on to the caller.
 */
int has_symlink_or_noent_leading_path(int len, const char *name)
{
	const int excluding_types = FL_SYMLINK|FL_NOENT;
	int found = lstat_cache(len, name, excluding_types|FL_DIR);

	return found & excluding_types;
}

View File

@ -61,7 +61,7 @@ static void unlink_entry(struct cache_entry *ce)
char *cp, *prev; char *cp, *prev;
char *name = ce->name; char *name = ce->name;
if (has_symlink_leading_path(ce_namelen(ce), ce->name)) if (has_symlink_or_noent_leading_path(ce_namelen(ce), ce->name))
return; return;
if (unlink(name)) if (unlink(name))
return; return;
@ -584,7 +584,7 @@ static int verify_absent(struct cache_entry *ce, const char *action,
if (o->index_only || o->reset || !o->update) if (o->index_only || o->reset || !o->update)
return 0; return 0;
if (has_symlink_leading_path(ce_namelen(ce), ce->name)) if (has_symlink_or_noent_leading_path(ce_namelen(ce), ce->name))
return 0; return 0;
if (!lstat(ce->name, &st)) { if (!lstat(ce->name, &st)) {