lstat_cache(): introduce has_symlink_or_noent_leading_path() function
In some cases, particularly in the verify_absent() function in unpack-trees.c, we can avoid unnecessary calls to lstat() if the lstat_cache() function can also be told to keep track of non-existing directories.

So we update the lstat_cache() function to also cache non-existing leading directories, and introduce a new wrapper function, has_symlink_or_noent_leading_path(). Calling this new wrapper of lstat_cache() instead of the old one saves lots of lstat() calls for a removed directory which previously contained lots of files.

We make a similar change inside the unlink_entry() function: if we can already say that the leading directory component of a pathname does not exist, it is not necessary to try to remove a pathname below it!

Thanks to Junio C Hamano, Linus Torvalds and Rene Scharfe for valuable comments on this patch!

Signed-off-by: Kjetil Barvik <barvik@broadpark.no>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
This commit is contained in:

committed by
Junio C Hamano

parent
92604b4663
commit
09c9306658
1
cache.h
1
cache.h
@ -720,6 +720,7 @@ struct checkout {
|
|||||||
|
|
||||||
extern int checkout_entry(struct cache_entry *ce, const struct checkout *state, char *topath);
|
extern int checkout_entry(struct cache_entry *ce, const struct checkout *state, char *topath);
|
||||||
extern int has_symlink_leading_path(int len, const char *name);
|
extern int has_symlink_leading_path(int len, const char *name);
|
||||||
|
extern int has_symlink_or_noent_leading_path(int len, const char *name);
|
||||||
|
|
||||||
extern struct alternate_object_database {
|
extern struct alternate_object_database {
|
||||||
struct alternate_object_database *next;
|
struct alternate_object_database *next;
|
||||||
|
94
symlinks.c
94
symlinks.c
@ -4,6 +4,7 @@ static struct cache_def {
|
|||||||
char path[PATH_MAX];
|
char path[PATH_MAX];
|
||||||
int len;
|
int len;
|
||||||
int flags;
|
int flags;
|
||||||
|
int track_flags;
|
||||||
} cache;
|
} cache;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@ -30,21 +31,23 @@ static inline int longest_match_lstat_cache(int len, const char *name)
|
|||||||
return match_len;
|
return match_len;
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline void reset_lstat_cache(void)
|
static inline void reset_lstat_cache(int track_flags)
|
||||||
{
|
{
|
||||||
cache.path[0] = '\0';
|
cache.path[0] = '\0';
|
||||||
cache.len = 0;
|
cache.len = 0;
|
||||||
cache.flags = 0;
|
cache.flags = 0;
|
||||||
|
cache.track_flags = track_flags;
|
||||||
}
|
}
|
||||||
|
|
||||||
#define FL_DIR (1 << 0)
|
#define FL_DIR (1 << 0)
|
||||||
#define FL_SYMLINK (1 << 1)
|
#define FL_NOENT (1 << 1)
|
||||||
#define FL_LSTATERR (1 << 2)
|
#define FL_SYMLINK (1 << 2)
|
||||||
#define FL_ERR (1 << 3)
|
#define FL_LSTATERR (1 << 3)
|
||||||
|
#define FL_ERR (1 << 4)
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Check if name 'name' of length 'len' has a symlink leading
|
* Check if name 'name' of length 'len' has a symlink leading
|
||||||
* component, or if the directory exists and is real.
|
* component, or if the directory exists and is real, or not.
|
||||||
*
|
*
|
||||||
* To speed up the check, some information is allowed to be cached.
|
* To speed up the check, some information is allowed to be cached.
|
||||||
* This can be indicated by the 'track_flags' argument.
|
* This can be indicated by the 'track_flags' argument.
|
||||||
@ -56,25 +59,35 @@ static int lstat_cache(int len, const char *name,
|
|||||||
int match_flags, ret_flags, save_flags, max_len;
|
int match_flags, ret_flags, save_flags, max_len;
|
||||||
struct stat st;
|
struct stat st;
|
||||||
|
|
||||||
/*
|
if (cache.track_flags != track_flags) {
|
||||||
* Check to see if we have a match from the cache for the
|
/*
|
||||||
* symlink path type.
|
* As a safeguard we clear the cache if the value of
|
||||||
*/
|
* track_flags does not match with the last supplied
|
||||||
match_len = last_slash = longest_match_lstat_cache(len, name);
|
* value.
|
||||||
match_flags = cache.flags & track_flags & FL_SYMLINK;
|
*/
|
||||||
if (match_flags && match_len == cache.len)
|
reset_lstat_cache(track_flags);
|
||||||
return match_flags;
|
match_len = last_slash = 0;
|
||||||
/*
|
} else {
|
||||||
* If we now have match_len > 0, we would know that the
|
/*
|
||||||
* matched part will always be a directory.
|
* Check to see if we have a match from the cache for
|
||||||
*
|
* the 2 "excluding" path types.
|
||||||
* Also, if we are tracking directories and 'name' is a
|
*/
|
||||||
* substring of the cache on a path component basis, we can
|
match_len = last_slash = longest_match_lstat_cache(len, name);
|
||||||
* return immediately.
|
match_flags = cache.flags & track_flags & (FL_NOENT|FL_SYMLINK);
|
||||||
*/
|
if (match_flags && match_len == cache.len)
|
||||||
match_flags = track_flags & FL_DIR;
|
return match_flags;
|
||||||
if (match_flags && len == match_len)
|
/*
|
||||||
return match_flags;
|
* If we now have match_len > 0, we would know that
|
||||||
|
* the matched part will always be a directory.
|
||||||
|
*
|
||||||
|
* Also, if we are tracking directories and 'name' is
|
||||||
|
* a substring of the cache on a path component basis,
|
||||||
|
* we can return immediately.
|
||||||
|
*/
|
||||||
|
match_flags = track_flags & FL_DIR;
|
||||||
|
if (match_flags && len == match_len)
|
||||||
|
return match_flags;
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Okay, no match from the cache so far, so now we have to
|
* Okay, no match from the cache so far, so now we have to
|
||||||
@ -95,6 +108,8 @@ static int lstat_cache(int len, const char *name,
|
|||||||
|
|
||||||
if (lstat(cache.path, &st)) {
|
if (lstat(cache.path, &st)) {
|
||||||
ret_flags = FL_LSTATERR;
|
ret_flags = FL_LSTATERR;
|
||||||
|
if (errno == ENOENT)
|
||||||
|
ret_flags |= FL_NOENT;
|
||||||
} else if (S_ISDIR(st.st_mode)) {
|
} else if (S_ISDIR(st.st_mode)) {
|
||||||
last_slash_dir = last_slash;
|
last_slash_dir = last_slash;
|
||||||
continue;
|
continue;
|
||||||
@ -107,11 +122,11 @@ static int lstat_cache(int len, const char *name,
|
|||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* At the end update the cache. Note that max 2 different
|
* At the end update the cache. Note that max 3 different
|
||||||
* path types, FL_SYMLINK and FL_DIR, can be cached for the
|
* path types, FL_NOENT, FL_SYMLINK and FL_DIR, can be cached
|
||||||
* moment!
|
* for the moment!
|
||||||
*/
|
*/
|
||||||
save_flags = ret_flags & track_flags & FL_SYMLINK;
|
save_flags = ret_flags & track_flags & (FL_NOENT|FL_SYMLINK);
|
||||||
if (save_flags && last_slash > 0 && last_slash < PATH_MAX) {
|
if (save_flags && last_slash > 0 && last_slash < PATH_MAX) {
|
||||||
cache.path[last_slash] = '\0';
|
cache.path[last_slash] = '\0';
|
||||||
cache.len = last_slash;
|
cache.len = last_slash;
|
||||||
@ -120,20 +135,20 @@ static int lstat_cache(int len, const char *name,
|
|||||||
last_slash_dir > 0 && last_slash_dir < PATH_MAX) {
|
last_slash_dir > 0 && last_slash_dir < PATH_MAX) {
|
||||||
/*
|
/*
|
||||||
* We have a separate test for the directory case,
|
* We have a separate test for the directory case,
|
||||||
* since it could be that we have found a symlink and
|
* since it could be that we have found a symlink or a
|
||||||
* the track_flags says that we cannot cache this
|
* non-existing directory and the track_flags says
|
||||||
* fact, so the cache would then have been left empty
|
* that we cannot cache this fact, so the cache would
|
||||||
* in this case.
|
* then have been left empty in this case.
|
||||||
*
|
*
|
||||||
* But if we are allowed to track real directories, we
|
* But if we are allowed to track real directories, we
|
||||||
* can still cache the path components before the last
|
* can still cache the path components before the last
|
||||||
* one (the found symlink component).
|
* one (the found symlink or non-existing component).
|
||||||
*/
|
*/
|
||||||
cache.path[last_slash_dir] = '\0';
|
cache.path[last_slash_dir] = '\0';
|
||||||
cache.len = last_slash_dir;
|
cache.len = last_slash_dir;
|
||||||
cache.flags = FL_DIR;
|
cache.flags = FL_DIR;
|
||||||
} else {
|
} else {
|
||||||
reset_lstat_cache();
|
reset_lstat_cache(track_flags);
|
||||||
}
|
}
|
||||||
return ret_flags;
|
return ret_flags;
|
||||||
}
|
}
|
||||||
@ -147,3 +162,14 @@ int has_symlink_leading_path(int len, const char *name)
|
|||||||
FL_SYMLINK|FL_DIR) &
|
FL_SYMLINK|FL_DIR) &
|
||||||
FL_SYMLINK;
|
FL_SYMLINK;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Return non-zero if path 'name' has a leading symlink component or
|
||||||
|
* if some leading path component does not exist.
|
||||||
|
*/
|
||||||
|
int has_symlink_or_noent_leading_path(int len, const char *name)
|
||||||
|
{
|
||||||
|
return lstat_cache(len, name,
|
||||||
|
FL_SYMLINK|FL_NOENT|FL_DIR) &
|
||||||
|
(FL_SYMLINK|FL_NOENT);
|
||||||
|
}
|
||||||
|
@ -61,7 +61,7 @@ static void unlink_entry(struct cache_entry *ce)
|
|||||||
char *cp, *prev;
|
char *cp, *prev;
|
||||||
char *name = ce->name;
|
char *name = ce->name;
|
||||||
|
|
||||||
if (has_symlink_leading_path(ce_namelen(ce), ce->name))
|
if (has_symlink_or_noent_leading_path(ce_namelen(ce), ce->name))
|
||||||
return;
|
return;
|
||||||
if (unlink(name))
|
if (unlink(name))
|
||||||
return;
|
return;
|
||||||
@ -584,7 +584,7 @@ static int verify_absent(struct cache_entry *ce, const char *action,
|
|||||||
if (o->index_only || o->reset || !o->update)
|
if (o->index_only || o->reset || !o->update)
|
||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
if (has_symlink_leading_path(ce_namelen(ce), ce->name))
|
if (has_symlink_or_noent_leading_path(ce_namelen(ce), ce->name))
|
||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
if (!lstat(ce->name, &st)) {
|
if (!lstat(ce->name, &st)) {
|
||||||
|
Reference in New Issue
Block a user