From 2842c0f914f7c05401c449db9d01276ac5a743f0 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Mon, 29 Aug 2011 12:26:05 -0700 Subject: [PATCH 1/3] traverse_trees(): allow pruning with pathspec The traverse_trees() machinery is primarily meant for merging two (or more) trees, and because a merge is a full tree operation, it doesn't support any pruning with pathspec. Since d1f2d7e (Make run_diff_index() use unpack_trees(), not read_tree(), 2008-01-19), however, we use the unpack_trees() to traverse_trees() callchain to perform "diff-index", which could waste a lot of work traversing trees outside the user-supplied pathspec, only to discard the result at the blob comparison level in diff-lib.c::oneway_diff(), which is way too late. Allow the caller to pass a pathspec in struct traverse_info so that uninteresting entries are skipped before the callback is invoked. Signed-off-by: Junio C Hamano --- tree-walk.c | 39 +++++++++++++++++++++++++++++++++------ tree-walk.h | 1 + 2 files changed, 34 insertions(+), 6 deletions(-) diff --git a/tree-walk.c b/tree-walk.c index 33f749e1e7..808bb55ba3 100644 --- a/tree-walk.c +++ b/tree-walk.c @@ -309,6 +309,18 @@ static void free_extended_entry(struct tree_desc_x *t) } } +static inline int prune_traversal(struct name_entry *e, + struct traverse_info *info, + struct strbuf *base, + int still_interesting) +{ + if (!info->pathspec || still_interesting == 2) + return 2; + if (still_interesting < 0) + return still_interesting; + return tree_entry_interesting(e, base, 0, info->pathspec); +} + int traverse_trees(int n, struct tree_desc *t, struct traverse_info *info) { int ret = 0; @@ -316,10 +328,18 @@ int traverse_trees(int n, struct tree_desc *t, struct traverse_info *info) struct name_entry *entry = xmalloc(n*sizeof(*entry)); int i; struct tree_desc_x *tx = xcalloc(n, sizeof(*tx)); + struct strbuf base = STRBUF_INIT; + int interesting = 1; for (i = 0; i < n; i++) tx[i].d = t[i]; + if (info->prev) { + strbuf_grow(&base, info->pathlen); + make_traverse_path(base.buf, info->prev, &info->name); + base.buf[info->pathlen-1] = '/'; + strbuf_setlen(&base, info->pathlen); + } for (;;) { 
unsigned long mask, dirmask; const char *first = NULL; @@ -376,16 +396,22 @@ int traverse_trees(int n, struct tree_desc *t, struct traverse_info *info) mask |= 1ul << i; if (S_ISDIR(entry[i].mode)) dirmask |= 1ul << i; + e = &entry[i]; } if (!mask) break; - ret = info->fn(n, mask, dirmask, entry, info); - if (ret < 0) { - error = ret; - if (!info->show_all_errors) - break; + interesting = prune_traversal(e, info, &base, interesting); + if (interesting < 0) + break; + if (interesting) { + ret = info->fn(n, mask, dirmask, entry, info); + if (ret < 0) { + error = ret; + if (!info->show_all_errors) + break; + } + mask &= ret; } - mask &= ret; ret = 0; for (i = 0; i < n; i++) if (mask & (1ul << i)) @@ -395,6 +421,7 @@ int traverse_trees(int n, struct tree_desc *t, struct traverse_info *info) for (i = 0; i < n; i++) free_extended_entry(tx + i); free(tx); + strbuf_release(&base); return error; } diff --git a/tree-walk.h b/tree-walk.h index 39524b7dba..0089581e1d 100644 --- a/tree-walk.h +++ b/tree-walk.h @@ -44,6 +44,7 @@ struct traverse_info { struct traverse_info *prev; struct name_entry name; int pathlen; + struct pathspec *pathspec; unsigned long conflicts; traverse_callback_t fn; From 40e372563cfbcce4380820ae03e872f09fa25327 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Mon, 29 Aug 2011 12:31:06 -0700 Subject: [PATCH 2/3] unpack-trees: allow pruning with pathspec Use the pathspec pruning of traverse_trees() from unpack_trees(). Again, the unpack_trees() machinery is primarily meant for merging two (or more) trees, and because a merge is a full tree operation, it didn't support any pruning with pathspec, and this codepath probably should not be enabled while running a merge, but the caller in diff-lib.c::diff_cache() should be able to take advantage of it. 
Signed-off-by: Junio C Hamano --- unpack-trees.c | 2 ++ unpack-trees.h | 1 + 2 files changed, 3 insertions(+) diff --git a/unpack-trees.c b/unpack-trees.c index 07f8364244..d5ec463e0e 100644 --- a/unpack-trees.c +++ b/unpack-trees.c @@ -444,6 +444,7 @@ static int traverse_trees_recursive(int n, unsigned long dirmask, newinfo = *info; newinfo.prev = info; + newinfo.pathspec = info->pathspec; newinfo.name = *p; newinfo.pathlen += tree_entry_len(p->path, p->sha1) + 1; newinfo.conflicts |= df_conflicts; @@ -1040,6 +1041,7 @@ int unpack_trees(unsigned len, struct tree_desc *t, struct unpack_trees_options info.fn = unpack_callback; info.data = o; info.show_all_errors = o->show_all_errors; + info.pathspec = o->pathspec; if (o->prefix) { /* diff --git a/unpack-trees.h b/unpack-trees.h index 64f02cb03a..b7fed7e6ec 100644 --- a/unpack-trees.h +++ b/unpack-trees.h @@ -51,6 +51,7 @@ struct unpack_trees_options { const char *prefix; int cache_bottom; struct dir_struct *dir; + struct pathspec *pathspec; merge_fn_t fn; const char *msgs[NB_UNPACK_TREES_ERROR_TYPES]; /* From 2f88c19700feb8db8f116f94bf558e61c82d543c Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Mon, 29 Aug 2011 13:34:08 -0700 Subject: [PATCH 3/3] diff-index: pass pathspec down to unpack-trees machinery And finally, pass the pathspec down through the unpack_trees() to traverse_trees() callchain. With this series applied, looking for changes in the kernel repository with a fairly narrow pathspec becomes somewhat faster; timings without and with the patch are shown below. 
(without patch) $ /usr/bin/time git diff --raw v2.6.27 -- net/ipv6 >/dev/null 0.48user 0.05system 0:00.53elapsed 100%CPU (0avgtext+0avgdata 163296maxresident)k 0inputs+952outputs (0major+11163minor)pagefaults 0swaps (with patch) $ /usr/bin/time git diff --raw v2.6.27 -- net/ipv6 >/dev/null 0.01user 0.00system 0:00.02elapsed 104%CPU (0avgtext+0avgdata 43856maxresident)k 0inputs+24outputs (0major+3688minor)pagefaults 0swaps Signed-off-by: Junio C Hamano --- diff-lib.c | 1 + 1 file changed, 1 insertion(+) diff --git a/diff-lib.c b/diff-lib.c index 9c29293bbc..12760b422f 100644 --- a/diff-lib.c +++ b/diff-lib.c @@ -463,6 +463,7 @@ int run_diff_index(struct rev_info *revs, int cached) opts.unpack_data = revs; opts.src_index = &the_index; opts.dst_index = NULL; + opts.pathspec = &revs->diffopt.pathspec; init_tree_desc(&t, tree->buffer, tree->size); if (unpack_trees(1, &t, &opts))