Threaded grep

Make git grep use threads when it is available.

The results below are best of five runs in the Linux repository (on a
box with two cores).

With the patch:

git grep qwerty
1.58user 0.55system 0:01.16elapsed 183%CPU (0avgtext+0avgdata 0maxresident)k
0inputs+800outputs (0major+5774minor)pagefaults 0swaps

Without:

git grep qwerty
1.59user 0.43system 0:02.02elapsed 100%CPU (0avgtext+0avgdata 0maxresident)k
0inputs+800outputs (0major+3716minor)pagefaults 0swaps

And with a pattern with quite a few matches:

With the patch:

$ /usr/bin/time git grep void
5.61user 0.56system 0:03.44elapsed 179%CPU (0avgtext+0avgdata 0maxresident)k
0inputs+800outputs (0major+5587minor)pagefaults 0swaps

Without:

$ /usr/bin/time git grep void
5.36user 0.51system 0:05.87elapsed 100%CPU (0avgtext+0avgdata 0maxresident)k
0inputs+800outputs (0major+3693minor)pagefaults 0swaps

In either case we gain about 40% by the threading.

Signed-off-by: Fredrik Kuivinen <frekui@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
This commit is contained in:
Fredrik Kuivinen
2010-01-25 23:51:39 +01:00
committed by Junio C Hamano
parent c8610a2e7e
commit 5b594f457a
3 changed files with 470 additions and 51 deletions

106
grep.c
View File

@ -29,6 +29,28 @@ void append_grep_pattern(struct grep_opt *opt, const char *pat,
p->next = NULL;
}
struct grep_opt *grep_opt_dup(const struct grep_opt *opt)
{
struct grep_pat *pat;
struct grep_opt *ret = xmalloc(sizeof(struct grep_opt));
*ret = *opt;
ret->pattern_list = NULL;
ret->pattern_tail = &ret->pattern_list;
for(pat = opt->pattern_list; pat != NULL; pat = pat->next)
{
if(pat->token == GREP_PATTERN_HEAD)
append_header_grep_pattern(ret, pat->field,
pat->pattern);
else
append_grep_pattern(ret, pat->pattern, pat->origin,
pat->no, pat->token);
}
return ret;
}
static void compile_regexp(struct grep_pat *p, struct grep_opt *opt)
{
int err;
@ -253,7 +275,8 @@ static int word_char(char ch)
static void show_name(struct grep_opt *opt, const char *name)
{
printf("%s%c", name, opt->null_following_name ? '\0' : '\n');
opt->output(opt, name, strlen(name));
opt->output(opt, opt->null_following_name ? "\0" : "\n", 1);
}
@ -490,24 +513,32 @@ static void show_line(struct grep_opt *opt, char *bol, char *eol,
const char *name, unsigned lno, char sign)
{
int rest = eol - bol;
char sign_str[1];
sign_str[0] = sign;
if (opt->pre_context || opt->post_context) {
if (opt->last_shown == 0) {
if (opt->show_hunk_mark)
fputs("--\n", stdout);
opt->output(opt, "--\n", 3);
else
opt->show_hunk_mark = 1;
} else if (lno > opt->last_shown + 1)
fputs("--\n", stdout);
opt->output(opt, "--\n", 3);
}
opt->last_shown = lno;
if (opt->null_following_name)
sign = '\0';
if (opt->pathname)
printf("%s%c", name, sign);
if (opt->linenum)
printf("%d%c", lno, sign);
sign_str[0] = '\0';
if (opt->pathname) {
opt->output(opt, name, strlen(name));
opt->output(opt, sign_str, 1);
}
if (opt->linenum) {
char buf[32];
snprintf(buf, sizeof(buf), "%d", lno);
opt->output(opt, buf, strlen(buf));
opt->output(opt, sign_str, 1);
}
if (opt->color) {
regmatch_t match;
enum grep_context ctx = GREP_CONTEXT_BODY;
@ -518,18 +549,22 @@ static void show_line(struct grep_opt *opt, char *bol, char *eol,
while (next_match(opt, bol, eol, ctx, &match, eflags)) {
if (match.rm_so == match.rm_eo)
break;
printf("%.*s%s%.*s%s",
(int)match.rm_so, bol,
opt->color_match,
(int)(match.rm_eo - match.rm_so), bol + match.rm_so,
GIT_COLOR_RESET);
opt->output(opt, bol, match.rm_so);
opt->output(opt, opt->color_match,
strlen(opt->color_match));
opt->output(opt, bol + match.rm_so,
(int)(match.rm_eo - match.rm_so));
opt->output(opt, GIT_COLOR_RESET,
strlen(GIT_COLOR_RESET));
bol += match.rm_eo;
rest -= match.rm_eo;
eflags = REG_NOTBOL;
}
*eol = ch;
}
printf("%.*s\n", rest, bol);
opt->output(opt, bol, rest);
opt->output(opt, "\n", 1);
}
static int match_funcname(struct grep_opt *opt, char *bol, char *eol)
@ -667,6 +702,32 @@ static int look_ahead(struct grep_opt *opt,
return 0;
}
int grep_threads_ok(const struct grep_opt *opt)
{
/* If this condition is true, then we may use the attribute
* machinery in grep_buffer_1. The attribute code is not
* thread safe, so we disable the use of threads.
*/
if (opt->funcname && !opt->unmatch_name_only && !opt->status_only &&
!opt->name_only)
return 0;
/* If we are showing hunk marks, we should not do it for the
* first match. The synchronization problem we get for this
* constraint is not yet solved, so we disable threading in
* this case.
*/
if (opt->pre_context || opt->post_context)
return 0;
return 1;
}
static void std_output(struct grep_opt *opt, const void *buf, size_t size)
{
fwrite(buf, size, 1, stdout);
}
static int grep_buffer_1(struct grep_opt *opt, const char *name,
char *buf, unsigned long size, int collect_hits)
{
@ -682,6 +743,9 @@ static int grep_buffer_1(struct grep_opt *opt, const char *name,
opt->last_shown = 0;
if (!opt->output)
opt->output = std_output;
if (buffer_is_binary(buf, size)) {
switch (opt->binary) {
case GREP_BINARY_DEFAULT:
@ -754,7 +818,9 @@ static int grep_buffer_1(struct grep_opt *opt, const char *name,
if (opt->status_only)
return 1;
if (binary_match_only) {
printf("Binary file %s matches\n", name);
opt->output(opt, "Binary file ", 12);
opt->output(opt, name, strlen(name));
opt->output(opt, " matches\n", 9);
return 1;
}
if (opt->name_only) {
@ -810,9 +876,13 @@ static int grep_buffer_1(struct grep_opt *opt, const char *name,
* which feels mostly useless but sometimes useful. Maybe
* make it another option? For now suppress them.
*/
if (opt->count && count)
printf("%s%c%u\n", name,
opt->null_following_name ? '\0' : ':', count);
if (opt->count && count) {
char buf[32];
opt->output(opt, name, strlen(name));
snprintf(buf, sizeof(buf), "%c%u\n",
opt->null_following_name ? '\0' : ':', count);
opt->output(opt, buf, strlen(buf));
}
return !!last_hit;
}