Merge branch 'jh/dirstat-lines'

* jh/dirstat-lines:
  Mark dirstat error messages for translation
  Improve error handling when parsing dirstat parameters
  New --dirstat=lines mode, doing dirstat analysis based on diffstat
  Allow specifying --dirstat cut-off percentage as a floating point number
  Add config variable for specifying default --dirstat behavior
  Refactor --dirstat parsing; deprecate --cumulative and --dirstat-by-file
  Make --dirstat=0 output directories that contribute < 0.1% of changes
  Add several testcases for --dirstat and friends
This commit is contained in:
Junio C Hamano
2011-05-13 11:01:32 -07:00
5 changed files with 1219 additions and 32 deletions

171
diff.c
View File

@ -31,6 +31,7 @@ static const char *external_diff_cmd_cfg;
int diff_auto_refresh_index = 1;
static int diff_mnemonic_prefix;
static int diff_no_prefix;
static int diff_dirstat_permille_default = 30;
static struct diff_options default_diff_options;
static char diff_colors[][COLOR_MAXLEN] = {
@ -66,6 +67,58 @@ static int parse_diff_color_slot(const char *var, int ofs)
return -1;
}
/*
 * Return non-zero iff the len bytes starting at p spell exactly the
 * NUL-terminated keyword.
 *
 * The length is compared first so that memcmp() is never asked to read
 * beyond the end of the keyword, and so that empty tokens and mere
 * prefixes of a keyword do not count as a match.
 */
static int dirstat_param_is(const char *p, size_t len, const char *keyword)
{
	return len == strlen(keyword) && !memcmp(p, keyword, len);
}

/*
 * Parse a comma-separated list of dirstat parameters and apply each one
 * to options.
 *
 * Recognized parameters:
 *   changes        - clear both DIRSTAT_BY_LINE and DIRSTAT_BY_FILE
 *   lines          - set DIRSTAT_BY_LINE, clear DIRSTAT_BY_FILE
 *   files          - set DIRSTAT_BY_FILE, clear DIRSTAT_BY_LINE
 *   noncumulative  - clear DIRSTAT_CUMULATIVE
 *   cumulative     - set DIRSTAT_CUMULATIVE
 *   <n>[.<digits>] - cut-off percentage, stored in
 *                    options->dirstat_permille (only the first
 *                    fractional digit is used)
 *
 * Keywords must be spelled out in full; an abbreviated or empty token is
 * reported as an unknown parameter.
 *
 * For every parameter that cannot be parsed, one message line is appended
 * to errmsg; parsing then continues with the next parameter.
 *
 * Returns the number of parameters that failed to parse (0 on success).
 */
static int parse_dirstat_params(struct diff_options *options, const char *params,
				struct strbuf *errmsg)
{
	const char *p = params;
	int p_len, ret = 0;

	while (*p) {
		/* length of the current token, up to the next ',' or NUL */
		p_len = strchrnul(p, ',') - p;

		if (dirstat_param_is(p, p_len, "changes")) {
			DIFF_OPT_CLR(options, DIRSTAT_BY_LINE);
			DIFF_OPT_CLR(options, DIRSTAT_BY_FILE);
		} else if (dirstat_param_is(p, p_len, "lines")) {
			DIFF_OPT_SET(options, DIRSTAT_BY_LINE);
			DIFF_OPT_CLR(options, DIRSTAT_BY_FILE);
		} else if (dirstat_param_is(p, p_len, "files")) {
			DIFF_OPT_CLR(options, DIRSTAT_BY_LINE);
			DIFF_OPT_SET(options, DIRSTAT_BY_FILE);
		} else if (dirstat_param_is(p, p_len, "noncumulative")) {
			DIFF_OPT_CLR(options, DIRSTAT_CUMULATIVE);
		} else if (dirstat_param_is(p, p_len, "cumulative")) {
			DIFF_OPT_SET(options, DIRSTAT_CUMULATIVE);
		} else if (isdigit(*p)) {
			char *end;
			/* whole percent -> permille */
			int permille = strtoul(p, &end, 10) * 10;
			if (*end == '.' && isdigit(*++end)) {
				/* only use first digit */
				permille += *end - '0';
				/* .. and ignore any further digits */
				while (isdigit(*++end))
					; /* nothing */
			}
			/* the number must account for the whole token */
			if (end - p == p_len)
				options->dirstat_permille = permille;
			else {
				strbuf_addf(errmsg, _("  Failed to parse dirstat cut-off percentage '%.*s'\n"),
					    p_len, p);
				ret++;
			}
		} else {
			strbuf_addf(errmsg, _("  Unknown dirstat parameter '%.*s'\n"),
				    p_len, p);
			ret++;
		}

		p += p_len;

		if (*p)
			p++; /* more parameters, swallow separator */
	}
	return ret;
}
static int git_config_rename(const char *var, const char *value)
{
if (!value)
@ -145,6 +198,17 @@ int git_diff_basic_config(const char *var, const char *value, void *cb)
return 0;
}
if (!strcmp(var, "diff.dirstat")) {
struct strbuf errmsg = STRBUF_INIT;
default_diff_options.dirstat_permille = diff_dirstat_permille_default;
if (parse_dirstat_params(&default_diff_options, value, &errmsg))
warning(_("Found errors in 'diff.dirstat' config variable:\n%s"),
errmsg.buf);
strbuf_release(&errmsg);
diff_dirstat_permille_default = default_diff_options.dirstat_permille;
return 0;
}
if (!prefixcmp(var, "submodule."))
return parse_submodule_config_option(var, value);
@ -1455,7 +1519,7 @@ struct dirstat_file {
struct dirstat_dir {
struct dirstat_file *files;
int alloc, nr, percent, cumulative;
int alloc, nr, permille, cumulative;
};
static long gather_dirstat(struct diff_options *opt, struct dirstat_dir *dir,
@ -1502,12 +1566,11 @@ static long gather_dirstat(struct diff_options *opt, struct dirstat_dir *dir,
* under this directory (sources == 1).
*/
if (baselen && sources != 1) {
int permille = this_dir * 1000 / changed;
if (permille) {
int percent = permille / 10;
if (percent >= dir->percent) {
if (this_dir) {
int permille = this_dir * 1000 / changed;
if (permille >= dir->permille) {
fprintf(opt->file, "%s%4d.%01d%% %.*s\n", line_prefix,
percent, permille % 10, baselen, base);
permille / 10, permille % 10, baselen, base);
if (!dir->cumulative)
return 0;
}
@ -1533,7 +1596,7 @@ static void show_dirstat(struct diff_options *options)
dir.files = NULL;
dir.alloc = 0;
dir.nr = 0;
dir.percent = options->dirstat_percent;
dir.permille = options->dirstat_permille;
dir.cumulative = DIFF_OPT_TST(options, DIRSTAT_CUMULATIVE);
changed = 0;
@ -1622,6 +1685,50 @@ found_damage:
gather_dirstat(options, &dir, changed, "", 0);
}
/*
 * Produce dirstat output from already-computed diffstat data.
 *
 * Each file in data contributes "added + deleted" as its damage; for
 * binary files that sum is scaled down by 64 (rounded up).  The per-file
 * damage is collected into a temporary dirstat_dir, sorted with
 * dirstat_compare and handed to gather_dirstat() for printing.
 *
 * Nothing is printed when data holds no files or when the total damage
 * is zero.  The temporary file array is released before returning.
 */
static void show_dirstat_by_line(struct diffstat_t *data, struct diff_options *options)
{
	int i;
	unsigned long changed;
	struct dirstat_dir dir;
	struct dirstat_file *to_free;

	if (data->nr == 0)
		return;

	dir.files = NULL;
	dir.alloc = 0;
	dir.nr = 0;
	dir.permille = options->dirstat_permille;
	dir.cumulative = DIFF_OPT_TST(options, DIRSTAT_CUMULATIVE);

	changed = 0;
	for (i = 0; i < data->nr; i++) {
		struct diffstat_file *file = data->files[i];
		unsigned long damage = file->added + file->deleted;
		if (file->is_binary)
			/*
			 * Binary files count bytes, not lines. Must find some
			 * way to normalize binary bytes vs. textual lines.
			 * The following heuristic assumes that there are 64
			 * bytes per "line".
			 * This is stupid and ugly, but very cheap...
			 */
			damage = (damage + 63) / 64;
		ALLOC_GROW(dir.files, dir.nr + 1, dir.alloc);
		dir.files[dir.nr].name = file->name;
		dir.files[dir.nr].changed = damage;
		changed += damage;
		dir.nr++;
	}

	/*
	 * Remember the start of the allocation now that the array has
	 * reached its final size.  gather_dirstat() is given &dir and its
	 * body is not fully visible here; NOTE(review): it may modify
	 * dir.files, so free the saved pointer rather than dir.files.
	 */
	to_free = dir.files;

	/* changed can be zero even with many files, if everything was renames */
	if (changed) {
		/* Show all directories with more than x% of the changes */
		qsort(dir.files, dir.nr, sizeof(dir.files[0]), dirstat_compare);
		gather_dirstat(options, &dir, changed, "", 0);
	}

	free(to_free);
}
static void free_diffstat_info(struct diffstat_t *diffstat)
{
int i;
@ -2891,7 +2998,7 @@ void diff_setup(struct diff_options *options)
options->line_termination = '\n';
options->break_opt = -1;
options->rename_limit = -1;
options->dirstat_percent = 3;
options->dirstat_permille = diff_dirstat_permille_default;
options->context = 3;
options->change = diff_change;
@ -3149,6 +3256,21 @@ static int stat_opt(struct diff_options *options, const char **av)
return argcount;
}
/*
 * Apply dirstat parameters given on the command line to options.
 *
 * Any parse error is fatal: the collected messages are passed to die().
 * Otherwise DIFF_FORMAT_DIRSTAT is added to options->output_format.
 *
 * Always returns 1, so that a caller handling a dirstat-related
 * command-line option can simply "return parse_dirstat_opt(...);".
 */
static int parse_dirstat_opt(struct diff_options *options, const char *params)
{
	struct strbuf errmsg = STRBUF_INIT;
	int nr_errors = parse_dirstat_params(options, params, &errmsg);

	if (nr_errors != 0)
		die(_("Failed to parse --dirstat/-X option parameter:\n%s"),
		    errmsg.buf);

	strbuf_release(&errmsg);
	options->output_format |= DIFF_FORMAT_DIRSTAT;
	return 1;
}
int diff_opt_parse(struct diff_options *options, const char **av, int ac)
{
const char *arg = av[0];
@ -3168,15 +3290,19 @@ int diff_opt_parse(struct diff_options *options, const char **av, int ac)
options->output_format |= DIFF_FORMAT_NUMSTAT;
else if (!strcmp(arg, "--shortstat"))
options->output_format |= DIFF_FORMAT_SHORTSTAT;
else if (opt_arg(arg, 'X', "dirstat", &options->dirstat_percent))
options->output_format |= DIFF_FORMAT_DIRSTAT;
else if (!strcmp(arg, "--cumulative")) {
options->output_format |= DIFF_FORMAT_DIRSTAT;
DIFF_OPT_SET(options, DIRSTAT_CUMULATIVE);
} else if (opt_arg(arg, 0, "dirstat-by-file",
&options->dirstat_percent)) {
options->output_format |= DIFF_FORMAT_DIRSTAT;
DIFF_OPT_SET(options, DIRSTAT_BY_FILE);
else if (!strcmp(arg, "-X") || !strcmp(arg, "--dirstat"))
return parse_dirstat_opt(options, "");
else if (!prefixcmp(arg, "-X"))
return parse_dirstat_opt(options, arg + 2);
else if (!prefixcmp(arg, "--dirstat="))
return parse_dirstat_opt(options, arg + 10);
else if (!strcmp(arg, "--cumulative"))
return parse_dirstat_opt(options, "cumulative");
else if (!strcmp(arg, "--dirstat-by-file"))
return parse_dirstat_opt(options, "files");
else if (!prefixcmp(arg, "--dirstat-by-file=")) {
parse_dirstat_opt(options, "files");
return parse_dirstat_opt(options, arg + 18);
}
else if (!strcmp(arg, "--check"))
options->output_format |= DIFF_FORMAT_CHECKDIFF;
@ -4023,6 +4149,7 @@ void diff_flush(struct diff_options *options)
struct diff_queue_struct *q = &diff_queued_diff;
int i, output_format = options->output_format;
int separator = 0;
int dirstat_by_line = 0;
/*
* Order: raw, stat, summary, patch
@ -4043,7 +4170,11 @@ void diff_flush(struct diff_options *options)
separator++;
}
if (output_format & (DIFF_FORMAT_DIFFSTAT|DIFF_FORMAT_SHORTSTAT|DIFF_FORMAT_NUMSTAT)) {
if (output_format & DIFF_FORMAT_DIRSTAT && DIFF_OPT_TST(options, DIRSTAT_BY_LINE))
dirstat_by_line = 1;
if (output_format & (DIFF_FORMAT_DIFFSTAT|DIFF_FORMAT_SHORTSTAT|DIFF_FORMAT_NUMSTAT) ||
dirstat_by_line) {
struct diffstat_t diffstat;
memset(&diffstat, 0, sizeof(struct diffstat_t));
@ -4058,10 +4189,12 @@ void diff_flush(struct diff_options *options)
show_stats(&diffstat, options);
if (output_format & DIFF_FORMAT_SHORTSTAT)
show_shortstats(&diffstat, options);
if (output_format & DIFF_FORMAT_DIRSTAT)
show_dirstat_by_line(&diffstat, options);
free_diffstat_info(&diffstat);
separator++;
}
if (output_format & DIFF_FORMAT_DIRSTAT)
if ((output_format & DIFF_FORMAT_DIRSTAT) && !dirstat_by_line)
show_dirstat(options);
if (output_format & DIFF_FORMAT_SUMMARY && !is_summary_empty(q)) {