 5ce922a014
			
		
	
	5ce922a014
	
	
	
		
			
			Since inception, git-blame -L has been documented as accepting 1-based line numbers. When handed a line number less than 1, -L's behavior is undocumented and undefined; it's also nonsensical and should be diagnosed as an error. Do so. Signed-off-by: Eric Sunshine <sunshine@sunshineco.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
		
			
				
	
	
		
			291 lines
		
	
	
		
			6.5 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
			
		
		
	
	
			291 lines
		
	
	
		
			6.5 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
| #include "git-compat-util.h"
 | |
| #include "line-range.h"
 | |
| #include "xdiff-interface.h"
 | |
| #include "strbuf.h"
 | |
| #include "userdiff.h"
 | |
| 
 | |
| /*
 | |
|  * Parse one item in the -L option
 | |
|  *
 | |
|  * 'begin' is applicable only to relative range anchors. Absolute anchors
 | |
|  * ignore this value.
 | |
|  *
 | |
|  * When parsing "-L A,B", parse_loc() is called once for A and once for B.
 | |
|  *
 | |
|  * When parsing A, 'begin' must be a negative number, the absolute value of
 | |
|  * which is the line at which relative start-of-range anchors should be
 | |
|  * based. Beginning of file is represented by -1.
 | |
|  *
 | |
|  * When parsing B, 'begin' must be the positive line number immediately
 | |
|  * following the line computed for 'A'.
 | |
|  */
 | |
| static const char *parse_loc(const char *spec, nth_line_fn_t nth_line,
 | |
| 			     void *data, long lines, long begin, long *ret)
 | |
| {
 | |
| 	char *term;
 | |
| 	const char *line;
 | |
| 	long num;
 | |
| 	int reg_error;
 | |
| 	regex_t regexp;
 | |
| 	regmatch_t match[1];
 | |
| 
 | |
| 	/* Allow "-L <something>,+20" to mean starting at <something>
 | |
| 	 * for 20 lines, or "-L <something>,-5" for 5 lines ending at
 | |
| 	 * <something>.
 | |
| 	 */
 | |
| 	if (1 <= begin && (spec[0] == '+' || spec[0] == '-')) {
 | |
| 		num = strtol(spec + 1, &term, 10);
 | |
| 		if (term != spec + 1) {
 | |
| 			if (!ret)
 | |
| 				return term;
 | |
| 			if (num == 0)
 | |
| 				die("-L invalid empty range");
 | |
| 			if (spec[0] == '-')
 | |
| 				num = 0 - num;
 | |
| 			if (0 < num)
 | |
| 				*ret = begin + num - 2;
 | |
| 			else if (!num)
 | |
| 				*ret = begin;
 | |
| 			else
 | |
| 				*ret = begin + num;
 | |
| 			return term;
 | |
| 		}
 | |
| 		return spec;
 | |
| 	}
 | |
| 	num = strtol(spec, &term, 10);
 | |
| 	if (term != spec) {
 | |
| 		if (ret) {
 | |
| 			if (num <= 0)
 | |
| 				die("-L invalid line number: %ld", num);
 | |
| 			*ret = num;
 | |
| 		}
 | |
| 		return term;
 | |
| 	}
 | |
| 
 | |
| 	if (begin < 0) {
 | |
| 		if (spec[0] != '^')
 | |
| 			begin = -begin;
 | |
| 		else {
 | |
| 			begin = 1;
 | |
| 			spec++;
 | |
| 		}
 | |
| 	}
 | |
| 
 | |
| 	if (spec[0] != '/')
 | |
| 		return spec;
 | |
| 
 | |
| 	/* it could be a regexp of form /.../ */
 | |
| 	for (term = (char *) spec + 1; *term && *term != '/'; term++) {
 | |
| 		if (*term == '\\')
 | |
| 			term++;
 | |
| 	}
 | |
| 	if (*term != '/')
 | |
| 		return spec;
 | |
| 
 | |
| 	/* in the scan-only case we are not interested in the regex */
 | |
| 	if (!ret)
 | |
| 		return term+1;
 | |
| 
 | |
| 	/* try [spec+1 .. term-1] as regexp */
 | |
| 	*term = 0;
 | |
| 	begin--; /* input is in human terms */
 | |
| 	line = nth_line(data, begin);
 | |
| 
 | |
| 	if (!(reg_error = regcomp(®exp, spec + 1, REG_NEWLINE)) &&
 | |
| 	    !(reg_error = regexec(®exp, line, 1, match, 0))) {
 | |
| 		const char *cp = line + match[0].rm_so;
 | |
| 		const char *nline;
 | |
| 
 | |
| 		while (begin++ < lines) {
 | |
| 			nline = nth_line(data, begin);
 | |
| 			if (line <= cp && cp < nline)
 | |
| 				break;
 | |
| 			line = nline;
 | |
| 		}
 | |
| 		*ret = begin;
 | |
| 		regfree(®exp);
 | |
| 		*term++ = '/';
 | |
| 		return term;
 | |
| 	}
 | |
| 	else {
 | |
| 		char errbuf[1024];
 | |
| 		regerror(reg_error, ®exp, errbuf, 1024);
 | |
| 		die("-L parameter '%s' starting at line %ld: %s",
 | |
| 		    spec + 1, begin + 1, errbuf);
 | |
| 	}
 | |
| }
 | |
| 
 | |
| static int match_funcname(xdemitconf_t *xecfg, const char *bol, const char *eol)
 | |
| {
 | |
| 	if (xecfg) {
 | |
| 		char buf[1];
 | |
| 		return xecfg->find_func(bol, eol - bol, buf, 1,
 | |
| 					xecfg->find_func_priv) >= 0;
 | |
| 	}
 | |
| 
 | |
| 	if (bol == eol)
 | |
| 		return 0;
 | |
| 	if (isalpha(*bol) || *bol == '_' || *bol == '$')
 | |
| 		return 1;
 | |
| 	return 0;
 | |
| }
 | |
| 
 | |
| static const char *find_funcname_matching_regexp(xdemitconf_t *xecfg, const char *start,
 | |
| 						 regex_t *regexp)
 | |
| {
 | |
| 	int reg_error;
 | |
| 	regmatch_t match[1];
 | |
| 	while (1) {
 | |
| 		const char *bol, *eol;
 | |
| 		reg_error = regexec(regexp, start, 1, match, 0);
 | |
| 		if (reg_error == REG_NOMATCH)
 | |
| 			return NULL;
 | |
| 		else if (reg_error) {
 | |
| 			char errbuf[1024];
 | |
| 			regerror(reg_error, regexp, errbuf, 1024);
 | |
| 			die("-L parameter: regexec() failed: %s", errbuf);
 | |
| 		}
 | |
| 		/* determine extent of line matched */
 | |
| 		bol = start+match[0].rm_so;
 | |
| 		eol = start+match[0].rm_eo;
 | |
| 		while (bol > start && *bol != '\n')
 | |
| 			bol--;
 | |
| 		if (*bol == '\n')
 | |
| 			bol++;
 | |
| 		while (*eol && *eol != '\n')
 | |
| 			eol++;
 | |
| 		if (*eol == '\n')
 | |
| 			eol++;
 | |
| 		/* is it a funcname line? */
 | |
| 		if (match_funcname(xecfg, (char*) bol, (char*) eol))
 | |
| 			return bol;
 | |
| 		start = eol;
 | |
| 	}
 | |
| }
 | |
| 
 | |
| static const char *parse_range_funcname(const char *arg, nth_line_fn_t nth_line_cb,
 | |
| 					void *cb_data, long lines, long anchor, long *begin, long *end,
 | |
| 					const char *path)
 | |
| {
 | |
| 	char *pattern;
 | |
| 	const char *term;
 | |
| 	struct userdiff_driver *drv;
 | |
| 	xdemitconf_t *xecfg = NULL;
 | |
| 	const char *start;
 | |
| 	const char *p;
 | |
| 	int reg_error;
 | |
| 	regex_t regexp;
 | |
| 
 | |
| 	if (*arg == '^') {
 | |
| 		anchor = 1;
 | |
| 		arg++;
 | |
| 	}
 | |
| 
 | |
| 	assert(*arg == ':');
 | |
| 	term = arg+1;
 | |
| 	while (*term && *term != ':') {
 | |
| 		if (*term == '\\' && *(term+1))
 | |
| 			term++;
 | |
| 		term++;
 | |
| 	}
 | |
| 	if (term == arg+1)
 | |
| 		return NULL;
 | |
| 	if (!begin) /* skip_range_arg case */
 | |
| 		return term;
 | |
| 
 | |
| 	pattern = xstrndup(arg+1, term-(arg+1));
 | |
| 
 | |
| 	anchor--; /* input is in human terms */
 | |
| 	start = nth_line_cb(cb_data, anchor);
 | |
| 
 | |
| 	drv = userdiff_find_by_path(path);
 | |
| 	if (drv && drv->funcname.pattern) {
 | |
| 		const struct userdiff_funcname *pe = &drv->funcname;
 | |
| 		xecfg = xcalloc(1, sizeof(*xecfg));
 | |
| 		xdiff_set_find_func(xecfg, pe->pattern, pe->cflags);
 | |
| 	}
 | |
| 
 | |
| 	reg_error = regcomp(®exp, pattern, REG_NEWLINE);
 | |
| 	if (reg_error) {
 | |
| 		char errbuf[1024];
 | |
| 		regerror(reg_error, ®exp, errbuf, 1024);
 | |
| 		die("-L parameter '%s': %s", pattern, errbuf);
 | |
| 	}
 | |
| 
 | |
| 	p = find_funcname_matching_regexp(xecfg, (char*) start, ®exp);
 | |
| 	if (!p)
 | |
| 		die("-L parameter '%s' starting at line %ld: no match",
 | |
| 		    pattern, anchor + 1);
 | |
| 	*begin = 0;
 | |
| 	while (p > nth_line_cb(cb_data, *begin))
 | |
| 		(*begin)++;
 | |
| 
 | |
| 	if (*begin >= lines)
 | |
| 		die("-L parameter '%s' matches at EOF", pattern);
 | |
| 
 | |
| 	*end = *begin+1;
 | |
| 	while (*end < lines) {
 | |
| 		const char *bol = nth_line_cb(cb_data, *end);
 | |
| 		const char *eol = nth_line_cb(cb_data, *end+1);
 | |
| 		if (match_funcname(xecfg, bol, eol))
 | |
| 			break;
 | |
| 		(*end)++;
 | |
| 	}
 | |
| 
 | |
| 	regfree(®exp);
 | |
| 	free(xecfg);
 | |
| 	free(pattern);
 | |
| 
 | |
| 	/* compensate for 1-based numbering */
 | |
| 	(*begin)++;
 | |
| 
 | |
| 	return term;
 | |
| }
 | |
| 
 | |
| int parse_range_arg(const char *arg, nth_line_fn_t nth_line_cb,
 | |
| 		    void *cb_data, long lines, long anchor,
 | |
| 		    long *begin, long *end, const char *path)
 | |
| {
 | |
| 	*begin = *end = 0;
 | |
| 
 | |
| 	if (anchor < 1)
 | |
| 		anchor = 1;
 | |
| 	if (anchor > lines)
 | |
| 		anchor = lines + 1;
 | |
| 
 | |
| 	if (*arg == ':' || (*arg == '^' && *(arg + 1) == ':')) {
 | |
| 		arg = parse_range_funcname(arg, nth_line_cb, cb_data, lines, anchor, begin, end, path);
 | |
| 		if (!arg || *arg)
 | |
| 			return -1;
 | |
| 		return 0;
 | |
| 	}
 | |
| 
 | |
| 	arg = parse_loc(arg, nth_line_cb, cb_data, lines, -anchor, begin);
 | |
| 
 | |
| 	if (*arg == ',')
 | |
| 		arg = parse_loc(arg + 1, nth_line_cb, cb_data, lines, *begin + 1, end);
 | |
| 
 | |
| 	if (*arg)
 | |
| 		return -1;
 | |
| 
 | |
| 	if (*begin && *end && *end < *begin) {
 | |
| 		long tmp;
 | |
| 		tmp = *end; *end = *begin; *begin = tmp;
 | |
| 	}
 | |
| 
 | |
| 	return 0;
 | |
| }
 | |
| 
 | |
| const char *skip_range_arg(const char *arg)
 | |
| {
 | |
| 	if (*arg == ':' || (*arg == '^' && *(arg + 1) == ':'))
 | |
| 		return parse_range_funcname(arg, NULL, NULL, 0, 0, NULL, NULL, NULL);
 | |
| 
 | |
| 	arg = parse_loc(arg, NULL, NULL, 0, -1, NULL);
 | |
| 
 | |
| 	if (*arg == ',')
 | |
| 		arg = parse_loc(arg+1, NULL, NULL, 0, 0, NULL);
 | |
| 
 | |
| 	return arg;
 | |
| }
 |