grep: make the behavior for NUL-byte in patterns sane
The behavior of "grep" when patterns contained a NUL-byte has always been haphazard, and has served the vagaries of the implementation more than anything else. A pattern containing a NUL-byte can only be provided via "-f <file>". Since pickaxe (log search) has no such flag, the NUL-byte in patterns has only ever been supported by "grep" (and not "log --grep"). Since 9eceddeec6 ("Use kwset in grep", 2011-08-21) patterns containing "\0" were considered fixed. In 966be95549 ("grep: add tests to fix blind spots with \0 patterns", 2017-05-20) I added tests for this behavior. Change the behavior to do the obvious thing, i.e. don't silently discard a regex pattern and make it implicitly fixed just because it contains a NUL-byte. Instead die if the backend in question can't handle them, e.g. --basic-regexp is combined with such a pattern. This is desired because from a user's point of view it's the obvious thing to do. Whether we support BRE/ERE/Perl syntax is different from whether our implementation is limited by C-strings. These patterns are obscure enough that I think this behavior change is OK, especially since we never documented the old behavior. Doing this also makes it easier to replace the kwset backend with something else, since we'll no longer strictly need it for anything we can't easily use another fixed-string backend for. Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
This commit is contained in:
committed by
Junio C Hamano
parent
d316af059d
commit
25754125ce
23
grep.c
23
grep.c
@ -368,18 +368,6 @@ static int is_fixed(const char *s, size_t len)
|
||||
return 1;
|
||||
}
|
||||
|
||||
static int has_null(const char *s, size_t len)
|
||||
{
|
||||
/*
|
||||
* regcomp cannot accept patterns with NULs so when using it
|
||||
* we consider any pattern containing a NUL fixed.
|
||||
*/
|
||||
if (memchr(s, 0, len))
|
||||
return 1;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#ifdef USE_LIBPCRE1
|
||||
static void compile_pcre1_regexp(struct grep_pat *p, const struct grep_opt *opt)
|
||||
{
|
||||
@ -668,9 +656,7 @@ static void compile_regexp(struct grep_pat *p, struct grep_opt *opt)
|
||||
* simple string match using kws. p->fixed tells us if we
|
||||
* want to use kws.
|
||||
*/
|
||||
if (opt->fixed ||
|
||||
has_null(p->pattern, p->patternlen) ||
|
||||
is_fixed(p->pattern, p->patternlen))
|
||||
if (opt->fixed || is_fixed(p->pattern, p->patternlen))
|
||||
p->fixed = !p->ignore_case || !has_non_ascii(p->pattern);
|
||||
|
||||
if (p->fixed) {
|
||||
@ -678,7 +664,12 @@ static void compile_regexp(struct grep_pat *p, struct grep_opt *opt)
|
||||
kwsincr(p->kws, p->pattern, p->patternlen);
|
||||
kwsprep(p->kws);
|
||||
return;
|
||||
} else if (opt->fixed) {
|
||||
}
|
||||
|
||||
if (memchr(p->pattern, 0, p->patternlen) && !opt->pcre2)
|
||||
die(_("given pattern contains NULL byte (via -f <file>). This is only supported with -P under PCRE v2"));
|
||||
|
||||
if (opt->fixed) {
|
||||
/*
|
||||
* We come here when the pattern has the non-ascii
|
||||
* characters we cannot case-fold, and asked to
|
||||
|
||||
Reference in New Issue
Block a user