Merge branch 'nd/icase'
"git grep -i" has been taught to fold case in non-ascii locales correctly. * nd/icase: grep.c: reuse "icase" variable diffcore-pickaxe: support case insensitive match on non-ascii diffcore-pickaxe: Add regcomp_or_die() grep/pcre: support utf-8 gettext: add is_utf8_locale() grep/pcre: prepare locale-dependent tables for icase matching grep: rewrite an if/else condition to avoid duplicate expression grep/icase: avoid kwsset when -F is specified grep/icase: avoid kwsset on literal non-ascii strings test-regex: expose full regcomp() to the command line test-regex: isolate the bug test code grep: break down an "if" stmt in preparation for next changes
This commit is contained in:
64
grep.c
64
grep.c
@ -4,6 +4,8 @@
|
||||
#include "xdiff-interface.h"
|
||||
#include "diff.h"
|
||||
#include "diffcore.h"
|
||||
#include "commit.h"
|
||||
#include "quote.h"
|
||||
|
||||
static int grep_source_load(struct grep_source *gs);
|
||||
static int grep_source_is_binary(struct grep_source *gs);
|
||||
@ -322,11 +324,16 @@ static void compile_pcre_regexp(struct grep_pat *p, const struct grep_opt *opt)
|
||||
int erroffset;
|
||||
int options = PCRE_MULTILINE;
|
||||
|
||||
if (opt->ignore_case)
|
||||
if (opt->ignore_case) {
|
||||
if (has_non_ascii(p->pattern))
|
||||
p->pcre_tables = pcre_maketables();
|
||||
options |= PCRE_CASELESS;
|
||||
}
|
||||
if (is_utf8_locale() && has_non_ascii(p->pattern))
|
||||
options |= PCRE_UTF8;
|
||||
|
||||
p->pcre_regexp = pcre_compile(p->pattern, options, &error, &erroffset,
|
||||
NULL);
|
||||
p->pcre_tables);
|
||||
if (!p->pcre_regexp)
|
||||
compile_regexp_failed(p, error);
|
||||
|
||||
@ -360,6 +367,7 @@ static void free_pcre_regexp(struct grep_pat *p)
|
||||
{
|
||||
pcre_free(p->pcre_regexp);
|
||||
pcre_free(p->pcre_extra_info);
|
||||
pcre_free((void *)p->pcre_tables);
|
||||
}
|
||||
#else /* !USE_LIBPCRE */
|
||||
static void compile_pcre_regexp(struct grep_pat *p, const struct grep_opt *opt)
|
||||
@ -396,26 +404,68 @@ static int is_fixed(const char *s, size_t len)
|
||||
return 1;
|
||||
}
|
||||
|
||||
static void compile_fixed_regexp(struct grep_pat *p, struct grep_opt *opt)
|
||||
{
|
||||
struct strbuf sb = STRBUF_INIT;
|
||||
int err;
|
||||
int regflags;
|
||||
|
||||
basic_regex_quote_buf(&sb, p->pattern);
|
||||
regflags = opt->regflags & ~REG_EXTENDED;
|
||||
if (opt->ignore_case)
|
||||
regflags |= REG_ICASE;
|
||||
err = regcomp(&p->regexp, sb.buf, regflags);
|
||||
if (opt->debug)
|
||||
fprintf(stderr, "fixed %s\n", sb.buf);
|
||||
strbuf_release(&sb);
|
||||
if (err) {
|
||||
char errbuf[1024];
|
||||
regerror(err, &p->regexp, errbuf, sizeof(errbuf));
|
||||
regfree(&p->regexp);
|
||||
compile_regexp_failed(p, errbuf);
|
||||
}
|
||||
}
|
||||
|
||||
static void compile_regexp(struct grep_pat *p, struct grep_opt *opt)
|
||||
{
|
||||
int icase, ascii_only;
|
||||
int err;
|
||||
|
||||
p->word_regexp = opt->word_regexp;
|
||||
p->ignore_case = opt->ignore_case;
|
||||
icase = opt->regflags & REG_ICASE || p->ignore_case;
|
||||
ascii_only = !has_non_ascii(p->pattern);
|
||||
|
||||
/*
|
||||
* Even when -F (fixed) asks us to do a non-regexp search, we
|
||||
* may not be able to correctly case-fold when -i
|
||||
* (ignore-case) is asked (in which case, we'll synthesize a
|
||||
* regexp to match the pattern that matches regexp special
|
||||
* characters literally, while ignoring case differences). On
|
||||
* the other hand, even without -F, if the pattern does not
|
||||
* have any regexp special characters and there is no need for
|
||||
* case-folding search, we can internally turn it into a
|
||||
* simple string match using kws. p->fixed tells us if we
|
||||
* want to use kws.
|
||||
*/
|
||||
if (opt->fixed || is_fixed(p->pattern, p->patternlen))
|
||||
p->fixed = 1;
|
||||
p->fixed = !icase || ascii_only;
|
||||
else
|
||||
p->fixed = 0;
|
||||
|
||||
if (p->fixed) {
|
||||
if (opt->regflags & REG_ICASE || p->ignore_case)
|
||||
p->kws = kwsalloc(tolower_trans_tbl);
|
||||
else
|
||||
p->kws = kwsalloc(NULL);
|
||||
p->kws = kwsalloc(icase ? tolower_trans_tbl : NULL);
|
||||
kwsincr(p->kws, p->pattern, p->patternlen);
|
||||
kwsprep(p->kws);
|
||||
return;
|
||||
} else if (opt->fixed) {
|
||||
/*
|
||||
* We come here when the pattern has the non-ascii
|
||||
* characters we cannot case-fold, and asked to
|
||||
* ignore-case.
|
||||
*/
|
||||
compile_fixed_regexp(p, opt);
|
||||
return;
|
||||
}
|
||||
|
||||
if (opt->pcre) {
|
||||
|
||||
Reference in New Issue
Block a user