From 0ddf8ceac0b38c1ec89551c08dc49cb4b96c82bc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=86var=20Arnfj=C3=B6r=C3=B0=20Bjarmason?= Date: Thu, 18 Feb 2021 01:07:19 +0100 Subject: [PATCH 01/10] grep/pcre2: drop needless assignment + assert() on opt->pcre2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Drop an assignment added in b65abcafc7a (grep: use PCRE v2 for optimized fixed-string search, 2019-07-01) and the overly cautious assert() I added in 94da9193a6e (grep: add support for PCRE v2, 2017-06-01). There was never a good reason for this, it's just a relic from when I initially wrote the PCREv2 support. We're not going to have confusion about compile_pcre2_pattern() being called when it shouldn't just because we forgot to cargo-cult this opt->pcre2 option. Furthermore the "struct grep_opt" is (mostly) used for the options the user supplied, let's avoid the pattern of needlessly assigning to it. With my recent removal of the PCREv1 backend in 7599730b7e2 (Remove support for v1 of the PCRE library, 2021-01-24) there's even less confusion around what we call where in these codepaths, which is one more reason to remove this. Signed-off-by: Ævar Arnfjörð Bjarmason Signed-off-by: Junio C Hamano --- grep.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/grep.c b/grep.c index aabfaaa4c3..816e23f17e 100644 --- a/grep.c +++ b/grep.c @@ -373,8 +373,6 @@ static void compile_pcre2_pattern(struct grep_pat *p, const struct grep_opt *opt int patinforet; size_t jitsizearg; - assert(opt->pcre2); - p->pcre2_compile_context = NULL; /* pcre2_global_context is initialized in append_grep_pattern */ @@ -555,7 +553,6 @@ static void compile_regexp(struct grep_pat *p, struct grep_opt *opt) #endif if (p->fixed || p->is_fixed) { #ifdef USE_LIBPCRE2 - opt->pcre2 = 1; if (p->is_fixed) { compile_pcre2_pattern(p, opt); } else { From 1cfc5a850c3962f341accb15cef6d09c21bbbdee Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=86var=20Arnfj=C3=B6r=C3=B0=20Bjarmason?= Date: Thu, 18 Feb 2021 01:07:20 +0100 Subject: [PATCH 02/10] grep/pcre2: drop needless assignment to NULL MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Remove a redundant assignment of pcre2_compile_context dating back to my 94da9193a6e (grep: add support for PCRE v2, 2017-06-01). In create_grep_pat() we xcalloc() the "grep_pat" struct, so there's no need to NULL out individual members here. I think this was probably something left over from an earlier development version of mine. Signed-off-by: Ævar Arnfjörð Bjarmason Signed-off-by: Junio C Hamano --- grep.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/grep.c b/grep.c index 816e23f17e..f27c5de7f5 100644 --- a/grep.c +++ b/grep.c @@ -373,8 +373,6 @@ static void compile_pcre2_pattern(struct grep_pat *p, const struct grep_opt *opt int patinforet; size_t jitsizearg; - p->pcre2_compile_context = NULL; - /* pcre2_global_context is initialized in append_grep_pattern */ if (opt->ignore_case) { if (!opt->ignore_locale && has_non_ascii(p->pattern)) { From 47eebd2fd2c576377f40555c539c5fc56af060a0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=86var=20Arnfj=C3=B6r=C3=B0=20Bjarmason?= Date: Thu, 18 Feb 2021 01:07:21 +0100 Subject: [PATCH 03/10] grep/pcre2: correct reference to grep_init() in comment MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Correct a comment added in 513f2b0bbd4 (grep: make PCRE2 aware of custom allocator, 2019-10-16). This comment was never correct in git.git, but was consistent with an older version of the patch[1]. 1. https://lore.kernel.org/git/20190806163658.66932-3-carenas@gmail.com/ Signed-off-by: Ævar Arnfjörð Bjarmason Signed-off-by: Junio C Hamano --- grep.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/grep.c b/grep.c index f27c5de7f5..b9adcd83e7 100644 --- a/grep.c +++ b/grep.c @@ -373,7 +373,7 @@ static void compile_pcre2_pattern(struct grep_pat *p, const struct grep_opt *opt int patinforet; size_t jitsizearg; - /* pcre2_global_context is initialized in append_grep_pattern */ + /* pcre2_global_context is initialized in grep_init */ if (opt->ignore_case) { if (!opt->ignore_locale && has_non_ascii(p->pattern)) { if (!pcre2_global_context) From 588e4fb19190c03319a2b67d447dd40f97d85531 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=86var=20Arnfj=C3=B6r=C3=B0=20Bjarmason?= Date: Thu, 18 Feb 2021 01:07:22 +0100 Subject: [PATCH 04/10] grep/pcre2: prepare to add debugging to pcre2_malloc() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Change pcre2_malloc() in a way that'll make it easier for a debugging fprintf() to spew out the allocated pointer. This doesn't introduce any functional change, it just makes a subsequent commit's diff easier to read. Changes code added in 513f2b0bbd4 (grep: make PCRE2 aware of custom allocator, 2019-10-16). Signed-off-by: Ævar Arnfjörð Bjarmason Signed-off-by: Junio C Hamano --- grep.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/grep.c b/grep.c index b9adcd83e7..f96d86c929 100644 --- a/grep.c +++ b/grep.c @@ -45,7 +45,8 @@ static pcre2_general_context *pcre2_global_context; static void *pcre2_malloc(PCRE2_SIZE size, MAYBE_UNUSED void *memory_data) { - return malloc(size); + void *pointer = malloc(size); + return pointer; } static void pcre2_free(void *pointer, MAYBE_UNUSED void *memory_data) From a39b4003f0e4515da5d88480c24ec27196dabfb7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=86var=20Arnfj=C3=B6r=C3=B0=20Bjarmason?= Date: Thu, 18 Feb 2021 01:07:23 +0100 Subject: [PATCH 05/10] grep/pcre2: add GREP_PCRE2_DEBUG_MALLOC debug mode MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add optional printing of PCREv2 allocations to stderr for a developer who manually changes the GREP_PCRE2_DEBUG_MALLOC definition to "1". You need to manually change the definition in the source file similar to the DEBUG_MAILMAP, there's no Makefile knob for this. This will be referenced a subsequent commit, and is generally useful to manually see what's going on with PCREv2 allocations while working on that code. Signed-off-by: Ævar Arnfjörð Bjarmason Signed-off-by: Junio C Hamano --- grep.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/grep.c b/grep.c index f96d86c929..7d262a23d8 100644 --- a/grep.c +++ b/grep.c @@ -42,15 +42,25 @@ static struct grep_opt grep_defaults = { #ifdef USE_LIBPCRE2 static pcre2_general_context *pcre2_global_context; +#define GREP_PCRE2_DEBUG_MALLOC 0 static void *pcre2_malloc(PCRE2_SIZE size, MAYBE_UNUSED void *memory_data) { void *pointer = malloc(size); +#if GREP_PCRE2_DEBUG_MALLOC + static int count = 1; + fprintf(stderr, "PCRE2:%p -> #%02d: alloc(%lu)\n", pointer, count++, size); +#endif return pointer; } static void pcre2_free(void *pointer, MAYBE_UNUSED void *memory_data) { +#if GREP_PCRE2_DEBUG_MALLOC + static int count = 1; + if (pointer) + fprintf(stderr, "PCRE2:%p -> #%02d: free()\n", pointer, count++); +#endif free(pointer); } #endif From 797c359978eb13ddff10974a6c6469e33695606c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=86var=20Arnfj=C3=B6r=C3=B0=20Bjarmason?= Date: Thu, 18 Feb 2021 01:07:24 +0100 Subject: [PATCH 06/10] grep/pcre2: use compile-time PCREv2 version test MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace a use of pcre2_config(PCRE2_CONFIG_VERSION, ...) which I added in 95ca1f987ed (grep/pcre2: better support invalid UTF-8 haystacks, 2021-01-24) with the same test done at compile-time. It might be cuter to do this at runtime since we don't have to do the "major >= 11 || (major >= 10 && ...)" test. But in the next commit we'll add another version comparison that absolutely needs to be done at compile-time, so we're better of being consistent across the board. Signed-off-by: Ævar Arnfjörð Bjarmason Signed-off-by: Junio C Hamano --- grep.c | 18 ++++-------------- grep.h | 3 +++ 2 files changed, 7 insertions(+), 14 deletions(-) diff --git a/grep.c b/grep.c index 7d262a23d8..e58044474d 100644 --- a/grep.c +++ b/grep.c @@ -400,21 +400,11 @@ static void compile_pcre2_pattern(struct grep_pat *p, const struct grep_opt *opt !(!opt->ignore_case && (p->fixed || p->is_fixed))) options |= (PCRE2_UTF | PCRE2_MATCH_INVALID_UTF); +#ifdef GIT_PCRE2_VERSION_10_36_OR_HIGHER /* Work around https://bugs.exim.org/show_bug.cgi?id=2642 fixed in 10.36 */ - if (PCRE2_MATCH_INVALID_UTF && options & (PCRE2_UTF | PCRE2_CASELESS)) { - struct strbuf buf; - int len; - int err; - - if ((len = pcre2_config(PCRE2_CONFIG_VERSION, NULL)) < 0) - BUG("pcre2_config(..., NULL) failed: %d", len); - strbuf_init(&buf, len + 1); - if ((err = pcre2_config(PCRE2_CONFIG_VERSION, buf.buf)) < 0) - BUG("pcre2_config(..., buf.buf) failed: %d", err); - if (versioncmp(buf.buf, "10.36") < 0) - options |= PCRE2_NO_START_OPTIMIZE; - strbuf_release(&buf); - } + if (PCRE2_MATCH_INVALID_UTF && options & (PCRE2_UTF | PCRE2_CASELESS)) + options |= PCRE2_NO_START_OPTIMIZE; +#endif p->pcre2_pattern = pcre2_compile((PCRE2_SPTR)p->pattern, p->patternlen, options, &error, &erroffset, diff --git a/grep.h b/grep.h index ae89d6254b..54e52042cb 100644 --- a/grep.h +++ b/grep.h @@ -4,6 +4,9 @@ #ifdef USE_LIBPCRE2 #define PCRE2_CODE_UNIT_WIDTH 8 #include +#if (PCRE2_MAJOR >= 10 && PCRE2_MINOR >= 36) || PCRE2_MAJOR >= 11 +#define GIT_PCRE2_VERSION_10_36_OR_HIGHER +#endif #else typedef int pcre2_code; typedef int pcre2_match_data; From b76bf27f6a0138f66f7cfca122b7ddc68836160d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=86var=20Arnfj=C3=B6r=C3=B0=20Bjarmason?= Date: Thu, 18 Feb 2021 01:07:25 +0100 Subject: [PATCH 07/10] grep/pcre2: use pcre2_maketables_free() function MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Make use of the pcre2_maketables_free() function to free the memory allocated by pcre2_maketables(). At first sight it's strange that 10da030ab75 (grep: avoid leak of chartables in PCRE2, 2019-10-16) which added the free() call here doesn't make use of the pcre2_free() the author introduced in the preceding commit in 513f2b0bbd4 (grep: make PCRE2 aware of custom allocator, 2019-10-16). The reason is that at the time the function didn't exist. It was first introduced in PCREv2 version 10.34, released on 2019-11-21. Let's make use of it behind a macro. I don't think this matters for anything to do with custom allocators, but it makes our use of PCREv2 more discoverable. At some distant point in the future we'll be able to drop the version guard, as nobody will be running a version older than 10.34. Signed-off-by: Ævar Arnfjörð Bjarmason Signed-off-by: Junio C Hamano --- grep.c | 4 ++++ grep.h | 3 +++ 2 files changed, 7 insertions(+) diff --git a/grep.c b/grep.c index e58044474d..c63dbff4b2 100644 --- a/grep.c +++ b/grep.c @@ -490,7 +490,11 @@ static void free_pcre2_pattern(struct grep_pat *p) pcre2_compile_context_free(p->pcre2_compile_context); pcre2_code_free(p->pcre2_pattern); pcre2_match_data_free(p->pcre2_match_data); +#ifdef GIT_PCRE2_VERSION_10_34_OR_HIGHER + pcre2_maketables_free(pcre2_global_context, p->pcre2_tables); +#else free((void *)p->pcre2_tables); +#endif } #else /* !USE_LIBPCRE2 */ static void compile_pcre2_pattern(struct grep_pat *p, const struct grep_opt *opt) diff --git a/grep.h b/grep.h index 54e52042cb..64666e9204 100644 --- a/grep.h +++ b/grep.h @@ -7,6 +7,9 @@ #if (PCRE2_MAJOR >= 10 && PCRE2_MINOR >= 36) || PCRE2_MAJOR >= 11 #define GIT_PCRE2_VERSION_10_36_OR_HIGHER #endif +#if (PCRE2_MAJOR >= 10 && PCRE2_MINOR >= 34) || PCRE2_MAJOR >= 11 +#define GIT_PCRE2_VERSION_10_34_OR_HIGHER +#endif #else typedef int pcre2_code; typedef int pcre2_match_data; From 8d1285134297d66638f615cb076d1964e2dbbc19 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=86var=20Arnfj=C3=B6r=C3=B0=20Bjarmason?= Date: Thu, 18 Feb 2021 01:07:26 +0100 Subject: [PATCH 08/10] grep/pcre2: actually make pcre2 use custom allocator MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Continue work started in 513f2b0bbd4 (grep: make PCRE2 aware of custom allocator, 2019-10-16) and make PCREv2 use our pcre2_{malloc,free}(). functions for allocation. We'll now use it for all PCREv2 allocations. The reason 513f2b0bbd4 worked as a bugfix for the USE_NED_ALLOCATOR issue is because it targeted the allocation freed via free(), as opposed to by a pcre2_*free() function. I.e. the pcre2_maketables() and pcre2_maketables_free() pair. For most of the rest we continued allocating with stock malloc() inside PCREv2 itself, but didn't segfault because we'd use its corresponding free(). In a preceding commit of mine I changed the free() to pcre2_maketables_free() on versions of PCREv2 10.34 and newer. So as far as fixing the segfault goes we could revert 513f2b0bbd4. But then we wouldn't use the desired allocator, let's just use it instead. Before this patch we'd on e.g.: grep --threads=1 -iP æ.*var.*xyz Only use pcre2_{malloc,free}() for 2 malloc() calls and 2 corresponding free() calls. Now it's 12 calls to each. This can be observed with the GREP_PCRE2_DEBUG_MALLOC debug mode. Reading the history of how this bug got introduced it wasn't present in Johannes's original patch[1] to fix the issue. My reading of that thread is that the approach the follow-up patches to Johannes's original pursued were based on misunderstanding of how the PCREv2 API works. In particular this part of [2]: "most of the time (like when using UTF-8) the chartable (and therefore the global context) is not needed (even when using alternate allocators)" That's simply not how PCREv2 memory allocation works. It's easy to see how the misunderstanding came about. It's because (as noted above) the issue was noticed because of our use of free() in our own grep.c for freeing the memory allocated by pcre2_maketables(). Thus the misunderstanding that PCREv2's compile context is something only needed for pcre2_maketables(), and e.g. an aborted earlier attempt[3] to only set it up when we ourselves called pcre2_maketables(). That's not what PCREv2's compile context is. To quote PCREv2's documentation: "This context just contains pointers to (and data for) external memory management functions that are called from several places in the PCRE2 library." Thus the failed attempts to go down the route of only creating the general context in cases where we ourselves call pcre2_maketables(), before finally settling on the approach 513f2b0bbd4 took of always creating it, but then mostly not using it. Instead we should always create it, and then pass the general context to those functions that accept it, so that they'll consistently use our preferred memory allocation functions. 1. https://lore.kernel.org/git/3397e6797f872aedd18c6d795f4976e1c579514b.1565005867.git.gitgitgadget@gmail.com/ 2. https://lore.kernel.org/git/CAPUEsphMh_ZqcH3M7PXC9jHTfEdQN3mhTAK2JDkdvKBp53YBoA@mail.gmail.com/ 3. https://lore.kernel.org/git/20190806085014.47776-3-carenas@gmail.com/ Signed-off-by: Ævar Arnfjörð Bjarmason Signed-off-by: Junio C Hamano --- grep.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/grep.c b/grep.c index c63dbff4b2..0116ff5f09 100644 --- a/grep.c +++ b/grep.c @@ -390,7 +390,7 @@ static void compile_pcre2_pattern(struct grep_pat *p, const struct grep_opt *opt if (!pcre2_global_context) BUG("pcre2_global_context uninitialized"); p->pcre2_tables = pcre2_maketables(pcre2_global_context); - p->pcre2_compile_context = pcre2_compile_context_create(NULL); + p->pcre2_compile_context = pcre2_compile_context_create(pcre2_global_context); pcre2_set_character_tables(p->pcre2_compile_context, p->pcre2_tables); } @@ -411,7 +411,7 @@ static void compile_pcre2_pattern(struct grep_pat *p, const struct grep_opt *opt p->pcre2_compile_context); if (p->pcre2_pattern) { - p->pcre2_match_data = pcre2_match_data_create_from_pattern(p->pcre2_pattern, NULL); + p->pcre2_match_data = pcre2_match_data_create_from_pattern(p->pcre2_pattern, pcre2_global_context); if (!p->pcre2_match_data) die("Couldn't allocate PCRE2 match data"); } else { From cbe81e653fa1adc6b4e09d881628074f7448289a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=86var=20Arnfj=C3=B6r=C3=B0=20Bjarmason?= Date: Thu, 18 Feb 2021 01:07:27 +0100 Subject: [PATCH 09/10] grep/pcre2: move back to thread-only PCREv2 structures MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Change the setup of the "pcre2_general_context" to happen per-thread in compile_pcre2_pattern() instead of in grep_init(). This change brings it in line with how the rest of the pcre2_* members in the grep_pat structure are set up. As noted in the preceding commit the approach 513f2b0bbd4 (grep: make PCRE2 aware of custom allocator, 2019-10-16) took to allocate the pcre2_general_context seems to have been initially based on a misunderstanding of how PCREv2 memory allocation works. The approach of creating a global context in grep_init() is just added complexity for almost zero gain. On my system it's 24 bytes saved per-thread. For comparison PCREv2 will then go on to allocate at least a kilobyte for its own thread-local state. As noted in 6d423dd542f (grep: don't redundantly compile throwaway patterns under threading, 2017-05-25) the grep code is intentionally not trying to micro-optimize allocations by e.g. sharing some PCREv2 structures globally, while making others thread-local. So let's remove this special case and make all of them thread-local again for simplicity. With this change we could move the pcre2_{malloc,free} functions around to live closer to their current use. I'm not doing that here to keep this change small, that cleanup will be done in a follow-up commit. See also the discussion in 94da9193a6 (grep: add support for PCRE v2, 2017-06-01) about thread safety, and Johannes's comments[1] to the effect that we should be doing what this patch is doing. 1. https://lore.kernel.org/git/nycvar.QRO.7.76.6.1908052120302.46@tvgsbejvaqbjf.bet/ Signed-off-by: Ævar Arnfjörð Bjarmason Signed-off-by: Junio C Hamano --- builtin/grep.c | 1 - grep.c | 41 +++++++++++++++-------------------------- grep.h | 3 ++- 3 files changed, 17 insertions(+), 28 deletions(-) diff --git a/builtin/grep.c b/builtin/grep.c index 55d06c9513..c69fe99340 100644 --- a/builtin/grep.c +++ b/builtin/grep.c @@ -1175,6 +1175,5 @@ int cmd_grep(int argc, const char **argv, const char *prefix) run_pager(&opt, prefix); clear_pathspec(&pathspec); free_grep_patterns(&opt); - grep_destroy(); return !hit; } diff --git a/grep.c b/grep.c index 0116ff5f09..2599f329cd 100644 --- a/grep.c +++ b/grep.c @@ -41,7 +41,6 @@ static struct grep_opt grep_defaults = { }; #ifdef USE_LIBPCRE2 -static pcre2_general_context *pcre2_global_context; #define GREP_PCRE2_DEBUG_MALLOC 0 static void *pcre2_malloc(PCRE2_SIZE size, MAYBE_UNUSED void *memory_data) @@ -163,20 +162,9 @@ int grep_config(const char *var, const char *value, void *cb) * Initialize one instance of grep_opt and copy the * default values from the template we read the configuration * information in an earlier call to git_config(grep_config). - * - * If using PCRE, make sure that the library is configured - * to use the same allocator as Git (e.g. nedmalloc on Windows). - * - * Any allocated memory needs to be released in grep_destroy(). */ void grep_init(struct grep_opt *opt, struct repository *repo, const char *prefix) { -#if defined(USE_LIBPCRE2) - if (!pcre2_global_context) - pcre2_global_context = pcre2_general_context_create( - pcre2_malloc, pcre2_free, NULL); -#endif - *opt = grep_defaults; opt->repo = repo; @@ -186,13 +174,6 @@ void grep_init(struct grep_opt *opt, struct repository *repo, const char *prefix opt->header_tail = &opt->header_list; } -void grep_destroy(void) -{ -#ifdef USE_LIBPCRE2 - pcre2_general_context_free(pcre2_global_context); -#endif -} - static void grep_set_pattern_type_option(enum grep_pattern_type pattern_type, struct grep_opt *opt) { /* @@ -384,13 +365,20 @@ static void compile_pcre2_pattern(struct grep_pat *p, const struct grep_opt *opt int patinforet; size_t jitsizearg; - /* pcre2_global_context is initialized in grep_init */ + /* + * Call pcre2_general_context_create() before calling any + * other pcre2_*(). It sets up our malloc()/free() functions + * with which everything else is allocated. + */ + p->pcre2_general_context = pcre2_general_context_create( + pcre2_malloc, pcre2_free, NULL); + if (!p->pcre2_general_context) + die("Couldn't allocate PCRE2 general context"); + if (opt->ignore_case) { if (!opt->ignore_locale && has_non_ascii(p->pattern)) { - if (!pcre2_global_context) - BUG("pcre2_global_context uninitialized"); - p->pcre2_tables = pcre2_maketables(pcre2_global_context); - p->pcre2_compile_context = pcre2_compile_context_create(pcre2_global_context); + p->pcre2_tables = pcre2_maketables(p->pcre2_general_context); + p->pcre2_compile_context = pcre2_compile_context_create(p->pcre2_general_context); pcre2_set_character_tables(p->pcre2_compile_context, p->pcre2_tables); } @@ -411,7 +399,7 @@ static void compile_pcre2_pattern(struct grep_pat *p, const struct grep_opt *opt p->pcre2_compile_context); if (p->pcre2_pattern) { - p->pcre2_match_data = pcre2_match_data_create_from_pattern(p->pcre2_pattern, pcre2_global_context); + p->pcre2_match_data = pcre2_match_data_create_from_pattern(p->pcre2_pattern, p->pcre2_general_context); if (!p->pcre2_match_data) die("Couldn't allocate PCRE2 match data"); } else { @@ -491,10 +479,11 @@ static void free_pcre2_pattern(struct grep_pat *p) pcre2_code_free(p->pcre2_pattern); pcre2_match_data_free(p->pcre2_match_data); #ifdef GIT_PCRE2_VERSION_10_34_OR_HIGHER - pcre2_maketables_free(pcre2_global_context, p->pcre2_tables); + pcre2_maketables_free(p->pcre2_general_context, p->pcre2_tables); #else free((void *)p->pcre2_tables); #endif + pcre2_general_context_free(p->pcre2_general_context); } #else /* !USE_LIBPCRE2 */ static void compile_pcre2_pattern(struct grep_pat *p, const struct grep_opt *opt) diff --git a/grep.h b/grep.h index 64666e9204..72f82b1e30 100644 --- a/grep.h +++ b/grep.h @@ -14,6 +14,7 @@ typedef int pcre2_code; typedef int pcre2_match_data; typedef int pcre2_compile_context; +typedef int pcre2_general_context; #endif #ifndef PCRE2_MATCH_INVALID_UTF /* PCRE2_MATCH_* dummy also with !USE_LIBPCRE2, for test-pcre2-config.c */ @@ -75,6 +76,7 @@ struct grep_pat { pcre2_code *pcre2_pattern; pcre2_match_data *pcre2_match_data; pcre2_compile_context *pcre2_compile_context; + pcre2_general_context *pcre2_general_context; const uint8_t *pcre2_tables; uint32_t pcre2_jit_on; unsigned fixed:1; @@ -167,7 +169,6 @@ struct grep_opt { int grep_config(const char *var, const char *value, void *); void grep_init(struct grep_opt *, struct repository *repo, const char *prefix); -void grep_destroy(void); void grep_commit_pattern_type(enum grep_pattern_type, struct grep_opt *opt); void append_grep_pat(struct grep_opt *opt, const char *pat, size_t patlen, const char *origin, int no, enum grep_pat_token t); From c1760352e0b27cfbdffd97dec50a9eb552318993 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=86var=20Arnfj=C3=B6r=C3=B0=20Bjarmason?= Date: Thu, 18 Feb 2021 01:07:28 +0100 Subject: [PATCH 10/10] grep/pcre2: move definitions of pcre2_{malloc,free} MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Move the definitions of the pcre2_{malloc,free} functions above the compile_pcre2_pattern() function they're used in. Before the preceding commit they used to be needed earlier, but now we can move them to be adjacent to the other PCREv2 functions. Signed-off-by: Ævar Arnfjörð Bjarmason Signed-off-by: Junio C Hamano --- grep.c | 46 ++++++++++++++++++++++------------------------ 1 file changed, 22 insertions(+), 24 deletions(-) diff --git a/grep.c b/grep.c index 2599f329cd..636ac48bf0 100644 --- a/grep.c +++ b/grep.c @@ -40,30 +40,6 @@ static struct grep_opt grep_defaults = { .output = std_output, }; -#ifdef USE_LIBPCRE2 -#define GREP_PCRE2_DEBUG_MALLOC 0 - -static void *pcre2_malloc(PCRE2_SIZE size, MAYBE_UNUSED void *memory_data) -{ - void *pointer = malloc(size); -#if GREP_PCRE2_DEBUG_MALLOC - static int count = 1; - fprintf(stderr, "PCRE2:%p -> #%02d: alloc(%lu)\n", pointer, count++, size); -#endif - return pointer; -} - -static void pcre2_free(void *pointer, MAYBE_UNUSED void *memory_data) -{ -#if GREP_PCRE2_DEBUG_MALLOC - static int count = 1; - if (pointer) - fprintf(stderr, "PCRE2:%p -> #%02d: free()\n", pointer, count++); -#endif - free(pointer); -} -#endif - static const char *color_grep_slots[] = { [GREP_COLOR_CONTEXT] = "context", [GREP_COLOR_FILENAME] = "filename", @@ -355,6 +331,28 @@ static int is_fixed(const char *s, size_t len) } #ifdef USE_LIBPCRE2 +#define GREP_PCRE2_DEBUG_MALLOC 0 + +static void *pcre2_malloc(PCRE2_SIZE size, MAYBE_UNUSED void *memory_data) +{ + void *pointer = malloc(size); +#if GREP_PCRE2_DEBUG_MALLOC + static int count = 1; + fprintf(stderr, "PCRE2:%p -> #%02d: alloc(%lu)\n", pointer, count++, size); +#endif + return pointer; +} + +static void pcre2_free(void *pointer, MAYBE_UNUSED void *memory_data) +{ +#if GREP_PCRE2_DEBUG_MALLOC + static int count = 1; + if (pointer) + fprintf(stderr, "PCRE2:%p -> #%02d: free()\n", pointer, count++); +#endif + free(pointer); +} + static void compile_pcre2_pattern(struct grep_pat *p, const struct grep_opt *opt) { int error;