Merge branch 'ab/pickaxe-pcre2'

Rewrite the backend for "diff -G/-S" to use the pcre2 engine when
available.

* ab/pickaxe-pcre2: (22 commits)
  xdiff-interface: replace discard_hunk_line() with a flag
  xdiff users: use designated initializers for out_line
  pickaxe -G: don't special-case create/delete
  pickaxe -G: terminate early on matching lines
  xdiff-interface: allow early return from xdiff_emit_line_fn
  xdiff-interface: prepare for allowing early return
  pickaxe -S: slightly optimize contains()
  pickaxe: rename variables in has_changes() for brevity
  pickaxe -S: support content with NULs under --pickaxe-regex
  pickaxe: assert that we must have a needle under -G or -S
  pickaxe: refactor function selection in diffcore-pickaxe()
  perf: add performance test for pickaxe
  pickaxe/style: consolidate declarations and assignments
  diff.h: move pickaxe fields together again
  pickaxe: die when --find-object and --pickaxe-all are combined
  pickaxe: die when -G and --pickaxe-regex are combined
  pickaxe tests: add missing test for --no-pickaxe-regex being an error
  pickaxe tests: test for -G, -S and --find-object incompatibility
  pickaxe tests: add test for "log -S" not being a regex
  pickaxe tests: add test for diffgrep_consume() internals
  ...
This commit is contained in:
Junio C Hamano
2021-07-13 16:52:50 -07:00
14 changed files with 312 additions and 107 deletions

70
t/perf/p4209-pickaxe.sh Executable file
View File

@ -0,0 +1,70 @@
#!/bin/sh

test_description="Test pickaxe performance"
. ./perf-lib.sh

test_perf_default_repo

# Bound the revision walk with an explicit "<rev>.." range instead of
# --max-count: --max-count limits the number of *matching* commits, so
# the walk itself would still be unbounded.
max_count=1000
if test_have_prereq EXPENSIVE
then
	max_count=10000
fi
from_rev=" $(git rev-list HEAD | head -n $max_count | tail -n 1).."
# Stable placeholder used in test titles in place of the actual revision.
from_rev_desc=" <limit-rev>.."

# Usage: perf_pickaxe <icase> <opts> <pattern>...
#
# Registers one test_perf per <pattern>, running "git log" with <icase>
# (either empty or "-i ") and <opts> (the pickaxe switch, glued directly
# in front of the single-quoted pattern) over the bounded revision range.
perf_pickaxe () {
	pickaxe_icase=$1
	pickaxe_opts=$2
	shift 2
	for pickaxe_pattern
	do
		test_perf "git log $pickaxe_icase$pickaxe_opts'$pickaxe_pattern'$from_rev_desc" "
git log --pretty=format:%H $pickaxe_icase$pickaxe_opts'$pickaxe_pattern'$from_rev
"
	done
}

for icase in '' '-i '
do
	# -S (no regex)
	perf_pickaxe "$icase" '-S' \
		'int main' \
		'æ'

	# -S (regex)
	perf_pickaxe "$icase" '--pickaxe-regex -S' \
		'(int|void|null)' \
		'if *\([^ ]+ & ' \
		'[àáâãäåæñøùúûüýþ]'

	# -G
	perf_pickaxe "$icase" '-G' \
		'(int|void|null)' \
		'if *\([^ ]+ & ' \
		'[àáâãäåæñøùúûüýþ]'
done

test_done

View File

@ -55,6 +55,43 @@ test_expect_success setup '
git rev-parse --verify HEAD >expect_second
'
# Option-parsing errors of the pickaxe switches:
#  - -S and -G without a value exit with status 129 and a
#    "switch ... requires a value" message;
#  - combining -G, -S, --find-object (and --pickaxe-all with
#    --find-object) exits with status 128 and a "mutually exclusive"
#    message.
test_expect_success 'usage' '
	for pickaxe_opt in -S -G
	do
		test_expect_code 129 git log $pickaxe_opt 2>err &&
		test_i18ngrep "switch.*requires a value" err ||
		return 1
	done &&

	for bad_combo in \
		"-Gregex -Sstring" \
		"-Gregex --find-object=HEAD" \
		"-Sstring --find-object=HEAD" \
		"--pickaxe-all --find-object=HEAD"
	do
		test_expect_code 128 git log $bad_combo 2>err &&
		grep "mutually exclusive" err ||
		return 1
	done
'
# -G combined with --pickaxe-regex is rejected: "git log" must exit with
# status 128 and report the options as "mutually exclusive" on stderr.
test_expect_success 'usage: --pickaxe-regex' '
test_expect_code 128 git log -Gregex --pickaxe-regex 2>err &&
grep "mutually exclusive" err
'
# --no-pickaxe-regex is not a recognized argument. With either -S or -G,
# "git log" must exit with status 128 and print exactly the
# "unrecognized argument" error on stderr.
# Both invocations capture stderr into "actual" so that each one is
# really compared against "expect" (a stale file is never re-checked).
test_expect_success 'usage: --no-pickaxe-regex' '
cat >expect <<-\EOF &&
fatal: unrecognized argument: --no-pickaxe-regex
EOF
test_expect_code 128 git log -Sstring --no-pickaxe-regex 2>actual &&
test_cmp expect actual &&
test_expect_code 128 git log -Gstring --no-pickaxe-regex 2>actual &&
test_cmp expect actual
'
test_log expect_initial --grep initial
test_log expect_nomatch --grep InItial
test_log_icase expect_initial --grep InItial
@ -106,38 +143,83 @@ test_expect_success 'log -S --no-textconv (missing textconv tool)' '
rm .gitattributes
'
# Build the GS-plain repository used by the plain-vs-regex -G/-S tests.
# Commits A-D append "a", "a a", "b" and "[b]" to data.txt; commit E is
# made without --append and passes "" as the content.
# The expected logs are selected by commit message with --grep and are
# written to the current directory, not inside GS-plain.
# NOTE(review): the --grep patterns assume test_commit uses its first
# argument (A, B, ...) as the commit message -- confirm in test-lib.
test_expect_success 'setup log -[GS] plain & regex' '
test_create_repo GS-plain &&
test_commit -C GS-plain --append A data.txt "a" &&
test_commit -C GS-plain --append B data.txt "a a" &&
test_commit -C GS-plain --append C data.txt "b" &&
test_commit -C GS-plain --append D data.txt "[b]" &&
test_commit -C GS-plain E data.txt "" &&
# We also include E, the deletion commit
git -C GS-plain log --grep="[ABE]" >A-to-B-then-E-log &&
git -C GS-plain log --grep="[CDE]" >C-to-D-then-E-log &&
git -C GS-plain log --grep="[DE]" >D-then-E-log &&
git -C GS-plain log >full-log
'
# The -G regex is not matched against the leading "+"/"-" diff markers:
# "[+-]a" must find no commit at all, while "^a" must produce the same
# output as A-to-B-then-E-log.
test_expect_success 'log -G trims diff new/old [-+]' '
git -C GS-plain log -G"[+-]a" >log &&
test_must_be_empty log &&
git -C GS-plain log -G"^a" >log &&
test_cmp log A-to-B-then-E-log
'
# Without --pickaxe-regex the -S argument is a fixed string: "a" matches
# A-to-B-then-E-log, "[a]" matches nothing, and "[b]" matches only
# D-then-E-log.
# With --pickaxe-regex the same arguments are regexes: "[a]" matches
# A-to-B-then-E-log and "[b]" matches C-to-D-then-E-log.
test_expect_success 'log -S<pat> is not a regex, but -S<pat> --pickaxe-regex is' '
git -C GS-plain log -S"a" >log &&
test_cmp log A-to-B-then-E-log &&
git -C GS-plain log -S"[a]" >log &&
test_must_be_empty log &&
git -C GS-plain log -S"[a]" --pickaxe-regex >log &&
test_cmp log A-to-B-then-E-log &&
git -C GS-plain log -S"[b]" >log &&
test_cmp log D-then-E-log &&
git -C GS-plain log -S"[b]" --pickaxe-regex >log &&
test_cmp log C-to-D-then-E-log
'
# Create the history for the binary-file tests: data.bin contains a NUL
# byte and goes through three commits; the complete log is saved as
# full-log for later comparison.
# NOTE(review): this body appears to interleave two versions of the
# setup -- an orphan branch GS-binary-and-text built with plain git
# commands, and a separate GS-bin-txt repository built with test_commit --
# as if diff +/- markers were lost. The first "git log >full-log" line
# also has no trailing "&&". Confirm against the real test file which
# version is current.
test_expect_success 'setup log -[GS] binary & --text' '
git checkout --orphan GS-binary-and-text &&
git read-tree --empty &&
printf "a\na\0a\n" >data.bin &&
git add data.bin &&
git commit -m "create binary file" data.bin &&
printf "a\na\0a\n" >>data.bin &&
git commit -m "modify binary file" data.bin &&
git rm data.bin &&
git commit -m "delete binary file" data.bin &&
git log >full-log
test_create_repo GS-bin-txt &&
test_commit -C GS-bin-txt --printf A data.bin "a\na\0a\n" &&
test_commit -C GS-bin-txt --append --printf B data.bin "a\na\0a\n" &&
test_commit -C GS-bin-txt C data.bin "" &&
git -C GS-bin-txt log >full-log
'
# -G without further options must not report commits for the binary
# file: the resulting log has to be empty.
# NOTE(review): the plain "git log" line and the "git -C GS-bin-txt log"
# line look like old/new versions of the same command (both write to
# "log", so only the second result is checked) -- confirm.
test_expect_success 'log -G ignores binary files' '
git log -Ga >log &&
git -C GS-bin-txt log -Ga >log &&
test_must_be_empty log
'
# With -a, -G does search the binary file: every commit in full-log
# must be reported.
# NOTE(review): the plain "git log" line and the "git -C GS-bin-txt log"
# line look like old/new versions of the same command (both write to
# "log", so only the second result is checked) -- confirm.
test_expect_success 'log -G looks into binary files with -a' '
git log -a -Ga >log &&
git -C GS-bin-txt log -a -Ga >log &&
test_cmp log full-log
'
# With a textconv filter configured (diff.bin.textconv=cat, selected for
# all paths through .gitattributes), -G searches the binary file and
# must report every commit in full-log.
# The .gitattributes file is removed again when the test finishes.
# NOTE(review): the top-level .gitattributes lines and the
# GS-bin-txt/.gitattributes subshell look like old/new versions of the
# same steps (both write the result to "log") -- confirm.
test_expect_success 'log -G looks into binary files with textconv filter' '
test_when_finished "rm .gitattributes" &&
echo "* diff=bin" >.gitattributes &&
git -c diff.bin.textconv=cat log -Ga >log &&
test_when_finished "rm GS-bin-txt/.gitattributes" &&
(
cd GS-bin-txt &&
echo "* diff=bin" >.gitattributes &&
git -c diff.bin.textconv=cat log -Ga >../log
) &&
test_cmp log full-log
'
# -S searches binary files without any extra option: every commit in
# full-log must be reported.
# NOTE(review): the plain "git log" line and the "git -C GS-bin-txt log"
# line look like old/new versions of the same command (both write to
# "log", so only the second result is checked) -- confirm.
test_expect_success 'log -S looks into binary files' '
git log -Sa >log &&
git -C GS-bin-txt log -Sa >log &&
test_cmp log full-log
'
# -S with --pickaxe-regex also searches the binary file: both the plain
# pattern "a" and the bracket expression "[a]" must report every commit
# in full-log.
test_expect_success 'log -S --pickaxe-regex looks into binary files' '
git -C GS-bin-txt log --pickaxe-regex -Sa >log &&
test_cmp log full-log &&
git -C GS-bin-txt log --pickaxe-regex -S"[a]" >log &&
test_cmp log full-log
'

View File

@ -59,7 +59,7 @@ test_expect_success 'setup' "
git commit -m.
"
# Simple fixed-string matching that can use kwset (no -i && non-ASCII)
# Simple fixed-string matching
nul_match P P P '-F' 'yQf'
nul_match P P P '-F' 'yQx'
nul_match P P P '-Fi' 'YQf'
@ -78,7 +78,7 @@ nul_match P P P '-Fi' '[Y]QF'
nul_match P P P '-F' 'æQ[ð]'
nul_match P P P '-F' '[æ]Qð'
# The -F kwset codepath can't handle -i && non-ASCII...
# Matching pattern and subject case with -i
nul_match P 1 1 '-i' '[æ]Qð'
# ...PCRE v2 only matches non-ASCII with -i casefolding under UTF-8