From f73613ac33aad6f08aa1bc07e596202f19e760d1 Mon Sep 17 00:00:00 2001 From: Phillip Wood Date: Thu, 9 Dec 2021 10:29:55 +0000 Subject: [PATCH 01/15] diff --color-moved: add perf tests Add some tests so we can monitor changes to the performance of the move detection code. The tests record the performance --color-moved and --color-moved-ws=allow-indentation-change for a large diff and a sequence of smaller diffs. The range of commits used for the large diff can be customized by exporting TEST_REV_A and TEST_REV_B when running the test. Signed-off-by: Phillip Wood Signed-off-by: Junio C Hamano --- t/perf/p4002-diff-color-moved.sh | 57 ++++++++++++++++++++++++++++++++ 1 file changed, 57 insertions(+) create mode 100755 t/perf/p4002-diff-color-moved.sh diff --git a/t/perf/p4002-diff-color-moved.sh b/t/perf/p4002-diff-color-moved.sh new file mode 100755 index 0000000000..ab2af931c0 --- /dev/null +++ b/t/perf/p4002-diff-color-moved.sh @@ -0,0 +1,57 @@ +#!/bin/sh + +test_description='Tests diff --color-moved performance' +. ./perf-lib.sh + +test_perf_default_repo + +# The endpoints of the diff can be customized by setting TEST_REV_A +# and TEST_REV_B in the environment when running this test. + +rev="${TEST_REV_A:-v2.28.0}" +if ! rev_a="$(git rev-parse --quiet --verify "$rev")" +then + skip_all="skipping because '$rev' was not found. \ + Use TEST_REV_A and TEST_REV_B to set the revs to use" + test_done +fi +rev="${TEST_REV_B:-v2.29.0}" +if ! rev_b="$(git rev-parse --quiet --verify "$rev")" +then + skip_all="skipping because '$rev' was not found. \ + Use TEST_REV_A and TEST_REV_B to set the revs to use" + test_done +fi + +GIT_PAGER_IN_USE=1 +test_export GIT_PAGER_IN_USE rev_a rev_b + +test_perf 'diff --no-color-moved --no-color-moved-ws large change' ' + git diff --no-color-moved --no-color-moved-ws $rev_a $rev_b +' + +test_perf 'diff --color-moved --no-color-moved-ws large change' ' + git diff --color-moved=zebra --no-color-moved-ws $rev_a $rev_b +' + +test_perf 'diff --color-moved-ws=allow-indentation-change large change' ' + git diff --color-moved=zebra --color-moved-ws=allow-indentation-change \ + $rev_a $rev_b +' + +test_perf 'log --no-color-moved --no-color-moved-ws' ' + git log --no-color-moved --no-color-moved-ws --no-merges --patch \ + -n1000 $rev_b +' + +test_perf 'log --color-moved --no-color-moved-ws' ' + git log --color-moved=zebra --no-color-moved-ws --no-merges --patch \ + -n1000 $rev_b +' + +test_perf 'log --color-moved-ws=allow-indentation-change' ' + git log --color-moved=zebra --color-moved-ws=allow-indentation-change \ + --no-merges --patch -n1000 $rev_b +' + +test_done From bea084ba41ffcaf5896522e48d67682b6a45b04c Mon Sep 17 00:00:00 2001 From: Phillip Wood Date: Thu, 9 Dec 2021 10:29:56 +0000 Subject: [PATCH 02/15] diff --color-moved: clear all flags on blocks that are too short If a block of potentially moved lines is not long enough then the DIFF_SYMBOL_MOVED_LINE flag is cleared on the matching lines so they are not marked as moved. To avoid problems when we start rewinding after an unsuccessful match in a couple of commits time make sure all the move related flags are cleared, not just DIFF_SYMBOL_MOVED_LINE. Signed-off-by: Phillip Wood Signed-off-by: Junio C Hamano --- diff.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/diff.c b/diff.c index 52c791574b..bd8e4ec975 100644 --- a/diff.c +++ b/diff.c @@ -1114,6 +1114,8 @@ static int shrink_potential_moved_blocks(struct moved_block *pmb, * NEEDSWORK: This uses the same heuristic as blame_entry_score() in blame.c. * Think of a way to unify them. */ +#define DIFF_SYMBOL_MOVED_LINE_ZEBRA_MASK \ + (DIFF_SYMBOL_MOVED_LINE | DIFF_SYMBOL_MOVED_LINE_ALT) static int adjust_last_block(struct diff_options *o, int n, int block_length) { int i, alnum_count = 0; @@ -1130,7 +1132,7 @@ static int adjust_last_block(struct diff_options *o, int n, int block_length) } } for (i = 1; i < block_length + 1; i++) - o->emitted_symbols->buf[n - i].flags &= ~DIFF_SYMBOL_MOVED_LINE; + o->emitted_symbols->buf[n - i].flags &= ~DIFF_SYMBOL_MOVED_LINE_ZEBRA_MASK; return 0; } @@ -1237,8 +1239,6 @@ static void mark_color_as_moved(struct diff_options *o, free(pmb); } -#define DIFF_SYMBOL_MOVED_LINE_ZEBRA_MASK \ - (DIFF_SYMBOL_MOVED_LINE | DIFF_SYMBOL_MOVED_LINE_ALT) static void dim_moved_lines(struct diff_options *o) { int n; From 7dfe427107a3765456e9fa33c56da29fea24cf0f Mon Sep 17 00:00:00 2001 From: Phillip Wood Date: Thu, 9 Dec 2021 10:29:57 +0000 Subject: [PATCH 03/15] diff --color-moved: factor out function This code is quite heavily indented and having it in its own function simplifies an upcoming change. Signed-off-by: Phillip Wood Signed-off-by: Junio C Hamano --- diff.c | 51 ++++++++++++++++++++++++++++++++++----------------- 1 file changed, 34 insertions(+), 17 deletions(-) diff --git a/diff.c b/diff.c index bd8e4ec975..09af94e018 100644 --- a/diff.c +++ b/diff.c @@ -1098,6 +1098,38 @@ static int shrink_potential_moved_blocks(struct moved_block *pmb, return rp + 1; } +static void fill_potential_moved_blocks(struct diff_options *o, + struct hashmap *hm, + struct moved_entry *match, + struct emitted_diff_symbol *l, + struct moved_block **pmb_p, + int *pmb_alloc_p, int *pmb_nr_p) + +{ + struct moved_block *pmb = *pmb_p; + int pmb_alloc = *pmb_alloc_p, pmb_nr = *pmb_nr_p; + + /* + * The current line is the start of a new block. + * Setup the set of potential blocks. + */ + hashmap_for_each_entry_from(hm, match, ent) { + ALLOC_GROW(pmb, pmb_nr + 1, pmb_alloc); + if (o->color_moved_ws_handling & + COLOR_MOVED_WS_ALLOW_INDENTATION_CHANGE) { + if (compute_ws_delta(l, match->es, &(pmb[pmb_nr]).wsd)) + pmb[pmb_nr++].match = match; + } else { + pmb[pmb_nr].wsd = 0; + pmb[pmb_nr++].match = match; + } + } + + *pmb_p = pmb; + *pmb_alloc_p = pmb_alloc; + *pmb_nr_p = pmb_nr; +} + /* * If o->color_moved is COLOR_MOVED_PLAIN, this function does nothing. * @@ -1198,23 +1230,8 @@ static void mark_color_as_moved(struct diff_options *o, pmb_nr = shrink_potential_moved_blocks(pmb, pmb_nr); if (pmb_nr == 0) { - /* - * The current line is the start of a new block. - * Setup the set of potential blocks. - */ - hashmap_for_each_entry_from(hm, match, ent) { - ALLOC_GROW(pmb, pmb_nr + 1, pmb_alloc); - if (o->color_moved_ws_handling & - COLOR_MOVED_WS_ALLOW_INDENTATION_CHANGE) { - if (compute_ws_delta(l, match->es, - &pmb[pmb_nr].wsd)) - pmb[pmb_nr++].match = match; - } else { - pmb[pmb_nr].wsd = 0; - pmb[pmb_nr++].match = match; - } - } - + fill_potential_moved_blocks( + o, hm, match, l, &pmb, &pmb_alloc, &pmb_nr); if (adjust_last_block(o, n, block_length) && pmb_nr && last_symbol != l->s) flipped_block = (flipped_block + 1) % 2; From 0990658bf85a0763ffd628b1dd57a33c27b25450 Mon Sep 17 00:00:00 2001 From: Phillip Wood Date: Thu, 9 Dec 2021 10:29:58 +0000 Subject: [PATCH 04/15] diff --color-moved: rewind when discarding pmb Signed-off-by: Junio C Hamano --- diff.c | 28 ++++++++++++++++++----- t/t4015-diff-whitespace.sh | 46 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 69 insertions(+), 5 deletions(-) diff --git a/diff.c b/diff.c index 09af94e018..1e1b5127d1 100644 --- a/diff.c +++ b/diff.c @@ -1205,7 +1205,15 @@ static void mark_color_as_moved(struct diff_options *o, if (!match) { int i; - adjust_last_block(o, n, block_length); + if (!adjust_last_block(o, n, block_length) && + block_length > 1) { + /* + * Rewind in case there is another match + * starting at the second line of the block + */ + match = NULL; + n -= block_length; + } for(i = 0; i < pmb_nr; i++) moved_block_clear(&pmb[i]); pmb_nr = 0; @@ -1230,10 +1238,20 @@ static void mark_color_as_moved(struct diff_options *o, pmb_nr = shrink_potential_moved_blocks(pmb, pmb_nr); if (pmb_nr == 0) { - fill_potential_moved_blocks( - o, hm, match, l, &pmb, &pmb_alloc, &pmb_nr); - if (adjust_last_block(o, n, block_length) && - pmb_nr && last_symbol != l->s) + int contiguous = adjust_last_block(o, n, block_length); + + if (!contiguous && block_length > 1) + /* + * Rewind in case there is another match + * starting at the second line of the block + */ + n -= block_length; + else + fill_potential_moved_blocks(o, hm, match, l, + &pmb, &pmb_alloc, + &pmb_nr); + + if (contiguous && pmb_nr && last_symbol != l->s) flipped_block = (flipped_block + 1) % 2; else flipped_block = 0; diff --git a/t/t4015-diff-whitespace.sh b/t/t4015-diff-whitespace.sh index 2c13b62d3c..308dc13659 100755 --- a/t/t4015-diff-whitespace.sh +++ b/t/t4015-diff-whitespace.sh @@ -1833,6 +1833,52 @@ test_expect_success '--color-moved treats adjacent blocks as separate for MIN_AL test_cmp expected actual ' +test_expect_success '--color-moved rewinds for MIN_ALNUM_COUNT' ' + git reset --hard && + test_write_lines >file \ + A B C one two three four five six seven D E F G H I J && + git add file && + test_write_lines >file \ + one two A B C D E F G H I J two three four five six seven && + git diff --color-moved=zebra -- file && + + git diff --color-moved=zebra --color -- file >actual.raw && + grep -v "index" actual.raw | test_decode_color >actual && + cat >expected <<-\EOF && + diff --git a/file b/file + --- a/file + +++ b/file + @@ -1,13 +1,8 @@ + +one + +two + A + B + C + -one + -two + -three + -four + -five + -six + -seven + D + E + F + @@ -15,3 +10,9 @@ G + H + I + J + +two + +three + +four + +five + +six + +seven + EOF + + test_cmp expected actual +' + test_expect_success 'move detection with submodules' ' test_create_repo bananas && echo ripe >bananas/recipe && From eb315457f65e1d6b77cde3933b6a175e649fb34b Mon Sep 17 00:00:00 2001 From: Phillip Wood Date: Thu, 9 Dec 2021 10:29:59 +0000 Subject: [PATCH 05/15] diff --color-moved=zebra: fix alternate coloring b0a2ba4776 ("diff --color-moved=zebra: be stricter with color alternation", 2018-11-23) sought to avoid using the alternate colors unless there are two adjacent moved blocks of the same sign. Unfortunately it contains two bugs that prevented it from fixing the problem properly. Firstly `last_symbol` is reset at the start of each iteration of the loop losing the symbol of the last line and secondly when deciding whether to use the alternate color it should be checking if the current line is the same sign of the last line, not a different sign. The combination of the two errors means that we still use the alternate color when we should do but we also use it when we shouldn't. This is most noticable when using --color-moved-ws=allow-indentation-change with hunks like -this line gets indented + this line gets indented where the post image is colored with newMovedAlternate rather than newMoved. While this does not matter much, the next commit will change the coloring to be correct in this case, so lets fix the bug here to make it clear why the output is changing and add a regression test. Signed-off-by: Phillip Wood Signed-off-by: Junio C Hamano --- diff.c | 4 +-- t/t4015-diff-whitespace.sh | 72 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 74 insertions(+), 2 deletions(-) diff --git a/diff.c b/diff.c index 1e1b5127d1..53f0df7532 100644 --- a/diff.c +++ b/diff.c @@ -1176,6 +1176,7 @@ static void mark_color_as_moved(struct diff_options *o, struct moved_block *pmb = NULL; /* potentially moved blocks */ int pmb_nr = 0, pmb_alloc = 0; int n, flipped_block = 0, block_length = 0; + enum diff_symbol last_symbol = 0; for (n = 0; n < o->emitted_symbols->nr; n++) { @@ -1183,7 +1184,6 @@ static void mark_color_as_moved(struct diff_options *o, struct moved_entry *key; struct moved_entry *match = NULL; struct emitted_diff_symbol *l = &o->emitted_symbols->buf[n]; - enum diff_symbol last_symbol = 0; switch (l->s) { case DIFF_SYMBOL_PLUS: @@ -1251,7 +1251,7 @@ static void mark_color_as_moved(struct diff_options *o, &pmb, &pmb_alloc, &pmb_nr); - if (contiguous && pmb_nr && last_symbol != l->s) + if (contiguous && pmb_nr && last_symbol == l->s) flipped_block = (flipped_block + 1) % 2; else flipped_block = 0; diff --git a/t/t4015-diff-whitespace.sh b/t/t4015-diff-whitespace.sh index 308dc13659..4e0fd76c6c 100755 --- a/t/t4015-diff-whitespace.sh +++ b/t/t4015-diff-whitespace.sh @@ -1442,6 +1442,78 @@ test_expect_success 'detect permutations inside moved code -- dimmed-zebra' ' test_cmp expected actual ' +test_expect_success 'zebra alternate color is only used when necessary' ' + cat >old.txt <<-\EOF && + line 1A should be marked as oldMoved newMovedAlternate + line 1B should be marked as oldMoved newMovedAlternate + unchanged + line 2A should be marked as oldMoved newMovedAlternate + line 2B should be marked as oldMoved newMovedAlternate + line 3A should be marked as oldMovedAlternate newMoved + line 3B should be marked as oldMovedAlternate newMoved + unchanged + line 4A should be marked as oldMoved newMovedAlternate + line 4B should be marked as oldMoved newMovedAlternate + line 5A should be marked as oldMovedAlternate newMoved + line 5B should be marked as oldMovedAlternate newMoved + line 6A should be marked as oldMoved newMoved + line 6B should be marked as oldMoved newMoved + EOF + cat >new.txt <<-\EOF && + line 1A should be marked as oldMoved newMovedAlternate + line 1B should be marked as oldMoved newMovedAlternate + unchanged + line 3A should be marked as oldMovedAlternate newMoved + line 3B should be marked as oldMovedAlternate newMoved + line 2A should be marked as oldMoved newMovedAlternate + line 2B should be marked as oldMoved newMovedAlternate + unchanged + line 6A should be marked as oldMoved newMoved + line 6B should be marked as oldMoved newMoved + line 4A should be marked as oldMoved newMovedAlternate + line 4B should be marked as oldMoved newMovedAlternate + line 5A should be marked as oldMovedAlternate newMoved + line 5B should be marked as oldMovedAlternate newMoved + EOF + test_expect_code 1 git diff --no-index --color --color-moved=zebra \ + --color-moved-ws=allow-indentation-change \ + old.txt new.txt >output && + grep -v index output | test_decode_color >actual && + cat >expected <<-\EOF && + diff --git a/old.txt b/new.txt + --- a/old.txt + +++ b/new.txt + @@ -1,14 +1,14 @@ + -line 1A should be marked as oldMoved newMovedAlternate + -line 1B should be marked as oldMoved newMovedAlternate + + line 1A should be marked as oldMoved newMovedAlternate + + line 1B should be marked as oldMoved newMovedAlternate + unchanged + -line 2A should be marked as oldMoved newMovedAlternate + -line 2B should be marked as oldMoved newMovedAlternate + -line 3A should be marked as oldMovedAlternate newMoved + -line 3B should be marked as oldMovedAlternate newMoved + + line 3A should be marked as oldMovedAlternate newMoved + + line 3B should be marked as oldMovedAlternate newMoved + + line 2A should be marked as oldMoved newMovedAlternate + + line 2B should be marked as oldMoved newMovedAlternate + unchanged + -line 4A should be marked as oldMoved newMovedAlternate + -line 4B should be marked as oldMoved newMovedAlternate + -line 5A should be marked as oldMovedAlternate newMoved + -line 5B should be marked as oldMovedAlternate newMoved + -line 6A should be marked as oldMoved newMoved + -line 6B should be marked as oldMoved newMoved + + line 6A should be marked as oldMoved newMoved + + line 6B should be marked as oldMoved newMoved + + line 4A should be marked as oldMoved newMovedAlternate + + line 4B should be marked as oldMoved newMovedAlternate + + line 5A should be marked as oldMovedAlternate newMoved + + line 5B should be marked as oldMovedAlternate newMoved + EOF + test_cmp expected actual +' + test_expect_success 'cmd option assumes configured colored-moved' ' test_config color.diff.oldMoved "magenta" && test_config color.diff.newMoved "cyan" && From eb893525041a53b968a46000ed5cb66ffc725853 Mon Sep 17 00:00:00 2001 From: Phillip Wood Date: Thu, 9 Dec 2021 10:30:00 +0000 Subject: [PATCH 06/15] diff --color-moved: avoid false short line matches and bad zebra coloring When marking moved lines it is possible for a block of potential matched lines to extend past a change in sign when there is a sequence of added lines whose text matches the text of a sequence of deleted and added lines. Most of the time either `match` will be NULL or `pmb_advance_or_null()` will fail when the loop encounters a change of sign but there are corner cases where `match` is non-NULL and `pmb_advance_or_null()` successfully advances the moved block despite the change in sign. One consequence of this is highlighting a short line as moved when it should not be. For example -moved line # Correctly highlighted as moved +short line # Wrongly highlighted as moved context +moved line # Correctly highlighted as moved +short line context -short line The other consequence is coloring a moved addition following a moved deletion in the wrong color. In the example below the first "+moved line 3" should be highlighted as newMoved not newMovedAlternate. -moved line 1 # Correctly highlighted as oldMoved -moved line 2 # Correctly highlighted as oldMovedAlternate +moved line 3 # Wrongly highlighted as newMovedAlternate context # Everything else is highlighted correctly +moved line 2 +moved line 3 context +moved line 1 -moved line 3 These false matches are more likely when using --color-moved-ws with the exception of --color-moved-ws=allow-indentation-change which ties the sign of the current whitespace delta to the sign of the line to avoid this problem. The fix is to check that the sign of the new line being matched is the same as the sign of the line that started the block of potential matches. Signed-off-by: Phillip Wood Signed-off-by: Junio C Hamano --- diff.c | 17 ++++++---- t/t4015-diff-whitespace.sh | 65 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 76 insertions(+), 6 deletions(-) diff --git a/diff.c b/diff.c index 53f0df7532..efba278935 100644 --- a/diff.c +++ b/diff.c @@ -1176,7 +1176,7 @@ static void mark_color_as_moved(struct diff_options *o, struct moved_block *pmb = NULL; /* potentially moved blocks */ int pmb_nr = 0, pmb_alloc = 0; int n, flipped_block = 0, block_length = 0; - enum diff_symbol last_symbol = 0; + enum diff_symbol moved_symbol = DIFF_SYMBOL_BINARY_DIFF_HEADER; for (n = 0; n < o->emitted_symbols->nr; n++) { @@ -1202,7 +1202,7 @@ static void mark_color_as_moved(struct diff_options *o, flipped_block = 0; } - if (!match) { + if (pmb_nr && (!match || l->s != moved_symbol)) { int i; if (!adjust_last_block(o, n, block_length) && @@ -1219,12 +1219,13 @@ static void mark_color_as_moved(struct diff_options *o, pmb_nr = 0; block_length = 0; flipped_block = 0; - last_symbol = l->s; + } + if (!match) { + moved_symbol = DIFF_SYMBOL_BINARY_DIFF_HEADER; continue; } if (o->color_moved == COLOR_MOVED_PLAIN) { - last_symbol = l->s; l->flags |= DIFF_SYMBOL_MOVED_LINE; continue; } @@ -1251,11 +1252,16 @@ static void mark_color_as_moved(struct diff_options *o, &pmb, &pmb_alloc, &pmb_nr); - if (contiguous && pmb_nr && last_symbol == l->s) + if (contiguous && pmb_nr && moved_symbol == l->s) flipped_block = (flipped_block + 1) % 2; else flipped_block = 0; + if (pmb_nr) + moved_symbol = l->s; + else + moved_symbol = DIFF_SYMBOL_BINARY_DIFF_HEADER; + block_length = 0; } @@ -1265,7 +1271,6 @@ static void mark_color_as_moved(struct diff_options *o, if (flipped_block && o->color_moved != COLOR_MOVED_BLOCKS) l->flags |= DIFF_SYMBOL_MOVED_LINE_ALT; } - last_symbol = l->s; } adjust_last_block(o, n, block_length); diff --git a/t/t4015-diff-whitespace.sh b/t/t4015-diff-whitespace.sh index 4e0fd76c6c..15782c879d 100755 --- a/t/t4015-diff-whitespace.sh +++ b/t/t4015-diff-whitespace.sh @@ -1514,6 +1514,71 @@ test_expect_success 'zebra alternate color is only used when necessary' ' test_cmp expected actual ' +test_expect_success 'short lines of opposite sign do not get marked as moved' ' + cat >old.txt <<-\EOF && + this line should be marked as moved + unchanged + unchanged + unchanged + unchanged + too short + this line should be marked as oldMoved newMoved + this line should be marked as oldMovedAlternate newMoved + unchanged 1 + unchanged 2 + unchanged 3 + unchanged 4 + this line should be marked as oldMoved newMoved/newMovedAlternate + EOF + cat >new.txt <<-\EOF && + too short + unchanged + unchanged + this line should be marked as moved + too short + unchanged + unchanged + this line should be marked as oldMoved newMoved/newMovedAlternate + unchanged 1 + unchanged 2 + this line should be marked as oldMovedAlternate newMoved + this line should be marked as oldMoved newMoved/newMovedAlternate + unchanged 3 + this line should be marked as oldMoved newMoved + unchanged 4 + EOF + test_expect_code 1 git diff --no-index --color --color-moved=zebra \ + old.txt new.txt >output && cat output && + grep -v index output | test_decode_color >actual && + cat >expect <<-\EOF && + diff --git a/old.txt b/new.txt + --- a/old.txt + +++ b/new.txt + @@ -1,13 +1,15 @@ + -this line should be marked as moved + +too short + unchanged + unchanged + +this line should be marked as moved + +too short + unchanged + unchanged + -too short + -this line should be marked as oldMoved newMoved + -this line should be marked as oldMovedAlternate newMoved + +this line should be marked as oldMoved newMoved/newMovedAlternate + unchanged 1 + unchanged 2 + +this line should be marked as oldMovedAlternate newMoved + +this line should be marked as oldMoved newMoved/newMovedAlternate + unchanged 3 + +this line should be marked as oldMoved newMoved + unchanged 4 + -this line should be marked as oldMoved newMoved/newMovedAlternate + EOF + test_cmp expect actual +' + test_expect_success 'cmd option assumes configured colored-moved' ' test_config color.diff.oldMoved "magenta" && test_config color.diff.newMoved "cyan" && From 76e32d619323c89e44ebfcfdb9d44766937b17dc Mon Sep 17 00:00:00 2001 From: Phillip Wood Date: Thu, 9 Dec 2021 10:30:01 +0000 Subject: [PATCH 07/15] diff: simplify allow-indentation-change delta calculation Now that we reliably end a block when the sign changes we don't need the whitespace delta calculation to rely on the sign. Signed-off-by: Phillip Wood Signed-off-by: Junio C Hamano --- diff.c | 13 ++----------- 1 file changed, 2 insertions(+), 11 deletions(-) diff --git a/diff.c b/diff.c index efba278935..9aff167be2 100644 --- a/diff.c +++ b/diff.c @@ -864,23 +864,17 @@ static int compute_ws_delta(const struct emitted_diff_symbol *a, a_width = a->indent_width, b_off = b->indent_off, b_width = b->indent_width; - int delta; if (a_width == INDENT_BLANKLINE && b_width == INDENT_BLANKLINE) { *out = INDENT_BLANKLINE; return 1; } - if (a->s == DIFF_SYMBOL_PLUS) - delta = a_width - b_width; - else - delta = b_width - a_width; - if (a_len - a_off != b_len - b_off || memcmp(a->line + a_off, b->line + b_off, a_len - a_off)) return 0; - *out = delta; + *out = a_width - b_width; return 1; } @@ -924,10 +918,7 @@ static int cmp_in_block_with_wsd(const struct diff_options *o, * match those of the current block and that the text of 'l' and 'cur' * after the indentation match. */ - if (cur->es->s == DIFF_SYMBOL_PLUS) - delta = a_width - c_width; - else - delta = c_width - a_width; + delta = c_width - a_width; /* * If the previous lines of this block were all blank then set its From 52d14e166d7360b17f74ae169a83e5e642892bdb Mon Sep 17 00:00:00 2001 From: Phillip Wood Date: Thu, 9 Dec 2021 10:30:02 +0000 Subject: [PATCH 08/15] diff --color-moved-ws=allow-indentation-change: simplify and optimize If we already have a block of potentially moved lines then as we move down the diff we need to check if the next line of each potentially moved line matches the current line of the diff. The implementation of --color-moved-ws=allow-indentation-change was needlessly performing this check on all the lines in the diff that matched the current line rather than just the current line. To exacerbate the problem finding all the other lines in the diff that match the current line involves a fuzzy lookup so we were wasting even more time performing a second comparison to filter out the non-matching lines. Fixing this reduces time to run git diff --color-moved-ws=allow-indentation-change v2.28.0 v2.29.0 by 93% compared to master and simplifies the code. Test HEAD^ HEAD --------------------------------------------------------------------------------------------------------------- 4002.1: diff --no-color-moved --no-color-moved-ws large change 0.38 (0.35+0.03) 0.38(0.35+0.03) +0.0% 4002.2: diff --color-moved --no-color-moved-ws large change 0.86 (0.80+0.06) 0.87(0.83+0.04) +1.2% 4002.3: diff --color-moved-ws=allow-indentation-change large change 19.01(18.93+0.06) 0.97(0.92+0.04) -94.9% 4002.4: log --no-color-moved --no-color-moved-ws 1.16 (1.06+0.09) 1.17(1.06+0.10) +0.9% 4002.5: log --color-moved --no-color-moved-ws 1.32 (1.25+0.07) 1.32(1.24+0.08) +0.0% 4002.6: log --color-moved-ws=allow-indentation-change 1.71 (1.64+0.06) 1.36(1.25+0.10) -20.5% Test master HEAD --------------------------------------------------------------------------------------------------------------- 4002.1: diff --no-color-moved --no-color-moved-ws large change 0.38 (0.33+0.05) 0.38(0.35+0.03) +0.0% 4002.2: diff --color-moved --no-color-moved-ws large change 0.80 (0.75+0.04) 0.87(0.83+0.04) +8.7% 4002.3: diff --color-moved-ws=allow-indentation-change large change 14.20(14.15+0.05) 0.97(0.92+0.04) -93.2% 4002.4: log --no-color-moved --no-color-moved-ws 1.15 (1.05+0.09) 1.17(1.06+0.10) +1.7% 4002.5: log --color-moved --no-color-moved-ws 1.30 (1.19+0.11) 1.32(1.24+0.08) +1.5% 4002.6: log --color-moved-ws=allow-indentation-change 1.70 (1.63+0.06) 1.36(1.25+0.10) -20.0% Helped-by: Jeff King Signed-off-by: Phillip Wood Signed-off-by: Junio C Hamano --- diff.c | 68 ++++++++++++++++------------------------------------------ 1 file changed, 19 insertions(+), 49 deletions(-) diff --git a/diff.c b/diff.c index 9aff167be2..78a486021a 100644 --- a/diff.c +++ b/diff.c @@ -879,37 +879,21 @@ static int compute_ws_delta(const struct emitted_diff_symbol *a, return 1; } -static int cmp_in_block_with_wsd(const struct diff_options *o, - const struct moved_entry *cur, - const struct moved_entry *match, - struct moved_block *pmb, - int n) +static int cmp_in_block_with_wsd(const struct moved_entry *cur, + const struct emitted_diff_symbol *l, + struct moved_block *pmb) { - struct emitted_diff_symbol *l = &o->emitted_symbols->buf[n]; - int al = cur->es->len, bl = match->es->len, cl = l->len; + int al = cur->es->len, bl = l->len; const char *a = cur->es->line, - *b = match->es->line, - *c = l->line; + *b = l->line; int a_off = cur->es->indent_off, a_width = cur->es->indent_width, - c_off = l->indent_off, - c_width = l->indent_width; + b_off = l->indent_off, + b_width = l->indent_width; int delta; - /* - * We need to check if 'cur' is equal to 'match'. As those - * are from the same (+/-) side, we do not need to adjust for - * indent changes. However these were found using fuzzy - * matching so we do have to check if they are equal. Here we - * just check the lengths. We delay calling memcmp() to check - * the contents until later as if the length comparison for a - * and c fails we can avoid the call all together. - */ - if (al != bl) - return 1; - /* If 'l' and 'cur' are both blank then they match. */ - if (a_width == INDENT_BLANKLINE && c_width == INDENT_BLANKLINE) + if (a_width == INDENT_BLANKLINE && b_width == INDENT_BLANKLINE) return 0; /* @@ -918,7 +902,7 @@ static int cmp_in_block_with_wsd(const struct diff_options *o, * match those of the current block and that the text of 'l' and 'cur' * after the indentation match. */ - delta = c_width - a_width; + delta = b_width - a_width; /* * If the previous lines of this block were all blank then set its @@ -927,9 +911,8 @@ static int cmp_in_block_with_wsd(const struct diff_options *o, if (pmb->wsd == INDENT_BLANKLINE) pmb->wsd = delta; - return !(delta == pmb->wsd && al - a_off == cl - c_off && - !memcmp(a, b, al) && ! - memcmp(a + a_off, c + c_off, al - a_off)); + return !(delta == pmb->wsd && al - a_off == bl - b_off && + !memcmp(a + a_off, b + b_off, al - a_off)); } static int moved_entry_cmp(const void *hashmap_cmp_fn_data, @@ -1030,36 +1013,23 @@ static void pmb_advance_or_null(struct diff_options *o, } static void pmb_advance_or_null_multi_match(struct diff_options *o, - struct moved_entry *match, - struct hashmap *hm, + struct emitted_diff_symbol *l, struct moved_block *pmb, - int pmb_nr, int n) + int pmb_nr) { int i; - char *got_match = xcalloc(1, pmb_nr); - - hashmap_for_each_entry_from(hm, match, ent) { - for (i = 0; i < pmb_nr; i++) { - struct moved_entry *prev = pmb[i].match; - struct moved_entry *cur = (prev && prev->next_line) ? - prev->next_line : NULL; - if (!cur) - continue; - if (!cmp_in_block_with_wsd(o, cur, match, &pmb[i], n)) - got_match[i] |= 1; - } - } for (i = 0; i < pmb_nr; i++) { - if (got_match[i]) { + struct moved_entry *prev = pmb[i].match; + struct moved_entry *cur = (prev && prev->next_line) ? + prev->next_line : NULL; + if (cur && !cmp_in_block_with_wsd(cur, l, &pmb[i])) { /* Advance to the next line */ - pmb[i].match = pmb[i].match->next_line; + pmb[i].match = cur; } else { moved_block_clear(&pmb[i]); } } - - free(got_match); } static int shrink_potential_moved_blocks(struct moved_block *pmb, @@ -1223,7 +1193,7 @@ static void mark_color_as_moved(struct diff_options *o, if (o->color_moved_ws_handling & COLOR_MOVED_WS_ALLOW_INDENTATION_CHANGE) - pmb_advance_or_null_multi_match(o, match, hm, pmb, pmb_nr, n); + pmb_advance_or_null_multi_match(o, l, pmb, pmb_nr); else pmb_advance_or_null(o, match, hm, pmb, pmb_nr); From 08fba1076faf0b2c0bfeda628c84ac15efd27cfb Mon Sep 17 00:00:00 2001 From: Phillip Wood Date: Thu, 9 Dec 2021 10:30:03 +0000 Subject: [PATCH 09/15] diff --color-moved: call comparison function directly This change will allow us to easily combine pmb_advance_or_null() and pmb_advance_or_null_multi_match() in the next commit. Calling xdiff_compare_lines() directly rather than using a function pointer from the hash map has little effect on the run time. Test HEAD^ HEAD ------------------------------------------------------------------------------------------------------------- 4002.1: diff --no-color-moved --no-color-moved-ws large change 0.38(0.35+0.03) 0.38(0.32+0.06) +0.0% 4002.2: diff --color-moved --no-color-moved-ws large change 0.87(0.83+0.04) 0.87(0.80+0.06) +0.0% 4002.3: diff --color-moved-ws=allow-indentation-change large change 0.97(0.92+0.04) 0.97(0.93+0.04) +0.0% 4002.4: log --no-color-moved --no-color-moved-ws 1.17(1.06+0.10) 1.16(1.10+0.05) -0.9% 4002.5: log --color-moved --no-color-moved-ws 1.32(1.24+0.08) 1.31(1.22+0.09) -0.8% 4002.6: log --color-moved-ws=allow-indentation-change 1.36(1.25+0.10) 1.35(1.25+0.10) -0.7% Signed-off-by: Phillip Wood Signed-off-by: Junio C Hamano --- diff.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/diff.c b/diff.c index 78a486021a..22e0edac17 100644 --- a/diff.c +++ b/diff.c @@ -994,17 +994,20 @@ static void add_lines_to_move_detection(struct diff_options *o, } static void pmb_advance_or_null(struct diff_options *o, - struct moved_entry *match, - struct hashmap *hm, + struct emitted_diff_symbol *l, struct moved_block *pmb, int pmb_nr) { int i; + unsigned flags = o->color_moved_ws_handling & XDF_WHITESPACE_FLAGS; + for (i = 0; i < pmb_nr; i++) { struct moved_entry *prev = pmb[i].match; struct moved_entry *cur = (prev && prev->next_line) ? prev->next_line : NULL; - if (cur && !hm->cmpfn(o, &cur->ent, &match->ent, NULL)) { + if (cur && xdiff_compare_lines(cur->es->line, cur->es->len, + l->line, l->len, + flags)) { pmb[i].match = cur; } else { pmb[i].match = NULL; @@ -1195,7 +1198,7 @@ static void mark_color_as_moved(struct diff_options *o, COLOR_MOVED_WS_ALLOW_INDENTATION_CHANGE) pmb_advance_or_null_multi_match(o, l, pmb, pmb_nr); else - pmb_advance_or_null(o, match, hm, pmb, pmb_nr); + pmb_advance_or_null(o, l, pmb, pmb_nr); pmb_nr = shrink_potential_moved_blocks(pmb, pmb_nr); From ff046a0066f55cb730ced3f8eb54d508dcd3a4c7 Mon Sep 17 00:00:00 2001 From: Phillip Wood Date: Thu, 9 Dec 2021 10:30:04 +0000 Subject: [PATCH 10/15] diff --color-moved: unify moved block growth functions After the last two commits pmb_advance_or_null() and pmb_advance_or_null_multi_match() differ only in the comparison they perform. Lets simplify the code by combining them into a single function. Signed-off-by: Phillip Wood Signed-off-by: Junio C Hamano --- diff.c | 41 ++++++++++++----------------------------- 1 file changed, 12 insertions(+), 29 deletions(-) diff --git a/diff.c b/diff.c index 22e0edac17..51f092e724 100644 --- a/diff.c +++ b/diff.c @@ -1002,36 +1002,23 @@ static void pmb_advance_or_null(struct diff_options *o, unsigned flags = o->color_moved_ws_handling & XDF_WHITESPACE_FLAGS; for (i = 0; i < pmb_nr; i++) { + int match; struct moved_entry *prev = pmb[i].match; struct moved_entry *cur = (prev && prev->next_line) ? prev->next_line : NULL; - if (cur && xdiff_compare_lines(cur->es->line, cur->es->len, - l->line, l->len, - flags)) { - pmb[i].match = cur; - } else { - pmb[i].match = NULL; - } - } -} -static void pmb_advance_or_null_multi_match(struct diff_options *o, - struct emitted_diff_symbol *l, - struct moved_block *pmb, - int pmb_nr) -{ - int i; - - for (i = 0; i < pmb_nr; i++) { - struct moved_entry *prev = pmb[i].match; - struct moved_entry *cur = (prev && prev->next_line) ? - prev->next_line : NULL; - if (cur && !cmp_in_block_with_wsd(cur, l, &pmb[i])) { - /* Advance to the next line */ + if (o->color_moved_ws_handling & + COLOR_MOVED_WS_ALLOW_INDENTATION_CHANGE) + match = cur && + !cmp_in_block_with_wsd(cur, l, &pmb[i]); + else + match = cur && + xdiff_compare_lines(cur->es->line, cur->es->len, + l->line, l->len, flags); + if (match) pmb[i].match = cur; - } else { + else moved_block_clear(&pmb[i]); - } } } @@ -1194,11 +1181,7 @@ static void mark_color_as_moved(struct diff_options *o, continue; } - if (o->color_moved_ws_handling & - COLOR_MOVED_WS_ALLOW_INDENTATION_CHANGE) - pmb_advance_or_null_multi_match(o, l, pmb, pmb_nr); - else - pmb_advance_or_null(o, l, pmb, pmb_nr); + pmb_advance_or_null(o, l, pmb, pmb_nr); pmb_nr = shrink_potential_moved_blocks(pmb, pmb_nr); From 0e488f173257e5ab766267ac5a5c4c9f3f1ce343 Mon Sep 17 00:00:00 2001 From: Phillip Wood Date: Thu, 9 Dec 2021 10:30:05 +0000 Subject: [PATCH 11/15] diff --color-moved: shrink potential moved blocks as we go Rather than setting `match` to NULL and then looping over the list of potential matched blocks for a second time to remove blocks with no matches just filter out the blocks with no matches as we go. Signed-off-by: Phillip Wood Signed-off-by: Junio C Hamano --- diff.c | 44 ++++++++------------------------------------ 1 file changed, 8 insertions(+), 36 deletions(-) diff --git a/diff.c b/diff.c index 51f092e724..626fd47aa0 100644 --- a/diff.c +++ b/diff.c @@ -996,12 +996,12 @@ static void add_lines_to_move_detection(struct diff_options *o, static void pmb_advance_or_null(struct diff_options *o, struct emitted_diff_symbol *l, struct moved_block *pmb, - int pmb_nr) + int *pmb_nr) { - int i; + int i, j; unsigned flags = o->color_moved_ws_handling & XDF_WHITESPACE_FLAGS; - for (i = 0; i < pmb_nr; i++) { + for (i = 0, j = 0; i < *pmb_nr; i++) { int match; struct moved_entry *prev = pmb[i].match; struct moved_entry *cur = (prev && prev->next_line) ? @@ -1015,38 +1015,12 @@ static void pmb_advance_or_null(struct diff_options *o, match = cur && xdiff_compare_lines(cur->es->line, cur->es->len, l->line, l->len, flags); - if (match) - pmb[i].match = cur; - else - moved_block_clear(&pmb[i]); - } -} - -static int shrink_potential_moved_blocks(struct moved_block *pmb, - int pmb_nr) -{ - int lp, rp; - - /* Shrink the set of potential block to the remaining running */ - for (lp = 0, rp = pmb_nr - 1; lp <= rp;) { - while (lp < pmb_nr && pmb[lp].match) - lp++; - /* lp points at the first NULL now */ - - while (rp > -1 && !pmb[rp].match) - rp--; - /* rp points at the last non-NULL */ - - if (lp < pmb_nr && rp > -1 && lp < rp) { - pmb[lp] = pmb[rp]; - memset(&pmb[rp], 0, sizeof(pmb[rp])); - rp--; - lp++; + if (match) { + pmb[j] = pmb[i]; + pmb[j++].match = cur; } } - - /* Remember the number of running sets */ - return rp + 1; + *pmb_nr = j; } static void fill_potential_moved_blocks(struct diff_options *o, @@ -1181,9 +1155,7 @@ static void mark_color_as_moved(struct diff_options *o, continue; } - pmb_advance_or_null(o, l, pmb, pmb_nr); - - pmb_nr = shrink_potential_moved_blocks(pmb, pmb_nr); + pmb_advance_or_null(o, l, pmb, &pmb_nr); if (pmb_nr == 0) { int contiguous = adjust_last_block(o, n, block_length); From eec7f53b3150dcc54ea0f4441587724be65c105d Mon Sep 17 00:00:00 2001 From: Phillip Wood Date: Thu, 9 Dec 2021 10:30:06 +0000 Subject: [PATCH 12/15] diff --color-moved: stop clearing potential moved blocks moved_block_clear() was introduced in 74d156f4a1 ("diff --color-moved-ws: fix double free crash", 2018-10-04) to free the memory that was allocated when initializing a potential moved block. However since 21536d077f ("diff --color-moved-ws: modify allow-indentation-change", 2018-11-23) initializing a potential moved block no longer allocates any memory. Up until the last commit we were relying on moved_block_clear() to set the `match` pointer to NULL when a block stopped matching, but since that commit we do not clear a moved block that does not match so it does not make sense to clear them elsewhere. Signed-off-by: Phillip Wood Signed-off-by: Junio C Hamano --- diff.c | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/diff.c b/diff.c index 626fd47aa0..ffbe09937b 100644 --- a/diff.c +++ b/diff.c @@ -807,11 +807,6 @@ struct moved_block { int wsd; /* The whitespace delta of this block */ }; -static void moved_block_clear(struct moved_block *b) -{ - memset(b, 0, sizeof(*b)); -} - #define INDENT_BLANKLINE INT_MIN static void fill_es_indent_data(struct emitted_diff_symbol *es) @@ -1128,8 +1123,6 @@ static void mark_color_as_moved(struct diff_options *o, } if (pmb_nr && (!match || l->s != moved_symbol)) { - int i; - if (!adjust_last_block(o, n, block_length) && block_length > 1) { /* @@ -1139,8 +1132,6 @@ static void mark_color_as_moved(struct diff_options *o, match = NULL; n -= block_length; } - for(i = 0; i < pmb_nr; i++) - moved_block_clear(&pmb[i]); pmb_nr = 0; block_length = 0; flipped_block = 0; @@ -1193,8 +1184,6 @@ static void mark_color_as_moved(struct diff_options *o, } adjust_last_block(o, n, block_length); - for(n = 0; n < pmb_nr; n++) - moved_block_clear(&pmb[n]); free(pmb); } From 25e61909e95ffd376787ce0ecc5dd6cf7d3f0e78 Mon Sep 17 00:00:00 2001 From: Phillip Wood Date: Thu, 9 Dec 2021 10:30:07 +0000 Subject: [PATCH 13/15] diff --color-moved-ws=allow-indentation-change: improve hash lookups As libxdiff does not have a whitespace flag to ignore the indentation the code for --color-moved-ws=allow-indentation-change uses XDF_IGNORE_WHITESPACE and then filters out any hash lookups where there are non-indentation changes. This filtering is inefficient as we have to perform another string comparison. By using the offset data that we have already computed to skip the indentation we can avoid using XDF_IGNORE_WHITESPACE and safely remove the extra checks which improves the performance by 11% and paves the way for the elimination of string comparisons in the next commit. This change slightly increases the run time of other --color-moved modes. This could be avoided by using different comparison functions for the different modes but after the next two commits there is no measurable benefit in doing so. There is a change in behavior for lines that begin with a form-feed or vertical-tab character. Since b46054b374 ("xdiff: use git-compat-util", 2019-04-11) xdiff does not treat '\f' or '\v' as whitespace characters. This means that lines starting with those characters are never considered to be blank and never match a line that does not start with the same character. After this patch a line matching "^[\f\v\r]*[ \t]*$" is considered to be blank by --color-moved-ws=allow-indentation-change and lines beginning "^[\f\v\r]*[ \t]*" can match another line if the suffixes match. This changes the output of git show for d18f76dccf ("compat/regex: use the regex engine from gawk for compat", 2010-08-17) as some lines in the pre-image before a moved block that contain '\f' are now considered moved as well as they match a blank line before the moved lines in the post-image. This commit updates one of the tests to reflect this change. Test HEAD^ HEAD -------------------------------------------------------------------------------------------------------------- 4002.1: diff --no-color-moved --no-color-moved-ws large change 0.38(0.33+0.05) 0.38(0.33+0.05) +0.0% 4002.2: diff --color-moved --no-color-moved-ws large change 0.86(0.82+0.04) 0.88(0.84+0.04) +2.3% 4002.3: diff --color-moved-ws=allow-indentation-change large change 0.97(0.94+0.03) 0.86(0.81+0.05) -11.3% 4002.4: log --no-color-moved --no-color-moved-ws 1.16(1.07+0.09) 1.16(1.06+0.09) +0.0% 4002.5: log --color-moved --no-color-moved-ws 1.32(1.26+0.06) 1.33(1.27+0.05) +0.8% 4002.6: log --color-moved-ws=allow-indentation-change 1.35(1.29+0.06) 1.33(1.24+0.08) -1.5% Signed-off-by: Phillip Wood Signed-off-by: Junio C Hamano --- diff.c | 63 +++++++++++--------------------------- t/t4015-diff-whitespace.sh | 22 ++++++------- 2 files changed, 29 insertions(+), 56 deletions(-) diff --git a/diff.c b/diff.c index ffbe09937b..2085c06367 100644 --- a/diff.c +++ b/diff.c @@ -850,28 +850,15 @@ static void fill_es_indent_data(struct emitted_diff_symbol *es) } static int compute_ws_delta(const struct emitted_diff_symbol *a, - const struct emitted_diff_symbol *b, - int *out) + const struct emitted_diff_symbol *b) { - int a_len = a->len, - b_len = b->len, - a_off = a->indent_off, - a_width = a->indent_width, - b_off = b->indent_off, + int a_width = a->indent_width, b_width = b->indent_width; - if (a_width == INDENT_BLANKLINE && b_width == INDENT_BLANKLINE) { - *out = INDENT_BLANKLINE; - return 1; - } + if (a_width == INDENT_BLANKLINE && b_width == INDENT_BLANKLINE) + return INDENT_BLANKLINE; - if (a_len - a_off != b_len - b_off || - memcmp(a->line + a_off, b->line + b_off, a_len - a_off)) - return 0; - - *out = a_width - b_width; - - return 1; + return a_width - b_width; } static int cmp_in_block_with_wsd(const struct moved_entry *cur, @@ -916,26 +903,17 @@ static int moved_entry_cmp(const void *hashmap_cmp_fn_data, const void *keydata) { const struct diff_options *diffopt = hashmap_cmp_fn_data; - const struct moved_entry *a, *b; + const struct emitted_diff_symbol *a, *b; unsigned flags = diffopt->color_moved_ws_handling & XDF_WHITESPACE_FLAGS; - a = container_of(eptr, const struct moved_entry, ent); - b = container_of(entry_or_key, const struct moved_entry, ent); + a = container_of(eptr, const struct moved_entry, ent)->es; + b = container_of(entry_or_key, const struct moved_entry, ent)->es; - if (diffopt->color_moved_ws_handling & - COLOR_MOVED_WS_ALLOW_INDENTATION_CHANGE) - /* - * As there is not specific white space config given, - * we'd need to check for a new block, so ignore all - * white space. The setup of the white space - * configuration for the next block is done else where - */ - flags |= XDF_IGNORE_WHITESPACE; - - return !xdiff_compare_lines(a->es->line, a->es->len, - b->es->line, b->es->len, - flags); + return !xdiff_compare_lines(a->line + a->indent_off, + a->len - a->indent_off, + b->line + b->indent_off, + b->len - b->indent_off, flags); } static struct moved_entry *prepare_entry(struct diff_options *o, @@ -944,7 +922,8 @@ static struct moved_entry *prepare_entry(struct diff_options *o, struct moved_entry *ret = xmalloc(sizeof(*ret)); struct emitted_diff_symbol *l = &o->emitted_symbols->buf[line_no]; unsigned flags = o->color_moved_ws_handling & XDF_WHITESPACE_FLAGS; - unsigned int hash = xdiff_hash_string(l->line, l->len, flags); + unsigned int hash = xdiff_hash_string(l->line + l->indent_off, + l->len - l->indent_off, flags); hashmap_entry_init(&ret->ent, hash); ret->es = l; @@ -1036,13 +1015,11 @@ static void fill_potential_moved_blocks(struct diff_options *o, hashmap_for_each_entry_from(hm, match, ent) { ALLOC_GROW(pmb, pmb_nr + 1, pmb_alloc); if (o->color_moved_ws_handling & - COLOR_MOVED_WS_ALLOW_INDENTATION_CHANGE) { - if (compute_ws_delta(l, match->es, &(pmb[pmb_nr]).wsd)) - pmb[pmb_nr++].match = match; - } else { + COLOR_MOVED_WS_ALLOW_INDENTATION_CHANGE) + pmb[pmb_nr].wsd = compute_ws_delta(l, match->es); + else pmb[pmb_nr].wsd = 0; - pmb[pmb_nr++].match = match; - } + pmb[pmb_nr++].match = match; } *pmb_p = pmb; @@ -6276,10 +6253,6 @@ static void diff_flush_patch_all_file_pairs(struct diff_options *o) if (o->color_moved) { struct hashmap add_lines, del_lines; - if (o->color_moved_ws_handling & - COLOR_MOVED_WS_ALLOW_INDENTATION_CHANGE) - o->color_moved_ws_handling |= XDF_IGNORE_WHITESPACE; - hashmap_init(&del_lines, moved_entry_cmp, o, 0); hashmap_init(&add_lines, moved_entry_cmp, o, 0); diff --git a/t/t4015-diff-whitespace.sh b/t/t4015-diff-whitespace.sh index 15782c879d..50d0cf486b 100755 --- a/t/t4015-diff-whitespace.sh +++ b/t/t4015-diff-whitespace.sh @@ -2206,10 +2206,10 @@ EMPTY='' test_expect_success 'compare mixed whitespace delta across moved blocks' ' git reset --hard && - tr Q_ "\t " <<-EOF >text.txt && - ${EMPTY} - ____too short without - ${EMPTY} + tr "^|Q_" "\f\v\t " <<-EOF >text.txt && + ^__ + |____too short without + ^ ___being grouped across blank line ${EMPTY} context @@ -2228,7 +2228,7 @@ test_expect_success 'compare mixed whitespace delta across moved blocks' ' git add text.txt && git commit -m "add text.txt" && - tr Q_ "\t " <<-EOF >text.txt && + tr "^|Q_" "\f\v\t " <<-EOF >text.txt && context lines to @@ -2239,7 +2239,7 @@ test_expect_success 'compare mixed whitespace delta across moved blocks' ' ${EMPTY} QQtoo short without ${EMPTY} - Q_______being grouped across blank line + ^Q_______being grouped across blank line ${EMPTY} Q_QThese two lines have had their indentation reduced by four spaces @@ -2251,16 +2251,16 @@ test_expect_success 'compare mixed whitespace delta across moved blocks' ' -c core.whitespace=space-before-tab \ diff --color --color-moved --ws-error-highlight=all \ --color-moved-ws=allow-indentation-change >actual.raw && - grep -v "index" actual.raw | test_decode_color >actual && + grep -v "index" actual.raw | tr "\f\v" "^|" | test_decode_color >actual && cat <<-\EOF >expected && diff --git a/text.txt b/text.txt --- a/text.txt +++ b/text.txt @@ -1,16 +1,16 @@ - - - - too short without - - + -^ + -| too short without + -^ - being grouped across blank line - context @@ -2280,7 +2280,7 @@ test_expect_success 'compare mixed whitespace delta across moved blocks' ' + + too short without + - + being grouped across blank line + +^ being grouped across blank line + + These two lines have had their +indentation reduced by four spaces From b4a5c5c419009c26935536fa7039ad5073acb237 Mon Sep 17 00:00:00 2001 From: Phillip Wood Date: Thu, 9 Dec 2021 10:30:08 +0000 Subject: [PATCH 14/15] diff: use designated initializers for emitted_diff_symbol This makes it clearer which fields are being explicitly initialized and will simplify the next commit where we add a new field to the struct. Signed-off-by: Phillip Wood Signed-off-by: Junio C Hamano --- diff.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/diff.c b/diff.c index 2085c06367..9ef88d7665 100644 --- a/diff.c +++ b/diff.c @@ -1497,7 +1497,9 @@ static void emit_diff_symbol_from_struct(struct diff_options *o, static void emit_diff_symbol(struct diff_options *o, enum diff_symbol s, const char *line, int len, unsigned flags) { - struct emitted_diff_symbol e = {line, len, flags, 0, 0, s}; + struct emitted_diff_symbol e = { + .line = line, .len = len, .flags = flags, .s = s + }; if (o->emitted_symbols) append_emitted_diff_symbol(o, &e); From 72962e8b3c3ea3a631166876b4668718103be4fe Mon Sep 17 00:00:00 2001 From: Phillip Wood Date: Thu, 9 Dec 2021 10:30:09 +0000 Subject: [PATCH 15/15] diff --color-moved: intern strings Taking inspiration from xdl_classify_record() assign an id to each addition and deletion such that lines that match for the current --color-moved-ws mode share the same unique id. This reduces the number of hash lookups a little (calculating the ids still involves one hash lookup per line) but the main benefit is that when growing blocks of potentially moved lines we can replace string comparisons which involve chasing a pointer with a simple integer comparison. On a large diff this commit reduces the time to run 'diff --color-moved' by 37% compared to the previous commit and 31% compared to master, for 'diff --color-moved-ws=allow-indentation-change' the reduction is 28% compared to the previous commit and 96% compared to master. There is little change in the performance of 'git log --patch' as the diffs are smaller. Test HEAD^ HEAD --------------------------------------------------------------------------------------------------------------- 4002.1: diff --no-color-moved --no-color-moved-ws large change 0.38(0.33+0.05) 0.38(0.33+0.05) +0.0% 4002.2: diff --color-moved --no-color-moved-ws large change 0.88(0.81+0.06) 0.55(0.50+0.04) -37.5% 4002.3: diff --color-moved-ws=allow-indentation-change large change 0.85(0.79+0.06) 0.61(0.54+0.06) -28.2% 4002.4: log --no-color-moved --no-color-moved-ws 1.16(1.07+0.08) 1.15(1.09+0.05) -0.9% 4002.5: log --color-moved --no-color-moved-ws 1.31(1.22+0.08) 1.29(1.19+0.09) -1.5% 4002.6: log --color-moved-ws=allow-indentation-change 1.32(1.24+0.08) 1.31(1.18+0.13) -0.8% Test master HEAD --------------------------------------------------------------------------------------------------------------- 4002.1: diff --no-color-moved --no-color-moved-ws large change 0.38 (0.33+0.05) 0.38(0.33+0.05) +0.0% 4002.2: diff --color-moved --no-color-moved-ws large change 0.80 (0.75+0.04) 0.55(0.50+0.04) -31.2% 4002.3: diff --color-moved-ws=allow-indentation-change large change 14.20(14.15+0.05) 0.61(0.54+0.06) -95.7% 4002.4: log --no-color-moved --no-color-moved-ws 1.15 (1.05+0.09) 1.15(1.09+0.05) +0.0% 4002.5: log --color-moved --no-color-moved-ws 1.30 (1.19+0.11) 1.29(1.19+0.09) -0.8% 4002.6: log --color-moved-ws=allow-indentation-change 1.70 (1.63+0.06) 1.31(1.18+0.13) -22.9% Signed-off-by: Phillip Wood Signed-off-by: Junio C Hamano --- diff.c | 176 +++++++++++++++++++++++++++++++-------------------------- 1 file changed, 97 insertions(+), 79 deletions(-) diff --git a/diff.c b/diff.c index 9ef88d7665..c28c56c128 100644 --- a/diff.c +++ b/diff.c @@ -18,6 +18,7 @@ #include "submodule-config.h" #include "submodule.h" #include "hashmap.h" +#include "mem-pool.h" #include "ll-merge.h" #include "string-list.h" #include "strvec.h" @@ -772,6 +773,7 @@ struct emitted_diff_symbol { int flags; int indent_off; /* Offset to first non-whitespace character */ int indent_width; /* The visual width of the indentation */ + unsigned id; enum diff_symbol s; }; #define EMITTED_DIFF_SYMBOL_INIT {NULL} @@ -797,9 +799,9 @@ static void append_emitted_diff_symbol(struct diff_options *o, } struct moved_entry { - struct hashmap_entry ent; const struct emitted_diff_symbol *es; struct moved_entry *next_line; + struct moved_entry *next_match; }; struct moved_block { @@ -865,24 +867,24 @@ static int cmp_in_block_with_wsd(const struct moved_entry *cur, const struct emitted_diff_symbol *l, struct moved_block *pmb) { - int al = cur->es->len, bl = l->len; - const char *a = cur->es->line, - *b = l->line; - int a_off = cur->es->indent_off, - a_width = cur->es->indent_width, - b_off = l->indent_off, - b_width = l->indent_width; + int a_width = cur->es->indent_width, b_width = l->indent_width; int delta; - /* If 'l' and 'cur' are both blank then they match. */ - if (a_width == INDENT_BLANKLINE && b_width == INDENT_BLANKLINE) + /* The text of each line must match */ + if (cur->es->id != l->id) + return 1; + + /* + * If 'l' and 'cur' are both blank then we don't need to check the + * indent. We only need to check cur as we know the strings match. + * */ + if (a_width == INDENT_BLANKLINE) return 0; /* * The indent changes of the block are known and stored in pmb->wsd; * however we need to check if the indent changes of the current line - * match those of the current block and that the text of 'l' and 'cur' - * after the indentation match. + * match those of the current block. */ delta = b_width - a_width; @@ -893,22 +895,26 @@ static int cmp_in_block_with_wsd(const struct moved_entry *cur, if (pmb->wsd == INDENT_BLANKLINE) pmb->wsd = delta; - return !(delta == pmb->wsd && al - a_off == bl - b_off && - !memcmp(a + a_off, b + b_off, al - a_off)); + return delta != pmb->wsd; } -static int moved_entry_cmp(const void *hashmap_cmp_fn_data, - const struct hashmap_entry *eptr, - const struct hashmap_entry *entry_or_key, - const void *keydata) +struct interned_diff_symbol { + struct hashmap_entry ent; + struct emitted_diff_symbol *es; +}; + +static int interned_diff_symbol_cmp(const void *hashmap_cmp_fn_data, + const struct hashmap_entry *eptr, + const struct hashmap_entry *entry_or_key, + const void *keydata) { const struct diff_options *diffopt = hashmap_cmp_fn_data; const struct emitted_diff_symbol *a, *b; unsigned flags = diffopt->color_moved_ws_handling & XDF_WHITESPACE_FLAGS; - a = container_of(eptr, const struct moved_entry, ent)->es; - b = container_of(entry_or_key, const struct moved_entry, ent)->es; + a = container_of(eptr, const struct interned_diff_symbol, ent)->es; + b = container_of(entry_or_key, const struct interned_diff_symbol, ent)->es; return !xdiff_compare_lines(a->line + a->indent_off, a->len - a->indent_off, @@ -916,55 +922,81 @@ static int moved_entry_cmp(const void *hashmap_cmp_fn_data, b->len - b->indent_off, flags); } -static struct moved_entry *prepare_entry(struct diff_options *o, - int line_no) +static void prepare_entry(struct diff_options *o, struct emitted_diff_symbol *l, + struct interned_diff_symbol *s) { - struct moved_entry *ret = xmalloc(sizeof(*ret)); - struct emitted_diff_symbol *l = &o->emitted_symbols->buf[line_no]; unsigned flags = o->color_moved_ws_handling & XDF_WHITESPACE_FLAGS; unsigned int hash = xdiff_hash_string(l->line + l->indent_off, l->len - l->indent_off, flags); - hashmap_entry_init(&ret->ent, hash); - ret->es = l; - ret->next_line = NULL; - - return ret; + hashmap_entry_init(&s->ent, hash); + s->es = l; } -static void add_lines_to_move_detection(struct diff_options *o, - struct hashmap *add_lines, - struct hashmap *del_lines) +struct moved_entry_list { + struct moved_entry *add, *del; +}; + +static struct moved_entry_list *add_lines_to_move_detection(struct diff_options *o, + struct mem_pool *entry_mem_pool) { struct moved_entry *prev_line = NULL; - + struct mem_pool interned_pool; + struct hashmap interned_map; + struct moved_entry_list *entry_list = NULL; + size_t entry_list_alloc = 0; + unsigned id = 0; int n; - for (n = 0; n < o->emitted_symbols->nr; n++) { - struct hashmap *hm; - struct moved_entry *key; - switch (o->emitted_symbols->buf[n].s) { - case DIFF_SYMBOL_PLUS: - hm = add_lines; - break; - case DIFF_SYMBOL_MINUS: - hm = del_lines; - break; - default: + hashmap_init(&interned_map, interned_diff_symbol_cmp, o, 8096); + mem_pool_init(&interned_pool, 1024 * 1024); + + for (n = 0; n < o->emitted_symbols->nr; n++) { + struct interned_diff_symbol key; + struct emitted_diff_symbol *l = &o->emitted_symbols->buf[n]; + struct interned_diff_symbol *s; + struct moved_entry *entry; + + if (l->s != DIFF_SYMBOL_PLUS && l->s != DIFF_SYMBOL_MINUS) { prev_line = NULL; continue; } if (o->color_moved_ws_handling & COLOR_MOVED_WS_ALLOW_INDENTATION_CHANGE) - fill_es_indent_data(&o->emitted_symbols->buf[n]); - key = prepare_entry(o, n); - if (prev_line && prev_line->es->s == o->emitted_symbols->buf[n].s) - prev_line->next_line = key; + fill_es_indent_data(l); - hashmap_add(hm, &key->ent); - prev_line = key; + prepare_entry(o, l, &key); + s = hashmap_get_entry(&interned_map, &key, ent, &key.ent); + if (s) { + l->id = s->es->id; + } else { + l->id = id; + ALLOC_GROW_BY(entry_list, id, 1, entry_list_alloc); + hashmap_add(&interned_map, + memcpy(mem_pool_alloc(&interned_pool, + sizeof(key)), + &key, sizeof(key))); + } + entry = mem_pool_alloc(entry_mem_pool, sizeof(*entry)); + entry->es = l; + entry->next_line = NULL; + if (prev_line && prev_line->es->s == l->s) + prev_line->next_line = entry; + prev_line = entry; + if (l->s == DIFF_SYMBOL_PLUS) { + entry->next_match = entry_list[l->id].add; + entry_list[l->id].add = entry; + } else { + entry->next_match = entry_list[l->id].del; + entry_list[l->id].del = entry; + } } + + hashmap_clear(&interned_map); + mem_pool_discard(&interned_pool, 0); + + return entry_list; } static void pmb_advance_or_null(struct diff_options *o, @@ -973,7 +1005,6 @@ static void pmb_advance_or_null(struct diff_options *o, int *pmb_nr) { int i, j; - unsigned flags = o->color_moved_ws_handling & XDF_WHITESPACE_FLAGS; for (i = 0, j = 0; i < *pmb_nr; i++) { int match; @@ -986,9 +1017,8 @@ static void pmb_advance_or_null(struct diff_options *o, match = cur && !cmp_in_block_with_wsd(cur, l, &pmb[i]); else - match = cur && - xdiff_compare_lines(cur->es->line, cur->es->len, - l->line, l->len, flags); + match = cur && cur->es->id == l->id; + if (match) { pmb[j] = pmb[i]; pmb[j++].match = cur; @@ -998,7 +1028,6 @@ static void pmb_advance_or_null(struct diff_options *o, } static void fill_potential_moved_blocks(struct diff_options *o, - struct hashmap *hm, struct moved_entry *match, struct emitted_diff_symbol *l, struct moved_block **pmb_p, @@ -1012,7 +1041,7 @@ static void fill_potential_moved_blocks(struct diff_options *o, * The current line is the start of a new block. * Setup the set of potential blocks. */ - hashmap_for_each_entry_from(hm, match, ent) { + for (; match; match = match->next_match) { ALLOC_GROW(pmb, pmb_nr + 1, pmb_alloc); if (o->color_moved_ws_handling & COLOR_MOVED_WS_ALLOW_INDENTATION_CHANGE) @@ -1067,8 +1096,7 @@ static int adjust_last_block(struct diff_options *o, int n, int block_length) /* Find blocks of moved code, delegate actual coloring decision to helper */ static void mark_color_as_moved(struct diff_options *o, - struct hashmap *add_lines, - struct hashmap *del_lines) + struct moved_entry_list *entry_list) { struct moved_block *pmb = NULL; /* potentially moved blocks */ int pmb_nr = 0, pmb_alloc = 0; @@ -1077,23 +1105,15 @@ static void mark_color_as_moved(struct diff_options *o, for (n = 0; n < o->emitted_symbols->nr; n++) { - struct hashmap *hm = NULL; - struct moved_entry *key; struct moved_entry *match = NULL; struct emitted_diff_symbol *l = &o->emitted_symbols->buf[n]; switch (l->s) { case DIFF_SYMBOL_PLUS: - hm = del_lines; - key = prepare_entry(o, n); - match = hashmap_get_entry(hm, key, ent, NULL); - free(key); + match = entry_list[l->id].del; break; case DIFF_SYMBOL_MINUS: - hm = add_lines; - key = prepare_entry(o, n); - match = hashmap_get_entry(hm, key, ent, NULL); - free(key); + match = entry_list[l->id].add; break; default: flipped_block = 0; @@ -1135,7 +1155,7 @@ static void mark_color_as_moved(struct diff_options *o, */ n -= block_length; else - fill_potential_moved_blocks(o, hm, match, l, + fill_potential_moved_blocks(o, match, l, &pmb, &pmb_alloc, &pmb_nr); @@ -6253,20 +6273,18 @@ static void diff_flush_patch_all_file_pairs(struct diff_options *o) if (o->emitted_symbols) { if (o->color_moved) { - struct hashmap add_lines, del_lines; + struct mem_pool entry_pool; + struct moved_entry_list *entry_list; - hashmap_init(&del_lines, moved_entry_cmp, o, 0); - hashmap_init(&add_lines, moved_entry_cmp, o, 0); - - add_lines_to_move_detection(o, &add_lines, &del_lines); - mark_color_as_moved(o, &add_lines, &del_lines); + mem_pool_init(&entry_pool, 1024 * 1024); + entry_list = add_lines_to_move_detection(o, + &entry_pool); + mark_color_as_moved(o, entry_list); if (o->color_moved == COLOR_MOVED_ZEBRA_DIM) dim_moved_lines(o); - hashmap_clear_and_free(&add_lines, struct moved_entry, - ent); - hashmap_clear_and_free(&del_lines, struct moved_entry, - ent); + mem_pool_discard(&entry_pool, 0); + free(entry_list); } for (i = 0; i < esm.nr; i++)