diff-highlight: match multi-line hunks
Currently we only bother highlighting single-line hunks. The rationale was that the purpose of highlighting is to point out small changes between two similar lines that are otherwise hard to see. However, that meant we missed similar cases where two lines were changed together, like:

    -foo(buf);
    -bar(buf);
    +foo(obj->buf);
    +bar(obj->buf);

Each of those changes is simple, and would benefit from highlighting (the "obj->" parts in this case).

This patch considers whole hunks at a time. For now, we consider only the case where the hunk has the same number of removed and added lines, and assume that the lines from each segment correspond one-to-one. While this is just a heuristic, in practice it seems to generate sensible results (especially because we now omit highlighting on completely-changed lines, so when our heuristic is wrong, we tend to avoid highlighting at all).

Based on an original idea and implementation by Michał Kiedrowicz.

Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
This commit is contained in:

committed by
Junio C Hamano

parent
6463fd7ed1
commit
34d9819e0a
@@ -14,13 +14,15 @@ Instead, this script post-processes the line-oriented diff, finds pairs
|
|||||||
of lines, and highlights the differing segments. It's currently very
|
of lines, and highlights the differing segments. It's currently very
|
||||||
simple and stupid about doing these tasks. In particular:
|
simple and stupid about doing these tasks. In particular:
|
||||||
|
|
||||||
1. It will only highlight a pair of lines if they are the only two
|
1. It will only highlight hunks in which the number of removed and
|
||||||
lines in a hunk. It could instead try to match up "before" and
|
added lines is the same, and it will pair lines within the hunk by
|
||||||
"after" lines for a given hunk into pairs of similar lines.
|
position (so the first removed line is compared to the first added
|
||||||
However, this may end up visually distracting, as the paired
|
line, and so forth). This is simple and tends to work well in
|
||||||
lines would have other highlighted lines in between them. And in
|
practice. More complex changes don't highlight well, so we tend to
|
||||||
practice, the lines which most need attention called to their
|
exclude them due to the "same number of removed and added lines"
|
||||||
small, hard-to-see changes are touching only a single line.
|
restriction. Or even if we do try to highlight them, they end up
|
||||||
|
not highlighting because of our "don't highlight if the whole line
|
||||||
|
would be highlighted" rule.
|
||||||
|
|
||||||
2. It will find the common prefix and suffix of two lines, and
|
2. It will find the common prefix and suffix of two lines, and
|
||||||
consider everything in the middle to be "different". It could
|
consider everything in the middle to be "different". It could
|
||||||
|
@@ -10,23 +10,28 @@ my $UNHIGHLIGHT = "\x1b[27m";
|
|||||||
my $COLOR = qr/\x1b\[[0-9;]*m/;
|
my $COLOR = qr/\x1b\[[0-9;]*m/;
|
||||||
my $BORING = qr/$COLOR|\s/;
|
my $BORING = qr/$COLOR|\s/;
|
||||||
|
|
||||||
my @window;
|
my @removed;
|
||||||
|
my @added;
|
||||||
|
my $in_hunk;
|
||||||
|
|
||||||
while (<>) {
|
while (<>) {
|
||||||
# We highlight only single-line changes, so we need
|
if (!$in_hunk) {
|
||||||
# a 4-line window to make a decision on whether
|
print;
|
||||||
# to highlight.
|
$in_hunk = /^$COLOR*\@/;
|
||||||
push @window, $_;
|
}
|
||||||
next if @window < 4;
|
elsif (/^$COLOR*-/) {
|
||||||
if ($window[0] =~ /^$COLOR*(\@| )/ &&
|
push @removed, $_;
|
||||||
$window[1] =~ /^$COLOR*-/ &&
|
}
|
||||||
$window[2] =~ /^$COLOR*\+/ &&
|
elsif (/^$COLOR*\+/) {
|
||||||
$window[3] !~ /^$COLOR*\+/) {
|
push @added, $_;
|
||||||
print shift @window;
|
|
||||||
show_hunk(shift @window, shift @window);
|
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
print shift @window;
|
show_hunk(\@removed, \@added);
|
||||||
|
@removed = ();
|
||||||
|
@added = ();
|
||||||
|
|
||||||
|
print;
|
||||||
|
$in_hunk = /^$COLOR*[\@ ]/;
|
||||||
}
|
}
|
||||||
|
|
||||||
# Most of the time there is enough output to keep things streaming,
|
# Most of the time there is enough output to keep things streaming,
|
||||||
@@ -42,26 +47,37 @@ while (<>) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
# Special case a single-line hunk at the end of file.
|
# Flush any queued hunk (this can happen when there is no trailing context in
|
||||||
if (@window == 3 &&
|
# the final diff of the input).
|
||||||
$window[0] =~ /^$COLOR*(\@| )/ &&
|
show_hunk(\@removed, \@added);
|
||||||
$window[1] =~ /^$COLOR*-/ &&
|
|
||||||
$window[2] =~ /^$COLOR*\+/) {
|
|
||||||
print shift @window;
|
|
||||||
show_hunk(shift @window, shift @window);
|
|
||||||
}
|
|
||||||
|
|
||||||
# And then flush any remaining lines.
|
|
||||||
while (@window) {
|
|
||||||
print shift @window;
|
|
||||||
}
|
|
||||||
|
|
||||||
exit 0;
|
exit 0;
|
||||||
|
|
||||||
sub show_hunk {
|
sub show_hunk {
|
||||||
my ($a, $b) = @_;
|
my ($a, $b) = @_;
|
||||||
|
|
||||||
print highlight_pair($a, $b);
|
# If one side is empty, then there is nothing to compare or highlight.
|
||||||
|
if (!@$a || !@$b) {
|
||||||
|
print @$a, @$b;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
# If we have mismatched numbers of lines on each side, we could try to
|
||||||
|
# be clever and match up similar lines. But for now we are simple and
|
||||||
|
# stupid, and only handle multi-line hunks that remove and add the same
|
||||||
|
# number of lines.
|
||||||
|
if (@$a != @$b) {
|
||||||
|
print @$a, @$b;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
my @queue;
|
||||||
|
for (my $i = 0; $i < @$a; $i++) {
|
||||||
|
my ($rm, $add) = highlight_pair($a->[$i], $b->[$i]);
|
||||||
|
print $rm;
|
||||||
|
push @queue, $add;
|
||||||
|
}
|
||||||
|
print @queue;
|
||||||
}
|
}
|
||||||
|
|
||||||
sub highlight_pair {
|
sub highlight_pair {
|
||||||
|
Reference in New Issue
Block a user