Implement line-history search (git log -L)

This is a rewrite of much of Bo's work, mainly in an effort to split
it into smaller, easier-to-understand routines.

The algorithm is built around the struct range_set, which encodes a
series of line ranges as half-open intervals [a,b).  This is used in
two contexts:

* A set of lines we are tracking (which will change as we dig through
  history).
* To encode diffs, as pairs of ranges.

The main routine is range_set_map_across_diff().  It processes the
diff between a commit C and some parent P.  It determines which diff
hunks are relevant to the ranges tracked in C, and computes the new
ranges for P.

The algorithm is then simply to process history in topological order
from newest to oldest, computing ranges and (partial) diffs.  At
branch points, we need to merge the ranges we are watching.  We will
find that many commits do not affect the chosen ranges, and mark them
TREESAME (in addition to those already filtered by pathspec limiting).
Another pass of history simplification then gets rid of such commits.

This is wired in as an extra filtering pass in the log machinery.  For
now that only reduces code duplication, but it should also allow other
simplifications and options to be used.

Finally, we hook a diff printer into the output chain.  Ideally we
would wire directly into the diff logic, to optionally use features
like word diff.  However, that will require some major reworking of
the diff chain, so we completely replace the output with our own diff
for now.

As this was a GSoC project, and has quite some history by now, many
people have helped.  In no particular order, thanks go to:

  Jakub Narebski <jnareb@gmail.com>
  Jens Lehmann <Jens.Lehmann@web.de>
  Jonathan Nieder <jrnieder@gmail.com>
  Junio C Hamano <gitster@pobox.com>
  Ramsay Jones <ramsay@ramsay1.demon.co.uk>
  Will Palmer <wmpalmer@gmail.com>

Apologies to everyone I forgot.

Signed-off-by: Bo Yang <struggleyb.nku@gmail.com>
Signed-off-by: Thomas Rast <trast@student.ethz.ch>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
This commit is contained in:
Thomas Rast
2013-03-28 17:47:32 +01:00
committed by Junio C Hamano
parent c7edcae06e
commit 12da1d1f6f
20 changed files with 2156 additions and 2 deletions

View File

@ -0,0 +1,43 @@
commit 4a23ae5c98d59a58c6da036156959f2dc9f472ad
Author: Thomas Rast <trast@student.ethz.ch>
Date: Thu Feb 28 10:47:40 2013 +0100
change at very beginning
diff --git a/a.c b/a.c
--- a/a.c
+++ b/a.c
@@ -1,3 +1,4 @@
+#include <unistd.h>
#include <stdio.h>
long f(long x)
commit a6eb82647d5d67f893da442f8f9375fd89a3b1e2
Author: Thomas Rast <trast@student.ethz.ch>
Date: Thu Feb 28 10:45:16 2013 +0100
touch both functions
diff --git a/a.c b/a.c
--- a/a.c
+++ b/a.c
@@ -1,3 +1,3 @@
#include <stdio.h>
-int f(int x)
+long f(long x)
commit de4c48ae814792c02a49c4c3c0c757ae69c55f6a
Author: Thomas Rast <trast@student.ethz.ch>
Date: Thu Feb 28 10:44:48 2013 +0100
initial
diff --git a/a.c b/a.c
--- /dev/null
+++ b/a.c
@@ -0,0 +1,3 @@
+#include <stdio.h>
+
+int f(int x)

View File

@ -0,0 +1,62 @@
commit 4659538844daa2849b1a9e7d6fadb96fcd26fc83
Author: Thomas Rast <trast@student.ethz.ch>
Date: Thu Feb 28 10:48:43 2013 +0100
change back to complete line
diff --git a/a.c b/a.c
--- a/a.c
+++ b/a.c
@@ -20,3 +20,5 @@
printf("%ld\n", f(15));
return 0;
-}
\ No newline at end of file
+}
+
+/* incomplete lines are bad! */
commit 100b61a6f2f720f812620a9d10afb3a960ccb73c
Author: Thomas Rast <trast@student.ethz.ch>
Date: Thu Feb 28 10:48:10 2013 +0100
change to an incomplete line at end
diff --git a/a.c b/a.c
--- a/a.c
+++ b/a.c
@@ -20,3 +20,3 @@
printf("%ld\n", f(15));
return 0;
-}
+}
\ No newline at end of file
commit a6eb82647d5d67f893da442f8f9375fd89a3b1e2
Author: Thomas Rast <trast@student.ethz.ch>
Date: Thu Feb 28 10:45:16 2013 +0100
touch both functions
diff --git a/a.c b/a.c
--- a/a.c
+++ b/a.c
@@ -19,3 +19,3 @@
- printf("%d\n", f(15));
+ printf("%ld\n", f(15));
return 0;
}
commit de4c48ae814792c02a49c4c3c0c757ae69c55f6a
Author: Thomas Rast <trast@student.ethz.ch>
Date: Thu Feb 28 10:44:48 2013 +0100
initial
diff --git a/a.c b/a.c
--- /dev/null
+++ b/a.c
@@ -0,0 +18,3 @@
+ printf("%d\n", f(15));
+ return 0;
+}

View File

@ -0,0 +1,40 @@
commit 6ce3c4ff690136099bb17e1a8766b75764726ea7
Author: Thomas Rast <trast@student.ethz.ch>
Date: Thu Feb 28 10:49:50 2013 +0100
another simple change
diff --git a/b.c b/b.c
--- a/b.c
+++ b/b.c
@@ -4,9 +4,9 @@
long f(long x)
{
int s = 0;
while (x) {
- x >>= 1;
+ x /= 2;
s++;
}
return s;
}
commit e6da343666244ea9e67cbe3f3bd26da860f9fe0e
Author: Thomas Rast <trast@student.ethz.ch>
Date: Thu Feb 28 10:49:28 2013 +0100
move file
diff --git a/b.c b/b.c
--- /dev/null
+++ b/b.c
@@ -0,0 +4,9 @@
+long f(long x)
+{
+ int s = 0;
+ while (x) {
+ x >>= 1;
+ s++;
+ }
+ return s;
+}

59
t/t4211/expect.simple-f Normal file
View File

@ -0,0 +1,59 @@
commit a6eb82647d5d67f893da442f8f9375fd89a3b1e2
Author: Thomas Rast <trast@student.ethz.ch>
Date: Thu Feb 28 10:45:16 2013 +0100
touch both functions
diff --git a/a.c b/a.c
--- a/a.c
+++ b/a.c
@@ -3,9 +3,9 @@
-int f(int x)
+long f(long x)
{
int s = 0;
while (x) {
x >>= 1;
s++;
}
return s;
}
commit f04fb20f2c77850996cba739709acc6faecc58f7
Author: Thomas Rast <trast@student.ethz.ch>
Date: Thu Feb 28 10:44:55 2013 +0100
change f()
diff --git a/a.c b/a.c
--- a/a.c
+++ b/a.c
@@ -3,8 +3,9 @@
int f(int x)
{
int s = 0;
while (x) {
x >>= 1;
s++;
}
+ return s;
}
commit de4c48ae814792c02a49c4c3c0c757ae69c55f6a
Author: Thomas Rast <trast@student.ethz.ch>
Date: Thu Feb 28 10:44:48 2013 +0100
initial
diff --git a/a.c b/a.c
--- /dev/null
+++ b/a.c
@@ -0,0 +3,8 @@
+int f(int x)
+{
+ int s = 0;
+ while (x) {
+ x >>= 1;
+ s++;
+ }
+}

View File

@ -0,0 +1,68 @@
commit 4659538844daa2849b1a9e7d6fadb96fcd26fc83
Author: Thomas Rast <trast@student.ethz.ch>
Date: Thu Feb 28 10:48:43 2013 +0100
change back to complete line
diff --git a/a.c b/a.c
--- a/a.c
+++ b/a.c
@@ -18,5 +18,5 @@
int main ()
{
printf("%ld\n", f(15));
return 0;
-}
\ No newline at end of file
+}
commit 100b61a6f2f720f812620a9d10afb3a960ccb73c
Author: Thomas Rast <trast@student.ethz.ch>
Date: Thu Feb 28 10:48:10 2013 +0100
change to an incomplete line at end
diff --git a/a.c b/a.c
--- a/a.c
+++ b/a.c
@@ -18,5 +18,5 @@
int main ()
{
printf("%ld\n", f(15));
return 0;
-}
+}
\ No newline at end of file
commit a6eb82647d5d67f893da442f8f9375fd89a3b1e2
Author: Thomas Rast <trast@student.ethz.ch>
Date: Thu Feb 28 10:45:16 2013 +0100
touch both functions
diff --git a/a.c b/a.c
--- a/a.c
+++ b/a.c
@@ -17,5 +17,5 @@
int main ()
{
- printf("%d\n", f(15));
+ printf("%ld\n", f(15));
return 0;
}
commit de4c48ae814792c02a49c4c3c0c757ae69c55f6a
Author: Thomas Rast <trast@student.ethz.ch>
Date: Thu Feb 28 10:44:48 2013 +0100
initial
diff --git a/a.c b/a.c
--- /dev/null
+++ b/a.c
@@ -0,0 +16,5 @@
+int main ()
+{
+ printf("%d\n", f(15));
+ return 0;
+}

102
t/t4211/expect.two-ranges Normal file
View File

@ -0,0 +1,102 @@
commit 4659538844daa2849b1a9e7d6fadb96fcd26fc83
Author: Thomas Rast <trast@student.ethz.ch>
Date: Thu Feb 28 10:48:43 2013 +0100
change back to complete line
diff --git a/a.c b/a.c
--- a/a.c
+++ b/a.c
@@ -18,5 +18,5 @@
int main ()
{
printf("%ld\n", f(15));
return 0;
-}
\ No newline at end of file
+}
commit 100b61a6f2f720f812620a9d10afb3a960ccb73c
Author: Thomas Rast <trast@student.ethz.ch>
Date: Thu Feb 28 10:48:10 2013 +0100
change to an incomplete line at end
diff --git a/a.c b/a.c
--- a/a.c
+++ b/a.c
@@ -18,5 +18,5 @@
int main ()
{
printf("%ld\n", f(15));
return 0;
-}
+}
\ No newline at end of file
commit a6eb82647d5d67f893da442f8f9375fd89a3b1e2
Author: Thomas Rast <trast@student.ethz.ch>
Date: Thu Feb 28 10:45:16 2013 +0100
touch both functions
diff --git a/a.c b/a.c
--- a/a.c
+++ b/a.c
@@ -3,9 +3,9 @@
-int f(int x)
+long f(long x)
{
int s = 0;
while (x) {
x >>= 1;
s++;
}
return s;
}
@@ -17,5 +17,5 @@
int main ()
{
- printf("%d\n", f(15));
+ printf("%ld\n", f(15));
return 0;
}
commit f04fb20f2c77850996cba739709acc6faecc58f7
Author: Thomas Rast <trast@student.ethz.ch>
Date: Thu Feb 28 10:44:55 2013 +0100
change f()
diff --git a/a.c b/a.c
--- a/a.c
+++ b/a.c
@@ -3,8 +3,9 @@
int f(int x)
{
int s = 0;
while (x) {
x >>= 1;
s++;
}
+ return s;
}
commit de4c48ae814792c02a49c4c3c0c757ae69c55f6a
Author: Thomas Rast <trast@student.ethz.ch>
Date: Thu Feb 28 10:44:48 2013 +0100
initial
diff --git a/a.c b/a.c
--- /dev/null
+++ b/a.c
@@ -0,0 +3,8 @@
+int f(int x)
+{
+ int s = 0;
+ while (x) {
+ x >>= 1;
+ s++;
+ }
+}

View File

@ -0,0 +1,39 @@
commit 4659538844daa2849b1a9e7d6fadb96fcd26fc83
Author: Thomas Rast <trast@student.ethz.ch>
Date: Thu Feb 28 10:48:43 2013 +0100
change back to complete line
diff --git a/a.c b/a.c
--- a/a.c
+++ b/a.c
@@ -22,1 +24,1 @@
-}
\ No newline at end of file
+/* incomplete lines are bad! */
commit 100b61a6f2f720f812620a9d10afb3a960ccb73c
Author: Thomas Rast <trast@student.ethz.ch>
Date: Thu Feb 28 10:48:10 2013 +0100
change to an incomplete line at end
diff --git a/a.c b/a.c
--- a/a.c
+++ b/a.c
@@ -22,1 +22,1 @@
-}
+}
\ No newline at end of file
commit de4c48ae814792c02a49c4c3c0c757ae69c55f6a
Author: Thomas Rast <trast@student.ethz.ch>
Date: Thu Feb 28 10:44:48 2013 +0100
initial
diff --git a/a.c b/a.c
--- /dev/null
+++ b/a.c
@@ -0,0 +20,1 @@
+}

330
t/t4211/history.export Normal file
View File

@ -0,0 +1,330 @@
blob
mark :1
data 157
#include <stdio.h>
int f(int x)
{
int s = 0;
while (x) {
x >>= 1;
s++;
}
}
/*
* A comment.
*/
int main ()
{
printf("%d\n", f(15));
return 0;
}
reset refs/tags/simple
commit refs/tags/simple
mark :2
author Thomas Rast <trast@student.ethz.ch> 1362044688 +0100
committer Thomas Rast <trast@student.ethz.ch> 1362044688 +0100
data 8
initial
M 100644 :1 a.c
blob
mark :3
data 168
#include <stdio.h>
int f(int x)
{
int s = 0;
while (x) {
x >>= 1;
s++;
}
return s;
}
/*
* A comment.
*/
int main ()
{
printf("%d\n", f(15));
return 0;
}
commit refs/tags/simple
mark :4
author Thomas Rast <trast@student.ethz.ch> 1362044695 +0100
committer Thomas Rast <trast@student.ethz.ch> 1362044695 +0100
data 11
change f()
from :2
M 100644 :3 a.c
blob
mark :5
data 171
#include <stdio.h>
long f(long x)
{
int s = 0;
while (x) {
x >>= 1;
s++;
}
return s;
}
/*
* A comment.
*/
int main ()
{
printf("%ld\n", f(15));
return 0;
}
commit refs/tags/simple
mark :6
author Thomas Rast <trast@student.ethz.ch> 1362044716 +0100
committer Thomas Rast <trast@student.ethz.ch> 1362044716 +0100
data 21
touch both functions
from :4
M 100644 :5 a.c
blob
mark :7
data 185
#include <stdio.h>
long f(long x)
{
int s = 0;
while (x) {
x >>= 1;
s++;
}
return s;
}
/*
* This is only an example!
*/
int main ()
{
printf("%ld\n", f(15));
return 0;
}
commit refs/tags/simple
mark :8
author Thomas Rast <trast@student.ethz.ch> 1362044741 +0100
committer Thomas Rast <trast@student.ethz.ch> 1362044741 +0100
data 14
touch comment
from :6
M 100644 :7 a.c
blob
mark :9
data 205
#include <unistd.h>
#include <stdio.h>
long f(long x)
{
int s = 0;
while (x) {
x >>= 1;
s++;
}
return s;
}
/*
* This is only an example!
*/
int main ()
{
printf("%ld\n", f(15));
return 0;
}
commit refs/tags/simple
mark :10
author Thomas Rast <trast@student.ethz.ch> 1362044860 +0100
committer Thomas Rast <trast@student.ethz.ch> 1362044860 +0100
data 25
change at very beginning
from :8
M 100644 :9 a.c
blob
mark :11
data 204
#include <unistd.h>
#include <stdio.h>
long f(long x)
{
int s = 0;
while (x) {
x >>= 1;
s++;
}
return s;
}
/*
* This is only an example!
*/
int main ()
{
printf("%ld\n", f(15));
return 0;
}
commit refs/tags/simple
mark :12
author Thomas Rast <trast@student.ethz.ch> 1362044890 +0100
committer Thomas Rast <trast@student.ethz.ch> 1362044890 +0100
data 36
change to an incomplete line at end
from :10
M 100644 :11 a.c
blob
mark :13
data 238
#include <unistd.h>
#include <stdio.h>
long f(long x)
{
int s = 0;
while (x) {
x >>= 1;
s++;
}
return s;
}
/*
* This is only an example!
*/
int main ()
{
printf("%ld\n", f(15));
return 0;
}
/* incomplete lines are bad! */
commit refs/tags/simple
mark :14
author Thomas Rast <trast@student.ethz.ch> 1362044923 +0100
committer Thomas Rast <trast@student.ethz.ch> 1362044923 +0100
data 29
change back to complete line
from :12
M 100644 :13 a.c
commit refs/tags/move-support
mark :15
author Thomas Rast <trast@student.ethz.ch> 1362044968 +0100
committer Thomas Rast <trast@student.ethz.ch> 1362044968 +0100
data 10
move file
from :14
D a.c
M 100644 :13 b.c
blob
mark :16
data 237
#include <unistd.h>
#include <stdio.h>
long f(long x)
{
int s = 0;
while (x) {
x /= 2;
s++;
}
return s;
}
/*
* This is only an example!
*/
int main ()
{
printf("%ld\n", f(15));
return 0;
}
/* incomplete lines are bad! */
commit refs/tags/move-support
mark :17
author Thomas Rast <trast@student.ethz.ch> 1362044990 +0100
committer Thomas Rast <trast@student.ethz.ch> 1362044990 +0100
data 22
another simple change
from :15
M 100644 :16 b.c
blob
mark :18
data 254
#include <unistd.h>
#include <stdio.h>
long f(long x);
/*
* This is only an example!
*/
int main ()
{
printf("%ld\n", f(15));
return 0;
}
/* incomplete lines are bad! */
long f(long x)
{
int s = 0;
while (x) {
x /= 2;
s++;
}
return s;
}
commit refs/heads/master
mark :19
author Thomas Rast <trast@student.ethz.ch> 1362045024 +0100
committer Thomas Rast <trast@student.ethz.ch> 1362045024 +0100
data 21
move within the file
from :17
M 100644 :18 b.c
reset refs/heads/master
from :19