Merge branch 'ds/path-walk-1'

Introduce a new API to visit objects in batches based on a common
path, or by type.

* ds/path-walk-1:
  path-walk: drop redundant parse_tree() call
  path-walk: reorder object visits
  path-walk: mark trees and blobs as UNINTERESTING
  path-walk: visit tags and cached objects
  path-walk: allow consumer to specify object types
  t6601: add helper for testing path-walk API
  test-lib-functions: add test_cmp_sorted
  path-walk: introduce an object walk by path
This commit is contained in:
Junio C Hamano
2025-01-29 14:05:08 -08:00
12 changed files with 1220 additions and 0 deletions

View File

@ -40,6 +40,7 @@ test_tool_sources = [
'test-parse-pathspec-file.c',
'test-partial-clone.c',
'test-path-utils.c',
'test-path-walk.c',
'test-pcre2-config.c',
'test-pkt-line.c',
'test-proc-receive.c',

112
t/helper/test-path-walk.c Normal file
View File

@ -0,0 +1,112 @@
#define USE_THE_REPOSITORY_VARIABLE
#include "test-tool.h"
#include "environment.h"
#include "hex.h"
#include "object-name.h"
#include "object.h"
#include "pretty.h"
#include "revision.h"
#include "setup.h"
#include "parse-options.h"
#include "path-walk.h"
#include "oid-array.h"
/*
 * Usage text for the "path-walk" test helper. cmd__path_walk() passes the
 * array to parse_options() and hands its first entry to usage() when it
 * is left with no arguments beyond argv[0].
 */
static const char * const path_walk_usage[] = {
N_("test-tool path-walk <options> -- <revision-options>"),
NULL
};
/*
 * Running totals shared between cmd__path_walk() and the emit_block()
 * callback for the duration of one walk.
 */
struct path_walk_test_data {
/* Batches reported so far; printed as the prefix of every object line. */
uintmax_t batch_nr;
/* Objects seen in commit batches. */
uintmax_t commit_nr;
/* Objects seen in tree batches. */
uintmax_t tree_nr;
/* Objects seen in blob batches. */
uintmax_t blob_nr;
/* Objects seen in tag batches. */
uintmax_t tag_nr;
};
/*
 * Path-walk callback installed by cmd__path_walk(): prints one line per
 * object in the batch and adds the batch size to the per-type totals.
 *
 * path - path string printed for every object of this batch
 * oids - the object IDs that make up the batch
 * type - object type of the batch; anything other than tree, blob,
 *        commit or tag triggers BUG()
 * data - the struct path_walk_test_data holding the running totals
 *
 * Each line has the form "<batch>:<type>:<path>:<oid>" followed by
 * ":UNINTERESTING" when the object carries that flag. Always returns 0.
 */
static int emit_block(const char *path, struct oid_array *oids,
		      enum object_type type, void *data)
{
	struct path_walk_test_data *totals = data;
	const char *type_label;
	size_t idx;

	switch (type) {
	case OBJ_TREE:
		totals->tree_nr += oids->nr;
		break;
	case OBJ_BLOB:
		totals->blob_nr += oids->nr;
		break;
	case OBJ_COMMIT:
		totals->commit_nr += oids->nr;
		break;
	case OBJ_TAG:
		totals->tag_nr += oids->nr;
		break;
	default:
		BUG("we do not understand this type");
	}

	type_label = type_name(type);

	/* This should never be output during tests. */
	if (!oids->nr)
		printf("%"PRIuMAX":%s:%s:EMPTY\n",
		       totals->batch_nr, type_label, path);

	for (idx = 0; idx < oids->nr; idx++) {
		struct object *obj = lookup_unknown_object(the_repository,
							   &oids->oid[idx]);
		const char *suffix = "";

		if (obj->flags & UNINTERESTING)
			suffix = ":UNINTERESTING";

		printf("%"PRIuMAX":%s:%s:%s%s\n",
		       totals->batch_nr, type_label, path,
		       oid_to_hex(&oids->oid[idx]),
		       suffix);
	}

	/* Every callback invocation counts as one batch, empty or not. */
	totals->batch_nr++;
	return 0;
}
int cmd__path_walk(int argc, const char **argv)
{
int res;
struct rev_info revs = REV_INFO_INIT;
struct path_walk_info info = PATH_WALK_INFO_INIT;
struct path_walk_test_data data = { 0 };
struct option options[] = {
OPT_BOOL(0, "blobs", &info.blobs,
N_("toggle inclusion of blob objects")),
OPT_BOOL(0, "commits", &info.commits,
N_("toggle inclusion of commit objects")),
OPT_BOOL(0, "tags", &info.tags,
N_("toggle inclusion of tag objects")),
OPT_BOOL(0, "trees", &info.trees,
N_("toggle inclusion of tree objects")),
OPT_BOOL(0, "prune", &info.prune_all_uninteresting,
N_("toggle pruning of uninteresting paths")),
OPT_END(),
};
setup_git_directory();
revs.repo = the_repository;
argc = parse_options(argc, argv, NULL,
options, path_walk_usage,
PARSE_OPT_KEEP_UNKNOWN_OPT | PARSE_OPT_KEEP_ARGV0);
if (argc > 1)
setup_revisions(argc, argv, &revs, NULL);
else
usage(path_walk_usage[0]);
info.revs = &revs;
info.path_fn = emit_block;
info.path_fn_data = &data;
res = walk_objects_by_path(&info);
printf("commits:%" PRIuMAX "\n"
"trees:%" PRIuMAX "\n"
"blobs:%" PRIuMAX "\n"
"tags:%" PRIuMAX "\n",
data.commit_nr, data.tree_nr, data.blob_nr, data.tag_nr);
release_revisions(&revs);
return res;
}

View File

@ -52,6 +52,7 @@ static struct test_cmd cmds[] = {
{ "parse-subcommand", cmd__parse_subcommand },
{ "partial-clone", cmd__partial_clone },
{ "path-utils", cmd__path_utils },
{ "path-walk", cmd__path_walk },
{ "pcre2-config", cmd__pcre2_config },
{ "pkt-line", cmd__pkt_line },
{ "proc-receive", cmd__proc_receive },

View File

@ -45,6 +45,7 @@ int cmd__parse_pathspec_file(int argc, const char** argv);
int cmd__parse_subcommand(int argc, const char **argv);
int cmd__partial_clone(int argc, const char **argv);
int cmd__path_utils(int argc, const char **argv);
int cmd__path_walk(int argc, const char **argv);
int cmd__pcre2_config(int argc, const char **argv);
int cmd__pkt_line(int argc, const char **argv);
int cmd__proc_receive(int argc, const char **argv);

View File

@ -829,6 +829,7 @@ integration_tests = [
't6500-gc.sh',
't6501-freshen-objects.sh',
't6600-test-reach.sh',
't6601-path-walk.sh',
't6700-tree-depth.sh',
't7001-mv.sh',
't7002-mv-sparse-checkout.sh',

368
t/t6601-path-walk.sh Executable file
View File

@ -0,0 +1,368 @@
#!/bin/sh
TEST_PASSES_SANITIZE_LEAK=true
test_description='direct path-walk API tests'
. ./test-lib.sh
# Build the history used by the tests in this script:
#  - branch "base" ends up with the commits "first", "second" and "third";
#    "third" replaces the file "a" with a directory "a/".
#  - branch "topic" forks from the "second" commit and changes right/c.
#  - tags "first", "second.1", "second.2" (a tag of a tag), "third" and
#    "fourth" point at commits or tags.
#  - "tree-tag"/"tree-tag2" and "blob-tag"/"blob-tag2" point directly at
#    trees and blobs; the commit they were taken from is amended away.
test_expect_success 'setup test repository' '
git checkout -b base &&
# Make some objects that will only be reachable
# via non-commit tags.
mkdir child &&
echo file >child/file &&
git add child &&
git commit -m "will abandon" &&
git tag -a -m "tree" tree-tag HEAD^{tree} &&
echo file2 >file2 &&
git add file2 &&
git commit --amend -m "will abandon" &&
git tag tree-tag2 HEAD^{tree} &&
echo blob >file &&
blob_oid=$(git hash-object -t blob -w --stdin <file) &&
git tag -a -m "blob" blob-tag "$blob_oid" &&
echo blob2 >file2 &&
blob2_oid=$(git hash-object -t blob -w --stdin <file2) &&
git tag blob-tag2 "$blob2_oid" &&
rm -fr child file file2 &&
mkdir left &&
mkdir right &&
echo a >a &&
echo b >left/b &&
echo c >right/c &&
git add . &&
git commit --amend -m "first" &&
git tag -m "first" first HEAD &&
echo d >right/d &&
git add right &&
git commit -m "second" &&
git tag -a -m "second (under)" second.1 HEAD &&
git tag -a -m "second (top)" second.2 second.1 &&
# Set up file/dir collision in history.
rm a &&
mkdir a &&
echo a >a/a &&
echo bb >left/b &&
git add a left &&
git commit -m "third" &&
git tag -a -m "third" third &&
git checkout -b topic HEAD~1 &&
echo cc >right/c &&
git commit -a -m "topic" &&
git tag -a -m "fourth" fourth
'
# Walk with --all and check every reported batch: commits, tags, tagged
# blobs, root trees, then one batch per path. The comparison is made on
# sorted files, so the line contents (batch numbers included) must match
# but their order in the output does not.
test_expect_success 'all' '
test-tool path-walk -- --all >out &&
cat >expect <<-EOF &&
0:commit::$(git rev-parse topic)
0:commit::$(git rev-parse base)
0:commit::$(git rev-parse base~1)
0:commit::$(git rev-parse base~2)
1:tag:/tags:$(git rev-parse refs/tags/first)
1:tag:/tags:$(git rev-parse refs/tags/second.1)
1:tag:/tags:$(git rev-parse refs/tags/second.2)
1:tag:/tags:$(git rev-parse refs/tags/third)
1:tag:/tags:$(git rev-parse refs/tags/fourth)
1:tag:/tags:$(git rev-parse refs/tags/tree-tag)
1:tag:/tags:$(git rev-parse refs/tags/blob-tag)
2:blob:/tagged-blobs:$(git rev-parse refs/tags/blob-tag^{})
2:blob:/tagged-blobs:$(git rev-parse refs/tags/blob-tag2^{})
3:tree::$(git rev-parse topic^{tree})
3:tree::$(git rev-parse base^{tree})
3:tree::$(git rev-parse base~1^{tree})
3:tree::$(git rev-parse base~2^{tree})
3:tree::$(git rev-parse refs/tags/tree-tag^{})
3:tree::$(git rev-parse refs/tags/tree-tag2^{})
4:blob:a:$(git rev-parse base~2:a)
5:blob:file2:$(git rev-parse refs/tags/tree-tag2^{}:file2)
6:tree:a/:$(git rev-parse base:a)
7:tree:child/:$(git rev-parse refs/tags/tree-tag:child)
8:blob:child/file:$(git rev-parse refs/tags/tree-tag:child/file)
9:tree:left/:$(git rev-parse base:left)
9:tree:left/:$(git rev-parse base~2:left)
10:blob:left/b:$(git rev-parse base~2:left/b)
10:blob:left/b:$(git rev-parse base:left/b)
11:tree:right/:$(git rev-parse topic:right)
11:tree:right/:$(git rev-parse base~1:right)
11:tree:right/:$(git rev-parse base~2:right)
12:blob:right/c:$(git rev-parse base~2:right/c)
12:blob:right/c:$(git rev-parse topic:right/c)
13:blob:right/d:$(git rev-parse base~1:right/d)
blobs:10
commits:4
tags:7
trees:13
EOF
test_cmp_sorted expect out
'
# Stage a new blob (left/c) and walk with --indexed-objects only. The
# expected output has one single-blob batch per indexed path and a lone
# tree batch for right/; no commits or tags are expected. The index is
# restored with "git reset --hard" when the test finishes.
test_expect_success 'indexed objects' '
test_when_finished git reset --hard &&
# stage change into index, adding a blob but
# also invalidating the cache-tree for the root
# and the "left" directory.
echo bogus >left/c &&
git add left &&
test-tool path-walk -- --indexed-objects >out &&
cat >expect <<-EOF &&
0:blob:a:$(git rev-parse HEAD:a)
1:blob:left/b:$(git rev-parse HEAD:left/b)
2:blob:left/c:$(git rev-parse :left/c)
3:blob:right/c:$(git rev-parse HEAD:right/c)
4:blob:right/d:$(git rev-parse HEAD:right/d)
5:tree:right/:$(git rev-parse topic:right)
blobs:5
commits:0
tags:0
trees:1
EOF
test_cmp_sorted expect out
'
# Combine --indexed-objects with --branches after staging a modified
# right/d. The staged blob is expected in the same right/d batch as the
# committed one, and no tag batches are expected.
test_expect_success 'branches and indexed objects mix well' '
test_when_finished git reset --hard &&
# stage change into index, adding a blob but
# also invalidating the cache-tree for the root
# and the "right" directory.
echo fake >right/d &&
git add right &&
test-tool path-walk -- --indexed-objects --branches >out &&
cat >expect <<-EOF &&
0:commit::$(git rev-parse topic)
0:commit::$(git rev-parse base)
0:commit::$(git rev-parse base~1)
0:commit::$(git rev-parse base~2)
1:tree::$(git rev-parse topic^{tree})
1:tree::$(git rev-parse base^{tree})
1:tree::$(git rev-parse base~1^{tree})
1:tree::$(git rev-parse base~2^{tree})
2:tree:a/:$(git rev-parse refs/tags/third:a)
3:tree:left/:$(git rev-parse base:left)
3:tree:left/:$(git rev-parse base~2:left)
4:blob:left/b:$(git rev-parse base:left/b)
4:blob:left/b:$(git rev-parse base~2:left/b)
5:tree:right/:$(git rev-parse topic:right)
5:tree:right/:$(git rev-parse base~1:right)
5:tree:right/:$(git rev-parse base~2:right)
6:blob:right/c:$(git rev-parse base~2:right/c)
6:blob:right/c:$(git rev-parse topic:right/c)
7:blob:right/d:$(git rev-parse base~1:right/d)
7:blob:right/d:$(git rev-parse :right/d)
8:blob:a:$(git rev-parse base~2:a)
blobs:7
commits:4
tags:0
trees:10
EOF
test_cmp_sorted expect out
'
# Walk from the "topic" branch alone: three commits with their trees and
# blobs are expected, with no tag batches and nothing marked
# UNINTERESTING.
test_expect_success 'topic only' '
test-tool path-walk -- topic >out &&
cat >expect <<-EOF &&
0:commit::$(git rev-parse topic)
0:commit::$(git rev-parse base~1)
0:commit::$(git rev-parse base~2)
1:tree::$(git rev-parse topic^{tree})
1:tree::$(git rev-parse base~1^{tree})
1:tree::$(git rev-parse base~2^{tree})
2:blob:a:$(git rev-parse base~2:a)
3:tree:left/:$(git rev-parse base~2:left)
4:blob:left/b:$(git rev-parse base~2:left/b)
5:tree:right/:$(git rev-parse topic:right)
5:tree:right/:$(git rev-parse base~1:right)
5:tree:right/:$(git rev-parse base~2:right)
6:blob:right/c:$(git rev-parse base~2:right/c)
6:blob:right/c:$(git rev-parse topic:right/c)
7:blob:right/d:$(git rev-parse base~1:right/d)
blobs:5
commits:3
tags:0
trees:7
EOF
test_cmp_sorted expect out
'
# Walk "topic --not base": only the topic commit is expected. The
# entries for a, left/, left/b and right/d are expected to carry the
# :UNINTERESTING suffix, while the root tree, right/ and right/c are not.
test_expect_success 'topic, not base' '
test-tool path-walk -- topic --not base >out &&
cat >expect <<-EOF &&
0:commit::$(git rev-parse topic)
1:tree::$(git rev-parse topic^{tree})
2:blob:a:$(git rev-parse topic:a):UNINTERESTING
3:tree:left/:$(git rev-parse topic:left):UNINTERESTING
4:blob:left/b:$(git rev-parse topic:left/b):UNINTERESTING
5:tree:right/:$(git rev-parse topic:right)
6:blob:right/c:$(git rev-parse topic:right/c)
7:blob:right/d:$(git rev-parse topic:right/d):UNINTERESTING
blobs:4
commits:1
tags:0
trees:3
EOF
test_cmp_sorted expect out
'
# Start from the tag "fourth" and the blob tag "blob-tag2", excluding
# "base". A /tags batch holding "fourth" and a /tagged-blobs batch
# holding the blob behind "blob-tag2" are expected in addition to the
# topic commit and its trees and blobs.
test_expect_success 'fourth, blob-tag2, not base' '
test-tool path-walk -- fourth blob-tag2 --not base >out &&
cat >expect <<-EOF &&
0:commit::$(git rev-parse topic)
1:tag:/tags:$(git rev-parse fourth)
2:blob:/tagged-blobs:$(git rev-parse refs/tags/blob-tag2^{})
3:tree::$(git rev-parse topic^{tree})
4:blob:a:$(git rev-parse base~1:a):UNINTERESTING
5:tree:left/:$(git rev-parse base~1:left):UNINTERESTING
6:blob:left/b:$(git rev-parse base~1:left/b):UNINTERESTING
7:tree:right/:$(git rev-parse topic:right)
8:blob:right/c:$(git rev-parse topic:right/c)
9:blob:right/d:$(git rev-parse base~1:right/d):UNINTERESTING
blobs:5
commits:1
tags:1
trees:3
EOF
test_cmp_sorted expect out
'
# With --no-trees and --no-commits, only blob batches are expected and
# the commit and tree totals are expected to be zero.
test_expect_success 'topic, not base, only blobs' '
test-tool path-walk --no-trees --no-commits \
-- topic --not base >out &&
cat >expect <<-EOF &&
0:blob:a:$(git rev-parse topic:a):UNINTERESTING
1:blob:left/b:$(git rev-parse topic:left/b):UNINTERESTING
2:blob:right/c:$(git rev-parse topic:right/c)
3:blob:right/d:$(git rev-parse topic:right/d):UNINTERESTING
blobs:4
commits:0
tags:0
trees:0
EOF
test_cmp_sorted expect out
'
# No, this doesn't make a lot of sense for the path-walk API,
# but it is possible to do.
#
# With --no-blobs and --no-trees, the single topic commit is the only
# object line expected; the blob and tree totals are expected to be zero.
test_expect_success 'topic, not base, only commits' '
test-tool path-walk --no-blobs --no-trees \
-- topic --not base >out &&
cat >expect <<-EOF &&
0:commit::$(git rev-parse topic)
commits:1
blobs:0
tags:0
trees:0
EOF
test_cmp_sorted expect out
'
# With --no-blobs and --no-commits, only tree batches are expected: the
# root tree, left/ (marked UNINTERESTING) and right/.
test_expect_success 'topic, not base, only trees' '
test-tool path-walk --no-blobs --no-commits \
-- topic --not base >out &&
cat >expect <<-EOF &&
0:tree::$(git rev-parse topic^{tree})
1:tree:left/:$(git rev-parse topic:left):UNINTERESTING
2:tree:right/:$(git rev-parse topic:right)
commits:0
blobs:0
tags:0
trees:3
EOF
test_cmp_sorted expect out
'
# Pass --boundary as a revision option: the commit base~1 and the trees
# and blobs taken from it are expected in the output, each carrying the
# :UNINTERESTING suffix, alongside the objects from topic.
test_expect_success 'topic, not base, boundary' '
test-tool path-walk -- --boundary topic --not base >out &&
cat >expect <<-EOF &&
0:commit::$(git rev-parse topic)
0:commit::$(git rev-parse base~1):UNINTERESTING
1:tree::$(git rev-parse topic^{tree})
1:tree::$(git rev-parse base~1^{tree}):UNINTERESTING
2:blob:a:$(git rev-parse base~1:a):UNINTERESTING
3:tree:left/:$(git rev-parse base~1:left):UNINTERESTING
4:blob:left/b:$(git rev-parse base~1:left/b):UNINTERESTING
5:tree:right/:$(git rev-parse topic:right)
5:tree:right/:$(git rev-parse base~1:right):UNINTERESTING
6:blob:right/c:$(git rev-parse base~1:right/c):UNINTERESTING
6:blob:right/c:$(git rev-parse topic:right/c)
7:blob:right/d:$(git rev-parse base~1:right/d):UNINTERESTING
blobs:5
commits:2
tags:0
trees:5
EOF
test_cmp_sorted expect out
'
# Add the helper's --prune option to a --boundary walk. Only the commit,
# root tree, right/ and right/c batches are expected; no batch is
# expected for a, left/, left/b or right/d.
test_expect_success 'topic, not base, boundary with pruning' '
test-tool path-walk --prune -- --boundary topic --not base >out &&
cat >expect <<-EOF &&
0:commit::$(git rev-parse topic)
0:commit::$(git rev-parse base~1):UNINTERESTING
1:tree::$(git rev-parse topic^{tree})
1:tree::$(git rev-parse base~1^{tree}):UNINTERESTING
2:tree:right/:$(git rev-parse topic:right)
2:tree:right/:$(git rev-parse base~1:right):UNINTERESTING
3:blob:right/c:$(git rev-parse base~1:right/c):UNINTERESTING
3:blob:right/c:$(git rev-parse topic:right/c)
blobs:2
commits:2
tags:0
trees:4
EOF
test_cmp_sorted expect out
'
# In a scratch repository, commit initial/file.t, then move it to the
# top level in a second commit that both "move-to-top" and "other" point
# at. Walk --all and require that the root tree of HEAD shows up on
# exactly one output line. The scratch repository is removed afterwards.
test_expect_success 'trees are reported exactly once' '
test_when_finished "rm -rf unique-trees" &&
test_create_repo unique-trees &&
(
cd unique-trees &&
mkdir initial &&
test_commit initial/file &&
git switch -c move-to-top &&
git mv initial/file.t ./ &&
test_tick &&
git commit -m moved &&
git update-ref refs/heads/other HEAD
) &&
test-tool -C unique-trees path-walk -- --all >out &&
tree=$(git -C unique-trees rev-parse HEAD:) &&
grep "$tree" out >out-filtered &&
test_line_count = 1 out-filtered
'
test_done

View File

@ -1268,6 +1268,16 @@ test_cmp () {
eval "$GIT_TEST_CMP" '"$@"'
}
# test_cmp_sorted <file1> <file2>
#
# Compare two files while ignoring line order: each input is sorted into
# "<file>.sorted" and the sorted copies are given to test_cmp. The
# temporary copies are removed only when the comparison succeeds; after
# a failure they are left in place.
test_cmp_sorted () {
	sort <"$1" >"$1.sorted" || return
	sort <"$2" >"$2.sorted" || return
	test_cmp "$1.sorted" "$2.sorted" || return
	rm "$1.sorted" "$2.sorted"
}
# Check that the given config key has the expected value.
#
# test_cmp_config [-C <dir>] <expected-value>