Merge branch 'ds/name-hash-tweaks'

"git pack-objects" and its wrapper "git repack" learned an option
to use an alternative path-hash function to improve delta-base
selection to produce a packfile with deeper history than window
size.

* ds/name-hash-tweaks:
  pack-objects: prevent name hash version change
  test-tool: add helper for name-hash values
  p5313: add size comparison test
  pack-objects: add GIT_TEST_NAME_HASH_VERSION
  repack: add --name-hash-version option
  pack-objects: add --name-hash-version option
  pack-objects: create new name-hash function version
This commit is contained in:
Junio C Hamano
2025-02-12 10:08:51 -08:00
22 changed files with 389 additions and 16 deletions

View File

@ -471,6 +471,10 @@ a test and then fails then the whole test run will abort. This can help to make
sure the expected tests are executed and not silently skipped when their
dependency breaks or is simply not present in a new environment.
GIT_TEST_NAME_HASH_VERSION=<n>, when set, causes 'git pack-objects' to
assume '--name-hash-version=<n>'.
Naming Tests
------------

View File

@ -34,6 +34,7 @@ test_tool_sources = [
'test-match-trees.c',
'test-mergesort.c',
'test-mktemp.c',
'test-name-hash.c',
'test-online-cpus.c',
'test-pack-mtimes.c',
'test-parse-options.c',

23
t/helper/test-name-hash.c Normal file
View File

@ -0,0 +1,23 @@
/*
* test-name-hash.c: Read a list of paths over stdin and report on their
* name-hash and full name-hash.
*/
#include "test-tool.h"
#include "git-compat-util.h"
#include "pack-objects.h"
#include "strbuf.h"
/*
 * "test-tool name-hash": for each line read from stdin, print the value of
 * pack_name_hash(), then the value of pack_name_hash_v2(), then the line
 * itself.  Both hashes are printed as unsigned decimals in a field of
 * width 10.  Command-line arguments are ignored.  Always returns 0.
 */
int cmd__name_hash(int argc UNUSED, const char **argv UNUSED)
{
	struct strbuf path = STRBUF_INIT;

	for (;;) {
		/* A non-zero result from strbuf_getline() ends the loop. */
		if (strbuf_getline(&path, stdin))
			break;

		printf("%10u %10u %s\n",
		       pack_name_hash(path.buf),
		       pack_name_hash_v2((unsigned const char *)path.buf),
		       path.buf);
	}

	strbuf_release(&path);
	return 0;
}

View File

@ -44,6 +44,7 @@ static struct test_cmd cmds[] = {
{ "match-trees", cmd__match_trees },
{ "mergesort", cmd__mergesort },
{ "mktemp", cmd__mktemp },
{ "name-hash", cmd__name_hash },
{ "online-cpus", cmd__online_cpus },
{ "pack-mtimes", cmd__pack_mtimes },
{ "parse-options", cmd__parse_options },

View File

@ -37,6 +37,7 @@ int cmd__lazy_init_name_hash(int argc, const char **argv);
int cmd__match_trees(int argc, const char **argv);
int cmd__mergesort(int argc, const char **argv);
int cmd__mktemp(int argc, const char **argv);
int cmd__name_hash(int argc, const char **argv);
int cmd__online_cpus(int argc, const char **argv);
int cmd__pack_mtimes(int argc, const char **argv);
int cmd__parse_options(int argc, const char **argv);

70
t/perf/p5313-pack-objects.sh Executable file
View File

@ -0,0 +1,70 @@
#!/bin/sh

# Perf script: for each supported --name-hash-version value, time
# 'git pack-objects' (thin, big and shallow inputs) and 'git repack -adf',
# and record the size of the pack each of them produces.
test_description='Tests pack performance with different name-hash versions'
. ./perf-lib.sh

GIT_TEST_PASSING_SANITIZE_LEAK=0
export GIT_TEST_PASSING_SANITIZE_LEAK

test_perf_large_repo

# Prepare the three --revs inputs consumed by the pack-objects runs below.
test_expect_success 'create rev input' '
	cat >in-thin <<-EOF &&
	$(git rev-parse HEAD)
	^$(git rev-parse HEAD~1)
	EOF

	cat >in-big <<-EOF &&
	$(git rev-parse HEAD)
	^$(git rev-parse HEAD~1000)
	EOF

	cat >in-shallow <<-EOF
	$(git rev-parse HEAD)
	--shallow $(git rev-parse HEAD)
	EOF
'

for version in 1 2
do
	# The single-quoted test bodies expand $version when they are run,
	# so it is exported here.
	# NOTE(review): presumably perf-lib runs test_perf bodies in a
	# child shell -- confirm against perf-lib.sh.
	export version

	test_perf "thin pack with version $version" '
		git pack-objects --thin --stdout --revs --sparse \
			--name-hash-version=$version <in-thin >out
	'

	test_size "thin pack size with version $version" '
		test_file_size out
	'

	test_perf "big pack with version $version" '
		git pack-objects --stdout --revs --sparse \
			--name-hash-version=$version <in-big >out
	'

	test_size "big pack size with version $version" '
		test_file_size out
	'

	test_perf "shallow fetch pack with version $version" '
		git pack-objects --stdout --revs --sparse --shallow \
			--name-hash-version=$version <in-shallow >out
	'

	test_size "shallow pack size with version $version" '
		test_file_size out
	'

	test_perf "repack with version $version" '
		git repack -adf --name-hash-version=$version
	'

	test_size "repack size with version $version" '
		gitdir=$(git rev-parse --git-dir) &&
		# Quote the directory so a path containing whitespace is not
		# word-split; the glob itself stays unquoted on purpose.
		pack=$(ls "$gitdir"/objects/pack/pack-*.pack) &&
		test_file_size "$pack"
	'
done

test_done

31
t/perf/p5314-name-hash.sh Executable file
View File

@ -0,0 +1,31 @@
#!/bin/sh

# Perf script: list every path at HEAD, compute its name-hash values with
# 'test-tool name-hash', and report per hash version how many distinct
# values occur and how many paths share the most common value.
test_description='Tests name-hash collisions for the paths at HEAD'
. ./perf-lib.sh

GIT_TEST_PASSING_SANITIZE_LEAK=0
export GIT_TEST_PASSING_SANITIZE_LEAK

test_perf_large_repo

test_size 'paths at head' '
	git ls-tree -r --name-only HEAD >path-list &&
	wc -l <path-list &&
	test-tool name-hash <path-list >name-hashes
'

for version in 1 2
do
	# $version doubles as the awk column number: the helper output has
	# the first hash in column 1 and the second hash in column 2.
	test_size "distinct hash value: v$version" '
		awk "{ print \$$version; }" <name-hashes | sort | \
			uniq -c >name-hash-count &&
		wc -l <name-hash-count
	'

	# name-hash-count holds "uniq -c" output, so its first column is the
	# number of paths that share one hash value.
	test_size "maximum multiplicity: v$version" '
		sort -nr <name-hash-count | head -n 1 |	\
			awk "{ print \$1; }"
	'
done

test_done

View File

@ -45,7 +45,6 @@ rebase
remote
remote-ext
remote-fd
repack
reset
restore
rev-parse

View File

@ -689,4 +689,38 @@ do
'
done
# Option parsing for 'git pack-objects --name-hash-version=<n>':
# the values -1, 1 and 2 must be accepted, while 0 and 3 must make the
# command fail and report "invalid --name-hash-version option" on stderr.
# GIT_TEST_NAME_HASH_VERSION is unset first, before any value is tried.
test_expect_success 'valid and invalid --name-hash-versions' '
sane_unset GIT_TEST_NAME_HASH_VERSION &&
# Valid values are hard to verify other than "do not fail".
# Performance tests will be more valuable to validate these versions.
# Negative values are converted to version 1.
for value in -1 1 2
do
git pack-objects base --all --name-hash-version=$value || return 1
done &&
# Invalid values have clear post-conditions.
for value in 0 3
do
test_must_fail git pack-objects base --all --name-hash-version=$value 2>err &&
test_grep "invalid --name-hash-version option" err || return 1
done
'
# The following test is not necessarily a permanent choice, but since we do not
# have a "name hash version" bit in the .bitmap file format, we cannot write the
# hash values into the .bitmap file without risking breakage later.
#
# TODO: Make these compatible in the future and replace this test with the
# expected behavior when both are specified.
test_expect_success '--name-hash-version=2 and --write-bitmap-index are incompatible' '
	# Writing a pack to disk: the command succeeds but must warn.
	git pack-objects base --all --name-hash-version=2 --write-bitmap-index 2>err &&
	test_grep "currently, --write-bitmap-index requires --name-hash-version=1" err &&

	# --stdout option silently removes --write-bitmap-index
	git pack-objects --stdout --all --name-hash-version=2 --write-bitmap-index >out 2>err &&
	# Let test_grep do the negation itself instead of inverting its
	# exit status from the outside.
	test_grep ! "currently, --write-bitmap-index requires --name-hash-version=1" err
'
test_done

View File

@ -26,6 +26,36 @@ has_any () {
grep -Ff "$1" "$2"
}
# Since name-hash values are stored in the .bitmap files, add a test
# that checks that the name-hash calculations are stable across versions.
# Not exhaustive, but these hashing algorithms would be hard to change
# without causing deviations here.
#
# Each expected line is "<hash> <hash> <path>" as printed by
# 'test-tool name-hash' (first pack_name_hash(), then pack_name_hash_v2()).
# The expected values pin two deliberate collisions: the "a/..." and
# "b/..." paths share the first hash but not the second, and the last two
# paths share both hashes.
test_expect_success 'name-hash value stability' '
cat >names <<-\EOF &&
first
second
third
a/one-long-enough-for-collisions
b/two-long-enough-for-collisions
many/parts/to/this/path/enough/to/collide/in/v2
enough/parts/to/this/path/enough/to/collide/in/v2
EOF
test-tool name-hash <names >out &&
cat >expect <<-\EOF &&
2582249472 1763573760 first
2289942528 1188134912 second
2300837888 1130758144 third
2544516325 3963087891 a/one-long-enough-for-collisions
2544516325 4013419539 b/two-long-enough-for-collisions
1420111091 1709547268 many/parts/to/this/path/enough/to/collide/in/v2
1420111091 1709547268 enough/parts/to/this/path/enough/to/collide/in/v2
EOF
test_cmp expect out
'
test_bitmap_cases () {
writeLookupTable=false
for i in "$@"
@ -419,7 +449,10 @@ test_bitmap_cases () {
cat >expect <<-\EOF &&
error: missing value for '\''pack.preferbitmaptips'\''
EOF
git repack -adb 2>actual &&
# Disable name hash version adjustment due to stderr comparison.
GIT_TEST_NAME_HASH_VERSION=1 \
git repack -adb 2>actual &&
test_cmp expect actual
)
'

View File

@ -208,7 +208,8 @@ test_expect_success 'bitmapPseudoMerge.stableThreshold creates stable groups' '
'
test_expect_success 'out of order thresholds are rejected' '
test_must_fail git \
# Disable the test var to remove a stderr message.
test_must_fail env GIT_TEST_NAME_HASH_VERSION=1 git \
-c bitmapPseudoMerge.test.pattern="refs/*" \
-c bitmapPseudoMerge.test.threshold=1.month.ago \
-c bitmapPseudoMerge.test.stableThreshold=1.week.ago \

View File

@ -1237,7 +1237,12 @@ test_expect_success 'all boundary commits are excluded' '
test_tick &&
git merge otherside &&
ad=$(git log --no-walk --format=%ad HEAD) &&
git bundle create twoside-boundary.bdl main --since="$ad" &&
# If a different name hash function is used here, then no delta
# pair is found and the bundle does not expand to three objects
# when fixing the thin object.
GIT_TEST_NAME_HASH_VERSION=1 \
git bundle create twoside-boundary.bdl main --since="$ad" &&
test_bundle_object_count --thin twoside-boundary.bdl 3
'

View File

@ -246,7 +246,11 @@ test_expect_success 'create bundle with --since option' '
EOF
test_cmp expect actual &&
git bundle create since.bdl \
# If a different name hash function is used, then one fewer
# delta base is found and this counts a different number
# of objects after performing --fix-thin.
GIT_TEST_NAME_HASH_VERSION=1 \
git bundle create since.bdl \
--since "Thu Apr 7 15:27:00 2005 -0700" \
--all &&

View File

@ -1093,7 +1093,9 @@ test_expect_success 'submodule update --quiet passes quietness to fetch with a s
) &&
git clone super4 super5 &&
(cd super5 &&
git submodule update --quiet --init --depth=1 submodule3 >out 2>err &&
# This test var can mess with the stderr output checked in this test.
GIT_TEST_NAME_HASH_VERSION=1 \
git submodule update --quiet --init --depth=1 submodule3 >out 2>err &&
test_must_be_empty out &&
test_must_be_empty err
) &&

View File

@ -308,7 +308,10 @@ test_expect_success 'no bitmaps created if .keep files present' '
keep=${pack%.pack}.keep &&
test_when_finished "rm -f \"\$keep\"" &&
>"$keep" &&
git -C bare.git repack -ad 2>stderr &&
# Disable --name-hash-version test due to stderr comparison.
GIT_TEST_NAME_HASH_VERSION=1 \
git -C bare.git repack -ad 2>stderr &&
test_must_be_empty stderr &&
find bare.git/objects/pack/ -type f -name "*.bitmap" >actual &&
test_must_be_empty actual
@ -319,7 +322,10 @@ test_expect_success 'auto-bitmaps do not complain if unavailable' '
blob=$(test-tool genrandom big $((1024*1024)) |
git -C bare.git hash-object -w --stdin) &&
git -C bare.git update-ref refs/tags/big $blob &&
git -C bare.git repack -ad 2>stderr &&
# Disable --name-hash-version test due to stderr comparison.
GIT_TEST_NAME_HASH_VERSION=1 \
git -C bare.git repack -ad 2>stderr &&
test_must_be_empty stderr &&
find bare.git/objects/pack -type f -name "*.bitmap" >actual &&
test_must_be_empty actual
@ -776,6 +782,12 @@ test_expect_success 'repack -ad cleans up old .tmp-* packs' '
test_must_be_empty tmpfiles
'
# Run 'git repack' with --name-hash-version=2 while writing a trace2 event
# log to hash-trace.txt, then use test_subcommand_flex to check that the log
# records a 'git pack-objects' invocation carrying the same option.
test_expect_success '--name-hash-version option passes through to pack-objects' '
GIT_TRACE2_EVENT="$(pwd)/hash-trace.txt" \
git repack -a --name-hash-version=2 &&
test_subcommand_flex git pack-objects --name-hash-version=2 <hash-trace.txt
'
test_expect_success 'setup for update-server-info' '
git init update-server-info &&
test_commit -C update-server-info message

View File

@ -1896,6 +1896,32 @@ test_subcommand () {
fi
}
# Look on stdin for a line recording an invocation of the given
# subcommand whose argument list contains the given words in the given
# order; other arguments may appear before, between or after them.
#
#	test_subcommand_flex [!] <command> <args>... < <trace>
#
# With a leading "!", succeed only when no such line is found.
#
test_subcommand_flex () {
	local invert=
	case "$1" in
	"!")
		invert=t
		shift
		;;
	esac

	# Every word becomes a quoted token followed by ".*", so arbitrary
	# text is allowed after each one.
	local pattern="$(printf '"%s".*' "$@")"

	if test -z "$invert"
	then
		grep "\[$pattern\]"
		return
	fi

	! grep "\[$pattern\]"
}
# Check that the given command was invoked as part of the
# trace2-format trace on stdin.
#