diff --git a/Documentation/technical/multi-pack-index.txt b/Documentation/technical/multi-pack-index.txt index b39c69da8c..f2221d2b44 100644 --- a/Documentation/technical/multi-pack-index.txt +++ b/Documentation/technical/multi-pack-index.txt @@ -24,6 +24,7 @@ and their offsets into multiple packfiles. It contains: ** An offset within the jth packfile for the object. * If large offsets are required, we use another list of large offsets similar to version 2 pack-indexes. +- An optional list of objects in pseudo-pack order (used with MIDX bitmaps). Thus, we can provide O(log N) lookup time for any number of packfiles. diff --git a/Documentation/technical/pack-format.txt b/Documentation/technical/pack-format.txt index 8d2f42f29e..6d3efb7d16 100644 --- a/Documentation/technical/pack-format.txt +++ b/Documentation/technical/pack-format.txt @@ -376,6 +376,11 @@ CHUNK DATA: [Optional] Object Large Offsets (ID: {'L', 'O', 'F', 'F'}) 8-byte offsets into large packfiles. + [Optional] Bitmap pack order (ID: {'R', 'I', 'D', 'X'}) + A list of MIDX positions (one per object in the MIDX, num_objects in + total, each a 4-byte unsigned integer in network byte order), sorted + according to their relative bitmap/pseudo-pack positions. + TRAILER: Index checksum of the above contents. @@ -456,9 +461,5 @@ In short, a MIDX's pseudo-pack is the de-duplicated concatenation of objects in packs stored by the MIDX, laid out in pack order, and the packs arranged in MIDX order (with the preferred pack coming first). -Finally, note that the MIDX's reverse index is not stored as a chunk in -the multi-pack-index itself. This is done because the reverse index -includes the checksum of the pack or MIDX to which it belongs, which -makes it impossible to write in the MIDX. To avoid races when rewriting -the MIDX, a MIDX reverse index includes the MIDX's checksum in its -filename (e.g., `multi-pack-index-xyz.rev`). +The MIDX's reverse index is stored in the optional 'RIDX' chunk within +the MIDX itself. diff --git a/midx.c b/midx.c index 837b46b2af..6e6cb0eb04 100644 --- a/midx.c +++ b/midx.c @@ -33,6 +33,7 @@ #define MIDX_CHUNKID_OIDLOOKUP 0x4f49444c /* "OIDL" */ #define MIDX_CHUNKID_OBJECTOFFSETS 0x4f4f4646 /* "OOFF" */ #define MIDX_CHUNKID_LARGEOFFSETS 0x4c4f4646 /* "LOFF" */ +#define MIDX_CHUNKID_REVINDEX 0x52494458 /* "RIDX" */ #define MIDX_CHUNK_FANOUT_SIZE (sizeof(uint32_t) * 256) #define MIDX_CHUNK_OFFSET_WIDTH (2 * sizeof(uint32_t)) #define MIDX_CHUNK_LARGE_OFFSET_WIDTH (sizeof(uint64_t)) @@ -161,6 +162,9 @@ struct multi_pack_index *load_multi_pack_index(const char *object_dir, int local pair_chunk(cf, MIDX_CHUNKID_LARGEOFFSETS, &m->chunk_large_offsets); + if (git_env_bool("GIT_TEST_MIDX_READ_RIDX", 1)) + pair_chunk(cf, MIDX_CHUNKID_REVINDEX, &m->chunk_revindex); + m->num_objects = ntohl(m->chunk_oid_fanout[255]); CALLOC_ARRAY(m->pack_names, m->num_packs); @@ -833,6 +837,18 @@ static int write_midx_large_offsets(struct hashfile *f, return 0; } +static int write_midx_revindex(struct hashfile *f, + void *data) +{ + struct write_midx_context *ctx = data; + uint32_t i; + + for (i = 0; i < ctx->entries_nr; i++) + hashwrite_be32(f, ctx->pack_order[i]); + + return 0; +} + struct midx_pack_order_data { uint32_t nr; uint32_t pack; @@ -1403,16 +1419,21 @@ static int write_midx_internal(const char *object_dir, (size_t)ctx.num_large_offsets * MIDX_CHUNK_LARGE_OFFSET_WIDTH, write_midx_large_offsets); + if (flags & (MIDX_WRITE_REV_INDEX | MIDX_WRITE_BITMAP)) { + ctx.pack_order = midx_pack_order(&ctx); + add_chunk(cf, MIDX_CHUNKID_REVINDEX, + ctx.entries_nr * sizeof(uint32_t), + write_midx_revindex); + } + write_midx_header(f, get_num_chunks(cf), ctx.nr - dropped_packs); write_chunkfile(cf, &ctx); finalize_hashfile(f, midx_hash, CSUM_FSYNC | CSUM_HASH_IN_STREAM); free_chunkfile(cf); - if (flags & (MIDX_WRITE_REV_INDEX | MIDX_WRITE_BITMAP)) - ctx.pack_order = midx_pack_order(&ctx); - - if (flags & MIDX_WRITE_REV_INDEX) + if (flags & MIDX_WRITE_REV_INDEX && + git_env_bool("GIT_TEST_MIDX_WRITE_REV", 0)) write_midx_reverse_index(midx_name.buf, midx_hash, &ctx); if (flags & MIDX_WRITE_BITMAP) { if (write_midx_bitmap(midx_name.buf, midx_hash, &ctx, diff --git a/midx.h b/midx.h index b7d79a515c..22e8e53288 100644 --- a/midx.h +++ b/midx.h @@ -36,6 +36,7 @@ struct multi_pack_index { const unsigned char *chunk_oid_lookup; const unsigned char *chunk_object_offsets; const unsigned char *chunk_large_offsets; + const unsigned char *chunk_revindex; const char **pack_names; struct packed_git **packs; diff --git a/pack-bitmap.c b/pack-bitmap.c index f772d3cb7f..9c666cdb8b 100644 --- a/pack-bitmap.c +++ b/pack-bitmap.c @@ -358,7 +358,9 @@ static int open_midx_bitmap_1(struct bitmap_index *bitmap_git, cleanup: munmap(bitmap_git->map, bitmap_git->map_size); bitmap_git->map_size = 0; + bitmap_git->map_pos = 0; bitmap_git->map = NULL; + bitmap_git->midx = NULL; return -1; } @@ -405,6 +407,8 @@ static int open_pack_bitmap_1(struct bitmap_index *bitmap_git, struct packed_git munmap(bitmap_git->map, bitmap_git->map_size); bitmap_git->map = NULL; bitmap_git->map_size = 0; + bitmap_git->map_pos = 0; + bitmap_git->pack = NULL; return -1; } diff --git a/pack-revindex.c b/pack-revindex.c index 70d0fbafcb..08dc160167 100644 --- a/pack-revindex.c +++ b/pack-revindex.c @@ -298,9 +298,29 @@ int load_midx_revindex(struct multi_pack_index *m) { struct strbuf revindex_name = STRBUF_INIT; int ret; + if (m->revindex_data) return 0; + if (m->chunk_revindex) { + /* + * If the MIDX `m` has a `RIDX` chunk, then use its contents for + * the reverse index instead of trying to load a separate `.rev` + * file. + * + * Note that we do *not* set `m->revindex_map` here, since we do + * not want to accidentally call munmap() in the middle of the + * MIDX. + */ + trace2_data_string("load_midx_revindex", the_repository, + "source", "midx"); + m->revindex_data = (const uint32_t *)m->chunk_revindex; + return 0; + } + + trace2_data_string("load_midx_revindex", the_repository, + "source", "rev"); + get_midx_rev_filename(&revindex_name, m); ret = load_revindex_from_disk(revindex_name.buf, diff --git a/t/lib-bitmap.sh b/t/lib-bitmap.sh index 21d0392dda..a95537e759 100644 --- a/t/lib-bitmap.sh +++ b/t/lib-bitmap.sh @@ -1,6 +1,9 @@ # Helpers for scripts testing bitmap functionality; see t5310 for # example usage. +objdir=.git/objects +midx=$objdir/pack/multi-pack-index + # Compare a file containing rev-list bitmap traversal output to its non-bitmap # counterpart. You can't just use test_cmp for this, because the two produce # subtly different output: @@ -264,3 +267,185 @@ have_delta () { midx_checksum () { test-tool read-midx --checksum "$1" } + +# midx_pack_source +midx_pack_source () { + test-tool read-midx --show-objects .git/objects | grep "^$1 " | cut -f2 +} + +test_rev_exists () { + commit="$1" + kind="$2" + + test_expect_success "reverse index exists ($kind)" ' + GIT_TRACE2_EVENT=$(pwd)/event.trace \ + git rev-list --test-bitmap "$commit" && + + if test "rev" = "$kind" + then + test_path_is_file $midx-$(midx_checksum $objdir).rev + fi && + grep "\"category\":\"load_midx_revindex\",\"key\":\"source\",\"value\":\"$kind\"" event.trace + ' +} + +midx_bitmap_core () { + rev_kind="${1:-midx}" + + setup_bitmap_history + + test_expect_success 'create single-pack midx with bitmaps' ' + git repack -ad && + git multi-pack-index write --bitmap && + test_path_is_file $midx && + test_path_is_file $midx-$(midx_checksum $objdir).bitmap + ' + + test_rev_exists HEAD "$rev_kind" + + basic_bitmap_tests + + test_expect_success 'create new additional packs' ' + for i in $(test_seq 1 16) + do + test_commit "$i" && + git repack -d || return 1 + done && + + git checkout -b other2 HEAD~8 && + for i in $(test_seq 1 8) + do + test_commit "side-$i" && + git repack -d || return 1 + done && + git checkout second + ' + + test_expect_success 'create multi-pack midx with bitmaps' ' + git multi-pack-index write --bitmap && + + ls $objdir/pack/pack-*.pack >packs && + test_line_count = 25 packs && + + test_path_is_file $midx && + test_path_is_file $midx-$(midx_checksum $objdir).bitmap + ' + + test_rev_exists HEAD "$rev_kind" + + basic_bitmap_tests + + test_expect_success '--no-bitmap is respected when bitmaps exist' ' + git multi-pack-index write --bitmap && + + test_commit respect--no-bitmap && + git repack -d && + + test_path_is_file $midx && + test_path_is_file $midx-$(midx_checksum $objdir).bitmap && + + git multi-pack-index write --no-bitmap && + + test_path_is_file $midx && + test_path_is_missing $midx-$(midx_checksum $objdir).bitmap && + test_path_is_missing $midx-$(midx_checksum $objdir).rev + ' + + test_expect_success 'setup midx with base from later pack' ' + # Write a and b so that "a" is a delta on top of base "b", since Git + # prefers to delete contents out of a base rather than add to a shorter + # object. + test_seq 1 128 >a && + test_seq 1 130 >b && + + git add a b && + git commit -m "initial commit" && + + a=$(git rev-parse HEAD:a) && + b=$(git rev-parse HEAD:b) && + + # In the first pack, "a" is stored as a delta to "b". + p1=$(git pack-objects .git/objects/pack/pack <<-EOF + $a + $b + EOF + ) && + + # In the second pack, "a" is missing, and "b" is not a delta nor base to + # any other object. + p2=$(git pack-objects .git/objects/pack/pack <<-EOF + $b + $(git rev-parse HEAD) + $(git rev-parse HEAD^{tree}) + EOF + ) && + + git prune-packed && + # Use the second pack as the preferred source, so that "b" occurs + # earlier in the MIDX object order, rendering "a" unusable for pack + # reuse. + git multi-pack-index write --bitmap --preferred-pack=pack-$p2.idx && + + have_delta $a $b && + test $(midx_pack_source $a) != $(midx_pack_source $b) + ' + + rev_list_tests 'full bitmap with backwards delta' + + test_expect_success 'clone with bitmaps enabled' ' + git clone --no-local --bare . clone-reverse-delta.git && + test_when_finished "rm -fr clone-reverse-delta.git" && + + git rev-parse HEAD >expect && + git --git-dir=clone-reverse-delta.git rev-parse HEAD >actual && + test_cmp expect actual + ' + + test_expect_success 'changing the preferred pack does not corrupt bitmaps' ' + rm -fr repo && + git init repo && + test_when_finished "rm -fr repo" && + ( + cd repo && + + test_commit A && + test_commit B && + + git rev-list --objects --no-object-names HEAD^ >A.objects && + git rev-list --objects --no-object-names HEAD^.. >B.objects && + + A=$(git pack-objects $objdir/pack/pack indexes <<-EOF && + pack-$A.idx + pack-$B.idx + EOF + + git multi-pack-index write --bitmap --stdin-packs \ + --preferred-pack=pack-$A.pack err && + test_path_is_file $midx && + test_path_is_file $midx-$(midx_checksum $objdir).bitmap + ' + + test_rev_exists HEAD~ "$rev_kind" + + basic_bitmap_tests HEAD~ +} diff --git a/t/t5310-pack-bitmaps.sh b/t/t5310-pack-bitmaps.sh index d05ab716f6..f775fc1ce6 100755 --- a/t/t5310-pack-bitmaps.sh +++ b/t/t5310-pack-bitmaps.sh @@ -397,4 +397,32 @@ test_expect_success 'pack.preferBitmapTips' ' ) ' +test_expect_success 'complains about multiple pack bitmaps' ' + rm -fr repo && + git init repo && + test_when_finished "rm -fr repo" && + ( + cd repo && + + test_commit base && + + git repack -adb && + bitmap="$(ls .git/objects/pack/pack-*.bitmap)" && + mv "$bitmap" "$bitmap.bak" && + + test_commit other && + git repack -ab && + + mv "$bitmap.bak" "$bitmap" && + + find .git/objects/pack -type f -name "*.pack" >packs && + find .git/objects/pack -type f -name "*.bitmap" >bitmaps && + test_line_count = 2 packs && + test_line_count = 2 bitmaps && + + git rev-list --use-bitmap-index HEAD 2>err && + grep "ignoring extra bitmap file" err + ) +' + test_done diff --git a/t/t5326-multi-pack-bitmaps.sh b/t/t5326-multi-pack-bitmaps.sh index e187f90f29..3c1ecc7e25 100755 --- a/t/t5326-multi-pack-bitmaps.sh +++ b/t/t5326-multi-pack-bitmaps.sh @@ -9,125 +9,13 @@ test_description='exercise basic multi-pack bitmap functionality' GIT_TEST_MULTI_PACK_INDEX=0 GIT_TEST_MULTI_PACK_INDEX_WRITE_BITMAP=0 -objdir=.git/objects -midx=$objdir/pack/multi-pack-index +# This test exercise multi-pack bitmap functionality where the object order is +# stored and read from a special chunk within the MIDX, so use the default +# behavior here. +sane_unset GIT_TEST_MIDX_WRITE_REV +sane_unset GIT_TEST_MIDX_READ_RIDX -# midx_pack_source -midx_pack_source () { - test-tool read-midx --show-objects .git/objects | grep "^$1 " | cut -f2 -} - -setup_bitmap_history - -test_expect_success 'enable core.multiPackIndex' ' - git config core.multiPackIndex true -' - -test_expect_success 'create single-pack midx with bitmaps' ' - git repack -ad && - git multi-pack-index write --bitmap && - test_path_is_file $midx && - test_path_is_file $midx-$(midx_checksum $objdir).bitmap && - test_path_is_file $midx-$(midx_checksum $objdir).rev -' - -basic_bitmap_tests - -test_expect_success 'create new additional packs' ' - for i in $(test_seq 1 16) - do - test_commit "$i" && - git repack -d || return 1 - done && - - git checkout -b other2 HEAD~8 && - for i in $(test_seq 1 8) - do - test_commit "side-$i" && - git repack -d || return 1 - done && - git checkout second -' - -test_expect_success 'create multi-pack midx with bitmaps' ' - git multi-pack-index write --bitmap && - - ls $objdir/pack/pack-*.pack >packs && - test_line_count = 25 packs && - - test_path_is_file $midx && - test_path_is_file $midx-$(midx_checksum $objdir).bitmap && - test_path_is_file $midx-$(midx_checksum $objdir).rev -' - -basic_bitmap_tests - -test_expect_success '--no-bitmap is respected when bitmaps exist' ' - git multi-pack-index write --bitmap && - - test_commit respect--no-bitmap && - git repack -d && - - test_path_is_file $midx && - test_path_is_file $midx-$(midx_checksum $objdir).bitmap && - test_path_is_file $midx-$(midx_checksum $objdir).rev && - - git multi-pack-index write --no-bitmap && - - test_path_is_file $midx && - test_path_is_missing $midx-$(midx_checksum $objdir).bitmap && - test_path_is_missing $midx-$(midx_checksum $objdir).rev -' - -test_expect_success 'setup midx with base from later pack' ' - # Write a and b so that "a" is a delta on top of base "b", since Git - # prefers to delete contents out of a base rather than add to a shorter - # object. - test_seq 1 128 >a && - test_seq 1 130 >b && - - git add a b && - git commit -m "initial commit" && - - a=$(git rev-parse HEAD:a) && - b=$(git rev-parse HEAD:b) && - - # In the first pack, "a" is stored as a delta to "b". - p1=$(git pack-objects .git/objects/pack/pack <<-EOF - $a - $b - EOF - ) && - - # In the second pack, "a" is missing, and "b" is not a delta nor base to - # any other object. - p2=$(git pack-objects .git/objects/pack/pack <<-EOF - $b - $(git rev-parse HEAD) - $(git rev-parse HEAD^{tree}) - EOF - ) && - - git prune-packed && - # Use the second pack as the preferred source, so that "b" occurs - # earlier in the MIDX object order, rendering "a" unusable for pack - # reuse. - git multi-pack-index write --bitmap --preferred-pack=pack-$p2.idx && - - have_delta $a $b && - test $(midx_pack_source $a) != $(midx_pack_source $b) -' - -rev_list_tests 'full bitmap with backwards delta' - -test_expect_success 'clone with bitmaps enabled' ' - git clone --no-local --bare . clone-reverse-delta.git && - test_when_finished "rm -fr clone-reverse-delta.git" && - - git rev-parse HEAD >expect && - git --git-dir=clone-reverse-delta.git rev-parse HEAD >actual && - test_cmp expect actual -' +midx_bitmap_core bitmap_reuse_tests() { from=$1 @@ -204,17 +92,7 @@ test_expect_success 'missing object closure fails gracefully' ' ) ' -test_expect_success 'setup partial bitmaps' ' - test_commit packed && - git repack && - test_commit loose && - git multi-pack-index write --bitmap 2>err && - test_path_is_file $midx && - test_path_is_file $midx-$(midx_checksum $objdir).bitmap && - test_path_is_file $midx-$(midx_checksum $objdir).rev -' - -basic_bitmap_tests HEAD~ +midx_bitmap_partial_tests test_expect_success 'removing a MIDX clears stale bitmaps' ' rm -fr repo && @@ -228,7 +106,6 @@ test_expect_success 'removing a MIDX clears stale bitmaps' ' # Write a MIDX and bitmap; remove the MIDX but leave the bitmap. stale_bitmap=$midx-$(midx_checksum $objdir).bitmap && - stale_rev=$midx-$(midx_checksum $objdir).rev && rm $midx && # Then write a new MIDX. @@ -238,9 +115,7 @@ test_expect_success 'removing a MIDX clears stale bitmaps' ' test_path_is_file $midx && test_path_is_file $midx-$(midx_checksum $objdir).bitmap && - test_path_is_file $midx-$(midx_checksum $objdir).rev && - test_path_is_missing $stale_bitmap && - test_path_is_missing $stale_rev + test_path_is_missing $stale_bitmap ) ' @@ -261,7 +136,6 @@ test_expect_success 'pack.preferBitmapTips' ' git multi-pack-index write --bitmap && test_path_is_file $midx && test_path_is_file $midx-$(midx_checksum $objdir).bitmap && - test_path_is_file $midx-$(midx_checksum $objdir).rev && test-tool bitmap list-commits | sort >bitmaps && comm -13 bitmaps commits >before && @@ -271,7 +145,6 @@ test_expect_success 'pack.preferBitmapTips' ' snapshot && rm -fr $midx-$(midx_checksum $objdir).bitmap && - rm -fr $midx-$(midx_checksum $objdir).rev && rm -fr $midx && git multi-pack-index write --bitmap --refs-snapshot=snapshot && @@ -395,4 +266,23 @@ test_expect_success 'hash-cache values are propagated from pack bitmaps' ' ) ' +test_expect_success 'graceful fallback when missing reverse index' ' + rm -fr repo && + git init repo && + test_when_finished "rm -fr repo" && + ( + cd repo && + + test_commit base && + + # write a pack and MIDX bitmap containing base + git repack -adb && + git multi-pack-index write --bitmap && + + GIT_TEST_MIDX_READ_RIDX=0 \ + git rev-list --use-bitmap-index HEAD 2>err && + ! grep "ignoring extra bitmap file" err + ) +' + test_done diff --git a/t/t5327-multi-pack-bitmaps-rev.sh b/t/t5327-multi-pack-bitmaps-rev.sh new file mode 100755 index 0000000000..d30ba632c8 --- /dev/null +++ b/t/t5327-multi-pack-bitmaps-rev.sh @@ -0,0 +1,23 @@ +#!/bin/sh + +test_description='exercise basic multi-pack bitmap functionality (.rev files)' + +. ./test-lib.sh +. "${TEST_DIRECTORY}/lib-bitmap.sh" + +# We'll be writing our own midx and bitmaps, so avoid getting confused by the +# automatic ones. +GIT_TEST_MULTI_PACK_INDEX=0 +GIT_TEST_MULTI_PACK_INDEX_WRITE_BITMAP=0 + +# Unlike t5326, this test exercise multi-pack bitmap functionality where the +# object order is stored in a separate .rev file. +GIT_TEST_MIDX_WRITE_REV=1 +GIT_TEST_MIDX_READ_RIDX=0 +export GIT_TEST_MIDX_WRITE_REV +export GIT_TEST_MIDX_READ_RIDX + +midx_bitmap_core rev +midx_bitmap_partial_tests rev + +test_done diff --git a/t/t7700-repack.sh b/t/t7700-repack.sh index e489869dd9..5922fb5bdd 100755 --- a/t/t7700-repack.sh +++ b/t/t7700-repack.sh @@ -312,16 +312,13 @@ test_expect_success 'cleans up MIDX when appropriate' ' checksum=$(midx_checksum $objdir) && test_path_is_file $midx && test_path_is_file $midx-$checksum.bitmap && - test_path_is_file $midx-$checksum.rev && test_commit repack-3 && GIT_TEST_MULTI_PACK_INDEX=0 git repack -Adb --write-midx && test_path_is_file $midx && test_path_is_missing $midx-$checksum.bitmap && - test_path_is_missing $midx-$checksum.rev && test_path_is_file $midx-$(midx_checksum $objdir).bitmap && - test_path_is_file $midx-$(midx_checksum $objdir).rev && test_commit repack-4 && GIT_TEST_MULTI_PACK_INDEX=0 git repack -Adb && @@ -354,7 +351,6 @@ test_expect_success '--write-midx with preferred bitmap tips' ' test_line_count = 1 before && rm -fr $midx-$(midx_checksum $objdir).bitmap && - rm -fr $midx-$(midx_checksum $objdir).rev && rm -fr $midx && # instead of constructing the snapshot ourselves (c.f., the test