Merge branch 'ew/fast-import-unpack-limit'
"git fast-import" learned the same performance trick to avoid creating too small a packfile as "git fetch" and "git push" have, using *.unpackLimit configuration. * ew/fast-import-unpack-limit: fast-import: invalidate pack_id references after loosening fast-import: implement unpack limit
Documentation/config.txt
@@ -1189,6 +1189,15 @@ difftool.<tool>.cmd::
 difftool.prompt::
 	Prompt before each invocation of the diff tool.
 
+fastimport.unpackLimit::
+	If the number of objects imported by linkgit:git-fast-import[1]
+	is below this limit, then the objects will be unpacked into
+	loose object files. However if the number of imported objects
+	equals or exceeds this limit then the pack will be stored as a
+	pack. Storing the pack from a fast-import can make the import
+	operation complete faster, especially on slow filesystems. If
+	not set, the value of `transfer.unpackLimit` is used instead.
+
 fetch.recurseSubmodules::
 	This option can be either set to a boolean value or to 'on-demand'.
 	Setting it to a boolean changes the behavior of fetch and pull to
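A hedged example of how the new key might be set alongside the existing one (the values are only illustrative):

    # explode small fast-import results into loose objects
    git config fastimport.unpackLimit 100

    # used as the fallback when fastimport.unpackLimit is not set
    git config transfer.unpackLimit 100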
Documentation/git-fast-import.txt
@@ -136,6 +136,8 @@ Performance and Compression Tuning
 	Maximum size of each output packfile.
 	The default is unlimited.
 
+fastimport.unpackLimit::
+	See linkgit:git-config[1]
 
 Performance
 -----------
fast-import.c
@@ -166,6 +166,7 @@ Format of STDIN stream:
 #include "quote.h"
 #include "exec_cmd.h"
 #include "dir.h"
+#include "run-command.h"
 
 #define PACK_ID_BITS 16
 #define MAX_PACK_ID ((1<<PACK_ID_BITS)-1)
@@ -282,6 +283,7 @@ struct recent_command {
 /* Configured limits on output */
 static unsigned long max_depth = 10;
 static off_t max_packsize;
+static int unpack_limit = 100;
 static int force_update;
 static int pack_compression_level = Z_DEFAULT_COMPRESSION;
 static int pack_compression_seen;
@@ -596,6 +598,33 @@ static struct object_entry *insert_object(unsigned char *sha1)
 	return e;
 }
 
+static void invalidate_pack_id(unsigned int id)
+{
+	unsigned int h;
+	unsigned long lu;
+	struct tag *t;
+
+	for (h = 0; h < ARRAY_SIZE(object_table); h++) {
+		struct object_entry *e;
+
+		for (e = object_table[h]; e; e = e->next)
+			if (e->pack_id == id)
+				e->pack_id = MAX_PACK_ID;
+	}
+
+	for (lu = 0; lu < branch_table_sz; lu++) {
+		struct branch *b;
+
+		for (b = branch_table[lu]; b; b = b->table_next_branch)
+			if (b->pack_id == id)
+				b->pack_id = MAX_PACK_ID;
+	}
+
+	for (t = first_tag; t; t = t->next_tag)
+		if (t->pack_id == id)
+			t->pack_id = MAX_PACK_ID;
+}
+
 static unsigned int hc_str(const char *s, size_t len)
 {
 	unsigned int r = 0;
@@ -951,6 +980,23 @@ static void unkeep_all_packs(void)
 	}
 }
 
+static int loosen_small_pack(const struct packed_git *p)
+{
+	struct child_process unpack = CHILD_PROCESS_INIT;
+
+	if (lseek(p->pack_fd, 0, SEEK_SET) < 0)
+		die_errno("Failed seeking to start of '%s'", p->pack_name);
+
+	unpack.in = p->pack_fd;
+	unpack.git_cmd = 1;
+	unpack.stdout_to_stderr = 1;
+	argv_array_push(&unpack.args, "unpack-objects");
+	if (!show_stats)
+		argv_array_push(&unpack.args, "-q");
+
+	return run_command(&unpack);
+}
+
 static void end_packfile(void)
 {
 	static int running;
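loosen_small_pack() streams the freshly written pack back through `git unpack-objects` on stdin, adding -q whenever fast-import itself is not showing statistics. Done by hand on an on-disk pack it would look roughly like this, where small.pack is a placeholder name:

    # explode a pack's objects into loose objects under .git/objects/
    git unpack-objects -q <small.pack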
@@ -973,6 +1019,14 @@ static void end_packfile(void)
 		fixup_pack_header_footer(pack_data->pack_fd, pack_data->sha1,
 					 pack_data->pack_name, object_count,
 					 cur_pack_sha1, pack_size);
+
+		if (object_count <= unpack_limit) {
+			if (!loosen_small_pack(pack_data)) {
+				invalidate_pack_id(pack_id);
+				goto discard_pack;
+			}
+		}
+
 		close(pack_data->pack_fd);
 		idx_name = keep_pack(create_index());
 
@@ -1003,6 +1057,7 @@ static void end_packfile(void)
 		pack_id++;
 	}
 	else {
+discard_pack:
 		close(pack_data->pack_fd);
 		unlink_or_warn(pack_data->pack_name);
 	}
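On success the loosened pack is no longer needed, so end_packfile() jumps to the discard_pack path and the temporary packfile is deleted; a small import therefore leaves .git/objects/pack empty. Condensed from the new test below (it assumes an `input` stream that ends with `done`):

    git -c fastimport.unpackLimit=2 fast-import --done <input &&
    test $(find .git/objects/pack -type f | wc -l) -eq 0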
@@ -3320,6 +3375,7 @@ static void parse_option(const char *option)
 static void git_pack_config(void)
 {
 	int indexversion_value;
+	int limit;
 	unsigned long packsizelimit_value;
 
 	if (!git_config_get_ulong("pack.depth", &max_depth)) {
@@ -3344,6 +3400,11 @@ static void git_pack_config(void)
 	if (!git_config_get_ulong("pack.packsizelimit", &packsizelimit_value))
 		max_packsize = packsizelimit_value;
 
+	if (!git_config_get_int("fastimport.unpacklimit", &limit))
+		unpack_limit = limit;
+	else if (!git_config_get_int("transfer.unpacklimit", &limit))
+		unpack_limit = limit;
+
 	git_config(git_default_config, NULL);
 }
 
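The lookup order above gives fastimport.unpackLimit precedence over transfer.unpackLimit. A one-off override on the command line (stream.fi is again a placeholder) might look like:

    # fastimport.unpackLimit wins when both are set
    git -c transfer.unpackLimit=1 \
        -c fastimport.unpackLimit=10000 \
        fast-import <stream.fi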
t/t9300-fast-import.sh
@@ -52,6 +52,7 @@ echo "$@"'
 ###
 
 test_expect_success 'empty stream succeeds' '
+	git config fastimport.unpackLimit 0 &&
 	git fast-import </dev/null
 '
 
@@ -2690,6 +2691,7 @@ test_expect_success 'R: blob bigger than threshold' '
 	echo >>input &&
 
 	test_create_repo R &&
+	git --git-dir=R/.git config fastimport.unpackLimit 0 &&
 	git --git-dir=R/.git fast-import --big-file-threshold=1 <input
 '
 
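These two tests pin fastimport.unpackLimit to 0 so that even a tiny import still produces a packfile, preserving the behaviour the surrounding assertions expect (with a limit of 0 the new object_count <= unpack_limit check never triggers for a non-empty import). The same effect for a single run would be:

    git -c fastimport.unpackLimit=0 fast-import <input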
t/t9302-fast-import-unpack-limit.sh (new executable file, 105 lines)
@@ -0,0 +1,105 @@
+#!/bin/sh
+test_description='test git fast-import unpack limit'
+. ./test-lib.sh
+
+test_expect_success 'create loose objects on import' '
+	test_tick &&
+	cat >input <<-INPUT_END &&
+	commit refs/heads/master
+	committer $GIT_COMMITTER_NAME <$GIT_COMMITTER_EMAIL> $GIT_COMMITTER_DATE
+	data <<COMMIT
+	initial
+	COMMIT
+
+	done
+	INPUT_END
+
+	git -c fastimport.unpackLimit=2 fast-import --done <input &&
+	git fsck --no-progress &&
+	test $(find .git/objects/?? -type f | wc -l) -eq 2 &&
+	test $(find .git/objects/pack -type f | wc -l) -eq 0
+'
+
+test_expect_success 'bigger packs are preserved' '
+	test_tick &&
+	cat >input <<-INPUT_END &&
+	commit refs/heads/master
+	committer $GIT_COMMITTER_NAME <$GIT_COMMITTER_EMAIL> $GIT_COMMITTER_DATE
+	data <<COMMIT
+	incremental should create a pack
+	COMMIT
+	from refs/heads/master^0
+
+	commit refs/heads/branch
+	committer $GIT_COMMITTER_NAME <$GIT_COMMITTER_EMAIL> $GIT_COMMITTER_DATE
+	data <<COMMIT
+	branch
+	COMMIT
+
+	done
+	INPUT_END
+
+	git -c fastimport.unpackLimit=2 fast-import --done <input &&
+	git fsck --no-progress &&
+	test $(find .git/objects/?? -type f | wc -l) -eq 2 &&
+	test $(find .git/objects/pack -type f | wc -l) -eq 2
+'
+
+test_expect_success 'lookups after checkpoint works' '
+	hello_id=$(echo hello | git hash-object --stdin -t blob) &&
+	id="$GIT_COMMITTER_NAME <$GIT_COMMITTER_EMAIL> $GIT_COMMITTER_DATE" &&
+	before=$(git rev-parse refs/heads/master^0) &&
+	(
+		cat <<-INPUT_END &&
+		blob
+		mark :1
+		data 6
+		hello
+
+		commit refs/heads/master
+		mark :2
+		committer $id
+		data <<COMMIT
+		checkpoint after this
+		COMMIT
+		from refs/heads/master^0
+		M 100644 :1 hello
+
+		# pre-checkpoint
+		cat-blob :1
+		cat-blob $hello_id
+		checkpoint
+		# post-checkpoint
+		cat-blob :1
+		cat-blob $hello_id
+		INPUT_END
+
+		n=0 &&
+		from=$before &&
+		while test x"$from" = x"$before"
+		do
+			if test $n -gt 30
+			then
+				echo >&2 "checkpoint did not update branch"
+				exit 1
+			else
+				n=$(($n + 1))
+			fi &&
+			sleep 1 &&
+			from=$(git rev-parse refs/heads/master^0)
+		done &&
+		cat <<-INPUT_END &&
+		commit refs/heads/master
+		committer $id
+		data <<COMMIT
+		make sure from "unpacked sha1 reference" works, too
+		COMMIT
+		from $from
+		INPUT_END
+		echo done
+	) | git -c fastimport.unpackLimit=100 fast-import --done &&
+	test $(find .git/objects/?? -type f | wc -l) -eq 6 &&
+	test $(find .git/objects/pack -type f | wc -l) -eq 2
+'
+
+test_done
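Like the other scripts under t/, the new test can be run on its own from the t/ directory, for example:

    cd t && sh ./t9302-fast-import-unpack-limit.sh -v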