unpack-objects: low memory footprint for get_data() in dry_run mode
As the name implies, "get_data(size)" will allocate and return a given
amount of memory. Allocating memory for a large blob object may cause the
system to run out of memory. In preparation for replacing calls to
"get_data()" when unpacking large blob objects in later commits, refactor
"get_data()" to reduce its memory footprint in dry_run mode.
In dry_run mode, "get_data()" is only used to check the integrity of the
data, and the returned buffer is not used at all, so we can allocate a
smaller buffer and reuse it as the zstream output. Make the function
return NULL in dry_run mode, as no callers use the returned buffer.
The "find [...]objects/?? -type f | wc -l" test idiom used here is
adapted from the same "find" usage added to another test in d9545c7f46
(fast-import: implement unpack limit, 2016-04-25).
Suggested-by: Jiang Xin <zhiyou.jx@alibaba-inc.com>
Signed-off-by: Han Xin <chiyutianyi@gmail.com>
Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
This commit is contained in:
@ -97,15 +97,27 @@ static void use(int bytes)
|
||||
display_throughput(progress, consumed_bytes);
|
||||
}
|
||||
|
||||
/*
|
||||
* Decompress zstream from the standard input into a newly
|
||||
* allocated buffer of specified size and return the buffer.
|
||||
* The caller is responsible to free the returned buffer.
|
||||
*
|
||||
* But for dry_run mode, "get_data()" is only used to check the
|
||||
* integrity of data, and the returned buffer is not used at all.
|
||||
* Therefore, in dry_run mode, "get_data()" will release the small
|
||||
* allocated buffer which is reused to hold temporary zstream output
|
||||
* and return NULL instead of returning garbage data.
|
||||
*/
|
||||
static void *get_data(unsigned long size)
|
||||
{
|
||||
git_zstream stream;
|
||||
void *buf = xmallocz(size);
|
||||
unsigned long bufsize = dry_run && size > 8192 ? 8192 : size;
|
||||
void *buf = xmallocz(bufsize);
|
||||
|
||||
memset(&stream, 0, sizeof(stream));
|
||||
|
||||
stream.next_out = buf;
|
||||
stream.avail_out = size;
|
||||
stream.avail_out = bufsize;
|
||||
stream.next_in = fill(1);
|
||||
stream.avail_in = len;
|
||||
git_inflate_init(&stream);
|
||||
@ -125,8 +137,17 @@ static void *get_data(unsigned long size)
|
||||
}
|
||||
stream.next_in = fill(1);
|
||||
stream.avail_in = len;
|
||||
if (dry_run) {
|
||||
/* reuse the buffer in dry_run mode */
|
||||
stream.next_out = buf;
|
||||
stream.avail_out = bufsize > size - stream.total_out ?
|
||||
size - stream.total_out :
|
||||
bufsize;
|
||||
}
|
||||
}
|
||||
git_inflate_end(&stream);
|
||||
if (dry_run)
|
||||
FREE_AND_NULL(buf);
|
||||
return buf;
|
||||
}
|
||||
|
||||
@ -326,10 +347,8 @@ static void unpack_non_delta_entry(enum object_type type, unsigned long size,
|
||||
{
|
||||
void *buf = get_data(size);
|
||||
|
||||
if (!dry_run && buf)
|
||||
if (buf)
|
||||
write_object(nr, type, buf, size);
|
||||
else
|
||||
free(buf);
|
||||
}
|
||||
|
||||
static int resolve_against_held(unsigned nr, const struct object_id *base,
|
||||
@ -359,10 +378,8 @@ static void unpack_delta_entry(enum object_type type, unsigned long delta_size,
|
||||
oidread(&base_oid, fill(the_hash_algo->rawsz));
|
||||
use(the_hash_algo->rawsz);
|
||||
delta_data = get_data(delta_size);
|
||||
if (dry_run || !delta_data) {
|
||||
free(delta_data);
|
||||
if (!delta_data)
|
||||
return;
|
||||
}
|
||||
if (has_object_file(&base_oid))
|
||||
; /* Ok we have this one */
|
||||
else if (resolve_against_held(nr, &base_oid,
|
||||
@ -398,10 +415,8 @@ static void unpack_delta_entry(enum object_type type, unsigned long delta_size,
|
||||
die("offset value out of bound for delta base object");
|
||||
|
||||
delta_data = get_data(delta_size);
|
||||
if (dry_run || !delta_data) {
|
||||
free(delta_data);
|
||||
if (!delta_data)
|
||||
return;
|
||||
}
|
||||
lo = 0;
|
||||
hi = nr;
|
||||
while (lo < hi) {
|
||||
|
Reference in New Issue
Block a user