object-file.c: refactor write_loose_object() to several steps

When writing a large blob using "write_loose_object()", we have to pass
a buffer with the whole content of the blob, and this behavior will
consume lots of memory and may cause OOM. We will introduce a stream
version function ("stream_loose_object()") in later commit to resolve
this issue.

Before introducing that streaming function, do some refactoring on
"write_loose_object()" to reuse code for both versions.

Rewrite "write_loose_object()" as follows:

 1. Figure out a path for the (temp) object file. This step is only
    used in "write_loose_object()".

 2. Move common steps for starting to write loose objects into a new
    function "start_loose_object_common()".

 3. Compress data.

 4. Move common steps for ending zlib stream into a new function
    "end_loose_object_common()".

 5. Close fd and finalize the object file.

Helped-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
Helped-by: Jiang Xin <zhiyou.jx@alibaba-inc.com>
Signed-off-by: Han Xin <chiyutianyi@gmail.com>
Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
This commit is contained in:
Han Xin
2022-06-11 10:44:17 +08:00
committed by Junio C Hamano
parent a1bf5ca29f
commit 97a9db6ffb

View File

@ -1951,6 +1951,74 @@ static int create_tmpfile(struct strbuf *tmp, const char *filename)
return fd;
}
/**
* Common steps for loose object writers to start writing loose
* objects:
*
* - Create tmpfile for the loose object.
* - Setup zlib stream for compression.
* - Start to feed header to zlib stream.
*
* Returns a "fd", which should later be provided to
* end_loose_object_common().
*/
static int start_loose_object_common(struct strbuf *tmp_file,
const char *filename, unsigned flags,
git_zstream *stream,
unsigned char *buf, size_t buflen,
git_hash_ctx *c,
char *hdr, int hdrlen)
{
int fd;
fd = create_tmpfile(tmp_file, filename);
if (fd < 0) {
if (flags & HASH_SILENT)
return -1;
else if (errno == EACCES)
return error(_("insufficient permission for adding "
"an object to repository database %s"),
get_object_directory());
else
return error_errno(
_("unable to create temporary file"));
}
/* Setup zlib stream for compression */
git_deflate_init(stream, zlib_compression_level);
stream->next_out = buf;
stream->avail_out = buflen;
the_hash_algo->init_fn(c);
/* Start to feed header to zlib stream */
stream->next_in = (unsigned char *)hdr;
stream->avail_in = hdrlen;
while (git_deflate(stream, 0) == Z_OK)
; /* nothing */
the_hash_algo->update_fn(c, hdr, hdrlen);
return fd;
}
/**
* Common steps for loose object writers to end writing loose objects:
*
* - End the compression of zlib stream.
* - Get the calculated oid to "oid".
*/
static int end_loose_object_common(git_hash_ctx *c, git_zstream *stream,
struct object_id *oid)
{
int ret;
ret = git_deflate_end_gently(stream);
if (ret != Z_OK)
return ret;
the_hash_algo->final_oid_fn(oid, c);
return Z_OK;
}
static int write_loose_object(const struct object_id *oid, char *hdr,
int hdrlen, const void *buf, unsigned long len,
time_t mtime, unsigned flags)
@ -1968,28 +2036,11 @@ static int write_loose_object(const struct object_id *oid, char *hdr,
loose_object_path(the_repository, &filename, oid);
fd = create_tmpfile(&tmp_file, filename.buf);
if (fd < 0) {
if (flags & HASH_SILENT)
return -1;
else if (errno == EACCES)
return error(_("insufficient permission for adding an object to repository database %s"), get_object_directory());
else
return error_errno(_("unable to create temporary file"));
}
/* Set it up */
git_deflate_init(&stream, zlib_compression_level);
stream.next_out = compressed;
stream.avail_out = sizeof(compressed);
the_hash_algo->init_fn(&c);
/* First header.. */
stream.next_in = (unsigned char *)hdr;
stream.avail_in = hdrlen;
while (git_deflate(&stream, 0) == Z_OK)
; /* nothing */
the_hash_algo->update_fn(&c, hdr, hdrlen);
fd = start_loose_object_common(&tmp_file, filename.buf, flags,
&stream, compressed, sizeof(compressed),
&c, hdr, hdrlen);
if (fd < 0)
return -1;
/* Then the data itself.. */
stream.next_in = (void *)buf;
@ -2007,11 +2058,10 @@ static int write_loose_object(const struct object_id *oid, char *hdr,
if (ret != Z_STREAM_END)
die(_("unable to deflate new object %s (%d)"), oid_to_hex(oid),
ret);
ret = git_deflate_end_gently(&stream);
ret = end_loose_object_common(&c, &stream, &parano_oid);
if (ret != Z_OK)
die(_("deflateEnd on object %s failed (%d)"), oid_to_hex(oid),
ret);
the_hash_algo->final_oid_fn(&parano_oid, &c);
if (!oideq(oid, &parano_oid))
die(_("confused by unstable object source data for %s"),
oid_to_hex(oid));