Merge branch 'sp/stream-clean-filter'

When running a required clean filter, we do not have to mmap the
original before feeding the filter.  Instead, stream the file
contents directly to the filter and process its output.

* sp/stream-clean-filter:
  sha1_file: don't convert off_t to size_t too early to avoid potential die()
  convert: stream from fd to required clean filter to reduce used address space
  copy_fd(): do not close the input file descriptor
  mmap_limit: introduce GIT_MMAP_LIMIT to allow testing expected mmap size
  memory_limit: use git_env_ulong() to parse GIT_ALLOC_LIMIT
  config.c: add git_env_ulong() to parse environment variable
  convert: drop arguments other than 'path' from would_convert_to_git()
This commit is contained in:
Junio C Hamano
2014-10-08 13:05:32 -07:00
10 changed files with 164 additions and 52 deletions

View File

@@ -663,10 +663,26 @@ void release_pack_memory(size_t need)
; /* nothing */
}
/*
 * Abort via die() when a caller asks to map more than the configured
 * ceiling.
 *
 * The ceiling is looked up from the GIT_MMAP_LIMIT environment variable
 * through git_env_ulong() and cached in a function-local static; once a
 * non-zero ceiling has been stored, the environment is not consulted
 * again.  A lookup result of zero is replaced by SIZE_MAX, so in that
 * case no request can exceed the ceiling.
 *
 * NOTE(review): the second argument to git_env_ulong() is presumably the
 * value returned when the variable is unset -- confirm against its
 * definition.
 */
static void mmap_limit_check(size_t length)
{
	static size_t limit = 0;

	if (limit == 0) {
		size_t from_env = git_env_ulong("GIT_MMAP_LIMIT", 0);

		/* Zero would be indistinguishable from "not looked up yet". */
		limit = from_env ? from_env : SIZE_MAX;
	}

	if (length <= limit)
		return;

	die("attempting to mmap %"PRIuMAX" over limit %"PRIuMAX,
	    (uintmax_t)length, (uintmax_t)limit);
}
void *xmmap(void *start, size_t length,
int prot, int flags, int fd, off_t offset)
{
void *ret = mmap(start, length, prot, flags, fd, offset);
void *ret;
mmap_limit_check(length);
ret = mmap(start, length, prot, flags, fd, offset);
if (ret == MAP_FAILED) {
if (!length)
return NULL;
@@ -3076,6 +3092,29 @@ static int index_mem(unsigned char *sha1, void *buf, size_t size,
return ret;
}
static int index_stream_convert_blob(unsigned char *sha1, int fd,
const char *path, unsigned flags)
{
int ret;
const int write_object = flags & HASH_WRITE_OBJECT;
struct strbuf sbuf = STRBUF_INIT;
assert(path);
assert(would_convert_to_git_filter_fd(path));
convert_to_git_filter_fd(path, fd, &sbuf,
write_object ? safe_crlf : SAFE_CRLF_FALSE);
if (write_object)
ret = write_sha1_file(sbuf.buf, sbuf.len, typename(OBJ_BLOB),
sha1);
else
ret = hash_sha1_file(sbuf.buf, sbuf.len, typename(OBJ_BLOB),
sha1);
strbuf_release(&sbuf);
return ret;
}
static int index_pipe(unsigned char *sha1, int fd, enum object_type type,
const char *path, unsigned flags)
{
@@ -3141,15 +3180,22 @@ int index_fd(unsigned char *sha1, int fd, struct stat *st,
enum object_type type, const char *path, unsigned flags)
{
int ret;
size_t size = xsize_t(st->st_size);
if (!S_ISREG(st->st_mode))
/*
* Call xsize_t() only when needed to avoid potentially unnecessary
* die() for large files.
*/
if (type == OBJ_BLOB && path && would_convert_to_git_filter_fd(path))
ret = index_stream_convert_blob(sha1, fd, path, flags);
else if (!S_ISREG(st->st_mode))
ret = index_pipe(sha1, fd, type, path, flags);
else if (size <= big_file_threshold || type != OBJ_BLOB ||
(path && would_convert_to_git(path, NULL, 0, 0)))
ret = index_core(sha1, fd, size, type, path, flags);
else if (st->st_size <= big_file_threshold || type != OBJ_BLOB ||
(path && would_convert_to_git(path)))
ret = index_core(sha1, fd, xsize_t(st->st_size), type, path,
flags);
else
ret = index_stream(sha1, fd, size, type, path, flags);
ret = index_stream(sha1, fd, xsize_t(st->st_size), type, path,
flags);
close(fd);
return ret;
}