Merge branch 'jk/loose-object-fsck'

"git fsck" inspects loose objects more carefully now.

* jk/loose-object-fsck:
  fsck: detect trailing garbage in all object types
  fsck: parse loose object paths directly
  sha1_file: add read_loose_object() function
  t1450: test fsck of packed objects
  sha1_file: fix error message for alternate objects
  t1450: refactor loose-object removal
This commit is contained in:
Junio C Hamano
2017-01-31 13:14:57 -08:00
4 changed files with 284 additions and 41 deletions

View File

@ -1630,39 +1630,54 @@ int git_open_cloexec(const char *name, int flags)
return fd;
}
static int stat_sha1_file(const unsigned char *sha1, struct stat *st)
/*
* Find "sha1" as a loose object in the local repository or in an alternate.
* Returns 0 on success, negative on failure.
*
* The "path" out-parameter will give the path of the object we found (if any).
* Note that it may point to static storage and is only valid until another
* call to sha1_file_name(), etc.
*/
static int stat_sha1_file(const unsigned char *sha1, struct stat *st,
const char **path)
{
struct alternate_object_database *alt;
if (!lstat(sha1_file_name(sha1), st))
*path = sha1_file_name(sha1);
if (!lstat(*path, st))
return 0;
prepare_alt_odb();
errno = ENOENT;
for (alt = alt_odb_list; alt; alt = alt->next) {
const char *path = alt_sha1_path(alt, sha1);
if (!lstat(path, st))
*path = alt_sha1_path(alt, sha1);
if (!lstat(*path, st))
return 0;
}
return -1;
}
static int open_sha1_file(const unsigned char *sha1)
/*
* Like stat_sha1_file(), but actually open the object and return the
* descriptor. See the caveats on the "path" parameter above.
*/
static int open_sha1_file(const unsigned char *sha1, const char **path)
{
int fd;
struct alternate_object_database *alt;
int most_interesting_errno;
fd = git_open(sha1_file_name(sha1));
*path = sha1_file_name(sha1);
fd = git_open(*path);
if (fd >= 0)
return fd;
most_interesting_errno = errno;
prepare_alt_odb();
for (alt = alt_odb_list; alt; alt = alt->next) {
const char *path = alt_sha1_path(alt, sha1);
fd = git_open(path);
*path = alt_sha1_path(alt, sha1);
fd = git_open(*path);
if (fd >= 0)
return fd;
if (most_interesting_errno == ENOENT)
@ -1672,12 +1687,21 @@ static int open_sha1_file(const unsigned char *sha1)
return -1;
}
void *map_sha1_file(const unsigned char *sha1, unsigned long *size)
/*
* Map the loose object at "path" if it is not NULL, or the path found by
* searching for a loose object named "sha1".
*/
static void *map_sha1_file_1(const char *path,
const unsigned char *sha1,
unsigned long *size)
{
void *map;
int fd;
fd = open_sha1_file(sha1);
if (path)
fd = git_open(path);
else
fd = open_sha1_file(sha1, &path);
map = NULL;
if (fd >= 0) {
struct stat st;
@ -1686,7 +1710,7 @@ void *map_sha1_file(const unsigned char *sha1, unsigned long *size)
*size = xsize_t(st.st_size);
if (!*size) {
/* mmap() is forbidden on empty files */
error("object file %s is empty", sha1_file_name(sha1));
error("object file %s is empty", path);
return NULL;
}
map = xmmap(NULL, *size, PROT_READ, MAP_PRIVATE, fd, 0);
@ -1696,6 +1720,11 @@ void *map_sha1_file(const unsigned char *sha1, unsigned long *size)
return map;
}
void *map_sha1_file(const unsigned char *sha1, unsigned long *size)
{
return map_sha1_file_1(NULL, sha1, size);
}
unsigned long unpack_object_header_buffer(const unsigned char *buf,
unsigned long len, enum object_type *type, unsigned long *sizep)
{
@ -2806,8 +2835,9 @@ static int sha1_loose_object_info(const unsigned char *sha1,
* object even exists.
*/
if (!oi->typep && !oi->typename && !oi->sizep) {
const char *path;
struct stat st;
if (stat_sha1_file(sha1, &st) < 0)
if (stat_sha1_file(sha1, &st, &path) < 0)
return -1;
if (oi->disk_sizep)
*oi->disk_sizep = st.st_size;
@ -3003,6 +3033,8 @@ void *read_sha1_file_extended(const unsigned char *sha1,
{
void *data;
const struct packed_git *p;
const char *path;
struct stat st;
const unsigned char *repl = lookup_replace_object_extended(sha1, flag);
errno = 0;
@ -3018,12 +3050,9 @@ void *read_sha1_file_extended(const unsigned char *sha1,
die("replacement %s not found for %s",
sha1_to_hex(repl), sha1_to_hex(sha1));
if (has_loose_object(repl)) {
const char *path = sha1_file_name(sha1);
if (!stat_sha1_file(repl, &st, &path))
die("loose object %s (stored in %s) is corrupt",
sha1_to_hex(repl), path);
}
if ((p = has_packed_and_bad(repl)) != NULL)
die("packed object %s (stored in %s) is corrupt",
@ -3793,3 +3822,122 @@ int for_each_packed_object(each_packed_object_fn cb, void *data, unsigned flags)
}
return r ? r : pack_errors;
}
static int check_stream_sha1(git_zstream *stream,
const char *hdr,
unsigned long size,
const char *path,
const unsigned char *expected_sha1)
{
git_SHA_CTX c;
unsigned char real_sha1[GIT_SHA1_RAWSZ];
unsigned char buf[4096];
unsigned long total_read;
int status = Z_OK;
git_SHA1_Init(&c);
git_SHA1_Update(&c, hdr, stream->total_out);
/*
* We already read some bytes into hdr, but the ones up to the NUL
* do not count against the object's content size.
*/
total_read = stream->total_out - strlen(hdr) - 1;
/*
* This size comparison must be "<=" to read the final zlib packets;
* see the comment in unpack_sha1_rest for details.
*/
while (total_read <= size &&
(status == Z_OK || status == Z_BUF_ERROR)) {
stream->next_out = buf;
stream->avail_out = sizeof(buf);
if (size - total_read < stream->avail_out)
stream->avail_out = size - total_read;
status = git_inflate(stream, Z_FINISH);
git_SHA1_Update(&c, buf, stream->next_out - buf);
total_read += stream->next_out - buf;
}
git_inflate_end(stream);
if (status != Z_STREAM_END) {
error("corrupt loose object '%s'", sha1_to_hex(expected_sha1));
return -1;
}
if (stream->avail_in) {
error("garbage at end of loose object '%s'",
sha1_to_hex(expected_sha1));
return -1;
}
git_SHA1_Final(real_sha1, &c);
if (hashcmp(expected_sha1, real_sha1)) {
error("sha1 mismatch for %s (expected %s)", path,
sha1_to_hex(expected_sha1));
return -1;
}
return 0;
}
int read_loose_object(const char *path,
const unsigned char *expected_sha1,
enum object_type *type,
unsigned long *size,
void **contents)
{
int ret = -1;
int fd = -1;
void *map = NULL;
unsigned long mapsize;
git_zstream stream;
char hdr[32];
*contents = NULL;
map = map_sha1_file_1(path, NULL, &mapsize);
if (!map) {
error_errno("unable to mmap %s", path);
goto out;
}
if (unpack_sha1_header(&stream, map, mapsize, hdr, sizeof(hdr)) < 0) {
error("unable to unpack header of %s", path);
goto out;
}
*type = parse_sha1_header(hdr, size);
if (*type < 0) {
error("unable to parse header of %s", path);
git_inflate_end(&stream);
goto out;
}
if (*type == OBJ_BLOB) {
if (check_stream_sha1(&stream, hdr, *size, path, expected_sha1) < 0)
goto out;
} else {
*contents = unpack_sha1_rest(&stream, hdr, *size, expected_sha1);
if (!*contents) {
error("unable to unpack contents of %s", path);
git_inflate_end(&stream);
goto out;
}
if (check_sha1_signature(expected_sha1, *contents,
*size, typename(*type))) {
error("sha1 mismatch for %s (expected %s)", path,
sha1_to_hex(expected_sha1));
free(*contents);
goto out;
}
}
ret = 0; /* everything checks out */
out:
if (map)
munmap(map, mapsize);
if (fd >= 0)
close(fd);
return ret;
}