From 285a2984bd92a273fb85a216ec7243ea74761a12 Mon Sep 17 00:00:00 2001 From: Jonathan Tan Date: Tue, 13 Jun 2017 14:05:57 -0700 Subject: [PATCH 1/8] sha1_file: teach packed_object_info about typename In commit 46f0344 ("sha1_file: support reading from a loose object of unknown type", 2015-05-06), "struct object_info" gained a "typename" field that could represent a type name from a loose object file, whether valid or invalid, as opposed to the existing "typep" which could only represent valid types. Some relatively complex manipulations were added to avoid breaking packed_object_info() without modifying it, but it is much easier to just teach packed_object_info() about the new field. Therefore, teach packed_object_info() as described above. Signed-off-by: Jonathan Tan Signed-off-by: Junio C Hamano --- sha1_file.c | 29 ++++++++++++----------------- 1 file changed, 12 insertions(+), 17 deletions(-) diff --git a/sha1_file.c b/sha1_file.c index 59a4ed2ed3..a52b27541f 100644 --- a/sha1_file.c +++ b/sha1_file.c @@ -2277,9 +2277,18 @@ int packed_object_info(struct packed_git *p, off_t obj_offset, *oi->disk_sizep = revidx[1].offset - obj_offset; } - if (oi->typep) { - *oi->typep = packed_to_object_type(p, obj_offset, type, &w_curs, curpos); - if (*oi->typep < 0) { + if (oi->typep || oi->typename) { + enum object_type ptot; + ptot = packed_to_object_type(p, obj_offset, type, &w_curs, + curpos); + if (oi->typep) + *oi->typep = ptot; + if (oi->typename) { + const char *tn = typename(ptot); + if (tn) + strbuf_addstr(oi->typename, tn); + } + if (ptot < 0) { type = OBJ_BAD; goto out; } @@ -2960,7 +2969,6 @@ int sha1_object_info_extended(const unsigned char *sha1, struct object_info *oi, struct cached_object *co; struct pack_entry e; int rtype; - enum object_type real_type; const unsigned char *real = lookup_replace_object_extended(sha1, flags); co = find_cached_object(real); @@ -2992,18 +3000,9 @@ int sha1_object_info_extended(const unsigned char *sha1, struct object_info *oi, return -1; } - /* - * packed_object_info() does not follow the delta chain to - * find out the real type, unless it is given oi->typep. - */ - if (oi->typename && !oi->typep) - oi->typep = &real_type; - rtype = packed_object_info(e.p, e.offset, oi); if (rtype < 0) { mark_bad_packed_object(e.p, real); - if (oi->typep == &real_type) - oi->typep = NULL; return sha1_object_info_extended(real, oi, 0); } else if (in_delta_base_cache(e.p, e.offset)) { oi->whence = OI_DBCACHED; @@ -3014,10 +3013,6 @@ int sha1_object_info_extended(const unsigned char *sha1, struct object_info *oi, oi->u.packed.is_delta = (rtype == OBJ_REF_DELTA || rtype == OBJ_OFS_DELTA); } - if (oi->typename) - strbuf_addstr(oi->typename, typename(*oi->typep)); - if (oi->typep == &real_type) - oi->typep = NULL; return 0; } From 19fc5e84a70e0dc6d6b3a279fa549b4e522ee33b Mon Sep 17 00:00:00 2001 From: Jonathan Tan Date: Wed, 21 Jun 2017 17:40:18 -0700 Subject: [PATCH 2/8] sha1_file: rename LOOKUP_UNKNOWN_OBJECT The LOOKUP_UNKNOWN_OBJECT flag was introduced in commit 46f0344 ("sha1_file: support reading from a loose object of unknown type", 2015-05-03) in order to support a feature in cat-file subsequently introduced in commit 39e4ae3 ("cat-file: teach cat-file a '--allow-unknown-type' option", 2015-05-03). Despite its name and location in cache.h, this flag is used neither in read_sha1_file_extended() nor in any of the lookup functions, but used only in sha1_object_info_extended(). Therefore rename this flag to OBJECT_INFO_ALLOW_UNKNOWN_TYPE, taking the name of the cat-file flag that invokes this feature, and move it closer to the declaration of sha1_object_info_extended(). Also add documentation for this flag. OBJECT_INFO_ALLOW_UNKNOWN_TYPE is defined to 2, not 1, to avoid conflicting with LOOKUP_REPLACE_OBJECT. Avoidance of this conflict is necessary because sha1_object_info_extended() supports both flags. Signed-off-by: Jonathan Tan Signed-off-by: Junio C Hamano --- builtin/cat-file.c | 2 +- cache.h | 3 ++- sha1_file.c | 4 ++-- 3 files changed, 5 insertions(+), 4 deletions(-) diff --git a/builtin/cat-file.c b/builtin/cat-file.c index 4bffd7a2d8..209374b3ca 100644 --- a/builtin/cat-file.c +++ b/builtin/cat-file.c @@ -60,7 +60,7 @@ static int cat_one_file(int opt, const char *exp_type, const char *obj_name, const char *path = force_path; if (unknown_type) - flags |= LOOKUP_UNKNOWN_OBJECT; + flags |= OBJECT_INFO_ALLOW_UNKNOWN_TYPE; if (get_sha1_with_context(obj_name, GET_SHA1_RECORD_PATH, oid.hash, &obj_context)) diff --git a/cache.h b/cache.h index 4d92aae0e8..e2ec45dfe3 100644 --- a/cache.h +++ b/cache.h @@ -1207,7 +1207,6 @@ extern char *xdg_cache_home(const char *filename); /* object replacement */ #define LOOKUP_REPLACE_OBJECT 1 -#define LOOKUP_UNKNOWN_OBJECT 2 extern void *read_sha1_file_extended(const unsigned char *sha1, enum object_type *type, unsigned long *size, unsigned flag); static inline void *read_sha1_file(const unsigned char *sha1, enum object_type *type, unsigned long *size) { @@ -1866,6 +1865,8 @@ struct object_info { */ #define OBJECT_INFO_INIT {NULL} +/* Allow reading from a loose object file of unknown/bogus type */ +#define OBJECT_INFO_ALLOW_UNKNOWN_TYPE 2 extern int sha1_object_info_extended(const unsigned char *, struct object_info *, unsigned flags); extern int packed_object_info(struct packed_git *pack, off_t offset, struct object_info *); diff --git a/sha1_file.c b/sha1_file.c index a52b27541f..ad04ea8e0f 100644 --- a/sha1_file.c +++ b/sha1_file.c @@ -1964,7 +1964,7 @@ static int parse_sha1_header_extended(const char *hdr, struct object_info *oi, * we're obtaining the type using '--allow-unknown-type' * option. */ - if ((flags & LOOKUP_UNKNOWN_OBJECT) && (type < 0)) + if ((flags & OBJECT_INFO_ALLOW_UNKNOWN_TYPE) && (type < 0)) type = 0; else if (type < 0) die("invalid object type"); @@ -2941,7 +2941,7 @@ static int sha1_loose_object_info(const unsigned char *sha1, return -1; if (oi->disk_sizep) *oi->disk_sizep = mapsize; - if ((flags & LOOKUP_UNKNOWN_OBJECT)) { + if ((flags & OBJECT_INFO_ALLOW_UNKNOWN_TYPE)) { if (unpack_sha1_header_to_strbuf(&stream, map, mapsize, hdr, sizeof(hdr), &hdrbuf) < 0) status = error("unable to unpack %s header with --allow-unknown-type", sha1_to_hex(sha1)); From 1f0c0d36c1567f5cc8c10141fd4e70b871e809fd Mon Sep 17 00:00:00 2001 From: Jonathan Tan Date: Wed, 21 Jun 2017 17:40:19 -0700 Subject: [PATCH 3/8] sha1_file: rename LOOKUP_REPLACE_OBJECT The LOOKUP_REPLACE_OBJECT flag controls whether the lookup_replace_object() function is invoked by sha1_object_info_extended(), read_sha1_file_extended(), and lookup_replace_object_extended(), but it is not immediately clear which functions accept that flag. Therefore restrict this flag to only sha1_object_info_extended(), renaming it appropriately to OBJECT_INFO_LOOKUP_REPLACE and adding some documentation. Update read_sha1_file_extended() to have a boolean parameter instead, and delete lookup_replace_object_extended(). parse_sha1_header() also passes this flag to parse_sha1_header_extended() since commit 46f0344 ("sha1_file: support reading from a loose object of unknown type", 2015-05-03), but that has had no effect since that commit. Therefore this patch also removes this flag from that invocation. Signed-off-by: Jonathan Tan Signed-off-by: Junio C Hamano --- builtin/cat-file.c | 5 +++-- cache.h | 17 ++++++----------- sha1_file.c | 14 +++++++++----- 3 files changed, 18 insertions(+), 18 deletions(-) diff --git a/builtin/cat-file.c b/builtin/cat-file.c index 209374b3ca..a58b8c8200 100644 --- a/builtin/cat-file.c +++ b/builtin/cat-file.c @@ -56,7 +56,7 @@ static int cat_one_file(int opt, const char *exp_type, const char *obj_name, struct object_context obj_context; struct object_info oi = OBJECT_INFO_INIT; struct strbuf sb = STRBUF_INIT; - unsigned flags = LOOKUP_REPLACE_OBJECT; + unsigned flags = OBJECT_INFO_LOOKUP_REPLACE; const char *path = force_path; if (unknown_type) @@ -337,7 +337,8 @@ static void batch_object_write(const char *obj_name, struct batch_options *opt, struct strbuf buf = STRBUF_INIT; if (!data->skip_object_info && - sha1_object_info_extended(data->oid.hash, &data->info, LOOKUP_REPLACE_OBJECT) < 0) { + sha1_object_info_extended(data->oid.hash, &data->info, + OBJECT_INFO_LOOKUP_REPLACE) < 0) { printf("%s missing\n", obj_name ? obj_name : oid_to_hex(&data->oid)); fflush(stdout); diff --git a/cache.h b/cache.h index e2ec45dfe3..a3631b2374 100644 --- a/cache.h +++ b/cache.h @@ -1205,12 +1205,12 @@ extern char *xdg_config_home(const char *filename); */ extern char *xdg_cache_home(const char *filename); -/* object replacement */ -#define LOOKUP_REPLACE_OBJECT 1 -extern void *read_sha1_file_extended(const unsigned char *sha1, enum object_type *type, unsigned long *size, unsigned flag); +extern void *read_sha1_file_extended(const unsigned char *sha1, + enum object_type *type, + unsigned long *size, int lookup_replace); static inline void *read_sha1_file(const unsigned char *sha1, enum object_type *type, unsigned long *size) { - return read_sha1_file_extended(sha1, type, size, LOOKUP_REPLACE_OBJECT); + return read_sha1_file_extended(sha1, type, size, 1); } /* @@ -1232,13 +1232,6 @@ static inline const unsigned char *lookup_replace_object(const unsigned char *sh return do_lookup_replace_object(sha1); } -static inline const unsigned char *lookup_replace_object_extended(const unsigned char *sha1, unsigned flag) -{ - if (!(flag & LOOKUP_REPLACE_OBJECT)) - return sha1; - return lookup_replace_object(sha1); -} - /* Read and unpack a sha1 file into memory, write memory to a sha1 file */ extern int sha1_object_info(const unsigned char *, unsigned long *); extern int hash_sha1_file(const void *buf, unsigned long len, const char *type, unsigned char *sha1); @@ -1865,6 +1858,8 @@ struct object_info { */ #define OBJECT_INFO_INIT {NULL} +/* Invoke lookup_replace_object() on the given hash */ +#define OBJECT_INFO_LOOKUP_REPLACE 1 /* Allow reading from a loose object file of unknown/bogus type */ #define OBJECT_INFO_ALLOW_UNKNOWN_TYPE 2 extern int sha1_object_info_extended(const unsigned char *, struct object_info *, unsigned flags); diff --git a/sha1_file.c b/sha1_file.c index ad04ea8e0f..71296e6cde 100644 --- a/sha1_file.c +++ b/sha1_file.c @@ -2002,7 +2002,7 @@ int parse_sha1_header(const char *hdr, unsigned long *sizep) struct object_info oi = OBJECT_INFO_INIT; oi.sizep = sizep; - return parse_sha1_header_extended(hdr, &oi, LOOKUP_REPLACE_OBJECT); + return parse_sha1_header_extended(hdr, &oi, 0); } static void *unpack_sha1_file(void *map, unsigned long mapsize, enum object_type *type, unsigned long *size, const unsigned char *sha1) @@ -2969,7 +2969,9 @@ int sha1_object_info_extended(const unsigned char *sha1, struct object_info *oi, struct cached_object *co; struct pack_entry e; int rtype; - const unsigned char *real = lookup_replace_object_extended(sha1, flags); + const unsigned char *real = (flags & OBJECT_INFO_LOOKUP_REPLACE) ? + lookup_replace_object(sha1) : + sha1; co = find_cached_object(real); if (co) { @@ -3025,7 +3027,8 @@ int sha1_object_info(const unsigned char *sha1, unsigned long *sizep) oi.typep = &type; oi.sizep = sizep; - if (sha1_object_info_extended(sha1, &oi, LOOKUP_REPLACE_OBJECT) < 0) + if (sha1_object_info_extended(sha1, &oi, + OBJECT_INFO_LOOKUP_REPLACE) < 0) return -1; return type; } @@ -3107,13 +3110,14 @@ static void *read_object(const unsigned char *sha1, enum object_type *type, void *read_sha1_file_extended(const unsigned char *sha1, enum object_type *type, unsigned long *size, - unsigned flag) + int lookup_replace) { void *data; const struct packed_git *p; const char *path; struct stat st; - const unsigned char *repl = lookup_replace_object_extended(sha1, flag); + const unsigned char *repl = lookup_replace ? lookup_replace_object(sha1) + : sha1; errno = 0; data = read_object(repl, type, size); From 845b102b9951949a2413b3e6d425d9fb4424ff9e Mon Sep 17 00:00:00 2001 From: Jonathan Tan Date: Wed, 21 Jun 2017 17:40:20 -0700 Subject: [PATCH 4/8] sha1_file: move delta base cache code up In a subsequent patch, packed_object_info() will be modified to use the delta base cache, so move the relevant code to before packed_object_info(). Signed-off-by: Jonathan Tan Signed-off-by: Junio C Hamano --- sha1_file.c | 220 ++++++++++++++++++++++++++-------------------------- 1 file changed, 110 insertions(+), 110 deletions(-) diff --git a/sha1_file.c b/sha1_file.c index 71296e6cde..0c996370de 100644 --- a/sha1_file.c +++ b/sha1_file.c @@ -2239,116 +2239,6 @@ static enum object_type packed_to_object_type(struct packed_git *p, goto out; } -int packed_object_info(struct packed_git *p, off_t obj_offset, - struct object_info *oi) -{ - struct pack_window *w_curs = NULL; - unsigned long size; - off_t curpos = obj_offset; - enum object_type type; - - /* - * We always get the representation type, but only convert it to - * a "real" type later if the caller is interested. - */ - type = unpack_object_header(p, &w_curs, &curpos, &size); - - if (oi->sizep) { - if (type == OBJ_OFS_DELTA || type == OBJ_REF_DELTA) { - off_t tmp_pos = curpos; - off_t base_offset = get_delta_base(p, &w_curs, &tmp_pos, - type, obj_offset); - if (!base_offset) { - type = OBJ_BAD; - goto out; - } - *oi->sizep = get_size_from_delta(p, &w_curs, tmp_pos); - if (*oi->sizep == 0) { - type = OBJ_BAD; - goto out; - } - } else { - *oi->sizep = size; - } - } - - if (oi->disk_sizep) { - struct revindex_entry *revidx = find_pack_revindex(p, obj_offset); - *oi->disk_sizep = revidx[1].offset - obj_offset; - } - - if (oi->typep || oi->typename) { - enum object_type ptot; - ptot = packed_to_object_type(p, obj_offset, type, &w_curs, - curpos); - if (oi->typep) - *oi->typep = ptot; - if (oi->typename) { - const char *tn = typename(ptot); - if (tn) - strbuf_addstr(oi->typename, tn); - } - if (ptot < 0) { - type = OBJ_BAD; - goto out; - } - } - - if (oi->delta_base_sha1) { - if (type == OBJ_OFS_DELTA || type == OBJ_REF_DELTA) { - const unsigned char *base; - - base = get_delta_base_sha1(p, &w_curs, curpos, - type, obj_offset); - if (!base) { - type = OBJ_BAD; - goto out; - } - - hashcpy(oi->delta_base_sha1, base); - } else - hashclr(oi->delta_base_sha1); - } - -out: - unuse_pack(&w_curs); - return type; -} - -static void *unpack_compressed_entry(struct packed_git *p, - struct pack_window **w_curs, - off_t curpos, - unsigned long size) -{ - int st; - git_zstream stream; - unsigned char *buffer, *in; - - buffer = xmallocz_gently(size); - if (!buffer) - return NULL; - memset(&stream, 0, sizeof(stream)); - stream.next_out = buffer; - stream.avail_out = size + 1; - - git_inflate_init(&stream); - do { - in = use_pack(p, w_curs, curpos, &stream.avail_in); - stream.next_in = in; - st = git_inflate(&stream, Z_FINISH); - if (!stream.avail_out) - break; /* the payload is larger than it should be */ - curpos += stream.next_in - in; - } while (st == Z_OK || st == Z_BUF_ERROR); - git_inflate_end(&stream); - if ((st != Z_STREAM_END) || stream.total_out != size) { - free(buffer); - return NULL; - } - - return buffer; -} - static struct hashmap delta_base_cache; static size_t delta_base_cached; @@ -2486,6 +2376,116 @@ static void add_delta_base_cache(struct packed_git *p, off_t base_offset, hashmap_add(&delta_base_cache, ent); } +int packed_object_info(struct packed_git *p, off_t obj_offset, + struct object_info *oi) +{ + struct pack_window *w_curs = NULL; + unsigned long size; + off_t curpos = obj_offset; + enum object_type type; + + /* + * We always get the representation type, but only convert it to + * a "real" type later if the caller is interested. + */ + type = unpack_object_header(p, &w_curs, &curpos, &size); + + if (oi->sizep) { + if (type == OBJ_OFS_DELTA || type == OBJ_REF_DELTA) { + off_t tmp_pos = curpos; + off_t base_offset = get_delta_base(p, &w_curs, &tmp_pos, + type, obj_offset); + if (!base_offset) { + type = OBJ_BAD; + goto out; + } + *oi->sizep = get_size_from_delta(p, &w_curs, tmp_pos); + if (*oi->sizep == 0) { + type = OBJ_BAD; + goto out; + } + } else { + *oi->sizep = size; + } + } + + if (oi->disk_sizep) { + struct revindex_entry *revidx = find_pack_revindex(p, obj_offset); + *oi->disk_sizep = revidx[1].offset - obj_offset; + } + + if (oi->typep || oi->typename) { + enum object_type ptot; + ptot = packed_to_object_type(p, obj_offset, type, &w_curs, + curpos); + if (oi->typep) + *oi->typep = ptot; + if (oi->typename) { + const char *tn = typename(ptot); + if (tn) + strbuf_addstr(oi->typename, tn); + } + if (ptot < 0) { + type = OBJ_BAD; + goto out; + } + } + + if (oi->delta_base_sha1) { + if (type == OBJ_OFS_DELTA || type == OBJ_REF_DELTA) { + const unsigned char *base; + + base = get_delta_base_sha1(p, &w_curs, curpos, + type, obj_offset); + if (!base) { + type = OBJ_BAD; + goto out; + } + + hashcpy(oi->delta_base_sha1, base); + } else + hashclr(oi->delta_base_sha1); + } + +out: + unuse_pack(&w_curs); + return type; +} + +static void *unpack_compressed_entry(struct packed_git *p, + struct pack_window **w_curs, + off_t curpos, + unsigned long size) +{ + int st; + git_zstream stream; + unsigned char *buffer, *in; + + buffer = xmallocz_gently(size); + if (!buffer) + return NULL; + memset(&stream, 0, sizeof(stream)); + stream.next_out = buffer; + stream.avail_out = size + 1; + + git_inflate_init(&stream); + do { + in = use_pack(p, w_curs, curpos, &stream.avail_in); + stream.next_in = in; + st = git_inflate(&stream, Z_FINISH); + if (!stream.avail_out) + break; /* the payload is larger than it should be */ + curpos += stream.next_in - in; + } while (st == Z_OK || st == Z_BUF_ERROR); + git_inflate_end(&stream); + if ((st != Z_STREAM_END) || stream.total_out != size) { + free(buffer); + return NULL; + } + + return buffer; +} + static void *read_object(const unsigned char *sha1, enum object_type *type, unsigned long *size); From c84a1f3ed4d5314d2acc2bdca71b0bc5088423f9 Mon Sep 17 00:00:00 2001 From: Jonathan Tan Date: Wed, 21 Jun 2017 17:40:21 -0700 Subject: [PATCH 5/8] sha1_file: refactor read_object read_object() and sha1_object_info_extended() both implement mechanisms such as object replacement, retrying the packed store after failing to find the object in the packed store then the loose store, and being able to mark a packed object as bad and then retrying the whole process. Consolidating these mechanisms would be a great help to maintainability. Therefore, consolidate them by extending sha1_object_info_extended() to support the functionality needed, and then modifying read_object() to use sha1_object_info_extended(). Signed-off-by: Jonathan Tan Signed-off-by: Junio C Hamano --- cache.h | 1 + sha1_file.c | 84 ++++++++++++++++++++++++++--------------------------- 2 files changed, 43 insertions(+), 42 deletions(-) diff --git a/cache.h b/cache.h index a3631b2374..48aea923b2 100644 --- a/cache.h +++ b/cache.h @@ -1827,6 +1827,7 @@ struct object_info { off_t *disk_sizep; unsigned char *delta_base_sha1; struct strbuf *typename; + void **contentp; /* Response */ enum { diff --git a/sha1_file.c b/sha1_file.c index 0c996370de..615a27dac6 100644 --- a/sha1_file.c +++ b/sha1_file.c @@ -2005,19 +2005,6 @@ int parse_sha1_header(const char *hdr, unsigned long *sizep) return parse_sha1_header_extended(hdr, &oi, 0); } -static void *unpack_sha1_file(void *map, unsigned long mapsize, enum object_type *type, unsigned long *size, const unsigned char *sha1) -{ - int ret; - git_zstream stream; - char hdr[8192]; - - ret = unpack_sha1_header(&stream, map, mapsize, hdr, sizeof(hdr)); - if (ret < Z_OK || (*type = parse_sha1_header(hdr, size)) < 0) - return NULL; - - return unpack_sha1_rest(&stream, hdr, *size, sha1); -} - unsigned long get_size_from_delta(struct packed_git *p, struct pack_window **w_curs, off_t curpos) @@ -2326,8 +2313,10 @@ static void *cache_or_unpack_entry(struct packed_git *p, off_t base_offset, if (!ent) return unpack_entry(p, base_offset, type, base_size); - *type = ent->type; - *base_size = ent->size; + if (type) + *type = ent->type; + if (base_size) + *base_size = ent->size; return xmemdupz(ent->data, ent->size); } @@ -2388,9 +2377,16 @@ int packed_object_info(struct packed_git *p, off_t obj_offset, * We always get the representation type, but only convert it to * a "real" type later if the caller is interested. */ - type = unpack_object_header(p, &w_curs, &curpos, &size); + if (oi->contentp) { + *oi->contentp = cache_or_unpack_entry(p, obj_offset, oi->sizep, + &type); + if (!*oi->contentp) + type = OBJ_BAD; + } else { + type = unpack_object_header(p, &w_curs, &curpos, &size); + } - if (oi->sizep) { + if (!oi->contentp && oi->sizep) { if (type == OBJ_OFS_DELTA || type == OBJ_REF_DELTA) { off_t tmp_pos = curpos; off_t base_offset = get_delta_base(p, &w_curs, &tmp_pos, @@ -2679,8 +2675,10 @@ void *unpack_entry(struct packed_git *p, off_t obj_offset, free(external_base); } - *final_type = type; - *final_size = size; + if (final_type) + *final_type = type; + if (final_size) + *final_size = size; unuse_pack(&w_curs); @@ -2914,6 +2912,7 @@ static int sha1_loose_object_info(const unsigned char *sha1, git_zstream stream; char hdr[32]; struct strbuf hdrbuf = STRBUF_INIT; + unsigned long size_scratch; if (oi->delta_base_sha1) hashclr(oi->delta_base_sha1); @@ -2926,7 +2925,7 @@ static int sha1_loose_object_info(const unsigned char *sha1, * return value implicitly indicates whether the * object even exists. */ - if (!oi->typep && !oi->typename && !oi->sizep) { + if (!oi->typep && !oi->typename && !oi->sizep && !oi->contentp) { const char *path; struct stat st; if (stat_sha1_file(sha1, &st, &path) < 0) @@ -2939,6 +2938,10 @@ static int sha1_loose_object_info(const unsigned char *sha1, map = map_sha1_file(sha1, &mapsize); if (!map) return -1; + + if (!oi->sizep) + oi->sizep = &size_scratch; + if (oi->disk_sizep) *oi->disk_sizep = mapsize; if ((flags & OBJECT_INFO_ALLOW_UNKNOWN_TYPE)) { @@ -2956,10 +2959,18 @@ static int sha1_loose_object_info(const unsigned char *sha1, sha1_to_hex(sha1)); } else if ((status = parse_sha1_header_extended(hdr, oi, flags)) < 0) status = error("unable to parse %s header", sha1_to_hex(sha1)); - git_inflate_end(&stream); + + if (status >= 0 && oi->contentp) + *oi->contentp = unpack_sha1_rest(&stream, hdr, + *oi->sizep, sha1); + else + git_inflate_end(&stream); + munmap(map, mapsize); if (status && oi->typep) *oi->typep = status; + if (oi->sizep == &size_scratch) + oi->sizep = NULL; strbuf_release(&hdrbuf); return (status < 0) ? status : 0; } @@ -2985,6 +2996,8 @@ int sha1_object_info_extended(const unsigned char *sha1, struct object_info *oi, hashclr(oi->delta_base_sha1); if (oi->typename) strbuf_addstr(oi->typename, typename(co->type)); + if (oi->contentp) + *oi->contentp = xmemdupz(co->buf, co->size); oi->whence = OI_CACHED; return 0; } @@ -3078,28 +3091,15 @@ int pretend_sha1_file(void *buf, unsigned long len, enum object_type type, static void *read_object(const unsigned char *sha1, enum object_type *type, unsigned long *size) { - unsigned long mapsize; - void *map, *buf; - struct cached_object *co; + struct object_info oi = OBJECT_INFO_INIT; + void *content; + oi.typep = type; + oi.sizep = size; + oi.contentp = &content; - co = find_cached_object(sha1); - if (co) { - *type = co->type; - *size = co->size; - return xmemdupz(co->buf, co->size); - } - - buf = read_packed_sha1(sha1, type, size); - if (buf) - return buf; - map = map_sha1_file(sha1, &mapsize); - if (map) { - buf = unpack_sha1_file(map, mapsize, type, size, sha1); - munmap(map, mapsize); - return buf; - } - reprepare_packed_git(); - return read_packed_sha1(sha1, type, size); + if (sha1_object_info_extended(sha1, &oi, 0) < 0) + return NULL; + return content; } /* From dfdd4afcf97b0199f44231e726e373934da77717 Mon Sep 17 00:00:00 2001 From: Jonathan Tan Date: Wed, 21 Jun 2017 17:40:22 -0700 Subject: [PATCH 6/8] sha1_file: teach sha1_object_info_extended more flags Improve sha1_object_info_extended() by supporting additional flags. This allows has_sha1_file_with_flags() to be modified to use sha1_object_info_extended() in a subsequent patch. Signed-off-by: Jonathan Tan Signed-off-by: Junio C Hamano --- cache.h | 4 ++++ sha1_file.c | 43 ++++++++++++++++++++++++------------------- 2 files changed, 28 insertions(+), 19 deletions(-) diff --git a/cache.h b/cache.h index 48aea923b2..7cf2ca466a 100644 --- a/cache.h +++ b/cache.h @@ -1863,6 +1863,10 @@ struct object_info { #define OBJECT_INFO_LOOKUP_REPLACE 1 /* Allow reading from a loose object file of unknown/bogus type */ #define OBJECT_INFO_ALLOW_UNKNOWN_TYPE 2 +/* Do not check cached storage */ +#define OBJECT_INFO_SKIP_CACHED 4 +/* Do not retry packed storage after checking packed and loose storage */ +#define OBJECT_INFO_QUICK 8 extern int sha1_object_info_extended(const unsigned char *, struct object_info *, unsigned flags); extern int packed_object_info(struct packed_git *pack, off_t offset, struct object_info *); diff --git a/sha1_file.c b/sha1_file.c index 615a27dac6..b6bc02f093 100644 --- a/sha1_file.c +++ b/sha1_file.c @@ -2977,29 +2977,30 @@ static int sha1_loose_object_info(const unsigned char *sha1, int sha1_object_info_extended(const unsigned char *sha1, struct object_info *oi, unsigned flags) { - struct cached_object *co; struct pack_entry e; int rtype; const unsigned char *real = (flags & OBJECT_INFO_LOOKUP_REPLACE) ? lookup_replace_object(sha1) : sha1; - co = find_cached_object(real); - if (co) { - if (oi->typep) - *(oi->typep) = co->type; - if (oi->sizep) - *(oi->sizep) = co->size; - if (oi->disk_sizep) - *(oi->disk_sizep) = 0; - if (oi->delta_base_sha1) - hashclr(oi->delta_base_sha1); - if (oi->typename) - strbuf_addstr(oi->typename, typename(co->type)); - if (oi->contentp) - *oi->contentp = xmemdupz(co->buf, co->size); - oi->whence = OI_CACHED; - return 0; + if (!(flags & OBJECT_INFO_SKIP_CACHED)) { + struct cached_object *co = find_cached_object(real); + if (co) { + if (oi->typep) + *(oi->typep) = co->type; + if (oi->sizep) + *(oi->sizep) = co->size; + if (oi->disk_sizep) + *(oi->disk_sizep) = 0; + if (oi->delta_base_sha1) + hashclr(oi->delta_base_sha1); + if (oi->typename) + strbuf_addstr(oi->typename, typename(co->type)); + if (oi->contentp) + *oi->contentp = xmemdupz(co->buf, co->size); + oi->whence = OI_CACHED; + return 0; + } } if (!find_pack_entry(real, &e)) { @@ -3010,9 +3011,13 @@ int sha1_object_info_extended(const unsigned char *sha1, struct object_info *oi, } /* Not a loose object; someone else may have just packed it. */ - reprepare_packed_git(); - if (!find_pack_entry(real, &e)) + if (flags & OBJECT_INFO_QUICK) { return -1; + } else { + reprepare_packed_git(); + if (!find_pack_entry(real, &e)) + return -1; + } } rtype = packed_object_info(e.p, e.offset, oi); From cd585e2a33841d725b821adbb9b48654fc7d0b61 Mon Sep 17 00:00:00 2001 From: Jonathan Tan Date: Wed, 21 Jun 2017 17:40:23 -0700 Subject: [PATCH 7/8] sha1_file: do not access pack if unneeded Currently, regardless of the contents of the "struct object_info" passed to sha1_object_info_extended(), that function always accesses the packfile whenever it returns information about a packed object, since it needs to populate "u.packed". Add the ability to pass NULL, and use NULL-ness of the argument to activate an optimization in which sha1_object_info_extended() does not needlessly access the packfile. A subsequent patch will make use of this optimization. A similar optimization is not made for the cached and loose cases as it would not cause a significant performance improvement. Signed-off-by: Jonathan Tan Signed-off-by: Junio C Hamano --- sha1_file.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/sha1_file.c b/sha1_file.c index b6bc02f093..bf6b64ec8f 100644 --- a/sha1_file.c +++ b/sha1_file.c @@ -2977,12 +2977,16 @@ static int sha1_loose_object_info(const unsigned char *sha1, int sha1_object_info_extended(const unsigned char *sha1, struct object_info *oi, unsigned flags) { + static struct object_info blank_oi = OBJECT_INFO_INIT; struct pack_entry e; int rtype; const unsigned char *real = (flags & OBJECT_INFO_LOOKUP_REPLACE) ? lookup_replace_object(sha1) : sha1; + if (!oi) + oi = &blank_oi; + if (!(flags & OBJECT_INFO_SKIP_CACHED)) { struct cached_object *co = find_cached_object(real); if (co) { @@ -3020,6 +3024,13 @@ int sha1_object_info_extended(const unsigned char *sha1, struct object_info *oi, } } + if (oi == &blank_oi) + /* + * We know that the caller doesn't actually need the + * information below, so return early. + */ + return 0; + rtype = packed_object_info(e.p, e.offset, oi); if (rtype < 0) { mark_bad_packed_object(e.p, real); From e83e71c5e15f2c6aaf9bdb8ee9593a46c3bb9a5b Mon Sep 17 00:00:00 2001 From: Jonathan Tan Date: Wed, 21 Jun 2017 17:40:24 -0700 Subject: [PATCH 8/8] sha1_file: refactor has_sha1_file_with_flags has_sha1_file_with_flags() implements many mechanisms in common with sha1_object_info_extended(). Make has_sha1_file_with_flags() a convenience function for sha1_object_info_extended() instead. Signed-off-by: Jonathan Tan Signed-off-by: Junio C Hamano --- builtin/fetch.c | 10 ++++++---- builtin/index-pack.c | 3 ++- cache.h | 11 +++-------- sha1_file.c | 12 ++---------- 4 files changed, 13 insertions(+), 23 deletions(-) diff --git a/builtin/fetch.c b/builtin/fetch.c index 100248c5af..4c8d055d6e 100644 --- a/builtin/fetch.c +++ b/builtin/fetch.c @@ -249,9 +249,11 @@ static void find_non_local_tags(struct transport *transport, */ if (ends_with(ref->name, "^{}")) { if (item && - !has_object_file_with_flags(&ref->old_oid, HAS_SHA1_QUICK) && + !has_object_file_with_flags(&ref->old_oid, + OBJECT_INFO_QUICK) && !will_fetch(head, ref->old_oid.hash) && - !has_sha1_file_with_flags(item->util, HAS_SHA1_QUICK) && + !has_sha1_file_with_flags(item->util, + OBJECT_INFO_QUICK) && !will_fetch(head, item->util)) item->util = NULL; item = NULL; @@ -265,7 +267,7 @@ static void find_non_local_tags(struct transport *transport, * fetch. */ if (item && - !has_sha1_file_with_flags(item->util, HAS_SHA1_QUICK) && + !has_sha1_file_with_flags(item->util, OBJECT_INFO_QUICK) && !will_fetch(head, item->util)) item->util = NULL; @@ -286,7 +288,7 @@ static void find_non_local_tags(struct transport *transport, * checked to see if it needs fetching. */ if (item && - !has_sha1_file_with_flags(item->util, HAS_SHA1_QUICK) && + !has_sha1_file_with_flags(item->util, OBJECT_INFO_QUICK) && !will_fetch(head, item->util)) item->util = NULL; diff --git a/builtin/index-pack.c b/builtin/index-pack.c index 04b9dcaf0f..587bc80c96 100644 --- a/builtin/index-pack.c +++ b/builtin/index-pack.c @@ -794,7 +794,8 @@ static void sha1_object(const void *data, struct object_entry *obj_entry, if (startup_info->have_repository) { read_lock(); - collision_test_needed = has_sha1_file_with_flags(oid->hash, HAS_SHA1_QUICK); + collision_test_needed = + has_sha1_file_with_flags(oid->hash, OBJECT_INFO_QUICK); read_unlock(); } diff --git a/cache.h b/cache.h index 7cf2ca466a..3ae9769aaf 100644 --- a/cache.h +++ b/cache.h @@ -1268,15 +1268,10 @@ int read_loose_object(const char *path, void **contents); /* - * Return true iff we have an object named sha1, whether local or in - * an alternate object database, and whether packed or loose. This - * function does not respect replace references. - * - * If the QUICK flag is set, do not re-check the pack directory - * when we cannot find the object (this means we may give a false - * negative answer if another process is simultaneously repacking). + * Convenience for sha1_object_info_extended() with a NULL struct + * object_info. OBJECT_INFO_SKIP_CACHED is automatically set; pass + * nonzero flags to also set other flags. */ -#define HAS_SHA1_QUICK 0x1 extern int has_sha1_file_with_flags(const unsigned char *sha1, int flags); static inline int has_sha1_file(const unsigned char *sha1) { diff --git a/sha1_file.c b/sha1_file.c index bf6b64ec8f..778f01d923 100644 --- a/sha1_file.c +++ b/sha1_file.c @@ -3494,18 +3494,10 @@ int has_sha1_pack(const unsigned char *sha1) int has_sha1_file_with_flags(const unsigned char *sha1, int flags) { - struct pack_entry e; - if (!startup_info->have_repository) return 0; - if (find_pack_entry(sha1, &e)) - return 1; - if (has_loose_object(sha1)) - return 1; - if (flags & HAS_SHA1_QUICK) - return 0; - reprepare_packed_git(); - return find_pack_entry(sha1, &e); + return sha1_object_info_extended(sha1, NULL, + flags | OBJECT_INFO_SKIP_CACHED) >= 0; } int has_object_file(const struct object_id *oid)