From 578d81d0c445f714dce46d5f4e3f599ac3a94c75 Mon Sep 17 00:00:00 2001 From: Jeff Hostetler Date: Tue, 21 Nov 2017 20:58:47 +0000 Subject: [PATCH 1/9] dir: allow exclusions from blob in addition to file Refactor add_excludes() to separate the reading of the exclude file into a buffer and the parsing of the buffer into exclude_list items. Add add_excludes_from_blob_to_list() to allow an exclude file be specified with an OID without assuming a local worktree or index exists. Refactor read_skip_worktree_file_from_index() and add do_read_blob() to eliminate duplication of preliminary processing of blob contents. Signed-off-by: Jeff Hostetler Reviewed-by: Jonathan Tan Signed-off-by: Junio C Hamano --- dir.c | 132 ++++++++++++++++++++++++++++++++++++++++++++-------------- dir.h | 3 ++ 2 files changed, 104 insertions(+), 31 deletions(-) diff --git a/dir.c b/dir.c index 1d17b800cf..1962374d2a 100644 --- a/dir.c +++ b/dir.c @@ -220,6 +220,57 @@ int within_depth(const char *name, int namelen, return 1; } +/* + * Read the contents of the blob with the given OID into a buffer. + * Append a trailing LF to the end if the last line doesn't have one. + * + * Returns: + * -1 when the OID is invalid or unknown or does not refer to a blob. + * 0 when the blob is empty. + * 1 along with { data, size } of the (possibly augmented) buffer + * when successful. + * + * Optionally updates the given sha1_stat with the given OID (when valid). + */ +static int do_read_blob(const struct object_id *oid, + struct sha1_stat *sha1_stat, + size_t *size_out, + char **data_out) +{ + enum object_type type; + unsigned long sz; + char *data; + + *size_out = 0; + *data_out = NULL; + + data = read_sha1_file(oid->hash, &type, &sz); + if (!data || type != OBJ_BLOB) { + free(data); + return -1; + } + + if (sha1_stat) { + memset(&sha1_stat->stat, 0, sizeof(sha1_stat->stat)); + hashcpy(sha1_stat->sha1, oid->hash); + } + + if (sz == 0) { + free(data); + return 0; + } + + if (data[sz - 1] != '\n') { + data = xrealloc(data, st_add(sz, 1)); + data[sz++] = '\n'; + } + + *size_out = xsize_t(sz); + *data_out = data; + + return 1; +} + #define DO_MATCH_EXCLUDE (1<<0) #define DO_MATCH_DIRECTORY (1<<1) #define DO_MATCH_SUBMODULE (1<<2) @@ -600,32 +651,22 @@ void add_exclude(const char *string, const char *base, x->el = el; } -static void *read_skip_worktree_file_from_index(const struct index_state *istate, - const char *path, size_t *size, - struct sha1_stat *sha1_stat) +static int read_skip_worktree_file_from_index(const struct index_state *istate, + const char *path, + size_t *size_out, + char **data_out, + struct sha1_stat *sha1_stat) { int pos, len; - unsigned long sz; - enum object_type type; - void *data; len = strlen(path); pos = index_name_pos(istate, path, len); if (pos < 0) - return NULL; + return -1; if (!ce_skip_worktree(istate->cache[pos])) - return NULL; - data = read_sha1_file(istate->cache[pos]->oid.hash, &type, &sz); - if (!data || type != OBJ_BLOB) { - free(data); - return NULL; - } - *size = xsize_t(sz); - if (sha1_stat) { - memset(&sha1_stat->stat, 0, sizeof(sha1_stat->stat)); - hashcpy(sha1_stat->sha1, istate->cache[pos]->oid.hash); - } - return data; + return -1; + + return do_read_blob(&istate->cache[pos]->oid, sha1_stat, size_out, data_out); } /* @@ -739,6 +780,10 @@ static void invalidate_directory(struct untracked_cache *uc, dir->dirs[i]->recurse = 0; } +static int add_excludes_from_buffer(char *buf, size_t size, + const char *base, int baselen, + struct exclude_list *el); + /* * Given a file with name "fname", read it (either from disk, or from * an index if 'istate' is non-null), parse it and store the @@ -754,9 +799,10 @@ static int add_excludes(const char *fname, const char *base, int baselen, struct sha1_stat *sha1_stat) { struct stat st; - int fd, i, lineno = 1; + int r; + int fd; size_t size = 0; - char *buf, *entry; + char *buf; fd = open(fname, O_RDONLY); if (fd < 0 || fstat(fd, &st) < 0) { @@ -764,17 +810,13 @@ static int add_excludes(const char *fname, const char *base, int baselen, warn_on_fopen_errors(fname); else close(fd); - if (!istate || - (buf = read_skip_worktree_file_from_index(istate, fname, &size, sha1_stat)) == NULL) + if (!istate) return -1; - if (size == 0) { - free(buf); - return 0; - } - if (buf[size-1] != '\n') { - buf = xrealloc(buf, st_add(size, 1)); - buf[size++] = '\n'; - } + r = read_skip_worktree_file_from_index(istate, fname, + &size, &buf, + sha1_stat); + if (r != 1) + return r; } else { size = xsize_t(st.st_size); if (size == 0) { @@ -813,6 +855,17 @@ static int add_excludes(const char *fname, const char *base, int baselen, } } + add_excludes_from_buffer(buf, size, base, baselen, el); + return 0; +} + +static int add_excludes_from_buffer(char *buf, size_t size, + const char *base, int baselen, + struct exclude_list *el) +{ + int i, lineno = 1; + char *entry; + el->filebuf = buf; if (skip_utf8_bom(&buf, size)) @@ -841,6 +894,23 @@ int add_excludes_from_file_to_list(const char *fname, const char *base, return add_excludes(fname, base, baselen, el, istate, NULL); } +int add_excludes_from_blob_to_list( + struct object_id *oid, + const char *base, int baselen, + struct exclude_list *el) +{ + char *buf; + size_t size; + int r; + + r = do_read_blob(oid, NULL, &size, &buf); + if (r != 1) + return r; + + add_excludes_from_buffer(buf, size, base, baselen, el); + return 0; +} + struct exclude_list *add_exclude_list(struct dir_struct *dir, int group_type, const char *src) { diff --git a/dir.h b/dir.h index e3717055d1..1bcf39123a 100644 --- a/dir.h +++ b/dir.h @@ -256,6 +256,9 @@ extern struct exclude_list *add_exclude_list(struct dir_struct *dir, extern int add_excludes_from_file_to_list(const char *fname, const char *base, int baselen, struct exclude_list *el, struct index_state *istate); extern void add_excludes_from_file(struct dir_struct *, const char *fname); +extern int add_excludes_from_blob_to_list(struct object_id *oid, + const char *base, int baselen, + struct exclude_list *el); extern void parse_exclude_pattern(const char **string, int *patternlen, unsigned *flags, int *nowildcardlen); extern void add_exclude(const char *string, const char *base, int baselen, struct exclude_list *el, int srcpos); From 314f354ee77892664f49b4b44d2052df139ece7e Mon Sep 17 00:00:00 2001 From: Jeff Hostetler Date: Tue, 21 Nov 2017 20:58:48 +0000 Subject: [PATCH 2/9] oidmap: add oidmap iterator methods Add the usual map iterator functions to oidmap. Signed-off-by: Jeff Hostetler Reviewed-by: Jonathan Tan Signed-off-by: Junio C Hamano --- oidmap.h | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/oidmap.h b/oidmap.h index 18f54cde14..d3cd2bb590 100644 --- a/oidmap.h +++ b/oidmap.h @@ -65,4 +65,26 @@ extern void *oidmap_put(struct oidmap *map, void *entry); */ extern void *oidmap_remove(struct oidmap *map, const struct object_id *key); + +struct oidmap_iter { + struct hashmap_iter h_iter; +}; + +static inline void oidmap_iter_init(struct oidmap *map, struct oidmap_iter *iter) +{ + hashmap_iter_init(&map->map, &iter->h_iter); +} + +static inline void *oidmap_iter_next(struct oidmap_iter *iter) +{ + return hashmap_iter_next(&iter->h_iter); +} + +static inline void *oidmap_iter_first(struct oidmap *map, + struct oidmap_iter *iter) +{ + oidmap_iter_init(map, iter); + return oidmap_iter_next(iter); +} + #endif From c3a9ad311793e16f6f5c5dcc3559133700c70288 Mon Sep 17 00:00:00 2001 From: Jeff Hostetler Date: Tue, 21 Nov 2017 20:58:49 +0000 Subject: [PATCH 3/9] oidset: add iterator methods to oidset Add the usual iterator methods to oidset. Add oidset_remove(). Signed-off-by: Jeff Hostetler Reviewed-by: Jonathan Tan Signed-off-by: Junio C Hamano --- oidset.c | 10 ++++++++++ oidset.h | 36 ++++++++++++++++++++++++++++++++++++ 2 files changed, 46 insertions(+) diff --git a/oidset.c b/oidset.c index f1f874aaad..454c54f933 100644 --- a/oidset.c +++ b/oidset.c @@ -24,6 +24,16 @@ int oidset_insert(struct oidset *set, const struct object_id *oid) return 0; } +int oidset_remove(struct oidset *set, const struct object_id *oid) +{ + struct oidmap_entry *entry; + + entry = oidmap_remove(&set->map, oid); + free(entry); + + return (entry != NULL); +} + void oidset_clear(struct oidset *set) { oidmap_free(&set->map, 1); diff --git a/oidset.h b/oidset.h index f4c9e0f9c0..783abceccd 100644 --- a/oidset.h +++ b/oidset.h @@ -24,6 +24,12 @@ struct oidset { #define OIDSET_INIT { OIDMAP_INIT } + +static inline void oidset_init(struct oidset *set, size_t initial_size) +{ + return oidmap_init(&set->map, initial_size); +} + /** * Returns true iff `set` contains `oid`. */ @@ -38,10 +44,40 @@ int oidset_contains(const struct oidset *set, const struct object_id *oid); */ int oidset_insert(struct oidset *set, const struct object_id *oid); +/** + * Remove the oid from the set. + * + * Returns 1 if the oid was present in the set, 0 otherwise. + */ +int oidset_remove(struct oidset *set, const struct object_id *oid); + /** * Remove all entries from the oidset, freeing any resources associated with * it. */ void oidset_clear(struct oidset *set); +struct oidset_iter { + struct oidmap_iter m_iter; +}; + +static inline void oidset_iter_init(struct oidset *set, + struct oidset_iter *iter) +{ + oidmap_iter_init(&set->map, &iter->m_iter); +} + +static inline struct object_id *oidset_iter_next(struct oidset_iter *iter) +{ + struct oidmap_entry *e = oidmap_iter_next(&iter->m_iter); + return e ? &e->oid : NULL; +} + +static inline struct object_id *oidset_iter_first(struct oidset *set, + struct oidset_iter *iter) +{ + oidset_iter_init(set, iter); + return oidset_iter_next(iter); +} + #endif /* OIDSET_H */ From 25ec7bcac044057900a0f3c9a8d6ccbb41a066bc Mon Sep 17 00:00:00 2001 From: Jeff Hostetler Date: Tue, 21 Nov 2017 20:58:50 +0000 Subject: [PATCH 4/9] list-objects: filter objects in traverse_commit_list Create traverse_commit_list_filtered() and add filtering interface to allow certain objects to be omitted from the traversal. Update traverse_commit_list() to be a wrapper for the above with a null filter to minimize the number of callers that needed to be changed. Object filtering will be used in a future commit by rev-list and pack-objects for partial clone and fetch to omit unwanted objects from the result. traverse_bitmap_commit_list() does not work with filtering. If a packfile bitmap is present, it will not be used. It should be possible to extend such support in the future (at least to simple filters that do not require object pathnames), but that is beyond the scope of this patch series. Signed-off-by: Jeff Hostetler Reviewed-by: Jonathan Tan Signed-off-by: Junio C Hamano --- Makefile | 2 + list-objects-filter-options.c | 81 +++++++ list-objects-filter-options.h | 58 +++++ list-objects-filter.c | 401 ++++++++++++++++++++++++++++++++++ list-objects-filter.h | 77 +++++++ list-objects.c | 95 ++++++-- list-objects.h | 13 +- object.h | 1 + 8 files changed, 711 insertions(+), 17 deletions(-) create mode 100644 list-objects-filter-options.c create mode 100644 list-objects-filter-options.h create mode 100644 list-objects-filter.c create mode 100644 list-objects-filter.h diff --git a/Makefile b/Makefile index cd75985991..ca378a4603 100644 --- a/Makefile +++ b/Makefile @@ -807,6 +807,8 @@ LIB_OBJS += levenshtein.o LIB_OBJS += line-log.o LIB_OBJS += line-range.o LIB_OBJS += list-objects.o +LIB_OBJS += list-objects-filter.o +LIB_OBJS += list-objects-filter-options.o LIB_OBJS += ll-merge.o LIB_OBJS += lockfile.o LIB_OBJS += log-tree.o diff --git a/list-objects-filter-options.c b/list-objects-filter-options.c new file mode 100644 index 0000000000..9b28322b1f --- /dev/null +++ b/list-objects-filter-options.c @@ -0,0 +1,81 @@ +#include "cache.h" +#include "commit.h" +#include "config.h" +#include "revision.h" +#include "argv-array.h" +#include "list-objects.h" +#include "list-objects-filter.h" +#include "list-objects-filter-options.h" + +/* + * Parse value of the argument to the "filter" keword. + * On the command line this looks like: + * --filter= + * and in the pack protocol as: + * "filter" SP + * + * The filter keyword will be used by many commands. + * See Documentation/rev-list-options.txt for allowed values for . + * + * Capture the given arg as the "filter_spec". This can be forwarded to + * subordinate commands when necessary. We also "intern" the arg for + * the convenience of the current command. + */ +int parse_list_objects_filter(struct list_objects_filter_options *filter_options, + const char *arg) +{ + const char *v0; + + if (filter_options->choice) + die(_("multiple object filter types cannot be combined")); + + filter_options->filter_spec = strdup(arg); + + if (!strcmp(arg, "blob:none")) { + filter_options->choice = LOFC_BLOB_NONE; + return 0; + } + + if (skip_prefix(arg, "blob:limit=", &v0)) { + if (!git_parse_ulong(v0, &filter_options->blob_limit_value)) + die(_("invalid filter-spec expression '%s'"), arg); + filter_options->choice = LOFC_BLOB_LIMIT; + return 0; + } + + if (skip_prefix(arg, "sparse:oid=", &v0)) { + struct object_context oc; + struct object_id sparse_oid; + + /* + * Try to parse into an OID for the current + * command, but DO NOT complain if we don't have the blob or + * ref locally. + */ + if (!get_oid_with_context(v0, GET_OID_BLOB, + &sparse_oid, &oc)) + filter_options->sparse_oid_value = oiddup(&sparse_oid); + filter_options->choice = LOFC_SPARSE_OID; + return 0; + } + + if (skip_prefix(arg, "sparse:path=", &v0)) { + filter_options->choice = LOFC_SPARSE_PATH; + filter_options->sparse_path_value = strdup(v0); + return 0; + } + + die(_("invalid filter-spec expression '%s'"), arg); + return 0; +} + +int opt_parse_list_objects_filter(const struct option *opt, + const char *arg, int unset) +{ + struct list_objects_filter_options *filter_options = opt->value; + + assert(arg); + assert(!unset); + + return parse_list_objects_filter(filter_options, arg); +} diff --git a/list-objects-filter-options.h b/list-objects-filter-options.h new file mode 100644 index 0000000000..dd7e5db2db --- /dev/null +++ b/list-objects-filter-options.h @@ -0,0 +1,58 @@ +#ifndef LIST_OBJECTS_FILTER_OPTIONS_H +#define LIST_OBJECTS_FILTER_OPTIONS_H + +#include "parse-options.h" + +/* + * The list of defined filters for list-objects. + */ +enum list_objects_filter_choice { + LOFC_DISABLED = 0, + LOFC_BLOB_NONE, + LOFC_BLOB_LIMIT, + LOFC_SPARSE_OID, + LOFC_SPARSE_PATH, + LOFC__COUNT /* must be last */ +}; + +struct list_objects_filter_options { + /* + * 'filter_spec' is the raw argument value given on the command line + * or protocol request. (The part after the "--keyword=".) For + * commands that launch filtering sub-processes, this value should be + * passed to them as received by the current process. + */ + char *filter_spec; + + /* + * 'choice' is determined by parsing the filter-spec. This indicates + * the filtering algorithm to use. + */ + enum list_objects_filter_choice choice; + + /* + * Parsed values (fields) from within the filter-spec. These are + * choice-specific; not all values will be defined for any given + * choice. + */ + struct object_id *sparse_oid_value; + char *sparse_path_value; + unsigned long blob_limit_value; +}; + +/* Normalized command line arguments */ +#define CL_ARG__FILTER "filter" + +int parse_list_objects_filter( + struct list_objects_filter_options *filter_options, + const char *arg); + +int opt_parse_list_objects_filter(const struct option *opt, + const char *arg, int unset); + +#define OPT_PARSE_LIST_OBJECTS_FILTER(fo) \ + { OPTION_CALLBACK, 0, CL_ARG__FILTER, fo, N_("args"), \ + N_("object filtering"), PARSE_OPT_NONEG, \ + opt_parse_list_objects_filter } + +#endif /* LIST_OBJECTS_FILTER_OPTIONS_H */ diff --git a/list-objects-filter.c b/list-objects-filter.c new file mode 100644 index 0000000000..4356c45368 --- /dev/null +++ b/list-objects-filter.c @@ -0,0 +1,401 @@ +#include "cache.h" +#include "dir.h" +#include "tag.h" +#include "commit.h" +#include "tree.h" +#include "blob.h" +#include "diff.h" +#include "tree-walk.h" +#include "revision.h" +#include "list-objects.h" +#include "list-objects-filter.h" +#include "list-objects-filter-options.h" +#include "oidset.h" + +/* Remember to update object flag allocation in object.h */ +/* + * FILTER_SHOWN_BUT_REVISIT -- we set this bit on tree objects + * that have been shown, but should be revisited if they appear + * in the traversal (until we mark it SEEN). This is a way to + * let us silently de-dup calls to show() in the caller. This + * is subtly different from the "revision.h:SHOWN" and the + * "sha1_name.c:ONELINE_SEEN" bits. And also different from + * the non-de-dup usage in pack-bitmap.c + */ +#define FILTER_SHOWN_BUT_REVISIT (1<<21) + +/* + * A filter for list-objects to omit ALL blobs from the traversal. + * And to OPTIONALLY collect a list of the omitted OIDs. + */ +struct filter_blobs_none_data { + struct oidset *omits; +}; + +static enum list_objects_filter_result filter_blobs_none( + enum list_objects_filter_situation filter_situation, + struct object *obj, + const char *pathname, + const char *filename, + void *filter_data_) +{ + struct filter_blobs_none_data *filter_data = filter_data_; + + switch (filter_situation) { + default: + die("unknown filter_situation"); + return LOFR_ZERO; + + case LOFS_BEGIN_TREE: + assert(obj->type == OBJ_TREE); + /* always include all tree objects */ + return LOFR_MARK_SEEN | LOFR_DO_SHOW; + + case LOFS_END_TREE: + assert(obj->type == OBJ_TREE); + return LOFR_ZERO; + + case LOFS_BLOB: + assert(obj->type == OBJ_BLOB); + assert((obj->flags & SEEN) == 0); + + if (filter_data->omits) + oidset_insert(filter_data->omits, &obj->oid); + return LOFR_MARK_SEEN; /* but not LOFR_DO_SHOW (hard omit) */ + } +} + +static void *filter_blobs_none__init( + struct oidset *omitted, + struct list_objects_filter_options *filter_options, + filter_object_fn *filter_fn, + filter_free_fn *filter_free_fn) +{ + struct filter_blobs_none_data *d = xcalloc(1, sizeof(*d)); + d->omits = omitted; + + *filter_fn = filter_blobs_none; + *filter_free_fn = free; + return d; +} + +/* + * A filter for list-objects to omit large blobs. + * And to OPTIONALLY collect a list of the omitted OIDs. + */ +struct filter_blobs_limit_data { + struct oidset *omits; + unsigned long max_bytes; +}; + +static enum list_objects_filter_result filter_blobs_limit( + enum list_objects_filter_situation filter_situation, + struct object *obj, + const char *pathname, + const char *filename, + void *filter_data_) +{ + struct filter_blobs_limit_data *filter_data = filter_data_; + unsigned long object_length; + enum object_type t; + + switch (filter_situation) { + default: + die("unknown filter_situation"); + return LOFR_ZERO; + + case LOFS_BEGIN_TREE: + assert(obj->type == OBJ_TREE); + /* always include all tree objects */ + return LOFR_MARK_SEEN | LOFR_DO_SHOW; + + case LOFS_END_TREE: + assert(obj->type == OBJ_TREE); + return LOFR_ZERO; + + case LOFS_BLOB: + assert(obj->type == OBJ_BLOB); + assert((obj->flags & SEEN) == 0); + + t = sha1_object_info(obj->oid.hash, &object_length); + if (t != OBJ_BLOB) { /* probably OBJ_NONE */ + /* + * We DO NOT have the blob locally, so we cannot + * apply the size filter criteria. Be conservative + * and force show it (and let the caller deal with + * the ambiguity). + */ + goto include_it; + } + + if (object_length < filter_data->max_bytes) + goto include_it; + + if (filter_data->omits) + oidset_insert(filter_data->omits, &obj->oid); + return LOFR_MARK_SEEN; /* but not LOFR_DO_SHOW (hard omit) */ + } + +include_it: + if (filter_data->omits) + oidset_remove(filter_data->omits, &obj->oid); + return LOFR_MARK_SEEN | LOFR_DO_SHOW; +} + +static void *filter_blobs_limit__init( + struct oidset *omitted, + struct list_objects_filter_options *filter_options, + filter_object_fn *filter_fn, + filter_free_fn *filter_free_fn) +{ + struct filter_blobs_limit_data *d = xcalloc(1, sizeof(*d)); + d->omits = omitted; + d->max_bytes = filter_options->blob_limit_value; + + *filter_fn = filter_blobs_limit; + *filter_free_fn = free; + return d; +} + +/* + * A filter driven by a sparse-checkout specification to only + * include blobs that a sparse checkout would populate. + * + * The sparse-checkout spec can be loaded from a blob with the + * given OID or from a local pathname. We allow an OID because + * the repo may be bare or we may be doing the filtering on the + * server. + */ +struct frame { + /* + * defval is the usual default include/exclude value that + * should be inherited as we recurse into directories based + * upon pattern matching of the directory itself or of a + * containing directory. + */ + int defval; + + /* + * 1 if the directory (recursively) contains any provisionally + * omitted objects. + * + * 0 if everything (recursively) contained in this directory + * has been explicitly included (SHOWN) in the result and + * the directory may be short-cut later in the traversal. + */ + unsigned child_prov_omit : 1; +}; + +struct filter_sparse_data { + struct oidset *omits; + struct exclude_list el; + + size_t nr, alloc; + struct frame *array_frame; +}; + +static enum list_objects_filter_result filter_sparse( + enum list_objects_filter_situation filter_situation, + struct object *obj, + const char *pathname, + const char *filename, + void *filter_data_) +{ + struct filter_sparse_data *filter_data = filter_data_; + int val, dtype; + struct frame *frame; + + switch (filter_situation) { + default: + die("unknown filter_situation"); + return LOFR_ZERO; + + case LOFS_BEGIN_TREE: + assert(obj->type == OBJ_TREE); + dtype = DT_DIR; + val = is_excluded_from_list(pathname, strlen(pathname), + filename, &dtype, &filter_data->el, + &the_index); + if (val < 0) + val = filter_data->array_frame[filter_data->nr].defval; + + ALLOC_GROW(filter_data->array_frame, filter_data->nr + 1, + filter_data->alloc); + filter_data->nr++; + filter_data->array_frame[filter_data->nr].defval = val; + filter_data->array_frame[filter_data->nr].child_prov_omit = 0; + + /* + * A directory with this tree OID may appear in multiple + * places in the tree. (Think of a directory move or copy, + * with no other changes, so the OID is the same, but the + * full pathnames of objects within this directory are new + * and may match is_excluded() patterns differently.) + * So we cannot mark this directory as SEEN (yet), since + * that will prevent process_tree() from revisiting this + * tree object with other pathname prefixes. + * + * Only _DO_SHOW the tree object the first time we visit + * this tree object. + * + * We always show all tree objects. A future optimization + * may want to attempt to narrow this. + */ + if (obj->flags & FILTER_SHOWN_BUT_REVISIT) + return LOFR_ZERO; + obj->flags |= FILTER_SHOWN_BUT_REVISIT; + return LOFR_DO_SHOW; + + case LOFS_END_TREE: + assert(obj->type == OBJ_TREE); + assert(filter_data->nr > 0); + + frame = &filter_data->array_frame[filter_data->nr]; + filter_data->nr--; + + /* + * Tell our parent directory if any of our children were + * provisionally omitted. + */ + filter_data->array_frame[filter_data->nr].child_prov_omit |= + frame->child_prov_omit; + + /* + * If there are NO provisionally omitted child objects (ALL child + * objects in this folder were INCLUDED), then we can mark the + * folder as SEEN (so we will not have to revisit it again). + */ + if (!frame->child_prov_omit) + return LOFR_MARK_SEEN; + return LOFR_ZERO; + + case LOFS_BLOB: + assert(obj->type == OBJ_BLOB); + assert((obj->flags & SEEN) == 0); + + frame = &filter_data->array_frame[filter_data->nr]; + + dtype = DT_REG; + val = is_excluded_from_list(pathname, strlen(pathname), + filename, &dtype, &filter_data->el, + &the_index); + if (val < 0) + val = frame->defval; + if (val > 0) { + if (filter_data->omits) + oidset_remove(filter_data->omits, &obj->oid); + return LOFR_MARK_SEEN | LOFR_DO_SHOW; + } + + /* + * Provisionally omit it. We've already established that + * this pathname is not in the sparse-checkout specification + * with the CURRENT pathname, so we *WANT* to omit this blob. + * + * However, a pathname elsewhere in the tree may also + * reference this same blob, so we cannot reject it yet. + * Leave the LOFR_ bits unset so that if the blob appears + * again in the traversal, we will be asked again. + */ + if (filter_data->omits) + oidset_insert(filter_data->omits, &obj->oid); + + /* + * Remember that at least 1 blob in this tree was + * provisionally omitted. This prevents us from short + * cutting the tree in future iterations. + */ + frame->child_prov_omit = 1; + return LOFR_ZERO; + } +} + + +static void filter_sparse_free(void *filter_data) +{ + struct filter_sparse_data *d = filter_data; + /* TODO free contents of 'd' */ + free(d); +} + +static void *filter_sparse_oid__init( + struct oidset *omitted, + struct list_objects_filter_options *filter_options, + filter_object_fn *filter_fn, + filter_free_fn *filter_free_fn) +{ + struct filter_sparse_data *d = xcalloc(1, sizeof(*d)); + d->omits = omitted; + if (add_excludes_from_blob_to_list(filter_options->sparse_oid_value, + NULL, 0, &d->el) < 0) + die("could not load filter specification"); + + ALLOC_GROW(d->array_frame, d->nr + 1, d->alloc); + d->array_frame[d->nr].defval = 0; /* default to include */ + d->array_frame[d->nr].child_prov_omit = 0; + + *filter_fn = filter_sparse; + *filter_free_fn = filter_sparse_free; + return d; +} + +static void *filter_sparse_path__init( + struct oidset *omitted, + struct list_objects_filter_options *filter_options, + filter_object_fn *filter_fn, + filter_free_fn *filter_free_fn) +{ + struct filter_sparse_data *d = xcalloc(1, sizeof(*d)); + d->omits = omitted; + if (add_excludes_from_file_to_list(filter_options->sparse_path_value, + NULL, 0, &d->el, NULL) < 0) + die("could not load filter specification"); + + ALLOC_GROW(d->array_frame, d->nr + 1, d->alloc); + d->array_frame[d->nr].defval = 0; /* default to include */ + d->array_frame[d->nr].child_prov_omit = 0; + + *filter_fn = filter_sparse; + *filter_free_fn = filter_sparse_free; + return d; +} + +typedef void *(*filter_init_fn)( + struct oidset *omitted, + struct list_objects_filter_options *filter_options, + filter_object_fn *filter_fn, + filter_free_fn *filter_free_fn); + +/* + * Must match "enum list_objects_filter_choice". + */ +static filter_init_fn s_filters[] = { + NULL, + filter_blobs_none__init, + filter_blobs_limit__init, + filter_sparse_oid__init, + filter_sparse_path__init, +}; + +void *list_objects_filter__init( + struct oidset *omitted, + struct list_objects_filter_options *filter_options, + filter_object_fn *filter_fn, + filter_free_fn *filter_free_fn) +{ + filter_init_fn init_fn; + + assert((sizeof(s_filters) / sizeof(s_filters[0])) == LOFC__COUNT); + + if (filter_options->choice >= LOFC__COUNT) + die("invalid list-objects filter choice: %d", + filter_options->choice); + + init_fn = s_filters[filter_options->choice]; + if (init_fn) + return init_fn(omitted, filter_options, + filter_fn, filter_free_fn); + *filter_fn = NULL; + *filter_free_fn = NULL; + return NULL; +} diff --git a/list-objects-filter.h b/list-objects-filter.h new file mode 100644 index 0000000000..a963d0274c --- /dev/null +++ b/list-objects-filter.h @@ -0,0 +1,77 @@ +#ifndef LIST_OBJECTS_FILTER_H +#define LIST_OBJECTS_FILTER_H + +/* + * During list-object traversal we allow certain objects to be + * filtered (omitted) from the result. The active filter uses + * these result values to guide list-objects. + * + * _ZERO : Do nothing with the object at this time. It may + * be revisited if it appears in another place in + * the tree or in another commit during the overall + * traversal. + * + * _MARK_SEEN : Mark this object as "SEEN" in the object flags. + * This will prevent it from being revisited during + * the remainder of the traversal. This DOES NOT + * imply that it will be included in the results. + * + * _DO_SHOW : Show this object in the results (call show() on it). + * In general, objects should only be shown once, but + * this result DOES NOT imply that we mark it SEEN. + * + * Most of the time, you want the combination (_MARK_SEEN | _DO_SHOW) + * but they can be used independently, such as when sparse-checkout + * pattern matching is being applied. + * + * A _MARK_SEEN without _DO_SHOW can be called a hard-omit -- the + * object is not shown and will never be reconsidered (unless a + * previous iteration has already shown it). + * + * A _DO_SHOW without _MARK_SEEN can be used, for example, to + * include a directory, but then revisit it to selectively include + * or omit objects within it. + * + * A _ZERO can be called a provisional-omit -- the object is NOT shown, + * but *may* be revisited (if the object appears again in the traversal). + * Therefore, it will be omitted from the results *unless* a later + * iteration causes it to be shown. + */ +enum list_objects_filter_result { + LOFR_ZERO = 0, + LOFR_MARK_SEEN = 1<<0, + LOFR_DO_SHOW = 1<<1, +}; + +enum list_objects_filter_situation { + LOFS_BEGIN_TREE, + LOFS_END_TREE, + LOFS_BLOB +}; + +typedef enum list_objects_filter_result (*filter_object_fn)( + enum list_objects_filter_situation filter_situation, + struct object *obj, + const char *pathname, + const char *filename, + void *filter_data); + +typedef void (*filter_free_fn)(void *filter_data); + +/* + * Constructor for the set of defined list-objects filters. + * Returns a generic "void *filter_data". + * + * The returned "filter_fn" will be used by traverse_commit_list() + * to filter the results. + * + * The returned "filter_free_fn" is a destructor for the + * filter_data. + */ +void *list_objects_filter__init( + struct oidset *omitted, + struct list_objects_filter_options *filter_options, + filter_object_fn *filter_fn, + filter_free_fn *filter_free_fn); + +#endif /* LIST_OBJECTS_FILTER_H */ diff --git a/list-objects.c b/list-objects.c index b3931fa434..d9e83d05e1 100644 --- a/list-objects.c +++ b/list-objects.c @@ -7,16 +7,21 @@ #include "tree-walk.h" #include "revision.h" #include "list-objects.h" +#include "list-objects-filter.h" +#include "list-objects-filter-options.h" static void process_blob(struct rev_info *revs, struct blob *blob, show_object_fn show, struct strbuf *path, const char *name, - void *cb_data) + void *cb_data, + filter_object_fn filter_fn, + void *filter_data) { struct object *obj = &blob->object; size_t pathlen; + enum list_objects_filter_result r = LOFR_MARK_SEEN | LOFR_DO_SHOW; if (!revs->blob_objects) return; @@ -24,11 +29,17 @@ static void process_blob(struct rev_info *revs, die("bad blob object"); if (obj->flags & (UNINTERESTING | SEEN)) return; - obj->flags |= SEEN; pathlen = path->len; strbuf_addstr(path, name); - show(obj, path->buf, cb_data); + if (filter_fn) + r = filter_fn(LOFS_BLOB, obj, + path->buf, &path->buf[pathlen], + filter_data); + if (r & LOFR_MARK_SEEN) + obj->flags |= SEEN; + if (r & LOFR_DO_SHOW) + show(obj, path->buf, cb_data); strbuf_setlen(path, pathlen); } @@ -69,7 +80,9 @@ static void process_tree(struct rev_info *revs, show_object_fn show, struct strbuf *base, const char *name, - void *cb_data) + void *cb_data, + filter_object_fn filter_fn, + void *filter_data) { struct object *obj = &tree->object; struct tree_desc desc; @@ -77,6 +90,7 @@ static void process_tree(struct rev_info *revs, enum interesting match = revs->diffopt.pathspec.nr == 0 ? all_entries_interesting: entry_not_interesting; int baselen = base->len; + enum list_objects_filter_result r = LOFR_MARK_SEEN | LOFR_DO_SHOW; if (!revs->tree_objects) return; @@ -90,9 +104,15 @@ static void process_tree(struct rev_info *revs, die("bad tree object %s", oid_to_hex(&obj->oid)); } - obj->flags |= SEEN; strbuf_addstr(base, name); - show(obj, base->buf, cb_data); + if (filter_fn) + r = filter_fn(LOFS_BEGIN_TREE, obj, + base->buf, &base->buf[baselen], + filter_data); + if (r & LOFR_MARK_SEEN) + obj->flags |= SEEN; + if (r & LOFR_DO_SHOW) + show(obj, base->buf, cb_data); if (base->len) strbuf_addch(base, '/'); @@ -112,7 +132,7 @@ static void process_tree(struct rev_info *revs, process_tree(revs, lookup_tree(entry.oid), show, base, entry.path, - cb_data); + cb_data, filter_fn, filter_data); else if (S_ISGITLINK(entry.mode)) process_gitlink(revs, entry.oid->hash, show, base, entry.path, @@ -121,8 +141,19 @@ static void process_tree(struct rev_info *revs, process_blob(revs, lookup_blob(entry.oid), show, base, entry.path, - cb_data); + cb_data, filter_fn, filter_data); } + + if (filter_fn) { + r = filter_fn(LOFS_END_TREE, obj, + base->buf, &base->buf[baselen], + filter_data); + if (r & LOFR_MARK_SEEN) + obj->flags |= SEEN; + if (r & LOFR_DO_SHOW) + show(obj, base->buf, cb_data); + } + strbuf_setlen(base, baselen); free_tree_buffer(tree); } @@ -183,10 +214,12 @@ static void add_pending_tree(struct rev_info *revs, struct tree *tree) add_pending_object(revs, &tree->object, ""); } -void traverse_commit_list(struct rev_info *revs, - show_commit_fn show_commit, - show_object_fn show_object, - void *data) +static void do_traverse(struct rev_info *revs, + show_commit_fn show_commit, + show_object_fn show_object, + void *show_data, + filter_object_fn filter_fn, + void *filter_data) { int i; struct commit *commit; @@ -200,7 +233,7 @@ void traverse_commit_list(struct rev_info *revs, */ if (commit->tree) add_pending_tree(revs, commit->tree); - show_commit(commit, data); + show_commit(commit, show_data); } for (i = 0; i < revs->pending.nr; i++) { struct object_array_entry *pending = revs->pending.objects + i; @@ -211,19 +244,21 @@ void traverse_commit_list(struct rev_info *revs, continue; if (obj->type == OBJ_TAG) { obj->flags |= SEEN; - show_object(obj, name, data); + show_object(obj, name, show_data); continue; } if (!path) path = ""; if (obj->type == OBJ_TREE) { process_tree(revs, (struct tree *)obj, show_object, - &base, path, data); + &base, path, show_data, + filter_fn, filter_data); continue; } if (obj->type == OBJ_BLOB) { process_blob(revs, (struct blob *)obj, show_object, - &base, path, data); + &base, path, show_data, + filter_fn, filter_data); continue; } die("unknown pending object %s (%s)", @@ -232,3 +267,31 @@ void traverse_commit_list(struct rev_info *revs, object_array_clear(&revs->pending); strbuf_release(&base); } + +void traverse_commit_list(struct rev_info *revs, + show_commit_fn show_commit, + show_object_fn show_object, + void *show_data) +{ + do_traverse(revs, show_commit, show_object, show_data, NULL, NULL); +} + +void traverse_commit_list_filtered( + struct list_objects_filter_options *filter_options, + struct rev_info *revs, + show_commit_fn show_commit, + show_object_fn show_object, + void *show_data, + struct oidset *omitted) +{ + filter_object_fn filter_fn = NULL; + filter_free_fn filter_free_fn = NULL; + void *filter_data = NULL; + + filter_data = list_objects_filter__init(omitted, filter_options, + &filter_fn, &filter_free_fn); + do_traverse(revs, show_commit, show_object, show_data, + filter_fn, filter_data); + if (filter_data && filter_free_fn) + filter_free_fn(filter_data); +} diff --git a/list-objects.h b/list-objects.h index 0cebf8585c..aa618d7f45 100644 --- a/list-objects.h +++ b/list-objects.h @@ -8,4 +8,15 @@ void traverse_commit_list(struct rev_info *, show_commit_fn, show_object_fn, voi typedef void (*show_edge_fn)(struct commit *); void mark_edges_uninteresting(struct rev_info *, show_edge_fn); -#endif +struct oidset; +struct list_objects_filter_options; + +void traverse_commit_list_filtered( + struct list_objects_filter_options *filter_options, + struct rev_info *revs, + show_commit_fn show_commit, + show_object_fn show_object, + void *show_data, + struct oidset *omitted); + +#endif /* LIST_OBJECTS_H */ diff --git a/object.h b/object.h index df8abe96f7..f34461d4af 100644 --- a/object.h +++ b/object.h @@ -38,6 +38,7 @@ struct object_array { * http-push.c: 16-----19 * commit.c: 16-----19 * sha1_name.c: 20 + * list-objects-filter.c: 21 * builtin/fsck.c: 0--3 */ #define FLAG_BITS 27 From caf3827e2f6a8a0315301d52b5bf46e269b3a388 Mon Sep 17 00:00:00 2001 From: Jeff Hostetler Date: Tue, 21 Nov 2017 20:58:51 +0000 Subject: [PATCH 5/9] rev-list: add list-objects filtering support Teach rev-list to use the filtering provided by the traverse_commit_list_filtered() interface to omit unwanted objects from the result. Object filtering is only allowed when one of the "--objects*" options are used. When the "--filter-print-omitted" option is used, the omitted objects are printed at the end. These are marked with a "~". This option can be combined with "--quiet" to get a list of just the omitted objects. Add t6112 test. In the future, we will introduce a "partial clone" mechanism wherein an object in a repo, obtained from a remote, may reference a missing object that can be dynamically fetched from that remote once needed. This "partial clone" mechanism will have a way, sometimes slow, of determining if a missing link is one of the links expected to be produced by this mechanism. This patch introduces handling of missing objects to help debugging and development of the "partial clone" mechanism, and once the mechanism is implemented, for a power user to perform operations that are missing-object aware without incurring the cost of checking if a missing link is expected. Signed-off-by: Jeff Hostetler Reviewed-by: Jonathan Tan Signed-off-by: Junio C Hamano --- Documentation/git-rev-list.txt | 4 +- Documentation/rev-list-options.txt | 36 +++++ builtin/rev-list.c | 108 ++++++++++++- t/t6112-rev-list-filters-objects.sh | 225 ++++++++++++++++++++++++++++ 4 files changed, 370 insertions(+), 3 deletions(-) create mode 100755 t/t6112-rev-list-filters-objects.sh diff --git a/Documentation/git-rev-list.txt b/Documentation/git-rev-list.txt index ef22f1775b..88609ff435 100644 --- a/Documentation/git-rev-list.txt +++ b/Documentation/git-rev-list.txt @@ -47,7 +47,9 @@ SYNOPSIS [ --fixed-strings | -F ] [ --date=] [ [ --objects | --objects-edge | --objects-edge-aggressive ] - [ --unpacked ] ] + [ --unpacked ] + [ --filter= [ --filter-print-omitted ] ] ] + [ --missing= ] [ --pretty | --header ] [ --bisect ] [ --bisect-vars ] diff --git a/Documentation/rev-list-options.txt b/Documentation/rev-list-options.txt index 13501e1556..11bb87f3dc 100644 --- a/Documentation/rev-list-options.txt +++ b/Documentation/rev-list-options.txt @@ -706,6 +706,42 @@ ifdef::git-rev-list[] --unpacked:: Only useful with `--objects`; print the object IDs that are not in packs. + +--filter=:: + Only useful with one of the `--objects*`; omits objects (usually + blobs) from the list of printed objects. The '' + may be one of the following: ++ +The form '--filter=blob:none' omits all blobs. ++ +The form '--filter=blob:limit=[kmg]' omits blobs larger than n bytes +or units. The value may be zero. ++ +The form '--filter=sparse:oid=' uses a sparse-checkout +specification contained in the object (or the object that the expression +evaluates to) to omit blobs that would not be not required for a +sparse checkout on the requested refs. ++ +The form '--filter=sparse:path=' similarly uses a sparse-checkout +specification contained in . + +--filter-print-omitted:: + Only useful with `--filter=`; prints a list of the objects omitted + by the filter. Object IDs are prefixed with a ``~'' character. + +--missing=:: + A debug option to help with future "partial clone" development. + This option specifies how missing objects are handled. ++ +The form '--missing=error' requests that rev-list stop with an error if +a missing object is encountered. This is the default action. ++ +The form '--missing=allow-any' will allow object traversal to continue +if a missing object is encountered. Missing objects will silently be +omitted from the results. ++ +The form '--missing=print' is like 'allow-any', but will also print a +list of the missing objects. Object IDs are prefixed with a ``?'' character. endif::git-rev-list[] --no-walk[=(sorted|unsorted)]:: diff --git a/builtin/rev-list.c b/builtin/rev-list.c index c1c74d4a79..71e3dfc46e 100644 --- a/builtin/rev-list.c +++ b/builtin/rev-list.c @@ -4,6 +4,8 @@ #include "diff.h" #include "revision.h" #include "list-objects.h" +#include "list-objects-filter.h" +#include "list-objects-filter-options.h" #include "pack.h" #include "pack-bitmap.h" #include "builtin.h" @@ -12,6 +14,7 @@ #include "bisect.h" #include "progress.h" #include "reflog-walk.h" +#include "oidset.h" static const char rev_list_usage[] = "git rev-list [OPTION] ... [ -- paths... ]\n" @@ -55,6 +58,20 @@ static const char rev_list_usage[] = static struct progress *progress; static unsigned progress_counter; +static struct list_objects_filter_options filter_options; +static struct oidset omitted_objects; +static int arg_print_omitted; /* print objects omitted by filter */ + +static struct oidset missing_objects; +enum missing_action { + MA_ERROR = 0, /* fail if any missing objects are encountered */ + MA_ALLOW_ANY, /* silently allow ALL missing objects */ + MA_PRINT, /* print ALL missing objects in special section */ +}; +static enum missing_action arg_missing_action; + +#define DEFAULT_OIDSET_SIZE (16*1024) + static void finish_commit(struct commit *commit, void *data); static void show_commit(struct commit *commit, void *data) { @@ -178,11 +195,31 @@ static void finish_commit(struct commit *commit, void *data) free_commit_buffer(commit); } +static inline void finish_object__ma(struct object *obj) +{ + switch (arg_missing_action) { + case MA_ERROR: + die("missing blob object '%s'", oid_to_hex(&obj->oid)); + return; + + case MA_ALLOW_ANY: + return; + + case MA_PRINT: + oidset_insert(&missing_objects, &obj->oid); + return; + + default: + BUG("unhandled missing_action"); + return; + } +} + static void finish_object(struct object *obj, const char *name, void *cb_data) { struct rev_list_info *info = cb_data; if (obj->type == OBJ_BLOB && !has_object_file(&obj->oid)) - die("missing blob object '%s'", oid_to_hex(&obj->oid)); + finish_object__ma(obj); if (info->revs->verify_objects && !obj->parsed && obj->type != OBJ_COMMIT) parse_object(&obj->oid); } @@ -269,6 +306,26 @@ static int show_object_fast( return 1; } +static inline int parse_missing_action_value(const char *value) +{ + if (!strcmp(value, "error")) { + arg_missing_action = MA_ERROR; + return 1; + } + + if (!strcmp(value, "allow-any")) { + arg_missing_action = MA_ALLOW_ANY; + return 1; + } + + if (!strcmp(value, "print")) { + arg_missing_action = MA_PRINT; + return 1; + } + + return 0; +} + int cmd_rev_list(int argc, const char **argv, const char *prefix) { struct rev_info revs; @@ -335,6 +392,26 @@ int cmd_rev_list(int argc, const char **argv, const char *prefix) show_progress = arg; continue; } + + if (skip_prefix(arg, ("--" CL_ARG__FILTER "="), &arg)) { + parse_list_objects_filter(&filter_options, arg); + if (filter_options.choice && !revs.blob_objects) + die(_("object filtering requires --objects")); + if (filter_options.choice == LOFC_SPARSE_OID && + !filter_options.sparse_oid_value) + die(_("invalid sparse value '%s'"), + filter_options.filter_spec); + continue; + } + if (!strcmp(arg, "--filter-print-omitted")) { + arg_print_omitted = 1; + continue; + } + + if (skip_prefix(arg, "--missing=", &arg) && + parse_missing_action_value(arg)) + continue; + usage(rev_list_usage); } @@ -360,6 +437,9 @@ int cmd_rev_list(int argc, const char **argv, const char *prefix) if (revs.show_notes) die(_("rev-list does not support display of notes")); + if (filter_options.choice && use_bitmap_index) + die(_("cannot combine --use-bitmap-index with object filtering")); + save_commit_buffer = (revs.verbose_header || revs.grep_filter.pattern_list || revs.grep_filter.header_list); @@ -404,7 +484,31 @@ int cmd_rev_list(int argc, const char **argv, const char *prefix) return show_bisect_vars(&info, reaches, all); } - traverse_commit_list(&revs, show_commit, show_object, &info); + if (arg_print_omitted) + oidset_init(&omitted_objects, DEFAULT_OIDSET_SIZE); + if (arg_missing_action == MA_PRINT) + oidset_init(&missing_objects, DEFAULT_OIDSET_SIZE); + + traverse_commit_list_filtered( + &filter_options, &revs, show_commit, show_object, &info, + (arg_print_omitted ? &omitted_objects : NULL)); + + if (arg_print_omitted) { + struct oidset_iter iter; + struct object_id *oid; + oidset_iter_init(&omitted_objects, &iter); + while ((oid = oidset_iter_next(&iter))) + printf("~%s\n", oid_to_hex(oid)); + oidset_clear(&omitted_objects); + } + if (arg_missing_action == MA_PRINT) { + struct oidset_iter iter; + struct object_id *oid; + oidset_iter_init(&missing_objects, &iter); + while ((oid = oidset_iter_next(&iter))) + printf("?%s\n", oid_to_hex(oid)); + oidset_clear(&missing_objects); + } stop_progress(&progress); diff --git a/t/t6112-rev-list-filters-objects.sh b/t/t6112-rev-list-filters-objects.sh new file mode 100755 index 0000000000..0a37dd5f97 --- /dev/null +++ b/t/t6112-rev-list-filters-objects.sh @@ -0,0 +1,225 @@ +#!/bin/sh + +test_description='git rev-list using object filtering' + +. ./test-lib.sh + +# Test the blob:none filter. + +test_expect_success 'setup r1' ' + echo "{print \$1}" >print_1.awk && + echo "{print \$2}" >print_2.awk && + + git init r1 && + for n in 1 2 3 4 5 + do + echo "This is file: $n" > r1/file.$n + git -C r1 add file.$n + git -C r1 commit -m "$n" + done +' + +test_expect_success 'verify blob:none omits all 5 blobs' ' + git -C r1 ls-files -s file.1 file.2 file.3 file.4 file.5 \ + | awk -f print_2.awk \ + | sort >expected && + git -C r1 rev-list HEAD --quiet --objects --filter-print-omitted --filter=blob:none \ + | awk -f print_1.awk \ + | sed "s/~//" \ + | sort >observed && + test_cmp observed expected +' + +test_expect_success 'verify emitted+omitted == all' ' + git -C r1 rev-list HEAD --objects \ + | awk -f print_1.awk \ + | sort >expected && + git -C r1 rev-list HEAD --objects --filter-print-omitted --filter=blob:none \ + | awk -f print_1.awk \ + | sed "s/~//" \ + | sort >observed && + test_cmp observed expected +' + + +# Test blob:limit=[kmg] filter. +# We boundary test around the size parameter. The filter is strictly less than +# the value, so size 500 and 1000 should have the same results, but 1001 should +# filter more. + +test_expect_success 'setup r2' ' + git init r2 && + for n in 1000 10000 + do + printf "%"$n"s" X > r2/large.$n + git -C r2 add large.$n + git -C r2 commit -m "$n" + done +' + +test_expect_success 'verify blob:limit=500 omits all blobs' ' + git -C r2 ls-files -s large.1000 large.10000 \ + | awk -f print_2.awk \ + | sort >expected && + git -C r2 rev-list HEAD --quiet --objects --filter-print-omitted --filter=blob:limit=500 \ + | awk -f print_1.awk \ + | sed "s/~//" \ + | sort >observed && + test_cmp observed expected +' + +test_expect_success 'verify emitted+omitted == all' ' + git -C r2 rev-list HEAD --objects \ + | awk -f print_1.awk \ + | sort >expected && + git -C r2 rev-list HEAD --objects --filter-print-omitted --filter=blob:limit=500 \ + | awk -f print_1.awk \ + | sed "s/~//" \ + | sort >observed && + test_cmp observed expected +' + +test_expect_success 'verify blob:limit=1000' ' + git -C r2 ls-files -s large.1000 large.10000 \ + | awk -f print_2.awk \ + | sort >expected && + git -C r2 rev-list HEAD --quiet --objects --filter-print-omitted --filter=blob:limit=1000 \ + | awk -f print_1.awk \ + | sed "s/~//" \ + | sort >observed && + test_cmp observed expected +' + +test_expect_success 'verify blob:limit=1001' ' + git -C r2 ls-files -s large.10000 \ + | awk -f print_2.awk \ + | sort >expected && + git -C r2 rev-list HEAD --quiet --objects --filter-print-omitted --filter=blob:limit=1001 \ + | awk -f print_1.awk \ + | sed "s/~//" \ + | sort >observed && + test_cmp observed expected +' + +test_expect_success 'verify blob:limit=1k' ' + git -C r2 ls-files -s large.10000 \ + | awk -f print_2.awk \ + | sort >expected && + git -C r2 rev-list HEAD --quiet --objects --filter-print-omitted --filter=blob:limit=1k \ + | awk -f print_1.awk \ + | sed "s/~//" \ + | sort >observed && + test_cmp observed expected +' + +test_expect_success 'verify blob:limit=1m' ' + cat expected && + git -C r2 rev-list HEAD --quiet --objects --filter-print-omitted --filter=blob:limit=1m \ + | awk -f print_1.awk \ + | sed "s/~//" \ + | sort >observed && + test_cmp observed expected +' + +# Test sparse:path= filter. +# Use a local file containing a sparse-checkout specification to filter +# out blobs not required for the corresponding sparse-checkout. We do not +# require sparse-checkout to actually be enabled. + +test_expect_success 'setup r3' ' + git init r3 && + mkdir r3/dir1 && + for n in sparse1 sparse2 + do + echo "This is file: $n" > r3/$n + git -C r3 add $n + echo "This is file: dir1/$n" > r3/dir1/$n + git -C r3 add dir1/$n + done && + git -C r3 commit -m "sparse" && + echo dir1/ >pattern1 && + echo sparse1 >pattern2 +' + +test_expect_success 'verify sparse:path=pattern1 omits top-level files' ' + git -C r3 ls-files -s sparse1 sparse2 \ + | awk -f print_2.awk \ + | sort >expected && + git -C r3 rev-list HEAD --quiet --objects --filter-print-omitted --filter=sparse:path=../pattern1 \ + | awk -f print_1.awk \ + | sed "s/~//" \ + | sort >observed && + test_cmp observed expected +' + +test_expect_success 'verify sparse:path=pattern2 omits both sparse2 files' ' + git -C r3 ls-files -s sparse2 dir1/sparse2 \ + | awk -f print_2.awk \ + | sort >expected && + git -C r3 rev-list HEAD --quiet --objects --filter-print-omitted --filter=sparse:path=../pattern2 \ + | awk -f print_1.awk \ + | sed "s/~//" \ + | sort >observed && + test_cmp observed expected +' + +# Test sparse:oid= filter. +# Like sparse:path, but we get the sparse-checkout specification from +# a blob rather than a file on disk. + +test_expect_success 'setup r3 part 2' ' + echo dir1/ >r3/pattern && + git -C r3 add pattern && + git -C r3 commit -m "pattern" +' + +test_expect_success 'verify sparse:oid=OID omits top-level files' ' + git -C r3 ls-files -s pattern sparse1 sparse2 \ + | awk -f print_2.awk \ + | sort >expected && + oid=$(git -C r3 ls-files -s pattern | awk -f print_2.awk) && + git -C r3 rev-list HEAD --quiet --objects --filter-print-omitted --filter=sparse:oid=$oid \ + | awk -f print_1.awk \ + | sed "s/~//" \ + | sort >observed && + test_cmp observed expected +' + +test_expect_success 'verify sparse:oid=oid-ish omits top-level files' ' + git -C r3 ls-files -s pattern sparse1 sparse2 \ + | awk -f print_2.awk \ + | sort >expected && + git -C r3 rev-list HEAD --quiet --objects --filter-print-omitted --filter=sparse:oid=master:pattern \ + | awk -f print_1.awk \ + | sed "s/~//" \ + | sort >observed && + test_cmp observed expected +' + +# Delete some loose objects and use rev-list, but WITHOUT any filtering. +# This models previously omitted objects that we did not receive. + +test_expect_success 'rev-list W/ --missing=print' ' + git -C r1 ls-files -s file.1 file.2 file.3 file.4 file.5 \ + | awk -f print_2.awk \ + | sort >expected && + for id in `cat expected | sed "s|..|&/|"` + do + rm r1/.git/objects/$id + done && + git -C r1 rev-list --quiet HEAD --missing=print --objects \ + | awk -f print_1.awk \ + | sed "s/?//" \ + | sort >observed && + test_cmp observed expected +' + +test_expect_success 'rev-list W/O --missing fails' ' + test_must_fail git -C r1 rev-list --quiet --objects HEAD +' + +test_expect_success 'rev-list W/ missing=allow-any' ' + git -C r1 rev-list --quiet --missing=allow-any --objects HEAD +' + +test_done From 9535ce7337b9add29fab3cb0021ccaf0da8c7138 Mon Sep 17 00:00:00 2001 From: Jeff Hostetler Date: Tue, 21 Nov 2017 20:58:52 +0000 Subject: [PATCH 6/9] pack-objects: add list-objects filtering Teach pack-objects to use the filtering provided by the traverse_commit_list_filtered() interface to omit unwanted objects from the resulting packfile. Filtering requires the use of the "--stdout" option. Add t5317 test. In the future, we will introduce a "partial clone" mechanism wherein an object in a repo, obtained from a remote, may reference a missing object that can be dynamically fetched from that remote once needed. This "partial clone" mechanism will have a way, sometimes slow, of determining if a missing link is one of the links expected to be produced by this mechanism. This patch introduces handling of missing objects to help debugging and development of the "partial clone" mechanism, and once the mechanism is implemented, for a power user to perform operations that are missing-object aware without incurring the cost of checking if a missing link is expected. Signed-off-by: Jeff Hostetler Reviewed-by: Jonathan Tan Signed-off-by: Junio C Hamano --- Documentation/git-pack-objects.txt | 19 +- builtin/pack-objects.c | 64 ++++- t/t5317-pack-objects-filter-objects.sh | 375 +++++++++++++++++++++++++ 3 files changed, 456 insertions(+), 2 deletions(-) create mode 100755 t/t5317-pack-objects-filter-objects.sh diff --git a/Documentation/git-pack-objects.txt b/Documentation/git-pack-objects.txt index 473a16135a..b924c6cbf7 100644 --- a/Documentation/git-pack-objects.txt +++ b/Documentation/git-pack-objects.txt @@ -12,7 +12,8 @@ SYNOPSIS 'git pack-objects' [-q | --progress | --all-progress] [--all-progress-implied] [--no-reuse-delta] [--delta-base-offset] [--non-empty] [--local] [--incremental] [--window=] [--depth=] - [--revs [--unpacked | --all]] [--stdout | base-name] + [--revs [--unpacked | --all]] + [--stdout [--filter=] | base-name] [--shallow] [--keep-true-parents] < object-list @@ -236,6 +237,22 @@ So does `git bundle` (see linkgit:git-bundle[1]) when it creates a bundle. With this option, parents that are hidden by grafts are packed nevertheless. +--filter=:: + Requires `--stdout`. Omits certain objects (usually blobs) from + the resulting packfile. See linkgit:git-rev-list[1] for valid + `` forms. + +--missing=:: + A debug option to help with future "partial clone" development. + This option specifies how missing objects are handled. ++ +The form '--missing=error' requests that pack-objects stop with an error if +a missing object is encountered. This is the default action. ++ +The form '--missing=allow-any' will allow object traversal to continue +if a missing object is encountered. Missing objects will silently be +omitted from the results. + SEE ALSO -------- linkgit:git-rev-list[1] diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c index 6e77dfd444..45ad35d918 100644 --- a/builtin/pack-objects.c +++ b/builtin/pack-objects.c @@ -15,6 +15,8 @@ #include "diff.h" #include "revision.h" #include "list-objects.h" +#include "list-objects-filter.h" +#include "list-objects-filter-options.h" #include "pack-objects.h" #include "progress.h" #include "refs.h" @@ -79,6 +81,15 @@ static unsigned long cache_max_small_delta_size = 1000; static unsigned long window_memory_limit = 0; +static struct list_objects_filter_options filter_options; + +enum missing_action { + MA_ERROR = 0, /* fail if any missing objects are encountered */ + MA_ALLOW_ANY, /* silently allow ALL missing objects */ +}; +static enum missing_action arg_missing_action; +static show_object_fn fn_show_object; + /* * stats */ @@ -2552,6 +2563,42 @@ static void show_object(struct object *obj, const char *name, void *data) obj->flags |= OBJECT_ADDED; } +static void show_object__ma_allow_any(struct object *obj, const char *name, void *data) +{ + assert(arg_missing_action == MA_ALLOW_ANY); + + /* + * Quietly ignore ALL missing objects. This avoids problems with + * staging them now and getting an odd error later. + */ + if (!has_object_file(&obj->oid)) + return; + + show_object(obj, name, data); +} + +static int option_parse_missing_action(const struct option *opt, + const char *arg, int unset) +{ + assert(arg); + assert(!unset); + + if (!strcmp(arg, "error")) { + arg_missing_action = MA_ERROR; + fn_show_object = show_object; + return 0; + } + + if (!strcmp(arg, "allow-any")) { + arg_missing_action = MA_ALLOW_ANY; + fn_show_object = show_object__ma_allow_any; + return 0; + } + + die(_("invalid value for --missing")); + return 0; +} + static void show_edge(struct commit *commit) { add_preferred_base(commit->object.oid.hash); @@ -2816,7 +2863,12 @@ static void get_object_list(int ac, const char **av) if (prepare_revision_walk(&revs)) die("revision walk setup failed"); mark_edges_uninteresting(&revs, show_edge); - traverse_commit_list(&revs, show_commit, show_object, NULL); + + if (!fn_show_object) + fn_show_object = show_object; + traverse_commit_list_filtered(&filter_options, &revs, + show_commit, fn_show_object, NULL, + NULL); if (unpack_unreachable_expiration) { revs.ignore_missing_links = 1; @@ -2952,6 +3004,10 @@ int cmd_pack_objects(int argc, const char **argv, const char *prefix) N_("use a bitmap index if available to speed up counting objects")), OPT_BOOL(0, "write-bitmap-index", &write_bitmap_index, N_("write a bitmap index together with the pack index")), + OPT_PARSE_LIST_OBJECTS_FILTER(&filter_options), + { OPTION_CALLBACK, 0, "missing", NULL, N_("action"), + N_("handling for missing objects"), PARSE_OPT_NONEG, + option_parse_missing_action }, OPT_END(), }; @@ -3028,6 +3084,12 @@ int cmd_pack_objects(int argc, const char **argv, const char *prefix) if (!rev_list_all || !rev_list_reflog || !rev_list_index) unpack_unreachable_expiration = 0; + if (filter_options.choice) { + if (!pack_to_stdout) + die("cannot use --filter without --stdout."); + use_bitmap_index = 0; + } + /* * "soft" reasons not to use bitmaps - for on-disk repack by default we want * diff --git a/t/t5317-pack-objects-filter-objects.sh b/t/t5317-pack-objects-filter-objects.sh new file mode 100755 index 0000000000..1b0acc383b --- /dev/null +++ b/t/t5317-pack-objects-filter-objects.sh @@ -0,0 +1,375 @@ +#!/bin/sh + +test_description='git pack-objects using object filtering' + +. ./test-lib.sh + +# Test blob:none filter. + +test_expect_success 'setup r1' ' + echo "{print \$1}" >print_1.awk && + echo "{print \$2}" >print_2.awk && + + git init r1 && + for n in 1 2 3 4 5 + do + echo "This is file: $n" > r1/file.$n + git -C r1 add file.$n + git -C r1 commit -m "$n" + done +' + +test_expect_success 'verify blob count in normal packfile' ' + git -C r1 ls-files -s file.1 file.2 file.3 file.4 file.5 \ + | awk -f print_2.awk \ + | sort >expected && + git -C r1 pack-objects --rev --stdout >all.pack <<-EOF && + HEAD + EOF + git -C r1 index-pack ../all.pack && + git -C r1 verify-pack -v ../all.pack \ + | grep blob \ + | awk -f print_1.awk \ + | sort >observed && + test_cmp observed expected +' + +test_expect_success 'verify blob:none packfile has no blobs' ' + git -C r1 pack-objects --rev --stdout --filter=blob:none >filter.pack <<-EOF && + HEAD + EOF + git -C r1 index-pack ../filter.pack && + git -C r1 verify-pack -v ../filter.pack \ + | grep blob \ + | awk -f print_1.awk \ + | sort >observed && + nr=$(wc -l expected && + git -C r1 verify-pack -v ../filter.pack \ + | grep -E "commit|tree" \ + | awk -f print_1.awk \ + | sort >observed && + test_cmp observed expected +' + +# Test blob:limit=[kmg] filter. +# We boundary test around the size parameter. The filter is strictly less than +# the value, so size 500 and 1000 should have the same results, but 1001 should +# filter more. + +test_expect_success 'setup r2' ' + git init r2 && + for n in 1000 10000 + do + printf "%"$n"s" X > r2/large.$n + git -C r2 add large.$n + git -C r2 commit -m "$n" + done +' + +test_expect_success 'verify blob count in normal packfile' ' + git -C r2 ls-files -s large.1000 large.10000 \ + | awk -f print_2.awk \ + | sort >expected && + git -C r2 pack-objects --rev --stdout >all.pack <<-EOF && + HEAD + EOF + git -C r2 index-pack ../all.pack && + git -C r2 verify-pack -v ../all.pack \ + | grep blob \ + | awk -f print_1.awk \ + | sort >observed && + test_cmp observed expected +' + +test_expect_success 'verify blob:limit=500 omits all blobs' ' + git -C r2 pack-objects --rev --stdout --filter=blob:limit=500 >filter.pack <<-EOF && + HEAD + EOF + git -C r2 index-pack ../filter.pack && + git -C r2 verify-pack -v ../filter.pack \ + | grep blob \ + | awk -f print_1.awk \ + | sort >observed && + nr=$(wc -l filter.pack <<-EOF && + HEAD + EOF + git -C r2 index-pack ../filter.pack && + git -C r2 verify-pack -v ../filter.pack \ + | grep blob \ + | awk -f print_1.awk \ + | sort >observed && + nr=$(wc -l expected && + git -C r2 pack-objects --rev --stdout --filter=blob:limit=1001 >filter.pack <<-EOF && + HEAD + EOF + git -C r2 index-pack ../filter.pack && + git -C r2 verify-pack -v ../filter.pack \ + | grep blob \ + | awk -f print_1.awk \ + | sort >observed && + test_cmp observed expected +' + +test_expect_success 'verify blob:limit=10001' ' + git -C r2 ls-files -s large.1000 large.10000 \ + | awk -f print_2.awk \ + | sort >expected && + git -C r2 pack-objects --rev --stdout --filter=blob:limit=10001 >filter.pack <<-EOF && + HEAD + EOF + git -C r2 index-pack ../filter.pack && + git -C r2 verify-pack -v ../filter.pack \ + | grep blob \ + | awk -f print_1.awk \ + | sort >observed && + test_cmp observed expected +' + +test_expect_success 'verify blob:limit=1k' ' + git -C r2 ls-files -s large.1000 \ + | awk -f print_2.awk \ + | sort >expected && + git -C r2 pack-objects --rev --stdout --filter=blob:limit=1k >filter.pack <<-EOF && + HEAD + EOF + git -C r2 index-pack ../filter.pack && + git -C r2 verify-pack -v ../filter.pack \ + | grep blob \ + | awk -f print_1.awk \ + | sort >observed && + test_cmp observed expected +' + +test_expect_success 'verify blob:limit=1m' ' + git -C r2 ls-files -s large.1000 large.10000 \ + | awk -f print_2.awk \ + | sort >expected && + git -C r2 pack-objects --rev --stdout --filter=blob:limit=1m >filter.pack <<-EOF && + HEAD + EOF + git -C r2 index-pack ../filter.pack && + git -C r2 verify-pack -v ../filter.pack \ + | grep blob \ + | awk -f print_1.awk \ + | sort >observed && + test_cmp observed expected +' + +test_expect_success 'verify normal and blob:limit packfiles have same commits/trees' ' + git -C r2 verify-pack -v ../all.pack \ + | grep -E "commit|tree" \ + | awk -f print_1.awk \ + | sort >expected && + git -C r2 verify-pack -v ../filter.pack \ + | grep -E "commit|tree" \ + | awk -f print_1.awk \ + | sort >observed && + test_cmp observed expected +' + +# Test sparse:path= filter. +# Use a local file containing a sparse-checkout specification to filter +# out blobs not required for the corresponding sparse-checkout. We do not +# require sparse-checkout to actually be enabled. + +test_expect_success 'setup r3' ' + git init r3 && + mkdir r3/dir1 && + for n in sparse1 sparse2 + do + echo "This is file: $n" > r3/$n + git -C r3 add $n + echo "This is file: dir1/$n" > r3/dir1/$n + git -C r3 add dir1/$n + done && + git -C r3 commit -m "sparse" && + echo dir1/ >pattern1 && + echo sparse1 >pattern2 +' + +test_expect_success 'verify blob count in normal packfile' ' + git -C r3 ls-files -s sparse1 sparse2 dir1/sparse1 dir1/sparse2 \ + | awk -f print_2.awk \ + | sort >expected && + git -C r3 pack-objects --rev --stdout >all.pack <<-EOF && + HEAD + EOF + git -C r3 index-pack ../all.pack && + git -C r3 verify-pack -v ../all.pack \ + | grep blob \ + | awk -f print_1.awk \ + | sort >observed && + test_cmp observed expected +' + +test_expect_success 'verify sparse:path=pattern1' ' + git -C r3 ls-files -s dir1/sparse1 dir1/sparse2 \ + | awk -f print_2.awk \ + | sort >expected && + git -C r3 pack-objects --rev --stdout --filter=sparse:path=../pattern1 >filter.pack <<-EOF && + HEAD + EOF + git -C r3 index-pack ../filter.pack && + git -C r3 verify-pack -v ../filter.pack \ + | grep blob \ + | awk -f print_1.awk \ + | sort >observed && + test_cmp observed expected +' + +test_expect_success 'verify normal and sparse:path=pattern1 packfiles have same commits/trees' ' + git -C r3 verify-pack -v ../all.pack \ + | grep -E "commit|tree" \ + | awk -f print_1.awk \ + | sort >expected && + git -C r3 verify-pack -v ../filter.pack \ + | grep -E "commit|tree" \ + | awk -f print_1.awk \ + | sort >observed && + test_cmp observed expected +' + +test_expect_success 'verify sparse:path=pattern2' ' + git -C r3 ls-files -s sparse1 dir1/sparse1 \ + | awk -f print_2.awk \ + | sort >expected && + git -C r3 pack-objects --rev --stdout --filter=sparse:path=../pattern2 >filter.pack <<-EOF && + HEAD + EOF + git -C r3 index-pack ../filter.pack && + git -C r3 verify-pack -v ../filter.pack \ + | grep blob \ + | awk -f print_1.awk \ + | sort >observed && + test_cmp observed expected +' + +test_expect_success 'verify normal and sparse:path=pattern2 packfiles have same commits/trees' ' + git -C r3 verify-pack -v ../all.pack \ + | grep -E "commit|tree" \ + | awk -f print_1.awk \ + | sort >expected && + git -C r3 verify-pack -v ../filter.pack \ + | grep -E "commit|tree" \ + | awk -f print_1.awk \ + | sort >observed && + test_cmp observed expected +' + +# Test sparse:oid= filter. +# Like sparse:path, but we get the sparse-checkout specification from +# a blob rather than a file on disk. + +test_expect_success 'setup r4' ' + git init r4 && + mkdir r4/dir1 && + for n in sparse1 sparse2 + do + echo "This is file: $n" > r4/$n + git -C r4 add $n + echo "This is file: dir1/$n" > r4/dir1/$n + git -C r4 add dir1/$n + done && + echo dir1/ >r4/pattern && + git -C r4 add pattern && + git -C r4 commit -m "pattern" +' + +test_expect_success 'verify blob count in normal packfile' ' + git -C r4 ls-files -s pattern sparse1 sparse2 dir1/sparse1 dir1/sparse2 \ + | awk -f print_2.awk \ + | sort >expected && + git -C r4 pack-objects --rev --stdout >all.pack <<-EOF && + HEAD + EOF + git -C r4 index-pack ../all.pack && + git -C r4 verify-pack -v ../all.pack \ + | grep blob \ + | awk -f print_1.awk \ + | sort >observed && + test_cmp observed expected +' + +test_expect_success 'verify sparse:oid=OID' ' + git -C r4 ls-files -s dir1/sparse1 dir1/sparse2 \ + | awk -f print_2.awk \ + | sort >expected && + oid=$(git -C r4 ls-files -s pattern | awk -f print_2.awk) && + git -C r4 pack-objects --rev --stdout --filter=sparse:oid=$oid >filter.pack <<-EOF && + HEAD + EOF + git -C r4 index-pack ../filter.pack && + git -C r4 verify-pack -v ../filter.pack \ + | grep blob \ + | awk -f print_1.awk \ + | sort >observed && + test_cmp observed expected +' + +test_expect_success 'verify sparse:oid=oid-ish' ' + git -C r4 ls-files -s dir1/sparse1 dir1/sparse2 \ + | awk -f print_2.awk \ + | sort >expected && + git -C r4 pack-objects --rev --stdout --filter=sparse:oid=master:pattern >filter.pack <<-EOF && + HEAD + EOF + git -C r4 index-pack ../filter.pack && + git -C r4 verify-pack -v ../filter.pack \ + | grep blob \ + | awk -f print_1.awk \ + | sort >observed && + test_cmp observed expected +' + +# Delete some loose objects and use pack-objects, but WITHOUT any filtering. +# This models previously omitted objects that we did not receive. + +test_expect_success 'setup r1 - delete loose blobs' ' + git -C r1 ls-files -s file.1 file.2 file.3 file.4 file.5 \ + | awk -f print_2.awk \ + | sort >expected && + for id in `cat expected | sed "s|..|&/|"` + do + rm r1/.git/objects/$id + done +' + +test_expect_success 'verify pack-objects fails w/ missing objects' ' + test_must_fail git -C r1 pack-objects --rev --stdout >miss.pack <<-EOF + HEAD + EOF +' + +test_expect_success 'verify pack-objects fails w/ --missing=error' ' + test_must_fail git -C r1 pack-objects --rev --stdout --missing=error >miss.pack <<-EOF + HEAD + EOF +' + +test_expect_success 'verify pack-objects w/ --missing=allow-any' ' + git -C r1 pack-objects --rev --stdout --missing=allow-any >miss.pack <<-EOF + HEAD + EOF +' + +test_done From 1dde5fa2b254663b3944053c67e45a1903682a80 Mon Sep 17 00:00:00 2001 From: Christian Couder Date: Tue, 5 Dec 2017 16:50:12 +0000 Subject: [PATCH 7/9] list-objects-filter-options: fix 'keword' typo in comment Signed-off-by: Christian Couder Signed-off-by: Jeff Hostetler Signed-off-by: Junio C Hamano --- list-objects-filter-options.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/list-objects-filter-options.c b/list-objects-filter-options.c index 9b28322b1f..52bdec75bf 100644 --- a/list-objects-filter-options.c +++ b/list-objects-filter-options.c @@ -8,7 +8,7 @@ #include "list-objects-filter-options.h" /* - * Parse value of the argument to the "filter" keword. + * Parse value of the argument to the "filter" keyword. * On the command line this looks like: * --filter= * and in the pack protocol as: From 4875c9791e787af07992d3ba30061885322b7d11 Mon Sep 17 00:00:00 2001 From: Jeff Hostetler Date: Tue, 5 Dec 2017 16:50:13 +0000 Subject: [PATCH 8/9] list-objects-filter-options: support --no-filter Teach opt_parse_list_objects_filter() to take --no-filter option and to free the contents of struct filter_options. This command line argument will be automatically inherited by commands using OPT_PARSE_LIST_OBJECTS_FILTER(); this includes pack-objects. Signed-off-by: Jeff Hostetler Signed-off-by: Junio C Hamano --- Documentation/git-pack-objects.txt | 3 +++ list-objects-filter-options.c | 15 +++++++++++++-- list-objects-filter-options.h | 5 ++++- 3 files changed, 20 insertions(+), 3 deletions(-) diff --git a/Documentation/git-pack-objects.txt b/Documentation/git-pack-objects.txt index b924c6cbf7..aa403d02f3 100644 --- a/Documentation/git-pack-objects.txt +++ b/Documentation/git-pack-objects.txt @@ -242,6 +242,9 @@ So does `git bundle` (see linkgit:git-bundle[1]) when it creates a bundle. the resulting packfile. See linkgit:git-rev-list[1] for valid `` forms. +--no-filter:: + Turns off any previous `--filter=` argument. + --missing=:: A debug option to help with future "partial clone" development. This option specifies how missing objects are handled. diff --git a/list-objects-filter-options.c b/list-objects-filter-options.c index 52bdec75bf..4c5b34e949 100644 --- a/list-objects-filter-options.c +++ b/list-objects-filter-options.c @@ -74,8 +74,19 @@ int opt_parse_list_objects_filter(const struct option *opt, { struct list_objects_filter_options *filter_options = opt->value; - assert(arg); - assert(!unset); + if (unset || !arg) { + list_objects_filter_release(filter_options); + return 0; + } return parse_list_objects_filter(filter_options, arg); } + +void list_objects_filter_release( + struct list_objects_filter_options *filter_options) +{ + free(filter_options->filter_spec); + free(filter_options->sparse_oid_value); + free(filter_options->sparse_path_value); + memset(filter_options, 0, sizeof(*filter_options)); +} diff --git a/list-objects-filter-options.h b/list-objects-filter-options.h index dd7e5db2db..eea44a1a51 100644 --- a/list-objects-filter-options.h +++ b/list-objects-filter-options.h @@ -52,7 +52,10 @@ int opt_parse_list_objects_filter(const struct option *opt, #define OPT_PARSE_LIST_OBJECTS_FILTER(fo) \ { OPTION_CALLBACK, 0, CL_ARG__FILTER, fo, N_("args"), \ - N_("object filtering"), PARSE_OPT_NONEG, \ + N_("object filtering"), 0, \ opt_parse_list_objects_filter } +void list_objects_filter_release( + struct list_objects_filter_options *filter_options); + #endif /* LIST_OBJECTS_FILTER_OPTIONS_H */ From f4371a883fa2d740d6b3cd436f62c9b56f13432e Mon Sep 17 00:00:00 2001 From: Jeff Hostetler Date: Tue, 5 Dec 2017 16:50:14 +0000 Subject: [PATCH 9/9] rev-list: support --no-filter argument Teach rev-list to support --no-filter to override a previous --filter= argument. This is to be consistent with commands that use OPT_PARSE macros. Signed-off-by: Jeff Hostetler Signed-off-by: Junio C Hamano --- Documentation/rev-list-options.txt | 15 ++++++++++----- builtin/rev-list.c | 4 ++++ 2 files changed, 14 insertions(+), 5 deletions(-) diff --git a/Documentation/rev-list-options.txt b/Documentation/rev-list-options.txt index 11bb87f3dc..8d8b7f492a 100644 --- a/Documentation/rev-list-options.txt +++ b/Documentation/rev-list-options.txt @@ -715,16 +715,21 @@ ifdef::git-rev-list[] The form '--filter=blob:none' omits all blobs. + The form '--filter=blob:limit=[kmg]' omits blobs larger than n bytes -or units. The value may be zero. +or units. n may be zero. The suffixes k, m, and g can be used to name +units in KiB, MiB, or GiB. For example, 'blob:limit=1k' is the same +as 'blob:limit=1024'. + -The form '--filter=sparse:oid=' uses a sparse-checkout -specification contained in the object (or the object that the expression -evaluates to) to omit blobs that would not be not required for a -sparse checkout on the requested refs. +The form '--filter=sparse:oid=' uses a sparse-checkout +specification contained in the blob (or blob-expression) '' +to omit blobs that would not be not required for a sparse checkout on +the requested refs. + The form '--filter=sparse:path=' similarly uses a sparse-checkout specification contained in . +--no-filter:: + Turn off any previous `--filter=` argument. + --filter-print-omitted:: Only useful with `--filter=`; prints a list of the objects omitted by the filter. Object IDs are prefixed with a ``~'' character. diff --git a/builtin/rev-list.c b/builtin/rev-list.c index 71e3dfc46e..159b035a2c 100644 --- a/builtin/rev-list.c +++ b/builtin/rev-list.c @@ -403,6 +403,10 @@ int cmd_rev_list(int argc, const char **argv, const char *prefix) filter_options.filter_spec); continue; } + if (!strcmp(arg, ("--no-" CL_ARG__FILTER))) { + list_objects_filter_release(&filter_options); + continue; + } if (!strcmp(arg, "--filter-print-omitted")) { arg_print_omitted = 1; continue;