From e2b43831a5e4b482be19746d2257a309b51ba5fe Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Wed, 13 Sep 2023 15:17:35 -0400 Subject: [PATCH 1/8] builtin/repack.c: extract structure to store existing packs The repack machinery needs to keep track of which packfiles were present in the repository at the beginning of a repack, segmented by whether or not each pack is marked as kept. The names of these packs are stored in two `string_list`s, corresponding to kept- and non-kept packs, respectively. As a consequence, many functions within the repack code need to take both `string_list`s as arguments, leading to code like this: ret = write_cruft_pack(&cruft_po_args, packtmp, pack_prefix, cruft_expiration, &names, &existing_nonkept_packs, /* <- */ &existing_kept_packs); /* <- */ Wrap up this pair of `string_list`s into a single structure that stores both. This saves us from having to pass both string lists separately, and prepares for adding additional fields to this structure. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/repack.c | 90 ++++++++++++++++++++++++++---------------------- 1 file changed, 49 insertions(+), 41 deletions(-) diff --git a/builtin/repack.c b/builtin/repack.c index 2b43a5be08..c3ab89912e 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -95,14 +95,29 @@ static int repack_config(const char *var, const char *value, return git_default_config(var, value, ctx, cb); } +struct existing_packs { + struct string_list kept_packs; + struct string_list non_kept_packs; +}; + +#define EXISTING_PACKS_INIT { \ + .kept_packs = STRING_LIST_INIT_DUP, \ + .non_kept_packs = STRING_LIST_INIT_DUP, \ +} + +static void existing_packs_release(struct existing_packs *existing) +{ + string_list_clear(&existing->kept_packs, 0); + string_list_clear(&existing->non_kept_packs, 0); +} + /* - * Adds all packs hex strings (pack-$HASH) to either fname_nonkept_list - * or fname_kept_list based on whether each pack has a corresponding + * Adds all packs hex strings (pack-$HASH) to either packs->non_kept + * or packs->kept based on whether each pack has a corresponding * .keep file or not. Packs without a .keep file are not to be kept * if we are going to pack everything into one file. */ -static void collect_pack_filenames(struct string_list *fname_nonkept_list, - struct string_list *fname_kept_list, +static void collect_pack_filenames(struct existing_packs *existing, const struct string_list *extra_keep) { struct packed_git *p; @@ -126,16 +141,16 @@ static void collect_pack_filenames(struct string_list *fname_nonkept_list, strbuf_strip_suffix(&buf, ".pack"); if ((extra_keep->nr > 0 && i < extra_keep->nr) || p->pack_keep) - string_list_append(fname_kept_list, buf.buf); + string_list_append(&existing->kept_packs, buf.buf); else { struct string_list_item *item; - item = string_list_append(fname_nonkept_list, buf.buf); + item = string_list_append(&existing->non_kept_packs, buf.buf); if (p->is_cruft) item->util = (void*)(uintptr_t)CRUFT_PACK; } } - string_list_sort(fname_kept_list); + string_list_sort(&existing->kept_packs); strbuf_release(&buf); } @@ -327,7 +342,7 @@ static int geometry_cmp(const void *va, const void *vb) } static void init_pack_geometry(struct pack_geometry *geometry, - struct string_list *existing_kept_packs, + struct existing_packs *existing, const struct pack_objects_args *args) { struct packed_git *p; @@ -344,23 +359,24 @@ static void init_pack_geometry(struct pack_geometry *geometry, if (!pack_kept_objects) { /* - * Any pack that has its pack_keep bit set will appear - * in existing_kept_packs below, but this saves us from - * doing a more expensive check. + * Any pack that has its pack_keep bit set will + * appear in existing->kept_packs below, but + * this saves us from doing a more expensive + * check. */ if (p->pack_keep) continue; /* - * The pack may be kept via the --keep-pack option; - * check 'existing_kept_packs' to determine whether to - * ignore it. + * The pack may be kept via the --keep-pack + * option; check 'existing->kept_packs' to + * determine whether to ignore it. */ strbuf_reset(&buf); strbuf_addstr(&buf, pack_basename(p)); strbuf_strip_suffix(&buf, ".pack"); - if (string_list_has_string(existing_kept_packs, buf.buf)) + if (string_list_has_string(&existing->kept_packs, buf.buf)) continue; } if (p->is_cruft) @@ -565,14 +581,13 @@ static void midx_snapshot_refs(struct tempfile *f) } static void midx_included_packs(struct string_list *include, - struct string_list *existing_nonkept_packs, - struct string_list *existing_kept_packs, + struct existing_packs *existing, struct string_list *names, struct pack_geometry *geometry) { struct string_list_item *item; - for_each_string_list_item(item, existing_kept_packs) + for_each_string_list_item(item, &existing->kept_packs) string_list_insert(include, xstrfmt("%s.idx", item->string)); for_each_string_list_item(item, names) string_list_insert(include, xstrfmt("pack-%s.idx", item->string)); @@ -600,7 +615,7 @@ static void midx_included_packs(struct string_list *include, string_list_insert(include, strbuf_detach(&buf, NULL)); } - for_each_string_list_item(item, existing_nonkept_packs) { + for_each_string_list_item(item, &existing->non_kept_packs) { if (!((uintptr_t)item->util & CRUFT_PACK)) { /* * no need to check DELETE_PACK, since we're not @@ -611,7 +626,7 @@ static void midx_included_packs(struct string_list *include, string_list_insert(include, xstrfmt("%s.idx", item->string)); } } else { - for_each_string_list_item(item, existing_nonkept_packs) { + for_each_string_list_item(item, &existing->non_kept_packs) { if ((uintptr_t)item->util & DELETE_PACK) continue; string_list_insert(include, xstrfmt("%s.idx", item->string)); @@ -700,8 +715,7 @@ static int write_cruft_pack(const struct pack_objects_args *args, const char *pack_prefix, const char *cruft_expiration, struct string_list *names, - struct string_list *existing_packs, - struct string_list *existing_kept_packs) + struct existing_packs *existing) { struct child_process cmd = CHILD_PROCESS_INIT; struct strbuf line = STRBUF_INIT; @@ -744,9 +758,9 @@ static int write_cruft_pack(const struct pack_objects_args *args, in = xfdopen(cmd.in, "w"); for_each_string_list_item(item, names) fprintf(in, "%s-%s.pack\n", pack_prefix, item->string); - for_each_string_list_item(item, existing_packs) + for_each_string_list_item(item, &existing->non_kept_packs) fprintf(in, "-%s.pack\n", item->string); - for_each_string_list_item(item, existing_kept_packs) + for_each_string_list_item(item, &existing->kept_packs) fprintf(in, "%s.pack\n", item->string); fclose(in); @@ -778,8 +792,7 @@ int cmd_repack(int argc, const char **argv, const char *prefix) struct child_process cmd = CHILD_PROCESS_INIT; struct string_list_item *item; struct string_list names = STRING_LIST_INIT_DUP; - struct string_list existing_nonkept_packs = STRING_LIST_INIT_DUP; - struct string_list existing_kept_packs = STRING_LIST_INIT_DUP; + struct existing_packs existing = EXISTING_PACKS_INIT; struct pack_geometry geometry = { 0 }; struct strbuf line = STRBUF_INIT; struct tempfile *refs_snapshot = NULL; @@ -915,13 +928,12 @@ int cmd_repack(int argc, const char **argv, const char *prefix) packtmp_name = xstrfmt(".tmp-%d-pack", (int)getpid()); packtmp = mkpathdup("%s/%s", packdir, packtmp_name); - collect_pack_filenames(&existing_nonkept_packs, &existing_kept_packs, - &keep_pack_list); + collect_pack_filenames(&existing, &keep_pack_list); if (geometry.split_factor) { if (pack_everything) die(_("options '%s' and '%s' cannot be used together"), "--geometric", "-A/-a"); - init_pack_geometry(&geometry, &existing_kept_packs, &po_args); + init_pack_geometry(&geometry, &existing, &po_args); split_pack_geometry(&geometry); } @@ -965,7 +977,7 @@ int cmd_repack(int argc, const char **argv, const char *prefix) if (pack_everything & ALL_INTO_ONE) { repack_promisor_objects(&po_args, &names); - if (existing_nonkept_packs.nr && delete_redundant && + if (existing.non_kept_packs.nr && delete_redundant && !(pack_everything & PACK_CRUFT)) { for_each_string_list_item(item, &names) { strvec_pushf(&cmd.args, "--keep-pack=%s-%s.pack", @@ -1054,8 +1066,7 @@ int cmd_repack(int argc, const char **argv, const char *prefix) ret = write_cruft_pack(&cruft_po_args, packtmp, pack_prefix, cruft_expiration, &names, - &existing_nonkept_packs, - &existing_kept_packs); + &existing); if (ret) goto cleanup; @@ -1086,8 +1097,7 @@ int cmd_repack(int argc, const char **argv, const char *prefix) pack_prefix, NULL, &names, - &existing_nonkept_packs, - &existing_kept_packs); + &existing); if (ret) goto cleanup; } @@ -1133,7 +1143,7 @@ int cmd_repack(int argc, const char **argv, const char *prefix) if (delete_redundant && pack_everything & ALL_INTO_ONE) { const int hexsz = the_hash_algo->hexsz; - for_each_string_list_item(item, &existing_nonkept_packs) { + for_each_string_list_item(item, &existing.non_kept_packs) { char *sha1; size_t len = strlen(item->string); if (len < hexsz) @@ -1152,8 +1162,7 @@ int cmd_repack(int argc, const char **argv, const char *prefix) if (write_midx) { struct string_list include = STRING_LIST_INIT_NODUP; - midx_included_packs(&include, &existing_nonkept_packs, - &existing_kept_packs, &names, &geometry); + midx_included_packs(&include, &existing, &names, &geometry); ret = write_midx_included_packs(&include, &geometry, refs_snapshot ? get_tempfile_path(refs_snapshot) : NULL, @@ -1172,7 +1181,7 @@ int cmd_repack(int argc, const char **argv, const char *prefix) if (delete_redundant) { int opts = 0; - for_each_string_list_item(item, &existing_nonkept_packs) { + for_each_string_list_item(item, &existing.non_kept_packs) { if (!((uintptr_t)item->util & DELETE_PACK)) continue; remove_redundant_pack(packdir, item->string); @@ -1193,7 +1202,7 @@ int cmd_repack(int argc, const char **argv, const char *prefix) strbuf_strip_suffix(&buf, ".pack"); if ((p->pack_keep) || - (string_list_has_string(&existing_kept_packs, + (string_list_has_string(&existing.kept_packs, buf.buf))) continue; @@ -1224,8 +1233,7 @@ int cmd_repack(int argc, const char **argv, const char *prefix) cleanup: string_list_clear(&names, 1); - string_list_clear(&existing_nonkept_packs, 0); - string_list_clear(&existing_kept_packs, 0); + existing_packs_release(&existing); free_pack_geometry(&geometry); return ret; From 054b5e4873d9ef2348f0880d66f1acf73bad7d59 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Wed, 13 Sep 2023 15:17:46 -0400 Subject: [PATCH 2/8] builtin/repack.c: extract marking packs for deletion At the end of a repack (when given `-d`), Git attempts to remove any packs which have been made "redundant" as a result of the repacking operation. For example, an all-into-one (`-A` or `-a`) repack makes every pre-existing pack which is not marked as kept redundant. Geometric repacks (with `--geometric=`) make any packs which were rolled up redundant, and so on. But before deleting the set of packs we think are redundant, we first check to see whether or not we just wrote a pack which is identical to any one of the packs we were going to delete. When this is the case, Git must avoid deleting that pack, since it matches a pack we just wrote (so deleting it may cause the repository to become corrupt). Right now we only process the list of non-kept packs in a single pass. But a future change will split the existing non-kept packs further into two lists: one for cruft packs, and another for non-cruft packs. Factor out this routine to prepare for calling it twice on two separate lists in a future patch. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/repack.c | 50 +++++++++++++++++++++++++++++++----------------- 1 file changed, 32 insertions(+), 18 deletions(-) diff --git a/builtin/repack.c b/builtin/repack.c index c3ab89912e..708556836e 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -105,6 +105,36 @@ struct existing_packs { .non_kept_packs = STRING_LIST_INIT_DUP, \ } +static void mark_packs_for_deletion_1(struct string_list *names, + struct string_list *list) +{ + struct string_list_item *item; + const int hexsz = the_hash_algo->hexsz; + + for_each_string_list_item(item, list) { + char *sha1; + size_t len = strlen(item->string); + if (len < hexsz) + continue; + sha1 = item->string + len - hexsz; + /* + * Mark this pack for deletion, which ensures that this + * pack won't be included in a MIDX (if `--write-midx` + * was given) and that we will actually delete this pack + * (if `-d` was given). + */ + if (!string_list_has_string(names, sha1)) + item->util = (void*)(uintptr_t)((size_t)item->util | DELETE_PACK); + } +} + +static void mark_packs_for_deletion(struct existing_packs *existing, + struct string_list *names) + +{ + mark_packs_for_deletion_1(names, &existing->non_kept_packs); +} + static void existing_packs_release(struct existing_packs *existing) { string_list_clear(&existing->kept_packs, 0); @@ -1141,24 +1171,8 @@ int cmd_repack(int argc, const char **argv, const char *prefix) } /* End of pack replacement. */ - if (delete_redundant && pack_everything & ALL_INTO_ONE) { - const int hexsz = the_hash_algo->hexsz; - for_each_string_list_item(item, &existing.non_kept_packs) { - char *sha1; - size_t len = strlen(item->string); - if (len < hexsz) - continue; - sha1 = item->string + len - hexsz; - /* - * Mark this pack for deletion, which ensures that this - * pack won't be included in a MIDX (if `--write-midx` - * was given) and that we will actually delete this pack - * (if `-d` was given). - */ - if (!string_list_has_string(&names, sha1)) - item->util = (void*)(uintptr_t)((size_t)item->util | DELETE_PACK); - } - } + if (delete_redundant && pack_everything & ALL_INTO_ONE) + mark_packs_for_deletion(&existing, &names); if (write_midx) { struct string_list include = STRING_LIST_INIT_NODUP; From 639c4a3992337e15c0faa2fa6d73462cc76ca7b9 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Wed, 13 Sep 2023 15:17:49 -0400 Subject: [PATCH 3/8] builtin/repack.c: extract redundant pack cleanup for --geometric To reduce the complexity of the already quite-long `cmd_repack()` implementation, extract out the parts responsible for deleting redundant packs from a geometric repack out into its own sub-routine. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/repack.c | 52 +++++++++++++++++++++++++++--------------------- 1 file changed, 29 insertions(+), 23 deletions(-) diff --git a/builtin/repack.c b/builtin/repack.c index 708556836e..d3e6326bb9 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -540,6 +540,32 @@ static struct packed_git *get_preferred_pack(struct pack_geometry *geometry) return NULL; } +static void geometry_remove_redundant_packs(struct pack_geometry *geometry, + struct string_list *names, + struct existing_packs *existing) +{ + struct strbuf buf = STRBUF_INIT; + uint32_t i; + + for (i = 0; i < geometry->split; i++) { + struct packed_git *p = geometry->pack[i]; + if (string_list_has_string(names, hash_to_hex(p->hash))) + continue; + + strbuf_reset(&buf); + strbuf_addstr(&buf, pack_basename(p)); + strbuf_strip_suffix(&buf, ".pack"); + + if ((p->pack_keep) || + (string_list_has_string(&existing->kept_packs, buf.buf))) + continue; + + remove_redundant_pack(packdir, buf.buf); + } + + strbuf_release(&buf); +} + static void free_pack_geometry(struct pack_geometry *geometry) { if (!geometry) @@ -1201,29 +1227,9 @@ int cmd_repack(int argc, const char **argv, const char *prefix) remove_redundant_pack(packdir, item->string); } - if (geometry.split_factor) { - struct strbuf buf = STRBUF_INIT; - - uint32_t i; - for (i = 0; i < geometry.split; i++) { - struct packed_git *p = geometry.pack[i]; - if (string_list_has_string(&names, - hash_to_hex(p->hash))) - continue; - - strbuf_reset(&buf); - strbuf_addstr(&buf, pack_basename(p)); - strbuf_strip_suffix(&buf, ".pack"); - - if ((p->pack_keep) || - (string_list_has_string(&existing.kept_packs, - buf.buf))) - continue; - - remove_redundant_pack(packdir, buf.buf); - } - strbuf_release(&buf); - } + if (geometry.split_factor) + geometry_remove_redundant_packs(&geometry, &names, + &existing); if (show_progress) opts |= PRUNE_PACKED_VERBOSE; prune_packed_objects(opts); From f2d3bf178aaf4e590f3618d657e14aceb44514f0 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Wed, 13 Sep 2023 15:17:51 -0400 Subject: [PATCH 4/8] builtin/repack.c: extract redundant pack cleanup for existing packs To remove redundant packs at the end of a repacking operation, Git uses its `remove_redundant_pack()` function in a loop over the set of pre-existing, non-kept packs. In a later commit, we will split this list into two, one for pre-existing cruft pack(s), and another for non-cruft pack(s). Prepare for this by factoring out the routine to loop over and delete redundant packs into its own function. Instead of calling `remove_redundant_pack()` directly, we now will call `remove_redundant_existing_packs()`, which itself dispatches a call to `remove_redundant_packs_1()`. Note that the geometric repacking code will still call `remove_redundant_pack()` directly, but see the previous commit for more details. Having `remove_redundant_packs_1()` exist as a separate function may seem like overkill in this patch. However, a later patch will call `remove_redundant_packs_1()` once over two separate lists, so this refactoring sets us up for that. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/repack.c | 45 ++++++++++++++++++++++++++++----------------- 1 file changed, 28 insertions(+), 17 deletions(-) diff --git a/builtin/repack.c b/builtin/repack.c index d3e6326bb9..f6717e334c 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -135,6 +135,33 @@ static void mark_packs_for_deletion(struct existing_packs *existing, mark_packs_for_deletion_1(names, &existing->non_kept_packs); } +static void remove_redundant_pack(const char *dir_name, const char *base_name) +{ + struct strbuf buf = STRBUF_INIT; + struct multi_pack_index *m = get_local_multi_pack_index(the_repository); + strbuf_addf(&buf, "%s.pack", base_name); + if (m && midx_contains_pack(m, buf.buf)) + clear_midx_file(the_repository); + strbuf_insertf(&buf, 0, "%s/", dir_name); + unlink_pack_path(buf.buf, 1); + strbuf_release(&buf); +} + +static void remove_redundant_packs_1(struct string_list *packs) +{ + struct string_list_item *item; + for_each_string_list_item(item, packs) { + if (!((uintptr_t)item->util & DELETE_PACK)) + continue; + remove_redundant_pack(packdir, item->string); + } +} + +static void remove_redundant_existing_packs(struct existing_packs *existing) +{ + remove_redundant_packs_1(&existing->non_kept_packs); +} + static void existing_packs_release(struct existing_packs *existing) { string_list_clear(&existing->kept_packs, 0); @@ -184,18 +211,6 @@ static void collect_pack_filenames(struct existing_packs *existing, strbuf_release(&buf); } -static void remove_redundant_pack(const char *dir_name, const char *base_name) -{ - struct strbuf buf = STRBUF_INIT; - struct multi_pack_index *m = get_local_multi_pack_index(the_repository); - strbuf_addf(&buf, "%s.pack", base_name); - if (m && midx_contains_pack(m, buf.buf)) - clear_midx_file(the_repository); - strbuf_insertf(&buf, 0, "%s/", dir_name); - unlink_pack_path(buf.buf, 1); - strbuf_release(&buf); -} - static void prepare_pack_objects(struct child_process *cmd, const struct pack_objects_args *args, const char *out) @@ -1221,11 +1236,7 @@ int cmd_repack(int argc, const char **argv, const char *prefix) if (delete_redundant) { int opts = 0; - for_each_string_list_item(item, &existing.non_kept_packs) { - if (!((uintptr_t)item->util & DELETE_PACK)) - continue; - remove_redundant_pack(packdir, item->string); - } + remove_redundant_existing_packs(&existing); if (geometry.split_factor) geometry_remove_redundant_packs(&geometry, &names, From 4bbfb003c06c9a3b8e02d8957f053ce938c3d93e Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Wed, 13 Sep 2023 15:17:54 -0400 Subject: [PATCH 5/8] builtin/repack.c: extract `has_existing_non_kept_packs()` When there is: - at least one pre-existing packfile (which is not marked as kept), - repacking with the `-d` flag, and - not doing a cruft repack , then we pass a handful of additional options to the inner `pack-objects` process, like `--unpack-unreachable`, `--keep-unreachable`, and `--pack-loose-unreachable`, in addition to marking any packs we just wrote for promisor remotes as kept in-core (with `--keep-pack`, as opposed to the presence of a ".keep" file on disk). Because we store both cruft and non-cruft packs together in the same `existing.non_kept_packs` list, it suffices to check its `nr` member to see if it is zero or not. But a following change will store cruft- and non-cruft packs separately, meaning this check would break as a result. Prepare for this by extracting this part of the check into a new helper function called `has_existing_non_kept_packs()`. This patch does not introduce any functional changes, but prepares us to make a more isolated change in a subsequent patch. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/repack.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/builtin/repack.c b/builtin/repack.c index f6717e334c..3f0789ff89 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -105,6 +105,11 @@ struct existing_packs { .non_kept_packs = STRING_LIST_INIT_DUP, \ } +static int has_existing_non_kept_packs(const struct existing_packs *existing) +{ + return existing->non_kept_packs.nr; +} + static void mark_packs_for_deletion_1(struct string_list *names, struct string_list *list) { @@ -1048,7 +1053,8 @@ int cmd_repack(int argc, const char **argv, const char *prefix) if (pack_everything & ALL_INTO_ONE) { repack_promisor_objects(&po_args, &names); - if (existing.non_kept_packs.nr && delete_redundant && + if (has_existing_non_kept_packs(&existing) && + delete_redundant && !(pack_everything & PACK_CRUFT)) { for_each_string_list_item(item, &names) { strvec_pushf(&cmd.args, "--keep-pack=%s-%s.pack", From eabfaf8e8db27dd76ec1f1d4e0a2a124374475ab Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Wed, 13 Sep 2023 15:17:57 -0400 Subject: [PATCH 6/8] builtin/repack.c: store existing cruft packs separately When repacking with the `--write-midx` option, we invoke the function `midx_included_packs()` in order to produce the list of packs we want to include in the resulting MIDX. This list is comprised of: - existing .keep packs - any pack(s) which were written earlier in the same process - any unchanged packs when doing a `--geometric` repack - any cruft packs Prior to this patch, we stored pre-existing cruft and non-cruft packs together (provided those packs are non-kept). This meant we needed an additional bit to indicate which non-kept pack(s) were cruft versus those that aren't. But alternatively we can store cruft packs in a separate list, avoiding the need for this extra bit, and simplifying the code below. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/repack.c | 39 +++++++++++++++++++++++---------------- 1 file changed, 23 insertions(+), 16 deletions(-) diff --git a/builtin/repack.c b/builtin/repack.c index 3f0789ff89..478fab96c9 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -27,7 +27,6 @@ #define PACK_CRUFT 4 #define DELETE_PACK 1 -#define CRUFT_PACK 2 static int pack_everything; static int delta_base_offset = 1; @@ -98,16 +97,18 @@ static int repack_config(const char *var, const char *value, struct existing_packs { struct string_list kept_packs; struct string_list non_kept_packs; + struct string_list cruft_packs; }; #define EXISTING_PACKS_INIT { \ .kept_packs = STRING_LIST_INIT_DUP, \ .non_kept_packs = STRING_LIST_INIT_DUP, \ + .cruft_packs = STRING_LIST_INIT_DUP, \ } static int has_existing_non_kept_packs(const struct existing_packs *existing) { - return existing->non_kept_packs.nr; + return existing->non_kept_packs.nr || existing->cruft_packs.nr; } static void mark_packs_for_deletion_1(struct string_list *names, @@ -138,6 +139,7 @@ static void mark_packs_for_deletion(struct existing_packs *existing, { mark_packs_for_deletion_1(names, &existing->non_kept_packs); + mark_packs_for_deletion_1(names, &existing->cruft_packs); } static void remove_redundant_pack(const char *dir_name, const char *base_name) @@ -165,12 +167,14 @@ static void remove_redundant_packs_1(struct string_list *packs) static void remove_redundant_existing_packs(struct existing_packs *existing) { remove_redundant_packs_1(&existing->non_kept_packs); + remove_redundant_packs_1(&existing->cruft_packs); } static void existing_packs_release(struct existing_packs *existing) { string_list_clear(&existing->kept_packs, 0); string_list_clear(&existing->non_kept_packs, 0); + string_list_clear(&existing->cruft_packs, 0); } /* @@ -204,12 +208,10 @@ static void collect_pack_filenames(struct existing_packs *existing, if ((extra_keep->nr > 0 && i < extra_keep->nr) || p->pack_keep) string_list_append(&existing->kept_packs, buf.buf); - else { - struct string_list_item *item; - item = string_list_append(&existing->non_kept_packs, buf.buf); - if (p->is_cruft) - item->util = (void*)(uintptr_t)CRUFT_PACK; - } + else if (p->is_cruft) + string_list_append(&existing->cruft_packs, buf.buf); + else + string_list_append(&existing->non_kept_packs, buf.buf); } string_list_sort(&existing->kept_packs); @@ -691,14 +693,11 @@ static void midx_included_packs(struct string_list *include, string_list_insert(include, strbuf_detach(&buf, NULL)); } - for_each_string_list_item(item, &existing->non_kept_packs) { - if (!((uintptr_t)item->util & CRUFT_PACK)) { - /* - * no need to check DELETE_PACK, since we're not - * doing an ALL_INTO_ONE repack - */ - continue; - } + for_each_string_list_item(item, &existing->cruft_packs) { + /* + * no need to check DELETE_PACK, since we're not + * doing an ALL_INTO_ONE repack + */ string_list_insert(include, xstrfmt("%s.idx", item->string)); } } else { @@ -707,6 +706,12 @@ static void midx_included_packs(struct string_list *include, continue; string_list_insert(include, xstrfmt("%s.idx", item->string)); } + + for_each_string_list_item(item, &existing->cruft_packs) { + if ((uintptr_t)item->util & DELETE_PACK) + continue; + string_list_insert(include, xstrfmt("%s.idx", item->string)); + } } } @@ -836,6 +841,8 @@ static int write_cruft_pack(const struct pack_objects_args *args, fprintf(in, "%s-%s.pack\n", pack_prefix, item->string); for_each_string_list_item(item, &existing->non_kept_packs) fprintf(in, "-%s.pack\n", item->string); + for_each_string_list_item(item, &existing->cruft_packs) + fprintf(in, "-%s.pack\n", item->string); for_each_string_list_item(item, &existing->kept_packs) fprintf(in, "%s.pack\n", item->string); fclose(in); From 4a17e97246470049b8bde5be0d767ef66e550555 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Wed, 13 Sep 2023 15:18:00 -0400 Subject: [PATCH 7/8] builtin/repack.c: avoid directly inspecting "util" The `->util` field corresponding to each string_list_item is used to track the existence of some pack at the beginning of a repack operation was originally intended to be used as a bitfield. This bitfield tracked: - (1 << 0): whether or not the pack should be deleted - (1 << 1): whether or not the pack is cruft The previous commit removed the use of the second bit, but a future patch (from a different series than this one) will introduce a new use of it. So we could stop treating the util pointer as a bitfield and instead start treating it as if it were a boolean. But this would require some backtracking when that later patch is applied. Instead, let's avoid touching the ->util field directly, and instead introduce convenience functions like: - pack_mark_for_deletion() - pack_is_marked_for_deletion() Helped-by: Junio C Hamano Helped-by: Jeff King Helped-by: Patrick Steinhardt Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/repack.c | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/builtin/repack.c b/builtin/repack.c index 478fab96c9..be8d314e0c 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -111,6 +111,16 @@ static int has_existing_non_kept_packs(const struct existing_packs *existing) return existing->non_kept_packs.nr || existing->cruft_packs.nr; } +static void pack_mark_for_deletion(struct string_list_item *item) +{ + item->util = (void*)((uintptr_t)item->util | DELETE_PACK); +} + +static int pack_is_marked_for_deletion(struct string_list_item *item) +{ + return (uintptr_t)item->util & DELETE_PACK; +} + static void mark_packs_for_deletion_1(struct string_list *names, struct string_list *list) { @@ -130,7 +140,7 @@ static void mark_packs_for_deletion_1(struct string_list *names, * (if `-d` was given). */ if (!string_list_has_string(names, sha1)) - item->util = (void*)(uintptr_t)((size_t)item->util | DELETE_PACK); + pack_mark_for_deletion(item); } } @@ -158,7 +168,7 @@ static void remove_redundant_packs_1(struct string_list *packs) { struct string_list_item *item; for_each_string_list_item(item, packs) { - if (!((uintptr_t)item->util & DELETE_PACK)) + if (!pack_is_marked_for_deletion(item)) continue; remove_redundant_pack(packdir, item->string); } @@ -702,13 +712,13 @@ static void midx_included_packs(struct string_list *include, } } else { for_each_string_list_item(item, &existing->non_kept_packs) { - if ((uintptr_t)item->util & DELETE_PACK) + if (pack_is_marked_for_deletion(item)) continue; string_list_insert(include, xstrfmt("%s.idx", item->string)); } for_each_string_list_item(item, &existing->cruft_packs) { - if ((uintptr_t)item->util & DELETE_PACK) + if (pack_is_marked_for_deletion(item)) continue; string_list_insert(include, xstrfmt("%s.idx", item->string)); } From c6a0468f824f458acea442450d204a3d01d1aa9b Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Wed, 13 Sep 2023 15:18:03 -0400 Subject: [PATCH 8/8] builtin/repack.c: extract common cruft pack loop When generating the list of packs to store in a MIDX (when given the `--write-midx` option), we include any cruft packs both during --geometric and non-geometric repacks. But the rules for when we do and don't have to check whether any of those cruft packs were queued for deletion differ slightly between the two cases. But the two can be unified, provided there is a little bit of extra detail added in the comment to clarify when it is safe to avoid checking for any pending deletions (and why it is OK to do so even when not required). Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/repack.c | 31 ++++++++++++++++++------------- 1 file changed, 18 insertions(+), 13 deletions(-) diff --git a/builtin/repack.c b/builtin/repack.c index be8d314e0c..48245ffd99 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -702,26 +702,31 @@ static void midx_included_packs(struct string_list *include, string_list_insert(include, strbuf_detach(&buf, NULL)); } - - for_each_string_list_item(item, &existing->cruft_packs) { - /* - * no need to check DELETE_PACK, since we're not - * doing an ALL_INTO_ONE repack - */ - string_list_insert(include, xstrfmt("%s.idx", item->string)); - } } else { for_each_string_list_item(item, &existing->non_kept_packs) { if (pack_is_marked_for_deletion(item)) continue; string_list_insert(include, xstrfmt("%s.idx", item->string)); } + } - for_each_string_list_item(item, &existing->cruft_packs) { - if (pack_is_marked_for_deletion(item)) - continue; - string_list_insert(include, xstrfmt("%s.idx", item->string)); - } + for_each_string_list_item(item, &existing->cruft_packs) { + /* + * When doing a --geometric repack, there is no need to check + * for deleted packs, since we're by definition not doing an + * ALL_INTO_ONE repack (hence no packs will be deleted). + * Otherwise we must check for and exclude any packs which are + * enqueued for deletion. + * + * So we could omit the conditional below in the --geometric + * case, but doing so is unnecessary since no packs are marked + * as pending deletion (since we only call + * `mark_packs_for_deletion()` when doing an all-into-one + * repack). + */ + if (pack_is_marked_for_deletion(item)) + continue; + string_list_insert(include, xstrfmt("%s.idx", item->string)); } }