From a0efef144686ca2c46caad98df72507ba2606ce5 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Wed, 20 Nov 2024 08:51:30 +0100 Subject: [PATCH 01/12] refs: allow passing flags when setting up a transaction Allow passing flags when setting up a transaction such that the behaviour of the transaction itself can be altered. This functionality will be used in a subsequent patch. Adapt callers accordingly. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- branch.c | 2 +- builtin/clone.c | 2 +- builtin/fast-import.c | 4 ++-- builtin/fetch.c | 4 ++-- builtin/receive-pack.c | 4 ++-- builtin/replace.c | 2 +- builtin/tag.c | 2 +- builtin/update-ref.c | 4 ++-- refs.c | 12 +++++++----- refs.h | 3 ++- refs/files-backend.c | 11 +++++++---- refs/refs-internal.h | 1 + sequencer.c | 6 +++--- walker.c | 2 +- 14 files changed, 33 insertions(+), 26 deletions(-) diff --git a/branch.c b/branch.c index 44977ad0aa..ebaa870c01 100644 --- a/branch.c +++ b/branch.c @@ -627,7 +627,7 @@ void create_branch(struct repository *r, else msg = xstrfmt("branch: Created from %s", start_name); transaction = ref_store_transaction_begin(get_main_ref_store(the_repository), - &err); + 0, &err); if (!transaction || ref_transaction_update(transaction, ref.buf, &oid, forcing ? NULL : null_oid(), diff --git a/builtin/clone.c b/builtin/clone.c index 59fcb317a6..d963cc6eb5 100644 --- a/builtin/clone.c +++ b/builtin/clone.c @@ -574,7 +574,7 @@ static void write_remote_refs(const struct ref *local_refs) struct strbuf err = STRBUF_INIT; t = ref_store_transaction_begin(get_main_ref_store(the_repository), - &err); + 0, &err); if (!t) die("%s", err.buf); diff --git a/builtin/fast-import.c b/builtin/fast-import.c index 76d5c20f14..db82c37a06 100644 --- a/builtin/fast-import.c +++ b/builtin/fast-import.c @@ -1634,7 +1634,7 @@ static int update_branch(struct branch *b) } } transaction = ref_store_transaction_begin(get_main_ref_store(the_repository), - &err); + 0, &err); if (!transaction || ref_transaction_update(transaction, b->name, &b->oid, &old_oid, NULL, NULL, 0, msg, &err) || @@ -1669,7 +1669,7 @@ static void dump_tags(void) struct ref_transaction *transaction; transaction = ref_store_transaction_begin(get_main_ref_store(the_repository), - &err); + 0, &err); if (!transaction) { failure |= error("%s", err.buf); goto cleanup; diff --git a/builtin/fetch.c b/builtin/fetch.c index d9027e4dc9..68d291c7dd 100644 --- a/builtin/fetch.c +++ b/builtin/fetch.c @@ -669,7 +669,7 @@ static int s_update_ref(const char *action, */ if (!transaction) { transaction = our_transaction = ref_store_transaction_begin(get_main_ref_store(the_repository), - &err); + 0, &err); if (!transaction) { ret = STORE_REF_ERROR_OTHER; goto out; @@ -1671,7 +1671,7 @@ static int do_fetch(struct transport *transport, if (atomic_fetch) { transaction = ref_store_transaction_begin(get_main_ref_store(the_repository), - &err); + 0, &err); if (!transaction) { retcode = -1; goto cleanup; diff --git a/builtin/receive-pack.c b/builtin/receive-pack.c index ab5b20e39c..9d2c07f68d 100644 --- a/builtin/receive-pack.c +++ b/builtin/receive-pack.c @@ -1849,7 +1849,7 @@ static void execute_commands_non_atomic(struct command *commands, continue; transaction = ref_store_transaction_begin(get_main_ref_store(the_repository), - &err); + 0, &err); if (!transaction) { rp_error("%s", err.buf); strbuf_reset(&err); @@ -1878,7 +1878,7 @@ static void execute_commands_atomic(struct command *commands, const char *reported_error = "atomic push failure"; transaction = ref_store_transaction_begin(get_main_ref_store(the_repository), - &err); + 0, &err); if (!transaction) { rp_error("%s", err.buf); strbuf_reset(&err); diff --git a/builtin/replace.c b/builtin/replace.c index a44f4e7ea9..a4eaadff91 100644 --- a/builtin/replace.c +++ b/builtin/replace.c @@ -201,7 +201,7 @@ static int replace_object_oid(const char *object_ref, } transaction = ref_store_transaction_begin(get_main_ref_store(the_repository), - &err); + 0, &err); if (!transaction || ref_transaction_update(transaction, ref.buf, repl, &prev, NULL, NULL, 0, NULL, &err) || diff --git a/builtin/tag.c b/builtin/tag.c index 93d10d5915..3fa0ab1113 100644 --- a/builtin/tag.c +++ b/builtin/tag.c @@ -681,7 +681,7 @@ int cmd_tag(int argc, } transaction = ref_store_transaction_begin(get_main_ref_store(the_repository), - &err); + 0, &err); if (!transaction || ref_transaction_update(transaction, ref.buf, &object, &prev, NULL, NULL, diff --git a/builtin/update-ref.c b/builtin/update-ref.c index 8a98615dc8..670e7812d6 100644 --- a/builtin/update-ref.c +++ b/builtin/update-ref.c @@ -612,7 +612,7 @@ static void update_refs_stdin(void) int i, j; transaction = ref_store_transaction_begin(get_main_ref_store(the_repository), - &err); + 0, &err); if (!transaction) die("%s", err.buf); @@ -680,7 +680,7 @@ static void update_refs_stdin(void) */ state = cmd->state; transaction = ref_store_transaction_begin(get_main_ref_store(the_repository), - &err); + 0, &err); if (!transaction) die("%s", err.buf); diff --git a/refs.c b/refs.c index 5f729ed412..9effeb01eb 100644 --- a/refs.c +++ b/refs.c @@ -918,7 +918,7 @@ int refs_delete_ref(struct ref_store *refs, const char *msg, struct ref_transaction *transaction; struct strbuf err = STRBUF_INIT; - transaction = ref_store_transaction_begin(refs, &err); + transaction = ref_store_transaction_begin(refs, 0, &err); if (!transaction || ref_transaction_delete(transaction, refname, old_oid, NULL, flags, msg, &err) || @@ -1116,6 +1116,7 @@ int read_ref_at(struct ref_store *refs, const char *refname, } struct ref_transaction *ref_store_transaction_begin(struct ref_store *refs, + unsigned int flags, struct strbuf *err) { struct ref_transaction *tr; @@ -1123,6 +1124,7 @@ struct ref_transaction *ref_store_transaction_begin(struct ref_store *refs, CALLOC_ARRAY(tr, 1); tr->ref_store = refs; + tr->flags = flags; return tr; } @@ -1309,7 +1311,7 @@ int refs_update_ref(struct ref_store *refs, const char *msg, struct strbuf err = STRBUF_INIT; int ret = 0; - t = ref_store_transaction_begin(refs, &err); + t = ref_store_transaction_begin(refs, 0, &err); if (!t || ref_transaction_update(t, refname, new_oid, old_oid, NULL, NULL, flags, msg, &err) || @@ -2120,7 +2122,7 @@ int refs_update_symref(struct ref_store *refs, const char *ref, struct strbuf err = STRBUF_INIT; int ret = 0; - transaction = ref_store_transaction_begin(refs, &err); + transaction = ref_store_transaction_begin(refs, 0, &err); if (!transaction || ref_transaction_update(transaction, ref, NULL, NULL, target, NULL, REF_NO_DEREF, @@ -2527,7 +2529,7 @@ int refs_delete_refs(struct ref_store *refs, const char *logmsg, * individual updates can't fail, so we can pack all of the * updates into a single transaction. */ - transaction = ref_store_transaction_begin(refs, &err); + transaction = ref_store_transaction_begin(refs, 0, &err); if (!transaction) { ret = error("%s", err.buf); goto out; @@ -2833,7 +2835,7 @@ int repo_migrate_ref_storage_format(struct repository *repo, if (ret < 0) goto done; - transaction = ref_store_transaction_begin(new_refs, errbuf); + transaction = ref_store_transaction_begin(new_refs, 0, errbuf); if (!transaction) goto done; diff --git a/refs.h b/refs.h index 108dfc93b3..9821f3e80d 100644 --- a/refs.h +++ b/refs.h @@ -234,7 +234,7 @@ char *repo_default_branch_name(struct repository *r, int quiet); * struct strbuf err = STRBUF_INIT; * int ret = 0; * - * transaction = ref_store_transaction_begin(refs, &err); + * transaction = ref_store_transaction_begin(refs, 0, &err); * if (!transaction || * ref_transaction_update(...) || * ref_transaction_create(...) || @@ -584,6 +584,7 @@ enum action_on_err { * be freed by calling ref_transaction_free(). */ struct ref_transaction *ref_store_transaction_begin(struct ref_store *refs, + unsigned int flags, struct strbuf *err); /* diff --git a/refs/files-backend.c b/refs/files-backend.c index 0824c0b8a9..df61057c9f 100644 --- a/refs/files-backend.c +++ b/refs/files-backend.c @@ -1252,7 +1252,7 @@ static void prune_ref(struct files_ref_store *refs, struct ref_to_prune *r) if (check_refname_format(r->name, 0)) return; - transaction = ref_store_transaction_begin(&refs->base, &err); + transaction = ref_store_transaction_begin(&refs->base, 0, &err); if (!transaction) goto cleanup; ref_transaction_add_update( @@ -1396,7 +1396,8 @@ static int files_pack_refs(struct ref_store *ref_store, if (!should_pack_refs(refs, opts)) return 0; - transaction = ref_store_transaction_begin(refs->packed_ref_store, &err); + transaction = ref_store_transaction_begin(refs->packed_ref_store, + 0, &err); if (!transaction) return -1; @@ -2867,7 +2868,8 @@ static int files_transaction_prepare(struct ref_store *ref_store, */ if (!packed_transaction) { packed_transaction = ref_store_transaction_begin( - refs->packed_ref_store, err); + refs->packed_ref_store, + transaction->flags, err); if (!packed_transaction) { ret = TRANSACTION_GENERIC_ERROR; goto cleanup; @@ -3174,7 +3176,8 @@ static int files_initial_transaction_commit(struct ref_store *ref_store, &affected_refnames)) BUG("initial ref transaction called with existing refs"); - packed_transaction = ref_store_transaction_begin(refs->packed_ref_store, err); + packed_transaction = ref_store_transaction_begin(refs->packed_ref_store, + transaction->flags, err); if (!packed_transaction) { ret = TRANSACTION_GENERIC_ERROR; goto cleanup; diff --git a/refs/refs-internal.h b/refs/refs-internal.h index 2313c830d8..dbc6360c5a 100644 --- a/refs/refs-internal.h +++ b/refs/refs-internal.h @@ -193,6 +193,7 @@ struct ref_transaction { size_t nr; enum ref_transaction_state state; void *backend_data; + unsigned int flags; }; /* diff --git a/sequencer.c b/sequencer.c index 353d804999..287f4e5e87 100644 --- a/sequencer.c +++ b/sequencer.c @@ -662,7 +662,7 @@ static int fast_forward_to(struct repository *r, strbuf_addf(&sb, "%s: fast-forward", action_name(opts)); transaction = ref_store_transaction_begin(get_main_ref_store(the_repository), - &err); + 0, &err); if (!transaction || ref_transaction_update(transaction, "HEAD", to, unborn && !is_rebase_i(opts) ? @@ -1297,7 +1297,7 @@ int update_head_with_reflog(const struct commit *old_head, } transaction = ref_store_transaction_begin(get_main_ref_store(the_repository), - err); + 0, err); if (!transaction || ref_transaction_update(transaction, "HEAD", new_head, old_head ? &old_head->object.oid : null_oid(), @@ -3890,7 +3890,7 @@ static int do_label(struct repository *r, const char *name, int len) strbuf_addf(&ref_name, "refs/rewritten/%.*s", len, name); strbuf_addf(&msg, "rebase (label) '%.*s'", len, name); - transaction = ref_store_transaction_begin(refs, &err); + transaction = ref_store_transaction_begin(refs, 0, &err); if (!transaction) { error("%s", err.buf); ret = -1; diff --git a/walker.c b/walker.c index 5ea7e5b392..7cc9dbea46 100644 --- a/walker.c +++ b/walker.c @@ -290,7 +290,7 @@ int walker_fetch(struct walker *walker, int targets, char **target, if (write_ref) { transaction = ref_store_transaction_begin(get_main_ref_store(the_repository), - &err); + 0, &err); if (!transaction) { error("%s", err.buf); goto done; From 83b8ed8bba5fe6655bb835a27b49bbf249314f55 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Wed, 20 Nov 2024 08:51:31 +0100 Subject: [PATCH 02/12] refs/files: move logic to commit initial transaction Move the logic to commit initial transactions such that we can start to call it in `files_transaction_finish()` in a subsequent commit without requiring a separate function declaration. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- refs/files-backend.c | 202 +++++++++++++++++++++---------------------- 1 file changed, 101 insertions(+), 101 deletions(-) diff --git a/refs/files-backend.c b/refs/files-backend.c index df61057c9f..f37c805a34 100644 --- a/refs/files-backend.c +++ b/refs/files-backend.c @@ -2975,6 +2975,107 @@ static int parse_and_write_reflog(struct files_ref_store *refs, return 0; } +static int ref_present(const char *refname, const char *referent UNUSED, + const struct object_id *oid UNUSED, + int flags UNUSED, + void *cb_data) +{ + struct string_list *affected_refnames = cb_data; + + return string_list_has_string(affected_refnames, refname); +} + +static int files_initial_transaction_commit(struct ref_store *ref_store, + struct ref_transaction *transaction, + struct strbuf *err) +{ + struct files_ref_store *refs = + files_downcast(ref_store, REF_STORE_WRITE, + "initial_ref_transaction_commit"); + size_t i; + int ret = 0; + struct string_list affected_refnames = STRING_LIST_INIT_NODUP; + struct ref_transaction *packed_transaction = NULL; + + assert(err); + + if (transaction->state != REF_TRANSACTION_OPEN) + BUG("commit called for transaction that is not open"); + + /* Fail if a refname appears more than once in the transaction: */ + for (i = 0; i < transaction->nr; i++) + string_list_append(&affected_refnames, + transaction->updates[i]->refname); + string_list_sort(&affected_refnames); + if (ref_update_reject_duplicates(&affected_refnames, err)) { + ret = TRANSACTION_GENERIC_ERROR; + goto cleanup; + } + + /* + * It's really undefined to call this function in an active + * repository or when there are existing references: we are + * only locking and changing packed-refs, so (1) any + * simultaneous processes might try to change a reference at + * the same time we do, and (2) any existing loose versions of + * the references that we are setting would have precedence + * over our values. But some remote helpers create the remote + * "HEAD" and "master" branches before calling this function, + * so here we really only check that none of the references + * that we are creating already exists. + */ + if (refs_for_each_rawref(&refs->base, ref_present, + &affected_refnames)) + BUG("initial ref transaction called with existing refs"); + + packed_transaction = ref_store_transaction_begin(refs->packed_ref_store, + transaction->flags, err); + if (!packed_transaction) { + ret = TRANSACTION_GENERIC_ERROR; + goto cleanup; + } + + for (i = 0; i < transaction->nr; i++) { + struct ref_update *update = transaction->updates[i]; + + if ((update->flags & REF_HAVE_OLD) && + !is_null_oid(&update->old_oid)) + BUG("initial ref transaction with old_sha1 set"); + if (refs_verify_refname_available(&refs->base, update->refname, + &affected_refnames, NULL, + err)) { + ret = TRANSACTION_NAME_CONFLICT; + goto cleanup; + } + + /* + * Add a reference creation for this reference to the + * packed-refs transaction: + */ + ref_transaction_add_update(packed_transaction, update->refname, + update->flags & ~REF_HAVE_OLD, + &update->new_oid, &update->old_oid, + NULL, NULL, NULL); + } + + if (packed_refs_lock(refs->packed_ref_store, 0, err)) { + ret = TRANSACTION_GENERIC_ERROR; + goto cleanup; + } + + if (initial_ref_transaction_commit(packed_transaction, err)) { + ret = TRANSACTION_GENERIC_ERROR; + } + + packed_refs_unlock(refs->packed_ref_store); +cleanup: + if (packed_transaction) + ref_transaction_free(packed_transaction); + transaction->state = REF_TRANSACTION_CLOSED; + string_list_clear(&affected_refnames, 0); + return ret; +} + static int files_transaction_finish(struct ref_store *ref_store, struct ref_transaction *transaction, struct strbuf *err) @@ -3123,107 +3224,6 @@ static int files_transaction_abort(struct ref_store *ref_store, return 0; } -static int ref_present(const char *refname, const char *referent UNUSED, - const struct object_id *oid UNUSED, - int flags UNUSED, - void *cb_data) -{ - struct string_list *affected_refnames = cb_data; - - return string_list_has_string(affected_refnames, refname); -} - -static int files_initial_transaction_commit(struct ref_store *ref_store, - struct ref_transaction *transaction, - struct strbuf *err) -{ - struct files_ref_store *refs = - files_downcast(ref_store, REF_STORE_WRITE, - "initial_ref_transaction_commit"); - size_t i; - int ret = 0; - struct string_list affected_refnames = STRING_LIST_INIT_NODUP; - struct ref_transaction *packed_transaction = NULL; - - assert(err); - - if (transaction->state != REF_TRANSACTION_OPEN) - BUG("commit called for transaction that is not open"); - - /* Fail if a refname appears more than once in the transaction: */ - for (i = 0; i < transaction->nr; i++) - string_list_append(&affected_refnames, - transaction->updates[i]->refname); - string_list_sort(&affected_refnames); - if (ref_update_reject_duplicates(&affected_refnames, err)) { - ret = TRANSACTION_GENERIC_ERROR; - goto cleanup; - } - - /* - * It's really undefined to call this function in an active - * repository or when there are existing references: we are - * only locking and changing packed-refs, so (1) any - * simultaneous processes might try to change a reference at - * the same time we do, and (2) any existing loose versions of - * the references that we are setting would have precedence - * over our values. But some remote helpers create the remote - * "HEAD" and "master" branches before calling this function, - * so here we really only check that none of the references - * that we are creating already exists. - */ - if (refs_for_each_rawref(&refs->base, ref_present, - &affected_refnames)) - BUG("initial ref transaction called with existing refs"); - - packed_transaction = ref_store_transaction_begin(refs->packed_ref_store, - transaction->flags, err); - if (!packed_transaction) { - ret = TRANSACTION_GENERIC_ERROR; - goto cleanup; - } - - for (i = 0; i < transaction->nr; i++) { - struct ref_update *update = transaction->updates[i]; - - if ((update->flags & REF_HAVE_OLD) && - !is_null_oid(&update->old_oid)) - BUG("initial ref transaction with old_sha1 set"); - if (refs_verify_refname_available(&refs->base, update->refname, - &affected_refnames, NULL, - err)) { - ret = TRANSACTION_NAME_CONFLICT; - goto cleanup; - } - - /* - * Add a reference creation for this reference to the - * packed-refs transaction: - */ - ref_transaction_add_update(packed_transaction, update->refname, - update->flags & ~REF_HAVE_OLD, - &update->new_oid, &update->old_oid, - NULL, NULL, NULL); - } - - if (packed_refs_lock(refs->packed_ref_store, 0, err)) { - ret = TRANSACTION_GENERIC_ERROR; - goto cleanup; - } - - if (initial_ref_transaction_commit(packed_transaction, err)) { - ret = TRANSACTION_GENERIC_ERROR; - } - - packed_refs_unlock(refs->packed_ref_store); -cleanup: - if (packed_transaction) - ref_transaction_free(packed_transaction); - transaction->state = REF_TRANSACTION_CLOSED; - string_list_clear(&affected_refnames, 0); - return ret; -} - struct expire_reflog_cb { reflog_expiry_should_prune_fn *should_prune_fn; void *policy_cb; From 1c299d03e5551847533019aa32863d2cbe589c7f Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Wed, 20 Nov 2024 08:51:32 +0100 Subject: [PATCH 03/12] refs: introduce "initial" transaction flag There are two different ways to commit a transaction: - `ref_transaction_commit()` can be used to commit a regular transaction and is what almost every caller wants. - `initial_ref_transaction_commit()` can be used when it is known that the ref store that the transaction is committed for is empty and when there are no concurrent processes. This is used when cloning a new repository. Implementing this via two separate functions has a couple of downsides. First, every reference backend needs to implement a separate callback even in the case where they don't special-case the initial transaction. Second, backends are basically forced to reimplement the whole logic for how to commit the transaction like the "files" backend does, even though backends may wish to only tweak certain behaviour of a "normal" commit. Third, it is awkward that callers must never prepare the transaction as this is somewhat different than how a transaction typically works. Refactor the code such that we instead mark initial transactions via a separate flag when starting the transaction. This addresses all of the mentioned painpoints, where the most important part is that it will allow backends to have way more leeway in how exactly they want to handle the initial transaction. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- builtin/clone.c | 4 ++-- refs.c | 10 +--------- refs.h | 37 ++++++++++++++++++------------------- refs/debug.c | 13 ------------- refs/files-backend.c | 16 ++++++++-------- refs/packed-backend.c | 8 -------- refs/refs-internal.h | 1 - refs/reftable-backend.c | 8 -------- 8 files changed, 29 insertions(+), 68 deletions(-) diff --git a/builtin/clone.c b/builtin/clone.c index d963cc6eb5..8427ccec17 100644 --- a/builtin/clone.c +++ b/builtin/clone.c @@ -574,7 +574,7 @@ static void write_remote_refs(const struct ref *local_refs) struct strbuf err = STRBUF_INIT; t = ref_store_transaction_begin(get_main_ref_store(the_repository), - 0, &err); + REF_TRANSACTION_FLAG_INITIAL, &err); if (!t) die("%s", err.buf); @@ -586,7 +586,7 @@ static void write_remote_refs(const struct ref *local_refs) die("%s", err.buf); } - if (initial_ref_transaction_commit(t, &err)) + if (ref_transaction_commit(t, &err)) die("%s", err.buf); strbuf_release(&err); diff --git a/refs.c b/refs.c index 9effeb01eb..8b9dfc6173 100644 --- a/refs.c +++ b/refs.c @@ -2315,7 +2315,7 @@ int ref_transaction_commit(struct ref_transaction *transaction, } ret = refs->be->transaction_finish(refs, transaction, err); - if (!ret) + if (!ret && !(transaction->flags & REF_TRANSACTION_FLAG_INITIAL)) run_transaction_hook(transaction, "committed"); return ret; } @@ -2486,14 +2486,6 @@ int refs_reflog_expire(struct ref_store *refs, cleanup_fn, policy_cb_data); } -int initial_ref_transaction_commit(struct ref_transaction *transaction, - struct strbuf *err) -{ - struct ref_store *refs = transaction->ref_store; - - return refs->be->initial_transaction_commit(refs, transaction, err); -} - void ref_transaction_for_each_queued_update(struct ref_transaction *transaction, ref_transaction_for_each_queued_update_fn cb, void *cb_data) diff --git a/refs.h b/refs.h index 9821f3e80d..024a370554 100644 --- a/refs.h +++ b/refs.h @@ -214,11 +214,9 @@ char *repo_default_branch_name(struct repository *r, int quiet); * * Or * - * - Call `initial_ref_transaction_commit()` if the ref database is - * known to be empty and have no other writers (e.g. during - * clone). This is likely to be much faster than - * `ref_transaction_commit()`. `ref_transaction_prepare()` should - * *not* be called before `initial_ref_transaction_commit()`. + * - Call `ref_transaction_begin()` with REF_TRANSACTION_FLAG_INITIAL if the + * ref database is known to be empty and have no other writers (e.g. during + * clone). This is likely to be much faster than without the flag. * * - Then finally, call `ref_transaction_free()` to free the * `ref_transaction` data structure. @@ -579,6 +577,21 @@ enum action_on_err { UPDATE_REFS_QUIET_ON_ERR }; +enum ref_transaction_flag { + /* + * The ref transaction is part of the initial creation of the ref store + * and can thus assume that the ref store is completely empty. This + * allows the backend to perform the transaction more efficiently by + * skipping certain checks. + * + * It is a bug to set this flag when there might be other processes + * accessing the repository or if there are existing references that + * might conflict with the ones being created. All old_oid values must + * either be absent or null_oid. + */ + REF_TRANSACTION_FLAG_INITIAL = (1 << 0), +}; + /* * Begin a reference transaction. The reference transaction must * be freed by calling ref_transaction_free(). @@ -798,20 +811,6 @@ int ref_transaction_commit(struct ref_transaction *transaction, int ref_transaction_abort(struct ref_transaction *transaction, struct strbuf *err); -/* - * Like ref_transaction_commit(), but optimized for creating - * references when originally initializing a repository (e.g., by "git - * clone"). It writes the new references directly to packed-refs - * without locking the individual references. - * - * It is a bug to call this function when there might be other - * processes accessing the repository or if there are existing - * references that might conflict with the ones being created. All - * old_oid values must either be absent or null_oid. - */ -int initial_ref_transaction_commit(struct ref_transaction *transaction, - struct strbuf *err); - /* * Execute the given callback function for each of the reference updates which * have been queued in the given transaction. `old_oid` and `new_oid` may be diff --git a/refs/debug.c b/refs/debug.c index 45e2e784a0..cbac62c8a4 100644 --- a/refs/debug.c +++ b/refs/debug.c @@ -118,18 +118,6 @@ static int debug_transaction_abort(struct ref_store *refs, return res; } -static int debug_initial_transaction_commit(struct ref_store *refs, - struct ref_transaction *transaction, - struct strbuf *err) -{ - struct debug_ref_store *drefs = (struct debug_ref_store *)refs; - int res; - transaction->ref_store = drefs->refs; - res = drefs->refs->be->initial_transaction_commit(drefs->refs, - transaction, err); - return res; -} - static int debug_pack_refs(struct ref_store *ref_store, struct pack_refs_opts *opts) { struct debug_ref_store *drefs = (struct debug_ref_store *)ref_store; @@ -443,7 +431,6 @@ struct ref_storage_be refs_be_debug = { .transaction_prepare = debug_transaction_prepare, .transaction_finish = debug_transaction_finish, .transaction_abort = debug_transaction_abort, - .initial_transaction_commit = debug_initial_transaction_commit, .pack_refs = debug_pack_refs, .rename_ref = debug_rename_ref, diff --git a/refs/files-backend.c b/refs/files-backend.c index f37c805a34..3ed18475a7 100644 --- a/refs/files-backend.c +++ b/refs/files-backend.c @@ -2781,6 +2781,8 @@ static int files_transaction_prepare(struct ref_store *ref_store, assert(err); + if (transaction->flags & REF_TRANSACTION_FLAG_INITIAL) + goto cleanup; if (!transaction->nr) goto cleanup; @@ -2985,13 +2987,10 @@ static int ref_present(const char *refname, const char *referent UNUSED, return string_list_has_string(affected_refnames, refname); } -static int files_initial_transaction_commit(struct ref_store *ref_store, +static int files_transaction_finish_initial(struct files_ref_store *refs, struct ref_transaction *transaction, struct strbuf *err) { - struct files_ref_store *refs = - files_downcast(ref_store, REF_STORE_WRITE, - "initial_ref_transaction_commit"); size_t i; int ret = 0; struct string_list affected_refnames = STRING_LIST_INIT_NODUP; @@ -2999,8 +2998,8 @@ static int files_initial_transaction_commit(struct ref_store *ref_store, assert(err); - if (transaction->state != REF_TRANSACTION_OPEN) - BUG("commit called for transaction that is not open"); + if (transaction->state != REF_TRANSACTION_PREPARED) + BUG("commit called for transaction that is not prepared"); /* Fail if a refname appears more than once in the transaction: */ for (i = 0; i < transaction->nr; i++) @@ -3063,7 +3062,7 @@ static int files_initial_transaction_commit(struct ref_store *ref_store, goto cleanup; } - if (initial_ref_transaction_commit(packed_transaction, err)) { + if (ref_transaction_commit(packed_transaction, err)) { ret = TRANSACTION_GENERIC_ERROR; } @@ -3091,6 +3090,8 @@ static int files_transaction_finish(struct ref_store *ref_store, assert(err); + if (transaction->flags & REF_TRANSACTION_FLAG_INITIAL) + return files_transaction_finish_initial(refs, transaction, err); if (!transaction->nr) { transaction->state = REF_TRANSACTION_CLOSED; return 0; @@ -3617,7 +3618,6 @@ struct ref_storage_be refs_be_files = { .transaction_prepare = files_transaction_prepare, .transaction_finish = files_transaction_finish, .transaction_abort = files_transaction_abort, - .initial_transaction_commit = files_initial_transaction_commit, .pack_refs = files_pack_refs, .rename_ref = files_rename_ref, diff --git a/refs/packed-backend.c b/refs/packed-backend.c index 07c57fd541..794471de60 100644 --- a/refs/packed-backend.c +++ b/refs/packed-backend.c @@ -1730,13 +1730,6 @@ cleanup: return ret; } -static int packed_initial_transaction_commit(struct ref_store *ref_store UNUSED, - struct ref_transaction *transaction, - struct strbuf *err) -{ - return ref_transaction_commit(transaction, err); -} - static int packed_pack_refs(struct ref_store *ref_store UNUSED, struct pack_refs_opts *pack_opts UNUSED) { @@ -1769,7 +1762,6 @@ struct ref_storage_be refs_be_packed = { .transaction_prepare = packed_transaction_prepare, .transaction_finish = packed_transaction_finish, .transaction_abort = packed_transaction_abort, - .initial_transaction_commit = packed_initial_transaction_commit, .pack_refs = packed_pack_refs, .rename_ref = NULL, diff --git a/refs/refs-internal.h b/refs/refs-internal.h index dbc6360c5a..33335d54c9 100644 --- a/refs/refs-internal.h +++ b/refs/refs-internal.h @@ -666,7 +666,6 @@ struct ref_storage_be { ref_transaction_prepare_fn *transaction_prepare; ref_transaction_finish_fn *transaction_finish; ref_transaction_abort_fn *transaction_abort; - ref_transaction_commit_fn *initial_transaction_commit; pack_refs_fn *pack_refs; rename_ref_fn *rename_ref; diff --git a/refs/reftable-backend.c b/refs/reftable-backend.c index 38eb14d591..8e914afc81 100644 --- a/refs/reftable-backend.c +++ b/refs/reftable-backend.c @@ -1490,13 +1490,6 @@ done: return ret; } -static int reftable_be_initial_transaction_commit(struct ref_store *ref_store UNUSED, - struct ref_transaction *transaction, - struct strbuf *err) -{ - return ref_transaction_commit(transaction, err); -} - static int reftable_be_pack_refs(struct ref_store *ref_store, struct pack_refs_opts *opts) { @@ -2490,7 +2483,6 @@ struct ref_storage_be refs_be_reftable = { .transaction_prepare = reftable_be_transaction_prepare, .transaction_finish = reftable_be_transaction_finish, .transaction_abort = reftable_be_transaction_abort, - .initial_transaction_commit = reftable_be_initial_transaction_commit, .pack_refs = reftable_be_pack_refs, .rename_ref = reftable_be_rename_ref, From c0b9cf3b55e9e1d6ea2d848bae1f8efe67b3b61b Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Wed, 20 Nov 2024 08:51:33 +0100 Subject: [PATCH 04/12] refs/files: support symbolic and root refs in initial transaction The "files" backend has implemented special logic when committing the first transactions in an otherwise empty ref store: instead of writing all refs as separate loose files, it instead knows to write them all into a "packed-refs" file directly. This is significantly more efficient than having to write each of the refs as separate "loose" ref. The only user of this optimization is git-clone(1), which only uses this mechanism to write regular refs. Consequently, the implementation does not know how to handle both symbolic and root refs. While fine in the context of git-clone(1), this keeps us from using the mechanism in more cases. Adapt the logic to also support symbolic and root refs by using a second transaction that we use for all of the refs that need to be written as loose refs. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- refs/files-backend.c | 44 ++++++++++++++++++++++++++++++++++---------- 1 file changed, 34 insertions(+), 10 deletions(-) diff --git a/refs/files-backend.c b/refs/files-backend.c index 3ed18475a7..116d425969 100644 --- a/refs/files-backend.c +++ b/refs/files-backend.c @@ -2995,6 +2995,7 @@ static int files_transaction_finish_initial(struct files_ref_store *refs, int ret = 0; struct string_list affected_refnames = STRING_LIST_INIT_NODUP; struct ref_transaction *packed_transaction = NULL; + struct ref_transaction *loose_transaction = NULL; assert(err); @@ -3040,6 +3041,7 @@ static int files_transaction_finish_initial(struct files_ref_store *refs, if ((update->flags & REF_HAVE_OLD) && !is_null_oid(&update->old_oid)) BUG("initial ref transaction with old_sha1 set"); + if (refs_verify_refname_available(&refs->base, update->refname, &affected_refnames, NULL, err)) { @@ -3048,26 +3050,48 @@ static int files_transaction_finish_initial(struct files_ref_store *refs, } /* - * Add a reference creation for this reference to the - * packed-refs transaction: + * packed-refs don't support symbolic refs and root refs, so we + * have to queue these references via the loose transaction. */ - ref_transaction_add_update(packed_transaction, update->refname, - update->flags & ~REF_HAVE_OLD, - &update->new_oid, &update->old_oid, - NULL, NULL, NULL); + if (update->new_target || is_root_ref(update->refname)) { + if (!loose_transaction) { + loose_transaction = ref_store_transaction_begin(&refs->base, 0, err); + if (!loose_transaction) { + ret = TRANSACTION_GENERIC_ERROR; + goto cleanup; + } + } + + ref_transaction_add_update(loose_transaction, update->refname, + update->flags & ~REF_HAVE_OLD, + update->new_target ? NULL : &update->new_oid, NULL, + update->new_target, NULL, NULL); + } else { + ref_transaction_add_update(packed_transaction, update->refname, + update->flags & ~REF_HAVE_OLD, + &update->new_oid, &update->old_oid, + NULL, NULL, NULL); + } } - if (packed_refs_lock(refs->packed_ref_store, 0, err)) { + if (packed_refs_lock(refs->packed_ref_store, 0, err) || + ref_transaction_commit(packed_transaction, err)) { ret = TRANSACTION_GENERIC_ERROR; goto cleanup; } + packed_refs_unlock(refs->packed_ref_store); - if (ref_transaction_commit(packed_transaction, err)) { - ret = TRANSACTION_GENERIC_ERROR; + if (loose_transaction) { + if (ref_transaction_prepare(loose_transaction, err) || + ref_transaction_commit(loose_transaction, err)) { + ret = TRANSACTION_GENERIC_ERROR; + goto cleanup; + } } - packed_refs_unlock(refs->packed_ref_store); cleanup: + if (loose_transaction) + ref_transaction_free(loose_transaction); if (packed_transaction) ref_transaction_free(packed_transaction); transaction->state = REF_TRANSACTION_CLOSED; From 00bd6c3e4649d3e764db7ffcfbd317aabd2525d3 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Wed, 20 Nov 2024 08:51:34 +0100 Subject: [PATCH 05/12] refs: use "initial" transaction semantics to migrate refs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Until now, we couldn't use "initial" transaction semantics to migrate refs because the "files" backend only supported writing regular refs via the initial transaction because it simply mapped the transaction to a "packed-refs" transaction. But with the preceding commit, the "files" backend has learned to also write symbolic and root refs in the initial transaction by creating a second transaction for all refs that need to be written as loose refs. Adapt the code to migrate refs to commit the transaction as an initial transaction. This results in a signiticant speedup when migrating many refs: Benchmark 1: migrate reftable:files (refcount = 100000, revision = HEAD~) Time (mean ± σ): 3.247 s ± 0.034 s [User: 0.485 s, System: 2.722 s] Range (min … max): 3.216 s … 3.309 s 10 runs Benchmark 2: migrate reftable:files (refcount = 100000, revision = HEAD) Time (mean ± σ): 453.6 ms ± 1.9 ms [User: 214.6 ms, System: 230.5 ms] Range (min … max): 451.5 ms … 456.4 ms 10 runs Summary migrate reftable:files (refcount = 100000, revision = HEAD) ran 7.16 ± 0.08 times faster than migrate reftable:files (refcount = 100000, revision = HEAD~) As the reftable backend doesn't (yet) special-case initial transactions there is no comparable speedup for that backend. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- refs.c | 10 ++-------- t/t1460-refs-migrate.sh | 2 +- 2 files changed, 3 insertions(+), 9 deletions(-) diff --git a/refs.c b/refs.c index 8b9dfc6173..0f10c565bb 100644 --- a/refs.c +++ b/refs.c @@ -2827,7 +2827,8 @@ int repo_migrate_ref_storage_format(struct repository *repo, if (ret < 0) goto done; - transaction = ref_store_transaction_begin(new_refs, 0, errbuf); + transaction = ref_store_transaction_begin(new_refs, REF_TRANSACTION_FLAG_INITIAL, + errbuf); if (!transaction) goto done; @@ -2852,13 +2853,6 @@ int repo_migrate_ref_storage_format(struct repository *repo, if (ret < 0) goto done; - /* - * TODO: we might want to migrate to `initial_ref_transaction_commit()` - * here, which is more efficient for the files backend because it would - * write new refs into the packed-refs file directly. At this point, - * the files backend doesn't handle pseudo-refs and symrefs correctly - * though, so this requires some more work. - */ ret = ref_transaction_commit(transaction, errbuf); if (ret < 0) goto done; diff --git a/t/t1460-refs-migrate.sh b/t/t1460-refs-migrate.sh index f7c0783d30..b90b38a87f 100755 --- a/t/t1460-refs-migrate.sh +++ b/t/t1460-refs-migrate.sh @@ -237,7 +237,7 @@ test_expect_success 'migrating from reftable format deletes backend files' ' test_path_is_missing repo/.git/reftable && echo "ref: refs/heads/main" >expect && test_cmp expect repo/.git/HEAD && - test_path_is_file repo/.git/refs/heads/main + test_path_is_file repo/.git/packed-refs ' test_done From e4929cdf797467b5d55a976ee938f1d4f358900e Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Wed, 20 Nov 2024 08:51:35 +0100 Subject: [PATCH 06/12] refs: skip collision checks in initial transactions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reference transactions use `refs_verify_refname_available()` to check for colliding references. This check consists of two parts: - Checks for whether multiple ref updates in the same transaction conflict with each other. - Checks for whether existing refs conflict with any refs part of the transaction. While we generally cannot avoid the first check, the second check is superfluous in cases where the transaction is an initial one in an otherwise empty ref store. The check results in multiple ref reads as well as the creation of a ref iterator for every ref we're checking, which adds up quite fast when performing the check for many refs. Introduce a new flag that allows us to skip this check and wire it up in such that the backends pass it when running an initial transaction. This leads to significant speedups when migrating ref storage backends. From "files" to "reftable": Benchmark 1: migrate files:reftable (refcount = 100000, revision = HEAD~) Time (mean ± σ): 472.4 ms ± 6.7 ms [User: 175.9 ms, System: 285.2 ms] Range (min … max): 463.5 ms … 483.2 ms 10 runs Benchmark 2: migrate files:reftable (refcount = 100000, revision = HEAD) Time (mean ± σ): 86.1 ms ± 1.9 ms [User: 67.9 ms, System: 16.0 ms] Range (min … max): 82.9 ms … 90.9 ms 29 runs Summary migrate files:reftable (refcount = 100000, revision = HEAD) ran 5.48 ± 0.15 times faster than migrate files:reftable (refcount = 100000, revision = HEAD~) And from "reftable" to "files": Benchmark 1: migrate reftable:files (refcount = 100000, revision = HEAD~) Time (mean ± σ): 452.7 ms ± 3.4 ms [User: 209.9 ms, System: 235.4 ms] Range (min … max): 445.9 ms … 457.5 ms 10 runs Benchmark 2: migrate reftable:files (refcount = 100000, revision = HEAD) Time (mean ± σ): 95.2 ms ± 2.2 ms [User: 73.6 ms, System: 20.6 ms] Range (min … max): 91.7 ms … 100.8 ms 28 runs Summary migrate reftable:files (refcount = 100000, revision = HEAD) ran 4.76 ± 0.11 times faster than migrate reftable:files (refcount = 100000, revision = HEAD~) Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- refs.c | 37 +++++++++++++++++++++---------------- refs.h | 5 ++++- refs/files-backend.c | 13 ++++++------- refs/reftable-backend.c | 6 ++++-- t/helper/test-ref-store.c | 2 +- 5 files changed, 36 insertions(+), 27 deletions(-) diff --git a/refs.c b/refs.c index 0f10c565bb..d690eb19b3 100644 --- a/refs.c +++ b/refs.c @@ -2324,6 +2324,7 @@ int refs_verify_refname_available(struct ref_store *refs, const char *refname, const struct string_list *extras, const struct string_list *skip, + int initial_transaction, struct strbuf *err) { const char *slash; @@ -2332,8 +2333,6 @@ int refs_verify_refname_available(struct ref_store *refs, struct strbuf referent = STRBUF_INIT; struct object_id oid; unsigned int type; - struct ref_iterator *iter; - int ok; int ret = -1; /* @@ -2363,7 +2362,8 @@ int refs_verify_refname_available(struct ref_store *refs, if (skip && string_list_has_string(skip, dirname.buf)) continue; - if (!refs_read_raw_ref(refs, dirname.buf, &oid, &referent, + if (!initial_transaction && + !refs_read_raw_ref(refs, dirname.buf, &oid, &referent, &type, &ignore_errno)) { strbuf_addf(err, _("'%s' exists; cannot create '%s'"), dirname.buf, refname); @@ -2388,22 +2388,27 @@ int refs_verify_refname_available(struct ref_store *refs, strbuf_addstr(&dirname, refname + dirname.len); strbuf_addch(&dirname, '/'); - iter = refs_ref_iterator_begin(refs, dirname.buf, NULL, 0, - DO_FOR_EACH_INCLUDE_BROKEN); - while ((ok = ref_iterator_advance(iter)) == ITER_OK) { - if (skip && - string_list_has_string(skip, iter->refname)) - continue; + if (!initial_transaction) { + struct ref_iterator *iter; + int ok; - strbuf_addf(err, _("'%s' exists; cannot create '%s'"), - iter->refname, refname); - ref_iterator_abort(iter); - goto cleanup; + iter = refs_ref_iterator_begin(refs, dirname.buf, NULL, 0, + DO_FOR_EACH_INCLUDE_BROKEN); + while ((ok = ref_iterator_advance(iter)) == ITER_OK) { + if (skip && + string_list_has_string(skip, iter->refname)) + continue; + + strbuf_addf(err, _("'%s' exists; cannot create '%s'"), + iter->refname, refname); + ref_iterator_abort(iter); + goto cleanup; + } + + if (ok != ITER_DONE) + BUG("error while iterating over references"); } - if (ok != ITER_DONE) - BUG("error while iterating over references"); - extra_refname = find_descendant_ref(dirname.buf, extras, skip); if (extra_refname) strbuf_addf(err, _("cannot process '%s' and '%s' at the same time"), diff --git a/refs.h b/refs.h index 024a370554..980bd20cf2 100644 --- a/refs.h +++ b/refs.h @@ -101,13 +101,16 @@ int refs_read_symbolic_ref(struct ref_store *ref_store, const char *refname, * both "foo" and with "foo/bar/baz" but not with "foo/bar" or * "foo/barbados". * + * If `initial_transaction` is truish, then all collision checks with + * preexisting refs are skipped. + * * extras and skip must be sorted. */ - int refs_verify_refname_available(struct ref_store *refs, const char *refname, const struct string_list *extras, const struct string_list *skip, + int initial_transaction, struct strbuf *err); int refs_ref_exists(struct ref_store *refs, const char *refname); diff --git a/refs/files-backend.c b/refs/files-backend.c index 116d425969..d27806c02c 100644 --- a/refs/files-backend.c +++ b/refs/files-backend.c @@ -706,7 +706,7 @@ retry: * reason to expect this error to be transitory. */ if (refs_verify_refname_available(&refs->base, refname, - extras, NULL, err)) { + extras, NULL, 0, err)) { if (mustexist) { /* * To the user the relevant error is @@ -813,7 +813,7 @@ retry: REMOVE_DIR_EMPTY_ONLY)) { if (refs_verify_refname_available( &refs->base, refname, - extras, NULL, err)) { + extras, NULL, 0, err)) { /* * The error message set by * verify_refname_available() is OK. @@ -850,7 +850,7 @@ retry: */ if (refs_verify_refname_available( refs->packed_ref_store, refname, - extras, NULL, err)) { + extras, NULL, 0, err)) { ret = TRANSACTION_NAME_CONFLICT; goto error_return; } @@ -1159,7 +1159,7 @@ static struct ref_lock *lock_ref_oid_basic(struct files_ref_store *refs, */ if (is_null_oid(&lock->old_oid) && refs_verify_refname_available(refs->packed_ref_store, refname, - NULL, NULL, err)) + NULL, NULL, 0, err)) goto error_return; lock->ref_name = xstrdup(refname); @@ -1538,7 +1538,7 @@ static int refs_rename_ref_available(struct ref_store *refs, string_list_insert(&skip, old_refname); ok = !refs_verify_refname_available(refs, new_refname, - NULL, &skip, &err); + NULL, &skip, 0, &err); if (!ok) error("%s", err.buf); @@ -3043,8 +3043,7 @@ static int files_transaction_finish_initial(struct files_ref_store *refs, BUG("initial ref transaction with old_sha1 set"); if (refs_verify_refname_available(&refs->base, update->refname, - &affected_refnames, NULL, - err)) { + &affected_refnames, NULL, 1, err)) { ret = TRANSACTION_NAME_CONFLICT; goto cleanup; } diff --git a/refs/reftable-backend.c b/refs/reftable-backend.c index 8e914afc81..bbc779ab41 100644 --- a/refs/reftable-backend.c +++ b/refs/reftable-backend.c @@ -1097,7 +1097,9 @@ static int reftable_be_transaction_prepare(struct ref_store *ref_store, * at a later point. */ ret = refs_verify_refname_available(ref_store, u->refname, - &affected_refnames, NULL, err); + &affected_refnames, NULL, + transaction->flags & REF_TRANSACTION_FLAG_INITIAL, + err); if (ret < 0) goto done; @@ -1584,7 +1586,7 @@ static int write_copy_table(struct reftable_writer *writer, void *cb_data) if (arg->delete_old) string_list_insert(&skip, arg->oldname); ret = refs_verify_refname_available(&arg->refs->base, arg->newname, - NULL, &skip, &errbuf); + NULL, &skip, 0, &errbuf); if (ret < 0) { error("%s", errbuf.buf); goto done; diff --git a/t/helper/test-ref-store.c b/t/helper/test-ref-store.c index 65346dee55..240f6fc29d 100644 --- a/t/helper/test-ref-store.c +++ b/t/helper/test-ref-store.c @@ -199,7 +199,7 @@ static int cmd_verify_ref(struct ref_store *refs, const char **argv) struct strbuf err = STRBUF_INIT; int ret; - ret = refs_verify_refname_available(refs, refname, NULL, NULL, &err); + ret = refs_verify_refname_available(refs, refname, NULL, NULL, 0, &err); if (err.len) puts(err.buf); return ret; From a7004abd0b17c2f2460f0fb318f49a75aab0785f Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Wed, 20 Nov 2024 08:51:36 +0100 Subject: [PATCH 07/12] refs: don't normalize log messages with `REF_SKIP_CREATE_REFLOG` MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When the `REF_SKIP_CREATE_REFLOG` flag is set we skip the creation of the reflog entry, but we still normalize the reflog message when we queue the update. This is a waste of resources as the normalized message will never get used in the first place. Fix this issue by skipping the normalization in case the flag is set. This leads to a surprisingly large speedup when migrating from the "files" to the "reftable" backend: Benchmark 1: migrate files:reftable (refcount = 1000000, revision = HEAD~) Time (mean ± σ): 878.5 ms ± 14.9 ms [User: 726.5 ms, System: 139.2 ms] Range (min … max): 858.4 ms … 941.3 ms 50 runs Benchmark 2: migrate files:reftable (refcount = 1000000, revision = HEAD) Time (mean ± σ): 831.1 ms ± 10.5 ms [User: 694.1 ms, System: 126.3 ms] Range (min … max): 812.4 ms … 851.4 ms 50 runs Summary migrate files:reftable (refcount = 1000000, revision = HEAD) ran 1.06 ± 0.02 times faster than migrate files:reftable (refcount = 1000000, revision = HEAD~) And an ever larger speedup when migrating the other way round: Benchmark 1: migrate reftable:files (refcount = 1000000, revision = HEAD~) Time (mean ± σ): 923.6 ms ± 11.6 ms [User: 705.5 ms, System: 208.1 ms] Range (min … max): 905.3 ms … 946.5 ms 50 runs Benchmark 2: migrate reftable:files (refcount = 1000000, revision = HEAD) Time (mean ± σ): 818.5 ms ± 9.0 ms [User: 627.6 ms, System: 180.6 ms] Range (min … max): 802.2 ms … 842.9 ms 50 runs Summary migrate reftable:files (refcount = 1000000, revision = HEAD) ran 1.13 ± 0.02 times faster than migrate reftable:files (refcount = 1000000, revision = HEAD~) Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- refs.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/refs.c b/refs.c index d690eb19b3..65eea3eb77 100644 --- a/refs.c +++ b/refs.c @@ -1188,8 +1188,9 @@ struct ref_update *ref_transaction_add_update( oidcpy(&update->new_oid, new_oid); if ((flags & REF_HAVE_OLD) && old_oid) oidcpy(&update->old_oid, old_oid); + if (!(flags & REF_SKIP_CREATE_REFLOG)) + update->msg = normalize_reflog_message(msg); - update->msg = normalize_reflog_message(msg); return update; } From 66ed011bf73e6503059a44698f83fd8a6b0f7726 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Wed, 20 Nov 2024 08:51:37 +0100 Subject: [PATCH 08/12] reftable/writer: optimize allocations by using a scratch buffer MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Both `writer_add_record()` and `reftable_writer_add_ref()` get executed for every single ref record we're adding to the reftable writer. And as both functions use a local buffer to write data, the allocations we have to do here add up during larger transactions. Refactor the code to use a scratch buffer part of the `reftable_writer` itself such that we can reuse it. This signifcantly reduces the number of allocations during large transactions, e.g. when migrating refs from the "files" backend to the "reftable" backend. Before this change: HEAP SUMMARY: in use at exit: 80,048 bytes in 49 blocks total heap usage: 5,032,171 allocs, 5,032,122 frees, 418,792,092 bytes allocated After this change: HEAP SUMMARY: in use at exit: 80,048 bytes in 49 blocks total heap usage: 3,025,864 allocs, 3,025,815 frees, 372,746,291 bytes allocated This also translate into a small speedup: Benchmark 1: migrate files:reftable (refcount = 1000000, revision = HEAD~) Time (mean ± σ): 827.2 ms ± 16.5 ms [User: 689.4 ms, System: 124.9 ms] Range (min … max): 809.0 ms … 924.7 ms 50 runs Benchmark 2: migrate files:reftable (refcount = 1000000, revision = HEAD) Time (mean ± σ): 813.6 ms ± 11.6 ms [User: 679.0 ms, System: 123.4 ms] Range (min … max): 786.7 ms … 833.5 ms 50 runs Summary migrate files:reftable (refcount = 1000000, revision = HEAD) ran 1.02 ± 0.02 times faster than migrate files:reftable (refcount = 1000000, revision = HEAD~) Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- reftable/writer.c | 23 +++++++++++------------ reftable/writer.h | 1 + 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/reftable/writer.c b/reftable/writer.c index fd136794d5..6501376ce8 100644 --- a/reftable/writer.c +++ b/reftable/writer.c @@ -148,6 +148,7 @@ int reftable_writer_new(struct reftable_writer **out, reftable_buf_init(&wp->block_writer_data.last_key); reftable_buf_init(&wp->last_key); + reftable_buf_init(&wp->buf); REFTABLE_CALLOC_ARRAY(wp->block, opts.block_size); if (!wp->block) { reftable_free(wp); @@ -180,6 +181,7 @@ static void writer_release(struct reftable_writer *w) w->block_writer = NULL; writer_clear_index(w); reftable_buf_release(&w->last_key); + reftable_buf_release(&w->buf); } } @@ -249,20 +251,19 @@ static int writer_index_hash(struct reftable_writer *w, struct reftable_buf *has static int writer_add_record(struct reftable_writer *w, struct reftable_record *rec) { - struct reftable_buf key = REFTABLE_BUF_INIT; int err; - err = reftable_record_key(rec, &key); + err = reftable_record_key(rec, &w->buf); if (err < 0) goto done; - if (reftable_buf_cmp(&w->last_key, &key) >= 0) { + if (reftable_buf_cmp(&w->last_key, &w->buf) >= 0) { err = REFTABLE_API_ERROR; goto done; } reftable_buf_reset(&w->last_key); - err = reftable_buf_add(&w->last_key, key.buf, key.len); + err = reftable_buf_add(&w->last_key, w->buf.buf, w->buf.len); if (err < 0) goto done; @@ -312,7 +313,6 @@ static int writer_add_record(struct reftable_writer *w, } done: - reftable_buf_release(&key); return err; } @@ -325,7 +325,6 @@ int reftable_writer_add_ref(struct reftable_writer *w, .ref = *ref }, }; - struct reftable_buf buf = REFTABLE_BUF_INIT; int err; if (!ref->refname || @@ -340,24 +339,25 @@ int reftable_writer_add_ref(struct reftable_writer *w, goto out; if (!w->opts.skip_index_objects && reftable_ref_record_val1(ref)) { - err = reftable_buf_add(&buf, (char *)reftable_ref_record_val1(ref), + reftable_buf_reset(&w->buf); + err = reftable_buf_add(&w->buf, (char *)reftable_ref_record_val1(ref), hash_size(w->opts.hash_id)); if (err < 0) goto out; - err = writer_index_hash(w, &buf); + err = writer_index_hash(w, &w->buf); if (err < 0) goto out; } if (!w->opts.skip_index_objects && reftable_ref_record_val2(ref)) { - reftable_buf_reset(&buf); - err = reftable_buf_add(&buf, reftable_ref_record_val2(ref), + reftable_buf_reset(&w->buf); + err = reftable_buf_add(&w->buf, reftable_ref_record_val2(ref), hash_size(w->opts.hash_id)); if (err < 0) goto out; - err = writer_index_hash(w, &buf); + err = writer_index_hash(w, &w->buf); if (err < 0) goto out; } @@ -365,7 +365,6 @@ int reftable_writer_add_ref(struct reftable_writer *w, err = 0; out: - reftable_buf_release(&buf); return err; } diff --git a/reftable/writer.h b/reftable/writer.h index e8a6fbb785..421a897dcc 100644 --- a/reftable/writer.h +++ b/reftable/writer.h @@ -20,6 +20,7 @@ struct reftable_writer { void *write_arg; int pending_padding; struct reftable_buf last_key; + struct reftable_buf buf; /* offset of next block to write. */ uint64_t next; From aa248b8ab2ec0dd6e22373b504cfd4ec4b9b970a Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Wed, 20 Nov 2024 08:51:38 +0100 Subject: [PATCH 09/12] reftable/block: rename `block_writer::buf` variable Adapt the name of the `block_writer::buf` variable to instead be called `block`. This aligns it with the existing `block_len` variable, which tracks the length of this buffer, and is generally a bit more tied to the actual context where this variable gets used. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- reftable/block.c | 20 ++++++++++---------- reftable/block.h | 8 ++++---- 2 files changed, 14 insertions(+), 14 deletions(-) diff --git a/reftable/block.c b/reftable/block.c index f5b432566a..3fa36c002a 100644 --- a/reftable/block.c +++ b/reftable/block.c @@ -70,14 +70,14 @@ static int block_writer_register_restart(struct block_writer *w, int n, return 0; } -int block_writer_init(struct block_writer *bw, uint8_t typ, uint8_t *buf, +int block_writer_init(struct block_writer *bw, uint8_t typ, uint8_t *block, uint32_t block_size, uint32_t header_off, int hash_size) { - bw->buf = buf; + bw->block = block; bw->hash_size = hash_size; bw->block_size = block_size; bw->header_off = header_off; - bw->buf[header_off] = typ; + bw->block[header_off] = typ; bw->next = header_off + 4; bw->restart_interval = 16; bw->entries = 0; @@ -95,7 +95,7 @@ int block_writer_init(struct block_writer *bw, uint8_t typ, uint8_t *buf, uint8_t block_writer_type(struct block_writer *bw) { - return bw->buf[bw->header_off]; + return bw->block[bw->header_off]; } /* Adds the reftable_record to the block. Returns -1 if it does not fit, 0 on @@ -107,7 +107,7 @@ int block_writer_add(struct block_writer *w, struct reftable_record *rec) struct reftable_buf last = w->entries % w->restart_interval == 0 ? empty : w->last_key; struct string_view out = { - .buf = w->buf + w->next, + .buf = w->block + w->next, .len = w->block_size - w->next, }; @@ -153,13 +153,13 @@ int block_writer_finish(struct block_writer *w) { int i; for (i = 0; i < w->restart_len; i++) { - put_be24(w->buf + w->next, w->restarts[i]); + put_be24(w->block + w->next, w->restarts[i]); w->next += 3; } - put_be16(w->buf + w->next, w->restart_len); + put_be16(w->block + w->next, w->restart_len); w->next += 2; - put_be24(w->buf + 1 + w->header_off, w->next); + put_be24(w->block + 1 + w->header_off, w->next); /* * Log records are stored zlib-compressed. Note that the compression @@ -188,7 +188,7 @@ int block_writer_finish(struct block_writer *w) w->zstream->next_out = w->compressed; w->zstream->avail_out = compressed_len; - w->zstream->next_in = w->buf + block_header_skip; + w->zstream->next_in = w->block + block_header_skip; w->zstream->avail_in = src_len; /* @@ -206,7 +206,7 @@ int block_writer_finish(struct block_writer *w) * adjust the `next` pointer to point right after the * compressed data. */ - memcpy(w->buf + block_header_skip, w->compressed, + memcpy(w->block + block_header_skip, w->compressed, w->zstream->total_out); w->next = w->zstream->total_out + block_header_skip; } diff --git a/reftable/block.h b/reftable/block.h index 9a3effa513..b3f837d612 100644 --- a/reftable/block.h +++ b/reftable/block.h @@ -22,7 +22,7 @@ struct block_writer { unsigned char *compressed; size_t compressed_cap; - uint8_t *buf; + uint8_t *block; uint32_t block_size; /* Offset of the global header. Nonzero in the first block only. */ @@ -43,9 +43,9 @@ struct block_writer { }; /* - * initializes the blockwriter to write `typ` entries, using `buf` as temporary - * storage. `buf` is not owned by the block_writer. */ -int block_writer_init(struct block_writer *bw, uint8_t typ, uint8_t *buf, + * initializes the blockwriter to write `typ` entries, using `block` as temporary + * storage. `block` is not owned by the block_writer. */ +int block_writer_init(struct block_writer *bw, uint8_t typ, uint8_t *block, uint32_t block_size, uint32_t header_off, int hash_size); /* returns the block type (eg. 'r' for ref records. */ From d94ac23d3b9e223c6cacfdab7047d3d4efae3218 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Wed, 20 Nov 2024 08:51:39 +0100 Subject: [PATCH 10/12] reftable/block: optimize allocations by using scratch buffer The block writer needs to compute the key for every record that one adds to the writer. The buffer for this key is stored on the stack and thus reallocated on every call to `block_writer_add()`, which is inefficient. Refactor the code so that we store the buffer in the `block_writer` struct itself so that we can reuse it. This reduces the number of allocations when writing many refs, e.g. when migrating one million refs from the "files" backend to the "reftable backend. Before this change: HEAP SUMMARY: in use at exit: 80,048 bytes in 49 blocks total heap usage: 3,025,864 allocs, 3,025,815 frees, 372,746,291 bytes allocated After this change: HEAP SUMMARY: in use at exit: 80,048 bytes in 49 blocks total heap usage: 2,013,250 allocs, 2,013,201 frees, 347,543,583 bytes allocated Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- reftable/block.c | 13 +++++-------- reftable/block.h | 1 + 2 files changed, 6 insertions(+), 8 deletions(-) diff --git a/reftable/block.c b/reftable/block.c index 3fa36c002a..1aa7e8cd3c 100644 --- a/reftable/block.c +++ b/reftable/block.c @@ -110,24 +110,21 @@ int block_writer_add(struct block_writer *w, struct reftable_record *rec) .buf = w->block + w->next, .len = w->block_size - w->next, }; - struct string_view start = out; - int is_restart = 0; - struct reftable_buf key = REFTABLE_BUF_INIT; int n = 0; int err; - err = reftable_record_key(rec, &key); + err = reftable_record_key(rec, &w->buf); if (err < 0) goto done; - if (!key.len) { + if (!w->buf.len) { err = REFTABLE_API_ERROR; goto done; } - n = reftable_encode_key(&is_restart, out, last, key, + n = reftable_encode_key(&is_restart, out, last, w->buf, reftable_record_val_type(rec)); if (n < 0) { err = -1; @@ -143,9 +140,8 @@ int block_writer_add(struct block_writer *w, struct reftable_record *rec) string_view_consume(&out, n); err = block_writer_register_restart(w, start.len - out.len, is_restart, - &key); + &w->buf); done: - reftable_buf_release(&key); return err; } @@ -569,6 +565,7 @@ void block_writer_release(struct block_writer *bw) REFTABLE_FREE_AND_NULL(bw->zstream); REFTABLE_FREE_AND_NULL(bw->restarts); REFTABLE_FREE_AND_NULL(bw->compressed); + reftable_buf_release(&bw->buf); reftable_buf_release(&bw->last_key); /* the block is not owned. */ } diff --git a/reftable/block.h b/reftable/block.h index b3f837d612..d76f005530 100644 --- a/reftable/block.h +++ b/reftable/block.h @@ -39,6 +39,7 @@ struct block_writer { uint32_t restart_cap; struct reftable_buf last_key; + struct reftable_buf buf; int entries; }; From 0f5762b0435234c4dc916f4b3672c78c1b24f0e2 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 25 Nov 2024 08:34:42 +0100 Subject: [PATCH 11/12] refs: adapt `initial_transaction` flag to be unsigned The `initial_transaction` flag is tracked as a signed integer, but we typically pass around flags via unsigned integers. Adapt the type accordingly. Suggested-by: Christian Couder Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- refs.c | 2 +- refs.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/refs.c b/refs.c index 65eea3eb77..ee87081746 100644 --- a/refs.c +++ b/refs.c @@ -2325,7 +2325,7 @@ int refs_verify_refname_available(struct ref_store *refs, const char *refname, const struct string_list *extras, const struct string_list *skip, - int initial_transaction, + unsigned int initial_transaction, struct strbuf *err) { const char *slash; diff --git a/refs.h b/refs.h index 980bd20cf2..95baf194ba 100644 --- a/refs.h +++ b/refs.h @@ -110,7 +110,7 @@ int refs_verify_refname_available(struct ref_store *refs, const char *refname, const struct string_list *extras, const struct string_list *skip, - int initial_transaction, + unsigned int initial_transaction, struct strbuf *err); int refs_ref_exists(struct ref_store *refs, const char *refname); From ef46ad0815509c464e8a2558b4ebf6dc42736a01 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 25 Nov 2024 08:34:43 +0100 Subject: [PATCH 12/12] reftable: rename scratch buffer Both `struct block_writer` and `struct reftable_writer` have a `buf` member that is being reused to optimize the number of allocations. Rename the variable to `scratch` to clarify its intend and provide a comment explaining why it exists. Suggested-by: Christian Couder Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- reftable/block.c | 10 +++++----- reftable/block.h | 3 ++- reftable/writer.c | 22 +++++++++++----------- reftable/writer.h | 3 ++- 4 files changed, 20 insertions(+), 18 deletions(-) diff --git a/reftable/block.c b/reftable/block.c index 1aa7e8cd3c..0198078485 100644 --- a/reftable/block.c +++ b/reftable/block.c @@ -115,16 +115,16 @@ int block_writer_add(struct block_writer *w, struct reftable_record *rec) int n = 0; int err; - err = reftable_record_key(rec, &w->buf); + err = reftable_record_key(rec, &w->scratch); if (err < 0) goto done; - if (!w->buf.len) { + if (!w->scratch.len) { err = REFTABLE_API_ERROR; goto done; } - n = reftable_encode_key(&is_restart, out, last, w->buf, + n = reftable_encode_key(&is_restart, out, last, w->scratch, reftable_record_val_type(rec)); if (n < 0) { err = -1; @@ -140,7 +140,7 @@ int block_writer_add(struct block_writer *w, struct reftable_record *rec) string_view_consume(&out, n); err = block_writer_register_restart(w, start.len - out.len, is_restart, - &w->buf); + &w->scratch); done: return err; } @@ -565,7 +565,7 @@ void block_writer_release(struct block_writer *bw) REFTABLE_FREE_AND_NULL(bw->zstream); REFTABLE_FREE_AND_NULL(bw->restarts); REFTABLE_FREE_AND_NULL(bw->compressed); - reftable_buf_release(&bw->buf); + reftable_buf_release(&bw->scratch); reftable_buf_release(&bw->last_key); /* the block is not owned. */ } diff --git a/reftable/block.h b/reftable/block.h index d76f005530..0431e8591f 100644 --- a/reftable/block.h +++ b/reftable/block.h @@ -39,7 +39,8 @@ struct block_writer { uint32_t restart_cap; struct reftable_buf last_key; - struct reftable_buf buf; + /* Scratch buffer used to avoid allocations. */ + struct reftable_buf scratch; int entries; }; diff --git a/reftable/writer.c b/reftable/writer.c index 6501376ce8..be0fae6cb2 100644 --- a/reftable/writer.c +++ b/reftable/writer.c @@ -148,7 +148,7 @@ int reftable_writer_new(struct reftable_writer **out, reftable_buf_init(&wp->block_writer_data.last_key); reftable_buf_init(&wp->last_key); - reftable_buf_init(&wp->buf); + reftable_buf_init(&wp->scratch); REFTABLE_CALLOC_ARRAY(wp->block, opts.block_size); if (!wp->block) { reftable_free(wp); @@ -181,7 +181,7 @@ static void writer_release(struct reftable_writer *w) w->block_writer = NULL; writer_clear_index(w); reftable_buf_release(&w->last_key); - reftable_buf_release(&w->buf); + reftable_buf_release(&w->scratch); } } @@ -253,17 +253,17 @@ static int writer_add_record(struct reftable_writer *w, { int err; - err = reftable_record_key(rec, &w->buf); + err = reftable_record_key(rec, &w->scratch); if (err < 0) goto done; - if (reftable_buf_cmp(&w->last_key, &w->buf) >= 0) { + if (reftable_buf_cmp(&w->last_key, &w->scratch) >= 0) { err = REFTABLE_API_ERROR; goto done; } reftable_buf_reset(&w->last_key); - err = reftable_buf_add(&w->last_key, w->buf.buf, w->buf.len); + err = reftable_buf_add(&w->last_key, w->scratch.buf, w->scratch.len); if (err < 0) goto done; @@ -339,25 +339,25 @@ int reftable_writer_add_ref(struct reftable_writer *w, goto out; if (!w->opts.skip_index_objects && reftable_ref_record_val1(ref)) { - reftable_buf_reset(&w->buf); - err = reftable_buf_add(&w->buf, (char *)reftable_ref_record_val1(ref), + reftable_buf_reset(&w->scratch); + err = reftable_buf_add(&w->scratch, (char *)reftable_ref_record_val1(ref), hash_size(w->opts.hash_id)); if (err < 0) goto out; - err = writer_index_hash(w, &w->buf); + err = writer_index_hash(w, &w->scratch); if (err < 0) goto out; } if (!w->opts.skip_index_objects && reftable_ref_record_val2(ref)) { - reftable_buf_reset(&w->buf); - err = reftable_buf_add(&w->buf, reftable_ref_record_val2(ref), + reftable_buf_reset(&w->scratch); + err = reftable_buf_add(&w->scratch, reftable_ref_record_val2(ref), hash_size(w->opts.hash_id)); if (err < 0) goto out; - err = writer_index_hash(w, &w->buf); + err = writer_index_hash(w, &w->scratch); if (err < 0) goto out; } diff --git a/reftable/writer.h b/reftable/writer.h index 421a897dcc..1f4788a430 100644 --- a/reftable/writer.h +++ b/reftable/writer.h @@ -20,7 +20,8 @@ struct reftable_writer { void *write_arg; int pending_padding; struct reftable_buf last_key; - struct reftable_buf buf; + /* Scratch buffer used to avoid allocations. */ + struct reftable_buf scratch; /* offset of next block to write. */ uint64_t next;