From 237a1d138c4322a7e934f129dee02e2ea6a214cd Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Sat, 28 May 2022 16:11:12 -0700 Subject: [PATCH 1/7] archive: optionally add "virtual" files With the `--add-virtual-file=:` option, `git archive` now supports use cases where relatively trivial files need to be added that do not exist on disk. This will allow us to generate `.zip` files with generated content, without having to add said content to the object database and without having to write it out to disk. Signed-off-by: Johannes Schindelin [jc: tweaked handling] Signed-off-by: Junio C Hamano --- Documentation/git-archive.txt | 13 +++++- archive.c | 75 ++++++++++++++++++++++++++--------- t/t5003-archive-zip.sh | 12 ++++++ 3 files changed, 81 insertions(+), 19 deletions(-) diff --git a/Documentation/git-archive.txt b/Documentation/git-archive.txt index 94519aae23..b41cc5bc2e 100644 --- a/Documentation/git-archive.txt +++ b/Documentation/git-archive.txt @@ -51,7 +51,7 @@ OPTIONS --prefix=/:: Prepend / to paths in the archive. Can be repeated; its rightmost value is used for all tracked files. See below which - value gets used by `--add-file`. + value gets used by `--add-file` and `--add-virtual-file`. -o :: --output=:: @@ -63,6 +63,17 @@ OPTIONS concatenating the value of the last `--prefix` option (if any) before this `--add-file` and the basename of . +--add-virtual-file=::: + Add the specified contents to the archive. Can be repeated to add + multiple files. The path of the file in the archive is built + by concatenating the value of the last `--prefix` option (if any) + before this `--add-virtual-file` and ``. ++ +The `` cannot contain any colon, the file mode is limited to +a regular file, and the option may be subject to platform-dependent +command-line limits. For non-trivial cases, write an untracked file +and use `--add-file` instead. + --worktree-attributes:: Look for attributes in .gitattributes files in the working tree as well (see <>). 
diff --git a/archive.c b/archive.c index e2121ebefb..29a90c7032 100644 --- a/archive.c +++ b/archive.c @@ -263,6 +263,7 @@ static int queue_or_write_archive_entry(const struct object_id *oid, struct extra_file_info { char *base; struct stat stat; + void *content; }; int write_archive_entries(struct archiver_args *args, @@ -331,19 +332,27 @@ int write_archive_entries(struct archiver_args *args, put_be64(fake_oid.hash, i + 1); - strbuf_reset(&path_in_archive); - if (info->base) - strbuf_addstr(&path_in_archive, info->base); - strbuf_addstr(&path_in_archive, basename(path)); + if (!info->content) { + strbuf_reset(&path_in_archive); + if (info->base) + strbuf_addstr(&path_in_archive, info->base); + strbuf_addstr(&path_in_archive, basename(path)); - strbuf_reset(&content); - if (strbuf_read_file(&content, path, info->stat.st_size) < 0) - err = error_errno(_("cannot read '%s'"), path); - else - err = write_entry(args, &fake_oid, path_in_archive.buf, - path_in_archive.len, + strbuf_reset(&content); + if (strbuf_read_file(&content, path, info->stat.st_size) < 0) + err = error_errno(_("cannot read '%s'"), path); + else + err = write_entry(args, &fake_oid, path_in_archive.buf, + path_in_archive.len, + canon_mode(info->stat.st_mode), + content.buf, content.len); + } else { + err = write_entry(args, &fake_oid, + path, strlen(path), canon_mode(info->stat.st_mode), - content.buf, content.len); + info->content, info->stat.st_size); + } + if (err) break; } @@ -493,6 +502,7 @@ static void extra_file_info_clear(void *util, const char *str) { struct extra_file_info *info = util; free(info->base); + free(info->content); free(info); } @@ -514,14 +524,40 @@ static int add_file_cb(const struct option *opt, const char *arg, int unset) if (!arg) return -1; - path = prefix_filename(args->prefix, arg); - item = string_list_append_nodup(&args->extra_files, path); - item->util = info = xmalloc(sizeof(*info)); + info = xmalloc(sizeof(*info)); info->base = xstrdup_or_null(base); - if (stat(path, 
&info->stat)) - die(_("File not found: %s"), path); - if (!S_ISREG(info->stat.st_mode)) - die(_("Not a regular file: %s"), path); + + if (!strcmp(opt->long_name, "add-file")) { + path = prefix_filename(args->prefix, arg); + if (stat(path, &info->stat)) + die(_("File not found: %s"), path); + if (!S_ISREG(info->stat.st_mode)) + die(_("Not a regular file: %s"), path); + info->content = NULL; /* read the file later */ + } else if (!strcmp(opt->long_name, "add-virtual-file")) { + const char *colon = strchr(arg, ':'); + char *p; + + if (!colon) + die(_("missing colon: '%s'"), arg); + + p = xstrndup(arg, colon - arg); + if (!args->prefix) + path = p; + else { + path = prefix_filename(args->prefix, p); + free(p); + } + memset(&info->stat, 0, sizeof(info->stat)); + info->stat.st_mode = S_IFREG | 0644; + info->content = xstrdup(colon + 1); + info->stat.st_size = strlen(info->content); + } else { + BUG("add_file_cb() called for %s", opt->long_name); + } + item = string_list_append_nodup(&args->extra_files, path); + item->util = info; + return 0; } @@ -554,6 +590,9 @@ static int parse_archive_args(int argc, const char **argv, { OPTION_CALLBACK, 0, "add-file", args, N_("file"), N_("add untracked file to archive"), 0, add_file_cb, (intptr_t)&base }, + { OPTION_CALLBACK, 0, "add-virtual-file", args, + N_("path:content"), N_("add untracked file to archive"), 0, + add_file_cb, (intptr_t)&base }, OPT_STRING('o', "output", &output, N_("file"), N_("write the archive to this file")), OPT_BOOL(0, "worktree-attributes", &worktree_attributes, diff --git a/t/t5003-archive-zip.sh b/t/t5003-archive-zip.sh index d726964307..d6027189e2 100755 --- a/t/t5003-archive-zip.sh +++ b/t/t5003-archive-zip.sh @@ -206,6 +206,18 @@ test_expect_success 'git archive --format=zip --add-file' ' check_zip with_untracked check_added with_untracked untracked untracked +test_expect_success UNZIP 'git archive --format=zip --add-virtual-file' ' + git archive --format=zip >with_file_with_content.zip \ + 
--add-virtual-file=hello:world $EMPTY_TREE && + test_when_finished "rm -rf tmp-unpack" && + mkdir tmp-unpack && ( + cd tmp-unpack && + "$GIT_UNZIP" ../with_file_with_content.zip && + test_path_is_file hello && + test world = $(cat hello) + ) +' + test_expect_success 'git archive --format=zip --add-file twice' ' echo untracked >untracked && git archive --format=zip --prefix=one/ --add-file=untracked \ From de1f68a968e64b3e1e2979222238fec1f045bbf3 Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Sat, 28 May 2022 16:11:13 -0700 Subject: [PATCH 2/7] archive --add-virtual-file: allow paths containing colons By allowing the path to be enclosed in double-quotes, we can avoid the limitation that paths cannot contain colons. Signed-off-by: Johannes Schindelin Signed-off-by: Junio C Hamano --- Documentation/git-archive.txt | 14 ++++++++++---- archive.c | 30 ++++++++++++++++++++---------- t/t5003-archive-zip.sh | 8 ++++++++ 3 files changed, 38 insertions(+), 14 deletions(-) diff --git a/Documentation/git-archive.txt b/Documentation/git-archive.txt index b41cc5bc2e..56989a2f34 100644 --- a/Documentation/git-archive.txt +++ b/Documentation/git-archive.txt @@ -69,10 +69,16 @@ OPTIONS by concatenating the value of the last `--prefix` option (if any) before this `--add-virtual-file` and ``. + -The `` cannot contain any colon, the file mode is limited to -a regular file, and the option may be subject to platform-dependent -command-line limits. For non-trivial cases, write an untracked file -and use `--add-file` instead. +The `` argument can start and end with a literal double-quote +character; the contained file name is interpreted as a C-style string, +i.e. the backslash is interpreted as escape character. The path must +be quoted if it contains a colon, to avoid the colon from being +misinterpreted as the separator between the path and the contents, or +if the path begins or ends with a double-quote character. 
++ +The file mode is limited to a regular file, and the option may be +subject to platform-dependent command-line limits. For non-trivial +cases, write an untracked file and use `--add-file` instead. --worktree-attributes:: Look for attributes in .gitattributes files in the working tree diff --git a/archive.c b/archive.c index 29a90c7032..d5109abb89 100644 --- a/archive.c +++ b/archive.c @@ -9,6 +9,7 @@ #include "parse-options.h" #include "unpack-trees.h" #include "dir.h" +#include "quote.h" static char const * const archive_usage[] = { N_("git archive [] [...]"), @@ -535,22 +536,31 @@ static int add_file_cb(const struct option *opt, const char *arg, int unset) die(_("Not a regular file: %s"), path); info->content = NULL; /* read the file later */ } else if (!strcmp(opt->long_name, "add-virtual-file")) { - const char *colon = strchr(arg, ':'); - char *p; + struct strbuf buf = STRBUF_INIT; + const char *p = arg; - if (!colon) + if (*p != '"') + p = strchr(p, ':'); + else if (unquote_c_style(&buf, p, &p) < 0) + die(_("unclosed quote: '%s'"), arg); + + if (!p || *p != ':') die(_("missing colon: '%s'"), arg); - p = xstrndup(arg, colon - arg); - if (!args->prefix) - path = p; - else { - path = prefix_filename(args->prefix, p); - free(p); + if (p == arg) + die(_("empty file name: '%s'"), arg); + + path = buf.len ? 
+ strbuf_detach(&buf, NULL) : xstrndup(arg, p - arg); + + if (args->prefix) { + char *save = path; + path = prefix_filename(args->prefix, path); + free(save); } memset(&info->stat, 0, sizeof(info->stat)); info->stat.st_mode = S_IFREG | 0644; - info->content = xstrdup(colon + 1); + info->content = xstrdup(p + 1); info->stat.st_size = strlen(info->content); } else { BUG("add_file_cb() called for %s", opt->long_name); diff --git a/t/t5003-archive-zip.sh b/t/t5003-archive-zip.sh index d6027189e2..3992d08158 100755 --- a/t/t5003-archive-zip.sh +++ b/t/t5003-archive-zip.sh @@ -207,13 +207,21 @@ check_zip with_untracked check_added with_untracked untracked untracked test_expect_success UNZIP 'git archive --format=zip --add-virtual-file' ' + if test_have_prereq FUNNYNAMES + then + PATHNAME="pathname with : colon" + else + PATHNAME="pathname without colon" + fi && git archive --format=zip >with_file_with_content.zip \ + --add-virtual-file=\""$PATHNAME"\": \ --add-virtual-file=hello:world $EMPTY_TREE && test_when_finished "rm -rf tmp-unpack" && mkdir tmp-unpack && ( cd tmp-unpack && "$GIT_UNZIP" ../with_file_with_content.zip && test_path_is_file hello && + test_path_is_file "$PATHNAME" && test world = $(cat hello) ) ' From b44855743b1674caf3bd4f42814473771b129c48 Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Sat, 28 May 2022 16:11:14 -0700 Subject: [PATCH 3/7] scalar: validate the optional enlistment argument The `scalar` command needs a Scalar enlistment for many subcommands, and looks in the current directory for such an enlistment (traversing the parent directories until it finds one). These subcommands can also be called with an optional argument specifying the enlistment. Here, too, we traverse parent directories as needed, until we find an enlistment. However, if the specified directory does not even exist, or is not a directory, we should stop right there, with an error message.
Signed-off-by: Johannes Schindelin Signed-off-by: Junio C Hamano --- contrib/scalar/scalar.c | 6 ++++-- contrib/scalar/t/t9099-scalar.sh | 5 +++++ 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/contrib/scalar/scalar.c b/contrib/scalar/scalar.c index 58ca0e56f1..6d58c7a698 100644 --- a/contrib/scalar/scalar.c +++ b/contrib/scalar/scalar.c @@ -43,9 +43,11 @@ static void setup_enlistment_directory(int argc, const char **argv, usage_with_options(usagestr, options); /* find the worktree, determine its corresponding root */ - if (argc == 1) + if (argc == 1) { strbuf_add_absolute_path(&path, argv[0]); - else if (strbuf_getcwd(&path) < 0) + if (!is_directory(path.buf)) + die(_("'%s' does not exist"), path.buf); + } else if (strbuf_getcwd(&path) < 0) die(_("need a working directory")); strbuf_trim_trailing_dir_sep(&path); diff --git a/contrib/scalar/t/t9099-scalar.sh b/contrib/scalar/t/t9099-scalar.sh index 89781568f4..bb42354a8b 100755 --- a/contrib/scalar/t/t9099-scalar.sh +++ b/contrib/scalar/t/t9099-scalar.sh @@ -93,4 +93,9 @@ test_expect_success 'scalar supports -c/-C' ' test true = "$(git -C sub config core.preloadIndex)" ' +test_expect_success '`scalar [...] ` errors out when dir is missing' ' + ! scalar run config cloned 2>err && + grep "cloned. does not exist" err +' + test_done From aa5c79a33156c2f086ca3a149c11f1434d10f5ce Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Sat, 28 May 2022 16:11:15 -0700 Subject: [PATCH 4/7] scalar: implement `scalar diagnose` Over the course of Scalar's development, it became obvious that there is a need for a command that can gather all kinds of useful information that can help identify the most typical problems with large worktrees/repositories. The `diagnose` command is the culmination of this hard-won knowledge: it gathers the installed hooks, the config, a couple statistics describing the data shape, among other pieces of information, and then wraps everything up in a tidy, neat `.zip` archive. 
Note: originally, Scalar was implemented in C# using the .NET API, where we had the luxury of a comprehensive standard library that includes basic functionality such as writing a `.zip` file. In the C version, we lack such a commodity. Rather than introducing a dependency on, say, libzip, we slightly abuse Git's `archive` machinery: we write out a `.zip` of the empty tree, augmented by a couple files that are added via the `--add-file*` options. We are careful trying not to modify the current repository in any way lest the very circumstances that required `scalar diagnose` to be run are changed by the `diagnose` run itself. Signed-off-by: Johannes Schindelin Signed-off-by: Junio C Hamano --- contrib/scalar/scalar.c | 144 +++++++++++++++++++++++++++++++ contrib/scalar/scalar.txt | 12 +++ contrib/scalar/t/t9099-scalar.sh | 14 +++ 3 files changed, 170 insertions(+) diff --git a/contrib/scalar/scalar.c b/contrib/scalar/scalar.c index 6d58c7a698..a1e05a2146 100644 --- a/contrib/scalar/scalar.c +++ b/contrib/scalar/scalar.c @@ -11,6 +11,7 @@ #include "dir.h" #include "packfile.h" #include "help.h" +#include "archive.h" /* * Remove the deepest subdirectory in the provided path string. Path must not @@ -260,6 +261,47 @@ static int unregister_dir(void) return res; } +static int add_directory_to_archiver(struct strvec *archiver_args, + const char *path, int recurse) +{ + int at_root = !*path; + DIR *dir = opendir(at_root ? "."
: path); + struct dirent *e; + struct strbuf buf = STRBUF_INIT; + size_t len; + int res = 0; + + if (!dir) + return error_errno(_("could not open directory '%s'"), path); + + if (!at_root) + strbuf_addf(&buf, "%s/", path); + len = buf.len; + strvec_pushf(archiver_args, "--prefix=%s", buf.buf); + + while (!res && (e = readdir(dir))) { + if (!strcmp(".", e->d_name) || !strcmp("..", e->d_name)) + continue; + + strbuf_setlen(&buf, len); + strbuf_addstr(&buf, e->d_name); + + if (e->d_type == DT_REG) + strvec_pushf(archiver_args, "--add-file=%s", buf.buf); + else if (e->d_type != DT_DIR) + warning(_("skipping '%s', which is neither file nor " + "directory"), buf.buf); + else if (recurse && + add_directory_to_archiver(archiver_args, + buf.buf, recurse) < 0) + res = -1; + } + + closedir(dir); + strbuf_release(&buf); + return res; +} + /* printf-style interface, expects `=` argument */ static int set_config(const char *fmt, ...) { @@ -500,6 +542,107 @@ cleanup: return res; } +static int cmd_diagnose(int argc, const char **argv) +{ + struct option options[] = { + OPT_END(), + }; + const char * const usage[] = { + N_("scalar diagnose []"), + NULL + }; + struct strbuf zip_path = STRBUF_INIT; + struct strvec archiver_args = STRVEC_INIT; + char **argv_copy = NULL; + int stdout_fd = -1, archiver_fd = -1; + time_t now = time(NULL); + struct tm tm; + struct strbuf path = STRBUF_INIT, buf = STRBUF_INIT; + int res = 0; + + argc = parse_options(argc, argv, NULL, options, + usage, 0); + + setup_enlistment_directory(argc, argv, usage, options, &zip_path); + + strbuf_addstr(&zip_path, "/.scalarDiagnostics/scalar_"); + strbuf_addftime(&zip_path, + "%Y%m%d_%H%M%S", localtime_r(&now, &tm), 0, 0); + strbuf_addstr(&zip_path, ".zip"); + switch (safe_create_leading_directories(zip_path.buf)) { + case SCLD_EXISTS: + case SCLD_OK: + break; + default: + error_errno(_("could not create directory for '%s'"), + zip_path.buf); + goto diagnose_cleanup; + } + stdout_fd = dup(1); + if (stdout_fd < 0) { + 
res = error_errno(_("could not duplicate stdout")); + goto diagnose_cleanup; + } + + archiver_fd = xopen(zip_path.buf, O_CREAT | O_WRONLY | O_TRUNC, 0666); + if (archiver_fd < 0 || dup2(archiver_fd, 1) < 0) { + res = error_errno(_("could not redirect output")); + goto diagnose_cleanup; + } + + init_zip_archiver(); + strvec_pushl(&archiver_args, "scalar-diagnose", "--format=zip", NULL); + + strbuf_reset(&buf); + strbuf_addstr(&buf, "Collecting diagnostic info\n\n"); + get_version_info(&buf, 1); + + strbuf_addf(&buf, "Enlistment root: %s\n", the_repository->worktree); + write_or_die(stdout_fd, buf.buf, buf.len); + strvec_pushf(&archiver_args, + "--add-virtual-file=diagnostics.log:%.*s", + (int)buf.len, buf.buf); + + if ((res = add_directory_to_archiver(&archiver_args, ".git", 0)) || + (res = add_directory_to_archiver(&archiver_args, ".git/hooks", 0)) || + (res = add_directory_to_archiver(&archiver_args, ".git/info", 0)) || + (res = add_directory_to_archiver(&archiver_args, ".git/logs", 1)) || + (res = add_directory_to_archiver(&archiver_args, ".git/objects/info", 0))) + goto diagnose_cleanup; + + strvec_pushl(&archiver_args, "--prefix=", + oid_to_hex(the_hash_algo->empty_tree), "--", NULL); + + /* `write_archive()` modifies the `argv` passed to it. Let it. 
*/ + argv_copy = xmemdupz(archiver_args.v, + sizeof(char *) * archiver_args.nr); + res = write_archive(archiver_args.nr, (const char **)argv_copy, NULL, + the_repository, NULL, 0); + if (res) { + error(_("failed to write archive")); + goto diagnose_cleanup; + } + + if (!res) + fprintf(stderr, "\n" + "Diagnostics complete.\n" + "All of the gathered info is captured in '%s'\n", + zip_path.buf); + +diagnose_cleanup: + if (archiver_fd >= 0) { + close(1); + dup2(stdout_fd, 1); + } + free(argv_copy); + strvec_clear(&archiver_args); + strbuf_release(&zip_path); + strbuf_release(&path); + strbuf_release(&buf); + + return res; +} + static int cmd_list(int argc, const char **argv) { if (argc != 1) @@ -801,6 +944,7 @@ static struct { { "reconfigure", cmd_reconfigure }, { "delete", cmd_delete }, { "version", cmd_version }, + { "diagnose", cmd_diagnose }, { NULL, NULL}, }; diff --git a/contrib/scalar/scalar.txt b/contrib/scalar/scalar.txt index cf4e5b889c..c0425e0653 100644 --- a/contrib/scalar/scalar.txt +++ b/contrib/scalar/scalar.txt @@ -14,6 +14,7 @@ scalar register [] scalar unregister [] scalar run ( all | config | commit-graph | fetch | loose-objects | pack-files ) [] scalar reconfigure [ --all | ] +scalar diagnose [] scalar delete DESCRIPTION @@ -139,6 +140,17 @@ reconfigure the enlistment. With the `--all` option, all enlistments currently registered with Scalar will be reconfigured. Use this option after each Scalar upgrade. +Diagnose +~~~~~~~~ + +diagnose []:: + When reporting issues with Scalar, it is often helpful to provide the + information gathered by this command, including logs and certain + statistics describing the data shape of the current enlistment. ++ +The output of this command is a `.zip` file that is written into +a directory adjacent to the worktree in the `src` directory. 
+ Delete ~~~~~~ diff --git a/contrib/scalar/t/t9099-scalar.sh b/contrib/scalar/t/t9099-scalar.sh index bb42354a8b..fbb1df2049 100755 --- a/contrib/scalar/t/t9099-scalar.sh +++ b/contrib/scalar/t/t9099-scalar.sh @@ -98,4 +98,18 @@ test_expect_success '`scalar [...] ` errors out when dir is missing' ' grep "cloned. does not exist" err ' +SQ="'" +test_expect_success UNZIP 'scalar diagnose' ' + scalar clone "file://$(pwd)" cloned --single-branch && + scalar diagnose cloned >out 2>err && + sed -n "s/.*$SQ\\(.*\\.zip\\)$SQ.*/\\1/p" zip_path && + zip_path=$(cat zip_path) && + test -n "$zip_path" && + unzip -v "$zip_path" && + folder=${zip_path%.zip} && + test_path_is_missing "$folder" && + unzip -p "$zip_path" diagnostics.log >out && + test_file_not_empty out +' + test_done From 0ed5b13f2450fd26c0e52cef775deea477e4cdb4 Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Sat, 28 May 2022 16:11:16 -0700 Subject: [PATCH 5/7] scalar diagnose: include disk space information When analyzing problems with large worktrees/repositories, it is useful to know how close to a "full disk" situation Scalar/Git operates. Let's include this information. 
Signed-off-by: Johannes Schindelin Signed-off-by: Junio C Hamano --- contrib/scalar/scalar.c | 53 ++++++++++++++++++++++++++++++++ contrib/scalar/t/t9099-scalar.sh | 1 + 2 files changed, 54 insertions(+) diff --git a/contrib/scalar/scalar.c b/contrib/scalar/scalar.c index a1e05a2146..f06a2f3576 100644 --- a/contrib/scalar/scalar.c +++ b/contrib/scalar/scalar.c @@ -302,6 +302,58 @@ static int add_directory_to_archiver(struct strvec *archiver_args, return res; } +#ifndef WIN32 +#include +#endif + +static int get_disk_info(struct strbuf *out) +{ +#ifdef WIN32 + struct strbuf buf = STRBUF_INIT; + char volume_name[MAX_PATH], fs_name[MAX_PATH]; + DWORD serial_number, component_length, flags; + ULARGE_INTEGER avail2caller, total, avail; + + strbuf_realpath(&buf, ".", 1); + if (!GetDiskFreeSpaceExA(buf.buf, &avail2caller, &total, &avail)) { + error(_("could not determine free disk size for '%s'"), + buf.buf); + strbuf_release(&buf); + return -1; + } + + strbuf_setlen(&buf, offset_1st_component(buf.buf)); + if (!GetVolumeInformationA(buf.buf, volume_name, sizeof(volume_name), + &serial_number, &component_length, &flags, + fs_name, sizeof(fs_name))) { + error(_("could not get info for '%s'"), buf.buf); + strbuf_release(&buf); + return -1; + } + strbuf_addf(out, "Available space on '%s': ", buf.buf); + strbuf_humanise_bytes(out, avail2caller.QuadPart); + strbuf_addch(out, '\n'); + strbuf_release(&buf); +#else + struct strbuf buf = STRBUF_INIT; + struct statvfs stat; + + strbuf_realpath(&buf, ".", 1); + if (statvfs(buf.buf, &stat) < 0) { + error_errno(_("could not determine free disk size for '%s'"), + buf.buf); + strbuf_release(&buf); + return -1; + } + + strbuf_addf(out, "Available space on '%s': ", buf.buf); + strbuf_humanise_bytes(out, st_mult(stat.f_bsize, stat.f_bavail)); + strbuf_addf(out, " (mount flags 0x%lx)\n", stat.f_flag); + strbuf_release(&buf); +#endif + return 0; +} + /* printf-style interface, expects `=` argument */ static int set_config(const char *fmt, ...) 
{ @@ -598,6 +650,7 @@ static int cmd_diagnose(int argc, const char **argv) get_version_info(&buf, 1); strbuf_addf(&buf, "Enlistment root: %s\n", the_repository->worktree); + get_disk_info(&buf); write_or_die(stdout_fd, buf.buf, buf.len); strvec_pushf(&archiver_args, "--add-virtual-file=diagnostics.log:%.*s", diff --git a/contrib/scalar/t/t9099-scalar.sh b/contrib/scalar/t/t9099-scalar.sh index fbb1df2049..6e52088919 100755 --- a/contrib/scalar/t/t9099-scalar.sh +++ b/contrib/scalar/t/t9099-scalar.sh @@ -102,6 +102,7 @@ SQ="'" test_expect_success UNZIP 'scalar diagnose' ' scalar clone "file://$(pwd)" cloned --single-branch && scalar diagnose cloned >out 2>err && + grep "Available space" out && sed -n "s/.*$SQ\\(.*\\.zip\\)$SQ.*/\\1/p" zip_path && zip_path=$(cat zip_path) && test -n "$zip_path" && From 93e804b2785e1f030737e31e7e18ddc997a368c7 Mon Sep 17 00:00:00 2001 From: Matthew John Cheetham Date: Sat, 28 May 2022 16:11:17 -0700 Subject: [PATCH 6/7] scalar: teach `diagnose` to gather packfile info It's helpful to see if there are other crud files in the pack directory. Let's teach the `scalar diagnose` command to gather file size information about pack files. While at it, also enumerate the pack files in the alternate object directories, if any are registered. Signed-off-by: Matthew John Cheetham Signed-off-by: Johannes Schindelin Signed-off-by: Junio C Hamano --- contrib/scalar/scalar.c | 30 ++++++++++++++++++++++++++++++ contrib/scalar/t/t9099-scalar.sh | 6 +++++- 2 files changed, 35 insertions(+), 1 deletion(-) diff --git a/contrib/scalar/scalar.c b/contrib/scalar/scalar.c index f06a2f3576..f745519038 100644 --- a/contrib/scalar/scalar.c +++ b/contrib/scalar/scalar.c @@ -12,6 +12,7 @@ #include "packfile.h" #include "help.h" #include "archive.h" +#include "object-store.h" /* * Remove the deepest subdirectory in the provided path string. 
Path must not @@ -594,6 +595,29 @@ cleanup: return res; } +static void dir_file_stats_objects(const char *full_path, size_t full_path_len, + const char *file_name, void *data) +{ + struct strbuf *buf = data; + struct stat st; + + if (!stat(full_path, &st)) + strbuf_addf(buf, "%-70s %16" PRIuMAX "\n", file_name, + (uintmax_t)st.st_size); +} + +static int dir_file_stats(struct object_directory *object_dir, void *data) +{ + struct strbuf *buf = data; + + strbuf_addf(buf, "Contents of %s:\n", object_dir->path); + + for_each_file_in_pack_dir(object_dir->path, dir_file_stats_objects, + data); + + return 0; +} + static int cmd_diagnose(int argc, const char **argv) { struct option options[] = { @@ -656,6 +680,12 @@ static int cmd_diagnose(int argc, const char **argv) "--add-virtual-file=diagnostics.log:%.*s", (int)buf.len, buf.buf); + strbuf_reset(&buf); + strbuf_addstr(&buf, "--add-virtual-file=packs-local.txt:"); + dir_file_stats(the_repository->objects->odb, &buf); + foreach_alt_odb(dir_file_stats, &buf); + strvec_push(&archiver_args, buf.buf); + if ((res = add_directory_to_archiver(&archiver_args, ".git", 0)) || (res = add_directory_to_archiver(&archiver_args, ".git/hooks", 0)) || (res = add_directory_to_archiver(&archiver_args, ".git/info", 0)) || diff --git a/contrib/scalar/t/t9099-scalar.sh b/contrib/scalar/t/t9099-scalar.sh index 6e52088919..2603e2278f 100755 --- a/contrib/scalar/t/t9099-scalar.sh +++ b/contrib/scalar/t/t9099-scalar.sh @@ -101,6 +101,8 @@ test_expect_success '`scalar [...] 
` errors out when dir is missing' ' SQ="'" test_expect_success UNZIP 'scalar diagnose' ' scalar clone "file://$(pwd)" cloned --single-branch && + git repack && + echo "$(pwd)/.git/objects/" >>cloned/src/.git/objects/info/alternates && scalar diagnose cloned >out 2>err && grep "Available space" out && sed -n "s/.*$SQ\\(.*\\.zip\\)$SQ.*/\\1/p" zip_path && @@ -110,7 +112,9 @@ test_expect_success UNZIP 'scalar diagnose' ' folder=${zip_path%.zip} && test_path_is_missing "$folder" && unzip -p "$zip_path" diagnostics.log >out && - test_file_not_empty out + test_file_not_empty out && + unzip -p "$zip_path" packs-local.txt >out && + grep "$(pwd)/.git/objects" out ' test_done From 15d8adccab9a3146b760b089df59ce3e7ca2b451 Mon Sep 17 00:00:00 2001 From: Matthew John Cheetham Date: Sat, 28 May 2022 16:11:18 -0700 Subject: [PATCH 7/7] scalar: teach `diagnose` to gather loose objects information When operating at the scale that Scalar wants to support, certain data shapes are more likely to cause undesirable performance issues, such as large numbers of loose objects. By including statistics about this, `scalar diagnose` now makes it easier to identify such scenarios. 
Signed-off-by: Matthew John Cheetham Signed-off-by: Johannes Schindelin Signed-off-by: Junio C Hamano --- contrib/scalar/scalar.c | 59 ++++++++++++++++++++++++++++++++ contrib/scalar/t/t9099-scalar.sh | 5 ++- 2 files changed, 63 insertions(+), 1 deletion(-) diff --git a/contrib/scalar/scalar.c b/contrib/scalar/scalar.c index f745519038..28176914e5 100644 --- a/contrib/scalar/scalar.c +++ b/contrib/scalar/scalar.c @@ -618,6 +618,60 @@ static int dir_file_stats(struct object_directory *object_dir, void *data) return 0; } +static int count_files(char *path) +{ + DIR *dir = opendir(path); + struct dirent *e; + int count = 0; + + if (!dir) + return 0; + + while ((e = readdir(dir)) != NULL) + if (!is_dot_or_dotdot(e->d_name) && e->d_type == DT_REG) + count++; + + closedir(dir); + return count; +} + +static void loose_objs_stats(struct strbuf *buf, const char *path) +{ + DIR *dir = opendir(path); + struct dirent *e; + int count; + int total = 0; + unsigned char c; + struct strbuf count_path = STRBUF_INIT; + size_t base_path_len; + + if (!dir) + return; + + strbuf_addstr(buf, "Object directory stats for "); + strbuf_add_absolute_path(buf, path); + strbuf_addstr(buf, ":\n"); + + strbuf_add_absolute_path(&count_path, path); + strbuf_addch(&count_path, '/'); + base_path_len = count_path.len; + + while ((e = readdir(dir)) != NULL) + if (!is_dot_or_dotdot(e->d_name) && + e->d_type == DT_DIR && strlen(e->d_name) == 2 && + !hex_to_bytes(&c, e->d_name, 1)) { + strbuf_setlen(&count_path, base_path_len); + strbuf_addstr(&count_path, e->d_name); + total += (count = count_files(count_path.buf)); + strbuf_addf(buf, "%s : %7d files\n", e->d_name, count); + } + + strbuf_addf(buf, "Total: %d loose objects", total); + + strbuf_release(&count_path); + closedir(dir); +} + static int cmd_diagnose(int argc, const char **argv) { struct option options[] = { @@ -686,6 +740,11 @@ static int cmd_diagnose(int argc, const char **argv) foreach_alt_odb(dir_file_stats, &buf); strvec_push(&archiver_args, 
buf.buf); + strbuf_reset(&buf); + strbuf_addstr(&buf, "--add-virtual-file=objects-local.txt:"); + loose_objs_stats(&buf, ".git/objects"); + strvec_push(&archiver_args, buf.buf); + if ((res = add_directory_to_archiver(&archiver_args, ".git", 0)) || (res = add_directory_to_archiver(&archiver_args, ".git/hooks", 0)) || (res = add_directory_to_archiver(&archiver_args, ".git/info", 0)) || diff --git a/contrib/scalar/t/t9099-scalar.sh b/contrib/scalar/t/t9099-scalar.sh index 2603e2278f..10b1172a8a 100755 --- a/contrib/scalar/t/t9099-scalar.sh +++ b/contrib/scalar/t/t9099-scalar.sh @@ -103,6 +103,7 @@ test_expect_success UNZIP 'scalar diagnose' ' scalar clone "file://$(pwd)" cloned --single-branch && git repack && echo "$(pwd)/.git/objects/" >>cloned/src/.git/objects/info/alternates && + test_commit -C cloned/src loose && scalar diagnose cloned >out 2>err && grep "Available space" out && sed -n "s/.*$SQ\\(.*\\.zip\\)$SQ.*/\\1/p" zip_path && @@ -114,7 +115,9 @@ test_expect_success UNZIP 'scalar diagnose' ' unzip -p "$zip_path" diagnostics.log >out && test_file_not_empty out && unzip -p "$zip_path" packs-local.txt >out && - grep "$(pwd)/.git/objects" out + grep "$(pwd)/.git/objects" out && + unzip -p "$zip_path" objects-local.txt >out && + grep "^Total: [1-9]" out ' test_done