From 0bdaa12169bca5d69f2c58f96cc92d51280e9e26 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nguy=E1=BB=85n=20Th=C3=A1i=20Ng=E1=BB=8Dc=20Duy?= Date: Fri, 8 Feb 2013 10:48:25 +0700 Subject: [PATCH 1/4] git-count-objects.txt: describe each line in -v output MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The current description requires a bit of guessing (what clause corresponds to what printed line?) and lacks information, such as the unit of size and size-pack. Signed-off-by: Nguyễn Thái Ngọc Duy Signed-off-by: Junio C Hamano --- Documentation/git-count-objects.txt | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/Documentation/git-count-objects.txt b/Documentation/git-count-objects.txt index 23c80cea64..e8168230b4 100644 --- a/Documentation/git-count-objects.txt +++ b/Documentation/git-count-objects.txt @@ -20,11 +20,21 @@ OPTIONS ------- -v:: --verbose:: - In addition to the number of loose objects and disk - space consumed, it reports the number of in-pack - objects, number of packs, disk space consumed by those packs, - and number of objects that can be removed by running - `git prune-packed`. + Report in more detail: ++ +count: the number of loose objects ++ +size: disk space consumed by loose objects, in KiB ++ +in-pack: the number of in-pack objects ++ +size-pack: disk space consumed by the packs, in KiB ++ +prune-packable: the number of loose objects that are also present in +the packs. These objects could be pruned using `git prune-packed`. ++ +garbage: the number of files in loose object database that are not +valid loose objects GIT --- From d90906a90265fa1df34a7f155fe41b1393679fc4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nguy=E1=BB=85n=20Th=C3=A1i=20Ng=E1=BB=8Dc=20Duy?= Date: Wed, 13 Feb 2013 16:13:17 +0700 Subject: [PATCH 2/4] sha1_file: reorder code in prepare_packed_git_one() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The current loop does while (...) { if (it is not an .idx file) continue; process .idx file; } and is reordered to while (...) { if (it is an .idx file) { process .idx file; } } This makes it easier to add new extension file processing. Signed-off-by: Nguyễn Thái Ngọc Duy Signed-off-by: Junio C Hamano --- sha1_file.c | 30 ++++++++++++++---------------- 1 file changed, 14 insertions(+), 16 deletions(-) diff --git a/sha1_file.c b/sha1_file.c index 40b23297b2..239bee7846 100644 --- a/sha1_file.c +++ b/sha1_file.c @@ -1024,27 +1024,25 @@ static void prepare_packed_git_one(char *objdir, int local) int namelen = strlen(de->d_name); struct packed_git *p; - if (!has_extension(de->d_name, ".idx")) - continue; - if (len + namelen + 1 > sizeof(path)) continue; - /* Don't reopen a pack we already have. */ strcpy(path + len, de->d_name); - for (p = packed_git; p; p = p->next) { - if (!memcmp(path, p->pack_name, len + namelen - 4)) - break; + + if (has_extension(de->d_name, ".idx")) { + /* Don't reopen a pack we already have. */ + for (p = packed_git; p; p = p->next) { + if (!memcmp(path, p->pack_name, len + namelen - 4)) + break; + } + if (p == NULL && + /* + * See if it really is a valid .idx file with + * corresponding .pack file that we can map. + */ + (p = add_packed_git(path, len + namelen, local)) != NULL) + install_packed_git(p); } - if (p) - continue; - /* See if it really is a valid .idx file with corresponding - * .pack file that we can map. - */ - p = add_packed_git(path, len + namelen, local); - if (!p) - continue; - install_packed_git(p); } closedir(dir); } From 543c5caa6c93bb3d42545aeef334c6a175384db8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nguy=E1=BB=85n=20Th=C3=A1i=20Ng=E1=BB=8Dc=20Duy?= Date: Fri, 15 Feb 2013 19:07:10 +0700 Subject: [PATCH 3/4] count-objects: report garbage files in pack directory too MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit prepare_packed_git_one() is modified to allow count-objects to hook a report function to so we don't need to duplicate the pack searching logic in count-objects.c. When report_pack_garbage is NULL, the overhead is insignificant. The garbage is reported with warning() instead of error() in packed garbage case because it's not an error to have garbage. Loose garbage is still reported as errors and will be converted to warnings later. Signed-off-by: Nguyễn Thái Ngọc Duy Signed-off-by: Junio C Hamano --- Documentation/git-count-objects.txt | 4 +- builtin/count-objects.c | 12 ++++- cache.h | 3 ++ sha1_file.c | 83 ++++++++++++++++++++++++++++- t/t5304-prune.sh | 26 +++++++++ 5 files changed, 124 insertions(+), 4 deletions(-) diff --git a/Documentation/git-count-objects.txt b/Documentation/git-count-objects.txt index e8168230b4..1611d7c768 100644 --- a/Documentation/git-count-objects.txt +++ b/Documentation/git-count-objects.txt @@ -33,8 +33,8 @@ size-pack: disk space consumed by the packs, in KiB prune-packable: the number of loose objects that are also present in the packs. These objects could be pruned using `git prune-packed`. + -garbage: the number of files in loose object database that are not -valid loose objects +garbage: the number of files in object database that are not valid +loose objects nor valid packs GIT --- diff --git a/builtin/count-objects.c b/builtin/count-objects.c index 9afaa88f77..1706c8bd81 100644 --- a/builtin/count-objects.c +++ b/builtin/count-objects.c @@ -9,6 +9,14 @@ #include "builtin.h" #include "parse-options.h" +static unsigned long garbage; + +static void real_report_garbage(const char *desc, const char *path) +{ + warning("%s: %s", desc, path); + garbage++; +} + static void count_objects(DIR *d, char *path, int len, int verbose, unsigned long *loose, off_t *loose_size, @@ -76,7 +84,7 @@ int cmd_count_objects(int argc, const char **argv, const char *prefix) const char *objdir = get_object_directory(); int len = strlen(objdir); char *path = xmalloc(len + 50); - unsigned long loose = 0, packed = 0, packed_loose = 0, garbage = 0; + unsigned long loose = 0, packed = 0, packed_loose = 0; off_t loose_size = 0; struct option opts[] = { OPT__VERBOSE(&verbose, N_("be verbose")), @@ -87,6 +95,8 @@ int cmd_count_objects(int argc, const char **argv, const char *prefix) /* we do not take arguments other than flags for now */ if (argc) usage_with_options(count_objects_usage, opts); + if (verbose) + report_garbage = real_report_garbage; memcpy(path, objdir, len); if (len && objdir[len-1] != '/') path[len++] = '/'; diff --git a/cache.h b/cache.h index e493563f4c..82af219e3e 100644 --- a/cache.h +++ b/cache.h @@ -1057,6 +1057,9 @@ extern const char *parse_feature_value(const char *feature_list, const char *fea extern struct packed_git *parse_pack_index(unsigned char *sha1, const char *idx_path); +/* A hook for count-objects to report invalid files in pack directory */ +extern void (*report_garbage)(const char *desc, const char *path); + extern void prepare_packed_git(void); extern void reprepare_packed_git(void); extern void install_packed_git(struct packed_git *pack); diff --git a/sha1_file.c b/sha1_file.c index 239bee7846..16967d3b9a 100644 --- a/sha1_file.c +++ b/sha1_file.c @@ -21,6 +21,7 @@ #include "sha1-lookup.h" #include "bulk-checkin.h" #include "streaming.h" +#include "dir.h" #ifndef O_NOATIME #if defined(__linux__) && (defined(__i386__) || defined(__PPC__)) @@ -1000,6 +1001,63 @@ void install_packed_git(struct packed_git *pack) packed_git = pack; } +void (*report_garbage)(const char *desc, const char *path); + +static void report_helper(const struct string_list *list, + int seen_bits, int first, int last) +{ + const char *msg; + switch (seen_bits) { + case 0: + msg = "no corresponding .idx nor .pack"; + break; + case 1: + msg = "no corresponding .idx"; + break; + case 2: + msg = "no corresponding .pack"; + break; + default: + return; + } + for (; first < last; first++) + report_garbage(msg, list->items[first].string); +} + +static void report_pack_garbage(struct string_list *list) +{ + int i, baselen = -1, first = 0, seen_bits = 0; + + if (!report_garbage) + return; + + sort_string_list(list); + + for (i = 0; i < list->nr; i++) { + const char *path = list->items[i].string; + if (baselen != -1 && + strncmp(path, list->items[first].string, baselen)) { + report_helper(list, seen_bits, first, i); + baselen = -1; + seen_bits = 0; + } + if (baselen == -1) { + const char *dot = strrchr(path, '.'); + if (!dot) { + report_garbage("garbage found", path); + continue; + } + baselen = dot - path + 1; + first = i; + } + if (!strcmp(path + baselen, "pack")) + seen_bits |= 1; + else if (!strcmp(path + baselen, "idx")) + seen_bits |= 2; + } + report_helper(list, seen_bits, first, list->nr); +} + static void prepare_packed_git_one(char *objdir, int local) { /* Ensure that this buffer is large enough so that we can @@ -1009,6 +1067,7 @@ static void prepare_packed_git_one(char *objdir, int local) int len; DIR *dir; struct dirent *de; + struct string_list garbage = STRING_LIST_INIT_DUP; sprintf(path, "%s/pack", objdir); len = strlen(path); @@ -1024,7 +1083,17 @@ static void prepare_packed_git_one(char *objdir, int local) int namelen = strlen(de->d_name); struct packed_git *p; - if (len + namelen + 1 > sizeof(path)) + if (len + namelen + 1 > sizeof(path)) { + if (report_garbage) { + struct strbuf sb = STRBUF_INIT; + strbuf_addf(&sb, "%.*s/%s", len - 1, path, de->d_name); + report_garbage("path too long", sb.buf); + strbuf_release(&sb); + } + continue; + } + + if (is_dot_or_dotdot(de->d_name)) continue; strcpy(path + len, de->d_name); @@ -1043,8 +1112,20 @@ static void prepare_packed_git_one(char *objdir, int local) (p = add_packed_git(path, len + namelen, local)) != NULL) install_packed_git(p); } + + if (!report_garbage) + continue; + + if (has_extension(de->d_name, ".idx") || + has_extension(de->d_name, ".pack") || + has_extension(de->d_name, ".keep")) + string_list_append(&garbage, path); + else + report_garbage("garbage found", path); } closedir(dir); + report_pack_garbage(&garbage); + string_list_clear(&garbage, 0); } static int sort_pack(const void *a_, const void *b_) diff --git a/t/t5304-prune.sh b/t/t5304-prune.sh index d645328609..e4bb3a1457 100755 --- a/t/t5304-prune.sh +++ b/t/t5304-prune.sh @@ -195,4 +195,30 @@ test_expect_success 'gc: prune old objects after local clone' ' ) ' +test_expect_success 'garbage report in count-objects -v' ' + : >.git/objects/pack/foo && + : >.git/objects/pack/foo.bar && + : >.git/objects/pack/foo.keep && + : >.git/objects/pack/foo.pack && + : >.git/objects/pack/fake.bar && + : >.git/objects/pack/fake.keep && + : >.git/objects/pack/fake.pack && + : >.git/objects/pack/fake.idx && + : >.git/objects/pack/fake2.keep && + : >.git/objects/pack/fake3.idx && + git count-objects -v 2>stderr && + grep "index file .git/objects/pack/fake.idx is too small" stderr && + grep "^warning:" stderr | sort >actual && + cat >expected <<\EOF && +warning: garbage found: .git/objects/pack/fake.bar +warning: garbage found: .git/objects/pack/foo +warning: garbage found: .git/objects/pack/foo.bar +warning: no corresponding .idx nor .pack: .git/objects/pack/fake2.keep +warning: no corresponding .idx: .git/objects/pack/foo.keep +warning: no corresponding .idx: .git/objects/pack/foo.pack +warning: no corresponding .pack: .git/objects/pack/fake3.idx +EOF + test_cmp expected actual +' + test_done From 1a20dd49f8c4e6c62080f267ca77b357cb61be8e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nguy=E1=BB=85n=20Th=C3=A1i=20Ng=E1=BB=8Dc=20Duy?= Date: Wed, 13 Feb 2013 16:13:19 +0700 Subject: [PATCH 4/4] count-objects: report how much disk space taken by garbage files MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Also issue warnings on loose garbages instead of errors as a result of using report_garbage() function in count_objects() Signed-off-by: Nguyễn Thái Ngọc Duy Signed-off-by: Junio C Hamano --- Documentation/git-count-objects.txt | 2 ++ builtin/count-objects.c | 18 ++++++++++++------ 2 files changed, 14 insertions(+), 6 deletions(-) diff --git a/Documentation/git-count-objects.txt b/Documentation/git-count-objects.txt index 1611d7c768..da6e72e696 100644 --- a/Documentation/git-count-objects.txt +++ b/Documentation/git-count-objects.txt @@ -35,6 +35,8 @@ the packs. These objects could be pruned using `git prune-packed`. + garbage: the number of files in object database that are not valid loose objects nor valid packs ++ +size-garbage: disk space consumed by garbage files, in KiB GIT --- diff --git a/builtin/count-objects.c b/builtin/count-objects.c index 1706c8bd81..3a01a8d085 100644 --- a/builtin/count-objects.c +++ b/builtin/count-objects.c @@ -10,9 +10,13 @@ #include "parse-options.h" static unsigned long garbage; +static off_t size_garbage; static void real_report_garbage(const char *desc, const char *path) { + struct stat st; + if (!stat(path, &st)) + size_garbage += st.st_size; warning("%s: %s", desc, path); garbage++; } @@ -20,8 +24,7 @@ static void real_report_garbage(const char *desc, const char *path) static void count_objects(DIR *d, char *path, int len, int verbose, unsigned long *loose, off_t *loose_size, - unsigned long *packed_loose, - unsigned long *garbage) + unsigned long *packed_loose) { struct dirent *ent; while ((ent = readdir(d)) != NULL) { @@ -54,9 +57,11 @@ static void count_objects(DIR *d, char *path, int len, int verbose, } if (bad) { if (verbose) { - error("garbage found: %.*s/%s", - len + 2, path, ent->d_name); - (*garbage)++; + struct strbuf sb = STRBUF_INIT; + strbuf_addf(&sb, "%.*s/%s", + len + 2, path, ent->d_name); + report_garbage("garbage found", sb.buf); + strbuf_release(&sb); } continue; } @@ -107,7 +112,7 @@ int cmd_count_objects(int argc, const char **argv, const char *prefix) if (!d) continue; count_objects(d, path, len, verbose, - &loose, &loose_size, &packed_loose, &garbage); + &loose, &loose_size, &packed_loose); closedir(d); } if (verbose) { @@ -132,6 +137,7 @@ int cmd_count_objects(int argc, const char **argv, const char *prefix) printf("size-pack: %lu\n", (unsigned long) (size_pack / 1024)); printf("prune-packable: %lu\n", packed_loose); printf("garbage: %lu\n", garbage); + printf("size-garbage: %lu\n", (unsigned long) (size_garbage / 1024)); } else printf("%lu objects, %lu kilobytes\n",