From 06aaaa0bf70fe37d198893f4e25fa73b6516f8a9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nguy=E1=BB=85n=20Th=C3=A1i=20Ng=E1=BB=8Dc=20Duy?= Date: Wed, 1 Oct 2008 11:04:01 +0700 Subject: [PATCH 1/4] Extend index to save more flags MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The on-disk format of index only saves 16 bit flags, nearly all have been used. The last bit (CE_EXTENDED) is used to for future extension. This patch extends index entry format to save more flags in future. The new entry format will be used when CE_EXTENDED bit is 1. Because older implementation may not understand CE_EXTENDED bit and misread the new format, if there is any extended entry in index, index header version will turn 3, which makes it incompatible for older git. If there is none, header version will return to 2 again. Signed-off-by: Nguyễn Thái Ngọc Duy Signed-off-by: Shawn O. Pearce --- cache.h | 58 ++++++++++++++++++++++++++++++++++++++++++++++++---- read-cache.c | 51 ++++++++++++++++++++++++++++++++++++--------- 2 files changed, 95 insertions(+), 14 deletions(-) diff --git a/cache.h b/cache.h index 991544cf0b..964f7e31cb 100644 --- a/cache.h +++ b/cache.h @@ -115,6 +115,26 @@ struct ondisk_cache_entry { char name[FLEX_ARRAY]; /* more */ }; +/* + * This struct is used when CE_EXTENDED bit is 1 + * The struct must match ondisk_cache_entry exactly from + * ctime till flags + */ +struct ondisk_cache_entry_extended { + struct cache_time ctime; + struct cache_time mtime; + unsigned int dev; + unsigned int ino; + unsigned int mode; + unsigned int uid; + unsigned int gid; + unsigned int size; + unsigned char sha1[20]; + unsigned short flags; + unsigned short flags2; + char name[FLEX_ARRAY]; /* more */ +}; + struct cache_entry { unsigned int ce_ctime; unsigned int ce_mtime; @@ -136,7 +156,15 @@ struct cache_entry { #define CE_VALID (0x8000) #define CE_STAGESHIFT 12 -/* In-memory only */ +/* + * Range 0xFFFF0000 in ce_flags is divided into + * two parts: in-memory flags and on-disk ones. + * Flags in CE_EXTENDED_FLAGS will get saved on-disk + * if you want to save a new flag, add it in + * CE_EXTENDED_FLAGS + * + * In-memory only flags + */ #define CE_UPDATE (0x10000) #define CE_REMOVE (0x20000) #define CE_UPTODATE (0x40000) @@ -145,6 +173,24 @@ struct cache_entry { #define CE_HASHED (0x100000) #define CE_UNHASHED (0x200000) +/* + * Extended on-disk flags + */ +/* CE_EXTENDED2 is for future extension */ +#define CE_EXTENDED2 0x80000000 + +#define CE_EXTENDED_FLAGS (0) + +/* + * Safeguard to avoid saving wrong flags: + * - CE_EXTENDED2 won't get saved until its semantic is known + * - Bits in 0x0000FFFF have been saved in ce_flags already + * - Bits in 0x003F0000 are currently in-memory flags + */ +#if CE_EXTENDED_FLAGS & 0x803FFFFF +#error "CE_EXTENDED_FLAGS out of range" +#endif + /* * Copy the sha1 and stat state of a cache entry from one to * another. But we never change the name, or the hash state! @@ -177,7 +223,9 @@ static inline size_t ce_namelen(const struct cache_entry *ce) } #define ce_size(ce) cache_entry_size(ce_namelen(ce)) -#define ondisk_ce_size(ce) ondisk_cache_entry_size(ce_namelen(ce)) +#define ondisk_ce_size(ce) (((ce)->ce_flags & CE_EXTENDED) ? \ + ondisk_cache_entry_extended_size(ce_namelen(ce)) : \ + ondisk_cache_entry_size(ce_namelen(ce))) #define ce_stage(ce) ((CE_STAGEMASK & (ce)->ce_flags) >> CE_STAGESHIFT) #define ce_uptodate(ce) ((ce)->ce_flags & CE_UPTODATE) #define ce_mark_uptodate(ce) ((ce)->ce_flags |= CE_UPTODATE) @@ -220,8 +268,10 @@ static inline int ce_to_dtype(const struct cache_entry *ce) (S_ISREG(mode) ? (S_IFREG | ce_permissions(mode)) : \ S_ISLNK(mode) ? S_IFLNK : S_ISDIR(mode) ? S_IFDIR : S_IFGITLINK) -#define cache_entry_size(len) ((offsetof(struct cache_entry,name) + (len) + 8) & ~7) -#define ondisk_cache_entry_size(len) ((offsetof(struct ondisk_cache_entry,name) + (len) + 8) & ~7) +#define flexible_size(STRUCT,len) ((offsetof(struct STRUCT,name) + (len) + 8) & ~7) +#define cache_entry_size(len) flexible_size(cache_entry,len) +#define ondisk_cache_entry_size(len) flexible_size(ondisk_cache_entry,len) +#define ondisk_cache_entry_extended_size(len) flexible_size(ondisk_cache_entry_extended,len) struct index_state { struct cache_entry **cache; diff --git a/read-cache.c b/read-cache.c index c229fd4d0d..ea262fc37e 100644 --- a/read-cache.c +++ b/read-cache.c @@ -1098,7 +1098,7 @@ static int verify_hdr(struct cache_header *hdr, unsigned long size) if (hdr->hdr_signature != htonl(CACHE_SIGNATURE)) return error("bad signature"); - if (hdr->hdr_version != htonl(2)) + if (hdr->hdr_version != htonl(2) && hdr->hdr_version != htonl(3)) return error("bad index version"); git_SHA1_Init(&c); git_SHA1_Update(&c, hdr, size - 20); @@ -1133,6 +1133,7 @@ int read_index(struct index_state *istate) static void convert_from_disk(struct ondisk_cache_entry *ondisk, struct cache_entry *ce) { size_t len; + const char *name; ce->ce_ctime = ntohl(ondisk->ctime.sec); ce->ce_mtime = ntohl(ondisk->mtime.sec); @@ -1145,19 +1146,31 @@ static void convert_from_disk(struct ondisk_cache_entry *ondisk, struct cache_en /* On-disk flags are just 16 bits */ ce->ce_flags = ntohs(ondisk->flags); - /* For future extension: we do not understand this entry yet */ - if (ce->ce_flags & CE_EXTENDED) - die("Unknown index entry format"); hashcpy(ce->sha1, ondisk->sha1); len = ce->ce_flags & CE_NAMEMASK; + + if (ce->ce_flags & CE_EXTENDED) { + struct ondisk_cache_entry_extended *ondisk2; + int extended_flags; + ondisk2 = (struct ondisk_cache_entry_extended *)ondisk; + extended_flags = ntohs(ondisk2->flags2) << 16; + /* We do not yet understand any bit out of CE_EXTENDED_FLAGS */ + if (extended_flags & ~CE_EXTENDED_FLAGS) + die("Unknown index entry format %08x", extended_flags); + ce->ce_flags |= extended_flags; + name = ondisk2->name; + } + else + name = ondisk->name; + if (len == CE_NAMEMASK) - len = strlen(ondisk->name); + len = strlen(name); /* * NEEDSWORK: If the original index is crafted, this copy could * go unchecked. */ - memcpy(ce->name, ondisk->name, len + 1); + memcpy(ce->name, name, len + 1); } static inline size_t estimate_cache_size(size_t ondisk_size, unsigned int entries) @@ -1417,6 +1430,7 @@ static int ce_write_entry(git_SHA_CTX *c, int fd, struct cache_entry *ce) { int size = ondisk_ce_size(ce); struct ondisk_cache_entry *ondisk = xcalloc(1, size); + char *name; ondisk->ctime.sec = htonl(ce->ce_ctime); ondisk->ctime.nsec = 0; @@ -1430,7 +1444,15 @@ static int ce_write_entry(git_SHA_CTX *c, int fd, struct cache_entry *ce) ondisk->size = htonl(ce->ce_size); hashcpy(ondisk->sha1, ce->sha1); ondisk->flags = htons(ce->ce_flags); - memcpy(ondisk->name, ce->name, ce_namelen(ce)); + if (ce->ce_flags & CE_EXTENDED) { + struct ondisk_cache_entry_extended *ondisk2; + ondisk2 = (struct ondisk_cache_entry_extended *)ondisk; + ondisk2->flags2 = htons((ce->ce_flags & CE_EXTENDED_FLAGS) >> 16); + name = ondisk2->name; + } + else + name = ondisk->name; + memcpy(name, ce->name, ce_namelen(ce)); return ce_write(c, fd, ondisk, size); } @@ -1439,16 +1461,25 @@ int write_index(const struct index_state *istate, int newfd) { git_SHA_CTX c; struct cache_header hdr; - int i, err, removed; + int i, err, removed, extended; struct cache_entry **cache = istate->cache; int entries = istate->cache_nr; - for (i = removed = 0; i < entries; i++) + for (i = removed = extended = 0; i < entries; i++) { if (cache[i]->ce_flags & CE_REMOVE) removed++; + /* reduce extended entries if possible */ + cache[i]->ce_flags &= ~CE_EXTENDED; + if (cache[i]->ce_flags & CE_EXTENDED_FLAGS) { + extended++; + cache[i]->ce_flags |= CE_EXTENDED; + } + } + hdr.hdr_signature = htonl(CACHE_SIGNATURE); - hdr.hdr_version = htonl(2); + /* for extended format, increase version so older git won't try to read it */ + hdr.hdr_version = htonl(extended ? 3 : 2); hdr.hdr_entries = htonl(entries - removed); git_SHA1_Init(&c); From 69530cb0c0d5f08dac9bedff56c164b7370f9931 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Fri, 28 Nov 2008 18:41:21 -0800 Subject: [PATCH 2/4] builtin-rm.c: explain and clarify the "local change" logic Explain the logic to check local modification a bit more in the comment, especially because the existing comment that talks about "git rm --cached" was placed in a part that was not about "--cached" at all. Also clarify "if .. else if .." structure. Signed-off-by: Junio C Hamano --- builtin-rm.c | 53 +++++++++++++++++++++++++++++++++++++++++----------- 1 file changed, 42 insertions(+), 11 deletions(-) diff --git a/builtin-rm.c b/builtin-rm.c index b7126e3e25..3d03da09df 100644 --- a/builtin-rm.c +++ b/builtin-rm.c @@ -31,7 +31,8 @@ static void add_list(const char *name) static int check_local_mod(unsigned char *head, int index_only) { - /* items in list are already sorted in the cache order, + /* + * Items in list are already sorted in the cache order, * so we could do this a lot more efficiently by using * tree_desc based traversal if we wanted to, but I am * lazy, and who cares if removal of files is a tad @@ -71,25 +72,55 @@ static int check_local_mod(unsigned char *head, int index_only) */ continue; } + + /* + * "rm" of a path that has changes need to be treated + * carefully not to allow losing local changes + * accidentally. A local change could be (1) file in + * work tree is different since the index; and/or (2) + * the user staged a content that is different from + * the current commit in the index. + * + * In such a case, you would need to --force the + * removal. However, "rm --cached" (remove only from + * the index) is safe if the index matches the file in + * the work tree or the HEAD commit, as it means that + * the content being removed is available elsewhere. + */ + + /* + * Is the index different from the file in the work tree? + */ if (ce_match_stat(ce, &st, 0)) local_changes = 1; + + /* + * Is the index different from the HEAD commit? By + * definition, before the very initial commit, + * anything staged in the index is treated by the same + * way as changed from the HEAD. + */ if (no_head || get_tree_entry(head, name, sha1, &mode) || ce->ce_mode != create_ce_mode(mode) || hashcmp(ce->sha1, sha1)) staged_changes = 1; - if (local_changes && staged_changes && - !(index_only && is_empty_blob_sha1(ce->sha1))) - errs = error("'%s' has staged content different " - "from both the file and the HEAD\n" - "(use -f to force removal)", name); + /* + * If the index does not match the file in the work + * tree and if it does not match the HEAD commit + * either, (1) "git rm" without --cached definitely + * will lose information; (2) "git rm --cached" will + * lose information unless it is about removing an + * "intent to add" entry. + */ + if (local_changes && staged_changes) { + if (!index_only || !is_empty_blob_sha1(ce->sha1)) + errs = error("'%s' has staged content different " + "from both the file and the HEAD\n" + "(use -f to force removal)", name); + } else if (!index_only) { - /* It's not dangerous to "git rm --cached" a - * file if the index matches the file or the - * HEAD, since it means the deleted content is - * still available somewhere. - */ if (staged_changes) errs = error("'%s' has changes staged in the index\n" "(use --cached to keep the file, " From 388b2acd6e649145d4368a10cf4df5f5bbbac2c2 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Fri, 28 Nov 2008 19:55:25 -0800 Subject: [PATCH 3/4] git add --intent-to-add: fix removal of cached emptiness This uses the extended index flag mechanism introduced earlier to mark the entries added to the index via "git add -N" with CE_INTENT_TO_ADD. The logic to detect an "intent to add" entry for the purpose of allowing "git rm --cached $path" is tightened to check not just for a staged empty blob, but with the CE_INTENT_TO_ADD bit. This protects an empty blob that was explicitly added and then modified in the work tree from being dropped with this sequence: $ >empty $ git add empty $ echo "non empty" >empty $ git rm --cached empty Signed-off-by: Junio C Hamano --- builtin-rm.c | 2 +- cache.h | 3 ++- read-cache.c | 2 ++ t/t3600-rm.sh | 4 ++-- 4 files changed, 7 insertions(+), 4 deletions(-) diff --git a/builtin-rm.c b/builtin-rm.c index 3d03da09df..c11f455858 100644 --- a/builtin-rm.c +++ b/builtin-rm.c @@ -115,7 +115,7 @@ static int check_local_mod(unsigned char *head, int index_only) * "intent to add" entry. */ if (local_changes && staged_changes) { - if (!index_only || !is_empty_blob_sha1(ce->sha1)) + if (!index_only || !(ce->ce_flags & CE_INTENT_TO_ADD)) errs = error("'%s' has staged content different " "from both the file and the HEAD\n" "(use -f to force removal)", name); diff --git a/cache.h b/cache.h index ef2e7f93f7..f15b3fc906 100644 --- a/cache.h +++ b/cache.h @@ -176,10 +176,11 @@ struct cache_entry { /* * Extended on-disk flags */ +#define CE_INTENT_TO_ADD 0x20000000 /* CE_EXTENDED2 is for future extension */ #define CE_EXTENDED2 0x80000000 -#define CE_EXTENDED_FLAGS (0) +#define CE_EXTENDED_FLAGS (CE_INTENT_TO_ADD) /* * Safeguard to avoid saving wrong flags: diff --git a/read-cache.c b/read-cache.c index abc627ba0b..fa30a0f885 100644 --- a/read-cache.c +++ b/read-cache.c @@ -546,6 +546,8 @@ int add_to_index(struct index_state *istate, const char *path, struct stat *st, ce->ce_flags = namelen; if (!intent_only) fill_stat_cache_info(ce, st); + else + ce->ce_flags |= CE_INTENT_TO_ADD; if (trust_executable_bit && has_symlinks) ce->ce_mode = create_ce_mode(st_mode); diff --git a/t/t3600-rm.sh b/t/t3600-rm.sh index 5b4d6f7138..b7d46e50a8 100755 --- a/t/t3600-rm.sh +++ b/t/t3600-rm.sh @@ -187,8 +187,8 @@ test_expect_success 'but with -f it should work.' ' test_must_fail git ls-files --error-unmatch baz ' -test_expect_failure 'refuse to remove cached empty file with modifications' ' - touch empty && +test_expect_success 'refuse to remove cached empty file with modifications' ' + >empty && git add empty && echo content >empty && test_must_fail git rm --cached empty From 331fcb598ec0127fd89c992361bc573dcd3a4a63 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Fri, 28 Nov 2008 19:56:34 -0800 Subject: [PATCH 4/4] git add --intent-to-add: do not let an empty blob be committed by accident Writing a tree out of an index with an "intent to add" entry is blocked. This implies that you cannot "git commit" from such a state; however you can still do "git commit -a" or "git commit $that_path". Signed-off-by: Junio C Hamano --- builtin-commit.c | 2 +- builtin-write-tree.c | 2 +- cache-tree.c | 10 +++++++--- read-cache.c | 8 ++++++++ t/t2203-add-intent.sh | 28 ++++++++++++++++++++++++++++ 5 files changed, 45 insertions(+), 5 deletions(-) diff --git a/builtin-commit.c b/builtin-commit.c index 1677e6b45f..2b499fa543 100644 --- a/builtin-commit.c +++ b/builtin-commit.c @@ -639,7 +639,7 @@ static int prepare_to_commit(const char *index_file, const char *prefix) active_cache_tree = cache_tree(); if (cache_tree_update(active_cache_tree, active_cache, active_nr, 0, 0) < 0) { - error("Error building trees; the index is unmerged?"); + error("Error building trees"); return 0; } diff --git a/builtin-write-tree.c b/builtin-write-tree.c index 52a3c015ff..9d640508dd 100644 --- a/builtin-write-tree.c +++ b/builtin-write-tree.c @@ -42,7 +42,7 @@ int cmd_write_tree(int argc, const char **argv, const char *unused_prefix) die("%s: error reading the index", me); break; case WRITE_TREE_UNMERGED_INDEX: - die("%s: error building trees; the index is unmerged?", me); + die("%s: error building trees", me); break; case WRITE_TREE_PREFIX_ERROR: die("%s: prefix %s not found", me, prefix); diff --git a/cache-tree.c b/cache-tree.c index 5f8ee87bb1..3d8f218a5f 100644 --- a/cache-tree.c +++ b/cache-tree.c @@ -155,13 +155,17 @@ static int verify_cache(struct cache_entry **cache, funny = 0; for (i = 0; i < entries; i++) { struct cache_entry *ce = cache[i]; - if (ce_stage(ce)) { + if (ce_stage(ce) || (ce->ce_flags & CE_INTENT_TO_ADD)) { if (10 < ++funny) { fprintf(stderr, "...\n"); break; } - fprintf(stderr, "%s: unmerged (%s)\n", - ce->name, sha1_to_hex(ce->sha1)); + if (ce_stage(ce)) + fprintf(stderr, "%s: unmerged (%s)\n", + ce->name, sha1_to_hex(ce->sha1)); + else + fprintf(stderr, "%s: not added yet\n", + ce->name); } } if (funny) diff --git a/read-cache.c b/read-cache.c index fa30a0f885..8579663ee0 100644 --- a/read-cache.c +++ b/read-cache.c @@ -257,6 +257,14 @@ int ie_match_stat(const struct index_state *istate, if (!ignore_valid && (ce->ce_flags & CE_VALID)) return 0; + /* + * Intent-to-add entries have not been added, so the index entry + * by definition never matches what is in the work tree until it + * actually gets added. + */ + if (ce->ce_flags & CE_INTENT_TO_ADD) + return DATA_CHANGED | TYPE_CHANGED | MODE_CHANGED; + changed = ce_match_stat_basic(ce, st); /* diff --git a/t/t2203-add-intent.sh b/t/t2203-add-intent.sh index d4de35ea06..58a329961e 100755 --- a/t/t2203-add-intent.sh +++ b/t/t2203-add-intent.sh @@ -32,5 +32,33 @@ test_expect_success 'intent to add does not clobber existing paths' ' ! grep "$empty" actual ' +test_expect_success 'cannot commit with i-t-a entry' ' + test_tick && + git commit -a -m initial && + git reset --hard && + + echo xyzzy >rezrov && + echo frotz >nitfol && + git add rezrov && + git add -N nitfol && + test_must_fail git commit +' + +test_expect_success 'can commit with an unrelated i-t-a entry in index' ' + git reset --hard && + echo xyzzy >rezrov && + echo frotz >nitfol && + git add rezrov && + git add -N nitfol && + git commit -m partial rezrov +' + +test_expect_success 'can "commit -a" with an i-t-a entry' ' + git reset --hard && + : >nitfol && + git add -N nitfol && + git commit -a -m all +' + test_done