Merge branch 'ee/clean-remove-dirs'
Replace "is this subdirectory a separate repository that should not be touched?" check "git clean" does by checking if it has .git/HEAD using the submodule-related code with a more optimized check. * ee/clean-remove-dirs: read_gitfile_gently: fix use-after-free clean: improve performance when removing lots of directories p7300: add performance tests for clean t7300: add tests to document behavior of clean and nested git setup: sanity check file size in read_gitfile_gently setup: add gentle version of read_gitfile
This commit is contained in:
		| @ -10,7 +10,6 @@ | ||||
| #include "cache.h" | ||||
| #include "dir.h" | ||||
| #include "parse-options.h" | ||||
| #include "refs.h" | ||||
| #include "string-list.h" | ||||
| #include "quote.h" | ||||
| #include "column.h" | ||||
| @ -148,6 +147,31 @@ static int exclude_cb(const struct option *opt, const char *arg, int unset) | ||||
| 	return 0; | ||||
| } | ||||
|  | ||||
| /* | ||||
|  * Return 1 if the given path is the root of a git repository or | ||||
|  * submodule else 0. Will not return 1 for bare repositories with the | ||||
|  * exception of creating a bare repository in "foo/.git" and calling | ||||
|  * is_git_repository("foo"). | ||||
|  */ | ||||
| static int is_git_repository(struct strbuf *path) | ||||
| { | ||||
| 	int ret = 0; | ||||
| 	int gitfile_error; | ||||
| 	size_t orig_path_len = path->len; | ||||
| 	assert(orig_path_len != 0); | ||||
| 	if (path->buf[orig_path_len - 1] != '/') | ||||
| 		strbuf_addch(path, '/'); | ||||
| 	strbuf_addstr(path, ".git"); | ||||
| 	if (read_gitfile_gently(path->buf, &gitfile_error) || is_git_directory(path->buf)) | ||||
| 		ret = 1; | ||||
| 	if (gitfile_error == READ_GITFILE_ERR_OPEN_FAILED || | ||||
| 	    gitfile_error == READ_GITFILE_ERR_READ_FAILED) | ||||
| 		ret = 1;  /* This could be a real .git file, take the | ||||
| 			   * safe option and avoid cleaning */ | ||||
| 	strbuf_setlen(path, orig_path_len); | ||||
| 	return ret; | ||||
| } | ||||
|  | ||||
| static int remove_dirs(struct strbuf *path, const char *prefix, int force_flag, | ||||
| 		int dry_run, int quiet, int *dir_gone) | ||||
| { | ||||
| @ -155,13 +179,11 @@ static int remove_dirs(struct strbuf *path, const char *prefix, int force_flag, | ||||
| 	struct strbuf quoted = STRBUF_INIT; | ||||
| 	struct dirent *e; | ||||
| 	int res = 0, ret = 0, gone = 1, original_len = path->len, len; | ||||
| 	unsigned char submodule_head[20]; | ||||
| 	struct string_list dels = STRING_LIST_INIT_DUP; | ||||
|  | ||||
| 	*dir_gone = 1; | ||||
|  | ||||
| 	if ((force_flag & REMOVE_DIR_KEEP_NESTED_GIT) && | ||||
| 			!resolve_gitlink_ref(path->buf, "HEAD", submodule_head)) { | ||||
| 	if ((force_flag & REMOVE_DIR_KEEP_NESTED_GIT) && is_git_repository(path)) { | ||||
| 		if (!quiet) { | ||||
| 			quote_path_relative(path->buf, prefix, "ed); | ||||
| 			printf(dry_run ?  _(msg_would_skip_git_dir) : _(msg_skip_git_dir), | ||||
|  | ||||
							
								
								
									
										12
									
								
								cache.h
									
									
									
									
									
								
							
							
						
						
									
										12
									
								
								cache.h
									
									
									
									
									
								
							| @ -447,7 +447,17 @@ extern int get_common_dir(struct strbuf *sb, const char *gitdir); | ||||
| extern const char *get_git_namespace(void); | ||||
| extern const char *strip_namespace(const char *namespaced_ref); | ||||
| extern const char *get_git_work_tree(void); | ||||
| extern const char *read_gitfile(const char *path); | ||||
|  | ||||
| #define READ_GITFILE_ERR_STAT_FAILED 1 | ||||
| #define READ_GITFILE_ERR_NOT_A_FILE 2 | ||||
| #define READ_GITFILE_ERR_OPEN_FAILED 3 | ||||
| #define READ_GITFILE_ERR_READ_FAILED 4 | ||||
| #define READ_GITFILE_ERR_INVALID_FORMAT 5 | ||||
| #define READ_GITFILE_ERR_NO_PATH 6 | ||||
| #define READ_GITFILE_ERR_NOT_A_REPO 7 | ||||
| #define READ_GITFILE_ERR_TOO_LARGE 8 | ||||
| extern const char *read_gitfile_gently(const char *path, int *return_error_code); | ||||
| #define read_gitfile(path) read_gitfile_gently((path), NULL) | ||||
| extern const char *resolve_gitdir(const char *suspect); | ||||
| extern void set_git_work_tree(const char *tree); | ||||
|  | ||||
|  | ||||
							
								
								
									
										89
									
								
								setup.c
									
									
									
									
									
								
							
							
						
						
									
										89
									
								
								setup.c
									
									
									
									
									
								
							| @ -411,35 +411,58 @@ static void update_linked_gitdir(const char *gitfile, const char *gitdir) | ||||
| /* | ||||
|  * Try to read the location of the git directory from the .git file, | ||||
|  * return path to git directory if found. | ||||
|  * | ||||
|  * On failure, if return_error_code is not NULL, return_error_code | ||||
|  * will be set to an error code and NULL will be returned. If | ||||
|  * return_error_code is NULL the function will die instead (for most | ||||
|  * cases). | ||||
|  */ | ||||
| const char *read_gitfile(const char *path) | ||||
| const char *read_gitfile_gently(const char *path, int *return_error_code) | ||||
| { | ||||
| 	char *buf; | ||||
| 	char *dir; | ||||
| 	const int max_file_size = 1 << 20;  /* 1MB */ | ||||
| 	int error_code = 0; | ||||
| 	char *buf = NULL; | ||||
| 	char *dir = NULL; | ||||
| 	const char *slash; | ||||
| 	struct stat st; | ||||
| 	int fd; | ||||
| 	ssize_t len; | ||||
|  | ||||
| 	if (stat(path, &st)) | ||||
| 		return NULL; | ||||
| 	if (!S_ISREG(st.st_mode)) | ||||
| 		return NULL; | ||||
| 	if (stat(path, &st)) { | ||||
| 		error_code = READ_GITFILE_ERR_STAT_FAILED; | ||||
| 		goto cleanup_return; | ||||
| 	} | ||||
| 	if (!S_ISREG(st.st_mode)) { | ||||
| 		error_code = READ_GITFILE_ERR_NOT_A_FILE; | ||||
| 		goto cleanup_return; | ||||
| 	} | ||||
| 	if (st.st_size > max_file_size) { | ||||
| 		error_code = READ_GITFILE_ERR_TOO_LARGE; | ||||
| 		goto cleanup_return; | ||||
| 	} | ||||
| 	fd = open(path, O_RDONLY); | ||||
| 	if (fd < 0) | ||||
| 		die_errno("Error opening '%s'", path); | ||||
| 	if (fd < 0) { | ||||
| 		error_code = READ_GITFILE_ERR_OPEN_FAILED; | ||||
| 		goto cleanup_return; | ||||
| 	} | ||||
| 	buf = xmalloc(st.st_size + 1); | ||||
| 	len = read_in_full(fd, buf, st.st_size); | ||||
| 	close(fd); | ||||
| 	if (len != st.st_size) | ||||
| 		die("Error reading %s", path); | ||||
| 	if (len != st.st_size) { | ||||
| 		error_code = READ_GITFILE_ERR_READ_FAILED; | ||||
| 		goto cleanup_return; | ||||
| 	} | ||||
| 	buf[len] = '\0'; | ||||
| 	if (!starts_with(buf, "gitdir: ")) | ||||
| 		die("Invalid gitfile format: %s", path); | ||||
| 	if (!starts_with(buf, "gitdir: ")) { | ||||
| 		error_code = READ_GITFILE_ERR_INVALID_FORMAT; | ||||
| 		goto cleanup_return; | ||||
| 	} | ||||
| 	while (buf[len - 1] == '\n' || buf[len - 1] == '\r') | ||||
| 		len--; | ||||
| 	if (len < 9) | ||||
| 		die("No path in gitfile: %s", path); | ||||
| 	if (len < 9) { | ||||
| 		error_code = READ_GITFILE_ERR_NO_PATH; | ||||
| 		goto cleanup_return; | ||||
| 	} | ||||
| 	buf[len] = '\0'; | ||||
| 	dir = buf + 8; | ||||
|  | ||||
| @ -453,15 +476,41 @@ const char *read_gitfile(const char *path) | ||||
| 		free(buf); | ||||
| 		buf = dir; | ||||
| 	} | ||||
|  | ||||
| 	if (!is_git_directory(dir)) | ||||
| 		die("Not a git repository: %s", dir); | ||||
|  | ||||
| 	if (!is_git_directory(dir)) { | ||||
| 		error_code = READ_GITFILE_ERR_NOT_A_REPO; | ||||
| 		goto cleanup_return; | ||||
| 	} | ||||
| 	update_linked_gitdir(path, dir); | ||||
| 	path = real_path(dir); | ||||
|  | ||||
| cleanup_return: | ||||
| 	if (return_error_code) | ||||
| 		*return_error_code = error_code; | ||||
| 	else if (error_code) { | ||||
| 		switch (error_code) { | ||||
| 		case READ_GITFILE_ERR_STAT_FAILED: | ||||
| 		case READ_GITFILE_ERR_NOT_A_FILE: | ||||
| 			/* non-fatal; follow return path */ | ||||
| 			break; | ||||
| 		case READ_GITFILE_ERR_OPEN_FAILED: | ||||
| 			die_errno("Error opening '%s'", path); | ||||
| 		case READ_GITFILE_ERR_TOO_LARGE: | ||||
| 			die("Too large to be a .git file: '%s'", path); | ||||
| 		case READ_GITFILE_ERR_READ_FAILED: | ||||
| 			die("Error reading %s", path); | ||||
| 		case READ_GITFILE_ERR_INVALID_FORMAT: | ||||
| 			die("Invalid gitfile format: %s", path); | ||||
| 		case READ_GITFILE_ERR_NO_PATH: | ||||
| 			die("No path in gitfile: %s", path); | ||||
| 		case READ_GITFILE_ERR_NOT_A_REPO: | ||||
| 			die("Not a git repository: %s", dir); | ||||
| 		default: | ||||
| 			assert(0); | ||||
| 		} | ||||
| 	} | ||||
|  | ||||
| 	free(buf); | ||||
| 	return path; | ||||
| 	return error_code ? NULL : path; | ||||
| } | ||||
|  | ||||
| static const char *setup_explicit_git_dir(const char *gitdirenv, | ||||
|  | ||||
							
								
								
									
										31
									
								
								t/perf/p7300-clean.sh
									
									
									
									
									
										Executable file
									
								
							
							
						
						
									
										31
									
								
								t/perf/p7300-clean.sh
									
									
									
									
									
										Executable file
									
								
							| @ -0,0 +1,31 @@ | ||||
| #!/bin/sh | ||||
|  | ||||
| test_description="Test git-clean performance" | ||||
|  | ||||
| . ./perf-lib.sh | ||||
|  | ||||
| test_perf_default_repo | ||||
| test_checkout_worktree | ||||
|  | ||||
| test_expect_success 'setup untracked directory with many sub dirs' ' | ||||
| 	rm -rf 500_sub_dirs 100000_sub_dirs clean_test_dir && | ||||
| 	mkdir 500_sub_dirs 100000_sub_dirs clean_test_dir && | ||||
| 	for i in $(test_seq 1 500) | ||||
| 	do | ||||
| 		mkdir 500_sub_dirs/dir$i || return $? | ||||
| 	done && | ||||
| 	for i in $(test_seq 1 200) | ||||
| 	do | ||||
| 		cp -r 500_sub_dirs 100000_sub_dirs/dir$i || return $? | ||||
| 	done | ||||
| ' | ||||
|  | ||||
| test_perf 'clean many untracked sub dirs, check for nested git' ' | ||||
| 	git clean -n -q -f -d 100000_sub_dirs/ | ||||
| ' | ||||
|  | ||||
| test_perf 'clean many untracked sub dirs, ignore nested git' ' | ||||
| 	git clean -n -q -f -f -d 100000_sub_dirs/ | ||||
| ' | ||||
|  | ||||
| test_done | ||||
							
								
								
									
										140
									
								
								t/t7300-clean.sh
									
									
									
									
									
								
							
							
						
						
									
										140
									
								
								t/t7300-clean.sh
									
									
									
									
									
								
							| @ -455,6 +455,146 @@ test_expect_success 'nested git work tree' ' | ||||
| 	! test -d bar | ||||
| ' | ||||
|  | ||||
| test_expect_success 'should clean things that almost look like git but are not' ' | ||||
| 	rm -fr almost_git almost_bare_git almost_submodule && | ||||
| 	mkdir -p almost_git/.git/objects && | ||||
| 	mkdir -p almost_git/.git/refs && | ||||
| 	cat >almost_git/.git/HEAD <<-\EOF && | ||||
| 	garbage | ||||
| 	EOF | ||||
| 	cp -r almost_git/.git/ almost_bare_git && | ||||
| 	mkdir almost_submodule/ && | ||||
| 	cat >almost_submodule/.git <<-\EOF && | ||||
| 	garbage | ||||
| 	EOF | ||||
| 	test_when_finished "rm -rf almost_*" && | ||||
| 	git clean -f -d && | ||||
| 	test_path_is_missing almost_git && | ||||
| 	test_path_is_missing almost_bare_git && | ||||
| 	test_path_is_missing almost_submodule | ||||
| ' | ||||
|  | ||||
| test_expect_success 'should not clean submodules' ' | ||||
| 	rm -fr repo to_clean sub1 sub2 && | ||||
| 	mkdir repo to_clean && | ||||
| 	( | ||||
| 		cd repo && | ||||
| 		git init && | ||||
| 		test_commit msg hello.world | ||||
| 	) && | ||||
| 	git submodule add ./repo/.git sub1 && | ||||
| 	git commit -m "sub1" && | ||||
| 	git branch before_sub2 && | ||||
| 	git submodule add ./repo/.git sub2 && | ||||
| 	git commit -m "sub2" && | ||||
| 	git checkout before_sub2 && | ||||
| 	>to_clean/should_clean.this && | ||||
| 	git clean -f -d && | ||||
| 	test_path_is_file repo/.git/index && | ||||
| 	test_path_is_file repo/hello.world && | ||||
| 	test_path_is_file sub1/.git && | ||||
| 	test_path_is_file sub1/hello.world && | ||||
| 	test_path_is_file sub2/.git && | ||||
| 	test_path_is_file sub2/hello.world && | ||||
| 	test_path_is_missing to_clean | ||||
| ' | ||||
|  | ||||
| test_expect_success 'should avoid cleaning possible submodules' ' | ||||
| 	rm -fr to_clean possible_sub1 && | ||||
| 	mkdir to_clean possible_sub1 && | ||||
| 	test_when_finished "rm -rf possible_sub*" && | ||||
| 	echo "gitdir: foo" >possible_sub1/.git && | ||||
| 	>possible_sub1/hello.world && | ||||
| 	chmod 0 possible_sub1/.git && | ||||
| 	>to_clean/should_clean.this && | ||||
| 	git clean -f -d && | ||||
| 	test_path_is_file possible_sub1/.git && | ||||
| 	test_path_is_file possible_sub1/hello.world && | ||||
| 	test_path_is_missing to_clean | ||||
| ' | ||||
|  | ||||
| test_expect_success 'nested (empty) git should be kept' ' | ||||
| 	rm -fr empty_repo to_clean && | ||||
| 	git init empty_repo && | ||||
| 	mkdir to_clean && | ||||
| 	>to_clean/should_clean.this && | ||||
| 	git clean -f -d && | ||||
| 	test_path_is_file empty_repo/.git/HEAD && | ||||
| 	test_path_is_missing to_clean | ||||
| ' | ||||
|  | ||||
| test_expect_success 'nested bare repositories should be cleaned' ' | ||||
| 	rm -fr bare1 bare2 subdir && | ||||
| 	git init --bare bare1 && | ||||
| 	git clone --local --bare . bare2 && | ||||
| 	mkdir subdir && | ||||
| 	cp -r bare2 subdir/bare3 && | ||||
| 	git clean -f -d && | ||||
| 	test_path_is_missing bare1 && | ||||
| 	test_path_is_missing bare2 && | ||||
| 	test_path_is_missing subdir | ||||
| ' | ||||
|  | ||||
| test_expect_failure 'nested (empty) bare repositories should be cleaned even when in .git' ' | ||||
| 	rm -fr strange_bare && | ||||
| 	mkdir strange_bare && | ||||
| 	git init --bare strange_bare/.git && | ||||
| 	git clean -f -d && | ||||
| 	test_path_is_missing strange_bare | ||||
| ' | ||||
|  | ||||
| test_expect_failure 'nested (non-empty) bare repositories should be cleaned even when in .git' ' | ||||
| 	rm -fr strange_bare && | ||||
| 	mkdir strange_bare && | ||||
| 	git clone --local --bare . strange_bare/.git && | ||||
| 	git clean -f -d && | ||||
| 	test_path_is_missing strange_bare | ||||
| ' | ||||
|  | ||||
| test_expect_success 'giving path in nested git work tree will remove it' ' | ||||
| 	rm -fr repo && | ||||
| 	mkdir repo && | ||||
| 	( | ||||
| 		cd repo && | ||||
| 		git init && | ||||
| 		mkdir -p bar/baz && | ||||
| 		test_commit msg bar/baz/hello.world | ||||
| 	) && | ||||
| 	git clean -f -d repo/bar/baz && | ||||
| 	test_path_is_file repo/.git/HEAD && | ||||
| 	test_path_is_dir repo/bar/ && | ||||
| 	test_path_is_missing repo/bar/baz | ||||
| ' | ||||
|  | ||||
| test_expect_success 'giving path to nested .git will not remove it' ' | ||||
| 	rm -fr repo && | ||||
| 	mkdir repo untracked && | ||||
| 	( | ||||
| 		cd repo && | ||||
| 		git init && | ||||
| 		test_commit msg hello.world | ||||
| 	) && | ||||
| 	git clean -f -d repo/.git && | ||||
| 	test_path_is_file repo/.git/HEAD && | ||||
| 	test_path_is_dir repo/.git/refs && | ||||
| 	test_path_is_dir repo/.git/objects && | ||||
| 	test_path_is_dir untracked/ | ||||
| ' | ||||
|  | ||||
| test_expect_success 'giving path to nested .git/ will remove contents' ' | ||||
| 	rm -fr repo untracked && | ||||
| 	mkdir repo untracked && | ||||
| 	( | ||||
| 		cd repo && | ||||
| 		git init && | ||||
| 		test_commit msg hello.world | ||||
| 	) && | ||||
| 	git clean -f -d repo/.git/ && | ||||
| 	test_path_is_dir repo/.git && | ||||
| 	test_dir_is_empty repo/.git && | ||||
| 	test_path_is_dir untracked/ | ||||
| ' | ||||
|  | ||||
| test_expect_success 'force removal of nested git work tree' ' | ||||
| 	rm -fr foo bar baz && | ||||
| 	mkdir -p foo bar baz/boo && | ||||
|  | ||||
		Reference in New Issue
	
	Block a user
	 Junio C Hamano
					Junio C Hamano