Merge branch 'jk/big-and-future-archive-tar'

"git archive" learned to handle files that are larger than 8GB and commits dated further in the future than can be expressed by the traditional US-TAR format.

* jk/big-and-future-archive-tar:
  archive-tar: drop return value
  archive-tar: write extended headers for far-future mtime
  archive-tar: write extended headers for file sizes >= 8GB
  t5000: test tar files that overflow ustar headers
  t9300: factor out portable "head -c" replacement
This commit is contained in:
		| @ -18,6 +18,16 @@ static int tar_umask = 002; | |||||||
| static int write_tar_filter_archive(const struct archiver *ar, | static int write_tar_filter_archive(const struct archiver *ar, | ||||||
| 				    struct archiver_args *args); | 				    struct archiver_args *args); | ||||||
|  |  | ||||||
|  | /* | ||||||
|  |  * This is the max value that a ustar size header can specify, as it is fixed | ||||||
|  |  * at 11 octal digits. POSIX specifies that we switch to extended headers at | ||||||
|  |  * this size. | ||||||
|  |  * | ||||||
|  |  * Likewise for the mtime (which happens to use a buffer of the same size). | ||||||
|  |  */ | ||||||
|  | #define USTAR_MAX_SIZE 077777777777UL | ||||||
|  | #define USTAR_MAX_MTIME 077777777777UL | ||||||
|  |  | ||||||
| /* writes out the whole block, but only if it is full */ | /* writes out the whole block, but only if it is full */ | ||||||
| static void write_if_needed(void) | static void write_if_needed(void) | ||||||
| { | { | ||||||
| @ -137,6 +147,20 @@ static void strbuf_append_ext_header(struct strbuf *sb, const char *keyword, | |||||||
| 	strbuf_addch(sb, '\n'); | 	strbuf_addch(sb, '\n'); | ||||||
| } | } | ||||||
|  |  | ||||||
|  | /* | ||||||
|  |  * Like strbuf_append_ext_header, but for numeric values. | ||||||
|  |  */ | ||||||
|  | static void strbuf_append_ext_header_uint(struct strbuf *sb, | ||||||
|  | 					  const char *keyword, | ||||||
|  | 					  uintmax_t value) | ||||||
|  | { | ||||||
|  | 	char buf[40]; /* big enough for 2^128 in decimal, plus NUL */ | ||||||
|  | 	int len; | ||||||
|  |  | ||||||
|  | 	len = xsnprintf(buf, sizeof(buf), "%"PRIuMAX, value); | ||||||
|  | 	strbuf_append_ext_header(sb, keyword, buf, len); | ||||||
|  | } | ||||||
|  |  | ||||||
| static unsigned int ustar_header_chksum(const struct ustar_header *header) | static unsigned int ustar_header_chksum(const struct ustar_header *header) | ||||||
| { | { | ||||||
| 	const unsigned char *p = (const unsigned char *)header; | 	const unsigned char *p = (const unsigned char *)header; | ||||||
| @ -208,7 +232,7 @@ static int write_tar_entry(struct archiver_args *args, | |||||||
| 	struct ustar_header header; | 	struct ustar_header header; | ||||||
| 	struct strbuf ext_header = STRBUF_INIT; | 	struct strbuf ext_header = STRBUF_INIT; | ||||||
| 	unsigned int old_mode = mode; | 	unsigned int old_mode = mode; | ||||||
| 	unsigned long size; | 	unsigned long size, size_in_header; | ||||||
| 	void *buffer; | 	void *buffer; | ||||||
| 	int err = 0; | 	int err = 0; | ||||||
|  |  | ||||||
| @ -267,7 +291,13 @@ static int write_tar_entry(struct archiver_args *args, | |||||||
| 			memcpy(header.linkname, buffer, size); | 			memcpy(header.linkname, buffer, size); | ||||||
| 	} | 	} | ||||||
|  |  | ||||||
| 	prepare_header(args, &header, mode, size); | 	size_in_header = size; | ||||||
|  | 	if (S_ISREG(mode) && size > USTAR_MAX_SIZE) { | ||||||
|  | 		size_in_header = 0; | ||||||
|  | 		strbuf_append_ext_header_uint(&ext_header, "size", size); | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	prepare_header(args, &header, mode, size_in_header); | ||||||
|  |  | ||||||
| 	if (ext_header.len > 0) { | 	if (ext_header.len > 0) { | ||||||
| 		err = write_extended_header(args, sha1, ext_header.buf, | 		err = write_extended_header(args, sha1, ext_header.buf, | ||||||
| @ -289,15 +319,25 @@ static int write_tar_entry(struct archiver_args *args, | |||||||
| 	return err; | 	return err; | ||||||
| } | } | ||||||
|  |  | ||||||
| static int write_global_extended_header(struct archiver_args *args) | static void write_global_extended_header(struct archiver_args *args) | ||||||
| { | { | ||||||
| 	const unsigned char *sha1 = args->commit_sha1; | 	const unsigned char *sha1 = args->commit_sha1; | ||||||
| 	struct strbuf ext_header = STRBUF_INIT; | 	struct strbuf ext_header = STRBUF_INIT; | ||||||
| 	struct ustar_header header; | 	struct ustar_header header; | ||||||
| 	unsigned int mode; | 	unsigned int mode; | ||||||
| 	int err = 0; |  | ||||||
|  |  | ||||||
| 	strbuf_append_ext_header(&ext_header, "comment", sha1_to_hex(sha1), 40); | 	if (sha1) | ||||||
|  | 		strbuf_append_ext_header(&ext_header, "comment", | ||||||
|  | 					 sha1_to_hex(sha1), 40); | ||||||
|  | 	if (args->time > USTAR_MAX_MTIME) { | ||||||
|  | 		strbuf_append_ext_header_uint(&ext_header, "mtime", | ||||||
|  | 					      args->time); | ||||||
|  | 		args->time = USTAR_MAX_MTIME; | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	if (!ext_header.len) | ||||||
|  | 		return; | ||||||
|  |  | ||||||
| 	memset(&header, 0, sizeof(header)); | 	memset(&header, 0, sizeof(header)); | ||||||
| 	*header.typeflag = TYPEFLAG_GLOBAL_HEADER; | 	*header.typeflag = TYPEFLAG_GLOBAL_HEADER; | ||||||
| 	mode = 0100666; | 	mode = 0100666; | ||||||
| @ -306,7 +346,6 @@ static int write_global_extended_header(struct archiver_args *args) | |||||||
| 	write_blocked(&header, sizeof(header)); | 	write_blocked(&header, sizeof(header)); | ||||||
| 	write_blocked(ext_header.buf, ext_header.len); | 	write_blocked(ext_header.buf, ext_header.len); | ||||||
| 	strbuf_release(&ext_header); | 	strbuf_release(&ext_header); | ||||||
| 	return err; |  | ||||||
| } | } | ||||||
|  |  | ||||||
| static struct archiver **tar_filters; | static struct archiver **tar_filters; | ||||||
| @ -382,10 +421,8 @@ static int write_tar_archive(const struct archiver *ar, | |||||||
| { | { | ||||||
| 	int err = 0; | 	int err = 0; | ||||||
|  |  | ||||||
| 	if (args->commit_sha1) | 	write_global_extended_header(args); | ||||||
| 		err = write_global_extended_header(args); | 	err = write_archive_entries(args, write_tar_entry); | ||||||
| 	if (!err) |  | ||||||
| 		err = write_archive_entries(args, write_tar_entry); |  | ||||||
| 	if (!err) | 	if (!err) | ||||||
| 		write_trailer(); | 		write_trailer(); | ||||||
| 	return err; | 	return err; | ||||||
|  | |||||||
| @ -319,4 +319,78 @@ test_expect_success 'catch non-matching pathspec' ' | |||||||
| 	test_must_fail git archive -v HEAD -- "*.abc" >/dev/null | 	test_must_fail git archive -v HEAD -- "*.abc" >/dev/null | ||||||
| ' | ' | ||||||
|  |  | ||||||
|  | # Pull the size and date of each entry in a tarfile using the system tar. | ||||||
|  | # | ||||||
|  | # We'll pull out only the year from the date; that avoids any question of | ||||||
|  | # timezones impacting the result (as long as we keep our test times away from a | ||||||
|  | # year boundary; our reference times are all in August). | ||||||
|  | # | ||||||
|  | # The output of tar_info is expected to be "<size> <year>", both in decimal. It | ||||||
|  | # ignores the return value of tar. We have to do this, because some of our test | ||||||
|  | # input is only partial (the real data is 64GB in some cases). | ||||||
|  | tar_info () { | ||||||
|  | 	"$TAR" tvf "$1" | | ||||||
|  | 	awk '{ | ||||||
|  | 		split($4, date, "-") | ||||||
|  | 		print $3 " " date[1] | ||||||
|  | 	}' | ||||||
|  | } | ||||||
|  |  | ||||||
|  | # See if our system tar can handle a tar file with huge sizes and dates far in | ||||||
|  | # the future, and that we can actually parse its output. | ||||||
|  | # | ||||||
|  | # The reference file was generated by GNU tar, and the magic time and size are | ||||||
|  | # both octal 01000000000001, which overflows normal ustar fields. | ||||||
|  | test_lazy_prereq TAR_HUGE ' | ||||||
|  | 	echo "68719476737 4147" >expect && | ||||||
|  | 	tar_info "$TEST_DIRECTORY"/t5000/huge-and-future.tar >actual && | ||||||
|  | 	test_cmp expect actual | ||||||
|  | ' | ||||||
|  |  | ||||||
|  | test_expect_success 'set up repository with huge blob' ' | ||||||
|  | 	obj_d=19 && | ||||||
|  | 	obj_f=f9c8273ec45a8938e6999cb59b3ff66739902a && | ||||||
|  | 	obj=${obj_d}${obj_f} && | ||||||
|  | 	mkdir -p .git/objects/$obj_d && | ||||||
|  | 	cp "$TEST_DIRECTORY"/t5000/$obj .git/objects/$obj_d/$obj_f && | ||||||
|  | 	rm -f .git/index && | ||||||
|  | 	git update-index --add --cacheinfo 100644,$obj,huge && | ||||||
|  | 	git commit -m huge | ||||||
|  | ' | ||||||
|  |  | ||||||
|  | # We expect git to die with SIGPIPE here (otherwise we | ||||||
|  | # would generate the whole 64GB). | ||||||
|  | test_expect_success 'generate tar with huge size' ' | ||||||
|  | 	{ | ||||||
|  | 		git archive HEAD | ||||||
|  | 		echo $? >exit-code | ||||||
|  | 	} | test_copy_bytes 4096 >huge.tar && | ||||||
|  | 	echo 141 >expect && | ||||||
|  | 	test_cmp expect exit-code | ||||||
|  | ' | ||||||
|  |  | ||||||
|  | test_expect_success TAR_HUGE 'system tar can read our huge size' ' | ||||||
|  | 	echo 68719476737 >expect && | ||||||
|  | 	tar_info huge.tar | cut -d" " -f1 >actual && | ||||||
|  | 	test_cmp expect actual | ||||||
|  | ' | ||||||
|  |  | ||||||
|  | test_expect_success 'set up repository with far-future commit' ' | ||||||
|  | 	rm -f .git/index && | ||||||
|  | 	echo content >file && | ||||||
|  | 	git add file && | ||||||
|  | 	GIT_COMMITTER_DATE="@68719476737 +0000" \ | ||||||
|  | 		git commit -m "tempori parendum" | ||||||
|  | ' | ||||||
|  |  | ||||||
|  | test_expect_success 'generate tar with future mtime' ' | ||||||
|  | 	git archive HEAD >future.tar | ||||||
|  | ' | ||||||
|  |  | ||||||
|  | test_expect_success TAR_HUGE 'system tar can read our future mtime' ' | ||||||
|  | 	echo 4147 >expect && | ||||||
|  | 	tar_info future.tar | cut -d" " -f2 >actual && | ||||||
|  | 	test_cmp expect actual | ||||||
|  | ' | ||||||
|  |  | ||||||
| test_done | test_done | ||||||
|  | |||||||
							
								
								
									
										
											BIN
										
									
								
								t/t5000/19f9c8273ec45a8938e6999cb59b3ff66739902a
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										
											BIN
										
									
								
								t/t5000/19f9c8273ec45a8938e6999cb59b3ff66739902a
									
									
									
									
									
										Normal file
									
								
							
										
											Binary file not shown.
										
									
								
							
							
								
								
									
										
											BIN
										
									
								
								t/t5000/huge-and-future.tar
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										
											BIN
										
									
								
								t/t5000/huge-and-future.tar
									
									
									
									
									
										Normal file
									
								
							
										
											Binary file not shown.
										
									
								
							| @ -7,23 +7,6 @@ test_description='test git fast-import utility' | |||||||
| . ./test-lib.sh | . ./test-lib.sh | ||||||
| . "$TEST_DIRECTORY"/diff-lib.sh ;# test-lib chdir's into trash | . "$TEST_DIRECTORY"/diff-lib.sh ;# test-lib chdir's into trash | ||||||
|  |  | ||||||
| # Print $1 bytes from stdin to stdout. |  | ||||||
| # |  | ||||||
| # This could be written as "head -c $1", but IRIX "head" does not |  | ||||||
| # support the -c option. |  | ||||||
| head_c () { |  | ||||||
| 	perl -e ' |  | ||||||
| 		my $len = $ARGV[1]; |  | ||||||
| 		while ($len > 0) { |  | ||||||
| 			my $s; |  | ||||||
| 			my $nread = sysread(STDIN, $s, $len); |  | ||||||
| 			die "cannot read: $!" unless defined($nread); |  | ||||||
| 			print $s; |  | ||||||
| 			$len -= $nread; |  | ||||||
| 		} |  | ||||||
| 	' - "$1" |  | ||||||
| } |  | ||||||
|  |  | ||||||
| verify_packs () { | verify_packs () { | ||||||
| 	for p in .git/objects/pack/*.pack | 	for p in .git/objects/pack/*.pack | ||||||
| 	do | 	do | ||||||
| @ -2481,7 +2464,7 @@ test_expect_success PIPE 'R: copy using cat-file' ' | |||||||
|  |  | ||||||
| 		read blob_id type size <&3 && | 		read blob_id type size <&3 && | ||||||
| 		echo "$blob_id $type $size" >response && | 		echo "$blob_id $type $size" >response && | ||||||
| 		head_c $size >blob <&3 && | 		test_copy_bytes $size >blob <&3 && | ||||||
| 		read newline <&3 && | 		read newline <&3 && | ||||||
|  |  | ||||||
| 		cat <<-EOF && | 		cat <<-EOF && | ||||||
| @ -2524,7 +2507,7 @@ test_expect_success PIPE 'R: print blob mid-commit' ' | |||||||
| 		EOF | 		EOF | ||||||
|  |  | ||||||
| 		read blob_id type size <&3 && | 		read blob_id type size <&3 && | ||||||
| 		head_c $size >actual <&3 && | 		test_copy_bytes $size >actual <&3 && | ||||||
| 		read newline <&3 && | 		read newline <&3 && | ||||||
|  |  | ||||||
| 		echo | 		echo | ||||||
| @ -2559,7 +2542,7 @@ test_expect_success PIPE 'R: print staged blob within commit' ' | |||||||
| 		echo "cat-blob $to_get" && | 		echo "cat-blob $to_get" && | ||||||
|  |  | ||||||
| 		read blob_id type size <&3 && | 		read blob_id type size <&3 && | ||||||
| 		head_c $size >actual <&3 && | 		test_copy_bytes $size >actual <&3 && | ||||||
| 		read newline <&3 && | 		read newline <&3 && | ||||||
|  |  | ||||||
| 		echo deleteall | 		echo deleteall | ||||||
|  | |||||||
| @ -961,3 +961,17 @@ test_env () { | |||||||
| 		done | 		done | ||||||
| 	) | 	) | ||||||
| } | } | ||||||
|  |  | ||||||
|  | # Read up to "$1" bytes (or to EOF) from stdin and write them to stdout. | ||||||
|  | test_copy_bytes () { | ||||||
|  | 	perl -e ' | ||||||
|  | 		my $len = $ARGV[1]; | ||||||
|  | 		while ($len > 0) { | ||||||
|  | 			my $s; | ||||||
|  | 			my $nread = sysread(STDIN, $s, $len); | ||||||
|  | 			die "cannot read: $!" unless defined($nread); | ||||||
|  | 			print $s; | ||||||
|  | 			$len -= $nread; | ||||||
|  | 		} | ||||||
|  | 	' - "$1" | ||||||
|  | } | ||||||
|  | |||||||
		Reference in New Issue
	
	Block a user
	 Junio C Hamano
					Junio C Hamano