chainlint: annotate original test definition rather than token stream

When chainlint detects problems in a test, such as a broken &&-chain, it prints out the test with "?!FOO?!" annotations inserted at each problem location. However, rather than annotating the original test definition, it instead dumps out a parsed token representation of the test. Since it lacks comments, indentations, here-doc bodies, and so forth, this tokenized representation can be difficult for the test author to digest and relate back to the original test definition. However, now that each parsed token carries positional information, the location of a detected problem can be pinpointed precisely in the original test definition. Therefore, take advantage of this information to annotate the test definition itself rather than annotating the parsed token stream, thus making it easier for a test author to relate a problem back to the source. Maintaining the positional meta-information associated with each detected problem requires a slight change in how the problems are managed internally. In particular, shell syntax such as: msg="total: $(cd data; wc -w *.txt) words" requires the lexical analyzer to recursively invoke the parser in order to detect problems within the $(...) expression inside the double-quoted string. In this case, the recursive parse context will detect the broken &&-chain between the `cd` and `wc` commands, returning the token stream: cd data ; ?!AMP?! wc -w *.txt However, the parent parse context will see everything inside the double-quotes as a single string token: "total: $(cd data ; ?!AMP?! wc -w *.txt) words" losing whatever positional information was attached to the ";" token where the problem was detected. One way to preserve the positional information of a detected problem in a recursive parse context within a string would be to attach the positional information to the annotation textually; for instance: "total: $(cd data ; ?!AMP:21:22?! wc -w *.txt) words" and then extract the positional information when annotating the original test definition. However, a cleaner and much simpler approach is to maintain the list of detected problems separately rather than embedding the problems as annotations directly in the parsed token stream. Not only does this ensure that positional information within recursive parse contexts is not lost, but it keeps the token stream free from non-token pollution, which may simplify implementation of validations added in the future since they won't have to handle non-token "?!FOO!?" items specially. Finally, the chainlint self-test "expect" files need a few mechanical adjustments now that the original test definitions are emitted rather than the parsed token stream. In particular, the following items missing from the historic parsed-token output are now preserved verbatim: * indentation (and whitespace, in general) * comments * here-doc bodies * here-doc tag quoting (i.e. "\EOF") * line-splices (i.e. "\" at the end of a line) Signed-off-by: Eric Sunshine <sunshine@sunshineco.com> Signed-off-by: Taylor Blau <me@ttaylorr.com>
2022-11-08 19:08:30 +00:00
parent 5f0321a9f2
commit 73c768dae9
22 changed files with 163 additions and 33 deletions
--- a/t/chainlint.pl
+++ b/t/chainlint.pl
@ -459,6 +459,13 @@ package TestParser;
 use base 'ShellParser';
 sub new {
 	my $class = shift @_;
 	my $self = $class->SUPER::new(@_);
 	$self->{problems} = [];
 	return $self;
 }
 sub find_non_nl {
 	my $tokens = shift @_;
 	my $n = shift @_;
@ -498,7 +505,7 @@ sub parse_loop_body {
 	return @tokens if ends_with(\@tokens, [qr/^\|\|$/, "\n", qr/^echo$/, qr/^.+$/]);
 	# flag missing "return/exit" handling explicit failure in loop body
 	my $n = find_non_nl(\@tokens);
-	splice(@tokens, $n + 1, 0, ['?!LOOP?!', $tokens[$n]->[1], $tokens[$n]->[2]]);
+	push(@{$self->{problems}}, ['LOOP', $tokens[$n]]);
 	return @tokens;
 }
@ -511,6 +518,7 @@ my @safe_endings = (
 sub accumulate {
 	my ($self, $tokens, $cmd) = @_;
 	my $problems = $self->{problems};
 	# no previous command to check for missing "&&"
 	goto DONE unless @$tokens;
@ -531,13 +539,13 @@ sub accumulate {
 	# failure explicitly), then okay for all preceding commands to be
 	# missing "&&"
 	if ($$cmd[0]->[0] =~ /^(?:false|return|exit)$/) {
-		@$tokens = grep {$_->[0] !~ /^\?!AMP\?!$/} @$tokens;
+		@$problems = grep {$_->[0] ne 'AMP'} @$problems;
 		goto DONE;
 	}
 	# flag missing "&&" at end of previous command
 	my $n = find_non_nl($tokens);
-	splice(@$tokens, $n + 1, 0, ['?!AMP?!', $$tokens[$n]->[1], $$tokens[$n]->[2]]) unless $n < 0;
+	push(@$problems, ['AMP', $tokens->[$n]]) unless $n < 0;
 DONE:
 	$self->SUPER::accumulate($tokens, $cmd);
@ -594,12 +602,21 @@ sub check_test {
 	$self->{ntests}++;
 	my $parser = TestParser->new(\$body);
 	my @tokens = $parser->parse();
-	return unless $emit_all || grep {$_->[0] =~ /\?![^?]+\?!/} @tokens;
+	my $problems = $parser->{problems};
 	return unless $emit_all || @$problems;
 	my $c = main::fd_colors(1);
-	my $checked = join(' ', map {$_->[0]} @tokens);
+	my $start = 0;
 	my $checked = '';
 	for (sort {$a->[1]->[2] <=> $b->[1]->[2]} @$problems) {
 		my ($label, $token) = @$_;
 		my $pos = $token->[2];
 		$checked .= substr($body, $start, $pos - $start) . " ?!$label?! ";
 		$start = $pos;
 	}
 	$checked .= substr($body, $start);
 	$checked =~ s/^\n//;
-	$checked =~ s/^ //mg;
+	$checked =~ s/(\s) \?!/$1?!/mg;
-	$checked =~ s/ $//mg;
+	$checked =~ s/\?! (\s)/?!$1/mg;
 	$checked =~ s/(\?![^?]+\?!)/$c->{rev}$c->{red}$1$c->{reset}/mg;
 	$checked .= "\n" unless $checked =~ /\n$/;
 	push(@{$self->{output}}, "$c->{blue}# chainlint: $title$c->{reset}\n$checked");
--- a/t/chainlint/block-comment.expect
+++ b/t/chainlint/block-comment.expect
@ -1,6 +1,8 @@
 (
 	{
 		# show a
 		echo a &&
 		# show b
 		echo b
 	}
 )
--- a/t/chainlint/case-comment.expect
+++ b/t/chainlint/case-comment.expect
@ -1,7 +1,10 @@
 (
 	case "$x" in
 	# found foo
 	x) foo ;;
 	# found other
 	*)
 		# treat it as bar
 		bar
 		;;
 	esac
--- a/t/chainlint/close-subshell.expect
+++ b/t/chainlint/close-subshell.expect
@ -15,7 +15,8 @@
 ) | wuzzle &&
 (
 	bop
-) | fazz 	fozz &&
+) | fazz \
 	fozz &&
 (
 	bup
 ) |
--- a/t/chainlint/comment.expect
+++ b/t/chainlint/comment.expect
@ -1,4 +1,8 @@
 (
 	# comment 1
 	nothing &&
 	# comment 2
 	something
 	# comment 3
 	# comment 4
 )
--- a/t/chainlint/double-here-doc.expect
+++ b/t/chainlint/double-here-doc.expect
@ -1,2 +1,12 @@
-run_sub_test_lib_test_err run-inv-range-start "--run invalid range start" --run="a-5" <<-EOF &&
+run_sub_test_lib_test_err run-inv-range-start \
-check_sub_test_lib_test_err run-inv-range-start <<-EOF_OUT 3 <<-EOF_ERR
+	"--run invalid range start" \
 	--run="a-5" <<-\EOF &&
 test_expect_success "passing test #1" "true"
 test_done
 EOF
 check_sub_test_lib_test_err run-inv-range-start \
 	<<-\EOF_OUT 3<<-EOF_ERR
 > FATAL: Unexpected exit with code 1
 EOF_OUT
 > error: --run: invalid non-numeric in range start: ${SQ}a-5${SQ}
 EOF_ERR
--- a/t/chainlint/empty-here-doc.expect
+++ b/t/chainlint/empty-here-doc.expect
@ -1,3 +1,4 @@
 git ls-tree $tree path > current &&
-cat > expected <<EOF &&
+cat > expected <<\EOF &&
 EOF
 test_output
--- a/t/chainlint/for-loop.expect
+++ b/t/chainlint/for-loop.expect
@ -2,7 +2,9 @@
 	for i in a b c
 	do
 		echo $i ?!AMP?!
-		cat <<-EOF ?!LOOP?!
+		cat <<-\EOF ?!LOOP?!
 		bar
 		EOF
 	done ?!AMP?!
 	for i in a b c; do
 		echo $i &&
--- a/t/chainlint/here-doc-close-subshell.expect
+++ b/t/chainlint/here-doc-close-subshell.expect
@ -1,2 +1,4 @@
 (
-	cat <<-INPUT)
+	cat <<-\INPUT)
 	fizz
 	INPUT
--- a/t/chainlint/here-doc-indent-operator.expect
+++ b/t/chainlint/here-doc-indent-operator.expect
@ -1,5 +1,11 @@
-cat > expect <<-EOF &&
+cat >expect <<- EOF &&
 header: 43475048 1 $(test_oid oid_version) $NUM_CHUNKS 0
 num_commits: $1
 chunks: oid_fanout oid_lookup commit_metadata generation_data bloom_indexes bloom_data
 EOF
-cat > expect <<-EOF ?!AMP?!
+cat >expect << -EOF ?!AMP?!
 this is not indented
 -EOF
 cleanup
--- a/t/chainlint/here-doc-multi-line-command-subst.expect
+++ b/t/chainlint/here-doc-multi-line-command-subst.expect
@ -1,5 +1,8 @@
 (
-	x=$(bobble <<-END &&
+	x=$(bobble <<-\END &&
 		fossil
 		vegetable
 		END
 		wiffle) ?!AMP?!
 	echo $x
 )
--- a/t/chainlint/here-doc-multi-line-string.expect
+++ b/t/chainlint/here-doc-multi-line-string.expect
@ -1,5 +1,7 @@
 (
-	cat <<-TXT && echo "multi-line
+	cat <<-\TXT && echo "multi-line
 	string" ?!AMP?!
 	fizzle
 	TXT
 	bap
 )
--- a/t/chainlint/here-doc.expect
+++ b/t/chainlint/here-doc.expect
@ -1,7 +1,25 @@
-boodle wobba        gorgo snoot        wafta snurb <<EOF &&
+boodle wobba \
 	gorgo snoot \
 	wafta snurb <<EOF &&
 quoth the raven,
 nevermore...
 EOF
 cat <<-Arbitrary_Tag_42 >foo &&
 snoz
 boz
 woz
 Arbitrary_Tag_42
-cat <<zump >boo &&
+cat <<"zump" >boo &&
 snoz
 boz
 woz
 zump
-horticulture <<EOF
+horticulture <<\EOF
 gomez
 morticia
 wednesday
 pugsly
 EOF
--- a/t/chainlint/if-then-else.expect
+++ b/t/chainlint/if-then-else.expect
@ -8,7 +8,9 @@
 		echo foo
 	else
 		echo foo &&
-		cat <<-EOF
+		cat <<-\EOF
 		bar
 		EOF
 	fi ?!AMP?!
 	echo poodle
 ) &&
--- a/t/chainlint/incomplete-line.expect
+++ b/t/chainlint/incomplete-line.expect
@ -1,4 +1,10 @@
-line 1 line 2 line 3 line 4 &&
+line 1 \
 line 2 \
 line 3 \
 line 4 &&
 (
-	line 5 	line 6 	line 7 	line 8
+	line 5 \
 	line 6 \
 	line 7 \
 	line 8
 )
--- a/t/chainlint/inline-comment.expect
+++ b/t/chainlint/inline-comment.expect
@ -1,6 +1,6 @@
 (
-	foobar &&
+	foobar && # comment 1
-	barfoo ?!AMP?!
+	barfoo ?!AMP?! # wrong position for &&
 	flibble "not a # comment"
 ) &&
--- a/t/chainlint/loop-detect-status.expect
+++ b/t/chainlint/loop-detect-status.expect
@ -2,7 +2,7 @@
 do
 	printf "Generating blob $i/$blobcount\r" >& 2 &&
 	printf "blob\nmark :$i\ndata $blobsize\n" &&
-
+	#test-tool genrandom $i $blobsize &&
 	printf "%-${blobsize}s" $i &&
 	echo "M 100644 :$i $i" >> commit &&
 	i=$(($i+1)) ||
--- a/t/chainlint/nested-here-doc.expect
+++ b/t/chainlint/nested-here-doc.expect
@ -1,7 +1,30 @@
 cat <<ARBITRARY >foop &&
 naddle
 fub <<EOF
 	nozzle
 	noodle
 EOF
 formp
 ARBITRARY
 (
-	cat <<-INPUT_END &&
+	cat <<-\INPUT_END &&
-	cat <<-EOT ?!AMP?!
+	fish are mice
 	but geese go slow
 	data <<EOF
 		perl is lerp
 		and nothing else
 	EOF
 	toink
 	INPUT_END
 	cat <<-\EOT ?!AMP?!
 	text goes here
 	data <<EOF
 		data goes here
 	EOF
 	more test here
 	EOT
 	foobar
 )
--- a/t/chainlint/nested-subshell-comment.expect
+++ b/t/chainlint/nested-subshell-comment.expect
@ -2,6 +2,8 @@
 	foo &&
 	(
 		bar &&
 		# bottles wobble while fiddles gobble
 		# minor numbers of cows (or do they?)
 		baz &&
 		snaff
 	) ?!AMP?!
--- a/t/chainlint/subshell-here-doc.expect
+++ b/t/chainlint/subshell-here-doc.expect
@ -1,10 +1,30 @@
 (
-	echo wobba 	       gorgo snoot 	       wafta snurb <<-EOF &&
+	echo wobba \
 		gorgo snoot \
 		wafta snurb <<-EOF &&
 	quoth the raven,
 	nevermore...
 	EOF
 	cat <<EOF >bip ?!AMP?!
-	echo <<-EOF >bop
+	fish fly high
 EOF
 	echo <<-\EOF >bop
 	gomez
 	morticia
 	wednesday
 	pugsly
 	EOF
 ) &&
 (
-	cat <<-ARBITRARY >bup &&
+	cat <<-\ARBITRARY >bup &&
-	cat <<-ARBITRARY3 >bup3 &&
+	glink
 	FIZZ
 	ARBITRARY
 	cat <<-"ARBITRARY3" >bup3 &&
 	glink
 	FIZZ
 	ARBITRARY3
 	meep
 )
--- a/t/chainlint/t7900-subtree.expect
+++ b/t/chainlint/t7900-subtree.expect
@ -4,12 +4,16 @@ sub2
 sub3
 sub4" &&
 	chks_sub=$(cat <<TXT | sed "s,^,sub dir/,"
 $chks
 TXT
 ) &&
 	chkms="main-sub1
 main-sub2
 main-sub3
 main-sub4" &&
 	chkms_sub=$(cat <<TXT | sed "s,^,sub dir/,"
 $chkms
 TXT
 ) &&
 	subfiles=$(git ls-files) &&
 	check_equal "$subfiles" "$chkms
--- a/t/chainlint/while-loop.expect
+++ b/t/chainlint/while-loop.expect
@ -2,7 +2,9 @@
 	while true
 	do
 		echo foo ?!AMP?!
-		cat <<-EOF ?!LOOP?!
+		cat <<-\EOF ?!LOOP?!
 		bar
 		EOF
 	done ?!AMP?!
 	while true; do
 		echo foo &&