mirror of
https://github.com/git/git.git
synced 2026-06-19 15:39:47 +02:00
f1b0af0c61
Without a lint guard, bare grep assertions will creep back into tests over time, defeating the previous commit's conversion. Add greplint.pl to catch bare 'grep' used as a test assertion (where 'test_grep' should be used) and '! test_grep' (where 'test_grep !' should be used). greplint.pl reuses the shared shell parser from lib-shell-parser.pl to tokenize test bodies. The Lexer collapses heredocs, command substitutions, and quoted strings into single tokens, so 'grep' appearing inside these contexts is not flagged. A flat walk over the token stream tracks command position and pipeline state to distinguish assertion greps from filter greps. For double-quoted test bodies, a source-line walk counts backslash-continuation lines that the Lexer consumes without emitting into the body text, adjusting the reported line number accordingly. Add test fixtures in greplint/ (modeled on chainlint/) covering detection of bare grep assertions, correct skipping of filters, pipelines, redirects, command substitutions, and lint-ok annotations. Wire into the Makefile as: - test-greplint: runs greplint.pl on $(T) $(THELPERS) $(TPERF) - check-greplint: runs greplint.pl on fixtures, diffs against expected - clean-greplint: removes temp dir Add eol=lf entries in t/.gitattributes for greplint fixtures, matching chainlint, so that check-greplint passes on Windows where core.autocrlf would otherwise cause CRLF mismatches between expected and actual output. Signed-off-by: Michael Montalbo <mmontalbo@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
242 lines
7.5 KiB
Perl
242 lines
7.5 KiB
Perl
#!/usr/bin/env perl
|
|
|
|
# Detect bare 'grep' used as a test assertion where 'test_grep'
|
|
# should be used, and '! test_grep' where 'test_grep !' should
|
|
# be used.
|
|
#
|
|
# The shared shell parser tokenizes test bodies so that 'grep'
|
|
# inside heredocs, command substitutions like $(grep ...), and
|
|
# quoted strings is collapsed into a single token and never seen
|
|
# by our check. A line-oriented approach would need to track
|
|
# heredoc delimiters, nested $() depth, and cross-line pipe
|
|
# state to avoid false positives on patterns like:
|
|
#
|
|
# write_script foo.sh <<-\EOF
|
|
# grep pattern file # data, not an assertion
|
|
# EOF
|
|
#
|
|
# The Lexer already handles these.
|
|
|
|
use warnings;
|
|
use strict;
|
|
use File::Basename;
|
|
do(dirname($0) . "/lib-shell-parser.pl")
|
|
or die "$0: failed to load lib-shell-parser.pl: $@$!\n";
|
|
|
|
my $exit_code = 0;
|
|
|
|
# GrepLintParser inherits ScriptParser's ability to find
|
|
# test_expect_success/failure blocks and call check_test()
|
|
# on each body. We override check_test() to walk the token
|
|
# stream looking for bare grep assertions.
|
|
package GrepLintParser;
|
|
|
|
our @ISA = ('ScriptParser');
|
|
|
|
# After these tokens, the next token is a command word.
|
|
# For example, in 'echo foo && grep bar file', the 'grep'
|
|
# after '&&' is at command position and should be flagged.
|
|
my %cmd_start = map { $_ => 1 } qw(&& || ; ;; do then else elif), "\n", '{', '(';
|
|
|
|
# Tokens indicating grep's output is piped or redirected.
|
|
my %filter_op = map { $_ => 1 } qw(| > >> <);
|
|
|
|
# A token is at "command word" position if the shell would
|
|
# interpret it as a program name rather than an argument.
|
|
# Only 'grep' at command position is an assertion we should
|
|
# flag; 'grep' as an argument ('test_must_fail grep') or
|
|
# value ('for cmd in grep sed') is not.
|
|
sub is_command_word {
|
|
my ($tokens, $pos) = @_;
|
|
return 1 if $pos == 0;
|
|
for (my $j = $pos - 1; $j >= 0; $j--) {
|
|
my $t = $tokens->[$j]->[0];
|
|
# After a separator or pipe, a new command starts.
|
|
return 1 if $cmd_start{$t} || $t eq '|';
|
|
# After '}' or ')', what follows is a separator or
|
|
# redirect on the compound command, not a new command.
|
|
return 0 if $t eq '}' || $t eq ')';
|
|
# '!' is a prefix that does not consume command
|
|
# position; keep scanning to find what precedes it.
|
|
next if $t eq '!';
|
|
# Any other word means we are past the command word.
|
|
return 0;
|
|
}
|
|
return 1;
|
|
}
|
|
|
|
# Some bare greps are intentional (e.g. file may not exist,
|
|
# data filter). A '# lint-ok' annotation on the source line
|
|
# suppresses the warning.
|
|
sub lint_ok {
|
|
my ($raw_lines, $ln) = @_;
|
|
if ($ln < 1 || $ln > @$raw_lines) {
|
|
warn "lint_ok: line number $ln out of range (1.." .
|
|
scalar(@$raw_lines) . ")\n";
|
|
return 0;
|
|
}
|
|
return $raw_lines->[$ln - 1] =~ /lint-ok/;
|
|
}
|
|
|
|
# Grep is a filter (not an assertion) if it receives piped
|
|
# input or sends its output to a pipe or redirect. Check
|
|
# both directions from grep's position in the token stream.
|
|
sub is_filter {
|
|
my ($tokens, $pos) = @_;
|
|
# Backward: is grep receiving piped input?
|
|
# Newlines don't break pipes ('cmd |\n grep' is one
|
|
# pipeline), so skip past them.
|
|
for (my $j = $pos - 1; $j >= 0; $j--) {
|
|
my $t = $tokens->[$j]->[0];
|
|
return 1 if $t eq '|';
|
|
next if $t eq "\n";
|
|
last if $cmd_start{$t} || $t eq '}' || $t eq ')';
|
|
}
|
|
# Forward: is grep piping or redirecting output?
|
|
# Unlike the backward scan, we do not skip newlines here:
|
|
# a bare newline is a command boundary, and redirects or
|
|
# pipes must appear on the same line as grep (or after a
|
|
# line continuation, which the Lexer consumes).
|
|
for (my $j = $pos + 1; $j < @$tokens; $j++) {
|
|
my $t = $tokens->[$j]->[0];
|
|
return 0 if $cmd_start{$t};
|
|
return 1 if $filter_op{$t};
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
# Map a body-relative line number to a file line number.
|
|
# For double-quoted bodies, backslash-continuation lines
|
|
# (\<newline>) are consumed by the Lexer without appearing
|
|
# in the body text, so the inner parser sees fewer lines
|
|
# than the source file has. We walk the source lines to
|
|
# count continuations and adjust accordingly.
|
|
sub body_to_file_line {
|
|
my ($body_lineno, $body_token, $raw_lines, $body_start) = @_;
|
|
my $body_text = $body_token->[0];
|
|
my $body_end_line = $body_token->[4];
|
|
unless ($body_start && $body_start >= 1) {
|
|
warn "body_start is not a positive integer\n";
|
|
return $body_lineno;
|
|
}
|
|
my $file_lineno = $body_lineno + $body_start - 1;
|
|
# Only double-quoted bodies have line splices.
|
|
return $file_lineno unless $body_text =~ /^"/;
|
|
my $adj = 0;
|
|
my $lines_seen = 0;
|
|
unless ($body_end_line && $body_end_line >= $body_start) {
|
|
warn "body_end_line is not set for double-quoted body\n";
|
|
return $file_lineno;
|
|
}
|
|
my $end = $body_end_line;
|
|
if ($end > @$raw_lines) {
|
|
warn "body_end_line ($end) exceeds file length (" .
|
|
scalar(@$raw_lines) . ")\n";
|
|
return $file_lineno;
|
|
}
|
|
my $src_ln = $body_start;
|
|
while ($src_ln <= $end && $lines_seen < $body_lineno) {
|
|
my $line = $raw_lines->[$src_ln - 1];
|
|
# Odd trailing backslashes = continuation (\<nl>).
|
|
# Even = escaped backslashes (\\), not a continuation.
|
|
if ($line =~ /(\\*)$/ && length($1) % 2 == 1) {
|
|
$adj++;
|
|
} else {
|
|
$lines_seen++;
|
|
}
|
|
$src_ln++;
|
|
}
|
|
if ($lines_seen < $body_lineno) {
|
|
warn "body_lineno ($body_lineno) not found within body range " .
|
|
"($body_start..$end)\n";
|
|
}
|
|
return $file_lineno + $adj;
|
|
}
|
|
|
|
# ScriptParser calls this for each test body found in the script.
|
|
sub check_test {
|
|
my $self = shift @_;
|
|
my $title = ScriptParser::unwrap(shift @_);
|
|
my $body_token = shift @_;
|
|
my $body_start = $body_token->[3];
|
|
my $body = ScriptParser::unwrap($body_token);
|
|
# Handle heredoc-style test bodies:
|
|
# test_expect_success 'title' - <<\EOF
|
|
# grep pattern file
|
|
# EOF
|
|
# The '-' signals that the body follows as a heredoc.
|
|
if ($body eq '-') {
|
|
my $herebody = shift @_;
|
|
if ($herebody) {
|
|
$body = $herebody->{content};
|
|
$body_start = $herebody->{start_line};
|
|
}
|
|
}
|
|
return unless $body;
|
|
|
|
my $raw_lines = $self->{raw_lines};
|
|
|
|
# The outer parser gives us the body as an opaque string.
|
|
# Parse it to get individual tokens with command boundaries.
|
|
my $parser = ShellParser->new(\$body);
|
|
my @tokens = $parser->parse();
|
|
|
|
my $file = $self->{file};
|
|
|
|
for (my $i = 0; $i < @tokens; $i++) {
|
|
my $text = $tokens[$i]->[0];
|
|
next unless is_command_word(\@tokens, $i);
|
|
|
|
my $token_lineno = $tokens[$i]->[3];
|
|
unless (defined($token_lineno) && $token_lineno >= 1) {
|
|
warn "token has no line number\n";
|
|
next;
|
|
}
|
|
my $file_lineno = body_to_file_line(
|
|
$token_lineno,
|
|
$body_token, $raw_lines, $body_start);
|
|
|
|
# '!' negates the exit code without consuming command
|
|
# position. '! test_grep' is an anti-pattern because
|
|
# test_grep only prints diagnostics on grep failure,
|
|
# and '!' inverts after that decision is already made.
|
|
if ($text eq '!') {
|
|
if ($i + 1 < @tokens &&
|
|
$tokens[$i + 1]->[0] eq 'test_grep' &&
|
|
!lint_ok($raw_lines, $file_lineno)) {
|
|
print "$file:$file_lineno: error: ",
|
|
'use "test_grep !" instead of ',
|
|
'"! test_grep"', "\n";
|
|
$exit_code = 1;
|
|
}
|
|
next;
|
|
}
|
|
|
|
# Bare grep as a command (not a filter) is a test
|
|
# assertion that should use test_grep for better
|
|
# failure diagnostics.
|
|
if ($text eq 'grep' &&
|
|
!is_filter(\@tokens, $i) &&
|
|
!lint_ok($raw_lines, $file_lineno)) {
|
|
print "$file:$file_lineno: error: ",
|
|
"bare grep outside pipeline ",
|
|
"(use test_grep)\n";
|
|
$exit_code = 1;
|
|
}
|
|
}
|
|
}
|
|
|
|
package main;
|
|
|
|
for my $file (@ARGV) {
|
|
open(my $fh, '<:unix:crlf', $file) or die "$0: $file: $!\n";
|
|
my @raw_lines = <$fh>;
|
|
close $fh;
|
|
my $s = join('', @raw_lines);
|
|
my $parser = GrepLintParser->new(\$s);
|
|
$parser->{file} = $file;
|
|
$parser->{raw_lines} = \@raw_lines;
|
|
$parser->parse();
|
|
}
|
|
exit $exit_code;
|