git-cvsserver: add ability to guess -kb from contents

If "gitcvs.allbinary" is set to "guess", then any file that has
not been explicitly marked as binary or text using the "crlf" attribute
and the "gitcvs.usecrlfattr" config will guess binary based on the contents
of the file.

Signed-off-by: Matthew Ogilvie <mmogilvi_git@miniinfo.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
This commit is contained in:
Matthew Ogilvie
2008-05-14 22:35:48 -06:00
committed by Junio C Hamano
parent 8a06a63297
commit 90948a4289
4 changed files with 354 additions and 25 deletions

View File

@ -502,7 +502,7 @@ sub req_add
print $state->{CVSROOT} . "/$state->{module}/$filename\n";
# this is an "entries" line
my $kopts = kopts_from_path($filename);
my $kopts = kopts_from_path($filename,"sha1",$meta->{filehash});
$log->debug("/$filepart/1.$meta->{revision}//$kopts/");
print "/$filepart/1.$meta->{revision}//$kopts/\n";
# permissions
@ -533,7 +533,8 @@ sub req_add
print "Checked-in $dirpart\n";
print "$filename\n";
my $kopts = kopts_from_path($filename);
my $kopts = kopts_from_path($filename,"file",
$state->{entries}{$filename}{modified_filename});
print "/$filepart/0//$kopts/\n";
my $requestedKopts = $state->{opt}{k};
@ -631,7 +632,7 @@ sub req_remove
print "Checked-in $dirpart\n";
print "$filename\n";
my $kopts = kopts_from_path($filename);
my $kopts = kopts_from_path($filename,"sha1",$meta->{filehash});
print "/$filepart/-1.$wrev//$kopts/\n";
$rmcount++;
@ -910,7 +911,7 @@ sub req_co
print $state->{CVSROOT} . "/$module/" . ( defined ( $git->{dir} ) and $git->{dir} ne "./" ? $git->{dir} . "/" : "" ) . "$git->{name}\n";
# this is an "entries" line
my $kopts = kopts_from_path($fullName);
my $kopts = kopts_from_path($fullName,"sha1",$git->{filehash});
print "/$git->{name}/1.$git->{revision}//$kopts/\n";
# permissions
print "u=$git->{mode},g=$git->{mode},o=$git->{mode}\n";
@ -1119,7 +1120,7 @@ sub req_update
print $state->{CVSROOT} . "/$state->{module}/$filename\n";
# this is an "entries" line
my $kopts = kopts_from_path($filename);
my $kopts = kopts_from_path($filename,"sha1",$meta->{filehash});
$log->debug("/$filepart/1.$meta->{revision}//$kopts/");
print "/$filepart/1.$meta->{revision}//$kopts/\n";
@ -1167,7 +1168,8 @@ sub req_update
print "Merged $dirpart\n";
$log->debug($state->{CVSROOT} . "/$state->{module}/$filename");
print $state->{CVSROOT} . "/$state->{module}/$filename\n";
my $kopts = kopts_from_path("$dirpart/$filepart");
my $kopts = kopts_from_path("$dirpart/$filepart",
"file",$mergedFile);
$log->debug("/$filepart/1.$meta->{revision}//$kopts/");
print "/$filepart/1.$meta->{revision}//$kopts/\n";
}
@ -1183,7 +1185,8 @@ sub req_update
{
print "Merged $dirpart\n";
print $state->{CVSROOT} . "/$state->{module}/$filename\n";
my $kopts = kopts_from_path("$dirpart/$filepart");
my $kopts = kopts_from_path("$dirpart/$filepart",
"file",$mergedFile);
print "/$filepart/1.$meta->{revision}/+/$kopts/\n";
}
}
@ -1434,7 +1437,7 @@ sub req_ci
}
print "Checked-in $dirpart\n";
print "$filename\n";
my $kopts = kopts_from_path($filename);
my $kopts = kopts_from_path($filename,"sha1",$meta->{filehash});
print "/$filepart/1.$meta->{revision}//$kopts/\n";
}
}
@ -2312,7 +2315,7 @@ sub cleanupTmpDir
# file should get -kb.
sub kopts_from_path
{
my ($path) = @_;
my ($path, $srcType, $name) = @_;
if ( defined ( $cfg->{gitcvs}{usecrlfattr} ) and
$cfg->{gitcvs}{usecrlfattr} =~ /\s*(1|true|yes)\s*$/i )
@ -2332,15 +2335,55 @@ sub kopts_from_path
}
}
unless ( defined ( $cfg->{gitcvs}{allbinary} ) and $cfg->{gitcvs}{allbinary} =~ /^\s*(1|true|yes)\s*$/i )
if ( defined ( $cfg->{gitcvs}{allbinary} ) )
{
# Return "" to give no special treatment to any path
return "";
} else {
# Alternatively, to have all files treated as if they are binary (which
# is more like git itself), always return the "-kb" option
return "-kb";
if( ($cfg->{gitcvs}{allbinary} =~ /^\s*(1|true|yes)\s*$/i) )
{
return "-kb";
}
elsif( ($cfg->{gitcvs}{allbinary} =~ /^\s*guess\s*$/i) )
{
if( $srcType eq "sha1Or-k" &&
!defined($name) )
{
my ($ret)=$state->{entries}{$path}{options};
if( !defined($ret) )
{
$ret=$state->{opt}{k};
if(defined($ret))
{
$ret="-k$ret";
}
else
{
$ret="";
}
}
if( ! ($ret=~/^(|-kb|-kkv|-kkvl|-kk|-ko|-kv)$/) )
{
print "E Bad -k option\n";
$log->warn("Bad -k option: $ret");
die "Error: Bad -k option: $ret\n";
}
return $ret;
}
else
{
if( is_binary($srcType,$name) )
{
$log->debug("... as binary");
return "-kb";
}
else
{
$log->debug("... as text");
}
}
}
}
# Return "" to give no special treatment to any path
return "";
}
sub check_attr
@ -2360,6 +2403,124 @@ sub check_attr
}
}
# This should have the same heuristics as convert.c:is_binary() and related.
# Note that the bare CR test is done by callers in convert.c.
sub is_binary
{
my ($srcType,$name) = @_;
$log->debug("is_binary($srcType,$name)");
# Minimize amount of interpreted code run in the inner per-character
# loop for large files, by totalling each character value and
# then analyzing the totals.
my @counts;
my $i;
for($i=0;$i<256;$i++)
{
$counts[$i]=0;
}
my $fh = open_blob_or_die($srcType,$name);
my $line;
while( defined($line=<$fh>) )
{
# Any '\0' and bare CR are considered binary.
if( $line =~ /\0|(\r[^\n])/ )
{
close($fh);
return 1;
}
# Count up each character in the line:
my $len=length($line);
for($i=0;$i<$len;$i++)
{
$counts[ord(substr($line,$i,1))]++;
}
}
close $fh;
# Don't count CR and LF as either printable/nonprintable
$counts[ord("\n")]=0;
$counts[ord("\r")]=0;
# Categorize individual character count into printable and nonprintable:
my $printable=0;
my $nonprintable=0;
for($i=0;$i<256;$i++)
{
if( $i < 32 &&
$i != ord("\b") &&
$i != ord("\t") &&
$i != 033 && # ESC
$i != 014 ) # FF
{
$nonprintable+=$counts[$i];
}
elsif( $i==127 ) # DEL
{
$nonprintable+=$counts[$i];
}
else
{
$printable+=$counts[$i];
}
}
return ($printable >> 7) < $nonprintable;
}
# Returns open file handle. Possible invocations:
# - open_blob_or_die("file",$filename);
# - open_blob_or_die("sha1",$filehash);
sub open_blob_or_die
{
my ($srcType,$name) = @_;
my ($fh);
if( $srcType eq "file" )
{
if( !open $fh,"<",$name )
{
$log->warn("Unable to open file $name: $!");
die "Unable to open file $name: $!\n";
}
}
elsif( $srcType eq "sha1" || $srcType eq "sha1Or-k" )
{
unless ( defined ( $name ) and $name =~ /^[a-zA-Z0-9]{40}$/ )
{
$log->warn("Need filehash");
die "Need filehash\n";
}
my $type = `git cat-file -t $name`;
chomp $type;
unless ( defined ( $type ) and $type eq "blob" )
{
$log->warn("Invalid type '$type' for '$name'");
die ( "Invalid type '$type' (expected 'blob')" )
}
my $size = `git cat-file -s $name`;
chomp $size;
$log->debug("open_blob_or_die($name) size=$size, type=$type");
unless( open $fh, '-|', "git", "cat-file", "blob", $name )
{
$log->warn("Unable to open sha1 $name");
die "Unable to open sha1 $name\n";
}
}
else
{
$log->warn("Unknown type of blob source: $srcType");
die "Unknown type of blob source: $srcType\n";
}
return $fh;
}
# Generate a CVS author name from Git author information, by taking
# the first eight characters of the user part of the email address.
sub cvs_author