git-cvsserver: add ability to guess -kb from contents
If "gitcvs.allbinary" is set to "guess", then for any file that has not been explicitly marked as binary or text using the "crlf" attribute and the "gitcvs.usecrlfattr" config, git-cvsserver will guess whether it is binary based on the contents of the file. Signed-off-by: Matthew Ogilvie <mmogilvi_git@miniinfo.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
This commit is contained in:

committed by
Junio C Hamano

parent
8a06a63297
commit
90948a4289
@ -502,7 +502,7 @@ sub req_add
|
||||
print $state->{CVSROOT} . "/$state->{module}/$filename\n";
|
||||
|
||||
# this is an "entries" line
|
||||
my $kopts = kopts_from_path($filename);
|
||||
my $kopts = kopts_from_path($filename,"sha1",$meta->{filehash});
|
||||
$log->debug("/$filepart/1.$meta->{revision}//$kopts/");
|
||||
print "/$filepart/1.$meta->{revision}//$kopts/\n";
|
||||
# permissions
|
||||
@ -533,7 +533,8 @@ sub req_add
|
||||
|
||||
print "Checked-in $dirpart\n";
|
||||
print "$filename\n";
|
||||
my $kopts = kopts_from_path($filename);
|
||||
my $kopts = kopts_from_path($filename,"file",
|
||||
$state->{entries}{$filename}{modified_filename});
|
||||
print "/$filepart/0//$kopts/\n";
|
||||
|
||||
my $requestedKopts = $state->{opt}{k};
|
||||
@ -631,7 +632,7 @@ sub req_remove
|
||||
|
||||
print "Checked-in $dirpart\n";
|
||||
print "$filename\n";
|
||||
my $kopts = kopts_from_path($filename);
|
||||
my $kopts = kopts_from_path($filename,"sha1",$meta->{filehash});
|
||||
print "/$filepart/-1.$wrev//$kopts/\n";
|
||||
|
||||
$rmcount++;
|
||||
@ -910,7 +911,7 @@ sub req_co
|
||||
print $state->{CVSROOT} . "/$module/" . ( defined ( $git->{dir} ) and $git->{dir} ne "./" ? $git->{dir} . "/" : "" ) . "$git->{name}\n";
|
||||
|
||||
# this is an "entries" line
|
||||
my $kopts = kopts_from_path($fullName);
|
||||
my $kopts = kopts_from_path($fullName,"sha1",$git->{filehash});
|
||||
print "/$git->{name}/1.$git->{revision}//$kopts/\n";
|
||||
# permissions
|
||||
print "u=$git->{mode},g=$git->{mode},o=$git->{mode}\n";
|
||||
@ -1119,7 +1120,7 @@ sub req_update
|
||||
print $state->{CVSROOT} . "/$state->{module}/$filename\n";
|
||||
|
||||
# this is an "entries" line
|
||||
my $kopts = kopts_from_path($filename);
|
||||
my $kopts = kopts_from_path($filename,"sha1",$meta->{filehash});
|
||||
$log->debug("/$filepart/1.$meta->{revision}//$kopts/");
|
||||
print "/$filepart/1.$meta->{revision}//$kopts/\n";
|
||||
|
||||
@ -1167,7 +1168,8 @@ sub req_update
|
||||
print "Merged $dirpart\n";
|
||||
$log->debug($state->{CVSROOT} . "/$state->{module}/$filename");
|
||||
print $state->{CVSROOT} . "/$state->{module}/$filename\n";
|
||||
my $kopts = kopts_from_path("$dirpart/$filepart");
|
||||
my $kopts = kopts_from_path("$dirpart/$filepart",
|
||||
"file",$mergedFile);
|
||||
$log->debug("/$filepart/1.$meta->{revision}//$kopts/");
|
||||
print "/$filepart/1.$meta->{revision}//$kopts/\n";
|
||||
}
|
||||
@ -1183,7 +1185,8 @@ sub req_update
|
||||
{
|
||||
print "Merged $dirpart\n";
|
||||
print $state->{CVSROOT} . "/$state->{module}/$filename\n";
|
||||
my $kopts = kopts_from_path("$dirpart/$filepart");
|
||||
my $kopts = kopts_from_path("$dirpart/$filepart",
|
||||
"file",$mergedFile);
|
||||
print "/$filepart/1.$meta->{revision}/+/$kopts/\n";
|
||||
}
|
||||
}
|
||||
@ -1434,7 +1437,7 @@ sub req_ci
|
||||
}
|
||||
print "Checked-in $dirpart\n";
|
||||
print "$filename\n";
|
||||
my $kopts = kopts_from_path($filename);
|
||||
my $kopts = kopts_from_path($filename,"sha1",$meta->{filehash});
|
||||
print "/$filepart/1.$meta->{revision}//$kopts/\n";
|
||||
}
|
||||
}
|
||||
@ -2312,7 +2315,7 @@ sub cleanupTmpDir
|
||||
# file should get -kb.
|
||||
sub kopts_from_path
|
||||
{
|
||||
my ($path) = @_;
|
||||
my ($path, $srcType, $name) = @_;
|
||||
|
||||
if ( defined ( $cfg->{gitcvs}{usecrlfattr} ) and
|
||||
$cfg->{gitcvs}{usecrlfattr} =~ /\s*(1|true|yes)\s*$/i )
|
||||
@ -2332,15 +2335,55 @@ sub kopts_from_path
|
||||
}
|
||||
}
|
||||
|
||||
unless ( defined ( $cfg->{gitcvs}{allbinary} ) and $cfg->{gitcvs}{allbinary} =~ /^\s*(1|true|yes)\s*$/i )
|
||||
if ( defined ( $cfg->{gitcvs}{allbinary} ) )
|
||||
{
|
||||
# Return "" to give no special treatment to any path
|
||||
return "";
|
||||
} else {
|
||||
# Alternatively, to have all files treated as if they are binary (which
|
||||
# is more like git itself), always return the "-kb" option
|
||||
return "-kb";
|
||||
if( ($cfg->{gitcvs}{allbinary} =~ /^\s*(1|true|yes)\s*$/i) )
|
||||
{
|
||||
return "-kb";
|
||||
}
|
||||
elsif( ($cfg->{gitcvs}{allbinary} =~ /^\s*guess\s*$/i) )
|
||||
{
|
||||
if( $srcType eq "sha1Or-k" &&
|
||||
!defined($name) )
|
||||
{
|
||||
my ($ret)=$state->{entries}{$path}{options};
|
||||
if( !defined($ret) )
|
||||
{
|
||||
$ret=$state->{opt}{k};
|
||||
if(defined($ret))
|
||||
{
|
||||
$ret="-k$ret";
|
||||
}
|
||||
else
|
||||
{
|
||||
$ret="";
|
||||
}
|
||||
}
|
||||
if( ! ($ret=~/^(|-kb|-kkv|-kkvl|-kk|-ko|-kv)$/) )
|
||||
{
|
||||
print "E Bad -k option\n";
|
||||
$log->warn("Bad -k option: $ret");
|
||||
die "Error: Bad -k option: $ret\n";
|
||||
}
|
||||
|
||||
return $ret;
|
||||
}
|
||||
else
|
||||
{
|
||||
if( is_binary($srcType,$name) )
|
||||
{
|
||||
$log->debug("... as binary");
|
||||
return "-kb";
|
||||
}
|
||||
else
|
||||
{
|
||||
$log->debug("... as text");
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
# Return "" to give no special treatment to any path
|
||||
return "";
|
||||
}
|
||||
|
||||
sub check_attr
|
||||
@ -2360,6 +2403,124 @@ sub check_attr
|
||||
}
|
||||
}
|
||||
|
||||
# Guess whether a blob holds binary data.
#
# This should have the same heuristics as convert.c:is_binary() and related.
# Note that the bare CR test is done by callers in convert.c, but is folded
# into this function here.
#
# Arguments:
#   $srcType - kind of blob source; passed unchanged to open_blob_or_die()
#   $name    - file name or blob hash, as appropriate for $srcType
#
# Returns a true value if the content contains a NUL byte, contains a CR
# that is not immediately followed by LF, or has more non-printable bytes
# than 1/128th of its printable bytes.  Returns a false value otherwise.
# Dies (inside open_blob_or_die) if the source cannot be opened.
sub is_binary
{
    my ($srcType,$name) = @_;
    $log->debug("is_binary($srcType,$name)");

    # Minimize amount of interpreted code run in the inner per-character
    # loop for large files, by totalling each byte value and
    # then analyzing the totals.
    my @counts = (0) x 256;

    my $fh = open_blob_or_die($srcType,$name);

    # We inspect raw bytes, so make sure no I/O layer (CRLF translation,
    # character decoding) alters the data before we look at it.
    binmode($fh);

    my $line;
    while( defined($line=<$fh>) )
    {
        # Any '\0' and bare CR are considered binary.  The negative
        # lookahead also catches a CR that is the very last byte of the
        # data, which a "CR followed by a non-LF character" test misses.
        if( $line =~ /\0|\r(?!\n)/ )
        {
            close($fh);
            return 1;
        }

        # Count up each byte in the line:
        $counts[$_]++ foreach unpack("C*",$line);
    }
    close $fh;

    # Don't count CR and LF as either printable/nonprintable
    $counts[ord("\n")]=0;
    $counts[ord("\r")]=0;

    # Categorize individual byte counts into printable and nonprintable:
    my $printable=0;
    my $nonprintable=0;
    my $i;
    for($i=0;$i<256;$i++)
    {
        if( $i < 32 &&
            $i != ord("\b") &&
            $i != ord("\t") &&
            $i != 033 &&       # ESC
            $i != 014 )        # FF
        {
            # Control characters other than BS, TAB, ESC and FF.
            $nonprintable+=$counts[$i];
        }
        elsif( $i==127 )  # DEL
        {
            $nonprintable+=$counts[$i];
        }
        else
        {
            $printable+=$counts[$i];
        }
    }

    # Binary if non-printable bytes exceed printable/128.
    return ($printable >> 7) < $nonprintable;
}
|
||||
|
||||
# Returns open file handle.  Possible invocations:
#  - open_blob_or_die("file",$filename);
#  - open_blob_or_die("sha1",$filehash);
#
# The source type "sha1Or-k" is accepted as well and is handled exactly
# like "sha1" here.
#
# For "file", $name is opened for reading.  For "sha1", $name must be a
# 40-character object name of a blob; the returned handle reads the output
# of "git cat-file blob $name".
#
# Every failure (unopenable file, missing or malformed hash, object that is
# not a blob, failure to start git, unknown source type) is logged with
# $log->warn and then raised with die.
sub open_blob_or_die
{
    my ($srcType,$name) = @_;
    my ($fh);
    if( $srcType eq "file" )
    {
        if( !open $fh,"<",$name )
        {
            $log->warn("Unable to open file $name: $!");
            die "Unable to open file $name: $!\n";
        }
    }
    elsif( $srcType eq "sha1" || $srcType eq "sha1Or-k" )
    {
        # Use \A and \z rather than ^ and $: "$" also matches just before
        # a trailing newline, which would let "hash\n" through and into
        # the shell commands below.
        unless ( defined ( $name ) and $name =~ /\A[a-zA-Z0-9]{40}\z/ )
        {
            $log->warn("Need filehash");
            die "Need filehash\n";
        }

        # $name has been validated above, so interpolating it into a
        # shell command is safe.
        my $type = `git cat-file -t $name`;
        # Backticks yield undef if the command could not be run at all.
        $type = "" unless defined ( $type );
        chomp $type;

        unless ( $type eq "blob" )
        {
            $log->warn("Invalid type '$type' for '$name'");
            die ( "Invalid type '$type' (expected 'blob')" );
        }

        # The size is only used for the debug message below.
        my $size = `git cat-file -s $name`;
        $size = "" unless defined ( $size );
        chomp $size;

        $log->debug("open_blob_or_die($name) size=$size, type=$type");

        # List-form pipe open: git is run directly, without a shell.
        unless( open $fh, '-|', "git", "cat-file", "blob", $name )
        {
            $log->warn("Unable to open sha1 $name");
            die "Unable to open sha1 $name\n";
        }
    }
    else
    {
        $log->warn("Unknown type of blob source: $srcType");
        die "Unknown type of blob source: $srcType\n";
    }
    return $fh;
}
|
||||
|
||||
# Generate a CVS author name from Git author information, by taking
|
||||
# the first eight characters of the user part of the email address.
|
||||
sub cvs_author
|
||||
|
Reference in New Issue
Block a user