perl: make SVN code hash independent

There are several places throughout git-svn that use various hard-coded
constants.  For matching object IDs, use the $oid variable.  Compute the
record size we use for our revision storage based on the object ID.

When parsing the revision map format, use a wildcard in the pack format
since we know that the data we're parsing is always exactly the record
size.  This lets us continue to use a constant for the pack format.

Finally, update several comments to reflect the fact that an object ID
may be of one of multiple sizes.

Signed-off-by: brian m. carlson <sandals@crustytoothpaste.net>
Acked-by: Eric Wong <e@80x24.org>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
This commit is contained in:
brian m. carlson
2020-06-22 18:04:14 +00:00
committed by Junio C Hamano
parent ff508e227c
commit 94b2ee1aee
5 changed files with 48 additions and 45 deletions

View File

@ -2008,10 +2008,10 @@ sub cmt_sha2rev_batch {
print $out $sha, "\n"; print $out $sha, "\n";
while (my $line = <$in>) { while (my $line = <$in>) {
if ($first && $line =~ /^[[:xdigit:]]{40}\smissing$/) { if ($first && $line =~ /^$::oid\smissing$/) {
last; last;
} elsif ($first && } elsif ($first &&
$line =~ /^[[:xdigit:]]{40}\scommit\s(\d+)$/) { $line =~ /^$::oid\scommit\s(\d+)$/) {
$first = 0; $first = 0;
$size = $1; $size = $1;
next; next;

View File

@ -2,7 +2,7 @@ package Git::SVN;
use strict; use strict;
use warnings; use warnings;
use Fcntl qw/:DEFAULT :seek/; use Fcntl qw/:DEFAULT :seek/;
use constant rev_map_fmt => 'NH40'; use constant rev_map_fmt => 'NH*';
use vars qw/$_no_metadata use vars qw/$_no_metadata
$_repack $_repack_flags $_use_svm_props $_head $_repack $_repack_flags $_use_svm_props $_head
$_use_svnsync_props $no_reuse_existing $_use_svnsync_props $no_reuse_existing
@ -2087,10 +2087,10 @@ sub rebuild_from_rev_db {
open my $fh, '<', $path or croak "open: $!"; open my $fh, '<', $path or croak "open: $!";
binmode $fh or croak "binmode: $!"; binmode $fh or croak "binmode: $!";
while (<$fh>) { while (<$fh>) {
length($_) == 41 or croak "inconsistent size in ($_) != 41"; length($_) == $::oid_length + 1 or croak "inconsistent size in ($_)";
chomp($_); chomp($_);
++$r; ++$r;
next if $_ eq ('0' x 40); next if $_ eq ('0' x $::oid_length);
$self->rev_map_set($r, $_); $self->rev_map_set($r, $_);
print "r$r = $_\n"; print "r$r = $_\n";
} }
@ -2196,9 +2196,9 @@ sub rebuild {
# (mainly tags) # (mainly tags)
# #
# The format is this: # The format is this:
# - 24 bytes for every record, # - 24 or 36 bytes for every record,
# * 4 bytes for the integer representing an SVN revision number # * 4 bytes for the integer representing an SVN revision number
# * 20 bytes representing the sha1 of a git commit # * 20 or 32 bytes representing the oid of a git commit
# - No empty padding records like the old format # - No empty padding records like the old format
# (except the last record, which can be overwritten) # (except the last record, which can be overwritten)
# - new records are written append-only since SVN revision numbers # - new records are written append-only since SVN revision numbers
@ -2207,7 +2207,7 @@ sub rebuild {
# - Piping the file to xxd -c24 is a good way of dumping it for # - Piping the file to xxd -c24 is a good way of dumping it for
# viewing or editing (piped back through xxd -r), should the need # viewing or editing (piped back through xxd -r), should the need
# ever arise. # ever arise.
# - The last record can be padding revision with an all-zero sha1 # - The last record can be padding revision with an all-zero oid
# This is used to optimize fetch performance when using multiple # This is used to optimize fetch performance when using multiple
# "fetch" directives in .git/config # "fetch" directives in .git/config
# #
@ -2215,38 +2215,39 @@ sub rebuild {
sub _rev_map_set { sub _rev_map_set {
my ($fh, $rev, $commit) = @_; my ($fh, $rev, $commit) = @_;
my $record_size = ($::oid_length / 2) + 4;
binmode $fh or croak "binmode: $!"; binmode $fh or croak "binmode: $!";
my $size = (stat($fh))[7]; my $size = (stat($fh))[7];
($size % 24) == 0 or croak "inconsistent size: $size"; ($size % $record_size) == 0 or croak "inconsistent size: $size";
my $wr_offset = 0; my $wr_offset = 0;
if ($size > 0) { if ($size > 0) {
sysseek($fh, -24, SEEK_END) or croak "seek: $!"; sysseek($fh, -$record_size, SEEK_END) or croak "seek: $!";
my $read = sysread($fh, my $buf, 24) or croak "read: $!"; my $read = sysread($fh, my $buf, $record_size) or croak "read: $!";
$read == 24 or croak "read only $read bytes (!= 24)"; $read == $record_size or croak "read only $read bytes (!= $record_size)";
my ($last_rev, $last_commit) = unpack(rev_map_fmt, $buf); my ($last_rev, $last_commit) = unpack(rev_map_fmt, $buf);
if ($last_commit eq ('0' x40)) { if ($last_commit eq ('0' x $::oid_length)) {
if ($size >= 48) { if ($size >= ($record_size * 2)) {
sysseek($fh, -48, SEEK_END) or croak "seek: $!"; sysseek($fh, -($record_size * 2), SEEK_END) or croak "seek: $!";
$read = sysread($fh, $buf, 24) or $read = sysread($fh, $buf, $record_size) or
croak "read: $!"; croak "read: $!";
$read == 24 or $read == $record_size or
croak "read only $read bytes (!= 24)"; croak "read only $read bytes (!= $record_size)";
($last_rev, $last_commit) = ($last_rev, $last_commit) =
unpack(rev_map_fmt, $buf); unpack(rev_map_fmt, $buf);
if ($last_commit eq ('0' x40)) { if ($last_commit eq ('0' x $::oid_length)) {
croak "inconsistent .rev_map\n"; croak "inconsistent .rev_map\n";
} }
} }
if ($last_rev >= $rev) { if ($last_rev >= $rev) {
croak "last_rev is higher!: $last_rev >= $rev"; croak "last_rev is higher!: $last_rev >= $rev";
} }
$wr_offset = -24; $wr_offset = -$record_size;
} }
} }
sysseek($fh, $wr_offset, SEEK_END) or croak "seek: $!"; sysseek($fh, $wr_offset, SEEK_END) or croak "seek: $!";
syswrite($fh, pack(rev_map_fmt, $rev, $commit), 24) == 24 or syswrite($fh, pack(rev_map_fmt, $rev, $commit), $record_size) == $record_size or
croak "write: $!"; croak "write: $!";
} }
@ -2271,7 +2272,7 @@ sub mkfile {
sub rev_map_set { sub rev_map_set {
my ($self, $rev, $commit, $update_ref, $uuid) = @_; my ($self, $rev, $commit, $update_ref, $uuid) = @_;
defined $commit or die "missing arg3\n"; defined $commit or die "missing arg3\n";
length $commit == 40 or die "arg3 must be a full SHA1 hexsum\n"; $commit =~ /^$::oid$/ or die "arg3 must be a full hex object ID\n";
my $db = $self->map_path($uuid); my $db = $self->map_path($uuid);
my $db_lock = "$db.lock"; my $db_lock = "$db.lock";
my $sigmask; my $sigmask;
@ -2344,29 +2345,30 @@ sub rev_map_max {
sub rev_map_max_norebuild { sub rev_map_max_norebuild {
my ($self, $want_commit) = @_; my ($self, $want_commit) = @_;
my $record_size = ($::oid_length / 2) + 4;
my $map_path = $self->map_path; my $map_path = $self->map_path;
stat $map_path or return $want_commit ? (0, undef) : 0; stat $map_path or return $want_commit ? (0, undef) : 0;
sysopen(my $fh, $map_path, O_RDONLY) or croak "open: $!"; sysopen(my $fh, $map_path, O_RDONLY) or croak "open: $!";
binmode $fh or croak "binmode: $!"; binmode $fh or croak "binmode: $!";
my $size = (stat($fh))[7]; my $size = (stat($fh))[7];
($size % 24) == 0 or croak "inconsistent size: $size"; ($size % $record_size) == 0 or croak "inconsistent size: $size";
if ($size == 0) { if ($size == 0) {
close $fh or croak "close: $!"; close $fh or croak "close: $!";
return $want_commit ? (0, undef) : 0; return $want_commit ? (0, undef) : 0;
} }
sysseek($fh, -24, SEEK_END) or croak "seek: $!"; sysseek($fh, -$record_size, SEEK_END) or croak "seek: $!";
sysread($fh, my $buf, 24) == 24 or croak "read: $!"; sysread($fh, my $buf, $record_size) == $record_size or croak "read: $!";
my ($r, $c) = unpack(rev_map_fmt, $buf); my ($r, $c) = unpack(rev_map_fmt, $buf);
if ($want_commit && $c eq ('0' x40)) { if ($want_commit && $c eq ('0' x $::oid_length)) {
if ($size < 48) { if ($size < $record_size * 2) {
return $want_commit ? (0, undef) : 0; return $want_commit ? (0, undef) : 0;
} }
sysseek($fh, -48, SEEK_END) or croak "seek: $!"; sysseek($fh, -($record_size * 2), SEEK_END) or croak "seek: $!";
sysread($fh, $buf, 24) == 24 or croak "read: $!"; sysread($fh, $buf, $record_size) == $record_size or croak "read: $!";
($r, $c) = unpack(rev_map_fmt, $buf); ($r, $c) = unpack(rev_map_fmt, $buf);
if ($c eq ('0'x40)) { if ($c eq ('0' x $::oid_length)) {
croak "Penultimate record is all-zeroes in $map_path"; croak "Penultimate record is all-zeroes in $map_path";
} }
} }
@ -2387,30 +2389,31 @@ sub rev_map_get {
sub _rev_map_get { sub _rev_map_get {
my ($fh, $rev) = @_; my ($fh, $rev) = @_;
my $record_size = ($::oid_length / 2) + 4;
binmode $fh or croak "binmode: $!"; binmode $fh or croak "binmode: $!";
my $size = (stat($fh))[7]; my $size = (stat($fh))[7];
($size % 24) == 0 or croak "inconsistent size: $size"; ($size % $record_size) == 0 or croak "inconsistent size: $size";
if ($size == 0) { if ($size == 0) {
return undef; return undef;
} }
my ($l, $u) = (0, $size - 24); my ($l, $u) = (0, $size - $record_size);
my ($r, $c, $buf); my ($r, $c, $buf);
while ($l <= $u) { while ($l <= $u) {
my $i = int(($l/24 + $u/24) / 2) * 24; my $i = int(($l/$record_size + $u/$record_size) / 2) * $record_size;
sysseek($fh, $i, SEEK_SET) or croak "seek: $!"; sysseek($fh, $i, SEEK_SET) or croak "seek: $!";
sysread($fh, my $buf, 24) == 24 or croak "read: $!"; sysread($fh, my $buf, $record_size) == $record_size or croak "read: $!";
my ($r, $c) = unpack(rev_map_fmt, $buf); my ($r, $c) = unpack(rev_map_fmt, $buf);
if ($r < $rev) { if ($r < $rev) {
$l = $i + 24; $l = $i + $record_size;
} elsif ($r > $rev) { } elsif ($r > $rev) {
$u = $i - 24; $u = $i - $record_size;
} else { # $r == $rev } else { # $r == $rev
return $c eq ('0' x 40) ? undef : $c; return $c eq ('0' x $::oid_length) ? undef : $c;
} }
} }
undef; undef;

View File

@ -400,12 +400,12 @@ sub T {
($m->{mode_b} !~ /^120/ && $m->{mode_a} =~ /^120/)) { ($m->{mode_b} !~ /^120/ && $m->{mode_a} =~ /^120/)) {
$self->D({ $self->D({
mode_a => $m->{mode_a}, mode_b => '000000', mode_a => $m->{mode_a}, mode_b => '000000',
sha1_a => $m->{sha1_a}, sha1_b => '0' x 40, sha1_a => $m->{sha1_a}, sha1_b => '0' x $::oid_length,
chg => 'D', file_b => $m->{file_b} chg => 'D', file_b => $m->{file_b}
}, $deletions); }, $deletions);
$self->A({ $self->A({
mode_a => '000000', mode_b => $m->{mode_b}, mode_a => '000000', mode_b => $m->{mode_b},
sha1_a => '0' x 40, sha1_b => $m->{sha1_b}, sha1_a => '0' x $::oid_length, sha1_b => $m->{sha1_b},
chg => 'A', file_b => $m->{file_b} chg => 'A', file_b => $m->{file_b}
}, $deletions); }, $deletions);
return; return;
@ -434,7 +434,7 @@ sub _chg_file_get_blob ($$$$) {
$self->change_file_prop($fbat,'svn:special',undef); $self->change_file_prop($fbat,'svn:special',undef);
} }
my $blob = $m->{"sha1_$which"}; my $blob = $m->{"sha1_$which"};
return ($fh,) if ($blob =~ /^0{40}$/); return ($fh,) if ($blob =~ /^0+$/);
my $size = $::_repository->cat_blob($blob, $fh); my $size = $::_repository->cat_blob($blob, $fh);
croak "Failed to read object $blob" if ($size < 0); croak "Failed to read object $blob" if ($size < 0);
$fh->flush == 0 or croak $!; $fh->flush == 0 or croak $!;

View File

@ -173,7 +173,7 @@ sub delete_entry {
# remove entire directories. # remove entire directories.
my ($tree) = (command('ls-tree', '-z', $self->{c}, "./$gpath") my ($tree) = (command('ls-tree', '-z', $self->{c}, "./$gpath")
=~ /\A040000 tree ([a-f\d]{40})\t\Q$gpath\E\0/); =~ /\A040000 tree ($::oid)\t\Q$gpath\E\0/);
if ($tree) { if ($tree) {
my ($ls, $ctx) = command_output_pipe(qw/ls-tree my ($ls, $ctx) = command_output_pipe(qw/ls-tree
-r --name-only -z/, -r --name-only -z/,
@ -203,7 +203,7 @@ sub open_file {
my $gpath = $self->git_path($path); my $gpath = $self->git_path($path);
($mode, $blob) = (command('ls-tree', '-z', $self->{c}, "./$gpath") ($mode, $blob) = (command('ls-tree', '-z', $self->{c}, "./$gpath")
=~ /\A(\d{6}) blob ([a-f\d]{40})\t\Q$gpath\E\0/); =~ /\A(\d{6}) blob ($::oid)\t\Q$gpath\E\0/);
unless (defined $mode && defined $blob) { unless (defined $mode && defined $blob) {
die "$path was not found in commit $self->{c} (r$rev)\n"; die "$path was not found in commit $self->{c} (r$rev)\n";
} }
@ -413,7 +413,7 @@ sub close_file {
$hash = $::_repository->hash_and_insert_object( $hash = $::_repository->hash_and_insert_object(
Git::temp_path($fh)); Git::temp_path($fh));
$hash =~ /^[a-f\d]{40}$/ or die "not a sha1: $hash\n"; $hash =~ /^$::oid$/ or die "not an object ID: $hash\n";
Git::temp_release($fb->{base}, 1); Git::temp_release($fb->{base}, 1);
Git::temp_release($fh, 1); Git::temp_release($fh, 1);

View File

@ -486,11 +486,11 @@ sub gs_fetch_loop_common {
$reload_ra->() if $ra_invalid; $reload_ra->() if $ra_invalid;
} }
# pre-fill the .rev_db since it'll eventually get filled in # pre-fill the .rev_db since it'll eventually get filled in
# with '0' x40 if something new gets committed # with '0' x $oid_length if something new gets committed
foreach my $gs (@$gsv) { foreach my $gs (@$gsv) {
next if $gs->rev_map_max >= $max; next if $gs->rev_map_max >= $max;
next if defined $gs->rev_map_get($max); next if defined $gs->rev_map_get($max);
$gs->rev_map_set($max, 0 x40); $gs->rev_map_set($max, 0 x $::oid_length);
} }
foreach my $g (@$globs) { foreach my $g (@$globs) {
my $k = "svn-remote.$g->{remote}.$g->{t}-maxRev"; my $k = "svn-remote.$g->{remote}.$g->{t}-maxRev";