Merge branch 'db/text-delta' into svn-fe

* db/text-delta:
  vcs-svn: avoid hangs from corrupt deltas
  vcs-svn: guard against overflow when computing preimage length
  vcs-svn: implement text-delta handling
This commit is contained in:
Jonathan Nieder
2011-06-15 06:17:14 -05:00
5 changed files with 280 additions and 14 deletions

View File

@ -9,7 +9,7 @@ SYNOPSIS
-------- --------
[verse] [verse]
mkfifo backchannel && mkfifo backchannel &&
svnadmin dump --incremental REPO | svnadmin dump --deltas REPO |
svn-fe [url] 3<backchannel | svn-fe [url] 3<backchannel |
git fast-import --cat-blob-fd=3 3>backchannel git fast-import --cat-blob-fd=3 3>backchannel
@ -32,9 +32,6 @@ Subversion's repository dump format is documented in full in
Files in this format can be generated using the 'svnadmin dump' or Files in this format can be generated using the 'svnadmin dump' or
'svk admin dump' command. 'svk admin dump' command.
Dumps produced with 'svnadmin dump --deltas' (dumpfile format v3)
are not supported.
OUTPUT FORMAT OUTPUT FORMAT
------------- -------------
The fast-import format is documented by the git-fast-import(1) The fast-import format is documented by the git-fast-import(1)

View File

@ -18,12 +18,13 @@ reinit_git () {
try_dump () { try_dump () {
input=$1 && input=$1 &&
maybe_fail=${2:+test_$2} && maybe_fail_svnfe=${2:+test_$2} &&
maybe_fail_fi=${3:+test_$3} &&
{ {
$maybe_fail test-svn-fe "$input" >stream 3<backflow & $maybe_fail_svnfe test-svn-fe "$input" >stream 3<backflow &
} && } &&
git fast-import --cat-blob-fd=3 <stream 3>backflow && $maybe_fail_fi git fast-import --cat-blob-fd=3 <stream 3>backflow &&
wait $! wait $!
} }
@ -674,7 +675,7 @@ test_expect_success PIPE 'change file mode and reiterate content' '
test_cmp hello actual.target test_cmp hello actual.target
' '
test_expect_success PIPE 'deltas not supported' ' test_expect_success PIPE 'deltas supported' '
reinit_git && reinit_git &&
{ {
# (old) h + (inline) ello + (old) \n # (old) h + (inline) ello + (old) \n
@ -735,7 +736,7 @@ test_expect_success PIPE 'deltas not supported' '
echo PROPS-END && echo PROPS-END &&
cat delta cat delta
} >delta.dump && } >delta.dump &&
test_must_fail try_dump delta.dump try_dump delta.dump
' '
test_expect_success PIPE 'property deltas supported' ' test_expect_success PIPE 'property deltas supported' '
@ -942,6 +943,143 @@ test_expect_success PIPE 'deltas for typechange' '
test_cmp expect actual test_cmp expect actual
' '
# Feeds a three-revision dump (format version 3) through try_dump in which
# every revision supplies the path "postimage" as a text delta:
#   r1 adds it using delta.1 with empty properties,
#   r2 changes it using delta.2 with svn:special set (symlink.props),
#   r3 changes it using delta.3 with empty properties again.
# It then checks that the diff-tree output (object names masked as OBJID)
# shows the expected add and type changes, and that the three revisions of
# the blob read "first preimage\n", "target" and "lnk" respectively.
test_expect_success PIPE 'deltas need not consume the whole preimage' '
reinit_git &&
cat >expect <<-\EOF &&
OBJID
:120000 100644 OBJID OBJID T postimage
OBJID
:100644 120000 OBJID OBJID T postimage
OBJID
:000000 100644 OBJID OBJID A postimage
EOF
echo "first preimage" >expect.1 &&
printf target >expect.2 &&
printf lnk >expect.3 &&
{
printf "SVNQ%b%b%b" "QQ\017\001\017" "\0217" "first preimage\n" |
q_to_nul
} >delta.1 &&
{
properties svn:special "*" &&
echo PROPS-END
} >symlink.props &&
{
printf "SVNQ%b%b%b" "Q\002\013\004\012" "\0201\001\001\0211" "lnk target" |
q_to_nul
} >delta.2 &&
{
printf "SVNQ%b%b" "Q\004\003\004Q" "\001Q\002\002" |
q_to_nul
} >delta.3 &&
{
cat <<-\EOF &&
SVN-fs-dump-format-version: 3
Revision-number: 1
Prop-content-length: 10
Content-length: 10
PROPS-END
Node-path: postimage
Node-kind: file
Node-action: add
Text-delta: true
Prop-content-length: 10
EOF
echo Text-content-length: $(wc -c <delta.1) &&
echo Content-length: $((10 + $(wc -c <delta.1))) &&
echo &&
echo PROPS-END &&
cat delta.1 &&
cat <<-\EOF &&
Revision-number: 2
Prop-content-length: 10
Content-length: 10
PROPS-END
Node-path: postimage
Node-kind: file
Node-action: change
Text-delta: true
EOF
echo Prop-content-length: $(wc -c <symlink.props) &&
echo Text-content-length: $(wc -c <delta.2) &&
echo Content-length: $(($(wc -c <symlink.props) + $(wc -c <delta.2))) &&
echo &&
cat symlink.props &&
cat delta.2 &&
cat <<-\EOF &&
Revision-number: 3
Prop-content-length: 10
Content-length: 10
PROPS-END
Node-path: postimage
Node-kind: file
Node-action: change
Text-delta: true
Prop-content-length: 10
EOF
echo Text-content-length: $(wc -c <delta.3) &&
echo Content-length: $((10 + $(wc -c <delta.3))) &&
echo &&
echo PROPS-END &&
cat delta.3 &&
echo
} >deltapartial.dump &&
try_dump deltapartial.dump &&
{
git rev-list HEAD |
git diff-tree --root --stdin |
sed "s/$_x40/OBJID/g"
} >actual &&
test_cmp expect actual &&
git show HEAD:postimage >actual.3 &&
git show HEAD^:postimage >actual.2 &&
git show HEAD^^:postimage >actual.1 &&
test_cmp expect.1 actual.1 &&
test_cmp expect.2 actual.2 &&
test_cmp expect.3 actual.3
'
# Builds a one-revision dump (format version 3) whose only node, "bootstrap",
# is added as a text delta taken from greedy.delta, then runs it through
# try_dump with "must_fail might_fail": test-svn-fe is wrapped in
# test_must_fail and git fast-import in test_might_fail, so the test passes
# only if svn-fe rejects the delta and the pipeline terminates.
test_expect_success PIPE 'no hang for delta trying to read past end of preimage' '
reinit_git &&
{
# COPY 1
printf "SVNQ%b%b" "Q\001\001\002Q" "\001Q" |
q_to_nul
} >greedy.delta &&
{
cat <<-\EOF &&
SVN-fs-dump-format-version: 3
Revision-number: 1
Prop-content-length: 10
Content-length: 10
PROPS-END
Node-path: bootstrap
Node-kind: file
Node-action: add
Text-delta: true
Prop-content-length: 10
EOF
echo Text-content-length: $(wc -c <greedy.delta) &&
echo Content-length: $((10 + $(wc -c <greedy.delta))) &&
echo &&
echo PROPS-END &&
cat greedy.delta &&
echo
} >greedydelta.dump &&
try_dump greedydelta.dump must_fail might_fail
'
test_expect_success 'set up svn repo' ' test_expect_success 'set up svn repo' '
svnconf=$PWD/svnconf && svnconf=$PWD/svnconf &&

View File

@ -7,15 +7,38 @@
#include "strbuf.h" #include "strbuf.h"
#include "quote.h" #include "quote.h"
#include "fast_export.h" #include "fast_export.h"
#include "line_buffer.h"
#include "repo_tree.h" #include "repo_tree.h"
#include "strbuf.h" #include "strbuf.h"
#include "svndiff.h"
#include "sliding_window.h"
#include "line_buffer.h"
#define MAX_GITSVN_LINE_LEN 4096 #define MAX_GITSVN_LINE_LEN 4096
#define REPORT_FILENO 3
static uint32_t first_commit_done; static uint32_t first_commit_done;
static struct line_buffer postimage = LINE_BUFFER_INIT;
static struct line_buffer report_buffer = LINE_BUFFER_INIT; static struct line_buffer report_buffer = LINE_BUFFER_INIT;
/*
 * Set up the temporary-file buffer "postimage" the first time this is
 * called; every later call is a no-op that returns 0.
 *
 * The guard flag is raised before buffer_tmpfile_init() runs, so a failed
 * initialization is reported once (through the return value) and is not
 * retried on subsequent calls.
 *
 * NEEDSWORK: move to fast_export_init()
 */
static int init_postimage(void)
{
	static int already_attempted;

	if (!already_attempted) {
		already_attempted = 1;
		return buffer_tmpfile_init(&postimage);
	}
	return 0;
}
/*
 * Attach report_buffer to file descriptor "fd" the first time this is
 * called; every later call is a no-op that returns 0 regardless of "fd".
 *
 * The guard flag is raised before buffer_fdinit() runs, so a failed
 * initialization is reported once and never retried.
 *
 * NOTE(review): fast_export_init() also calls buffer_fdinit() on
 * report_buffer without touching this flag -- confirm the stream cannot
 * end up being opened twice.
 */
static int init_report_buffer(int fd)
{
	static int already_attempted;

	if (!already_attempted) {
		already_attempted = 1;
		return buffer_fdinit(&report_buffer, fd);
	}
	return 0;
}
void fast_export_init(int fd) void fast_export_init(int fd)
{ {
if (buffer_fdinit(&report_buffer, fd)) if (buffer_fdinit(&report_buffer, fd))
@ -132,6 +155,89 @@ static void die_short_read(struct line_buffer *input)
die("invalid dump: unexpected end of file"); die("invalid dump: unexpected end of file");
} }
/*
 * Report whether the first "len" bytes of "s" end with the NUL-terminated
 * string "suffix".
 *
 * Returns 1 on a match and 0 otherwise.  An empty suffix always matches;
 * a suffix longer than "len" never does.
 */
static int ends_with(const char *s, size_t len, const char *suffix)
{
	const size_t tail = strlen(suffix);

	if (tail > len)
		return 0;
	/* Compare only the trailing "tail" bytes of the buffer. */
	return memcmp(s + (len - tail), suffix, tail) == 0;
}
/*
 * Parse one cat-blob response header and store the announced blob length
 * in "*len".
 *
 * The header is rejected (error() is called and its result returned) when:
 *  - it ends with " missing",
 *  - it does not contain " blob ",
 *  - no digits follow " blob ",
 *  - the number text contains a '-' sign,
 *  - the value is UINTMAX_MAX or does not fit in off_t,
 *  - anything follows the number.
 *
 * Note that "*len" is written before the trailing-garbage check, so it is
 * already set when that last error is returned.
 *
 * Returns 0 on success.
 */
static int parse_cat_response_line(const char *header, off_t *len)
{
	static const char blob_tag[] = " blob ";
	const size_t taglen = strlen(blob_tag);
	const size_t headerlen = strlen(header);
	const char *tag;
	const char *digits;
	char *stop;
	uintmax_t value;

	if (ends_with(header, headerlen, " missing"))
		return error("cat-blob reports missing blob: %s", header);

	tag = memmem(header, headerlen, blob_tag, taglen);
	if (!tag)
		return error("cat-blob header has wrong object type: %s", header);

	digits = tag + taglen;
	value = strtoumax(digits, &stop, 10);
	if (stop == digits)
		return error("cat-blob header does not contain length: %s", header);
	/* strtoumax() silently accepts a sign; a negative length is invalid. */
	if (memchr(digits, '-', stop - digits))
		return error("cat-blob header contains negative length: %s", header);
	if (value == UINTMAX_MAX || value > maximum_signed_value_of_type(off_t))
		return error("blob too large for current definition of off_t");

	*len = value;
	if (*stop)
		return error("cat-blob header contains garbage after length: %s", header);
	return 0;
}
/*
 * Die unless "a + b" can be computed without overflowing, as judged by
 * signed_add_overflows().  Returns normally when the sum is safe.
 */
static void check_preimage_overflow(off_t a, off_t b)
{
	if (!signed_add_overflows(a, b))
		return;
	die("blob too large for current definition of off_t");
}
/*
 * Apply the delta of "len" bytes read from "input" and write the result to
 * the "postimage" temporary file.
 *
 * When "old_data" is non-NULL, a "cat-blob <old_data>" request is printed
 * on stdout and the response header read back supplies the preimage length;
 * the preimage itself is then accessed through a sliding view over
 * report_buffer.  When "old_mode" is REPO_MODE_LNK the preimage is prefixed
 * with "link " and its length grown to match.
 *
 * After the delta has been applied, the view is moved to the last byte of
 * the cat-blob response, which must be a newline.
 *
 * Returns the value of buffer_tmpfile_prepare_to_read() on the postimage;
 * every failure along the way is fatal (die()).
 */
static long apply_delta(off_t len, struct line_buffer *input,
			const char *old_data, uint32_t old_mode)
{
	struct sliding_view preimage = SLIDING_VIEW_INIT(&report_buffer, 0);
	const size_t link_prefix_len = strlen("link ");
	FILE *out;
	long postimage_len;

	if (init_postimage())
		die("cannot open temporary file for blob retrieval");
	out = buffer_tmpfile_rewind(&postimage);
	if (!out)
		die("cannot open temporary file for blob retrieval");
	if (init_report_buffer(REPORT_FILENO))
		die("cannot open fd 3 for feedback from fast-import");

	if (old_data) {
		const char *reply;

		/* Ask for the old blob; the header tells us its length. */
		printf("cat-blob %s\n", old_data);
		fflush(stdout);
		reply = get_response_line();
		if (parse_cat_response_line(reply, &preimage.max_off))
			die("invalid cat-blob response: %s", reply);
		/* Leave headroom for the newline that follows the blob. */
		check_preimage_overflow(preimage.max_off, 1);
	}

	if (old_mode == REPO_MODE_LNK) {
		strbuf_addstr(&preimage.buf, "link ");
		check_preimage_overflow(preimage.max_off, link_prefix_len);
		preimage.max_off += link_prefix_len;
		check_preimage_overflow(preimage.max_off, 1);
	}

	if (svndiff0_apply(input, len, &preimage, out))
		die("cannot apply delta");

	if (old_data) {
		/* Read the remainder of preimage and trailing newline. */
		assert(!signed_add_overflows(preimage.max_off, 1));
		preimage.max_off++;	/* room for newline */
		if (move_window(&preimage, preimage.max_off - 1, 1))
			die("cannot seek to end of input");
		if (preimage.buf.buf[0] != '\n')
			die("missing newline after cat-blob response");
	}

	postimage_len = buffer_tmpfile_prepare_to_read(&postimage);
	if (postimage_len < 0)
		die("cannot read temporary file for blob retrieval");
	strbuf_release(&preimage.buf);
	return postimage_len;
}
void fast_export_data(uint32_t mode, uint32_t len, struct line_buffer *input) void fast_export_data(uint32_t mode, uint32_t len, struct line_buffer *input)
{ {
if (mode == REPO_MODE_LNK) { if (mode == REPO_MODE_LNK) {
@ -199,3 +305,20 @@ int fast_export_ls(const char *path, uint32_t *mode, struct strbuf *dataref)
ls_from_active_commit(path); ls_from_active_commit(path);
return parse_ls_response(get_response_line(), mode, dataref); return parse_ls_response(get_response_line(), mode, dataref);
} }
/*
 * Apply a text delta and emit the resulting blob on stdout as a
 * "data <length>" command followed by the content and a newline.
 *
 * mode:     mode of the node being written; for REPO_MODE_LNK the leading
 *           "link " of the postimage is dropped before the blob is emitted.
 * old_mode: mode of the preimage, passed through to apply_delta().
 * old_data: reference to the preimage blob, or NULL; passed through to
 *           apply_delta().
 * len:      length of the delta to read from "input".
 * input:    stream positioned at the start of the delta.
 *
 * Dies if the delta is too large for off_t, if applying it fails, or if a
 * symlink postimage is too short to contain the "link " prefix.
 */
void fast_export_blob_delta(uint32_t mode,
				uint32_t old_mode, const char *old_data,
				uint32_t len, struct line_buffer *input)
{
	const long link_prefix_len = (long) strlen("link ");
	long postimage_len;

	if (len > maximum_signed_value_of_type(off_t))
		die("enormous delta");
	postimage_len = apply_delta((off_t) len, input, old_data, old_mode);
	if (mode == REPO_MODE_LNK) {
		/*
		 * Without this check a short postimage would make the length
		 * go negative and produce a malformed "data" command.
		 */
		if (postimage_len < link_prefix_len)
			die("invalid dump: symlink too short for \"link\" prefix");
		buffer_skip_bytes(&postimage, link_prefix_len);
		postimage_len -= link_prefix_len;
	}
	printf("data %ld\n", postimage_len);
	buffer_copy_bytes(&postimage, postimage_len);
	fputc('\n', stdout);
}

View File

@ -15,6 +15,9 @@ void fast_export_begin_commit(uint32_t revision, const char *author,
const char *url, unsigned long timestamp); const char *url, unsigned long timestamp);
void fast_export_end_commit(uint32_t revision); void fast_export_end_commit(uint32_t revision);
void fast_export_data(uint32_t mode, uint32_t len, struct line_buffer *input); void fast_export_data(uint32_t mode, uint32_t len, struct line_buffer *input);
void fast_export_blob_delta(uint32_t mode,
uint32_t old_mode, const char *old_data,
uint32_t len, struct line_buffer *input);
/* If there is no such file at that rev, returns -1, errno == ENOENT. */ /* If there is no such file at that rev, returns -1, errno == ENOENT. */
int fast_export_ls_rev(uint32_t rev, const char *path, int fast_export_ls_rev(uint32_t rev, const char *path,

View File

@ -217,9 +217,7 @@ static void handle_node(void)
*/ */
static const char *const empty_blob = "::empty::"; static const char *const empty_blob = "::empty::";
const char *old_data = NULL; const char *old_data = NULL;
uint32_t old_mode = REPO_MODE_BLB;
if (node_ctx.text_delta)
die("text deltas not supported");
if (node_ctx.action == NODEACT_DELETE) { if (node_ctx.action == NODEACT_DELETE) {
if (have_text || have_props || node_ctx.srcRev) if (have_text || have_props || node_ctx.srcRev)
@ -255,6 +253,7 @@ static void handle_node(void)
if (mode != REPO_MODE_DIR && type == REPO_MODE_DIR) if (mode != REPO_MODE_DIR && type == REPO_MODE_DIR)
die("invalid dump: cannot modify a file into a directory"); die("invalid dump: cannot modify a file into a directory");
node_ctx.type = mode; node_ctx.type = mode;
old_mode = mode;
} else if (node_ctx.action == NODEACT_ADD) { } else if (node_ctx.action == NODEACT_ADD) {
if (type == REPO_MODE_DIR) if (type == REPO_MODE_DIR)
old_data = NULL; old_data = NULL;
@ -289,8 +288,14 @@ static void handle_node(void)
fast_export_modify(node_ctx.dst.buf, node_ctx.type, old_data); fast_export_modify(node_ctx.dst.buf, node_ctx.type, old_data);
return; return;
} }
if (!node_ctx.text_delta) {
fast_export_modify(node_ctx.dst.buf, node_ctx.type, "inline");
fast_export_data(node_ctx.type, node_ctx.textLength, &input);
return;
}
fast_export_modify(node_ctx.dst.buf, node_ctx.type, "inline"); fast_export_modify(node_ctx.dst.buf, node_ctx.type, "inline");
fast_export_data(node_ctx.type, node_ctx.textLength, &input); fast_export_blob_delta(node_ctx.type, old_mode, old_data,
node_ctx.textLength, &input);
} }
static void begin_revision(void) static void begin_revision(void)