vcs-svn: implement text-delta handling

Handle input in Subversion's dumpfile format, version 3.  This is the
format produced by "svnrdump dump" and "svnadmin dump --deltas", and
the main difference between v3 dumpfiles and the dumpfiles already
handled is that these can include nodes whose properties and text are
expressed relative to some other node.

To handle such nodes, we find which node the text and properties are
based on, handle its property changes, use the cat-blob command to
request the basis blob from the fast-import backend, use the
svndiff0_apply() helper to apply the text delta on the fly, writing
output to a temporary file, and then measure that postimage file's
length and write its content to the fast-import stream.

The temporary postimage file is shared between delta-using nodes to
avoid some file system overhead.

The svn-fe interface needs to be more complicated to accommodate the
backward flow of information from the fast-import backend to svn-fe.
The backflow fd is not needed when parsing streams without deltas,
though, so existing scripts using svn-fe on v2 dumps should
continue to work.

NEEDSWORK: generalize interface so caller sets the backflow fd, close
temporary file before exiting

Signed-off-by: David Barr <david.barr@cordelta.com>
Signed-off-by: Jonathan Nieder <jrnieder@gmail.com>
Signed-off-by: David Barr <david.barr@cordelta.com>
Signed-off-by: Jonathan Nieder <jrnieder@gmail.com>
This commit is contained in:
David Barr
2011-03-19 18:20:54 +11:00
committed by Jonathan Nieder
parent e9f3f8b6f4
commit 7a75e661c5
5 changed files with 227 additions and 11 deletions

View File

@ -7,15 +7,38 @@
#include "strbuf.h"
#include "quote.h"
#include "fast_export.h"
#include "line_buffer.h"
#include "repo_tree.h"
#include "strbuf.h"
#include "svndiff.h"
#include "sliding_window.h"
#include "line_buffer.h"
/* NOTE(review): not referenced in the visible part of this file; presumably a line-length bound -- confirm. */
#define MAX_GITSVN_LINE_LEN 4096
/* File descriptor on which feedback from fast-import (e.g. cat-blob responses) is read. */
#define REPORT_FILENO 3
/* NOTE(review): not referenced in the visible part of this file; presumably set once the first commit is written -- confirm. */
static uint32_t first_commit_done;
/* Temporary file that receives the result of applying a text delta; shared by all delta-using nodes. */
static struct line_buffer postimage = LINE_BUFFER_INIT;
/* Line buffer wrapped around the feedback fd; also serves as the preimage source when applying deltas. */
static struct line_buffer report_buffer = LINE_BUFFER_INIT;
/*
 * Set up the shared temporary postimage file on first use.
 *
 * The first call marks the file as initialized and returns the result
 * of buffer_tmpfile_init(); every later call returns 0 without doing
 * anything.
 *
 * NEEDSWORK: move to fast_export_init()
 */
static int init_postimage(void)
{
	static int postimage_initialized;

	if (!postimage_initialized) {
		/* Mark first, so the setup is attempted only once. */
		postimage_initialized = 1;
		return buffer_tmpfile_init(&postimage);
	}
	return 0;
}
/*
 * Attach report_buffer to file descriptor fd on first use.
 *
 * The first call marks the buffer as initialized and returns the
 * result of buffer_fdinit(); every later call returns 0 and ignores
 * its argument.
 */
static int init_report_buffer(int fd)
{
	static int report_buffer_initialized;

	if (!report_buffer_initialized) {
		/* Mark first, so the setup is attempted only once. */
		report_buffer_initialized = 1;
		return buffer_fdinit(&report_buffer, fd);
	}
	return 0;
}
void fast_export_init(int fd)
{
if (buffer_fdinit(&report_buffer, fd))
@ -132,6 +155,73 @@ static void die_short_read(struct line_buffer *input)
die("invalid dump: unexpected end of file");
}
/*
 * Report whether the first len bytes of s end with the NUL-terminated
 * string suffix.  Returns 1 if so, 0 otherwise (including when suffix
 * is longer than len).
 */
static int ends_with(const char *s, size_t len, const char *suffix)
{
	size_t n = strlen(suffix);

	return len >= n && memcmp(s + (len - n), suffix, n) == 0;
}
/*
 * Parse the header line of a cat-blob response and store the blob
 * length it announces in *len.
 *
 * A header ending in " missing" is reported as a missing blob.
 * Otherwise the header must contain " blob " followed by a decimal
 * length with nothing after it.
 *
 * Returns 0 on success.  On any failure the problem is reported with
 * error(), that call's return value is returned (the caller treats
 * nonzero as failure), and *len is left unmodified.
 */
static int parse_cat_response_line(const char *header, off_t *len)
{
	static const char blob_tag[] = " blob ";
	const char *type;
	const char *num;
	char *end;
	uintmax_t n;

	if (ends_with(header, strlen(header), " missing"))
		return error("cat-blob reports missing blob: %s", header);
	/* header is NUL-terminated, so strstr() covers the whole line. */
	type = strstr(header, blob_tag);
	if (!type)
		return error("cat-blob header has wrong object type: %s", header);
	num = type + strlen(blob_tag);
	n = strtoumax(num, &end, 10);
	if (end == num)
		return error("cat-blob header does not contain length: %s", header);
	/* strtoumax() accepts "-N" and negates it; a length is never negative. */
	if (memchr(num, '-', end - num))
		return error("cat-blob header contains negative length: %s", header);
	if (*end)
		return error("cat-blob header contains garbage after length: %s", header);
	/*
	 * UINTMAX_MAX is what strtoumax() returns on overflow; the round
	 * trip through off_t rejects values that do not fit in it.
	 */
	if (n == UINTMAX_MAX || (off_t) n < 0 || (uintmax_t) (off_t) n != n)
		return error("cat-blob header length is too large: %s", header);
	*len = (off_t) n;
	return 0;
}
/*
 * Apply a text delta and leave the result in the shared postimage
 * temporary file.
 *
 * len:      number of bytes of delta to read from input.
 * input:    stream the delta is read from.
 * old_data: name passed to fast-import's cat-blob command to fetch the
 *           preimage, or NULL when there is no preimage to fetch.
 * old_mode: mode of the preimage; REPO_MODE_LNK causes a "link " prefix
 *           to be placed in front of the preimage.
 *
 * Returns the value of buffer_tmpfile_prepare_to_read(&postimage),
 * which the caller uses as the postimage length.  Dies on any failure.
 */
static long apply_delta(off_t len, struct line_buffer *input,
const char *old_data, uint32_t old_mode)
{
long ret;
off_t preimage_len = 0;
/* The preimage is read through a sliding view over the feedback buffer. */
struct sliding_view preimage = SLIDING_VIEW_INIT(&report_buffer);
FILE *out;
/* Lazily create the temporary file, then rewind it for writing the new postimage. */
if (init_postimage() || !(out = buffer_tmpfile_rewind(&postimage)))
die("cannot open temporary file for blob retrieval");
if (init_report_buffer(REPORT_FILENO))
die("cannot open fd 3 for feedback from fast-import");
if (old_data) {
const char *response;
/* Ask fast-import for the preimage; flush so the request is actually sent before waiting for the reply. */
printf("cat-blob %s\n", old_data);
fflush(stdout);
response = get_response_line();
if (parse_cat_response_line(response, &preimage_len))
die("invalid cat-blob response: %s", response);
}
if (old_mode == REPO_MODE_LNK) {
/* Seed the preimage with the "link " prefix and account for it in its length. */
strbuf_addstr(&preimage.buf, "link ");
preimage_len += strlen("link ");
}
if (svndiff0_apply(input, len, &preimage, out))
die("cannot apply delta");
if (old_data) {
/* Read the remainder of preimage and trailing newline. */
if (move_window(&preimage, preimage_len, 1))
die("cannot seek to end of input");
if (preimage.buf.buf[0] != '\n')
die("missing newline after cat-blob response");
}
/* Switch the temporary file over to reading; a negative result signals failure. */
ret = buffer_tmpfile_prepare_to_read(&postimage);
if (ret < 0)
die("cannot read temporary file for blob retrieval");
strbuf_release(&preimage.buf);
return ret;
}
void fast_export_data(uint32_t mode, uint32_t len, struct line_buffer *input)
{
if (mode == REPO_MODE_LNK) {
@ -199,3 +289,20 @@ int fast_export_ls(const char *path, uint32_t *mode, struct strbuf *dataref)
ls_from_active_commit(path);
return parse_ls_response(get_response_line(), mode, dataref);
}
/*
 * Apply a text delta and write the resulting blob to stdout as a
 * "data <length>" command followed by the content and a newline.
 *
 * mode:     mode of the node being written; for REPO_MODE_LNK the
 *           leading "link " of the postimage is stripped before output.
 * old_mode: mode of the preimage, forwarded to apply_delta().
 * old_data: name of the preimage blob, forwarded to apply_delta()
 *           (NULL when there is none).
 * len:      length in bytes of the delta to read from input.
 * input:    stream the delta is read from.
 *
 * Dies if the delta length does not fit in off_t, or if a symlink
 * postimage is too short to hold the "link " prefix.
 */
void fast_export_blob_delta(uint32_t mode,
				uint32_t old_mode, const char *old_data,
				uint32_t len, struct line_buffer *input)
{
	const long link_prefix_len = (long) strlen("link ");
	long postimage_len;

	if (len > maximum_signed_value_of_type(off_t))
		die("enormous delta");
	postimage_len = apply_delta((off_t) len, input, old_data, old_mode);
	if (mode == REPO_MODE_LNK) {
		/*
		 * Without this check a short postimage would make the
		 * length negative and corrupt the output stream.
		 */
		if (postimage_len < link_prefix_len)
			die("invalid dump: symlink too short for \"link\" prefix");
		buffer_skip_bytes(&postimage, link_prefix_len);
		postimage_len -= link_prefix_len;
	}
	printf("data %ld\n", postimage_len);
	buffer_copy_bytes(&postimage, postimage_len);
	fputc('\n', stdout);
}

View File

@ -15,6 +15,9 @@ void fast_export_begin_commit(uint32_t revision, const char *author,
const char *url, unsigned long timestamp);
void fast_export_end_commit(uint32_t revision);
void fast_export_data(uint32_t mode, uint32_t len, struct line_buffer *input);
/*
 * Read a len-byte text delta from input, apply it to the blob named by
 * old_data (NULL if there is none; old_mode is that blob's mode), and
 * write the resulting blob for a node of the given mode as a "data"
 * command on stdout.
 */
void fast_export_blob_delta(uint32_t mode,
uint32_t old_mode, const char *old_data,
uint32_t len, struct line_buffer *input);
/* If there is no such file at that rev, returns -1, errno == ENOENT. */
int fast_export_ls_rev(uint32_t rev, const char *path,

View File

@ -217,9 +217,7 @@ static void handle_node(void)
*/
static const char *const empty_blob = "::empty::";
const char *old_data = NULL;
if (node_ctx.text_delta)
die("text deltas not supported");
uint32_t old_mode = REPO_MODE_BLB;
if (node_ctx.action == NODEACT_DELETE) {
if (have_text || have_props || node_ctx.srcRev)
@ -255,6 +253,7 @@ static void handle_node(void)
if (mode != REPO_MODE_DIR && type == REPO_MODE_DIR)
die("invalid dump: cannot modify a file into a directory");
node_ctx.type = mode;
old_mode = mode;
} else if (node_ctx.action == NODEACT_ADD) {
if (type == REPO_MODE_DIR)
old_data = NULL;
@ -289,8 +288,14 @@ static void handle_node(void)
fast_export_modify(node_ctx.dst.buf, node_ctx.type, old_data);
return;
}
if (!node_ctx.text_delta) {
fast_export_modify(node_ctx.dst.buf, node_ctx.type, "inline");
fast_export_data(node_ctx.type, node_ctx.textLength, &input);
return;
}
fast_export_modify(node_ctx.dst.buf, node_ctx.type, "inline");
fast_export_data(node_ctx.type, node_ctx.textLength, &input);
fast_export_blob_delta(node_ctx.type, old_mode, old_data,
node_ctx.textLength, &input);
}
static void begin_revision(void)