Merge branch 'jc/bindiff'

* jc/bindiff:
  improve base85 generated assembly code
  binary diff and apply: testsuite.
  binary diff: further updates.
  binary patch.
This commit is contained in:
Junio C Hamano
2006-05-09 14:16:56 -07:00
8 changed files with 580 additions and 74 deletions

308
apply.c
View File

@ -10,6 +10,7 @@
#include "cache.h"
#include "quote.h"
#include "blob.h"
#include "delta.h"
// --check turns on checking that the working tree matches the
// files that are being modified, but doesn't apply the patch
@ -114,6 +115,9 @@ struct patch {
char *new_name, *old_name, *def_name;
unsigned int old_mode, new_mode;
int is_rename, is_copy, is_new, is_delete, is_binary;
#define BINARY_DELTA_DEFLATED 1
#define BINARY_LITERAL_DEFLATED 2
unsigned long deflate_origlen;
int lines_added, lines_deleted;
int score;
struct fragment *fragments;
@ -967,6 +971,88 @@ static inline int metadata_changes(struct patch *patch)
patch->old_mode != patch->new_mode);
}
static int parse_binary(char *buffer, unsigned long size, struct patch *patch)
{
/* We have read "GIT binary patch\n"; what follows is a line
* that says the patch method (currently, either "deflated
* literal" or "deflated delta") and the length of data before
* deflating; a sequence of 'length-byte' followed by base-85
* encoded data follows.
*
* Each 5-byte sequence of base-85 encodes up to 4 bytes,
* and we would limit the patch line to 66 characters,
* so one line can fit up to 13 groups that would decode
* to 52 bytes max. The length byte 'A'-'Z' corresponds
* to 1-26 bytes, and 'a'-'z' corresponds to 27-52 bytes.
* The end of binary is signalled with an empty line.
*/
int llen, used;
struct fragment *fragment;
char *data = NULL;
patch->fragments = fragment = xcalloc(1, sizeof(*fragment));
/* Grab the type of patch */
llen = linelen(buffer, size);
used = llen;
linenr++;
if (!strncmp(buffer, "delta ", 6)) {
patch->is_binary = BINARY_DELTA_DEFLATED;
patch->deflate_origlen = strtoul(buffer + 6, NULL, 10);
}
else if (!strncmp(buffer, "literal ", 8)) {
patch->is_binary = BINARY_LITERAL_DEFLATED;
patch->deflate_origlen = strtoul(buffer + 8, NULL, 10);
}
else
return error("unrecognized binary patch at line %d: %.*s",
linenr-1, llen-1, buffer);
buffer += llen;
while (1) {
int byte_length, max_byte_length, newsize;
llen = linelen(buffer, size);
used += llen;
linenr++;
if (llen == 1)
break;
/* Minimum line is "A00000\n" which is 7-byte long,
* and the line length must be multiple of 5 plus 2.
*/
if ((llen < 7) || (llen-2) % 5)
goto corrupt;
max_byte_length = (llen - 2) / 5 * 4;
byte_length = *buffer;
if ('A' <= byte_length && byte_length <= 'Z')
byte_length = byte_length - 'A' + 1;
else if ('a' <= byte_length && byte_length <= 'z')
byte_length = byte_length - 'a' + 27;
else
goto corrupt;
/* if the input length was not multiple of 4, we would
* have filler at the end but the filler should never
* exceed 3 bytes
*/
if (max_byte_length < byte_length ||
byte_length <= max_byte_length - 4)
goto corrupt;
newsize = fragment->size + byte_length;
data = xrealloc(data, newsize);
if (decode_85(data + fragment->size,
buffer + 1,
byte_length))
goto corrupt;
fragment->size = newsize;
buffer += llen;
size -= llen;
}
fragment->patch = data;
return used;
corrupt:
return error("corrupt binary patch at line %d: %.*s",
linenr-1, llen-1, buffer);
}
static int parse_chunk(char *buffer, unsigned long size, struct patch *patch)
{
int hdrsize, patchsize;
@ -983,19 +1069,34 @@ static int parse_chunk(char *buffer, unsigned long size, struct patch *patch)
"Files ",
NULL,
};
static const char git_binary[] = "GIT binary patch\n";
int i;
int hd = hdrsize + offset;
unsigned long llen = linelen(buffer + hd, size - hd);
if (!memcmp(" differ\n", buffer + hd + llen - 8, 8))
if (llen == sizeof(git_binary) - 1 &&
!memcmp(git_binary, buffer + hd, llen)) {
int used;
linenr++;
used = parse_binary(buffer + hd + llen,
size - hd - llen, patch);
if (used)
patchsize = used + llen;
else
patchsize = 0;
}
else if (!memcmp(" differ\n", buffer + hd + llen - 8, 8)) {
for (i = 0; binhdr[i]; i++) {
int len = strlen(binhdr[i]);
if (len < size - hd &&
!memcmp(binhdr[i], buffer + hd, len)) {
linenr++;
patch->is_binary = 1;
patchsize = llen;
break;
}
}
}
/* Empty patch cannot be applied if:
* - it is a binary patch and we do not do binary_replace, or
@ -1346,75 +1447,149 @@ static int apply_one_fragment(struct buffer_desc *desc, struct fragment *frag)
return offset;
}
static char *inflate_it(const void *data, unsigned long size,
unsigned long inflated_size)
{
z_stream stream;
void *out;
int st;
memset(&stream, 0, sizeof(stream));
stream.next_in = (unsigned char *)data;
stream.avail_in = size;
stream.next_out = out = xmalloc(inflated_size);
stream.avail_out = inflated_size;
inflateInit(&stream);
st = inflate(&stream, Z_FINISH);
if ((st != Z_STREAM_END) || stream.total_out != inflated_size) {
free(out);
return NULL;
}
return out;
}
static int apply_binary_fragment(struct buffer_desc *desc, struct patch *patch)
{
unsigned long dst_size;
struct fragment *fragment = patch->fragments;
void *data;
void *result;
data = inflate_it(fragment->patch, fragment->size,
patch->deflate_origlen);
if (!data)
return error("corrupt patch data");
switch (patch->is_binary) {
case BINARY_DELTA_DEFLATED:
result = patch_delta(desc->buffer, desc->size,
data,
patch->deflate_origlen,
&dst_size);
free(desc->buffer);
desc->buffer = result;
free(data);
break;
case BINARY_LITERAL_DEFLATED:
free(desc->buffer);
desc->buffer = data;
dst_size = patch->deflate_origlen;
break;
}
if (!desc->buffer)
return -1;
desc->size = desc->alloc = dst_size;
return 0;
}
static int apply_binary(struct buffer_desc *desc, struct patch *patch)
{
const char *name = patch->old_name ? patch->old_name : patch->new_name;
unsigned char sha1[20];
unsigned char hdr[50];
int hdrlen;
if (!allow_binary_replacement)
return error("cannot apply binary patch to '%s' "
"without --allow-binary-replacement",
name);
/* For safety, we require patch index line to contain
* full 40-byte textual SHA1 for old and new, at least for now.
*/
if (strlen(patch->old_sha1_prefix) != 40 ||
strlen(patch->new_sha1_prefix) != 40 ||
get_sha1_hex(patch->old_sha1_prefix, sha1) ||
get_sha1_hex(patch->new_sha1_prefix, sha1))
return error("cannot apply binary patch to '%s' "
"without full index line", name);
if (patch->old_name) {
/* See if the old one matches what the patch
* applies to.
*/
write_sha1_file_prepare(desc->buffer, desc->size,
blob_type, sha1, hdr, &hdrlen);
if (strcmp(sha1_to_hex(sha1), patch->old_sha1_prefix))
return error("the patch applies to '%s' (%s), "
"which does not match the "
"current contents.",
name, sha1_to_hex(sha1));
}
else {
/* Otherwise, the old one must be empty. */
if (desc->size)
return error("the patch applies to an empty "
"'%s' but it is not empty", name);
}
get_sha1_hex(patch->new_sha1_prefix, sha1);
if (!memcmp(sha1, null_sha1, 20)) {
free(desc->buffer);
desc->alloc = desc->size = 0;
desc->buffer = NULL;
return 0; /* deletion patch */
}
if (has_sha1_file(sha1)) {
/* We already have the postimage */
char type[10];
unsigned long size;
free(desc->buffer);
desc->buffer = read_sha1_file(sha1, type, &size);
if (!desc->buffer)
return error("the necessary postimage %s for "
"'%s' cannot be read",
patch->new_sha1_prefix, name);
desc->alloc = desc->size = size;
}
else {
/* We have verified desc matches the preimage;
* apply the patch data to it, which is stored
* in the patch->fragments->{patch,size}.
*/
if (apply_binary_fragment(desc, patch))
return error("binary patch does not apply to '%s'",
name);
/* verify that the result matches */
write_sha1_file_prepare(desc->buffer, desc->size, blob_type,
sha1, hdr, &hdrlen);
if (strcmp(sha1_to_hex(sha1), patch->new_sha1_prefix))
return error("binary patch to '%s' creates incorrect result", name);
}
return 0;
}
static int apply_fragments(struct buffer_desc *desc, struct patch *patch)
{
struct fragment *frag = patch->fragments;
const char *name = patch->old_name ? patch->old_name : patch->new_name;
if (patch->is_binary) {
unsigned char sha1[20];
if (!allow_binary_replacement)
return error("cannot apply binary patch to '%s' "
"without --allow-binary-replacement",
name);
/* For safety, we require patch index line to contain
* full 40-byte textual SHA1 for old and new, at least for now.
*/
if (strlen(patch->old_sha1_prefix) != 40 ||
strlen(patch->new_sha1_prefix) != 40 ||
get_sha1_hex(patch->old_sha1_prefix, sha1) ||
get_sha1_hex(patch->new_sha1_prefix, sha1))
return error("cannot apply binary patch to '%s' "
"without full index line", name);
if (patch->old_name) {
unsigned char hdr[50];
int hdrlen;
/* See if the old one matches what the patch
* applies to.
*/
write_sha1_file_prepare(desc->buffer, desc->size,
blob_type, sha1, hdr, &hdrlen);
if (strcmp(sha1_to_hex(sha1), patch->old_sha1_prefix))
return error("the patch applies to '%s' (%s), "
"which does not match the "
"current contents.",
name, sha1_to_hex(sha1));
}
else {
/* Otherwise, the old one must be empty. */
if (desc->size)
return error("the patch applies to an empty "
"'%s' but it is not empty", name);
}
/* For now, we do not record post-image data in the patch,
* and require the object already present in the recipient's
* object database.
*/
if (desc->buffer) {
free(desc->buffer);
desc->alloc = desc->size = 0;
}
get_sha1_hex(patch->new_sha1_prefix, sha1);
if (memcmp(sha1, null_sha1, 20)) {
char type[10];
unsigned long size;
desc->buffer = read_sha1_file(sha1, type, &size);
if (!desc->buffer)
return error("the necessary postimage %s for "
"'%s' does not exist",
patch->new_sha1_prefix, name);
desc->alloc = desc->size = size;
}
return 0;
}
if (patch->is_binary)
return apply_binary(desc, patch);
while (frag) {
if (apply_one_fragment(desc, frag) < 0)
@ -1983,7 +2158,8 @@ int main(int argc, char **argv)
diffstat = 1;
continue;
}
if (!strcmp(arg, "--allow-binary-replacement")) {
if (!strcmp(arg, "--allow-binary-replacement") ||
!strcmp(arg, "--binary")) {
allow_binary_replacement = 1;
continue;
}