commit/commit-tree: correct latin1 to utf-8

When a line in the message is not a valid utf-8, "git mailinfo"
attempts to convert it to utf-8 assuming the input is latin1 (and
punt if it does not convert cleanly).  Using the same heuristics in
"git commit" and "git commit-tree" lets the editor output be in
latin1 to make the overall system more consistent.

Signed-off-by: Junio C Hamano <gitster@pobox.com>
This commit is contained in:
Linus Torvalds
2012-06-28 11:24:14 -07:00
committed by Junio C Hamano
parent 4c8a9db6f7
commit 08a94a145c
2 changed files with 88 additions and 28 deletions

View File

@ -481,36 +481,12 @@ static struct strbuf *decode_b_segment(const struct strbuf *b_seg)
return out;
}
/*
* When there is no known charset, guess.
*
* Right now we assume that if the target is UTF-8 (the default),
* and it already looks like UTF-8 (which includes US-ASCII as its
* subset, of course) then that is what it is and there is nothing
* to do.
*
* Otherwise, we default to assuming it is Latin1 for historical
* reasons.
*/
static const char *guess_charset(const struct strbuf *line, const char *target_charset)
{
if (is_encoding_utf8(target_charset)) {
if (is_utf8(line->buf))
return NULL;
}
return "ISO8859-1";
}
static void convert_to_utf8(struct strbuf *line, const char *charset)
{
char *out;
if (!charset || !*charset) {
charset = guess_charset(line, metainfo_charset);
if (!charset)
return;
}
if (!charset || !*charset)
return;
if (!strcasecmp(metainfo_charset, charset))
return;
out = reencode_string(line->buf, metainfo_charset, charset);