commit/commit-tree: correct latin1 to utf-8
When a line in the message is not valid UTF-8, "git mailinfo" attempts to convert it to UTF-8 on the assumption that the input is Latin-1 (and punts if it does not convert cleanly). Using the same heuristics in "git commit" and "git commit-tree" allows the editor output to be in Latin-1 and makes the overall system more consistent. Signed-off-by: Junio C Hamano <gitster@pobox.com>
This commit is contained in:

committed by
Junio C Hamano

parent
4c8a9db6f7
commit
08a94a145c
@ -481,36 +481,12 @@ static struct strbuf *decode_b_segment(const struct strbuf *b_seg)
|
||||
return out;
|
||||
}
|
||||
|
||||
/*
|
||||
* When there is no known charset, guess.
|
||||
*
|
||||
* Right now we assume that if the target is UTF-8 (the default),
|
||||
* and it already looks like UTF-8 (which includes US-ASCII as its
|
||||
* subset, of course) then that is what it is and there is nothing
|
||||
* to do.
|
||||
*
|
||||
* Otherwise, we default to assuming it is Latin1 for historical
|
||||
* reasons.
|
||||
*/
|
||||
static const char *guess_charset(const struct strbuf *line, const char *target_charset)
|
||||
{
|
||||
if (is_encoding_utf8(target_charset)) {
|
||||
if (is_utf8(line->buf))
|
||||
return NULL;
|
||||
}
|
||||
return "ISO8859-1";
|
||||
}
|
||||
|
||||
static void convert_to_utf8(struct strbuf *line, const char *charset)
|
||||
{
|
||||
char *out;
|
||||
|
||||
if (!charset || !*charset) {
|
||||
charset = guess_charset(line, metainfo_charset);
|
||||
if (!charset)
|
||||
return;
|
||||
}
|
||||
|
||||
if (!charset || !*charset)
|
||||
return;
|
||||
if (!strcasecmp(metainfo_charset, charset))
|
||||
return;
|
||||
out = reencode_string(line->buf, metainfo_charset, charset);
|
||||
|
Reference in New Issue
Block a user