Merge branch 'ks/rfc2047-one-char-at-a-time' into maint
When "format-patch" quoted non-ASCII strings in header fields, it applied RFC 2047 encoding incorrectly and could split a single multi-byte character in the middle. * ks/rfc2047-one-char-at-a-time: format-patch: RFC 2047 says multi-octet character may not be split
This commit is contained in:
39
utf8.c
39
utf8.c
@ -531,3 +531,42 @@ char *reencode_string(const char *in, const char *out_encoding, const char *in_e
|
||||
return out;
|
||||
}
|
||||
#endif
|
||||
|
||||
/*
 * Report the length in bytes of the first character of the multi-byte
 * string `*text`, interpreted according to `encoding`, and step past it.
 *
 * - On return `*text` has been advanced by the returned length, so it
 *   points at the next character.
 * - If `remainder_p` is non-NULL, `*remainder_p` on entry is the number of
 *   bytes that may be consumed from `*text`; on exit it has been decreased
 *   by the returned length. If it is zero on entry, 0 is returned and
 *   nothing is modified.
 * - If `remainder_p` is NULL, no explicit byte limit is applied and `text`
 *   is treated as limited by its terminating NUL.
 *
 * For UTF-8 the character is picked with pick_one_utf8_char(); a sequence
 * that is not valid UTF-8 is treated as a raw byte and counted as one byte.
 */
int mbs_chrlen(const char **text, size_t *remainder_p, const char *encoding)
{
	const char *cursor = *text;
	size_t avail = SIZE_MAX;
	/*
	 * Default of one byte covers both invalid UTF-8 and every encoding
	 * other than UTF-8.
	 *
	 * TODO use iconv to decode one char and obtain its chrlen;
	 * for now, encodings != UTF-8 are treated as one-byte.
	 */
	int chrlen = 1;

	if (remainder_p)
		avail = *remainder_p;
	if (!avail)
		return 0;

	if (is_encoding_utf8(encoding)) {
		pick_one_utf8_char(&cursor, &avail);
		/* a NULL cursor means the bytes were not valid UTF-8 */
		if (cursor)
			chrlen = cursor - *text;
	}

	*text += chrlen;
	if (remainder_p)
		*remainder_p -= chrlen;

	return chrlen;
}
|
||||
|
Reference in New Issue
Block a user