Merge branch 'jk/utf-8-can-be-spelled-differently'
Some platforms and users spell UTF-8 differently; retry with the most official spelling, "UTF-8", when the system does not understand a user-supplied encoding name that is one of the common alternative spellings of UTF-8. * jk/utf-8-can-be-spelled-differently: utf8: accept alternate spellings of UTF-8
This commit is contained in:
16
utf8.c
16
utf8.c
@@ -507,9 +507,25 @@ char *reencode_string(const char *in, const char *out_encoding, const char *in_e
|
|||||||
|
|
||||||
if (!in_encoding)
|
if (!in_encoding)
|
||||||
return NULL;
|
return NULL;
|
||||||
|
|
||||||
|
conv = iconv_open(out_encoding, in_encoding);
|
||||||
|
if (conv == (iconv_t) -1) {
|
||||||
|
/*
|
||||||
|
* Some platforms do not have the variously spelled variants of
|
||||||
|
* UTF-8, so let's fall back to trying the most official
|
||||||
|
* spelling. We do so only as a fallback in case the platform
|
||||||
|
* does understand the user's spelling, but not our official
|
||||||
|
* one.
|
||||||
|
*/
|
||||||
|
if (is_encoding_utf8(in_encoding))
|
||||||
|
in_encoding = "UTF-8";
|
||||||
|
if (is_encoding_utf8(out_encoding))
|
||||||
|
out_encoding = "UTF-8";
|
||||||
conv = iconv_open(out_encoding, in_encoding);
|
conv = iconv_open(out_encoding, in_encoding);
|
||||||
if (conv == (iconv_t) -1)
|
if (conv == (iconv_t) -1)
|
||||||
return NULL;
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
out = reencode_string_iconv(in, strlen(in), conv);
|
out = reencode_string_iconv(in, strlen(in), conv);
|
||||||
iconv_close(conv);
|
iconv_close(conv);
|
||||||
return out;
|
return out;
|
||||||
|
Reference in New Issue
Block a user