From c7c6cf53887f9c4c7f7bf436a86b973ef91dd6ba Mon Sep 17 00:00:00 2001 From: George Hazan Date: Sun, 10 Apr 2016 09:01:07 +0000 Subject: Utf8toUcs2 exported git-svn-id: http://svn.miranda-ng.org/main/trunk@16619 1316c22d-e87f-b044-9b9b-93d7a3e3ba9c --- bin10/lib/mir_core.lib | Bin 301378 -> 301578 bytes bin10/lib/mir_core64.lib | Bin 302392 -> 302568 bytes bin12/lib/mir_core.lib | Bin 301378 -> 301578 bytes bin12/lib/mir_core64.lib | Bin 302392 -> 302568 bytes bin14/lib/mir_core.lib | Bin 301378 -> 301578 bytes bin14/lib/mir_core64.lib | Bin 302392 -> 302568 bytes include/m_core.h | 1 + src/mir_core/src/mir_core.def | 1 + src/mir_core/src/mir_core64.def | 1 + src/mir_core/src/utf.cpp | 287 +++++++++++++++++++--------------------- 10 files changed, 139 insertions(+), 151 deletions(-) diff --git a/bin10/lib/mir_core.lib b/bin10/lib/mir_core.lib index 58c24648c3..7cb68df575 100644 Binary files a/bin10/lib/mir_core.lib and b/bin10/lib/mir_core.lib differ diff --git a/bin10/lib/mir_core64.lib b/bin10/lib/mir_core64.lib index bd91bc8bb9..3bf83695fb 100644 Binary files a/bin10/lib/mir_core64.lib and b/bin10/lib/mir_core64.lib differ diff --git a/bin12/lib/mir_core.lib b/bin12/lib/mir_core.lib index 58c24648c3..7cb68df575 100644 Binary files a/bin12/lib/mir_core.lib and b/bin12/lib/mir_core.lib differ diff --git a/bin12/lib/mir_core64.lib b/bin12/lib/mir_core64.lib index bd91bc8bb9..3bf83695fb 100644 Binary files a/bin12/lib/mir_core64.lib and b/bin12/lib/mir_core64.lib differ diff --git a/bin14/lib/mir_core.lib b/bin14/lib/mir_core.lib index 58c24648c3..7cb68df575 100644 Binary files a/bin14/lib/mir_core.lib and b/bin14/lib/mir_core.lib differ diff --git a/bin14/lib/mir_core64.lib b/bin14/lib/mir_core64.lib index bd91bc8bb9..3bf83695fb 100644 Binary files a/bin14/lib/mir_core64.lib and b/bin14/lib/mir_core64.lib differ diff --git a/include/m_core.h b/include/m_core.h index c1d031f564..07ffec3156 100644 --- a/include/m_core.h +++ b/include/m_core.h @@ -593,6 +593,7 @@ MIR_CORE_DLL(void) KillObjectThreads(void* pObject); MIR_CORE_DLL(char*) Utf8Decode(char* str, wchar_t** ucs2); MIR_CORE_DLL(char*) Utf8DecodeCP(char* str, int codepage, wchar_t** ucs2); +MIR_CORE_DLL(int) Utf8toUcs2(const char *src, size_t srclen, wchar_t *dst, size_t dstlen); // returns 0 on error MIR_CORE_DLL(wchar_t*) Utf8DecodeW(const char* str); diff --git a/src/mir_core/src/mir_core.def b/src/mir_core/src/mir_core.def index 0ad6544d27..dedd3f240e 100644 --- a/src/mir_core/src/mir_core.def +++ b/src/mir_core/src/mir_core.def @@ -991,3 +991,4 @@ mir_forkthreadowner @1148 hex2bin @1149 hex2binW @1150 mir_hmac_sha256 @1151 +Utf8toUcs2 @1152 diff --git a/src/mir_core/src/mir_core64.def b/src/mir_core/src/mir_core64.def index 6cd4c84264..f81f27fc19 100644 --- a/src/mir_core/src/mir_core64.def +++ b/src/mir_core/src/mir_core64.def @@ -991,3 +991,4 @@ mir_forkthreadowner @1148 hex2bin @1149 hex2binW @1150 mir_hmac_sha256 @1151 +Utf8toUcs2 @1152 diff --git a/src/mir_core/src/utf.cpp b/src/mir_core/src/utf.cpp index b904046572..b0240b28ec 100644 --- a/src/mir_core/src/utf.cpp +++ b/src/mir_core/src/utf.cpp @@ -74,7 +74,7 @@ static int Ucs2toUtf8Len(const wchar_t *src, unsigned int srclen) len += 2; continue; } - if ( !(val = getSurrogateValue(src, srclen))) + if (!(val = getSurrogateValue(src, srclen))) return -2; if (val < 0x10000) /* 0x800-0xffff: 3 bytes */ @@ -100,149 +100,139 @@ MIR_CORE_DLL(int) Ucs2toUtf8Len(const wchar_t *src) /* return -1 on dst buffer overflow, -2 on invalid input char */ int Ucs2toUtf8(const wchar_t *src, int srclen, char *dst, int dstlen) { - int len; - - for (len = dstlen; srclen; srclen--, src++) - { - WCHAR ch = *src; - unsigned int val; - - if (ch < 0x80) /* 0x00-0x7f: 1 byte */ - { - if ( !len--) return -1; /* overflow */ - *dst++ = ch; - continue; - } - - if (ch < 0x800) /* 0x80-0x7ff: 2 bytes */ - { - if ((len -= 2) < 0) return -1; /* overflow */ - dst[1] = 0x80 | (ch & 0x3f); - ch >>= 6; - dst[0] = 0xc0 | ch; - dst += 2; - continue; - } - - if ( !(val = getSurrogateValue(src, srclen))) - { - return -2; - } - - if (val < 0x10000) /* 0x800-0xffff: 3 bytes */ - { - if ((len -= 3) < 0) return -1; /* overflow */ - dst[2] = 0x80 | (val & 0x3f); - val >>= 6; - dst[1] = 0x80 | (val & 0x3f); - val >>= 6; - dst[0] = 0xe0 | val; - dst += 3; - } - else /* 0x10000-0x10ffff: 4 bytes */ - { - if ((len -= 4) < 0) return -1; /* overflow */ - dst[3] = 0x80 | (val & 0x3f); - val >>= 6; - dst[2] = 0x80 | (val & 0x3f); - val >>= 6; - dst[1] = 0x80 | (val & 0x3f); - val >>= 6; - dst[0] = 0xf0 | val; - dst += 4; - src++; - srclen--; - } - } - return dstlen - len; + int len; + + for (len = dstlen; srclen; srclen--, src++) { + WCHAR ch = *src; + unsigned int val; + + if (ch < 0x80) { /* 0x00-0x7f: 1 byte */ + if (!len--) return -1; /* overflow */ + *dst++ = ch; + continue; + } + + if (ch < 0x800) { /* 0x80-0x7ff: 2 bytes */ + if ((len -= 2) < 0) return -1; /* overflow */ + dst[1] = 0x80 | (ch & 0x3f); + ch >>= 6; + dst[0] = 0xc0 | ch; + dst += 2; + continue; + } + + if (!(val = getSurrogateValue(src, srclen))) + return -2; + + if (val < 0x10000) { /* 0x800-0xffff: 3 bytes */ + if ((len -= 3) < 0) return -1; /* overflow */ + dst[2] = 0x80 | (val & 0x3f); + val >>= 6; + dst[1] = 0x80 | (val & 0x3f); + val >>= 6; + dst[0] = 0xe0 | val; + dst += 3; + } + else { /* 0x10000-0x10ffff: 4 bytes */ + if ((len -= 4) < 0) return -1; /* overflow */ + dst[3] = 0x80 | (val & 0x3f); + val >>= 6; + dst[2] = 0x80 | (val & 0x3f); + val >>= 6; + dst[1] = 0x80 | (val & 0x3f); + val >>= 6; + dst[0] = 0xf0 | val; + dst += 4; + src++; + srclen--; + } + } + return dstlen - len; } /* helper for the various utf8 mbstowcs functions */ static unsigned int decodeUtf8Char(unsigned char ch, const char **str, const char *strend) { - unsigned int len = utf8_length[ch-0x80]; - unsigned int res = ch & utf8_mask[len]; - const char *end = *str + len; - - if (end > strend) return ~0; - switch(len) - { - case 3: - if ((ch = end[-3] ^ 0x80) >= 0x40) break; - res = (res << 6) | ch; - (*str)++; - case 2: - if ((ch = end[-2] ^ 0x80) >= 0x40) break; - res = (res << 6) | ch; - (*str)++; - case 1: - if ((ch = end[-1] ^ 0x80) >= 0x40) break; - res = (res << 6) | ch; - (*str)++; - if (res < utf8_minval[len]) break; - return res; - } - return ~0; + unsigned int len = utf8_length[ch - 0x80]; + unsigned int res = ch & utf8_mask[len]; + const char *end = *str + len; + + if (end > strend) return ~0; + switch (len) { + case 3: + if ((ch = end[-3] ^ 0x80) >= 0x40) break; + res = (res << 6) | ch; + (*str)++; + + case 2: + if ((ch = end[-2] ^ 0x80) >= 0x40) break; + res = (res << 6) | ch; + (*str)++; + + case 1: + if ((ch = end[-1] ^ 0x80) >= 0x40) break; + res = (res << 6) | ch; + (*str)++; + if (res < utf8_minval[len]) break; + return res; + } + return ~0; } /* query necessary dst length for src string */ -static inline int Utf8toUcs2Len(const char *src, int srclen) +static int Utf8toUcs2Len(const char *src, size_t srclen) { - int ret = 0; - unsigned int res; - const char *srcend = src + srclen; - - while (src < srcend) - { - unsigned char ch = *src++; - if (ch < 0x80) /* special fast case for 7-bit ASCII */ - { - ret++; - continue; - } - if ((res = decodeUtf8Char(ch, &src, srcend)) <= 0x10ffff) - { - if (res > 0xffff) ret++; - ret++; - } - else return -2; /* bad char */ - /* otherwise ignore it */ - } - return ret; + int ret = 0; + unsigned int res; + const char *srcend = src + srclen; + + while (src < srcend) { + unsigned char ch = *src++; + if (ch < 0x80) { /* special fast case for 7-bit ASCII */ + ret++; + continue; + } + if ((res = decodeUtf8Char(ch, &src, srcend)) <= 0x10ffff) { + if (res > 0xffff) ret++; + ret++; + } + else return -2; /* bad char */ + /* otherwise ignore it */ + } + return ret; } /* UTF-8 to wide char string conversion */ /* return -1 on dst buffer overflow, -2 on invalid input char */ -int Utf8toUcs2(const char *src, int srclen, wchar_t *dst, int dstlen) +MIR_CORE_DLL(int) Utf8toUcs2(const char *src, size_t srclen, wchar_t *dst, size_t dstlen) { - unsigned int res; - const char *srcend = src + srclen; - wchar_t *dstend = dst + dstlen; - - while ((dst < dstend) && (src < srcend)) - { - unsigned char ch = *src++; - if (ch < 0x80) /* special fast case for 7-bit ASCII */ - { - *dst++ = ch; - continue; - } - if ((res = decodeUtf8Char(ch, &src, srcend)) <= 0xffff) - { - *dst++ = res; - } - else if (res <= 0x10ffff) /* we need surrogates */ - { - if (dst == dstend - 1) return -1; /* overflow */ - res -= 0x10000; - *dst++ = 0xd800 | (res >> 10); - *dst++ = 0xdc00 | (res & 0x3ff); - } - else return -2; /* bad char */ - /* otherwise ignore it */ - } - if (src < srcend) return -1; /* overflow */ - return dstlen - (dstend - dst); + unsigned int res; + const char *srcend = src + srclen; + wchar_t *dstend = dst + dstlen; + + while ((dst < dstend) && (src < srcend)) { + unsigned char ch = *src++; + if (ch < 0x80) { /* special fast case for 7-bit ASCII */ + *dst++ = ch; + continue; + } + + if ((res = decodeUtf8Char(ch, &src, srcend)) <= 0xffff) + *dst++ = res; + else if (res <= 0x10ffff) { /* we need surrogates */ + if (dst == dstend - 1) + return -1; /* overflow */ + res -= 0x10000; + *dst++ = 0xd800 | (res >> 10); + *dst++ = 0xdc00 | (res & 0x3ff); + } + else return -2; /* bad char */ + } + + if (src < srcend) + return -1; /* overflow */ + + return (int)(dstlen - (dstend - dst)); } ///////////////////////////////////////////////////////////////////////////////////////// @@ -250,7 +240,6 @@ int Utf8toUcs2(const char *src, int srclen, wchar_t *dst, int dstlen) MIR_CORE_DLL(char*) Utf8DecodeCP(char *str, int codepage, wchar_t **ucs2) { - int len; bool needs_free = false; wchar_t* tempBuf = NULL; if (ucs2) @@ -259,12 +248,11 @@ MIR_CORE_DLL(char*) Utf8DecodeCP(char *str, int codepage, wchar_t **ucs2) if (str == NULL) return NULL; - len = (int)strlen(str); - + size_t len = strlen(str); if (len < 2) { if (ucs2 != NULL) { *ucs2 = tempBuf = (wchar_t*)mir_alloc((len + 1) * sizeof(wchar_t)); - MultiByteToWideChar(codepage, 0, str, len, tempBuf, len); + MultiByteToWideChar(codepage, 0, str, (int)len, tempBuf, (int)len); tempBuf[len] = 0; } return str; @@ -275,11 +263,10 @@ MIR_CORE_DLL(char*) Utf8DecodeCP(char *str, int codepage, wchar_t **ucs2) return NULL; if (ucs2 == NULL) { - __try - { + __try { tempBuf = (wchar_t*)alloca((destlen + 1) * sizeof(wchar_t)); } - __except(EXCEPTION_EXECUTE_HANDLER) + __except (EXCEPTION_EXECUTE_HANDLER) { tempBuf = NULL; needs_free = true; @@ -294,7 +281,7 @@ MIR_CORE_DLL(char*) Utf8DecodeCP(char *str, int codepage, wchar_t **ucs2) Utf8toUcs2(str, len, tempBuf, destlen); tempBuf[destlen] = 0; - WideCharToMultiByte(codepage, 0, tempBuf, -1, str, len + 1, "?", NULL); + WideCharToMultiByte(codepage, 0, tempBuf, -1, str, (int)len + 1, "?", NULL); if (ucs2) *ucs2 = tempBuf; @@ -314,16 +301,17 @@ MIR_CORE_DLL(wchar_t*) Utf8DecodeW(const char *str) if (str == NULL) return NULL; - int len = (int)strlen(str); + size_t len = strlen(str); int destlen = Utf8toUcs2Len(str, len); - if (destlen < 0) return NULL; + if (destlen < 0) + return NULL; wchar_t* ucs2 = (wchar_t*)mir_alloc((destlen + 1) * sizeof(wchar_t)); - if (ucs2 == NULL) return NULL; + if (ucs2 == NULL) + return NULL; - if (Utf8toUcs2(str, len, ucs2, destlen) >= 0) - { + if (Utf8toUcs2(str, len, ucs2, destlen) >= 0) { ucs2[destlen] = 0; return ucs2; } @@ -339,7 +327,7 @@ MIR_CORE_DLL(wchar_t*) Utf8DecodeW(const char *str) MIR_CORE_DLL(char*) Utf8EncodeCP(const char* src, int codepage) { int len; - bool needs_free = false; + bool needs_free = false; char* result = NULL; wchar_t* tempBuf; @@ -348,11 +336,10 @@ MIR_CORE_DLL(char*) Utf8EncodeCP(const char* src, int codepage) len = (int)strlen(src); - __try - { + __try { tempBuf = (wchar_t*)alloca((len + 1) * sizeof(wchar_t)); } - __except(EXCEPTION_EXECUTE_HANDLER) + __except (EXCEPTION_EXECUTE_HANDLER) { tempBuf = (wchar_t*)mir_alloc((len + 1) * sizeof(wchar_t)); if (tempBuf == NULL) return NULL; @@ -362,11 +349,9 @@ MIR_CORE_DLL(char*) Utf8EncodeCP(const char* src, int codepage) len = MultiByteToWideChar(codepage, 0, src, -1, tempBuf, len + 1); int destlen = Ucs2toUtf8Len(tempBuf, len); - if (destlen >= 0) - { + if (destlen >= 0) { result = (char*)mir_alloc(destlen + 1); - if (result) - { + if (result) { Ucs2toUtf8(tempBuf, len, result, destlen); result[destlen] = 0; } -- cgit v1.2.3