From cb4a46e7fbe62d788e66ed6121c717a2d22a4d7c Mon Sep 17 00:00:00 2001 From: watcherhd Date: Thu, 21 Apr 2011 14:14:52 +0000 Subject: svn.miranda.im is moving to a new home! git-svn-id: http://miranda-plugins.googlecode.com/svn/trunk@7 e753b5eb-9565-29b2-b5c5-2cc6f99dfbcb --- icqj_mod/i18n.c | 477 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 477 insertions(+) create mode 100644 icqj_mod/i18n.c (limited to 'icqj_mod/i18n.c') diff --git a/icqj_mod/i18n.c b/icqj_mod/i18n.c new file mode 100644 index 0000000..f29f686 --- /dev/null +++ b/icqj_mod/i18n.c @@ -0,0 +1,477 @@ +// ---------------------------------------------------------------------------80 +// ICQ plugin for Miranda Instant Messenger +// ________________________________________ +// +// Copyright © 2000,2001 Richard Hughes, Roland Rabien, Tristan Van de Vreede +// Copyright © 2001,2002 Jon Keating, Richard Hughes +// Copyright © 2002,2003,2004 Martin Öberg, Sam Kothari, Robert Rainwater +// Copyright © 2004,2005,2006 Joe Kucera +// +// This program is free software; you can redistribute it and/or +// modify it under the terms of the GNU General Public License +// as published by the Free Software Foundation; either version 2 +// of the License, or (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
+// +// ----------------------------------------------------------------------------- +// +// File name : $Source: /cvsroot/miranda/miranda/protocols/IcqOscarJ/i18n.c,v $ +// Revision : $Revision: 3184 $ +// Last change on : $Date: 2006-06-20 12:40:26 +0200 (Tue, 20 Jun 2006) $ +// Last change by : $Author: jokusoftware $ +// +// DESCRIPTION: +// +// Contains helper functions to convert text messages between different +// character sets. +// +// ----------------------------------------------------------------------------- + +#include "icqoscar.h" + + +static BOOL bHasCP_UTF8 = FALSE; + + +void InitI18N(void) +{ + CPINFO CPInfo; + + + bHasCP_UTF8 = GetCPInfo(CP_UTF8, &CPInfo); +} + + + +// Returns true if the buffer only contains 7-bit characters. +BOOL IsUSASCII(const unsigned char* pBuffer, int nSize) +{ + BOOL bResult = TRUE; + int nIndex; + + for (nIndex = 0; nIndex < nSize; nIndex++) + { + if (pBuffer[nIndex] > 0x7F) + { + bResult = FALSE; + break; + } + } + + return bResult; +} + +// Returns true if the unicode buffer only contains 7-bit characters. +BOOL IsUnicodeAscii(const wchar_t* pBuffer, int nSize) +{ + BOOL bResult = TRUE; + int nIndex; + + + for (nIndex = 0; nIndex < nSize; nIndex++) + { + if (pBuffer[nIndex] > 0x7F) + { + bResult = FALSE; + break; + } + } + + return bResult; +} + + +// Scans a string encoded in UTF-8 to verify that it contains +// only valid sequences. 
It will return 1 if the string contains +// only legitimate encoding sequences; otherwise it will return 0; +// From 'Secure Programming Cookbook', John Viega & Matt Messier, 2003 +int UTF8_IsValid(const unsigned char* pszInput) +{ + int nb, i; + const unsigned char* c = pszInput; + + + for (c = pszInput; *c; c += (nb + 1)) + { + if (!(*c & 0x80)) + nb = 0; + else if ((*c & 0xc0) == 0x80) return 0; + else if ((*c & 0xe0) == 0xc0) nb = 1; + else if ((*c & 0xf0) == 0xe0) nb = 2; + else if ((*c & 0xf8) == 0xf0) nb = 3; + else if ((*c & 0xfc) == 0xf8) nb = 4; + else if ((*c & 0xfe) == 0xfc) nb = 5; + + for (i = 1; i<=nb; i++) // we this forward, do not cross end of string + if ((*(c + i) & 0xc0) != 0x80) + return 0; + } + + return 1; +} + + +// returns ansi string in all cases +char* detect_decode_utf8(const char *from) +{ + char* temp = NULL; + + if (IsUSASCII(from, strlennull(from)) || !UTF8_IsValid(from) || !utf8_decode(from, &temp)) return (char*)from; + SAFE_FREE((char**)&from); + + return temp; +} + + +/* + * The following UTF8 routines are + * + * Copyright (C) 2001 Peter Harris + * Copyright (C) 2001 Edmund Grimley Evans + * + * under a GPL license + * + * -------------------------------------------------------------- + * Convert a string between UTF-8 and the locale's charset. + * Invalid bytes are replaced by '#', and characters that are + * not available in the target encoding are replaced by '?'. + * + * If the locale's charset is not set explicitly then it is + * obtained using nl_langinfo(CODESET), where available, the + * environment variable CHARSET, or assumed to be US-ASCII. + * + * Return value of conversion functions: + * + * -1 : memory allocation failed + * 0 : data was converted exactly + * 1 : valid data was converted approximately (using '?') + * 2 : input was invalid (but still converted, using '#') + * 3 : unknown encoding (but still converted, using '?') + */ + + + +/* + * Convert a string between UTF-8 and the locale's charset. 
+ */ +unsigned char *make_utf8_string(const wchar_t *unicode) +{ + int size = 0; + int index = 0; + int out_index = 0; + unsigned char* out; + unsigned short c; + + + /* first calculate the size of the target string */ + c = unicode[index++]; + while (c) + { + if (c < 0x0080) + size += 1; + else if (c < 0x0800) + size += 2; + else + size += 3; + c = unicode[index++]; + } + + out = (unsigned char*)SAFE_MALLOC(size + 1); + if (out == NULL) + return NULL; + index = 0; + + c = unicode[index++]; + while (c) + { + if (c < 0x080) + { + out[out_index++] = (unsigned char)c; + } + else if (c < 0x800) + { + out[out_index++] = 0xc0 | (c >> 6); + out[out_index++] = 0x80 | (c & 0x3f); + } + else + { + out[out_index++] = 0xe0 | (c >> 12); + out[out_index++] = 0x80 | ((c >> 6) & 0x3f); + out[out_index++] = 0x80 | (c & 0x3f); + } + c = unicode[index++]; + } + out[out_index] = 0x00; + + return out; +} + + + +wchar_t *make_unicode_string(const unsigned char *utf8) +{ + int size = 0, index = 0, out_index = 0; + wchar_t *out; + unsigned char c; + + /* first calculate the size of the target string */ + c = utf8[index++]; + while (c) + { + if ((c & 0x80) == 0) + { + index += 0; + } + else if ((c & 0xe0) == 0xe0) + { + index += 2; + } + else + { + index += 1; + } + size += 1; + c = utf8[index++]; + } + + out = (wchar_t*)SAFE_MALLOC((size + 1) * sizeof(wchar_t)); + if (out == NULL) + return NULL; + index = 0; + + c = utf8[index++]; + while (c) + { + if((c & 0x80) == 0) + { + out[out_index++] = c; + } + else if((c & 0xe0) == 0xe0) + { + out[out_index] = (c & 0x1F) << 12; + c = utf8[index++]; + out[out_index] |= (c & 0x3F) << 6; + c = utf8[index++]; + out[out_index++] |= (c & 0x3F); + } + else + { + out[out_index] = (c & 0x3F) << 6; + c = utf8[index++]; + out[out_index++] |= (c & 0x3F); + } + c = utf8[index++]; + } + out[out_index] = 0; + + return out; +} + + + +int utf8_encode(const char *from, char **to) +{ + wchar_t *unicode; + int wchars, err; + + + wchars = MultiByteToWideChar(CP_ACP, 
MB_PRECOMPOSED, from, + strlennull(from), NULL, 0); + + if (wchars == 0) + { + fprintf(stderr, "Unicode translation error %d\n", GetLastError()); + return -1; + } + + unicode = (wchar_t*)_alloca((wchars + 1) * sizeof(unsigned short)); + unicode[wchars] = 0; + + err = MultiByteToWideChar(CP_ACP, MB_PRECOMPOSED, from, + strlennull(from), unicode, wchars); + if(err != wchars) + { + fprintf(stderr, "Unicode translation error %d\n", GetLastError()); + return -1; + } + + /* On NT-based windows systems, we could use WideCharToMultiByte(), but + * MS doesn't actually have a consistent API across win32. + */ + *to = make_utf8_string(unicode); + + return 0; +} + + + +char *ansi_to_utf8(const char *szAnsi) +{ + char *szUtf; + + if (strlennull(szAnsi)) + { + utf8_encode(szAnsi, &szUtf); + + return szUtf; + } + else + return null_strdup(""); +} + + + +char *ansi_to_utf8_codepage(const char *szAnsi, WORD wCp) +{ + wchar_t *unicode; + int wchars = strlennull(szAnsi); + + unicode = (wchar_t*)_alloca((wchars + 1) * sizeof(wchar_t)); + ZeroMemory(unicode, (wchars + 1)*sizeof(wchar_t)); + + MultiByteToWideChar(wCp, MB_PRECOMPOSED, szAnsi, wchars, unicode, wchars); + + return make_utf8_string(unicode); +} + + + +// Returns 0 on error, 1 on success +int utf8_decode(const char *from, char **to) +{ + int nResult = 0; + + _ASSERTE(!(*to)); // You passed a non-zero pointer, make sure it doesnt point to unfreed memory + + // Validate the string + if (!UTF8_IsValid(from)) + return 0; + + // Use the native conversion routines when available + if (bHasCP_UTF8) + { + WCHAR *wszTemp = NULL; + int inlen = strlennull(from); + + wszTemp = (WCHAR *)_alloca(sizeof(WCHAR) * (inlen + 1)); + + // Convert the UTF-8 string to UCS + if (MultiByteToWideChar(CP_UTF8, 0, from, -1, wszTemp, inlen + 1)) + { + // Convert the UCS string to local ANSI codepage + *to = (char*)SAFE_MALLOC(inlen+1); + if (WideCharToMultiByte(CP_ACP, 0, wszTemp, -1, *to, inlen+1, NULL, NULL)) + { + nResult = 1; + } + else + { + 
SAFE_FREE(&(*to)); + } + } + } + else + { + wchar_t *unicode; + int chars; + int err; + + unicode = make_unicode_string(from); + if(unicode == NULL) + { + fprintf(stderr, "Out of memory processing string from UTF8 to UNICODE16\n"); + return 0; + } + + chars = WideCharToMultiByte(CP_ACP, WC_COMPOSITECHECK, unicode, -1, NULL, 0, NULL, NULL); + + if(chars == 0) + { + fprintf(stderr, "Unicode translation error %d\n", GetLastError()); + SAFE_FREE(&unicode); + return 0; + } + + *to = (char*)SAFE_MALLOC((chars + 1)*sizeof(unsigned char)); + if(*to == NULL) + { + fprintf(stderr, "Out of memory processing string to local charset\n"); + SAFE_FREE(&unicode); + return 0; + } + + err = WideCharToMultiByte(CP_ACP, WC_COMPOSITECHECK, unicode, -1, *to, chars, NULL, NULL); + if (err != chars) + { + fprintf(stderr, "Unicode translation error %d\n", GetLastError()); + SAFE_FREE(&unicode); + SAFE_FREE(to); + return 0; + } + + SAFE_FREE(&unicode); + + nResult = 1; + } + + return nResult; +} + + + +// Returns 0 on error, 1 on success +int utf8_decode_static(const char *from, char *to, int to_size) +{ + int nResult = 0; + + _ASSERTE(to); // You passed a zero pointer + + // Validate the string + if (!UTF8_IsValid(from)) + return 0; + + // Use the native conversion routines when available + if (bHasCP_UTF8) + { + WCHAR *wszTemp = NULL; + int inlen = strlennull(from); + + wszTemp = (WCHAR *)_alloca(sizeof(WCHAR) * (inlen + 1)); + + // Convert the UTF-8 string to UCS + if (MultiByteToWideChar(CP_UTF8, 0, from, -1, wszTemp, inlen + 1)) + { + // Convert the UCS string to local ANSI codepage + if (WideCharToMultiByte(CP_ACP, 0, wszTemp, -1, to, to_size, NULL, NULL)) + { + nResult = 1; + } + } + } + else + { + wchar_t *unicode = make_unicode_string(from); + + if (unicode == NULL) + { + fprintf(stderr, "Out of memory processing string from UTF8 to UNICODE16\n"); + return 0; + } + + WideCharToMultiByte(CP_ACP, WC_COMPOSITECHECK, unicode, -1, to, to_size, NULL, NULL); + + SAFE_FREE(&unicode); + + 
nResult = 1; + } + + return nResult; +} -- cgit v1.2.3