1 files changed, 541 insertions, 0 deletions
diff --git a/protocols/IcqOscarJ/src/i18n.cpp b/protocols/IcqOscarJ/src/i18n.cpp
new file mode 100644
index 0000000000..535feebcdf
--- /dev/null
+++ b/protocols/IcqOscarJ/src/i18n.cpp
@@ -0,0 +1,541 @@
+// ---------------------------------------------------------------------------80
+//                ICQ plugin for Miranda Instant Messenger
+//                ________________________________________
+//
+// Copyright © 2000-2001 Richard Hughes, Roland Rabien, Tristan Van de Vreede
+// Copyright © 2001-2002 Jon Keating, Richard Hughes
+// Copyright © 2002-2004 Martin Öberg, Sam Kothari, Robert Rainwater
+// Copyright © 2004-2010 Joe Kucera
+//
+// This program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public License
+// as published by the Free Software Foundation; either version 2
+// of the License, or (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+//
+// -----------------------------------------------------------------------------
+//  DESCRIPTION:
+//
+//  Contains helper functions to convert text messages between different
+//  character sets.
+//
+// -----------------------------------------------------------------------------
+#include "icqoscar.h"
+
+
+static BOOL bHasCP_UTF8 = FALSE;
+
+
+void InitI18N(void)
+{
+	CPINFO CPInfo;
+
+
+	bHasCP_UTF8 = GetCPInfo(CP_UTF8, &CPInfo);
+}
+
+
+
+// Returns true if the buffer only contains 7-bit characters.
+BOOL __stdcall IsUSASCII(const char *pBuffer, int nSize)
+{
+	for (int nIndex = 0; nIndex < nSize; nIndex++)
+		if (BYTE(pBuffer[nIndex]) > 0x7F)
+			return FALSE;
+
+	return TRUE;
+}
+
+// Returns true if the unicode buffer only contains 7-bit characters.
+BOOL __stdcall IsUnicodeAscii(const WCHAR *pBuffer, int nSize)
+{
+	for (int nIndex = 0; nIndex < nSize; nIndex++)
+		if (WORD(pBuffer[nIndex]) > 0x7F)
+			return FALSE;
+
+	return TRUE;
+}
+
+
+// Scans a string encoded in UTF-8 to verify that it contains
+// only valid sequences. It will return 1 if the string contains
+// only legitimate encoding sequences; otherwise it will return 0;
+// From 'Secure Programming Cookbook', John Viega & Matt Messier, 2003
+int __stdcall UTF8_IsValid(const char *pszInput)
+{
+	int nb;
+	if (!pszInput)
+		return 0;
+
+	for (	BYTE* c = ( BYTE*)pszInput; *c; c += (nb + 1))
+	{
+		if (!(*c & 0x80))
+			nb = 0;
+		else if ((*c & 0xc0) == 0x80) return 0;
+		else if ((*c & 0xe0) == 0xc0) nb = 1;
+		else if ((*c & 0xf0) == 0xe0) nb = 2;
+		else if ((*c & 0xf8) == 0xf0) nb = 3;
+		else if ((*c & 0xfc) == 0xf8) nb = 4;
+		else if ((*c & 0xfe) == 0xfc) nb = 5;
+		else nb = 0;
+
+		for (int i = 1; i<=nb; i++) // we this forward, do not cross end of string
+			if ((*(c + i) & 0xc0) != 0x80)
+				return 0;
+	}
+
+	return 1;
+}
+
+
+int __stdcall get_utf8_size(const WCHAR *unicode)
+{
+	int size = 0;
+	int index = 0;
+	/* calculate the size of the utf-8 string */
+	WCHAR c = unicode[index++];
+	while (c)
+	{
+		if (c < 0x0080) 
+			size += 1;
+		else if (c < 0x0800) 
+			size += 2;
+		else 
+			size += 3;
+		c = unicode[index++];
+	}
+	return size;
+}
+
+
+// returns ansi string in all cases
+char* __stdcall detect_decode_utf8(const char *from)
+{
+	char *temp = NULL;
+
+	if (IsUSASCII(from, strlennull(from)) || !UTF8_IsValid(from) || !utf8_decode(from, &temp)) return (char*)from;
+	SAFE_FREE((void**)&from);
+
+	return temp;
+}
+
+
+/*
+* The following UTF8 routines are
+*
+* Copyright (C) 2001 Peter Harris <peter.harris@hummingbird.com>
+* Copyright (C) 2001 Edmund Grimley Evans <edmundo@rano.org>
+*
+* under a GPL license
+*
+* --------------------------------------------------------------
+* Convert a string between UTF-8 and the locale's charset.
+* Invalid bytes are replaced by '#', and characters that are
+* not available in the target encoding are replaced by '?'.
+*
+* If the locale's charset is not set explicitly then it is
+* obtained using nl_langinfo(CODESET), where available, the
+* environment variable CHARSET, or assumed to be US-ASCII.
+*
+* Return value of conversion functions:
+*
+*  -1 : memory allocation failed
+*   0 : data was converted exactly
+*   1 : valid data was converted approximately (using '?')
+*   2 : input was invalid (but still converted, using '#')
+*   3 : unknown encoding (but still converted, using '?')
+*/
+
+
+
+/*
+* Convert a string between UTF-8 and the locale's charset.
+*/
+char* __stdcall make_utf8_string_static(const WCHAR *unicode, char *utf8, size_t utf_size)
+{
+	int index = 0;
+	unsigned int out_index = 0;
+	unsigned short c;
+
+	c = unicode[index++];
+	while (c)
+	{
+		if (c < 0x080) 
+		{
+			if (out_index + 1 >= utf_size) break;
+			utf8[out_index++] = (unsigned char)c;
+		}
+		else if (c < 0x800) 
+		{
+			if (out_index + 2 >= utf_size) break;
+			utf8[out_index++] = 0xc0 | (c >> 6);
+			utf8[out_index++] = 0x80 | (c & 0x3f);
+		}
+		else
+		{
+			if (out_index + 3 >= utf_size) break;
+			utf8[out_index++] = 0xe0 | (c >> 12);
+			utf8[out_index++] = 0x80 | ((c >> 6) & 0x3f);
+			utf8[out_index++] = 0x80 | (c & 0x3f);
+		}
+		c = unicode[index++];
+	}
+	utf8[out_index] = 0x00;
+
+	return utf8;
+}
+
+
+char* __stdcall make_utf8_string(const WCHAR *unicode)
+{
+	if (!unicode) return NULL;
+
+	/* first calculate the size of the target string */
+	size_t size = get_utf8_size(unicode);
+
+	char *out = (char*)SAFE_MALLOC(size + 1);
+	if (!out)
+		return NULL;
+
+	return make_utf8_string_static(unicode, out, size + 1);
+}
+
+
+WCHAR* __stdcall make_unicode_string_static(const char *utf8, WCHAR *unicode, size_t unicode_size)
+{
+	unsigned int out_index = 0;
+
+	if (utf8)
+	{
+		unsigned int index = 0;
+		unsigned char c = utf8[index++];
+
+		while (c)
+		{
+			if (out_index + 1 >= unicode_size) break;
+			if ((c & 0x80) == 0) 
+			{
+				unicode[out_index++] = c;
+			} 
+			else if ((c & 0xe0) == 0xe0) 
+			{
+				unicode[out_index] = (c & 0x1F) << 12;
+				c = utf8[index++];
+				unicode[out_index] |= (c & 0x3F) << 6;
+				c = utf8[index++];
+				unicode[out_index++] |= (c & 0x3F);
+			}
+			else
+			{
+				unicode[out_index] = (c & 0x3F) << 6;
+				c = utf8[index++];
+				unicode[out_index++] |= (c & 0x3F);
+			}
+			c = utf8[index++];
+		}
+	}
+	unicode[out_index] = 0;
+
+	return unicode;
+}
+
+
+WCHAR* __stdcall make_unicode_string(const char *utf8)
+{
+	int size = 0, index = 0;
+
+	if (!utf8) return NULL;
+
+	/* first calculate the size of the target string */
+	unsigned char c = utf8[index++];
+	while (c) 
+	{
+		if ((c & 0x80) == 0) 
+		{
+			index += 0;
+		}
+		else if ((c & 0xe0) == 0xe0) 
+		{
+			index += 2;
+		}
+		else
+		{
+			index += 1;
+		}
+		size += 1;
+		c = utf8[index++];
+	}
+
+	WCHAR *out = (WCHAR*)SAFE_MALLOC((size + 1) * sizeof(WCHAR));
+	if (!out)
+		return NULL;
+	else
+		return make_unicode_string_static(utf8, out, size + 1);
+}
+
+
+int __stdcall utf8_encode(const char *from, char **to)
+{
+	int wchars = MultiByteToWideChar(CP_ACP, MB_PRECOMPOSED, from, strlennull(from), NULL, 0);
+
+	if (wchars == 0)
+	{
+#ifdef _DEBUG
+		fprintf(stderr, "Unicode translation error %d\n", GetLastError());
+#endif
+		return -1;
+	}
+
+	WCHAR *unicode = (WCHAR*)_alloca((wchars + 1) * sizeof(WCHAR));
+	ZeroMemory(unicode, (wchars + 1) * sizeof(WCHAR));
+
+	int err = MultiByteToWideChar(CP_ACP, MB_PRECOMPOSED, from, strlennull(from), unicode, wchars);
+	if (err != wchars)
+	{
+#ifdef _DEBUG
+		fprintf(stderr, "Unicode translation error %d\n", GetLastError());
+#endif
+		return -1;
+	}
+
+	/* On NT-based windows systems, we could use WideCharToMultiByte(), but
+	* MS doesn't actually have a consistent API across win32.
+	*/
+	*to = make_utf8_string(unicode);
+	return 0;
+}
+
+
+char* __stdcall ansi_to_utf8(const char *ansi)
+{
+	char *szUtf = NULL;
+
+	if (strlennull(ansi))
+	{
+		utf8_encode(ansi, &szUtf);
+		return szUtf;
+	}
+
+	return null_strdup("");
+}
+
+
+char* __stdcall ansi_to_utf8_codepage(const char *ansi, WORD wCp)
+{
+	int wchars = strlennull(ansi);
+	WCHAR *unicode = (WCHAR*)_alloca((wchars + 1) * sizeof(WCHAR));
+	ZeroMemory(unicode, (wchars + 1) * sizeof(WCHAR));
+
+	MultiByteToWideChar(wCp, MB_PRECOMPOSED, ansi, wchars, unicode, wchars);
+
+	return make_utf8_string(unicode);
+}
+
+
+// Returns 0 on error, 1 on success
+int __stdcall utf8_decode_codepage(const char *from, char **to, WORD wCp)
+{
+	int nResult = 0;
+
+	_ASSERTE(!(*to)); // You passed a non-zero pointer, make sure it doesnt point to unfreed memory
+
+	// Validate the string
+	if (!UTF8_IsValid(from))
+		return 0;
+
+	// Use the native conversion routines when available
+	if (bHasCP_UTF8)
+	{
+		int inlen = strlennull(from) + 1;
+		WCHAR *wszTemp = (WCHAR *)_alloca(inlen * sizeof(WCHAR));
+    ZeroMemory(wszTemp, inlen * sizeof(WCHAR));
+
+		// Convert the UTF-8 string to UCS
+		if (MultiByteToWideChar(CP_UTF8, 0, from, -1, wszTemp, inlen))
+		{
+			// Convert the UCS string to local ANSI codepage
+			*to = (char*)SAFE_MALLOC(inlen);
+			if (WideCharToMultiByte(wCp, 0, wszTemp, -1, *to, inlen, NULL, NULL))
+			{
+				nResult = 1;
+			}
+			else
+			{
+				SAFE_FREE(to);
+			}
+		}
+	}
+	else
+	{
+		int chars = strlennull(from) + 1;
+		WCHAR *unicode = (WCHAR*)_alloca(chars * sizeof(WCHAR));
+		make_unicode_string_static(from, unicode, chars);
+
+		chars = WideCharToMultiByte(wCp, WC_COMPOSITECHECK, unicode, -1, NULL, 0, NULL, NULL);
+
+		if (chars == 0)
+		{
+#ifdef _DEBUG
+			fprintf(stderr, "Unicode translation error %d\n", GetLastError());
+#endif
+			return 0;
+		}
+
+		*to = (char*)SAFE_MALLOC((chars + 1)*sizeof(char));
+		if (*to == NULL)
+		{
+#ifdef _DEBUG
+			fprintf(stderr, "Out of memory processing string to local charset\n");
+#endif
+			return 0;
+		}
+
+		int err = WideCharToMultiByte(wCp, WC_COMPOSITECHECK, unicode, -1, *to, chars, NULL, NULL);
+		if (err != chars)
+		{
+#ifdef _DEBUG
+			fprintf(stderr, "Unicode translation error %d\n", GetLastError());
+#endif
+			SAFE_FREE(to);
+			return 0;
+		}
+
+		nResult = 1;
+	}
+
+	return nResult;
+}
+
+
+// Standard version with current codepage
+int __stdcall utf8_decode(const char *from, char **to)
+{
+	return utf8_decode_codepage(from, to, CP_ACP);
+}
+
+
+// Returns 0 on error, 1 on success
+int __stdcall utf8_decode_static(const char *from, char *to, int to_size)
+{
+	int nResult = 0;
+
+	_ASSERTE(to); // You passed a zero pointer
+
+	// Validate the string
+	if (!UTF8_IsValid(from))
+		return 0;
+
+  // Clear target
+  ZeroMemory(to, to_size);
+
+	// Use the native conversion routines when available
+	if (bHasCP_UTF8)
+	{
+		int inlen = strlennull(from) + 1;
+		WCHAR *wszTemp = (WCHAR*)_alloca(inlen * sizeof(WCHAR));
+		ZeroMemory(wszTemp, inlen * sizeof(WCHAR));
+
+		// Convert the UTF-8 string to UCS
+		if (MultiByteToWideChar(CP_UTF8, 0, from, -1, wszTemp, inlen))
+		{
+			// Convert the UCS string to local ANSI codepage
+			if (WideCharToMultiByte(CP_ACP, 0, wszTemp, -1, to, to_size, NULL, NULL))
+			{
+				nResult = 1;
+			}
+		}
+	}
+	else
+	{
+		size_t chars = strlennull(from) + 1;
+		WCHAR *unicode = (WCHAR*)_alloca(chars * sizeof(WCHAR));
+
+		make_unicode_string_static(from, unicode, chars);
+
+		WideCharToMultiByte(CP_ACP, WC_COMPOSITECHECK, unicode, -1, to, to_size, NULL, NULL);
+
+		nResult = 1;
+	}
+
+	return nResult;
+}
+
+
+WCHAR* __stdcall ansi_to_unicode(const char *ansi)
+{
+	int wchars = MultiByteToWideChar(CP_ACP, MB_PRECOMPOSED, ansi, strlennull(ansi), NULL, 0);
+
+	if (wchars == 0)
+	{
+#ifdef _DEBUG
+		fprintf(stderr, "Unicode translation error %d\n", GetLastError());
+#endif
+		return NULL;
+	}
+
+	WCHAR *unicode = (WCHAR*)SAFE_MALLOC((wchars + 1) * sizeof(WCHAR));
+
+	int err = MultiByteToWideChar(CP_ACP, MB_PRECOMPOSED, ansi, strlennull(ansi), unicode, wchars);
+	if (err != wchars)
+	{
+#ifdef _DEBUG
+		fprintf(stderr, "Unicode translation error %d\n", GetLastError());
+#endif
+    SAFE_FREE(&unicode);
+		return NULL;
+	}
+  return unicode;
+}
+
+
+char* __stdcall unicode_to_ansi_static(const WCHAR *unicode, char *ansi, int ansi_size)
+{
+  ZeroMemory(ansi, ansi_size);
+
+	if (WideCharToMultiByte(CP_ACP, WC_COMPOSITECHECK, unicode, strlennull(unicode), ansi, ansi_size, NULL, NULL) > 1)
+		return ansi;
+
+  return NULL;
+}
+
+
+char* __stdcall unicode_to_ansi(const WCHAR *unicode)
+{
+	int chars = WideCharToMultiByte(CP_ACP, WC_COMPOSITECHECK, unicode, strlennull(unicode), NULL, 0, NULL, NULL);
+
+	if (chars == 0)
+	{
+#ifdef _DEBUG
+		fprintf(stderr, "Unicode translation error %d\n", GetLastError());
+#endif
+		return NULL;
+	}
+
+	char* ansi = (char*)SAFE_MALLOC((chars + 1)*sizeof(char));
+	if (ansi == NULL)
+	{
+#ifdef _DEBUG
+		fprintf(stderr, "Out of memory processing string to local charset\n");
+#endif
+		return NULL;
+	}
+
+	int err = WideCharToMultiByte(CP_ACP, WC_COMPOSITECHECK, unicode, strlennull(unicode), ansi, chars, NULL, NULL);
+	if (err != chars)
+	{
+#ifdef _DEBUG
+		fprintf(stderr, "Unicode translation error %d\n", GetLastError());
+#endif
+		return NULL;
+	}
+
+	return ansi;
+}