svn.miranda.im is moving to a new home!

git-svn-id: http://miranda-plugins.googlecode.com/svn/trunk@7 e753b5eb-9565-29b2-b5c5-2cc6f99dfbcb
author: watcherhd <watcherhd@e753b5eb-9565-29b2-b5c5-2cc6f99dfbcb> 2011-04-21 14:14:52 +0000
committer: watcherhd <watcherhd@e753b5eb-9565-29b2-b5c5-2cc6f99dfbcb> 2011-04-21 14:14:52 +0000
commit: cb4a46e7fbe62d788e66ed6121c717a2d22a4d7c (patch)
tree: 30df260fdc5a1b5a7049c2f8cac8b7ef17513d6d /irc_mod/i18n.cpp
parent: 19b6f534d2e784a1e120bf52c4aa07004798f473 (diff)
1 files changed, 461 insertions, 0 deletions
diff --git a/irc_mod/i18n.cpp b/irc_mod/i18n.cpp
new file mode 100644
index 0000000..71b39b8
--- /dev/null
+++ b/irc_mod/i18n.cpp
@@ -0,0 +1,461 @@
+/*
+IRC plugin for Miranda IM
+
+Copyright (C) 2003 Jörgen Persson
+
+This program is free software; you can redistribute it and/or
+modify it under the terms of the GNU General Public License
+as published by the Free Software Foundation; either version 2
+of the License, or (at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
+*/
+
+//  Contains helper functions to convert text messages between different
+//  character sets.
+
+#include "irc.h"
+
+// Result of the CP_UTF8 probe performed by InitI18N(); FALSE until it has run.
+// utf8_decode() and utf8_decode_static() consult it to choose a conversion path.
+static BOOL bHasCP_UTF8 = FALSE;
+
+// Asks the system for information about the CP_UTF8 code page and stores the
+// outcome of that query in bHasCP_UTF8. Debug builds also log the value.
+void InitI18N(void)
+{
+	CPINFO cpUtf8Info;
+
+	bHasCP_UTF8 = GetCPInfo(CP_UTF8, &cpUtf8Info);
+#ifdef _DEBUG
+	DBGprintf("HasCP_UTF8 = %d\n", bHasCP_UTF8);
+#endif
+}
+
+// Reports whether the first nSize bytes of pBuffer are all 7-bit values.
+// Returns TRUE when no byte exceeds 0x7F (also when nSize <= 0, since nothing
+// is inspected), FALSE as soon as a byte with the high bit set is found.
+BOOL IsUSASCII(const unsigned char* pBuffer, int nSize)
+{
+  const unsigned char* pCur = pBuffer;
+  int nLeft;
+
+  for (nLeft = nSize; nLeft > 0; nLeft--, pCur++)
+  {
+    // An unsigned byte is above 0x7F exactly when its top bit is set
+    if (*pCur & 0x80)
+      return FALSE;
+  }
+
+  return TRUE;
+}
+
+// Reports whether the first nSize wide characters of pBuffer are all 7-bit.
+// Returns FALSE at the first element greater than 0x7F, TRUE otherwise
+// (including when nSize <= 0, in which case nothing is inspected).
+BOOL IsUnicodeAscii(const wchar_t* pBuffer, int nSize)
+{
+  int nPos = 0;
+
+  while (nPos < nSize)
+  {
+    if (pBuffer[nPos] > 0x7F)
+      return FALSE;
+    nPos++;
+  }
+
+  return TRUE;
+}
+
+
+// Scans a NUL-terminated string encoded in UTF-8 to verify that it contains
+// only structurally valid sequences: every lead byte must be followed by the
+// number of continuation bytes (10xxxxxx) that it announces. Lead bytes for
+// 1- to 6-byte sequences are accepted; overlong forms and code point ranges
+// are not checked.
+//
+// pszInput: NUL-terminated byte string to check; NULL is rejected.
+// Returns 1 if the string contains only legitimate encoding sequences
+// (the empty string qualifies); otherwise returns 0.
+// From 'Secure Programming Cookbook', John Viega & Matt Messier, 2003
+int UTF8_IsValid(const unsigned char* pszInput)
+{
+  int nb, i;
+  const unsigned char* c;
+
+  if (pszInput == NULL)
+    return 0;
+
+  for (c = pszInput; *c; c += (nb + 1))
+  {
+    if (!(*c & 0x80))
+      nb = 0;
+    else if ((*c & 0xc0) == 0x80) return 0; // continuation byte without a lead byte
+    else if ((*c & 0xe0) == 0xc0) nb = 1;
+    else if ((*c & 0xf0) == 0xe0) nb = 2;
+    else if ((*c & 0xf8) == 0xf0) nb = 3;
+    else if ((*c & 0xfc) == 0xf8) nb = 4;
+    else if ((*c & 0xfe) == 0xfc) nb = 5;
+    else return 0; // 0xFE and 0xFF never start a sequence; nb would be left unset
+
+    // Look forward at the continuation bytes. A NUL terminator fails the
+    // check below, so the scan never crosses the end of the string.
+    for (i = 1; i <= nb; i++)
+      if ((*(c + i) & 0xc0) != 0x80)
+        return 0;
+  }
+
+  return 1;
+}
+
+
+// Returns an ANSI string in all cases.
+//
+// The input pointer itself is handed back (cast to non-const) when the text
+// is pure 7-bit ASCII, is not valid UTF-8, or cannot be decoded. Otherwise
+// `from` is passed to SAFE_FREE (defined elsewhere; presumably it frees the
+// buffer, so the caller must treat `from` as consumed) and the string
+// produced by utf8_decode() is returned instead.
+char* detect_decode_utf8(const char *from)
+{
+  char* decoded = NULL;
+
+  // Plain 7-bit text needs no conversion
+  if (IsUSASCII((unsigned char *)from, strlennull(from)))
+    return (char*)from;
+
+  // Not well-formed UTF-8: treat it as already being ANSI
+  if (!UTF8_IsValid((const unsigned char *)from))
+    return (char*)from;
+
+  // Conversion failed: fall back to the untouched input
+  if (!utf8_decode(from, &decoded))
+    return (char*)from;
+
+  SAFE_FREE((void**)&from);
+
+  return decoded;
+}
+
+
+/*
+ * The following UTF8 routines are
+ *
+ * Copyright (C) 2001 Peter Harris <peter.harris@hummingbird.com>
+ * Copyright (C) 2001 Edmund Grimley Evans <edmundo@rano.org>
+ *
+ * under a GPL license
+ *
+ * --------------------------------------------------------------
+ * Convert a string between UTF-8 and the locale's charset.
+ * Invalid bytes are replaced by '#', and characters that are
+ * not available in the target encoding are replaced by '?'.
+ *
+ * If the locale's charset is not set explicitly then it is
+ * obtained using nl_langinfo(CODESET), where available, the
+ * environment variable CHARSET, or assumed to be US-ASCII.
+ *
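+ * Return value of conversion functions (as documented by the original
+ * authors of these routines):
+ *
+ *  -1 : memory allocation failed
+ *   0 : data was converted exactly
+ *   1 : valid data was converted approximately (using '?')
+ *   2 : input was invalid (but still converted, using '#')
+ *   3 : unknown encoding (but still converted, using '?')
+ *
+ * NOTE: the functions in this file do not implement the full table above.
+ * utf8_encode() returns 0 on success and -1 on failure, while utf8_decode()
+ * and utf8_decode_static() return 1 on success and 0 on error.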
+ */
+
+
+
+/*
+ * Helper for make_utf8_string: stores the UTF-8 encoding of code point cp at
+ * out (nothing is stored when out is NULL, which is used for sizing) and
+ * returns the number of bytes the encoding occupies (1 to 4).
+ */
+static size_t utf8_put_code_point(unsigned long cp, unsigned char *out)
+{
+  if (cp < 0x80)
+  {
+    if (out != NULL)
+      out[0] = (unsigned char)cp;
+    return 1;
+  }
+  if (cp < 0x800)
+  {
+    if (out != NULL)
+    {
+      out[0] = (unsigned char)(0xc0 | (cp >> 6));
+      out[1] = (unsigned char)(0x80 | (cp & 0x3f));
+    }
+    return 2;
+  }
+  if (cp < 0x10000)
+  {
+    if (out != NULL)
+    {
+      out[0] = (unsigned char)(0xe0 | (cp >> 12));
+      out[1] = (unsigned char)(0x80 | ((cp >> 6) & 0x3f));
+      out[2] = (unsigned char)(0x80 | (cp & 0x3f));
+    }
+    return 3;
+  }
+  if (out != NULL)
+  {
+    out[0] = (unsigned char)(0xf0 | (cp >> 18));
+    out[1] = (unsigned char)(0x80 | ((cp >> 12) & 0x3f));
+    out[2] = (unsigned char)(0x80 | ((cp >> 6) & 0x3f));
+    out[3] = (unsigned char)(0x80 | (cp & 0x3f));
+  }
+  return 4;
+}
+
+/*
+ * Helper for make_utf8_string: reads the code point starting at
+ * unicode[*index] (which must not be the terminator) and advances *index
+ * past it. A UTF-16 high surrogate followed by a low surrogate is combined
+ * into one supplementary code point; an unpaired surrogate is returned as-is.
+ * Values above U+10FFFF (only possible when wchar_t is wider than 16 bits)
+ * are replaced by U+FFFD.
+ */
+static unsigned long utf8_next_wide_code_point(const wchar_t *unicode, size_t *index)
+{
+  unsigned long c = (unsigned long)unicode[(*index)++];
+
+  if (c >= 0xd800 && c <= 0xdbff)
+  {
+    /* May be the terminator; it is then simply not consumed */
+    unsigned long low = (unsigned long)unicode[*index];
+
+    if (low >= 0xdc00 && low <= 0xdfff)
+    {
+      (*index)++;
+      return 0x10000 + ((c - 0xd800) << 10) + (low - 0xdc00);
+    }
+  }
+  else if (c > 0x10ffff)
+  {
+    c = 0xfffd;
+  }
+
+  return c;
+}
+
+/*
+ * Convert a NUL-terminated wide-character string to UTF-8.
+ *
+ * Characters below U+0080 take one byte, below U+0800 two bytes and the rest
+ * of the BMP three bytes. A surrogate pair is encoded as a single four-byte
+ * sequence instead of two three-byte ones, which would not be valid UTF-8.
+ *
+ * unicode: NUL-terminated wide string; NULL yields NULL.
+ * Returns a malloc'ed, NUL-terminated UTF-8 string that the caller must
+ * free, or NULL if unicode is NULL or the allocation fails.
+ */
+unsigned char *make_utf8_string(const wchar_t *unicode)
+{
+  size_t size = 0;
+  size_t index = 0;
+  size_t out_index = 0;
+  unsigned char *out;
+
+  if (unicode == NULL)
+    return NULL;
+
+  /* first calculate the size of the target string */
+  while (unicode[index])
+    size += utf8_put_code_point(utf8_next_wide_code_point(unicode, &index), NULL);
+
+  out = (unsigned char *)malloc(size + 1);
+  if (out == NULL)
+    return NULL;
+
+  /* second pass: encode using the same helpers so the sizes always agree */
+  index = 0;
+  while (unicode[index])
+  {
+    unsigned long cp = utf8_next_wide_code_point(unicode, &index);
+
+    out_index += utf8_put_code_point(cp, out + out_index);
+  }
+  out[out_index] = 0x00;
+
+  return out;
+}
+
+
+
+/*
+ * Helper for make_unicode_string: decodes the UTF-8 sequence starting at s
+ * (s[0] must not be the terminator), stores the resulting code point in *cp
+ * and returns the number of bytes consumed.
+ *
+ * Well-formed 1- to 4-byte sequences are decoded. Anything else (a stray
+ * continuation byte, an unsupported lead byte, a sequence cut short, or a
+ * value above U+10FFFF) consumes exactly one byte and yields '#', so the
+ * scan can never run past the NUL terminator.
+ */
+static size_t utf8_read_code_point(const unsigned char *s, unsigned long *cp)
+{
+  unsigned long value;
+  size_t extra, i;
+  unsigned char lead = s[0];
+
+  if ((lead & 0x80) == 0)
+  {
+    *cp = lead;
+    return 1;
+  }
+  else if ((lead & 0xe0) == 0xc0)
+  {
+    value = lead & 0x1f;
+    extra = 1;
+  }
+  else if ((lead & 0xf0) == 0xe0)
+  {
+    value = lead & 0x0f;
+    extra = 2;
+  }
+  else if ((lead & 0xf8) == 0xf0)
+  {
+    value = lead & 0x07;
+    extra = 3;
+  }
+  else
+  {
+    *cp = '#';
+    return 1;
+  }
+
+  for (i = 1; i <= extra; i++)
+  {
+    /* The terminator is not a continuation byte, so this stops at the end */
+    if ((s[i] & 0xc0) != 0x80)
+    {
+      *cp = '#';
+      return 1;
+    }
+    value = (value << 6) | (unsigned long)(s[i] & 0x3f);
+  }
+
+  if (value > 0x10ffff)
+  {
+    *cp = '#';
+    return 1;
+  }
+
+  *cp = value;
+  return extra + 1;
+}
+
+/*
+ * Convert a NUL-terminated UTF-8 string to a wide-character string.
+ *
+ * Valid 1- to 3-byte sequences decode to one wide character each. A 4-byte
+ * sequence becomes a UTF-16 surrogate pair when wchar_t is 16 bits wide, or
+ * a single wide character otherwise. Invalid bytes are replaced by '#'.
+ *
+ * utf8: NUL-terminated UTF-8 string; NULL yields NULL.
+ * Returns a malloc'ed, NUL-terminated wide string that the caller must free,
+ * or NULL if utf8 is NULL or the allocation fails.
+ */
+wchar_t *make_unicode_string(const unsigned char *utf8)
+{
+  size_t size = 0, out_index = 0;
+  const unsigned char *p;
+  unsigned long cp;
+  wchar_t *out;
+
+  if (utf8 == NULL)
+    return NULL;
+
+  /* first calculate the size of the target string */
+  p = utf8;
+  while (*p)
+  {
+    p += utf8_read_code_point(p, &cp);
+    size += (cp >= 0x10000 && sizeof(wchar_t) < 4) ? 2 : 1;
+  }
+
+  out = (wchar_t *)malloc((size + 1) * sizeof(wchar_t));
+  if (out == NULL)
+    return NULL;
+
+  /* second pass: decode with the same helper so the sizes always agree */
+  p = utf8;
+  while (*p)
+  {
+    p += utf8_read_code_point(p, &cp);
+
+    if (cp >= 0x10000 && sizeof(wchar_t) < 4)
+    {
+      /* Does not fit in one 16-bit unit: split into a surrogate pair */
+      cp -= 0x10000;
+      out[out_index++] = (wchar_t)(0xd800 + (cp >> 10));
+      out[out_index++] = (wchar_t)(0xdc00 + (cp & 0x3ff));
+    }
+    else
+    {
+      out[out_index++] = (wchar_t)cp;
+    }
+  }
+  out[out_index] = 0;
+
+  return out;
+}
+
+
+
+/*
+ * Converts a string from the system ANSI code page (CP_ACP) to UTF-8.
+ *
+ * from: source string; its length is taken with strlennull() (defined
+ *       elsewhere; presumably a NULL-tolerant strlen - confirm). A
+ *       zero-length input produces an empty UTF-8 string.
+ * to:   receives a malloc'ed UTF-8 string that the caller must free. It is
+ *       set to NULL whenever the function fails, so it is never left
+ *       uninitialised.
+ *
+ * Returns 0 on success, -1 if the conversion or the allocation failed.
+ */
+int utf8_encode(const char *from, char **to)
+{
+  wchar_t *unicode;
+  int inlen, err;
+  int wchars = 0;
+
+  *to = NULL;
+  inlen = strlennull(from);
+
+  /* MultiByteToWideChar rejects a zero-length input, so only ask when there
+   * is something to convert */
+  if (inlen > 0)
+  {
+    wchars = MultiByteToWideChar(CP_ACP, MB_PRECOMPOSED, from, inlen, NULL, 0);
+
+    if (wchars == 0)
+    {
+      fprintf(stderr, "Unicode translation error %d\n", (int)GetLastError());
+      return -1;
+    }
+  }
+
+  unicode = (wchar_t*)_alloca((wchars + 1) * sizeof(wchar_t));
+  unicode[wchars] = 0;
+
+  if (wchars > 0)
+  {
+    err = MultiByteToWideChar(CP_ACP, MB_PRECOMPOSED, from, inlen, unicode, wchars);
+    if (err != wchars)
+    {
+      fprintf(stderr, "Unicode translation error %d\n", (int)GetLastError());
+      return -1;
+    }
+  }
+
+  /* On NT-based windows systems, we could use WideCharToMultiByte(), but
+   * MS doesn't actually have a consistent API across win32.
+   */
+  *to = (char*)make_utf8_string(unicode);
+  if (*to == NULL)
+    return -1; /* allocation failed inside make_utf8_string */
+
+  return 0;
+}
+
+
+
+/*
+ * Converts an ANSI string to a newly allocated UTF-8 string.
+ *
+ * szAnsi: source string; when strlennull() reports a length of zero the
+ *         result of null_strdup("") is returned instead of converting.
+ *
+ * Returns the string produced by utf8_encode(), or NULL if that conversion
+ * failed. The pointer is initialised before the call so that a failed
+ * conversion can never hand an indeterminate pointer back to the caller.
+ */
+char *ansi_to_utf8(const char *szAnsi)
+{
+  char *szUtf = NULL;
+
+  if (!strlennull(szAnsi))
+    return null_strdup("");
+
+  if (utf8_encode(szAnsi, &szUtf) != 0)
+    return NULL;
+
+  return szUtf;
+}
+
+
+
+/*
+ * Converts a string in code page wCp to a newly allocated UTF-8 string.
+ *
+ * szAnsi: source string; its length is taken with strlennull().
+ * wCp:    code page identifier passed to MultiByteToWideChar.
+ *
+ * Returns the result of make_utf8_string() on the converted text. If the
+ * input is empty or the conversion fails, the result is an empty string.
+ */
+char *ansi_to_utf8_codepage(const char *szAnsi, WORD wCp)
+{
+  wchar_t *unicode;
+  int inlen = strlennull(szAnsi);
+  int wchars = 0;
+
+  /* One wide character per input byte is the buffer size offered to the
+   * conversion call below, plus room for the terminator */
+  unicode = (wchar_t*)_alloca((inlen + 1) * sizeof(wchar_t));
+
+  if (inlen > 0)
+    wchars = MultiByteToWideChar(wCp, MB_PRECOMPOSED, szAnsi, inlen, unicode, inlen);
+
+  /* Terminate at the number of characters actually written (0 on failure).
+   * Multi-byte code pages yield fewer wide characters than input bytes, so
+   * terminating at inlen would leave uninitialised stack data in between. */
+  unicode[wchars] = 0;
+
+  return (char*)make_utf8_string(unicode);
+}
+
+
+
+// Converts a UTF-8 string to the system ANSI code page (CP_ACP).
+//
+// from: NUL-terminated UTF-8 string; NULL or malformed input is rejected.
+// to:   must point to a NULL pointer on entry (asserted in debug builds).
+//       On success it receives a heap-allocated string that the caller must
+//       free; on failure it is left NULL.
+//
+// When InitI18N() found CP_UTF8 support, the system conversion routines are
+// used; otherwise the text goes through make_unicode_string().
+//
+// Returns 0 on error, 1 on success
+int utf8_decode(const char *from, char **to)
+{
+  int nResult = 0;
+
+  _ASSERTE(!(*to)); // You passed a non-zero pointer, make sure it doesnt point to unfreed memory
+
+  // Validate the string
+  if (from == NULL || !UTF8_IsValid((const unsigned char *)from))
+    return 0;
+
+  // Use the native conversion routines when available
+  if (bHasCP_UTF8)
+  {
+    WCHAR *wszTemp = NULL;
+    int inlen = strlennull(from);
+
+    wszTemp = (WCHAR *)_alloca(sizeof(WCHAR) * (inlen + 1));
+
+    // Convert the UTF-8 string to UCS
+    if (MultiByteToWideChar(CP_UTF8, 0, from, -1, wszTemp, inlen + 1))
+    {
+      // Convert the UCS string to local ANSI codepage
+      *to = (char*)malloc(inlen+1);
+      if (*to == NULL)
+      {
+        fprintf(stderr, "Out of memory processing string to local charset\n");
+        return 0;
+      }
+
+      if (WideCharToMultiByte(CP_ACP, 0, wszTemp, -1, *to, inlen+1, NULL, NULL))
+      {
+        nResult = 1;
+      }
+      else
+      {
+        SAFE_FREE((void **)to);
+      }
+    }
+  }
+  else
+  {
+    wchar_t *unicode;
+    int chars;
+    int err;
+
+    unicode = make_unicode_string((const unsigned char *)from);
+    if(unicode == NULL)
+    {
+      fprintf(stderr, "Out of memory processing string from UTF8 to UNICODE16\n");
+      return 0;
+    }
+
+    // Ask for the required size; with a length of -1 this covers the terminator
+    chars = WideCharToMultiByte(CP_ACP, WC_COMPOSITECHECK, unicode, -1, NULL, 0, NULL, NULL);
+
+    if(chars == 0)
+    {
+      fprintf(stderr, "Unicode translation error %d\n", (int)GetLastError());
+      SAFE_FREE((void **)&unicode);
+      return 0;
+    }
+
+    // The allocation had been commented out, which made this branch fail
+    // unconditionally with the out-of-memory message below.
+    *to = (char*)calloc(chars + 1, sizeof(char));
+    if(*to == NULL)
+    {
+      fprintf(stderr, "Out of memory processing string to local charset\n");
+      SAFE_FREE((void **)&unicode);
+      return 0;
+    }
+
+    err = WideCharToMultiByte(CP_ACP, WC_COMPOSITECHECK, unicode, -1, *to, chars, NULL, NULL);
+    if (err != chars)
+    {
+      fprintf(stderr, "Unicode translation error %d\n", (int)GetLastError());
+      SAFE_FREE((void **)&unicode);
+      SAFE_FREE((void **)to);
+      return 0;
+    }
+
+    SAFE_FREE((void **)&unicode);
+
+    nResult = 1;
+  }
+
+  return nResult;
+}
+
+
+
+// Converts a UTF-8 string to the system ANSI code page (CP_ACP), writing the
+// result into a caller-supplied buffer instead of allocating one.
+//
+// from:    NUL-terminated UTF-8 string; NULL or malformed input is rejected.
+// to:      destination buffer (asserted non-NULL in debug builds).
+// to_size: size of the destination buffer in bytes.
+//
+// Both conversion paths report failure when WideCharToMultiByte fails; the
+// contents of `to` should not be relied upon in that case.
+//
+// Returns 0 on error, 1 on success
+int utf8_decode_static(const char *from, char *to, int to_size)
+{
+  int nResult = 0;
+
+  _ASSERTE(to); // You passed a zero pointer
+
+  // Validate the string
+  if (from == NULL || !UTF8_IsValid((const unsigned char *)from))
+    return 0;
+
+  // Use the native conversion routines when available
+  if (bHasCP_UTF8)
+  {
+    WCHAR *wszTemp = NULL;
+    int inlen = strlennull(from);
+
+    wszTemp = (WCHAR *)_alloca(sizeof(WCHAR) * (inlen + 1));
+
+    // Convert the UTF-8 string to UCS
+    if (MultiByteToWideChar(CP_UTF8, 0, from, -1, wszTemp, inlen + 1))
+    {
+      // Convert the UCS string to local ANSI codepage
+      if (WideCharToMultiByte(CP_ACP, 0, wszTemp, -1, to, to_size, NULL, NULL))
+      {
+        nResult = 1;
+      }
+    }
+  }
+  else
+  {
+    wchar_t *unicode = make_unicode_string((const unsigned char *)from);
+
+    if (unicode == NULL)
+    {
+      fprintf(stderr, "Out of memory processing string from UTF8 to UNICODE16\n");
+      return 0;
+    }
+
+    // Report success only when the conversion actually produced output,
+    // matching the native path above.
+    if (WideCharToMultiByte(CP_ACP, WC_COMPOSITECHECK, unicode, -1, to, to_size, NULL, NULL))
+    {
+      nResult = 1;
+    }
+
+    SAFE_FREE((void **)&unicode);
+  }
+
+  return nResult;
+}
author	watcherhd <watcherhd@e753b5eb-9565-29b2-b5c5-2cc6f99dfbcb>	2011-04-21 14:14:52 +0000
committer	watcherhd <watcherhd@e753b5eb-9565-29b2-b5c5-2cc6f99dfbcb>	2011-04-21 14:14:52 +0000
commit	cb4a46e7fbe62d788e66ed6121c717a2d22a4d7c (patch)
tree	30df260fdc5a1b5a7049c2f8cac8b7ef17513d6d /irc_mod/i18n.cpp
parent	19b6f534d2e784a1e120bf52c4aa07004798f473 (diff)