Utf8CheckString moved from dbtool to mir_core

git-svn-id: http://svn.miranda-ng.org/main/trunk@1207 1316c22d-e87f-b044-9b9b-93d7a3e3ba9c
author: George Hazan <george.hazan@gmail.com> 2012-07-27 07:02:45 +0000
committer: George Hazan <george.hazan@gmail.com> 2012-07-27 07:02:45 +0000
commit: 1718ecd06e6bb305385e5dea79c5649af6e9470a (patch)
tree: 9a9e7fb3f1243d73765d17f33740518897f1c9b1 /src/mir_core
parent: 94e5f742581f4b670cdb6ca09c18c86ce05629a9 (diff)
2 files changed, 46 insertions, 0 deletions
diff --git a/src/mir_core/mir_core.def b/src/mir_core/mir_core.def
index 41585d0845..66324ec1ed 100644
--- a/src/mir_core/mir_core.def
+++ b/src/mir_core/mir_core.def
@@ -129,3 +129,4 @@ replaceStrW                   @126
 db_setCurrent                 @127
 CmdLine_GetOption             @128
 CmdLine_Parse                 @129
+Utf8CheckString               @130
diff --git a/src/mir_core/utf.cpp b/src/mir_core/utf.cpp
index ddf2d1ca9f..a9e7145973 100644
--- a/src/mir_core/utf.cpp
+++ b/src/mir_core/utf.cpp
@@ -404,3 +404,48 @@ MIR_CORE_DLL(char*) Utf8EncodeW(const wchar_t* src)
 
 	return result;
 }
+
+/////////////////////////////////////////////////////////////////////////////////////////
+// Utf8CheckString - tells whether a string holds well-formed multi-byte UTF-8 sequences
+// Returns nonzero only if str is non-NULL, contains at least one multi-byte sequence
+// and every such sequence is a 2..4 byte lead followed by exactly the number of
+// continuation bytes (10xxxxxx) it announces. NULL, empty and pure ASCII strings
+// yield 0. Code point ranges (overlong forms, surrogates) are not checked.
+
+MIR_CORE_DLL(BOOL) Utf8CheckString(const char* str)
+{
+	int expect_bytes = 0, utf_found = 0;
+
+	if (!str) return 0;
+
+	for (; *str; str++) {
+		unsigned char ch = (unsigned char)*str;
+
+		if ((ch & 0x80) == 0) {
+			/* ASCII byte: illegal in the middle of a multi-byte sequence */
+			if (expect_bytes)
+				return 0;
+			continue;
+		}
+		if (expect_bytes) {
+			/* inside a sequence: only continuation bytes are allowed */
+			if ((ch & 0xC0) != 0x80)
+				return 0;
+		}
+		else {
+			/* lead byte: its count of leading one bits is the sequence length */
+			for (; ch & 0x80; ch = (unsigned char)(ch << 1))
+				expect_bytes++;
+
+			/* 1 = stray continuation byte, >4 = not a legal lead byte (RFC 3629) */
+			if (expect_bytes < 2 || expect_bytes > 4)
+				return 0;
+		}
+
+		/* this byte is consumed; the sequence is complete when nothing is pending */
+		if (--expect_bytes == 0)
+			utf_found = 1;
+	}
+
+	return (utf_found && expect_bytes == 0);
+}
author	George Hazan <george.hazan@gmail.com>	2012-07-27 07:02:45 +0000
committer	George Hazan <george.hazan@gmail.com>	2012-07-27 07:02:45 +0000
commit	1718ecd06e6bb305385e5dea79c5649af6e9470a (patch)
tree	9a9e7fb3f1243d73765d17f33740518897f1c9b1 /src/mir_core
parent	94e5f742581f4b670cdb6ca09c18c86ce05629a9 (diff)