/////////////////////////////////////////////////////////////////////////////////////////
// Utf8CheckString - tells whether a string is structurally valid UTF-8 text that
// contains at least one multi-byte sequence
//
// Provenance: commit 1718ecd06e6bb305385e5dea79c5649af6e9470a (George Hazan,
// 2012-07-27, svn trunk@1207) "Utf8CheckString moved from dbtool to mir_core".
// The same commit exports the function from src/mir_core/mir_core.def as
// "Utf8CheckString @130".

// Returns the total length in bytes (2..4) of the sequence announced by a UTF-8
// lead byte, or 0 when the byte cannot start a multi-byte sequence: a bare
// continuation byte (10xxxxxx) or a pattern with five or more leading one bits.
static int Utf8LeadLength(unsigned char lead)
{
	if ((lead & 0xE0) == 0xC0) return 2;	// 110xxxxx
	if ((lead & 0xF0) == 0xE0) return 3;	// 1110xxxx
	if ((lead & 0xF8) == 0xF0) return 4;	// 11110xxx
	return 0;
}

// Parameters:
//   str - zero-terminated byte string to examine; may be NULL.
//
// Returns non-zero only if every byte with the high bit set belongs to a complete
// lead + continuation sequence AND at least one such sequence was seen. Returns 0
// for NULL, for an empty string, for pure 7-bit ASCII text (nothing proves it is
// UTF-8), for a sequence cut short by an ASCII byte, another lead byte or the end
// of the string, and for a continuation byte that appears without a lead byte.
//
// The check is structural only: overlong forms, UTF-16 surrogates and code points
// above U+10FFFF are not rejected.

MIR_CORE_DLL(BOOL) Utf8CheckString(const char* str)
{
	if (!str)
		return 0;

	int pending = 0;	// continuation bytes still owed by the current sequence
	int utf_found = 0;	// set once a complete multi-byte sequence has been read

	// Walk the bytes as unsigned so the bit tests do not depend on whether plain
	// char is signed on the target compiler.
	for (const unsigned char *p = (const unsigned char*)str; *p; p++) {
		if (pending) {
			// Inside a sequence: only 10xxxxxx is acceptable here. An ASCII byte
			// or a new lead byte means the sequence was truncated.
			if ((*p & 0xC0) != 0x80)
				return 0;

			if (--pending == 0)
				utf_found = 1;
		}
		else if (*p & 0x80) {
			// Start of a sequence: must be a real lead byte. A stray continuation
			// byte used to be counted as a complete one-byte "sequence" here,
			// which made single-byte code page text look like UTF-8.
			int len = Utf8LeadLength(*p);
			if (len == 0)
				return 0;

			pending = len - 1;
		}
		// Plain ASCII outside a sequence needs no action.
	}

	// A sequence still open at the terminator is a truncated character.
	return (utf_found && pending == 0);
}