diff options
| author | George Hazan <george.hazan@gmail.com> | 2012-07-26 14:56:53 +0000 | 
|---|---|---|
| committer | George Hazan <george.hazan@gmail.com> | 2012-07-26 14:56:53 +0000 | 
| commit | d5d6965af7e69367babf40b7fb1d7fca8617cabe (patch) | |
| tree | 614f57382fcf26c3895d4166f67657c3b0e23e3e /plugins/DbChecker/src/utf.cpp | |
| parent | 78eb36ffcbd1b71eee1b912a927522fd3f19b347 (diff) | |
dbtool is divided into two parts: DbChecker & appendix for db3x_mmap plugin
git-svn-id: http://svn.miranda-ng.org/main/trunk@1195 1316c22d-e87f-b044-9b9b-93d7a3e3ba9c
Diffstat (limited to 'plugins/DbChecker/src/utf.cpp')
| -rw-r--r-- | plugins/DbChecker/src/utf.cpp | 69 | 
1 files changed, 69 insertions, 0 deletions
diff --git a/plugins/DbChecker/src/utf.cpp b/plugins/DbChecker/src/utf.cpp new file mode 100644 index 0000000000..ffb73d3280 --- /dev/null +++ b/plugins/DbChecker/src/utf.cpp @@ -0,0 +1,69 @@ +/*
 +
 +Miranda IM: the free IM client for Microsoft* Windows*
 +
 +Copyright 2000-2009 Miranda ICQ/IM project,
 +all portions of this codebase are copyrighted to the people
 +listed in contributors.txt.
 +
 + Copyright 2000 Alexandre Julliard of Wine project 
 + (UTF-8 conversion routines)
 +
 +This program is free software; you can redistribute it and/or
 +modify it under the terms of the GNU General Public License
 +as published by the Free Software Foundation; either version 2
 +of the License, or (at your option) any later version.
 +
 +This program is distributed in the hope that it will be useful,
 +but WITHOUT ANY WARRANTY; without even the implied warranty of
 +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 +GNU General Public License for more details.
 +
 +You should have received a copy of the GNU General Public License
 +along with this program; if not, write to the Free Software
 +Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
 +*/
 +
 +#include "dbchecker.h"
 +
 /*
  * Heuristic test: does the NUL-terminated byte string look like UTF-8 text?
  *
  * str - string to inspect; may be NULL.
  *
  * Returns true only when BOTH conditions hold:
  *   - every byte with the high bit set belongs to a structurally well-formed
  *     multi-byte sequence (a 2-, 3- or 4-byte lead followed by exactly the
  *     right number of 10xxxxxx continuation bytes), and
  *   - at least one such multi-byte sequence is present.
  * Consequently NULL, the empty string and pure 7-bit ASCII all yield false.
  *
  * The check is structural only: overlong encodings and UTF-8-encoded
  * surrogates are not rejected.
  */
 bool is_utf8_string(const char* str)
 {
 	if (!str)
 		return false;

 	/* Inspect the data as unsigned bytes: the signedness of plain char is
 	   implementation-defined, which makes bit tests on it fragile. */
 	const unsigned char *p = (const unsigned char *)str;
 	int pending = 0;        /* continuation bytes still owed by the current character */
 	bool utf_found = false; /* set once a complete multi-byte character has been seen */

 	for (; *p; p++) {
 		unsigned char ch = *p;

 		if (pending) {
 			/* Inside a multi-byte character: only 10xxxxxx is acceptable.
 			   An ASCII byte or a new lead byte here means a truncated sequence. */
 			if ((ch & 0xC0) != 0x80)
 				return false;
 			if (--pending == 0)
 				utf_found = true;
 		}
 		else if (ch & 0x80) {
 			/* Start of a multi-byte character: the lead byte fixes its length. */
 			if ((ch & 0xE0) == 0xC0)
 				pending = 1;    /* 110xxxxx: 2-byte sequence */
 			else if ((ch & 0xF0) == 0xE0)
 				pending = 2;    /* 1110xxxx: 3-byte sequence */
 			else if ((ch & 0xF8) == 0xF0)
 				pending = 3;    /* 11110xxx: 4-byte sequence */
 			else
 				/* Either a stray continuation byte (10xxxxxx) with no lead byte,
 				   or 0xF8..0xFF, which never starts a UTF-8 sequence. */
 				return false;
 		}
 		/* Plain ASCII outside a sequence needs no action. */
 	}

 	/* A sequence cut short by the terminating NUL is malformed. */
 	return utf_found && pending == 0;
 }
  | 
